regexpu-core
Advanced tools
Comparing version 4.8.0 to 5.0.0
{ | ||
"name": "regexpu-core", | ||
"version": "4.8.0", | ||
"version": "5.0.0", | ||
"description": "regexpu’s core functionality (i.e. `rewritePattern(pattern, flag)`), capable of translating ES6 Unicode regular expressions to ES5.", | ||
@@ -52,5 +52,5 @@ "homepage": "https://mths.be/regexpu", | ||
"regenerate": "^1.4.2", | ||
"regenerate-unicode-properties": "^9.0.0", | ||
"regjsgen": "^0.5.2", | ||
"regjsparser": "^0.7.0", | ||
"regenerate-unicode-properties": "^10.0.1", | ||
"regjsgen": "^0.6.0", | ||
"regjsparser": "^0.8.2", | ||
"unicode-match-property-ecmascript": "^2.0.0", | ||
@@ -57,0 +57,0 @@ "unicode-match-property-value-ecmascript": "^2.0.0" |
159
README.md
@@ -54,86 +54,119 @@ # regexpu-core [![Build status](https://github.com/mathiasbynens/regexpu-core/workflows/run-checks/badge.svg)](https://github.com/mathiasbynens/regexpu-core/actions?query=workflow%3Arun-checks) [![regexpu-core on npm](https://img.shields.io/npm/v/regexpu-core)](https://www.npmjs.com/package/regexpu-core) | ||
#### `dotAllFlag` (default: `false`) | ||
#### Stable regular expression features | ||
Setting this option to `true` enables support for [the `s` (`dotAll`) flag](https://github.com/mathiasbynens/es-regexp-dotall-flag). | ||
These options can be set to `false` or `'transform'`. When using `'transform'`, the corresponding features are compiled to older syntax that can run in older browsers. When using `false` (the default), they are left as-is in the output, so the transforms for more modern features may rely on them being available. | ||
```js | ||
rewritePattern('.'); | ||
// → '[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF]' | ||
- `unicodeFlag` - The `u` flag, enabling support for Unicode code point escapes in the form `\u{...}`. | ||
rewritePattern('.', '', { | ||
'dotAllFlag': true | ||
}); | ||
// → '[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF]' | ||
```js | ||
rewritePattern('\\u{ab}', '', { | ||
unicodeFlag: 'transform' | ||
}); | ||
// → '\\u{ab}' | ||
rewritePattern('.', 's', { | ||
'dotAllFlag': true | ||
}); | ||
// → '[\\0-\\uFFFF]' | ||
rewritePattern('\\u{ab}', 'u', { | ||
unicodeFlag: 'transform' | ||
}); | ||
// → '\\xAB' | ||
``` | ||
rewritePattern('.', 'su', { | ||
'dotAllFlag': true | ||
}); | ||
// → '(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])' | ||
``` | ||
- `dotAllFlag` - The [`s` (`dotAll`) flag](https://github.com/mathiasbynens/es-regexp-dotall-flag). | ||
#### `unicodePropertyEscape` (default: `false`) | ||
```js | ||
rewritePattern('.', '', { | ||
dotAllFlag: 'transform' | ||
}); | ||
// → '[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF]' | ||
Setting this option to `true` enables [support for Unicode property escapes](property-escapes.md): | ||
rewritePattern('.', 's', { | ||
dotAllFlag: 'transform' | ||
}); | ||
// → '[\\0-\\uFFFF]' | ||
```js | ||
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', { | ||
'unicodePropertyEscape': true | ||
}); | ||
// → '(?:\\uD811[\\uDC00-\\uDE46])' | ||
``` | ||
rewritePattern('.', 'su', { | ||
dotAllFlag: 'transform' | ||
}); | ||
// → '(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])' | ||
``` | ||
#### `lookbehind` (default: `false`) | ||
- `unicodePropertyEscapes` - [Unicode property escapes](property-escapes.md). | ||
Setting this option to `true` enables support for [lookbehind assertions](https://github.com/tc39/proposal-regexp-lookbehind). | ||
By default they are compiled to Unicode code point escapes of the form `\u{...}`. If the `unicodeFlag` option is set to `'transform'` they often result in larger output, although there are cases (such as `\p{Lu}`) where it actually _decreases_ the output size. | ||
```js | ||
rewritePattern('(?<=.)a', '', { | ||
'lookbehind': true | ||
}); | ||
// → '(?<=[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF])a' | ||
``` | ||
```js | ||
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', { | ||
unicodePropertyEscapes: 'transform' | ||
}); | ||
// → '[\\u{14400}-\\u{14646}]' | ||
#### `namedGroup` (default: `false`) | ||
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', { | ||
unicodeFlag: 'transform', | ||
unicodePropertyEscapes: 'transform' | ||
}); | ||
// → '(?:\\uD811[\\uDC00-\\uDE46])' | ||
``` | ||
Setting this option to `true` enables support for [named capture groups](https://github.com/tc39/proposal-regexp-named-groups). | ||
- `namedGroups` - [Named capture groups](https://github.com/tc39/proposal-regexp-named-groups). | ||
```js | ||
rewritePattern('(?<name>.)\k<name>', '', { | ||
'namedGroup': true | ||
}); | ||
// → '(.)\1' | ||
``` | ||
```js | ||
rewritePattern('(?<name>.)\\k<name>', '', { | ||
namedGroups: 'transform' | ||
}); | ||
// → '(.)\\1' | ||
``` | ||
#### `onNamedGroup` | ||
#### Experimental regular expression features | ||
This option is a function that gets called when a named capture group is found. It receives two parameters: | ||
the name of the group, and its index. | ||
These options can be set to `false`, `'parse'`, or `'transform'`. When using `'transform'`, the corresponding features are compiled to older syntax that can run in older browsers. When using `'parse'`, they are parsed and left as-is in the output pattern. When using `false` (the default), they result in a syntax error if used. | ||
```js | ||
rewritePattern('(?<name>.)\k<name>', '', { | ||
'namedGroup': true, | ||
onNamedGroup(name, index) { | ||
console.log(name, index); | ||
// → 'name', 1 | ||
} | ||
}); | ||
``` | ||
Once these features become stable (when the proposals are accepted as part of ECMAScript), they will be parsed by default and thus `'parse'` will behave like `false`. | ||
#### `useUnicodeFlag` (default: `false`) | ||
- `unicodeSetsFlag` - [The `v` (`unicodeSets`) flag](https://github.com/tc39/proposal-regexp-set-notation) | ||
Setting this option to `true` enables the use of Unicode code point escapes of the form `\u{…}`. Note that in regular expressions, such escape sequences only work correctly when the ES2015 `u` flag is set. Enabling this setting often results in more compact output, although there are cases (such as `\p{Lu}`) where it actually _increases_ the output size. | ||
```js | ||
rewritePattern('[\\p{Emoji}&&\\p{ASCII}]', 'v', { | ||
unicodeSetsFlag: 'transform' | ||
}); | ||
// → '[#\\*0-9]' | ||
``` | ||
```js | ||
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', { | ||
'unicodePropertyEscape': true, | ||
'useUnicodeFlag': true | ||
}); | ||
// → '[\\u{14400}-\\u{14646}]' | ||
``` | ||
By default, patterns with the `v` flag are transformed to patterns with the `u` flag. If you want to downlevel them more you can set the `unicodeFlag: 'transform'` option. | ||
```js | ||
rewritePattern('[^[a-h]&&[f-z]]', 'v', { | ||
unicodeSetsFlag: 'transform' | ||
}); | ||
// → '[^f-h]' (to be used with /u) | ||
``` | ||
```js | ||
rewritePattern('[^[a-h]&&[f-z]]', 'v', { | ||
unicodeSetsFlag: 'transform', | ||
unicodeFlag: 'transform' | ||
}); | ||
// → '(?:(?![f-h])[\\s\\S])' (to be used without /u) | ||
``` | ||
#### Miscellaneous options | ||
- `onNamedGroup` | ||
This option is a function that gets called when a named capture group is found. It receives two parameters: | ||
the name of the group, and its index. | ||
```js | ||
rewritePattern('(?<name>.)\\k<name>', '', { | ||
onNamedGroup(name, index) { | ||
console.log(name, index); | ||
// → 'name', 1 | ||
} | ||
}); | ||
``` | ||
### Caveats | ||
- [Lookbehind assertions](https://github.com/tc39/proposal-regexp-lookbehind) cannot be transformed to older syntax. | ||
- When using `namedGroups: 'transform'`, _regexpu-core_ only takes care of the _syntax_: you will still need a runtime wrapper around the regular expression to populate the `.groups` property of the match result returned by `RegExp.prototype.exec()` (and `String.prototype.match()`). If you are using _regexpu-core_ via Babel, it's handled automatically. | ||
## For maintainers | ||
@@ -140,0 +173,0 @@ |
@@ -14,5 +14,2 @@ 'use strict'; | ||
const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF); | ||
// Without the `u` flag, the range stops at 0xFFFF. | ||
// https://mths.be/es6#sec-pattern-semantics | ||
const BMP_SET = regenerate().addRange(0x0, 0xFFFF); | ||
@@ -66,4 +63,9 @@ // Prepare a Regenerate set containing all code points that are supposed to be | ||
} catch (exception) {} | ||
// It’s not a `General_Category` value, so check if it’s a binary | ||
// property. Note: `unicodeMatchProperty` throws on invalid properties. | ||
// It’s not a `General_Category` value, so check if it’s a property | ||
// of strings. | ||
try { | ||
return getUnicodePropertyValueSet('Property_of_Strings', value); | ||
} catch (exception) {} | ||
// Lastly, check if it’s a binary property of single code points. | ||
// Note: `unicodeMatchProperty` throws on invalid properties. | ||
const property = unicodeMatchProperty(value); | ||
@@ -86,7 +88,27 @@ return getUnicodePropertyValueSet(property); | ||
if (isNegative) { | ||
return UNICODE_SET.clone().remove(set); | ||
if (set.strings) { | ||
throw new Error('Cannot negate Unicode property of strings'); | ||
} | ||
return { | ||
characters: UNICODE_SET.clone().remove(set.characters), | ||
strings: new Set() | ||
}; | ||
} | ||
return set.clone(); | ||
return { | ||
characters: set.characters.clone(), | ||
strings: new Set(set.strings || []) | ||
}; | ||
}; | ||
// Wraps the set computed for a `\p{…}` / `\P{…}` escape in a fresh
// character-class data record: the code points become `singleChars`, and
// any strings are attached as `longStrings` (flagging the record as one
// that may include strings).
const getUnicodePropertyEscapeCharacterClassData = (property, isNegative) => {
	const propertySet = getUnicodePropertyEscapeSet(property, isNegative);
	const classData = getCharacterClassEmptyData();
	classData.singleChars = propertySet.characters;
	if (propertySet.strings.size === 0) {
		// No strings: keep the empty `longStrings` set of the fresh record.
		return classData;
	}
	classData.longStrings = propertySet.strings;
	classData.maybeIncludesStrings = true;
	return classData;
};
// Given a range of code points, add any case-folded code points in that range | ||
@@ -104,2 +126,12 @@ // to a set. | ||
}; | ||
// For every code point from `min` up to `max`, removes its case-folded
// counterpart (when `caseFold` yields one) from this set. The code points
// of the range itself are not removed here. Like a do/while loop, the body
// always runs at least once, for `min`. Returns the set to allow chaining.
regenerate.prototype.iuRemoveRange = function(min, max) {
	let codePoint = min;
	for (;;) {
		const folded = caseFold(codePoint);
		if (folded) {
			this.remove(folded);
		}
		if (!(++codePoint <= max)) {
			break;
		}
	}
	return this;
};
@@ -135,14 +167,211 @@ const update = (item, pattern) => { | ||
const processCharacterClass = (characterClassItem, regenerateOptions) => { | ||
const set = regenerate(); | ||
// Builds the handler used to fold one operand of a character class into the
// accumulated class data for the given set `action`.
//
// `action` is one of 'union', 'union-negative', 'intersection' or
// 'subtraction'. The returned object exposes five callbacks, each of which
// mutates the `data` record passed as first argument:
//   - `single(data, cp)`           a single code point
//   - `regSet(data, set2)`         a Regenerate set
//   - `range(data, start, end)`    a range of code points
//   - `iuRange(data, start, end)`  the case-folded code points of a range
//   - `nested(data, nestedData)`   another character-class data record
//
// For 'intersection' and 'subtraction', `data.first` tells whether the
// operand is the first one; in that case it seeds `data` instead of being
// combined with it.
//
// Throws an `Error` for any other `action`.
const buildHandler = (action) => {
	switch (action) {
		case 'union':
			return {
				single: (data, cp) => {
					data.singleChars.add(cp);
				},
				regSet: (data, set2) => {
					data.singleChars.add(set2);
				},
				range: (data, start, end) => {
					data.singleChars.addRange(start, end);
				},
				iuRange: (data, start, end) => {
					data.singleChars.iuAddRange(start, end);
				},
				nested: (data, nestedData) => {
					data.singleChars.add(nestedData.singleChars);
					for (const str of nestedData.longStrings) data.longStrings.add(str);
					if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
				}
			};
		case 'union-negative': {
			// Adds the complement of `set2` to what has been collected so far.
			const regSet = (data, set2) => {
				data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
			};
			return {
				single: (data, cp) => {
					const unicode = UNICODE_SET.clone();
					data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
				},
				iuRange: (data, start, end) => {
					data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end).add(data.singleChars);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);
					// A negated operand must not carry strings.
					if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
				}
			};
		}
		case 'intersection': {
			const regSet = (data, set2) => {
				if (data.first) data.singleChars = set2;
				else data.singleChars.intersection(set2);
			};
			return {
				// Operands made only of code points clear the collected strings
				// and reset `maybeIncludesStrings`.
				single: (data, cp) => {
					data.singleChars = data.first || data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				regSet: (data, set) => {
					regSet(data, set);
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.intersection(regenerate().addRange(start, end));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				iuRange: (data, start, end) => {
					if (data.first) data.singleChars.iuAddRange(start, end);
					else data.singleChars.intersection(regenerate().iuAddRange(start, end));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);
					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Keep only the strings present in both operands.
						for (const str of data.longStrings) {
							if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
						if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
					}
				}
			};
		}
		case 'subtraction': {
			const regSet = (data, set2) => {
				if (data.first) data.singleChars.add(set2);
				else data.singleChars.remove(set2);
			};
			return {
				single: (data, cp) => {
					if (data.first) data.singleChars.add(cp);
					else data.singleChars.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.removeRange(start, end);
				},
				iuRange: (data, start, end) => {
					if (data.first) data.singleChars.iuAddRange(start, end);
					else data.singleChars.iuRemoveRange(start, end);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);
					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Drop the strings that also occur in the subtracted operand.
						for (const str of data.longStrings) {
							if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
					}
				}
			};
		}
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			// Report the offending `action`; no character class item is in
			// scope in this function.
			throw new Error(`Unknown set action: ${ action }`);
	}
};
// Creates a fresh, empty character-class data record. `transformed` is
// initialised from the current `unicodeFlag` transform setting, and `first`
// starts out as `true`.
const getCharacterClassEmptyData = () => {
	const { unicodeFlag } = config.transform;
	return {
		transformed: unicodeFlag,
		singleChars: regenerate(),
		longStrings: new Set(),
		hasEmptyString: false,
		first: true,
		maybeIncludesStrings: false
	};
};
// Returns the code points to use for `codePoint`: always the code point
// itself, followed by its case-folded counterpart when the `i` flag is set,
// the `u` flag is being transformed and `caseFold` yields a value.
const maybeFold = (codePoint) => {
	const codePoints = [codePoint];
	if (config.flags.ignoreCase && config.transform.unicodeFlag) {
		const folded = caseFold(codePoint);
		if (folded) {
			codePoints.push(folded);
		}
	}
	return codePoints;
};
// Computes the character-class data for a `classStrings` item.
//
// Strings consisting of exactly one character are added to `singleChars`
// (together with their case-folded counterpart, if `maybeFold` returns
// one). Every other string is serialised and stored in `longStrings`, and
// the data is flagged as possibly including strings.
const computeClassStrings = (classStrings, regenerateOptions) => {
	const result = getCharacterClassEmptyData();
	for (const { characters } of classStrings.strings) {
		if (characters.length === 1) {
			for (const codePoint of maybeFold(characters[0].codePoint)) {
				result.singleChars.add(codePoint);
			}
			continue;
		}
		const foldCase = config.flags.ignoreCase && config.transform.unicodeFlag;
		const pieces = characters.map((character) => {
			if (!foldCase) {
				return generate(character);
			}
			// Each character becomes a set holding the character and, when
			// there is one, its case-folded counterpart.
			const alternatives = regenerate(character.codePoint);
			const folded = caseFold(character.codePoint);
			if (folded) {
				alternatives.add(folded);
			}
			return alternatives.toString(regenerateOptions);
		});
		result.longStrings.add(pieces.join(''));
		result.maybeIncludesStrings = true;
	}
	return result;
};
const computeCharacterClass = (characterClassItem, regenerateOptions) => { | ||
let data = getCharacterClassEmptyData(); | ||
let handlePositive; | ||
let handleNegative; | ||
switch (characterClassItem.kind) { | ||
case 'union': | ||
handlePositive = buildHandler('union'); | ||
handleNegative = buildHandler('union-negative'); | ||
break; | ||
case 'intersection': | ||
handlePositive = buildHandler('intersection'); | ||
handleNegative = buildHandler('subtraction'); | ||
break; | ||
case 'subtraction': | ||
handlePositive = buildHandler('subtraction'); | ||
handleNegative = buildHandler('intersection'); | ||
break; | ||
// The `default` clause is only here as a safeguard; it should never be | ||
// reached. Code coverage tools should ignore it. | ||
/* istanbul ignore next */ | ||
default: | ||
throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`); | ||
} | ||
for (const item of characterClassItem.body) { | ||
switch (item.type) { | ||
case 'value': | ||
set.add(item.codePoint); | ||
if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) { | ||
const folded = caseFold(item.codePoint); | ||
if (folded) { | ||
set.add(folded); | ||
} | ||
} | ||
maybeFold(item.codePoint).forEach((cp) => { | ||
handlePositive.single(data, cp); | ||
}); | ||
break; | ||
@@ -152,17 +381,32 @@ case 'characterClassRange': | ||
const max = item.max.codePoint; | ||
set.addRange(min, max); | ||
if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) { | ||
set.iuAddRange(min, max); | ||
handlePositive.range(data, min, max); | ||
if (config.flags.ignoreCase && config.transform.unicodeFlag) { | ||
handlePositive.iuRange(data, min, max); | ||
} | ||
break; | ||
case 'characterClassEscape': | ||
set.add(getCharacterClassEscapeSet( | ||
handlePositive.regSet(data, getCharacterClassEscapeSet( | ||
item.value, | ||
config.unicode, | ||
config.ignoreCase | ||
config.flags.unicode, | ||
config.flags.ignoreCase | ||
)); | ||
break; | ||
case 'unicodePropertyEscape': | ||
set.add(getUnicodePropertyEscapeSet(item.value, item.negative)); | ||
const nestedData = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative); | ||
handlePositive.nested(data, nestedData); | ||
data.transformed = | ||
data.transformed || | ||
config.transform.unicodePropertyEscapes || | ||
(config.transform.unicodeSetsFlag && nestedData.maybeIncludesStrings); | ||
break; | ||
case 'characterClass': | ||
const handler = item.negative ? handleNegative : handlePositive; | ||
const res = computeCharacterClass(item, regenerateOptions); | ||
handler.nested(data, res); | ||
data.transformed = true; | ||
break; | ||
case 'classStrings': | ||
handlePositive.nested(data, computeClassStrings(item, regenerateOptions)); | ||
data.transformed = true; | ||
break; | ||
// The `default` clause is only here as a safeguard; it should never be | ||
@@ -174,8 +418,39 @@ // reached. Code coverage tools should ignore it. | ||
} | ||
data.first = false; | ||
} | ||
if (characterClassItem.negative) { | ||
update(characterClassItem, `(?!${set.toString(regenerateOptions)})[\\s\\S]`) | ||
} else { | ||
update(characterClassItem, set.toString(regenerateOptions)); | ||
if (characterClassItem.negative && data.maybeIncludesStrings) { | ||
throw new SyntaxError('Cannot negate set containing strings'); | ||
} | ||
return data; | ||
} | ||
const processCharacterClass = ( | ||
characterClassItem, | ||
regenerateOptions, | ||
computed = computeCharacterClass(characterClassItem, regenerateOptions), | ||
) => { | ||
const negative = characterClassItem.negative; | ||
const { singleChars, transformed, longStrings } = computed; | ||
if (transformed) { | ||
const setStr = singleChars.toString(regenerateOptions); | ||
if (negative) { | ||
if (config.useUnicodeFlag) { | ||
update(characterClassItem, `[^${setStr[0] === '[' ? setStr.slice(1, -1) : setStr}]`) | ||
} else { | ||
update(characterClassItem, `(?!${setStr})[\\s\\S]`) | ||
} | ||
} else { | ||
const hasEmptyString = longStrings.has(''); | ||
const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length); | ||
if (setStr !== '[]' || longStrings.size === 0) { | ||
pieces.splice(pieces.length - (hasEmptyString ? 1 : 0), 0, setStr); | ||
} | ||
update(characterClassItem, pieces.join('|')); | ||
} | ||
} | ||
return characterClassItem; | ||
@@ -199,10 +474,8 @@ }; | ||
case 'dot': | ||
if (config.useDotAllFlag) { | ||
break; | ||
} else if (config.unicode) { | ||
if (config.transform.unicodeFlag) { | ||
update( | ||
item, | ||
getUnicodeDotSet(config.dotAll).toString(regenerateOptions) | ||
getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions) | ||
); | ||
} else if (config.dotAll) { | ||
} else if (config.transform.dotAllFlag) { | ||
// TODO: consider changing this at the regenerate level. | ||
@@ -216,7 +489,17 @@ update(item, '[\\s\\S]'); | ||
case 'unicodePropertyEscape': | ||
if (config.unicodePropertyEscape) { | ||
const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative); | ||
if (data.maybeIncludesStrings) { | ||
if (!config.flags.unicodeSets) { | ||
throw new Error( | ||
'Properties of strings are only supported when using the unicodeSets (v) flag.' | ||
); | ||
} | ||
if (config.transform.unicodeSetsFlag) { | ||
data.transformed = true; | ||
item = processCharacterClass(item, regenerateOptions, data); | ||
} | ||
} else if (config.transform.unicodePropertyEscapes) { | ||
update( | ||
item, | ||
getUnicodePropertyEscapeSet(item.value, item.negative) | ||
.toString(regenerateOptions) | ||
data.singleChars.toString(regenerateOptions) | ||
); | ||
@@ -226,10 +509,12 @@ } | ||
case 'characterClassEscape': | ||
update( | ||
item, | ||
getCharacterClassEscapeSet( | ||
item.value, | ||
config.unicode, | ||
config.ignoreCase | ||
).toString(regenerateOptions) | ||
); | ||
if (config.transform.unicodeFlag) { | ||
update( | ||
item, | ||
getCharacterClassEscapeSet( | ||
item.value, | ||
/* config.transform.unicodeFlag implies config.flags.unicode */ true, | ||
config.flags.ignoreCase | ||
).toString(regenerateOptions) | ||
); | ||
} | ||
break; | ||
@@ -240,3 +525,3 @@ case 'group': | ||
} | ||
if (item.name && config.namedGroup) { | ||
if (item.name && config.transform.namedGroups) { | ||
const name = item.name.value; | ||
@@ -276,3 +561,3 @@ | ||
const set = regenerate(codePoint); | ||
if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) { | ||
if (config.flags.ignoreCase && config.transform.unicodeFlag) { | ||
const folded = caseFold(codePoint); | ||
@@ -316,31 +601,85 @@ if (folded) { | ||
const config = { | ||
'ignoreCase': false, | ||
'unicode': false, | ||
'dotAll': false, | ||
'useDotAllFlag': false, | ||
'useUnicodeFlag': false, | ||
'unicodePropertyEscape': false, | ||
'namedGroup': false | ||
'flags': { | ||
'ignoreCase': false, | ||
'unicode': false, | ||
'unicodeSets': false, | ||
'dotAll': false, | ||
}, | ||
'transform': { | ||
'dotAllFlag': false, | ||
'unicodeFlag': false, | ||
'unicodeSetsFlag': false, | ||
'unicodePropertyEscapes': false, | ||
'namedGroups': false, | ||
}, | ||
get useUnicodeFlag() { | ||
return (this.flags.unicode || this.flags.unicodeSets) && !this.transform.unicodeFlag; | ||
} | ||
}; | ||
// Validates the `options` object given to `rewritePattern`.
//
// A missing (falsy) `options` is accepted as-is. Otherwise every own
// enumerable key must be a known option holding an allowed value; `null`
// and `undefined` values are always accepted. Throws an `Error` describing
// the first offending key.
const validateOptions = (options) => {
	if (!options) {
		return;
	}
	// Options that only accept `false` or 'transform'.
	const transformOnlyKeys = new Set([
		'dotAllFlag',
		'unicodeFlag',
		'unicodePropertyEscapes',
		'namedGroups'
	]);
	const isDisabled = (value) => value == null || value === false;

	Object.keys(options).forEach((key) => {
		const value = options[key];
		if (transformOnlyKeys.has(key)) {
			if (!isDisabled(value) && value !== 'transform') {
				throw new Error(`.${key} must be false (default) or 'transform'.`);
			}
		} else if (key === 'unicodeSetsFlag') {
			if (!isDisabled(value) && value !== 'parse' && value !== 'transform') {
				throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
			}
		} else if (key === 'onNamedGroup') {
			if (value != null && typeof value !== 'function') {
				throw new Error('.onNamedGroup must be a function.');
			}
		} else {
			throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	});
};
// Tells whether the `flags` string contains `flag`; a missing or empty
// `flags` never does.
const hasFlag = (flags, flag) => {
	if (!flags) {
		return false;
	}
	return flags.includes(flag);
};

// Tells whether the option called `name` is set to 'transform'; a missing
// `options` object never is.
const transform = (options, name) => {
	if (!options) {
		return false;
	}
	return options[name] === 'transform';
};
const rewritePattern = (pattern, flags, options) => { | ||
config.unicode = flags && flags.includes('u'); | ||
validateOptions(options); | ||
config.flags.unicode = hasFlag(flags, 'u'); | ||
config.flags.unicodeSets = hasFlag(flags, 'v'); | ||
config.flags.ignoreCase = hasFlag(flags, 'i'); | ||
config.flags.dotAll = hasFlag(flags, 's'); | ||
config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag'); | ||
config.transform.unicodeFlag = (config.flags.unicode || config.flags.unicodeSets) && transform(options, 'unicodeFlag'); | ||
config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag'); | ||
// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform' | ||
config.transform.unicodePropertyEscapes = config.flags.unicode && ( | ||
transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes') | ||
); | ||
config.transform.namedGroups = transform(options, 'namedGroups'); | ||
const regjsparserFeatures = { | ||
'unicodePropertyEscape': config.unicode, | ||
'unicodeSet': Boolean(options && options.unicodeSetsFlag), | ||
// Enable every stable RegExp feature by default | ||
'unicodePropertyEscape': true, | ||
'namedGroups': true, | ||
'lookbehind': options && options.lookbehind | ||
'lookbehind': true, | ||
}; | ||
config.ignoreCase = flags && flags.includes('i'); | ||
const supportDotAllFlag = options && options.dotAllFlag; | ||
config.dotAll = supportDotAllFlag && flags && flags.includes('s'); | ||
config.namedGroup = options && options.namedGroup; | ||
config.useDotAllFlag = options && options.useDotAllFlag; | ||
config.useUnicodeFlag = options && options.useUnicodeFlag; | ||
config.unicodePropertyEscape = options && options.unicodePropertyEscape; | ||
if (supportDotAllFlag && config.useDotAllFlag) { | ||
throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!'); | ||
} | ||
const regenerateOptions = { | ||
'hasUnicodeFlag': config.useUnicodeFlag, | ||
'bmpOnly': !config.unicode | ||
'bmpOnly': !config.flags.unicode | ||
}; | ||
const groups = { | ||
@@ -352,2 +691,3 @@ 'onNamedGroup': options && options.onNamedGroup, | ||
}; | ||
const tree = parse(pattern, flags, regjsparserFeatures); | ||
@@ -354,0 +694,0 @@ // Note: `processTerm` mutates `tree` and `groups`. |
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
46484
1399
202
0
+ Added regenerate-unicode-properties@10.1.1 (transitive)
+ Added regjsgen@0.6.0 (transitive)
+ Added regjsparser@0.8.4 (transitive)
- Removed regenerate-unicode-properties@9.0.0 (transitive)
- Removed regjsgen@0.5.2 (transitive)
- Removed regjsparser@0.7.0 (transitive)
Updated regjsgen@^0.6.0
Updated regjsparser@^0.8.2