regexpu-core
Advanced tools
Comparing version 5.2.2 to 5.3.0
{ | ||
"name": "regexpu-core", | ||
"version": "5.2.2", | ||
"version": "5.3.0", | ||
"description": "regexpu’s core functionality (i.e. `rewritePattern(pattern, flag)`), capable of translating ES6 Unicode regular expressions to ES5.", | ||
@@ -53,3 +53,3 @@ "homepage": "https://mths.be/regexpu", | ||
"regenerate-unicode-properties": "^10.1.0", | ||
"regjsgen": "^0.7.1", | ||
"@babel/regjsgen": "^0.8.0", | ||
"regjsparser": "^0.9.1", | ||
@@ -65,5 +65,5 @@ "unicode-match-property-ecmascript": "^2.0.0", | ||
"mocha": "^10.1.0", | ||
"regexpu-fixtures": "2.1.4", | ||
"regexpu-fixtures": "mathiasbynens/regexpu-fixtures", | ||
"@unicode/unicode-15.0.0": "^1.3.1" | ||
} | ||
} |
@@ -149,3 +149,11 @@ # regexpu-core [![Build status](https://github.com/mathiasbynens/regexpu-core/workflows/run-checks/badge.svg)](https://github.com/mathiasbynens/regexpu-core/actions?query=workflow%3Arun-checks) [![regexpu-core on npm](https://img.shields.io/npm/v/regexpu-core)](https://www.npmjs.com/package/regexpu-core) | ||
- `modifiers` - [Inline `m`/`s`/`i` modifiers](https://github.com/tc39/proposal-regexp-modifiers) | ||
```js | ||
rewritePattern('(?i:[a-z])[a-z]', '', { | ||
modifiers: 'transform' | ||
}); | ||
// → '(?:[a-zA-Z])([a-z])' | ||
``` | ||
#### Miscellaneous options | ||
@@ -167,2 +175,16 @@ | ||
- `onNewFlags` | ||
This option is a function that gets called to pass the flags that the resulting pattern must be interpreted with. | ||
```js | ||
rewritePattern('abc', 'um', { | ||
unicodeFlag: 'transform', | ||
onNewFlags(flags) { | ||
console.log(flags); | ||
// → 'm' | ||
} | ||
}) | ||
``` | ||
### Caveats | ||
@@ -169,0 +191,0 @@ |
'use strict'; | ||
const generate = require('regjsgen').generate; | ||
const generate = require('@babel/regjsgen').generate; | ||
const parse = require('regjsparser').parse; | ||
@@ -30,12 +30,16 @@ const regenerate = require('regenerate'); | ||
const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF); | ||
const NEWLINE_SET = regenerate().add( | ||
// `LineTerminator`s (https://mths.be/es6#sec-line-terminators): | ||
0x000A, // Line Feed <LF> | ||
0x000D, // Carriage Return <CR> | ||
0x2028, // Line Separator <LS> | ||
0x2029 // Paragraph Separator <PS> | ||
); | ||
// Prepare a Regenerate set containing all code points that are supposed to be | ||
// matched by `/./u`. https://mths.be/es6#sec-atom | ||
const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points | ||
.remove( | ||
// minus `LineTerminator`s (https://mths.be/es6#sec-line-terminators): | ||
0x000A, // Line Feed <LF> | ||
0x000D, // Carriage Return <CR> | ||
0x2028, // Line Separator <LS> | ||
0x2029 // Paragraph Separator <PS> | ||
); | ||
.remove(NEWLINE_SET); | ||
@@ -130,2 +134,13 @@ const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => { | ||
/**
 * Reports whether ASCII case folding has to be applied, i.e. whether the
 * `i` modifier is currently recorded as enabled in `config.modifiersData`.
 *
 * @returns {boolean} `true` when `config.modifiersData.i` is truthy.
 */
function configNeedCaseFoldAscii() {
	const inlineIgnoreCase = config.modifiersData.i;
	return Boolean(inlineIgnoreCase);
}
/**
 * Reports whether Unicode case folding has to be applied.
 *
 * Folding is never needed when the `i` modifier has been explicitly set to
 * `false`, nor when the `u` flag is not being transformed. Otherwise it is
 * needed when either the `i` modifier or the `ignoreCase` flag is set.
 *
 * @returns {boolean} Whether Unicode case folding is required.
 */
function configNeedCaseFoldUnicode() {
	const inlineIgnoreCase = config.modifiersData.i;
	// Strict comparison on purpose: `undefined` (modifier not set) must not
	// be treated like an explicit `false`.
	const explicitlyDisabled = inlineIgnoreCase === false;
	if (explicitlyDisabled || !config.transform.unicodeFlag) {
		return false;
	}
	return inlineIgnoreCase ? true : Boolean(config.flags.ignoreCase);
}
// Given a range of code points, add any case-folded code points in that range | ||
@@ -136,3 +151,3 @@ // to a set. | ||
do { | ||
const folded = caseFold(min); | ||
const folded = caseFold(min, configNeedCaseFoldAscii(), configNeedCaseFoldUnicode()); | ||
if (folded) { | ||
@@ -147,3 +162,3 @@ $this.add(folded); | ||
do { | ||
const folded = caseFold(min); | ||
const folded = caseFold(min, configNeedCaseFoldAscii(), configNeedCaseFoldUnicode()); | ||
if (folded) { | ||
@@ -157,3 +172,9 @@ $this.remove(folded); | ||
const update = (item, pattern) => { | ||
let tree = parse(pattern, config.useUnicodeFlag ? 'u' : ''); | ||
let tree = parse(pattern, config.useUnicodeFlag ? 'u' : '', { | ||
lookbehind: true, | ||
namedGroups: true, | ||
unicodePropertyEscape: true, | ||
unicodeSet: true, | ||
modifiers: true, | ||
}); | ||
switch (tree.type) { | ||
@@ -182,4 +203,13 @@ case 'characterClass': | ||
const caseFold = (codePoint) => { | ||
return iuMappings.get(codePoint) || false; | ||
/**
 * Collects the case-folded counterparts of a code point.
 *
 * @param {number} codePoint - The code point to fold.
 * @param {boolean} includeAscii - Whether to add the ASCII letter of the
 *   opposite case (`A`–`Z` ↔ `a`–`z`).
 * @param {boolean} includeUnicode - Whether to add the mapping(s) that
 *   `iuMappings` holds for the code point.
 * @returns {number[]|false} The folded code points, or `false` when there
 *   are none.
 */
const caseFold = (codePoint, includeAscii, includeUnicode) => {
	const mapped = includeUnicode ? iuMappings.get(codePoint) : undefined;

	let folded;
	if (!mapped) {
		folded = [];
	} else if (typeof mapped === 'number') {
		folded = [mapped];
	} else {
		// A non-number entry is treated as an array of code points. Copy it so
		// the `push` calls below never mutate the array stored in the shared
		// `iuMappings` table (which would accumulate duplicates across calls).
		folded = mapped.slice();
	}

	if (includeAscii) {
		if (codePoint >= 0x41 && codePoint <= 0x5A) {
			// Uppercase ASCII letter: add its lowercase counterpart.
			folded.push(codePoint + 0x20);
		} else if (codePoint >= 0x61 && codePoint <= 0x7A) {
			// Lowercase ASCII letter: add its uppercase counterpart.
			folded.push(codePoint - 0x20);
		}
	}

	return folded.length === 0 ? false : folded;
};
@@ -325,4 +355,7 @@ | ||
const maybeFold = (codePoint) => { | ||
if (config.flags.ignoreCase && config.transform.unicodeFlag) { | ||
const folded = caseFold(codePoint); | ||
const caseFoldAscii = configNeedCaseFoldAscii(); | ||
const caseFoldUnicode = configNeedCaseFoldUnicode(); | ||
if (caseFoldAscii || caseFoldUnicode) { | ||
const folded = caseFold(codePoint, caseFoldAscii, caseFoldUnicode); | ||
if (folded) { | ||
@@ -338,2 +371,5 @@ return [codePoint, folded]; | ||
const caseFoldAscii = configNeedCaseFoldAscii(); | ||
const caseFoldUnicode = configNeedCaseFoldUnicode(); | ||
for (const string of classStrings.strings) { | ||
@@ -346,7 +382,7 @@ if (string.characters.length === 1) { | ||
let stringifiedString; | ||
if (config.flags.ignoreCase && config.transform.unicodeFlag) { | ||
if (caseFoldUnicode || caseFoldAscii) { | ||
stringifiedString = ''; | ||
for (const ch of string.characters) { | ||
let set = regenerate(ch.codePoint); | ||
const folded = caseFold(ch.codePoint); | ||
const folded = maybeFold(ch.codePoint); | ||
if (folded) set.add(folded); | ||
@@ -393,2 +429,5 @@ stringifiedString += set.toString(regenerateOptions); | ||
const caseFoldAscii = configNeedCaseFoldAscii(); | ||
const caseFoldUnicode = configNeedCaseFoldUnicode(); | ||
for (const item of characterClassItem.body) { | ||
@@ -405,4 +444,5 @@ switch (item.type) { | ||
handlePositive.range(data, min, max); | ||
if (config.flags.ignoreCase && config.transform.unicodeFlag) { | ||
if (caseFoldAscii || caseFoldUnicode) { | ||
handlePositive.iuRange(data, min, max); | ||
data.transformed = true; | ||
} | ||
@@ -466,3 +506,34 @@ break; | ||
} else { | ||
update(characterClassItem, `(?!${setStr})[\\s\\S]`) | ||
if (config.flags.unicode) { | ||
if (config.flags.ignoreCase) { | ||
const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET); | ||
// Assumption: singleChars do not contain lone surrogates. | ||
// Regex like /[^\ud800]/u is not supported | ||
const surrogateOrBMPSetStr = singleChars | ||
.clone() | ||
.remove(astralCharsSet) | ||
.addRange(0xd800, 0xdfff) | ||
.toString({ bmpOnly: true }); | ||
// Don't generate negative lookahead for astral characters | ||
// because the case folding is not working anyway as we break | ||
// code points into surrogate pairs. | ||
const astralNegativeSetStr = ASTRAL_SET | ||
.clone() | ||
.remove(astralCharsSet) | ||
.toString(regenerateOptions); | ||
// The transform here does not support lone surrogates. | ||
update( | ||
characterClassItem, | ||
`(?!${surrogateOrBMPSetStr})[\\s\\S]|${astralNegativeSetStr}` | ||
); | ||
} else { | ||
// Generate negative set directly when case folding is not involved. | ||
update( | ||
characterClassItem, | ||
UNICODE_SET.clone().remove(singleChars).toString(regenerateOptions) | ||
); | ||
} | ||
} else { | ||
update(characterClassItem, `(?!${setStr})[\\s\\S]`); | ||
} | ||
} | ||
@@ -490,2 +561,27 @@ } else { | ||
/**
 * Rewrites a group carrying inline modifiers (`item.modifierFlags`).
 *
 * The modifier flags are removed from the node, the node's behavior is set to
 * `'ignore'`, and the group's body is processed while the enabled/disabled
 * flags are recorded in `config.modifiersData`. The previous modifier state
 * is put back once the body has been processed.
 *
 * @param {Object} item - The group node; it is mutated in place.
 * @param {Object} regenerateOptions - Passed through to `processTerm`.
 * @param {Object} groups - Passed through to `processTerm`.
 * @returns {Object} The same `item`, after rewriting.
 */
const processModifiers = (item, regenerateOptions, groups) => {
	const { enabling, disabling } = item.modifierFlags;

	// The modifiers are compiled away, so they must not survive on the node.
	delete item.modifierFlags;
	item.behavior = 'ignore';

	// Snapshot the surrounding modifier state so it can be reinstated after
	// this group's body has been handled.
	const savedModifiers = Object.assign({}, config.modifiersData);

	for (const flag of enabling.split('')) {
		config.modifiersData[flag] = true;
	}
	for (const flag of disabling.split('')) {
		config.modifiersData[flag] = false;
	}

	item.body = item.body.map((term) => processTerm(term, regenerateOptions, groups));

	config.modifiersData = savedModifiers;

	return item;
};
const processTerm = (item, regenerateOptions, groups) => { | ||
@@ -497,5 +593,5 @@ switch (item.type) { | ||
item, | ||
getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions) | ||
getUnicodeDotSet(config.flags.dotAll || config.modifiersData.s).toString(regenerateOptions) | ||
); | ||
} else if (config.transform.dotAllFlag) { | ||
} else if (config.transform.dotAllFlag || config.modifiersData.s) { | ||
// TODO: consider changing this at the regenerate level. | ||
@@ -571,2 +667,5 @@ update(item, '[\\s\\S]'); | ||
} | ||
if (item.modifierFlags && config.transform.modifiers) { | ||
return processModifiers(item, regenerateOptions, groups); | ||
} | ||
/* falls through */ | ||
@@ -595,8 +694,4 @@ case 'quantifier': | ||
const set = regenerate(codePoint); | ||
if (config.flags.ignoreCase && config.transform.unicodeFlag) { | ||
const folded = caseFold(codePoint); | ||
if (folded) { | ||
set.add(folded); | ||
} | ||
} | ||
const folded = maybeFold(codePoint); | ||
set.add(folded); | ||
update(item, set.toString(regenerateOptions)); | ||
@@ -641,4 +736,10 @@ break; | ||
case 'anchor': | ||
if (config.modifiersData.m) { | ||
if (item.kind == 'start') { | ||
update(item, `(?:^|(?<=${NEWLINE_SET.toString()}))`); | ||
} else if (item.kind == 'end') { | ||
update(item, `(?:$|(?=${NEWLINE_SET.toString()}))`); | ||
} | ||
} | ||
case 'empty': | ||
case 'group': | ||
// Nothing to do here. | ||
@@ -661,2 +762,3 @@ break; | ||
'dotAll': false, | ||
'multiline': false, | ||
}, | ||
@@ -669,3 +771,9 @@ 'transform': { | ||
'namedGroups': false, | ||
'modifiers': false, | ||
}, | ||
'modifiersData': { | ||
'i': undefined, | ||
's': undefined, | ||
'm': undefined, | ||
}, | ||
get useUnicodeFlag() { | ||
@@ -690,2 +798,3 @@ return (this.flags.unicode || this.flags.unicodeSets) && !this.transform.unicodeFlag; | ||
break; | ||
case 'modifiers': | ||
case 'unicodeSetsFlag': | ||
@@ -697,4 +806,5 @@ if (value != null && value !== false && value !== 'parse' && value !== 'transform') { | ||
case 'onNamedGroup': | ||
case 'onNewFlags': | ||
if (value != null && typeof value !== 'function') { | ||
throw new Error('.onNamedGroup must be a function.'); | ||
throw new Error(`.${key} must be a function.`); | ||
} | ||
@@ -718,2 +828,3 @@ break; | ||
config.flags.dotAll = hasFlag(flags, 's'); | ||
config.flags.multiline = hasFlag(flags, 'm'); | ||
@@ -729,5 +840,11 @@ config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag'); | ||
config.transform.namedGroups = transform(options, 'namedGroups'); | ||
config.transform.modifiers = transform(options, 'modifiers'); | ||
config.modifiersData.i = undefined; | ||
config.modifiersData.s = undefined; | ||
config.modifiersData.m = undefined; | ||
const regjsparserFeatures = { | ||
'unicodeSet': Boolean(options && options.unicodeSetsFlag), | ||
'modifiers': Boolean(options && options.modifiers), | ||
@@ -754,5 +871,50 @@ // Enable every stable RegExp feature by default | ||
const tree = parse(pattern, flags, regjsparserFeatures); | ||
if (config.transform.modifiers) { | ||
if (/\(\?[a-z]*-[a-z]+:/.test(pattern)) { | ||
// The pattern _likely_ contains inline disabled modifiers. | ||
// We need to traverse the tree to make sure that they are actually modifiers, and to collect them. | ||
const allDisabledModifiers = Object.create(null) | ||
const itemStack = [tree]; | ||
let node; | ||
while (node = itemStack.pop(), node != undefined) { | ||
if (Array.isArray(node)) { | ||
Array.prototype.push.apply(itemStack, node); | ||
} else if (typeof node == 'object' && node != null) { | ||
for (const key of Object.keys(node)) { | ||
const value = node[key]; | ||
if (key == 'modifierFlags') { | ||
if (value.disabling.length > 0){ | ||
value.disabling.split("").forEach((flag)=>{ | ||
allDisabledModifiers[flag] = true | ||
}); | ||
} | ||
} else if (typeof value == 'object' && value != null) { | ||
itemStack.push(value); | ||
} | ||
} | ||
} | ||
} | ||
for (const flag of Object.keys(allDisabledModifiers)) { | ||
config.modifiersData[flag] = true; | ||
} | ||
} | ||
} | ||
// Note: `processTerm` mutates `tree` and `groups`. | ||
processTerm(tree, regenerateOptions, groups); | ||
assertNoUnmatchedReferences(groups); | ||
const onNewFlags = options && options.onNewFlags; | ||
if (onNewFlags) onNewFlags(flags.split('').filter((flag) => { | ||
switch (flag) { | ||
case 'u': | ||
return !config.transform.unicodeFlag; | ||
case 'v': | ||
return !config.transform.unicodeSetsFlag; | ||
default: | ||
return !config.modifiersData[flag]; | ||
} | ||
}).join('')); | ||
return generate(tree); | ||
@@ -759,0 +921,0 @@ }; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
53540
1579
227
1
+ Added @babel/regjsgen@^0.8.0
+ Added @babel/regjsgen@0.8.0 (transitive)
- Removed regjsgen@^0.7.1
- Removed regjsgen@0.7.1 (transitive)