Socket
Socket
Sign in · Demo · Install

regexpu-core

Package Overview
Dependencies
Maintainers
4
Versions
49
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

regexpu-core - npm Package Compare versions

Comparing version 5.2.2 to 5.3.0

6

package.json
{
"name": "regexpu-core",
"version": "5.2.2",
"version": "5.3.0",
"description": "regexpu’s core functionality (i.e. `rewritePattern(pattern, flag)`), capable of translating ES6 Unicode regular expressions to ES5.",

@@ -53,3 +53,3 @@ "homepage": "https://mths.be/regexpu",

"regenerate-unicode-properties": "^10.1.0",
"regjsgen": "^0.7.1",
"@babel/regjsgen": "^0.8.0",
"regjsparser": "^0.9.1",

@@ -65,5 +65,5 @@ "unicode-match-property-ecmascript": "^2.0.0",

"mocha": "^10.1.0",
"regexpu-fixtures": "2.1.4",
"regexpu-fixtures": "mathiasbynens/regexpu-fixtures",
"@unicode/unicode-15.0.0": "^1.3.1"
}
}

@@ -149,3 +149,11 @@ # regexpu-core [![Build status](https://github.com/mathiasbynens/regexpu-core/workflows/run-checks/badge.svg)](https://github.com/mathiasbynens/regexpu-core/actions?query=workflow%3Arun-checks) [![regexpu-core on npm](https://img.shields.io/npm/v/regexpu-core)](https://www.npmjs.com/package/regexpu-core)

- `modifiers` - [Inline `m`/`s`/`i` modifiers](https://github.com/tc39/proposal-regexp-modifiers)
```js
rewritePattern('(?i:[a-z])[a-z]', '', {
modifiers: 'transform'
});
// → '(?:[a-zA-Z])([a-z])'
```
#### Miscellaneous options

@@ -167,2 +175,16 @@

- `onNewFlags`
This option is a function that gets called to pass the flags that the resulting pattern must be interpreted with.
```js
rewritePattern('abc', 'um', {
unicodeFlag: 'transform',
onNewFlags(flags) {
console.log(flags);
// → 'm'
}
})
```
### Caveats

@@ -169,0 +191,0 @@

'use strict';
const generate = require('regjsgen').generate;
const generate = require('@babel/regjsgen').generate;
const parse = require('regjsparser').parse;

@@ -30,12 +30,16 @@ const regenerate = require('regenerate');

// Regenerate set holding every astral code point, i.e. U+10000 through
// U+10FFFF (everything outside the Basic Multilingual Plane).
const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF);
// Regenerate set holding the four ECMAScript line terminator code points.
// Kept as a shared constant so the same set can be removed from the dot set
// and reused when rewriting `^` / `$` anchors under the `m` modifier.
const NEWLINE_SET = regenerate().add(
// `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
0x000A, // Line Feed <LF>
0x000D, // Carriage Return <CR>
0x2028, // Line Separator <LS>
0x2029 // Paragraph Separator <PS>
);
// Prepare a Regenerate set containing all code points that are supposed to be
// matched by `/./u`. https://mths.be/es6#sec-atom
const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points
.remove(
// minus `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
0x000A, // Line Feed <LF>
0x000D, // Carriage Return <CR>
0x2028, // Line Separator <LS>
0x2029 // Paragraph Separator <PS>
);
.remove(NEWLINE_SET);

@@ -130,2 +134,13 @@ const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {

/**
 * Tells whether ASCII letters have to be case-folded by the transform itself.
 *
 * That is the case whenever `config.modifiersData.i` is truthy.
 *
 * @returns {boolean} `true` when `config.modifiersData.i` is truthy.
 */
function configNeedCaseFoldAscii() {
  const { i: ignoreCaseModifier } = config.modifiersData;
  return Boolean(ignoreCaseModifier);
}
/**
 * Tells whether the Unicode (`iu`) case-folding table has to be consulted.
 *
 * Folding is skipped when the `i` modifier is explicitly switched off
 * (`config.modifiersData.i === false`) or when the `u` flag is not being
 * transformed. Otherwise it is needed if either the `i` modifier is on or the
 * pattern carries the `i` flag.
 *
 * @returns {boolean} Whether Unicode case folding applies right now.
 */
function configNeedCaseFoldUnicode() {
  // `i` is `undefined` when no modifier touched it, `false` when disabled.
  const { i: ignoreCaseModifier } = config.modifiersData;
  if (ignoreCaseModifier === false || !config.transform.unicodeFlag) {
    return false;
  }
  return Boolean(ignoreCaseModifier || config.flags.ignoreCase);
}
// Given a range of code points, add any case-folded code points in that range

@@ -136,3 +151,3 @@ // to a set.

do {
const folded = caseFold(min);
const folded = caseFold(min, configNeedCaseFoldAscii(), configNeedCaseFoldUnicode());
if (folded) {

@@ -147,3 +162,3 @@ $this.add(folded);

do {
const folded = caseFold(min);
const folded = caseFold(min, configNeedCaseFoldAscii(), configNeedCaseFoldUnicode());
if (folded) {

@@ -157,3 +172,9 @@ $this.remove(folded);

const update = (item, pattern) => {
let tree = parse(pattern, config.useUnicodeFlag ? 'u' : '');
let tree = parse(pattern, config.useUnicodeFlag ? 'u' : '', {
lookbehind: true,
namedGroups: true,
unicodePropertyEscape: true,
unicodeSet: true,
modifiers: true,
});
switch (tree.type) {

@@ -182,4 +203,13 @@ case 'characterClass':

const caseFold = (codePoint) => {
return iuMappings.get(codePoint) || false;
/**
 * Collects the case-folded counterparts of a single code point.
 *
 * @param {number} codePoint - The code point to fold.
 * @param {boolean} includeAscii - When truthy, add the opposite-case ASCII
 *   letter for `A`–`Z` (0x41–0x5A) and `a`–`z` (0x61–0x7A).
 * @param {boolean} includeUnicode - When truthy, include the entry stored for
 *   `codePoint` in `iuMappings` (either a single number or an array).
 * @returns {number[]|false} The folded code points, or `false` if there are
 *   none.
 */
const caseFold = (codePoint, includeAscii, includeUnicode) => {
  const mapped = includeUnicode ? iuMappings.get(codePoint) : undefined;
  let folded = [];
  if (mapped) {
    // Always work on a fresh array: pushing onto the array stored in
    // `iuMappings` would leak the ASCII counterpart into the shared table and
    // make repeated calls accumulate duplicates.
    folded = typeof mapped === 'number' ? [mapped] : mapped.slice();
  }
  if (includeAscii) {
    if (codePoint >= 0x41 && codePoint <= 0x5A) {
      // Uppercase ASCII letter → add its lowercase form.
      folded.push(codePoint + 0x20);
    } else if (codePoint >= 0x61 && codePoint <= 0x7A) {
      // Lowercase ASCII letter → add its uppercase form.
      folded.push(codePoint - 0x20);
    }
  }
  return folded.length === 0 ? false : folded;
};

@@ -325,4 +355,7 @@

const maybeFold = (codePoint) => {
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
const folded = caseFold(codePoint);
const caseFoldAscii = configNeedCaseFoldAscii();
const caseFoldUnicode = configNeedCaseFoldUnicode();
if (caseFoldAscii || caseFoldUnicode) {
const folded = caseFold(codePoint, caseFoldAscii, caseFoldUnicode);
if (folded) {

@@ -338,2 +371,5 @@ return [codePoint, folded];

const caseFoldAscii = configNeedCaseFoldAscii();
const caseFoldUnicode = configNeedCaseFoldUnicode();
for (const string of classStrings.strings) {

@@ -346,7 +382,7 @@ if (string.characters.length === 1) {

let stringifiedString;
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
if (caseFoldUnicode || caseFoldAscii) {
stringifiedString = '';
for (const ch of string.characters) {
let set = regenerate(ch.codePoint);
const folded = caseFold(ch.codePoint);
const folded = maybeFold(ch.codePoint);
if (folded) set.add(folded);

@@ -393,2 +429,5 @@ stringifiedString += set.toString(regenerateOptions);

const caseFoldAscii = configNeedCaseFoldAscii();
const caseFoldUnicode = configNeedCaseFoldUnicode();
for (const item of characterClassItem.body) {

@@ -405,4 +444,5 @@ switch (item.type) {

handlePositive.range(data, min, max);
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
if (caseFoldAscii || caseFoldUnicode) {
handlePositive.iuRange(data, min, max);
data.transformed = true;
}

@@ -466,3 +506,34 @@ break;

} else {
update(characterClassItem, `(?!${setStr})[\\s\\S]`)
if (config.flags.unicode) {
if (config.flags.ignoreCase) {
const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET);
// Assumption: singleChars do not contain lone surrogates.
// Regex like /[^\ud800]/u is not supported
const surrogateOrBMPSetStr = singleChars
.clone()
.remove(astralCharsSet)
.addRange(0xd800, 0xdfff)
.toString({ bmpOnly: true });
// Don't generate negative lookahead for astral characters
// because the case folding is not working anyway as we break
// code points into surrogate pairs.
const astralNegativeSetStr = ASTRAL_SET
.clone()
.remove(astralCharsSet)
.toString(regenerateOptions);
// The transform here does not support lone surrogates.
update(
characterClassItem,
`(?!${surrogateOrBMPSetStr})[\\s\\S]|${astralNegativeSetStr}`
);
} else {
// Generate negative set directly when case folding is not involved.
update(
characterClassItem,
UNICODE_SET.clone().remove(singleChars).toString(regenerateOptions)
);
}
} else {
update(characterClassItem, `(?!${setStr})[\\s\\S]`);
}
}

@@ -490,2 +561,27 @@ } else {

/**
 * Rewrites a group that carries inline modifiers (e.g. `(?i-s:...)`).
 *
 * The group's `modifierFlags` are removed and its `behavior` is set to
 * `'ignore'`. While the group body is processed, `config.modifiersData`
 * reflects the enabled (`true`) and disabled (`false`) modifiers; the previous
 * modifier state is put back afterwards.
 *
 * @param {object} item - Group node with a `modifierFlags` object
 *   (`enabling` / `disabling` flag strings) and a `body` array. Mutated.
 * @param {object} regenerateOptions - Forwarded unchanged to `processTerm`.
 * @param {object} groups - Forwarded unchanged to `processTerm`.
 * @returns {object} The same `item`, with its body terms processed.
 */
const processModifiers = (item, regenerateOptions, groups) => {
  const { enabling, disabling } = item.modifierFlags;

  delete item.modifierFlags;
  item.behavior = 'ignore';

  // Snapshot the outer modifier state so nested groups do not leak into it.
  const previousModifiers = Object.assign({}, config.modifiersData);

  for (const flag of enabling.split('')) {
    config.modifiersData[flag] = true;
  }
  for (const flag of disabling.split('')) {
    config.modifiersData[flag] = false;
  }

  try {
    item.body = item.body.map((term) => processTerm(term, regenerateOptions, groups));
  } finally {
    // Restore even when processing a term throws, so the shared `config`
    // never stays stuck with this group's modifiers.
    config.modifiersData = previousModifiers;
  }

  return item;
};
const processTerm = (item, regenerateOptions, groups) => {

@@ -497,5 +593,5 @@ switch (item.type) {

item,
getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions)
getUnicodeDotSet(config.flags.dotAll || config.modifiersData.s).toString(regenerateOptions)
);
} else if (config.transform.dotAllFlag) {
} else if (config.transform.dotAllFlag || config.modifiersData.s) {
// TODO: consider changing this at the regenerate level.

@@ -571,2 +667,5 @@ update(item, '[\\s\\S]');

}
if (item.modifierFlags && config.transform.modifiers) {
return processModifiers(item, regenerateOptions, groups);
}
/* falls through */

@@ -595,8 +694,4 @@ case 'quantifier':

const set = regenerate(codePoint);
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
const folded = caseFold(codePoint);
if (folded) {
set.add(folded);
}
}
const folded = maybeFold(codePoint);
set.add(folded);
update(item, set.toString(regenerateOptions));

@@ -641,4 +736,10 @@ break;

case 'anchor':
if (config.modifiersData.m) {
if (item.kind == 'start') {
update(item, `(?:^|(?<=${NEWLINE_SET.toString()}))`);
} else if (item.kind == 'end') {
update(item, `(?:$|(?=${NEWLINE_SET.toString()}))`);
}
}
case 'empty':
case 'group':
// Nothing to do here.

@@ -661,2 +762,3 @@ break;

'dotAll': false,
'multiline': false,
},

@@ -669,3 +771,9 @@ 'transform': {

'namedGroups': false,
'modifiers': false,
},
'modifiersData': {
'i': undefined,
's': undefined,
'm': undefined,
},
get useUnicodeFlag() {

@@ -690,2 +798,3 @@ return (this.flags.unicode || this.flags.unicodeSets) && !this.transform.unicodeFlag;

break;
case 'modifiers':
case 'unicodeSetsFlag':

@@ -697,4 +806,5 @@ if (value != null && value !== false && value !== 'parse' && value !== 'transform') {

case 'onNamedGroup':
case 'onNewFlags':
if (value != null && typeof value !== 'function') {
throw new Error('.onNamedGroup must be a function.');
throw new Error(`.${key} must be a function.`);
}

@@ -718,2 +828,3 @@ break;

config.flags.dotAll = hasFlag(flags, 's');
config.flags.multiline = hasFlag(flags, 'm');

@@ -729,5 +840,11 @@ config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');

config.transform.namedGroups = transform(options, 'namedGroups');
config.transform.modifiers = transform(options, 'modifiers');
config.modifiersData.i = undefined;
config.modifiersData.s = undefined;
config.modifiersData.m = undefined;
const regjsparserFeatures = {
'unicodeSet': Boolean(options && options.unicodeSetsFlag),
'modifiers': Boolean(options && options.modifiers),

@@ -754,5 +871,50 @@ // Enable every stable RegExp feature by default

const tree = parse(pattern, flags, regjsparserFeatures);
if (config.transform.modifiers) {
if (/\(\?[a-z]*-[a-z]+:/.test(pattern)) {
// the pattern _likely_ contains inline disabled modifiers
// we need to traverse the tree to make sure that they are actually modifiers and to collect them
const allDisabledModifiers = Object.create(null)
const itemStack = [tree];
let node;
while (node = itemStack.pop(), node != undefined) {
if (Array.isArray(node)) {
Array.prototype.push.apply(itemStack, node);
} else if (typeof node == 'object' && node != null) {
for (const key of Object.keys(node)) {
const value = node[key];
if (key == 'modifierFlags') {
if (value.disabling.length > 0){
value.disabling.split("").forEach((flag)=>{
allDisabledModifiers[flag] = true
});
}
} else if (typeof value == 'object' && value != null) {
itemStack.push(value);
}
}
}
}
for (const flag of Object.keys(allDisabledModifiers)) {
config.modifiersData[flag] = true;
}
}
}
// Note: `processTerm` mutates `tree` and `groups`.
processTerm(tree, regenerateOptions, groups);
assertNoUnmatchedReferences(groups);
const onNewFlags = options && options.onNewFlags;
if (onNewFlags) onNewFlags(flags.split('').filter((flag) => {
switch (flag) {
case 'u':
return !config.transform.unicodeFlag;
case 'v':
return !config.transform.unicodeSetsFlag;
default:
return !config.modifiersData[flag];
}
}).join(''));
return generate(tree);

@@ -759,0 +921,0 @@ };

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc