shift-regexp-acceptor
Advanced tools
Comparing version 1.0.1 to 2.0.0
@@ -7,4 +7,36 @@ 'use strict'; | ||
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); | ||
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); /** | ||
* Copyright 2018 Shape Security, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License") | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
/* eslint-disable no-use-before-define */ | ||
var _unicodeMatchPropertyValueEcmascript = require('unicode-match-property-value-ecmascript'); | ||
var _unicodeMatchPropertyValueEcmascript2 = _interopRequireDefault(_unicodeMatchPropertyValueEcmascript); | ||
var _unicodeMatchPropertyEcmascript = require('unicode-match-property-ecmascript'); | ||
var _unicodeMatchPropertyEcmascript2 = _interopRequireDefault(_unicodeMatchPropertyEcmascript); | ||
var _unicodePropertyAliasesEcmascript = require('unicode-property-aliases-ecmascript'); | ||
var _unicodePropertyAliasesEcmascript2 = _interopRequireDefault(_unicodePropertyAliasesEcmascript); | ||
var _unicode = require('./unicode'); | ||
/**
 * Normalises a required module so that callers can always read `.default`.
 *
 * @param {*} obj - the value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`;
 *   otherwise a fresh wrapper `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
/**
 * Produces a fresh array from `arr` so it can be spread into a call or
 * concatenated without touching the input.
 *
 * @param {*} arr - an array, or any value `Array.from` accepts.
 * @returns {Array} an index-by-index copy for true arrays, otherwise the
 *   result of `Array.from(arr)`.
 */
function _toConsumableArray(arr) {
  if (!Array.isArray(arr)) {
    return Array.from(arr);
  }
  var length = arr.length;
  var copy = new Array(length);
  var i = 0;
  while (i < length) {
    copy[i] = arr[i];
    i += 1;
  }
  return copy;
}
@@ -14,22 +46,12 @@ | ||
/** | ||
* Copyright 2018 Shape Security, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License") | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
/**
 * Runs `predicate` and reports whether it produced a truthy value, treating a
 * thrown exception exactly like a falsy result.
 *
 * @param {Function} predicate - zero-argument function to evaluate.
 * @returns {boolean} `true` only when `predicate()` returns a truthy value
 *   without throwing.
 */
var catchIsFalse = function catchIsFalse(predicate) {
  var outcome = false;
  try {
    outcome = Boolean(predicate());
  } catch (ignored) {
    // A throwing predicate counts as "no match" rather than an error.
    outcome = false;
  }
  return outcome;
};
/* eslint-disable no-use-before-define */ | ||
var syntaxCharacters = '^$\\.*+?()[]{}|'.split(''); | ||
var extendedSyntaxCharacters = '^$.*+?()[|'.split(''); | ||
var extendedSyntaxCharacters = '^$\\.*+?()[|'.split(''); | ||
@@ -44,2 +66,12 @@ var controlEscapeCharacters = 'fnrtv'.split(''); | ||
// Unique marker object recorded in `backreferenceNames` when `\k` is not
// followed by a well-formed `<name>`. It is only ever compared by identity
// (via `indexOf` against the string group names), so it can never resolve to a
// declared group. The self-describing key exists purely for debugging output;
// it previously misspelled the constant's name (missing the final "L").
var INVALID_NAMED_BACKREFERENCE_SENTINEL = { INVALID_NAMED_BACKREFERENCE_SENTINEL: true };
/**
 * Tests whether a code point may begin an identifier.
 *
 * Values below 128 are answered by indexing `_unicode.idStartBool`; everything
 * else is converted to a string and tested against
 * `_unicode.idStartLargeRegex`.
 *
 * @param {number} ch - numeric code point.
 * @returns {*} the table entry for small values (presumably a boolean — the
 *   table lives in ./unicode), or the boolean result of the regex test.
 */
function isIdentifierStart(ch) {
  if (ch < 128) {
    return _unicode.idStartBool[ch];
  }
  return _unicode.idStartLargeRegex.test(String.fromCodePoint(ch));
}
/**
 * Tests whether a code point may continue an identifier.
 *
 * Values below 128 are answered by indexing `_unicode.idContinueBool`;
 * everything else is converted to a string and tested against
 * `_unicode.idContinueLargeRegex`.
 *
 * @param {number} ch - numeric code point.
 * @returns {*} the table entry for small values (presumably a boolean — the
 *   table lives in ./unicode), or the boolean result of the regex test.
 */
function isIdentifierPart(ch) {
  if (ch < 128) {
    return _unicode.idContinueBool[ch];
  }
  return _unicode.idContinueLargeRegex.test(String.fromCodePoint(ch));
}
var PatternAcceptorState = function () { | ||
@@ -53,2 +85,4 @@ function PatternAcceptorState(pattern, unicode) { | ||
this.backreferences = []; | ||
this.backreferenceNames = []; | ||
this.groupingNames = []; | ||
this.capturingGroups = 0; | ||
@@ -88,2 +122,59 @@ } | ||
}, { | ||
key: 'eatIdentifierCodePoint', | ||
value: function eatIdentifierCodePoint() { | ||
var characterValue = void 0; | ||
var originalIndex = this.index; | ||
var character = void 0; | ||
if (this.match('\\u')) { | ||
this.skipCodePoint(); | ||
characterValue = acceptUnicodeEscape(this); | ||
if (!characterValue.matched) { | ||
this.index = originalIndex; | ||
return null; | ||
} | ||
characterValue = characterValue.value; | ||
character = String.fromCodePoint(characterValue); | ||
} else { | ||
character = this.nextCodePoint(); | ||
if (character == null) { | ||
this.index = originalIndex; | ||
return null; | ||
} | ||
this.index += character.length; | ||
characterValue = character.codePointAt(0); | ||
} | ||
return { character: character, characterValue: characterValue }; | ||
} | ||
}, { | ||
key: 'eatIdentifierStart', | ||
value: function eatIdentifierStart() { | ||
var originalIndex = this.index; | ||
var codePoint = this.eatIdentifierCodePoint(); | ||
if (codePoint === null) { | ||
this.index = originalIndex; | ||
return null; | ||
} | ||
if (codePoint.character === '_' || codePoint.character === '$' || isIdentifierStart(codePoint.characterValue)) { | ||
return codePoint.character; | ||
} | ||
this.index = originalIndex; | ||
return null; | ||
} | ||
}, { | ||
key: 'eatIdentifierPart', | ||
value: function eatIdentifierPart() { | ||
var originalIndex = this.index; | ||
var codePoint = this.eatIdentifierCodePoint(); | ||
if (codePoint === null) { | ||
this.index = originalIndex; | ||
return null; | ||
} | ||
// ZWNJ / ZWJ | ||
if (codePoint.character === '\u200C' || codePoint.character === '\u200D' || codePoint.character === '$' || isIdentifierPart(codePoint.characterValue)) { | ||
return codePoint.character; | ||
} | ||
this.index = originalIndex; | ||
return null; | ||
} | ||
}, { | ||
key: 'eatAny', | ||
@@ -220,26 +311,56 @@ value: function eatAny() { | ||
var accepted = acceptDisjunction(state); | ||
if (accepted.matched && state.unicode) { | ||
var _iteratorNormalCompletion4 = true; | ||
var _didIteratorError4 = false; | ||
var _iteratorError4 = undefined; | ||
if (accepted.matched) { | ||
if (state.unicode) { | ||
var _iteratorNormalCompletion4 = true; | ||
var _didIteratorError4 = false; | ||
var _iteratorError4 = undefined; | ||
try { | ||
for (var _iterator4 = state.backreferences[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { | ||
var backreference = _step4.value; | ||
try { | ||
for (var _iterator4 = state.backreferences[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { | ||
var backreference = _step4.value; | ||
if (backreference > state.capturingGroups) { | ||
return false; | ||
if (backreference > state.capturingGroups) { | ||
return false; | ||
} | ||
} | ||
} catch (err) { | ||
_didIteratorError4 = true; | ||
_iteratorError4 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion4 && _iterator4.return) { | ||
_iterator4.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError4) { | ||
throw _iteratorError4; | ||
} | ||
} | ||
} | ||
} catch (err) { | ||
_didIteratorError4 = true; | ||
_iteratorError4 = err; | ||
} finally { | ||
} | ||
if (state.groupingNames.length > 0 || state.unicode) { | ||
var _iteratorNormalCompletion5 = true; | ||
var _didIteratorError5 = false; | ||
var _iteratorError5 = undefined; | ||
try { | ||
if (!_iteratorNormalCompletion4 && _iterator4.return) { | ||
_iterator4.return(); | ||
for (var _iterator5 = state.backreferenceNames[Symbol.iterator](), _step5; !(_iteratorNormalCompletion5 = (_step5 = _iterator5.next()).done); _iteratorNormalCompletion5 = true) { | ||
var backreferenceName = _step5.value; | ||
if (state.groupingNames.indexOf(backreferenceName) === -1) { | ||
return false; | ||
} | ||
} | ||
} catch (err) { | ||
_didIteratorError5 = true; | ||
_iteratorError5 = err; | ||
} finally { | ||
if (_didIteratorError4) { | ||
throw _iteratorError4; | ||
try { | ||
if (!_iteratorNormalCompletion5 && _iterator5.return) { | ||
_iterator5.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError5) { | ||
throw _iteratorError5; | ||
} | ||
} | ||
@@ -347,9 +468,9 @@ } | ||
return function (state) { | ||
var _iteratorNormalCompletion5 = true; | ||
var _didIteratorError5 = false; | ||
var _iteratorError5 = undefined; | ||
var _iteratorNormalCompletion6 = true; | ||
var _didIteratorError6 = false; | ||
var _iteratorError6 = undefined; | ||
try { | ||
for (var _iterator5 = acceptors[Symbol.iterator](), _step5; !(_iteratorNormalCompletion5 = (_step5 = _iterator5.next()).done); _iteratorNormalCompletion5 = true) { | ||
var predicate = _step5.value; | ||
for (var _iterator6 = acceptors[Symbol.iterator](), _step6; !(_iteratorNormalCompletion6 = (_step6 = _iterator6.next()).done); _iteratorNormalCompletion6 = true) { | ||
var predicate = _step6.value; | ||
@@ -362,12 +483,12 @@ var value = predicate(state); | ||
} catch (err) { | ||
_didIteratorError5 = true; | ||
_iteratorError5 = err; | ||
_didIteratorError6 = true; | ||
_iteratorError6 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion5 && _iterator5.return) { | ||
_iterator5.return(); | ||
if (!_iteratorNormalCompletion6 && _iterator6.return) { | ||
_iterator6.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError5) { | ||
throw _iteratorError5; | ||
if (_didIteratorError6) { | ||
throw _iteratorError6; | ||
} | ||
@@ -406,3 +527,8 @@ } | ||
var acceptAssertion = function acceptAssertion(state) { | ||
return { matched: !!state.eatAny('^', '$', '\\b', '\\B') || acceptQuantifiableAssertion(state).matched }; | ||
if (state.eatAny('^', '$', '\\b', '\\B')) { | ||
return { matched: true }; | ||
} | ||
return acceptLabeledGroup(function (subState) { | ||
return subState.unicode ? !!subState.eatAny('?=', '?!', '?<=', '?<!') : !!subState.eatAny('?<=', '?<!'); | ||
})(state); | ||
}; | ||
@@ -484,2 +610,4 @@ | ||
return subState.eat('\\') ? acceptAtomEscape(subState) : { matched: false }; | ||
}), backtrackOnFailure(function (subState) { | ||
return { matched: subState.eat('\\') && subState.match('c') }; | ||
}), acceptCharacterClass, acceptLabeledGroup(function (subState) { | ||
@@ -495,5 +623,20 @@ return subState.eat('?:'); | ||
var acceptGrouping = backtrackOnFailure(function (state) { | ||
if (!state.eat('(') || !acceptDisjunction(state, ')').matched) { | ||
if (!state.eat('(')) { | ||
return { matched: false }; | ||
} | ||
var groupName = backtrackOnFailure(function (subState) { | ||
if (!state.eat('?')) { | ||
return { matched: false }; | ||
} | ||
return acceptGroupName(subState); | ||
})(state); | ||
if (!acceptDisjunction(state, ')').matched) { | ||
return { matched: false }; | ||
} | ||
if (groupName.matched) { | ||
if (state.groupingNames.indexOf(groupName.data) !== -1) { | ||
return { matched: false }; | ||
} | ||
state.groupingNames.push(groupName.data); | ||
} | ||
state.capturingGroups++; | ||
@@ -515,5 +658,62 @@ return { matched: true }; | ||
var acceptCharacterClassEscape = function acceptCharacterClassEscape(state) { | ||
return { matched: !!state.eatAny('d', 'D', 's', 'S', 'w', 'W') }; | ||
if (state.eatAny('d', 'D', 's', 'S', 'w', 'W')) { | ||
return { matched: true }; | ||
} | ||
if (state.unicode) { | ||
return backtrackOnFailure(function (subState) { | ||
if (!subState.eat('p{') && !subState.eat('P{')) { | ||
return { matched: false }; | ||
} | ||
if (!acceptUnicodePropertyValueExpression(subState).matched) { | ||
return { matched: false }; | ||
} | ||
return { matched: !!subState.eat('}') }; | ||
})(state); | ||
} | ||
return { matched: false }; | ||
}; | ||
/**
 * Greedily consumes the characters of a Unicode property name: any entry of
 * `controlCharacters` (defined outside this view — presumably the ASCII
 * letters) or an underscore.
 *
 * @param {PatternAcceptorState} state - acceptor state to advance.
 * @returns {{matched: boolean, data: string}} `matched` is true when at least
 *   one character was consumed; `data` is the consumed text.
 */
var acceptUnicodePropertyName = function acceptUnicodePropertyName(state) {
  var allowed = _toConsumableArray(controlCharacters).concat(['_']);
  var eaten = [];
  for (var next = state.eatAny.apply(state, allowed); next; next = state.eatAny.apply(state, allowed)) {
    eaten.push(next);
  }
  return { matched: eaten.length > 0, data: eaten.join('') };
};
/**
 * Greedily consumes the characters of a Unicode property value: any entry of
 * `controlCharacters` or `decimalDigits` (both defined outside this view), or
 * an underscore.
 *
 * @param {PatternAcceptorState} state - acceptor state to advance.
 * @returns {{matched: boolean, data: string}} `matched` is true when at least
 *   one character was consumed; `data` is the consumed text.
 */
var acceptUnicodePropertyValue = function acceptUnicodePropertyValue(state) {
  var allowed = _toConsumableArray(controlCharacters).concat(_toConsumableArray(decimalDigits), ['_']);
  var eaten = [];
  for (var next = state.eatAny.apply(state, allowed); next; next = state.eatAny.apply(state, allowed)) {
    eaten.push(next);
  }
  return { matched: eaten.length > 0, data: eaten.join('') };
};
/**
 * Accepts the `\p{X}` form, where `X` stands alone without `Name=`.
 *
 * Per the ECMAScript early-error rule for LoneUnicodePropertyNameOrValue, `X`
 * must be either a General_Category value (e.g. `Lu`, `Letter`) or a binary
 * property name (e.g. `Alphabetic`). The previous implementation only asked
 * the property-name matcher, which
 *   - rejected every General_Category value such as `\p{Lu}`, and
 *   - accepted the value-taking properties (`Script`, `gc`, ...) on their own.
 *
 * @param {PatternAcceptorState} state - acceptor state to advance; callers are
 *   expected to wrap this in `backtrackOnFailure`, since input is consumed even
 *   when the result is a non-match.
 * @returns {{matched: boolean}} whether a valid lone name/value was consumed.
 */
var acceptLoneUnicodePropertyNameOrValue = function acceptLoneUnicodePropertyNameOrValue(state) {
  // Properties that require `=Value`; the name matcher recognises them, but
  // they are not binary properties and so are invalid when used alone.
  var valueTakingPropertyNames = ['General_Category', 'gc', 'Script', 'sc', 'Script_Extensions', 'scx'];

  var loneValue = acceptUnicodePropertyValue(state);
  if (!loneValue.matched || valueTakingPropertyNames.indexOf(loneValue.data) !== -1) {
    return { matched: false };
  }

  // The matcher libraries throw on unknown input; catchIsFalse maps that to false.
  var isGeneralCategoryValue = catchIsFalse(function () {
    return (0, _unicodeMatchPropertyValueEcmascript2.default)('General_Category', loneValue.data);
  });
  if (isGeneralCategoryValue) {
    return { matched: true };
  }
  return { matched: catchIsFalse(function () {
      return (0, _unicodeMatchPropertyEcmascript2.default)(loneValue.data);
    }) };
};
/**
 * Accepts the contents of `\p{...}` / `\P{...}`: either `Name=Value` or a lone
 * property name / General_Category value.
 *
 * The `Name=Value` branch previously resolved `Name` only through the alias
 * map, which holds alias -> canonical entries. A canonical name such as
 * `General_Category` or `Script` therefore resolved to `undefined`, the value
 * matcher threw, and valid patterns like `\p{Script=Latin}` were rejected.
 * The name now falls back to itself when it is not an alias; unknown names
 * still fail because the value matcher throws for them.
 *
 * @param {PatternAcceptorState} state - acceptor state to advance.
 * @returns {{matched: boolean}} result of the first alternative that matches.
 */
var acceptUnicodePropertyValueExpression = function acceptUnicodePropertyValueExpression(state) {
  return anyOf(backtrackOnFailure(function (subState) {
    var name = acceptUnicodePropertyName(subState);
    if (!name.matched || !subState.eat('=')) {
      return { matched: false };
    }
    var value = acceptUnicodePropertyValue(subState);
    if (!value.matched) {
      return { matched: false };
    }
    return { matched: catchIsFalse(function () {
        // Aliases map to their canonical name; canonical names pass through as-is.
        var canonicalName = _unicodePropertyAliasesEcmascript2.default.get(name.data) || name.data;
        return (0, _unicodeMatchPropertyValueEcmascript2.default)(canonicalName, value.data);
      }) };
  }), backtrackOnFailure(acceptLoneUnicodePropertyNameOrValue))(state);
};
var acceptCharacterEscape = anyOf(function (state) { | ||
@@ -594,3 +794,3 @@ var eaten = state.eatAny.apply(state, _toConsumableArray(controlEscapeCharacters)); | ||
var next = state.nextCodePoint(); | ||
if (next !== null && next !== 'c') { | ||
if (next !== null && next !== 'c' && next !== 'k') { | ||
state.skipCodePoint(); | ||
@@ -602,4 +802,38 @@ return { matched: true, value: next.codePointAt(0) }; | ||
var acceptAtomEscape = anyOf(acceptDecimalEscape, acceptCharacterClassEscape, acceptCharacterEscape); | ||
/**
 * Accepts a `k` escape body (the leading backslash has already been consumed
 * by the caller), optionally followed by a `<name>`.
 *
 * The escape always matches once `k` is eaten. What is recorded differs: the
 * parsed name when one follows, otherwise the invalid-backreference sentinel,
 * so that later validation of `backreferenceNames` can reject the pattern.
 *
 * @param {PatternAcceptorState} state - acceptor state to advance.
 * @returns {{matched: boolean}} `matched` is false only when `k` is absent.
 */
var acceptGroupNameBackreference = backtrackOnFailure(function (state) {
  if (!state.eat('k')) {
    return { matched: false };
  }
  var name = acceptGroupName(state);
  var recorded = name.matched ? name.data : INVALID_NAMED_BACKREFERENCE_SENTINEL;
  state.backreferenceNames.push(recorded);
  return { matched: true };
});
/**
 * Accepts a group name of the form `<identifier>`: `<`, one identifier-start
 * code point, any number of identifier-part code points, then `>`.
 *
 * @param {PatternAcceptorState} state - acceptor state to advance.
 * @returns {{matched: boolean, data: (string|undefined)}} on success `data`
 *   holds the name without the angle brackets; on failure only
 *   `{ matched: false }` is returned.
 */
var acceptGroupName = backtrackOnFailure(function (state) {
  if (!state.eat('<')) {
    return { matched: false };
  }
  var first = state.eatIdentifierStart();
  if (!first) {
    return { matched: false };
  }
  var name = first;
  for (var part = state.eatIdentifierPart(); part; part = state.eatIdentifierPart()) {
    name += part;
  }
  if (!state.eat('>')) {
    return { matched: false };
  }
  // At least the start character was consumed, so the name is never empty here.
  return { matched: true, data: name };
});
var acceptAtomEscape = anyOf(acceptDecimalEscape, acceptCharacterClassEscape, acceptCharacterEscape, acceptGroupNameBackreference); | ||
var acceptCharacterClass = backtrackOnFailure(function (state) { | ||
@@ -632,19 +866,16 @@ if (!state.eat('[')) { | ||
var acceptClassAtomNoDash = function acceptClassAtomNoDash(localState) { | ||
if (localState.match('\\')) { | ||
var ret = backtrackOnFailure(function (subState) { | ||
subState.eat('\\'); | ||
return acceptClassEscape(subState); | ||
})(localState); | ||
if (ret.matched) { | ||
return ret; | ||
} else if (!localState.match('\\c') || localState.unicode) { | ||
return { matched: false }; | ||
} | ||
} | ||
var nextCodePoint = localState.nextCodePoint(); | ||
if (nextCodePoint === null || nextCodePoint === ']' || nextCodePoint === '-') { | ||
if (nextCodePoint === ']' || nextCodePoint === '-' || nextCodePoint === null) { | ||
return { matched: false }; | ||
} | ||
localState.skipCodePoint(); | ||
return { matched: true, value: nextCodePoint.codePointAt(0) }; | ||
if (nextCodePoint !== '\\') { | ||
localState.skipCodePoint(); | ||
return { matched: true, value: nextCodePoint.codePointAt(0) }; | ||
} | ||
localState.eat('\\'); | ||
var classEscape = acceptClassEscape(localState); | ||
if (!classEscape.matched && localState.nextCodePoint() === 'c' && !localState.unicode) { | ||
return { matched: true, value: '\\'.charCodeAt(0) }; | ||
} | ||
return classEscape; | ||
}; | ||
@@ -651,0 +882,0 @@ |
{ | ||
"name": "shift-regexp-acceptor", | ||
"version": "1.0.1", | ||
"description": "Regexp acceptor for ECMAScript 2016", | ||
"version": "2.0.0", | ||
"description": "Regexp acceptor for ECMAScript 2018", | ||
"author": "Shape Security", | ||
@@ -21,2 +21,7 @@ "homepage": "https://github.com/shapesecurity/shift-regexp-acceptor-js", | ||
}, | ||
"dependencies": { | ||
"unicode-match-property-value-ecmascript": "1.0.2", | ||
"unicode-match-property-ecmascript": "1.0.4", | ||
"unicode-property-aliases-ecmascript": "1.0.4" | ||
}, | ||
"devDependencies": { | ||
@@ -23,0 +28,0 @@ "babel-cli": "6.26.0", |
@@ -7,3 +7,3 @@ Shift Regexp Acceptor | ||
This module provides a ECMA-262 regular expression [acceptor](https://en.wikipedia.org/wiki/Finite-state_machine#Acceptors_(recognizers)) for validation of regular expression literals for [ECMAScript 2016](https://www.ecma-international.org/ecma-262/7.0/#sec-regexp-regular-expression-objects), [Annex B](https://www.ecma-international.org/ecma-262/7.0/#sec-regular-expressions-patterns). | ||
This module provides an ECMA-262 regular expression [acceptor](https://en.wikipedia.org/wiki/Finite-state_machine#Acceptors_(recognizers)) for validation of regular expression literals for [ECMAScript 2018](https://www.ecma-international.org/ecma-262/9.0/#sec-regexp-regular-expression-objects), [Annex B](https://www.ecma-international.org/ecma-262/9.0/#sec-regular-expressions-patterns). | ||
@@ -10,0 +10,0 @@ ## Status |
71421
5
909
3
+ Added unicode-canonical-property-names-ecmascript@1.0.4 (transitive)
+ Added unicode-match-property-ecmascript@1.0.4 (transitive)
+ Added unicode-match-property-value-ecmascript@1.0.2 (transitive)
+ Added unicode-property-aliases-ecmascript@1.0.4 (transitive)