regjsparser
Advanced tools
Comparing version 0.6.9 to 0.7.0
{ | ||
"name": "regjsparser", | ||
"version": "0.6.9", | ||
"version": "0.7.0", | ||
"author": "'Julian Viereck' <julian.viereck@gmail.com>", | ||
@@ -5,0 +5,0 @@ "license": "BSD-2-Clause", |
364
parser.js
@@ -61,4 +61,4 @@ // regjsparser | ||
// DecimalEscape | ||
// CharacterClassEscape | ||
// CharacterEscape | ||
// CharacterClassEscape | ||
// k GroupName | ||
@@ -95,3 +95,4 @@ // | ||
// [empty] | ||
// NonemptyClassRanges | ||
// [~V] NonemptyClassRanges | ||
// [+V] ClassContents | ||
// | ||
@@ -146,2 +147,67 @@ // NonemptyClassRanges :: | ||
// <ZWJ> | ||
// | ||
// -------------------------------------------------------------- | ||
// NOTE: The following productions refer to the "set notation and | ||
// properties of strings" proposal. | ||
// https://github.com/tc39/proposal-regexp-set-notation | ||
// -------------------------------------------------------------- | ||
// | ||
// ClassContents :: | ||
// ClassUnion | ||
// ClassIntersection | ||
// ClassSubtraction | ||
// | ||
// ClassUnion :: | ||
// ClassRange ClassUnion? | ||
// ClassOperand ClassUnion? | ||
// | ||
// ClassIntersection :: | ||
// ClassOperand && [lookahead ≠ &] ClassOperand | ||
// ClassIntersection && [lookahead ≠ &] ClassOperand | ||
// | ||
// ClassSubtraction :: | ||
// ClassOperand -- ClassOperand | ||
// ClassSubtraction -- ClassOperand | ||
// | ||
// ClassOperand :: | ||
// ClassCharacter | ||
// ClassStrings | ||
// NestedClass | ||
// | ||
// NestedClass :: | ||
// [ [lookahead ≠ ^] ClassRanges[+U,+V] ] | ||
// [ ^ ClassRanges[+U,+V] ] | ||
// \ CharacterClassEscape[+U, +V] | ||
// | ||
// ClassRange :: | ||
// ClassCharacter - ClassCharacter | ||
// | ||
// ClassCharacter :: | ||
// [lookahead ∉ ClassReservedDouble] SourceCharacter but not ClassSyntaxCharacter | ||
// \ CharacterEscape[+U] | ||
// \ ClassHalfOfDouble | ||
// \ b | ||
// | ||
// ClassSyntaxCharacter :: | ||
// one of ( ) [ ] { } / - \ | | ||
// | ||
// ClassStrings :: | ||
// ( ClassString MoreClassStrings? ) | ||
// | ||
// MoreClassStrings :: | ||
// | ClassString MoreClassStrings? | ||
// | ||
// ClassString :: | ||
// [empty] | ||
// NonEmptyClassString | ||
// | ||
// NonEmptyClassString :: | ||
// ClassCharacter NonEmptyClassString? | ||
// | ||
// ClassReservedDouble :: | ||
// one of && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ __ `` ~~ | ||
// | ||
// ClassHalfOfDouble :: | ||
// one of & - ! # % , : ; < = > @ _ ` ~ | ||
// | ||
@@ -351,6 +417,7 @@ (function() { | ||
function createCharacterClass(classRanges, negative, from, to) { | ||
function createCharacterClass(contents, negative, from, to) { | ||
return addRaw({ | ||
type: 'characterClass', | ||
body: classRanges, | ||
kind: contents.kind, | ||
body: contents.body, | ||
negative: negative, | ||
@@ -381,2 +448,18 @@ range: [ | ||
/**
 * Builds a `classStrings` node and hands it to `addRaw`.
 *
 * @param {Array} strings - Stored unchanged on the node as `strings`.
 * @param {number} from - First entry of the node's `range`.
 * @param {number} to - Second entry of the node's `range`.
 * @returns Whatever `addRaw` returns for the freshly built node.
 */
function createClassStrings(strings, from, to) {
  var node = { type: 'classStrings' };
  node.strings = strings;
  node.range = [from, to];
  return addRaw(node);
}
/**
 * Builds a `classString` node and hands it to `addRaw`.
 *
 * @param {Array} characters - Stored unchanged on the node as `characters`.
 * @param {number} from - First entry of the node's `range`.
 * @param {number} to - Second entry of the node's `range`.
 * @returns Whatever `addRaw` returns for the freshly built node.
 */
function createClassString(characters, from, to) {
  var node = { type: 'classString' };
  node.characters = characters;
  node.range = [from, to];
  return addRaw(node);
}
function flattenBody(body) { | ||
@@ -390,6 +473,2 @@ if (body.type === 'alternative') { | ||
/**
 * Reports whether the given object's `type` property is the string 'empty'.
 *
 * @param {{type: *}} obj - Object whose `type` is inspected.
 * @returns {boolean} `true` only when `obj.type` is strictly equal to 'empty'.
 */
function isEmpty(obj) {
  var nodeType = obj.type;
  return 'empty' === nodeType;
}
function incr(amount) { | ||
@@ -765,3 +844,3 @@ amount = (amount || 1); | ||
res = parseCharacterEscape(); | ||
res = parseCharacterClassEscape() || parseCharacterEscape(); | ||
@@ -775,3 +854,2 @@ return res; | ||
// DecimalIntegerLiteral [lookahead ∉ DecimalDigit] | ||
// CharacterClassEscape :: one of d D s S w W | ||
@@ -829,4 +907,20 @@ var res, match; | ||
} | ||
} else if (res = matchReg(/^[dDsSwW]/)) { | ||
} | ||
return false; | ||
} | ||
function parseCharacterClassEscape() { | ||
// CharacterClassEscape :: one of d D s S w W | ||
var res; | ||
if (res = matchReg(/^[dDsSwW]/)) { | ||
return createCharacterClassEscape(res[0]); | ||
} else if (features.unicodePropertyEscape && (hasUnicodeFlag || hasUnicodeSetFlag) && (res = matchReg(/^([pP])\{([^\}]+)\}/))) { | ||
// https://github.com/jviereck/regjsparser/issues/77 | ||
return addRaw({ | ||
type: 'unicodePropertyEscape', | ||
negative: res[1] === 'P', | ||
value: res[2], | ||
range: [res.range[0] - 1, res.range[1]], | ||
raw: res[0] | ||
}); | ||
} | ||
@@ -889,11 +983,2 @@ return false; | ||
return res; | ||
} else if (features.unicodePropertyEscape && hasUnicodeFlag && (res = matchReg(/^([pP])\{([^\}]+)\}/))) { | ||
// https://github.com/jviereck/regjsparser/issues/77 | ||
return addRaw({ | ||
type: 'unicodePropertyEscape', | ||
negative: res[1] === 'P', | ||
value: res[2], | ||
range: [res.range[0] - 1, res.range[1]], | ||
raw: res[0] | ||
}); | ||
} else { | ||
@@ -1036,8 +1121,11 @@ // IdentityEscape | ||
// [empty] | ||
// NonemptyClassRanges | ||
// [~V] NonemptyClassRanges | ||
// [+V] ClassContents | ||
var res; | ||
if (current(']')) { | ||
// Empty array means nothing insinde of the ClassRange. | ||
return []; | ||
// Empty array means nothing inside of the ClassRange. | ||
return { kind: 'union', body: [] }; | ||
} else if (hasUnicodeSetFlag) { | ||
return parseClassContents(); | ||
} else { | ||
@@ -1048,3 +1136,3 @@ res = parseNonemptyClassRanges(); | ||
} | ||
return res; | ||
return { kind: 'union', body: res }; | ||
} | ||
@@ -1097,3 +1185,3 @@ } | ||
} | ||
return res.concat(classRanges); | ||
return res.concat(classRanges.body); | ||
} | ||
@@ -1179,2 +1267,219 @@ | ||
/**
 * Parses the contents of a character class under the unicode-sets ("v") flag.
 *
 * The first operand decides which kind of expression the whole class is:
 * a range, or an operand not followed by `&` / `-`, makes it a 'union';
 * a following `&` makes it an 'intersection'; a following `-` makes it a
 * 'subtraction'. Operands are then read until the closing `]` is the current
 * character (the `]` itself is not consumed here).
 *
 * @returns {{kind: string, body: Array}} `kind` is 'union', 'intersection'
 *     or 'subtraction'; `body` holds the operands in source order.
 */
function parseClassContents() {
  // ClassContents ::
  //      ClassUnion
  //      ClassIntersection
  //      ClassSubtraction
  //
  // ClassUnion ::
  //      ClassRange ClassUnion?
  //      ClassOperand ClassUnion?
  //
  // ClassIntersection ::
  //      ClassOperand && [lookahead ≠ &] ClassOperand
  //      ClassIntersection && [lookahead ≠ &] ClassOperand
  //
  // ClassSubtraction ::
  //      ClassOperand -- ClassOperand
  //      ClassSubtraction -- ClassOperand

  var body = [];
  var kind;

  // Ranges are only legal in a union, but the kind is not known yet, so the
  // first operand is always parsed with ranges allowed.
  var operand = parseClassOperand(/* allowRanges*/ true);
  body.push(operand);

  if (operand.type === 'classRange') {
    kind = 'union';
  } else if (current('&')) {
    kind = 'intersection';
  } else if (current('-')) {
    kind = 'subtraction';
  } else {
    kind = 'union';
  }

  while (!current(']')) {
    if (kind === 'intersection') {
      skip('&');
      skip('&');
      // Enforces the `[lookahead ≠ &]` restriction of ClassIntersection.
      if (current('&')) {
        bail('&& cannot be followed by &. Wrap it in parentheses: &&(&).');
      }
    } else if (kind === 'subtraction') {
      skip('-');
      skip('-');
    }

    operand = parseClassOperand(/* allowRanges*/ kind === 'union');
    body.push(operand);
  }

  return { kind: kind, body: body };
}
/**
 * Parses one operand of a "v"-flag character class: a single class
 * character, a class-strings group, a nested class, a character class
 * escape, or (only when `allowRanges` is truthy) a `a-b` range.
 *
 * @param {boolean} allowRanges - When truthy, a class character followed by
 *     a single `-` (not `--`) is parsed as the start of a ClassRange.
 * @returns The parsed node: the result of `createClassRange` for a range,
 *     otherwise the node produced by the helper that matched.
 */
function parseClassOperand(allowRanges) {
  // ClassOperand ::
  //      ClassCharacter
  //      ClassStrings
  //      NestedClass
  //
  // NestedClass ::
  //      [ [lookahead ≠ ^] ClassRanges[+U,+V] ]
  //      [ ^ ClassRanges[+U,+V] ]
  //      \ CharacterClassEscape[+U, +V]
  //
  // ClassRange ::
  //      ClassCharacter - ClassCharacter
  //
  // ClassCharacter ::
  //      [lookahead ∉ ClassReservedDouble] SourceCharacter but not ClassSyntaxCharacter
  //      \ CharacterEscape[+U]
  //      \ ClassHalfOfDouble
  //      \ b
  //
  // ClassSyntaxCharacter ::
  //      one of ( ) [ ] { } / - \ |

  var from = pos;
  var start, res;

  if (match('\\')) {
    if (res = parseCharacterClassEscape()) {
      // ClassOperand ::
      //      ...
      //      NestedClass
      //
      // NestedClass ::
      //      ...
      //      \ CharacterClassEscape[+U, +V]
      //
      // A class escape is a complete operand; it can never start a range.
      return res;
    } else if (res = parseClassCharacterEscapedHelper()) {
      // ClassCharacter ::
      //      ...
      //      \ CharacterEscape[+U]
      //      \ ClassHalfOfDouble
      //      \ b
      //
      // An escaped character is a ClassCharacter and may start a ClassRange.
      start = res;
    } else {
      bail('Invalid escape', '\\' + lookahead(), from);
    }
  } else if (res = parseClassCharacterUnescapedHelper()) {
    start = res;
  } else if (res = parseClassStrings() || parseCharacterClass()) {
    // ClassOperand ::
    //      ...
    //      ClassStrings
    //      NestedClass
    //
    // NestedClass ::
    //      [ [lookahead ≠ ^] ClassRanges[+U,+V] ]
    //      [ ^ ClassRanges[+U,+V] ]
    //      ...
    return res;
  } else {
    bail('Invalid character', lookahead());
  }

  // A single `-` introduces a range; `--` is the subtraction operator and is
  // left for the caller.
  if (allowRanges && current('-') && !next('-')) {
    skip('-');

    if (res = parseClassCharacter()) {
      // ClassRange ::
      //      ClassCharacter - ClassCharacter
      return createClassRange(start, res, from, pos);
    }

    bail('Invalid range end', lookahead());
  }

  // ClassOperand ::
  //      ClassCharacter
  //      ...
  return start;
}
/**
 * Parses a single ClassCharacter, escaped or unescaped.
 *
 * @returns The node from `parseClassCharacterEscapedHelper` when the input
 *     starts with a backslash, otherwise the result of
 *     `parseClassCharacterUnescapedHelper` (falsy when nothing matched).
 *     An unrecognised escape is reported through `bail`.
 */
function parseClassCharacter() {
  // ClassCharacter ::
  //      [lookahead ∉ ClassReservedDouble] SourceCharacter but not ClassSyntaxCharacter
  //      \ CharacterEscape[+U]
  //      \ ClassHalfOfDouble
  //      \ b

  // Both locals must be declared here: without the declaration `res` leaks
  // as an implicit global and `from` is an unresolved reference on the
  // error path.
  var from = pos;
  var res;

  if (match('\\')) {
    if (res = parseClassCharacterEscapedHelper()) {
      return res;
    } else {
      // `from` points at the backslash so the error covers the whole escape.
      bail('Invalid escape', '\\' + lookahead(), from);
    }
  }

  return parseClassCharacterUnescapedHelper();
}
/**
 * Tries to read one unescaped class character, i.e. any single character
 * that is not one of `( ) [ ] { } / - \ |`.
 *
 * @returns The node from `createCharacter` on a match, otherwise `undefined`.
 */
function parseClassCharacterUnescapedHelper() {
  // ClassCharacter ::
  //      [lookahead ∉ ClassReservedDouble] SourceCharacter but not ClassSyntaxCharacter
  //      ...

  var matched = matchReg(/^[^()[\]{}/\-\\|]/);
  if (!matched) {
    return;
  }
  return createCharacter(matched);
}
/**
 * Parses what follows a backslash when a ClassCharacter is expected. The
 * backslash itself is not consumed by this function.
 *
 * @returns The node for `\b`, for an escaped ClassHalfOfDouble character, or
 *     whatever `parseCharacterEscape` produced; `null` when none applies.
 *     `\B` is reported through `bail`.
 */
function parseClassCharacterEscapedHelper() {
  // ClassCharacter ::
  //      ...
  //      \ CharacterEscape[+U]
  //      \ ClassHalfOfDouble
  //      \ b

  // Declared locally so the assignments below do not create (or, in strict
  // mode, fail to create) a global `res`.
  var res;

  if (match('b')) {
    return createEscaped('singleEscape', 0x0008, '\\b');
  } else if (match('B')) {
    // `pos - 2` steps back over the already-consumed `B` and one more character.
    bail('\\B not possible inside of ClassContents', '', pos - 2);
  } else if (res = matchReg(/^[&\-!#%,:;<=>@_`~]/)) {
    return createEscaped('identifier', res[0].codePointAt(0), res[0]);
  } else if (res = parseCharacterEscape()) {
    return res;
  } else {
    return null;
  }
}
/**
 * Parses a parenthesised, `|`-separated list of class strings.
 *
 * @returns `null` when the input does not start with `(`; otherwise the node
 *     from `createClassStrings` spanning from the `(` to just past the `)`.
 */
function parseClassStrings() {
  // ClassStrings ::
  //      ( ClassString MoreClassStrings? )

  var start = pos;

  if (!match('(')) {
    return null;
  }

  // There is always at least one (possibly empty) string; each further `|`
  // introduces another one.
  var alternatives = [parseClassString()];
  while (match('|')) {
    alternatives.push(parseClassString());
  }

  skip(')');

  return createClassStrings(alternatives, start, pos);
}
/**
 * Parses a (possibly empty) run of class characters.
 *
 * @returns The node from `createClassString` holding every character read
 *     until `parseClassCharacter` returned a falsy value.
 */
function parseClassString() {
  // ClassString ::
  //      [empty]
  //      NonEmptyClassString
  //
  // NonEmptyClassString ::
  //      ClassCharacter NonEmptyClassString?

  var start = pos;
  var collected = [];

  for (var ch = parseClassCharacter(); ch; ch = parseClassCharacter()) {
    collected.push(ch);
  }

  return createClassString(collected, start, pos);
}
function bail(message, details, from, to) { | ||
@@ -1200,4 +1505,13 @@ from = from == null ? pos : from; | ||
var hasUnicodeFlag = (flags || "").indexOf("u") !== -1; | ||
var hasUnicodeSetFlag = (flags || "").indexOf("v") !== -1; | ||
var pos = 0; | ||
if (hasUnicodeSetFlag && !features.unicodeSet) { | ||
throw new Error('The "v" flag is only supported when the .unicodeSet option is enabled.'); | ||
} | ||
if (hasUnicodeFlag && hasUnicodeSetFlag) { | ||
throw new Error('The "u" and "v" flags are mutually exclusive.'); | ||
} | ||
// Convert the input to a string and treat the empty string special. | ||
@@ -1204,0 +1518,0 @@ str = String(str); |
58500
1426
5