Comparing version 5.0.0 to 6.0.0
411
index.js
@@ -1,23 +0,388 @@ | ||
// Copyright 2014, 2015, 2016, 2017, 2018 Simon Lydell | ||
// License: MIT. (See LICENSE.) | ||
// Mark the CommonJS exports object as a transpiled ES module so that
// `import jsTokens from "js-tokens"` resolves to `exports.default`.
Object.defineProperty(exports, "__esModule", {
  value: true
})

// This regex comes from regex.coffee, and is inserted here by generate-index.js
// (run `npm run build`).
//
// Capture groups, in the order `matchToToken` reads them:
//    1  string: quoted string or template literal
//       (2 = opening quote, 3 = closing quote, 4 = closing backtick)
//    5  single-line comment
//    6  multi-line comment (7 = closing delimiter, absent when unterminated)
//    8  regex literal
//    9  number
//   10  name
//   11  punctuator
//   12  whitespace
//   13  the empty string, or any single character nothing else matched
// The regex has the `g` flag, so `exec` is stateful through `lastIndex`.
exports.default = /((['"])(?:(?!\2)[^\\\n\r]|\\(?:\r\n|[\s\S]))*(\2)?|`(?:[^`\\$]|\\[\s\S]|\$(?!\{)|\$\{(?:[^{}]|\{[^}]*\}?)*\}?)*(`)?)|(\/\/.*)|(\/\*(?:[^*]|\*(?!\/))*(\*\/)?)|(\/(?!\*)(?:\[(?:(?![\]\\]).|\\.)*\]|(?![\/\]\\]).|\\.)+\/(?:(?!\s*(?:\b|[\u0080-\uFFFF$\\'"~({]|[+\-!](?!=)|\.?\d))|[gmiyus]{1,6}\b(?![\u0080-\uFFFF$\\]|\s*(?:[+\-*%&|^<>!=?({]|\/(?![\/*])))))|(0[xX][\da-fA-F]+|0[oO][0-7]+|0[bB][01]+|(?:\d*\.\d+|\d+\.?)(?:[eE][+-]?\d+)?)|((?!\d)(?:(?!\s)[$\w\u0080-\uFFFF]|\\u[\da-fA-F]{4}|\\u\{[\da-fA-F]+\})+)|(--|\+\+|&&|\|\||=>|\.{3}|(?:[+\-\/%&|^]|\*{1,2}|<{1,2}|>{1,3}|!=?|={1,2})=?|[?~.,:;[\](){}])|(\s+)|(^$|[\s\S])/g
exports.matchToToken = function(match) { | ||
var token = {type: "invalid", value: match[0], closed: undefined} | ||
if (match[ 1]) token.type = "string" , token.closed = !!(match[3] || match[4]) | ||
else if (match[ 5]) token.type = "comment" | ||
else if (match[ 6]) token.type = "comment", token.closed = !!match[7] | ||
else if (match[ 8]) token.type = "regex" | ||
else if (match[ 9]) token.type = "number" | ||
else if (match[10]) token.type = "name" | ||
else if (match[11]) token.type = "punctuator" | ||
else if (match[12]) token.type = "whitespace" | ||
return token | ||
} | ||
// Copyright 2014, 2015, 2016, 2017, 2018, 2019, 2020 Simon Lydell | ||
// License: MIT. | ||
// Assigned further down, where the tokenizer generator is defined and exported.
var jsTokens;

// The patterns below that carry the `y` (sticky) flag are stateful: set
// `lastIndex` to the position to match at before calling `exec`, and read
// `lastIndex` afterwards to find where the match ended.

// Regex literal starting at "/". Group 1 is the closing "/" plus any flag
// characters, or a lone trailing backslash; it is absent when unterminated.
const RegularExpressionLiteral = /\/(?![*\/])(?:\[(?:(?![\]\\]).|\\.)*\]|(?![\/\]\\]).|\\.)*(\/[$_\u200C\u200D\p{ID_Continue}]*|\\)?/yu;

// Operators and delimiters. "." and "?." are rejected when a digit follows,
// and "/" is rejected when it would start a comment.
const Punctuator = /--|\+\+|&&|\|\||=>|\.{3}|\??\.(?!\d)|\?{2}|(?:[+\-%&|^]|\*{1,2}|<{1,2}|>{1,3}|!=?|={1,2}|\/(?![\/*]))=?|[?~,:;[\](){}]/y;

// Identifiers and keywords, including \uXXXX and \u{...} escapes.
const IdentifierName = /(?=[$_\p{ID_Start}\\])(?:[$_\u200C\u200D\p{ID_Continue}]|\\u[\da-fA-F]{4}|\\u\{[\da-fA-F]+\})+/yu;

// Single- or double-quoted string. Group 1 is the opening quote; group 2 is
// the closing quote and is absent when the string is unterminated.
const StringLiteral = /(['"])(?:(?!\1)[^\\\n\r]|\\(?:\r\n|[^]))*(\1)?/y;

// Hex, octal, binary, BigInt ("n" suffix), decimal (optional fraction and
// exponent) and legacy leading-zero octal numbers.
const NumericLiteral = /(?:0[xX][\da-fA-F]+|0[oO][0-7]+|0[bB][01]+)n?|0n|[1-9]\d*n|(?:(?:0(?!\d)|0\d*[89]\d*|[1-9]\d*)(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?|0[0-7]+/y;

// A template chunk starting at a backtick or at "}". Group 1 is the
// terminator: a backtick, or "${" when an interpolation follows; it is absent
// when input ends first.
const Template = /[`}](?:[^`\\$]|\\[^]|\$(?!\{))*(`|\$\{)?/y;

// Horizontal whitespace only; line terminators are matched separately.
const WhiteSpace = /[\t\v\f\ufeff\p{Zs}]+/yu;

// One line terminator; "\r\n" counts as a single terminator.
const LineTerminatorSequence = /\r?\n|[\r\u2028\u2029]/y;

// Block comment. Group 1 is the closing delimiter, absent when unterminated.
const MultiLineComment = /\/\*(?:[^*]|\*(?!\/))*(\*\/)?/y;

// Line comment, up to (not including) the end of the line.
const SingleLineComment = /\/\/.*/y;

// Punctuation recognised inside a JSX tag.
const JSXPunctuator = /[<>.:={}]|\/(?![\/*])/y;

// JSX element/attribute name; unlike IdentifierName it may contain "-".
const JSXIdentifier = /[$_\p{ID_Start}][$_\u200C\u200D\p{ID_Continue}-]*/yu;

// JSX attribute string: no escapes, may span lines. Group 2 is the closing
// quote and is absent when unterminated.
const JSXString = /(['"])(?:(?!\1)[^])*(\1)?/y;

// Text between JSX tags: anything up to the next "<", ">", "{" or "}".
const JSXText = /[^<>{}]+/y;

// The patterns below are tested against the previous significant token (or a
// "?"-prefixed marker string standing in for it), not against the input.

// Previous tokens after which an expression is expected. This includes the
// empty string, i.e. no previous token at all.
const TokensPrecedingExpression = /^(?:[\/+-]|\.{3}|\?(?:InterpolationIn(?:JSX|Template)|NoLineTerminatorHere|NonExpressionParenEnd|UnaryIncDec))?$|[{}([,;<>=*%&|^!~?:]$/;

// Previous tokens after which "{" is not treated as an expression brace.
const TokensNotPrecedingObjectLiteral = /^(?:=>|[;\]){}]|else|\?(?:NoLineTerminatorHere|NonExpressionParenEnd))?$/;

// Keywords after which an expression is expected.
const KeywordsWithExpressionAfter = /^(?:await|case|default|delete|do|else|instanceof|new|return|throw|typeof|void|yield)$/;

// Keywords that are replaced by the "?NoLineTerminatorHere" marker when a
// line break follows them.
const KeywordsWithNoLineTerminatorAfter = /^(?:return|throw|yield)$/;

// Non-sticky copy of LineTerminatorSequence, for searching inside a comment.
const Newline = RegExp(LineTerminatorSequence.source);
module.exports = jsTokens = function*(input, {jsx = false} = {}) { | ||
var braces, firstCodePoint, isExpression, lastIndex, lastSignificantToken, length, match, mode, nextLastIndex, nextLastSignificantToken, parenNesting, postfixIncDec, punctuator, stack; | ||
({length} = input); | ||
lastIndex = 0; | ||
lastSignificantToken = ""; | ||
stack = [ | ||
{tag: "JS"} | ||
]; | ||
braces = []; | ||
parenNesting = 0; | ||
postfixIncDec = false; | ||
while (lastIndex < length) { | ||
mode = stack[stack.length - 1]; | ||
switch (mode.tag) { | ||
case "JS": | ||
case "JSNonExpressionParen": | ||
case "InterpolationInTemplate": | ||
case "InterpolationInJSX": | ||
if (input[lastIndex] === "/" && (TokensPrecedingExpression.test(lastSignificantToken) || KeywordsWithExpressionAfter.test(lastSignificantToken))) { | ||
RegularExpressionLiteral.lastIndex = lastIndex; | ||
if (match = RegularExpressionLiteral.exec(input)) { | ||
lastIndex = RegularExpressionLiteral.lastIndex; | ||
lastSignificantToken = match[0]; | ||
postfixIncDec = true; | ||
yield ({ | ||
type: "RegularExpressionLiteral", | ||
value: match[0], | ||
closed: match[1] !== void 0 && match[1] !== "\\" | ||
}); | ||
continue; | ||
} | ||
} | ||
Punctuator.lastIndex = lastIndex; | ||
if (match = Punctuator.exec(input)) { | ||
punctuator = match[0]; | ||
nextLastIndex = Punctuator.lastIndex; | ||
nextLastSignificantToken = punctuator; | ||
switch (punctuator) { | ||
case "(": | ||
if (lastSignificantToken === "?NonExpressionParenKeyword") { | ||
stack.push({ | ||
tag: "JSNonExpressionParen", | ||
nesting: parenNesting | ||
}); | ||
} | ||
parenNesting++; | ||
postfixIncDec = false; | ||
break; | ||
case ")": | ||
parenNesting--; | ||
postfixIncDec = true; | ||
if (mode.tag === "JSNonExpressionParen" && parenNesting === mode.nesting) { | ||
stack.pop(); | ||
nextLastSignificantToken = "?NonExpressionParenEnd"; | ||
postfixIncDec = false; | ||
} | ||
break; | ||
case "{": | ||
Punctuator.lastIndex = 0; | ||
isExpression = !TokensNotPrecedingObjectLiteral.test(lastSignificantToken) && (TokensPrecedingExpression.test(lastSignificantToken) || KeywordsWithExpressionAfter.test(lastSignificantToken)); | ||
braces.push(isExpression); | ||
postfixIncDec = false; | ||
break; | ||
case "}": | ||
switch (mode.tag) { | ||
case "InterpolationInTemplate": | ||
if (braces.length === mode.nesting) { | ||
Template.lastIndex = lastIndex; | ||
match = Template.exec(input); | ||
lastIndex = Template.lastIndex; | ||
lastSignificantToken = match[0]; | ||
if (match[1] === "${") { | ||
lastSignificantToken = "?InterpolationInTemplate"; | ||
postfixIncDec = false; | ||
yield ({ | ||
type: "TemplateMiddle", | ||
value: match[0] | ||
}); | ||
} else { | ||
stack.pop(); | ||
postfixIncDec = true; | ||
yield ({ | ||
type: "TemplateTail", | ||
value: match[0], | ||
closed: match[1] === "`" | ||
}); | ||
} | ||
continue; | ||
} | ||
break; | ||
case "InterpolationInJSX": | ||
if (braces.length === mode.nesting) { | ||
stack.pop(); | ||
lastIndex += 1; | ||
lastSignificantToken = "}"; | ||
yield ({ | ||
type: "JSXPunctuator", | ||
value: "}" | ||
}); | ||
continue; | ||
} | ||
} | ||
postfixIncDec = braces.pop(); | ||
nextLastSignificantToken = postfixIncDec ? "?ExpressionBraceEnd" : "}"; | ||
break; | ||
case "]": | ||
postfixIncDec = true; | ||
break; | ||
case "++": | ||
case "--": | ||
nextLastSignificantToken = postfixIncDec ? "?PostfixIncDec" : "?UnaryIncDec"; | ||
break; | ||
case "<": | ||
if (jsx && (TokensPrecedingExpression.test(lastSignificantToken) || KeywordsWithExpressionAfter.test(lastSignificantToken))) { | ||
stack.push({tag: "JSXTag"}); | ||
lastIndex += 1; | ||
lastSignificantToken = "<"; | ||
yield ({ | ||
type: "JSXPunctuator", | ||
value: punctuator | ||
}); | ||
continue; | ||
} | ||
postfixIncDec = false; | ||
break; | ||
default: | ||
postfixIncDec = false; | ||
} | ||
lastIndex = nextLastIndex; | ||
lastSignificantToken = nextLastSignificantToken; | ||
yield ({ | ||
type: "Punctuator", | ||
value: punctuator | ||
}); | ||
continue; | ||
} | ||
IdentifierName.lastIndex = lastIndex; | ||
if (match = IdentifierName.exec(input)) { | ||
lastIndex = IdentifierName.lastIndex; | ||
nextLastSignificantToken = match[0]; | ||
switch (match[0]) { | ||
case "for": | ||
case "if": | ||
case "while": | ||
case "with": | ||
if (lastSignificantToken !== "." && lastSignificantToken !== "?.") { | ||
nextLastSignificantToken = "?NonExpressionParenKeyword"; | ||
} | ||
} | ||
lastSignificantToken = nextLastSignificantToken; | ||
postfixIncDec = !KeywordsWithExpressionAfter.test(match[0]); | ||
yield ({ | ||
type: "IdentifierName", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
StringLiteral.lastIndex = lastIndex; | ||
if (match = StringLiteral.exec(input)) { | ||
lastIndex = StringLiteral.lastIndex; | ||
lastSignificantToken = match[0]; | ||
postfixIncDec = true; | ||
yield ({ | ||
type: "StringLiteral", | ||
value: match[0], | ||
closed: match[2] !== void 0 | ||
}); | ||
continue; | ||
} | ||
NumericLiteral.lastIndex = lastIndex; | ||
if (match = NumericLiteral.exec(input)) { | ||
lastIndex = NumericLiteral.lastIndex; | ||
lastSignificantToken = match[0]; | ||
postfixIncDec = true; | ||
yield ({ | ||
type: "NumericLiteral", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
Template.lastIndex = lastIndex; | ||
if (match = Template.exec(input)) { | ||
lastIndex = Template.lastIndex; | ||
lastSignificantToken = match[0]; | ||
if (match[1] === "${") { | ||
lastSignificantToken = "?InterpolationInTemplate"; | ||
stack.push({ | ||
tag: "InterpolationInTemplate", | ||
nesting: braces.length | ||
}); | ||
postfixIncDec = false; | ||
yield ({ | ||
type: "TemplateHead", | ||
value: match[0] | ||
}); | ||
} else { | ||
postfixIncDec = true; | ||
yield ({ | ||
type: "NoSubstitutionTemplate", | ||
value: match[0], | ||
closed: match[1] === "`" | ||
}); | ||
} | ||
continue; | ||
} | ||
break; | ||
case "JSXTag": | ||
case "JSXTagEnd": | ||
JSXPunctuator.lastIndex = lastIndex; | ||
if (match = JSXPunctuator.exec(input)) { | ||
lastIndex = JSXPunctuator.lastIndex; | ||
nextLastSignificantToken = match[0]; | ||
switch (match[0]) { | ||
case "<": | ||
stack.push({tag: "JSXTag"}); | ||
break; | ||
case ">": | ||
stack.pop(); | ||
if (lastSignificantToken === "/" || mode.tag === "JSXTagEnd") { | ||
nextLastSignificantToken = "?JSX"; | ||
postfixIncDec = true; | ||
} else { | ||
stack.push({tag: "JSXChildren"}); | ||
} | ||
break; | ||
case "{": | ||
stack.push({ | ||
tag: "InterpolationInJSX", | ||
nesting: braces.length | ||
}); | ||
nextLastSignificantToken = "?InterpolationInJSX"; | ||
postfixIncDec = false; | ||
break; | ||
case "/": | ||
if (lastSignificantToken === "<") { | ||
stack.pop(); | ||
if (stack[stack.length - 1].tag === "JSXChildren") { | ||
stack.pop(); | ||
} | ||
stack.push({tag: "JSXTagEnd"}); | ||
} | ||
} | ||
lastSignificantToken = nextLastSignificantToken; | ||
yield ({ | ||
type: "JSXPunctuator", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
JSXIdentifier.lastIndex = lastIndex; | ||
if (match = JSXIdentifier.exec(input)) { | ||
lastIndex = JSXIdentifier.lastIndex; | ||
lastSignificantToken = match[0]; | ||
yield ({ | ||
type: "JSXIdentifier", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
JSXString.lastIndex = lastIndex; | ||
if (match = JSXString.exec(input)) { | ||
lastIndex = JSXString.lastIndex; | ||
lastSignificantToken = match[0]; | ||
yield ({ | ||
type: "JSXString", | ||
value: match[0], | ||
closed: match[2] !== void 0 | ||
}); | ||
continue; | ||
} | ||
break; | ||
case "JSXChildren": | ||
JSXText.lastIndex = lastIndex; | ||
if (match = JSXText.exec(input)) { | ||
lastIndex = JSXText.lastIndex; | ||
lastSignificantToken = match[0]; | ||
yield ({ | ||
type: "JSXText", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
switch (input[lastIndex]) { | ||
case "<": | ||
stack.push({tag: "JSXTag"}); | ||
lastIndex++; | ||
lastSignificantToken = "<"; | ||
yield ({ | ||
type: "JSXPunctuator", | ||
value: "<" | ||
}); | ||
continue; | ||
case "{": | ||
stack.push({ | ||
tag: "InterpolationInJSX", | ||
nesting: braces.length | ||
}); | ||
lastIndex++; | ||
lastSignificantToken = "?InterpolationInJSX"; | ||
postfixIncDec = false; | ||
yield ({ | ||
type: "JSXPunctuator", | ||
value: "{" | ||
}); | ||
continue; | ||
} | ||
} | ||
WhiteSpace.lastIndex = lastIndex; | ||
if (match = WhiteSpace.exec(input)) { | ||
lastIndex = WhiteSpace.lastIndex; | ||
yield ({ | ||
type: "WhiteSpace", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
LineTerminatorSequence.lastIndex = lastIndex; | ||
if (match = LineTerminatorSequence.exec(input)) { | ||
lastIndex = LineTerminatorSequence.lastIndex; | ||
postfixIncDec = false; | ||
if (KeywordsWithNoLineTerminatorAfter.test(lastSignificantToken)) { | ||
lastSignificantToken = "?NoLineTerminatorHere"; | ||
} | ||
yield ({ | ||
type: "LineTerminatorSequence", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
MultiLineComment.lastIndex = lastIndex; | ||
if (match = MultiLineComment.exec(input)) { | ||
lastIndex = MultiLineComment.lastIndex; | ||
if (Newline.test(match[0])) { | ||
postfixIncDec = false; | ||
if (KeywordsWithNoLineTerminatorAfter.test(lastSignificantToken)) { | ||
lastSignificantToken = "?NoLineTerminatorHere"; | ||
} | ||
} | ||
yield ({ | ||
type: "MultiLineComment", | ||
value: match[0], | ||
closed: match[1] !== void 0 | ||
}); | ||
continue; | ||
} | ||
SingleLineComment.lastIndex = lastIndex; | ||
if (match = SingleLineComment.exec(input)) { | ||
lastIndex = SingleLineComment.lastIndex; | ||
postfixIncDec = false; | ||
yield ({ | ||
type: "SingleLineComment", | ||
value: match[0] | ||
}); | ||
continue; | ||
} | ||
firstCodePoint = String.fromCodePoint(input.codePointAt(lastIndex)); | ||
lastIndex += firstCodePoint.length; | ||
lastSignificantToken = firstCodePoint; | ||
postfixIncDec = false; | ||
yield ({ | ||
type: mode.tag.startsWith("JSX") ? "JSXInvalid" : "Invalid", | ||
value: firstCodePoint | ||
}); | ||
} | ||
return void 0; | ||
}; |
{ | ||
"name": "js-tokens", | ||
"version": "5.0.0", | ||
"version": "6.0.0", | ||
"author": "Simon Lydell", | ||
"license": "MIT", | ||
"description": "A regex that tokenizes JavaScript.", | ||
"description": "Tiny JavaScript tokenizer.", | ||
"repository": "lydell/js-tokens", | ||
"type": "commonjs", | ||
"exports": "./index.js", | ||
"keywords": [ | ||
"JavaScript", | ||
"js", | ||
"ECMAScript", | ||
"es", | ||
"token", | ||
"tokens", | ||
"tokenize", | ||
"regex" | ||
], | ||
"files": [ | ||
"index.js" | ||
], | ||
"repository": "lydell/js-tokens", | ||
"scripts": { | ||
"test": "mocha --ui tdd", | ||
"esprima-compare": "node esprima-compare ./index.js everything.js/es5.js", | ||
"build": "node generate-index.js", | ||
"dev": "npm run build && npm test" | ||
}, | ||
"devDependencies": { | ||
"coffeescript": "2.4.1", | ||
"esprima": "4.0.1", | ||
"everything.js": "1.0.3", | ||
"mocha": "6.1.4" | ||
} | ||
"tokenizer", | ||
"regex", | ||
"regexp" | ||
] | ||
} |
240
README.md
@@ -1,240 +0,14 @@ | ||
Overview [![Build Status](https://travis-ci.org/lydell/js-tokens.svg?branch=master)](https://travis-ci.org/lydell/js-tokens) | ||
======== | ||
# js-tokens | ||
A regex that tokenizes JavaScript. | ||
The tiny, regex powered, lenient, _almost_ spec-compliant JavaScript tokenizer that never fails. | ||
```js | ||
var jsTokens = require("js-tokens").default | ||
const jsTokens = require("js-tokens"); | ||
var jsString = "var foo=opts.foo;\n..." | ||
const jsString = 'JSON.stringify({k:3.14**2}, null /*replacer*/, "\\t")'; | ||
jsString.match(jsTokens) | ||
// ["var", " ", "foo", "=", "opts", ".", "foo", ";", "\n", ...] | ||
Array.from(jsTokens(jsString), (token) => token.value).join("|"); | ||
// JSON|.|stringify|(|{|k|:|3.14|**|2|}|,| |null| |/*replacer*/|,| |"\t"|) | ||
``` | ||
Installation | ||
============ | ||
`npm install js-tokens` | ||
```js | ||
import jsTokens from "js-tokens" | ||
// or: | ||
var jsTokens = require("js-tokens").default | ||
``` | ||
Usage | ||
===== | ||
### `jsTokens` ### | ||
A regex with the `g` flag that matches JavaScript tokens. | ||
The regex _always_ matches, even invalid JavaScript and the empty string. | ||
The next match is always directly after the previous. | ||
### `var token = matchToToken(match)` ### | ||
```js | ||
import {matchToToken} from "js-tokens" | ||
// or: | ||
var matchToToken = require("js-tokens").matchToToken | ||
``` | ||
Takes a `match` returned by `jsTokens.exec(string)`, and returns a `{type: | ||
String, value: String}` object. The following types are available: | ||
- string | ||
- comment | ||
- regex | ||
- number | ||
- name | ||
- punctuator | ||
- whitespace | ||
- invalid | ||
Multi-line comments and strings also have a `closed` property indicating if the | ||
token was closed or not (see below). | ||
Comments and strings both come in several flavors. To distinguish them, check if | ||
the token starts with `//`, `/*`, `'`, `"` or `` ` ``. | ||
Names are ECMAScript IdentifierNames, that is, including both identifiers and | ||
keywords. You may use [is-keyword-js] to tell them apart. | ||
Whitespace includes both line terminators and other whitespace. | ||
[is-keyword-js]: https://github.com/crissdev/is-keyword-js | ||
ECMAScript support | ||
================== | ||
The intention is to always support the latest ECMAScript version whose feature | ||
set has been finalized. | ||
If adding support for a newer version requires changes, a new version with a
major version bump will be released.
Currently, ECMAScript 2019 is supported. | ||
Invalid code handling | ||
===================== | ||
Unterminated strings are still matched as strings. JavaScript strings cannot | ||
contain (unescaped) newlines, so unterminated strings simply end at the end of | ||
the line. Unterminated template strings can contain unescaped newlines, though, | ||
so they go on to the end of input. | ||
Unterminated multi-line comments are also still matched as comments. They | ||
simply go on to the end of the input. | ||
Unterminated regex literals are likely matched as division and whatever is | ||
inside the regex. | ||
Invalid ASCII characters have their own capturing group. | ||
Invalid non-ASCII characters are treated as names, to simplify the matching of | ||
names (except unicode spaces which are treated as whitespace). Note: See also | ||
the [ES2018](#es2018) section. | ||
Regex literals may contain invalid regex syntax. They are still matched as | ||
regex literals. They may also contain repeated regex flags, to keep the regex | ||
simple. | ||
Strings may contain invalid escape sequences. | ||
Limitations | ||
=========== | ||
Tokenizing JavaScript using regexes—in fact, _one single regex_—won’t be | ||
perfect. But that’s not the point either. | ||
You may compare jsTokens with [esprima] by using `esprima-compare.js`. | ||
See `npm run esprima-compare`! | ||
[esprima]: http://esprima.org/ | ||
### Template string interpolation ### | ||
Template strings are matched as single tokens, from the starting `` ` `` to the | ||
ending `` ` ``, including interpolations (whose tokens are not matched | ||
individually). | ||
Matching template string interpolations requires recursive balancing of `{` and | ||
`}`—something that JavaScript regexes cannot do. Only one level of nesting is | ||
supported. | ||
### Division and regex literals collision ### | ||
Consider this example: | ||
```js | ||
var g = 9.82 | ||
var number = bar / 2/g | ||
var regex = / 2/g | ||
``` | ||
A human can easily understand that in the `number` line we’re dealing with | ||
division, and in the `regex` line we’re dealing with a regex literal. How come? | ||
Because humans can look at the whole code to put the `/` characters in context. | ||
A JavaScript regex cannot. It only sees forwards. (Well, ES2018 regexes can also | ||
look backwards. See the [ES2018](#es2018) section). | ||
When the `jsTokens` regex scans through the above, it will see the following
at the end of both the `number` and `regex` rows:
```js | ||
/ 2/g | ||
``` | ||
It is then impossible to know if that is a regex literal, or part of an | ||
expression dealing with division. | ||
Here is a similar case: | ||
```js | ||
foo /= 2/g | ||
foo(/= 2/g) | ||
``` | ||
The first line divides the `foo` variable with `2/g`. The second line calls the | ||
`foo` function with the regex literal `/= 2/g`. Again, since `jsTokens` only | ||
sees forwards, it cannot tell the two cases apart. | ||
There are some cases where we _can_ tell division and regex literals apart, | ||
though. | ||
First off, we have the simple cases where there’s only one slash in the line: | ||
```js | ||
var foo = 2/g | ||
foo /= 2 | ||
``` | ||
Regex literals cannot contain newlines, so the above cases are correctly
identified as division. Things are only problematic when there is more than
one non-comment slash in a single line.
Secondly, not every character is a valid regex flag. | ||
```js | ||
var number = bar / 2/e | ||
``` | ||
The above example is also correctly identified as division, because `e` is not a
valid regex flag. I initially wanted to future-proof by allowing `[a-zA-Z]*`
(any letter) as flags, but it is not worth it since it increases the number of
ambiguous cases. So only the standard `g`, `m`, `i`, `y`, `u` and `s` flags are
allowed. This means that the above example will be identified as division as
long as you don’t rename the `e` variable to some permutation of `gmiyus` 1 to 6
characters long.
Lastly, we can look _forward_ for information. | ||
- If the token following what looks like a regex literal is not valid after a | ||
regex literal, but is valid in a division expression, then the regex literal | ||
is treated as division instead. For example, a flagless regex cannot be | ||
followed by a string, number or name, but all of those three can be the | ||
denominator of a division. | ||
- Generally, if what looks like a regex literal is followed by an operator, the
regex literal is treated as division instead. This is because regexes are
seldom used with operators (such as `+`, `*`, `&&` and `==`), but division
could likely be part of such an expression.
Please consult the regex source and the test cases for precise information on | ||
when regex or division is matched (should you need to know). In short, you | ||
could sum it up as: | ||
If the end of a statement looks like a regex literal (even if it isn’t), it | ||
will be treated as one. Otherwise it should work as expected (if you write sane | ||
code). | ||
### ES2018 ### | ||
ES2018 added some nice regex improvements to the language. | ||
- [Unicode property escapes] should allow telling names and invalid non-ASCII | ||
characters apart without blowing up the regex size. | ||
- [Lookbehind assertions] should allow telling division and regex
literals apart in more cases.
- [Named capture groups] might simplify some things. | ||
These things would be nice to do, but are not critical. They probably have to | ||
wait until the oldest maintained Node.js LTS release supports those features. | ||
[Unicode property escapes]: http://2ality.com/2017/07/regexp-unicode-property-escapes.html | ||
[Lookbehind assertions]: http://2ality.com/2017/05/regexp-lookbehind-assertions.html | ||
[Named capture groups]: http://2ality.com/2017/05/regexp-named-capture-groups.html | ||
License | ||
======= | ||
[MIT](LICENSE). | ||
**[➡️ Full readme](https://github.com/lydell/js-tokens/)** |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
15524
0
419
14
1