js-tokens
Advanced tools
Comparing version 0.2.0 to 0.3.0
@@ -0,1 +1,14 @@ | ||
### Version 0.3.0 (2014-12-19) ### | ||
- Changed: The `jsTokens.names` array has been replaced with the | ||
`jsTokens.matchToToken` function. The capturing groups of `jsTokens` are no | ||
longer part of the public API; instead use said function. See this [gist] for | ||
an example. (Backwards-incompatible change.) | ||
- Changed: The empty string is now considered an “invalid” token, instead of an | ||
“empty” token (its own group). (Backwards-incompatible change.) | ||
- Removed: component support. (Backwards-incompatible change.) | ||
[gist]: https://gist.github.com/lydell/be49dbf80c382c473004 | ||
### Version 0.2.0 (2014-06-19) ### | ||
@@ -2,0 +15,0 @@ |
@@ -9,13 +9,4 @@ // Copyright 2014 Simon Lydell | ||
var source = fs.readFileSync("regex.coffee").toString() | ||
var names = [] | ||
source.replace(/^\s*\( # <([^>]+)>/mg, function(match, name) { | ||
names.push(name) | ||
}) | ||
var code = [ | ||
"module.exports = /" + regex.source.replace(/\//g, "\\/") + "/g", | ||
"module.exports.names = " + JSON.stringify(names, null, 2), | ||
].join("\n") | ||
var code = fs.readFileSync("index.js").toString() | ||
code = code.replace(/\/.+\/.+/, regex.toString()) | ||
fs.writeFileSync("index.js", code) |
33
index.js
@@ -1,13 +0,20 @@ | ||
module.exports = /(\s+)|(\/\/.*|\/\*(?:[^*]|\*(?!\/))*(?:\*\/)?)|('(?:[^'\\\r\n]|\\(?:\r\n|[\s\S]))*'?|"(?:[^"\\\r\n]|\\(?:\r\n|[\s\S]))*"?)|(\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|[^\/\]\\\r\n]|\\.)+\/(?:(?!\s*(?:\b|[\u0080-\uFFFF$\\'"~({]|[+\-!](?!=)|\.?\d))|[gmiy]{1,4}\b(?![\u0080-\uFFFF$\\]|\s*(?:[+\-*%&|^<>!=?({]|\/(?![\/*])))))|(-?(?:0[xX][\da-fA-F]+|(?:\d*\.\d+|\d+\.?)(?:[eE][+-]?\d+)?))|((?:[$\w\u0080-\uFFFF]|\\u[\da-fA-F]{4}|\\u\{[\da-fA-F]{1,6}\})+)|(--|\+\+|&&|\|\||=>|\.{3}|(?:[+\-*\/%&|^]|<{1,2}|>{1,3}|!=?|={1,2})=?|[?:~])|([;,.[\](){}])|(^$)|([\s\S])/g | ||
module.exports.names = [ | ||
"whitespace", | ||
"comment", | ||
"string", | ||
"regex", | ||
"number", | ||
"name", | ||
"operator", | ||
"punctuation", | ||
"empty", | ||
"invalid" | ||
] | ||
// Copyright 2014 Simon Lydell | ||
// X11 (“MIT”) Licensed. (See LICENSE.) | ||
// This regex comes from regex.coffee, and is inserted here by generate-index.js | ||
// (run `npm run build`). | ||
module.exports = /((['"])(?:(?!\2)[^\\\r\n]|\\(?:\r\n|[\s\S]))*(\2)?)|(\/\/.*)|(\/\*(?:[^*]|\*(?!\/))*(\*\/)?)|(\/(?!\*)(?:\[(?:[^\]\\\r\n]|\\.)*\]|[^\/\]\\\r\n]|\\.)+\/(?:(?!\s*(?:\b|[\u0080-\uFFFF$\\'"~({]|[+\-!](?!=)|\.?\d))|[gmiy]{1,4}\b(?![\u0080-\uFFFF$\\]|\s*(?:[+\-*%&|^<>!=?({]|\/(?![\/*])))))|(-?(?:0[xX][\da-fA-F]+|(?:\d*\.\d+|\d+\.?)(?:[eE][+-]?\d+)?))|((?!\d)(?:[$\w\u0080-\uFFFF]|\\u[\da-fA-F]{4}|\\u\{[\da-fA-F]{1,6}\})+)|(--|\+\+|&&|\|\||=>|\.{3}|(?:[+\-*\/%&|^]|<{1,2}|>{1,3}|!=?|={1,2})=?|[?:~])|([;,.[\](){}])|(\s+)|(^$|[\s\S])/g | ||
// Converts a `match` array, as returned by `jsTokens.exec(string)`, into a
// token object of the form `{type: String, value: String}`.
//
// `value` is always the full matched text (`match[0]`). `type` is chosen from
// the capturing group that participated in the match:
//
//    1 → "string"       (group 3 is the closing quote, if present)
//    4 → "comment"      (single-line `//` comment)
//    5 → "comment"      (multi-line comment; group 6 is the closing `*/`)
//    7 → "regex"
//    8 → "number"
//    9 → "name"
//   10 → "operator"
//   11 → "punctuation"
//   12 → "whitespace"
//
// Anything else (including the empty string) stays "invalid". Strings and
// multi-line comments additionally get a boolean `closed` property telling
// whether the closing delimiter was matched.
module.exports.matchToToken = function(match) {
  // Must be declared with `var`: a bare assignment would create (and share) a
  // global `token` between calls, and would throw in strict mode.
  var token = {type: "invalid", value: match[0]}

  // The groups are alternatives of one regex, so at most one of them matches.
  if (match[1]) {
    token.type = "string"
    token.closed = !!match[3]
  } else if (match[4]) {
    token.type = "comment"
  } else if (match[5]) {
    token.type = "comment"
    token.closed = !!match[6]
  } else if (match[7]) {
    token.type = "regex"
  } else if (match[8]) {
    token.type = "number"
  } else if (match[9]) {
    token.type = "name"
  } else if (match[10]) {
    token.type = "operator"
  } else if (match[11]) {
    token.type = "punctuation"
  } else if (match[12]) {
    token.type = "whitespace"
  }

  return token
}
{ | ||
"name": "js-tokens", | ||
"version": "0.2.0", | ||
"version": "0.3.0", | ||
"author": "Simon Lydell", | ||
"license": "MIT", | ||
"description": "A regex that tokenizes JavaScript.", | ||
"main": "index.js", | ||
"keywords": [ | ||
@@ -17,8 +16,10 @@ "JavaScript", | ||
"scripts": { | ||
"test": "mocha" | ||
"test": "mocha --ui tdd", | ||
"build": "node generate-index.js", | ||
"dev": "npm run build && npm test" | ||
}, | ||
"devDependencies": { | ||
"mocha": "^1.17.1", | ||
"coffee-script": "~1.7.1" | ||
"coffee-script": "^1.8.0", | ||
"mocha": "^2.0.1" | ||
} | ||
} | ||
} |
@@ -9,22 +9,6 @@ Overview [![Build Status](https://travis-ci.org/lydell/js-tokens.png?branch=master)](https://travis-ci.org/lydell/js-tokens) | ||
// Tokenize a whole string of JavaScript: | ||
var jsString = "var foo=opts.foo;\n..." | ||
jsString.match(jsTokens) | ||
// ["var", " ", "foo", "=", "opts", ".", "foo", ";", "\n", ...] | ||
// Rename the variable `foo` to `bar`: | ||
var lastSignificantToken | ||
jsString.replace(jsTokens, function(token) { | ||
var index = 1 | ||
while (arguments[index] === undefined) index++ | ||
var name = jsTokens.names[index-1] | ||
if (lastSignificantToken !== "." && token === "foo") { | ||
token = "bar" | ||
} | ||
if (name !== "comment" && name !== "whitespace") { | ||
lastSignificantToken = token | ||
} | ||
return token | ||
}) | ||
// ["var", " ", "bar", "=", "opts", ".", "foo", ";", "\n", ...] | ||
``` | ||
@@ -37,3 +21,2 @@ | ||
- `npm install js-tokens` | ||
- `component install lydell/js-tokens` | ||
@@ -52,13 +35,32 @@ ```js | ||
The regex _always_ matches, even invalid JavaScript and the empty string. For | ||
example, `jsTokens.exec(string)` never returns `null`. | ||
The regex _always_ matches, even invalid JavaScript and the empty string. | ||
The next match is always directly after the previous. Each token has its own | ||
capturing group. | ||
The next match is always directly after the previous. | ||
### `jsTokens.names` ### | ||
### `var token = jsTokens.matchToToken(match)` ### | ||
An array of names for each token, in the capturing group order. | ||
Takes a `match` returned by `jsTokens.exec(string)`, and returns a `{type: | ||
String, value: String}` object. The following types are available: | ||
- string | ||
- comment | ||
- regex | ||
- number | ||
- name | ||
- operator | ||
- punctuation | ||
- whitespace | ||
- invalid | ||
Multi-line comments and strings also have a `closed` property indicating if the | ||
token was closed or not (see below). | ||
Comments and strings both come in two flavors. To distinguish them, check if the | ||
token starts with `//`, `/*`, `'` or `"`. | ||
For example usage, please see this [gist]. | ||
[gist]: https://gist.github.com/lydell/be49dbf80c382c473004 | ||
Invalid code handling | ||
@@ -83,5 +85,4 @@ ===================== | ||
Regex literals may contain invalid regex syntax. They are still matched as | ||
regex literals. They may also contains repeated regex flags. (That _could_ be | ||
fixed by using a capture group in `jsTokens`, but the capturing groups are | ||
reserved—one for each token type.) | ||
regex literals. They may also contain repeated regex flags, to keep the regex | ||
simple. | ||
@@ -183,20 +184,2 @@ Strings may contain invalid escape sequences. | ||
Build | ||
===== | ||
index.js is generated by running `node generate-index.js`. The regex is written | ||
in regex.coffee. Don’t worry, you don’t need to know anything about | ||
CoffeeScript: regex.coffee should be kept as simple as possible. CoffeeScript | ||
is only used for its block regexes, which have the following benefits: | ||
- Insignificant whitespace. | ||
- Comments. | ||
- No need to escape slashes. | ||
- No need to double-escape everything (as opposed to using `RegExp("regex as a | ||
string. One backslash: \\\\")`). | ||
- Plenty of syntax highlighters available. | ||
Everything else is written in JavaScript. | ||
License | ||
@@ -203,0 +186,0 @@ ======= |
@@ -19,7 +19,6 @@ // Copyright 2014 Simon Lydell | ||
suite("jsTokens.names", function() { | ||
suite("jsTokens.matchToToken", function() { | ||
test("is an array of strings", function() { | ||
assert(util.isArray(jsTokens.names)) | ||
assert(jsTokens.names.every(function(name) { return typeof name === "string" })) | ||
test("is a function", function() { | ||
assert.equal(typeof jsTokens.matchToToken, "function") | ||
}) | ||
@@ -36,16 +35,25 @@ | ||
function matchHelper(name, string, expected) { | ||
function matchHelper(type, string, expected, extra) { | ||
extra = extra || {} | ||
if (typeof expected === "object") { | ||
extra = expected | ||
expected = undefined | ||
} | ||
jsTokens.lastIndex = 0 | ||
var match = jsTokens.exec(string) | ||
var token = jsTokens.matchToToken(jsTokens.exec(string)) | ||
var index = 1 | ||
while (match[index] === undefined) index++ | ||
var actualName = jsTokens.names[index-1] | ||
test(string, function() { | ||
if (expected === false) { | ||
assert.notEqual(actualName, name) | ||
assert.notEqual(token.type, type) | ||
} else { | ||
assert.equal(jsTokens.names[index-1], name) | ||
assert.equal(match[0], (typeof expected === "string" ? expected : string)) | ||
assert.equal(token.type, type) | ||
assert.equal( | ||
token.value, | ||
(typeof expected === "string" ? expected : string) | ||
) | ||
if ("closed" in extra) { | ||
assert.equal(token.closed, extra.closed) | ||
} else if (type === "string") { | ||
assert.equal(token.closed, true) | ||
} | ||
} | ||
@@ -90,13 +98,13 @@ }) | ||
match("/**/") | ||
match("/*comment*/") | ||
match("/* comment */") | ||
match("/***/") | ||
match("/*/*/") | ||
match("/*\n\r \r\n*/") | ||
match("/**/", {closed: true}) | ||
match("/*comment*/", {closed: true}) | ||
match("/* comment */", {closed: true}) | ||
match("/***/", {closed: true}) | ||
match("/*/*/", {closed: true}) | ||
match("/*\n\r \r\n*/", {closed: true}) | ||
match("/*") | ||
match("/*/") | ||
match("/*unclosed comment") | ||
match("/*unclosed comment\nnew Line('is', this, code ? true : false)") | ||
match("/*", {closed: false}) | ||
match("/*/", {closed: false}) | ||
match("/*unclosed", {closed: false}) | ||
match("/*unclosed\nnew Line(this == code ? true : false)", {closed: false}) | ||
@@ -133,18 +141,18 @@ }) | ||
match("'") | ||
match('"') | ||
match("'unclosed string") | ||
match('"unclosed string') | ||
match("'\n", "'") | ||
match('"\n', '"') | ||
match("'\r", "'") | ||
match('"\r', '"') | ||
match("'\r\n", "'") | ||
match('"\r\n', '"') | ||
match("'\\\n") | ||
match('"\\\n') | ||
match("'\\\r") | ||
match('"\\\r') | ||
match("'\\\r\n") | ||
match('"\\\r\n') | ||
match("'", {closed: false}) | ||
match('"', {closed: false}) | ||
match("'unclosed", {closed: false}) | ||
match('"unclosed', {closed: false}) | ||
match("'\n", "'", {closed: false}) | ||
match('"\n', '"', {closed: false}) | ||
match("'\r", "'", {closed: false}) | ||
match('"\r', '"', {closed: false}) | ||
match("'\r\n", "'", {closed: false}) | ||
match('"\r\n', '"', {closed: false}) | ||
match("'\\\n", {closed: false}) | ||
match('"\\\n', {closed: false}) | ||
match("'\\\r", {closed: false}) | ||
match('"\\\r', {closed: false}) | ||
match("'\\\r\n", {closed: false}) | ||
match('"\\\r\n', {closed: false}) | ||
@@ -664,11 +672,5 @@ }) | ||
token("empty", function(match) { | ||
token("invalid", function(match) { | ||
match("") | ||
}) | ||
token("invalid", function(match) { | ||
match("@") | ||
@@ -675,0 +677,0 @@ match("#") |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
38488
18
895
184