character-parser
Advanced tools
Comparing version
267
index.js
@@ -0,2 +1,25 @@ | ||
'use strict'; | ||
exports = (module.exports = parse); | ||
var TOKEN_TYPES = exports.TOKEN_TYPES = { | ||
LINE_COMMENT: '//', | ||
BLOCK_COMMENT: '/**/', | ||
SINGLE_QUOTE: '\'', | ||
DOUBLE_QUOTE: '"', | ||
TEMPLATE_QUOTE: '`', | ||
REGEXP: '//g' | ||
} | ||
var BRACKETS = exports.BRACKETS = { | ||
'(': ')', | ||
'{': '}', | ||
'[': ']' | ||
}; | ||
var BRACKETS_REVERSED = { | ||
')': '(', | ||
'}': '{', | ||
']': '[' | ||
}; | ||
exports.parse = parse; | ||
@@ -10,5 +33,2 @@ function parse(src, state, options) { | ||
while (index < end) { | ||
if (state.roundDepth < 0 || state.curlyDepth < 0 || state.squareDepth < 0) { | ||
throw new SyntaxError('Mismatched Bracket: ' + src[index - 1]); | ||
} | ||
exports.parseChar(src[index++], state); | ||
@@ -19,116 +39,135 @@ } | ||
exports.parseMax = parseMax; | ||
function parseMax(src, options) { | ||
options = options || {}; | ||
var start = options.start || 0; | ||
var index = start; | ||
var state = exports.defaultState(); | ||
while (state.roundDepth >= 0 && state.curlyDepth >= 0 && state.squareDepth >= 0) { | ||
if (index >= src.length) { | ||
throw new Error('The end of the string was reached with no closing bracket found.'); | ||
} | ||
exports.parseChar(src[index++], state); | ||
} | ||
var end = index - 1; | ||
return { | ||
start: start, | ||
end: end, | ||
src: src.substring(start, end) | ||
}; | ||
} | ||
exports.parseUntil = parseUntil; | ||
function parseUntil(src, delimiter, options) { | ||
options = options || {}; | ||
var includeLineComment = options.includeLineComment || false; | ||
var start = options.start || 0; | ||
var index = start; | ||
var state = exports.defaultState(); | ||
while (state.isString() || state.regexp || state.blockComment || | ||
(!includeLineComment && state.lineComment) || !startsWith(src, delimiter, index)) { | ||
while ( | ||
index < src.length | ||
) { | ||
if ((options.ignoreNesting || !state.isNesting(options)) && startsWith(src, delimiter, index)) { | ||
var end = index; | ||
return { | ||
start: start, | ||
end: end, | ||
src: src.substring(start, end) | ||
}; | ||
} | ||
exports.parseChar(src[index++], state); | ||
} | ||
var end = index; | ||
return { | ||
start: start, | ||
end: end, | ||
src: src.substring(start, end) | ||
}; | ||
var err = new Error('The end of the string was reached with no closing bracket found.'); | ||
err.code = 'CHARACTER_PARSER:END_OF_STRING_REACHED'; | ||
throw err; | ||
} | ||
exports.parseChar = parseChar; | ||
function parseChar(character, state) { | ||
if (character.length !== 1) throw new Error('Character must be a string of length 1'); | ||
if (character.length !== 1) { | ||
var err = new Error('Character must be a string of length 1'); | ||
err.name = 'InvalidArgumentError'; | ||
err.code = 'CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE'; | ||
throw err; | ||
} | ||
state = state || exports.defaultState(); | ||
state.src = state.src || ''; | ||
state.src += character; | ||
var wasComment = state.blockComment || state.lineComment; | ||
var wasComment = state.isComment(); | ||
var lastChar = state.history ? state.history[0] : ''; | ||
if (state.regexpStart) { | ||
if (character === '/' || character == '*') { | ||
state.regexp = false; | ||
state.stack.pop(); | ||
} | ||
state.regexpStart = false; | ||
} | ||
if (state.lineComment) { | ||
if (character === '\n') { | ||
state.lineComment = false; | ||
} | ||
} else if (state.blockComment) { | ||
if (state.lastChar === '*' && character === '/') { | ||
state.blockComment = false; | ||
} | ||
} else if (state.singleQuote) { | ||
if (character === '\'' && !state.escaped) { | ||
state.singleQuote = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
} else if (state.doubleQuote) { | ||
if (character === '"' && !state.escaped) { | ||
state.doubleQuote = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
} else if (state.regexp) { | ||
if (character === '/' && !state.escaped) { | ||
state.regexp = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
} else if (lastChar === '/' && character === '/') { | ||
state.history = state.history.substr(1); | ||
state.lineComment = true; | ||
} else if (lastChar === '/' && character === '*') { | ||
state.history = state.history.substr(1); | ||
state.blockComment = true; | ||
} else if (character === '/' && isRegexp(state.history)) { | ||
state.regexp = true; | ||
state.regexpStart = true; | ||
} else if (character === '\'') { | ||
state.singleQuote = true; | ||
} else if (character === '"') { | ||
state.doubleQuote = true; | ||
} else if (character === '(') { | ||
state.roundDepth++; | ||
} else if (character === ')') { | ||
state.roundDepth--; | ||
} else if (character === '{') { | ||
state.curlyDepth++; | ||
} else if (character === '}') { | ||
state.curlyDepth--; | ||
} else if (character === '[') { | ||
state.squareDepth++; | ||
} else if (character === ']') { | ||
state.squareDepth--; | ||
switch (state.current()) { | ||
case TOKEN_TYPES.LINE_COMMENT: | ||
if (character === '\n') { | ||
state.stack.pop(); | ||
} | ||
break; | ||
case TOKEN_TYPES.BLOCK_COMMENT: | ||
if (state.lastChar === '*' && character === '/') { | ||
state.stack.pop(); | ||
} | ||
break; | ||
case TOKEN_TYPES.SINGLE_QUOTE: | ||
if (character === '\'' && !state.escaped) { | ||
state.stack.pop(); | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
break; | ||
case TOKEN_TYPES.DOUBLE_QUOTE: | ||
if (character === '"' && !state.escaped) { | ||
state.stack.pop(); | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
break; | ||
case TOKEN_TYPES.TEMPLATE_QUOTE: | ||
if (character === '`' && !state.escaped) { | ||
state.stack.pop(); | ||
state.hasDollar = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
state.hasDollar = false; | ||
} else if (character === '$' && !state.escaped) { | ||
state.hasDollar = true; | ||
} else if (character === '{' && state.hasDollar) { | ||
state.stack.push(BRACKETS[character]); | ||
} else { | ||
state.escaped = false; | ||
state.hasDollar = false; | ||
} | ||
break; | ||
case TOKEN_TYPES.REGEXP: | ||
if (character === '/' && !state.escaped) { | ||
state.stack.pop(); | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
break; | ||
default: | ||
if (character in BRACKETS) { | ||
state.stack.push(BRACKETS[character]); | ||
} else if (character in BRACKETS_REVERSED) { | ||
if (state.current() !== character) { | ||
var err = new SyntaxError('Mismatched Bracket: ' + character); | ||
err.code = 'CHARACTER_PARSER:MISMATCHED_BRACKET'; | ||
throw err; | ||
}; | ||
state.stack.pop(); | ||
} else if (lastChar === '/' && character === '/') { | ||
// Don't include comments in history | ||
state.history = state.history.substr(1); | ||
state.stack.push(TOKEN_TYPES.LINE_COMMENT); | ||
} else if (lastChar === '/' && character === '*') { | ||
// Don't include comment in history | ||
state.history = state.history.substr(1); | ||
state.stack.push(TOKEN_TYPES.BLOCK_COMMENT); | ||
} else if (character === '/' && isRegexp(state.history)) { | ||
state.stack.push(TOKEN_TYPES.REGEXP); | ||
// N.B. if the next character turns out to be a `*` or a `/` | ||
// then this isn't actually a regexp | ||
state.regexpStart = true; | ||
} else if (character === '\'') { | ||
state.stack.push(TOKEN_TYPES.SINGLE_QUOTE); | ||
} else if (character === '"') { | ||
state.stack.push(TOKEN_TYPES.DOUBLE_QUOTE); | ||
} else if (character === '`') { | ||
state.stack.push(TOKEN_TYPES.TEMPLATE_QUOTE); | ||
} | ||
break; | ||
} | ||
if (!state.blockComment && !state.lineComment && !wasComment) state.history = character + state.history; | ||
if (!state.isComment() && !wasComment) { | ||
state.history = character + state.history; | ||
} | ||
state.lastChar = character; // store last character for ending block comments | ||
@@ -140,26 +179,35 @@ return state; | ||
function State() { | ||
this.lineComment = false; | ||
this.blockComment = false; | ||
this.stack = []; | ||
this.singleQuote = false; | ||
this.doubleQuote = false; | ||
this.regexp = false; | ||
this.regexpStart = false; | ||
this.escaped = false; | ||
this.hasDollar = false; | ||
this.roundDepth = 0; | ||
this.curlyDepth = 0; | ||
this.squareDepth = 0; | ||
this.src = ''; | ||
this.history = '' | ||
this.lastChar = '' | ||
} | ||
State.prototype.current = function () { | ||
return this.stack[this.stack.length - 1]; | ||
}; | ||
State.prototype.isString = function () { | ||
return this.singleQuote || this.doubleQuote; | ||
return ( | ||
this.current() === TOKEN_TYPES.SINGLE_QUOTE || | ||
this.current() === TOKEN_TYPES.DOUBLE_QUOTE || | ||
this.current() === TOKEN_TYPES.TEMPLATE_QUOTE | ||
); | ||
} | ||
State.prototype.isComment = function () { | ||
return this.lineComment || this.blockComment; | ||
return this.current() === TOKEN_TYPES.LINE_COMMENT || this.current() === TOKEN_TYPES.BLOCK_COMMENT; | ||
} | ||
State.prototype.isNesting = function () { | ||
return this.isString() || this.isComment() || this.regexp || this.roundDepth > 0 || this.curlyDepth > 0 || this.squareDepth > 0 | ||
State.prototype.isNesting = function (opts) { | ||
if ( | ||
opts && opts.ignoreLineComment && | ||
this.stack.length === 1 && this.stack[0] === TOKEN_TYPES.LINE_COMMENT | ||
) { | ||
// if we are only inside a line comment, and line comments are ignored | ||
// don't count it as nesting | ||
return false; | ||
} | ||
return !!this.stack.length; | ||
} | ||
@@ -206,2 +254,3 @@ | ||
} | ||
exports.isKeyword = isKeyword | ||
@@ -208,0 +257,0 @@ function isKeyword(id) { |
{ | ||
"name": "character-parser", | ||
"version": "1.2.2", | ||
"version": "2.0.0", | ||
"description": "Parse JavaScript one character at a time to look for snippets in Templates. This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "mocha -R spec" | ||
"coverage": "istanbul cover test/index.js", | ||
"test": "node test/index.js" | ||
}, | ||
@@ -26,5 +27,5 @@ "repository": { | ||
"devDependencies": { | ||
"better-assert": "~1.0.0", | ||
"mocha": "~1.9.0" | ||
"istanbul": "~0.3.22", | ||
"testit": "~2.0.2" | ||
} | ||
} | ||
} |
131
README.md
@@ -13,2 +13,4 @@ # character-parser | ||
### Parsing | ||
Work out how much depth changes: | ||
@@ -18,31 +20,8 @@ | ||
var state = parse('foo(arg1, arg2, {\n foo: [a, b\n'); | ||
assert(state.roundDepth === 1); | ||
assert(state.curlyDepth === 1); | ||
assert(state.squareDepth === 1); | ||
assert.deepEqual(state.stack, [')', '}', ']']); | ||
parse(' c, d]\n })', state); | ||
assert(state.squareDepth === 0); | ||
assert(state.curlyDepth === 0); | ||
assert(state.roundDepth === 0); | ||
assert.deepEqual(state.stack, []); | ||
``` | ||
### Bracketed Expressions | ||
Find all the contents of a bracketed expression: | ||
```js | ||
var section = parser.parseMax('foo="(", bar="}") bing bong'); | ||
assert(section.start === 0); | ||
assert(section.end === 16);//exclusive end of string | ||
assert(section.src === 'foo="(", bar="}"'); | ||
var section = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1}); | ||
assert(section.start === 1); | ||
assert(section.end === 17);//exclusive end of string | ||
assert(section.src === 'foo="(", bar="}"'); | ||
``` | ||
The bracketed expression parsing simply parses up to but excluding the first unmatched closed bracket (`)`, `}`, `]`). It is clever enough to ignore brackets in comments or strings. | ||
### Custom Delimited Expressions | ||
@@ -53,5 +32,6 @@ | ||
```js | ||
// EJS-style | ||
var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>'); | ||
assert(section.start === 0); | ||
assert(section.end === 17);//exclusive end of string | ||
assert(section.end === 17); // exclusive end of string | ||
assert(section.src === 'foo.bar("%>").baz'); | ||
@@ -61,4 +41,19 @@ | ||
assert(section.start === 2); | ||
assert(section.end === 19);//exclusive end of string | ||
assert(section.end === 19); // exclusive end of string | ||
assert(section.src === 'foo.bar("%>").baz'); | ||
// Jade-style | ||
var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2}) | ||
assert(section.start === 2); | ||
assert(section.end === 14); // exclusive end of string | ||
assert(section.src === 'p= [1, 2][i]') | ||
// Dumb parsing | ||
// Stop at first delimiter encountered, doesn't matter if it's nested or not | ||
// This is the character-parser@1 default behavior. | ||
var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2, ignoreNesting: true}) | ||
assert(section.start === 2); | ||
assert(section.end === 10); // exclusive end of string | ||
assert(section.src === 'p= [1, 2') | ||
'' | ||
``` | ||
@@ -70,2 +65,4 @@ | ||
All methods may throw an exception in the case of syntax errors. The exception has an additional `code` property, unique to each kind of error, that always starts with `CHARACTER_PARSER:`. | ||
### parse(src, state = defaultState(), options = {start: 0, end: src.length}) | ||
@@ -79,17 +76,11 @@ | ||
### parseMax(src, options = {start: 0}) | ||
### parseUntil(src, delimiter, options = {start: 0, ignoreLineComment: false, ignoreNesting: false}) | ||
Parses the source until the first unmatched close bracket (any of `)`, `}`, `]`). It returns an object with the structure: | ||
Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. | ||
```js | ||
{ | ||
start: 0,//index of first character of string | ||
end: 13,//index of first character after the end of string | ||
src: 'source string' | ||
} | ||
``` | ||
If `ignoreLineComment` is `true`, it will still count if the delimiter occurs in a line comment. | ||
### parseUntil(src, delimiter, options = {start: 0, includeLineComment: false}) | ||
If `ignoreNesting` is `true`, it will stop at the first occurrence of the delimiter, regardless of whether that occurrence is nested or not. See the example above. | ||
Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. If `includeLineComment` is `true`, it will still count if the delimiter occurs in a line comment, but not in a block comment. It returns an object with the structure: | ||
It returns an object with the structure: | ||
@@ -120,2 +111,6 @@ ```js | ||
### TOKEN_TYPES & BRACKETS | ||
Objects whose values can be a frame in the `stack` property of a State (documented below). | ||
## State | ||
@@ -127,24 +122,60 @@ | ||
{ | ||
lineComment: false, //true if inside a line comment | ||
blockComment: false, //true if inside a block comment | ||
stack: [], // stack of detected brackets; the outermost is [0] | ||
regexpStart: false, // true if a slash is just encountered and a REGEXP state has just been added to the stack | ||
singleQuote: false, //true if inside a single quoted string | ||
doubleQuote: false, //true if inside a double quoted string | ||
regexp: false, //true if inside a regular expression | ||
escaped: false, //true if in a string and the last character was an escape character | ||
escaped: false, // true if in a string and the last character was an escape character | ||
hasDollar: false, // true if in a template string and the last character was a dollar sign | ||
roundDepth: 0, //number of un-closed open `(` brackets | ||
curlyDepth: 0, //number of un-closed open `{` brackets | ||
squareDepth: 0 //number of un-closed open `[` brackets | ||
src: '', // the concatenated source string | ||
history: '', // reversed `src` | ||
lastChar: '' // last parsed character | ||
} | ||
``` | ||
`stack` property can contain any of the following: | ||
- Any of the property values of `characterParser.TOKEN_TYPES` | ||
- Any of the property values of `characterParser.BRACKETS` (the end bracket, not the starting bracket) | ||
It also has the following useful methods: | ||
- `.current()` returns the innermost bracket (i.e. the last stack frame). | ||
- `.isString()` returns `true` if the current location is inside a string. | ||
- `.isComment()` returns `true` if the current location is inside a comment. | ||
- `.isNesting()` returns `true` if the current location is anything but at the top level, i.e. with no nesting. | ||
- `.isNesting([opts])` returns `true` if the current location is not at the top level, i.e. if the stack is not empty. If `opts.ignoreLineComment` is `true`, line comments are not counted as a level, so for `// a` it will still return false. | ||
## Transition from v1 | ||
In character-parser@2, we have changed the APIs quite a bit. These are some notes that will help you transition to the new version. | ||
### State Object Changes | ||
Instead of keeping depths of different brackets, we are now keeping a stack. We also removed some properties: | ||
```js | ||
state.lineComment → state.current() === parser.TOKEN_TYPES.LINE_COMMENT | ||
state.blockComment → state.current() === parser.TOKEN_TYPES.BLOCK_COMMENT | ||
state.singleQuote → state.current() === parser.TOKEN_TYPES.SINGLE_QUOTE | ||
state.doubleQuote → state.current() === parser.TOKEN_TYPES.DOUBLE_QUOTE | ||
state.regexp → state.current() === parser.TOKEN_TYPES.REGEXP | ||
``` | ||
### `parseMax` | ||
This function has been removed since the usefulness of this function has been questioned. You should find that `parseUntil` is a better choice for your task. | ||
### `parseUntil` | ||
The default behavior when the delimiter is a bracket has been changed so that nesting is taken into account to determine if the end is reached. | ||
To preserve the original behavior, pass `ignoreNesting: true` as an option. | ||
To see the difference between the new and old behaviors, see the "Usage" section earlier. | ||
### `parseMaxBracket` | ||
This function has been merged into `parseUntil`. You can directly rename the function call without any repercussions. | ||
## License | ||
MIT | ||
MIT |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
16739
18.32%260
22.07%174
22.54%1
Infinity%