character-parser
Advanced tools
Comparing version 1.2.2 to 2.0.0
267
index.js
@@ -0,2 +1,25 @@ | ||
'use strict'; | ||
exports = (module.exports = parse); | ||
var TOKEN_TYPES = exports.TOKEN_TYPES = { | ||
LINE_COMMENT: '//', | ||
BLOCK_COMMENT: '/**/', | ||
SINGLE_QUOTE: '\'', | ||
DOUBLE_QUOTE: '"', | ||
TEMPLATE_QUOTE: '`', | ||
REGEXP: '//g' | ||
} | ||
var BRACKETS = exports.BRACKETS = { | ||
'(': ')', | ||
'{': '}', | ||
'[': ']' | ||
}; | ||
var BRACKETS_REVERSED = { | ||
')': '(', | ||
'}': '{', | ||
']': '[' | ||
}; | ||
exports.parse = parse; | ||
@@ -10,5 +33,2 @@ function parse(src, state, options) { | ||
while (index < end) { | ||
if (state.roundDepth < 0 || state.curlyDepth < 0 || state.squareDepth < 0) { | ||
throw new SyntaxError('Mismatched Bracket: ' + src[index - 1]); | ||
} | ||
exports.parseChar(src[index++], state); | ||
@@ -19,116 +39,135 @@ } | ||
exports.parseMax = parseMax; | ||
function parseMax(src, options) { | ||
options = options || {}; | ||
var start = options.start || 0; | ||
var index = start; | ||
var state = exports.defaultState(); | ||
while (state.roundDepth >= 0 && state.curlyDepth >= 0 && state.squareDepth >= 0) { | ||
if (index >= src.length) { | ||
throw new Error('The end of the string was reached with no closing bracket found.'); | ||
} | ||
exports.parseChar(src[index++], state); | ||
} | ||
var end = index - 1; | ||
return { | ||
start: start, | ||
end: end, | ||
src: src.substring(start, end) | ||
}; | ||
} | ||
exports.parseUntil = parseUntil; | ||
function parseUntil(src, delimiter, options) { | ||
options = options || {}; | ||
var includeLineComment = options.includeLineComment || false; | ||
var start = options.start || 0; | ||
var index = start; | ||
var state = exports.defaultState(); | ||
while (state.isString() || state.regexp || state.blockComment || | ||
(!includeLineComment && state.lineComment) || !startsWith(src, delimiter, index)) { | ||
while ( | ||
index < src.length | ||
) { | ||
if ((options.ignoreNesting || !state.isNesting(options)) && startsWith(src, delimiter, index)) { | ||
var end = index; | ||
return { | ||
start: start, | ||
end: end, | ||
src: src.substring(start, end) | ||
}; | ||
} | ||
exports.parseChar(src[index++], state); | ||
} | ||
var end = index; | ||
return { | ||
start: start, | ||
end: end, | ||
src: src.substring(start, end) | ||
}; | ||
var err = new Error('The end of the string was reached with no closing bracket found.'); | ||
err.code = 'CHARACTER_PARSER:END_OF_STRING_REACHED'; | ||
throw err; | ||
} | ||
exports.parseChar = parseChar; | ||
function parseChar(character, state) { | ||
if (character.length !== 1) throw new Error('Character must be a string of length 1'); | ||
if (character.length !== 1) { | ||
var err = new Error('Character must be a string of length 1'); | ||
err.name = 'InvalidArgumentError'; | ||
err.code = 'CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE'; | ||
throw err; | ||
} | ||
state = state || exports.defaultState(); | ||
state.src = state.src || ''; | ||
state.src += character; | ||
var wasComment = state.blockComment || state.lineComment; | ||
var wasComment = state.isComment(); | ||
var lastChar = state.history ? state.history[0] : ''; | ||
if (state.regexpStart) { | ||
if (character === '/' || character == '*') { | ||
state.regexp = false; | ||
state.stack.pop(); | ||
} | ||
state.regexpStart = false; | ||
} | ||
if (state.lineComment) { | ||
if (character === '\n') { | ||
state.lineComment = false; | ||
} | ||
} else if (state.blockComment) { | ||
if (state.lastChar === '*' && character === '/') { | ||
state.blockComment = false; | ||
} | ||
} else if (state.singleQuote) { | ||
if (character === '\'' && !state.escaped) { | ||
state.singleQuote = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
} else if (state.doubleQuote) { | ||
if (character === '"' && !state.escaped) { | ||
state.doubleQuote = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
} else if (state.regexp) { | ||
if (character === '/' && !state.escaped) { | ||
state.regexp = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
} else if (lastChar === '/' && character === '/') { | ||
state.history = state.history.substr(1); | ||
state.lineComment = true; | ||
} else if (lastChar === '/' && character === '*') { | ||
state.history = state.history.substr(1); | ||
state.blockComment = true; | ||
} else if (character === '/' && isRegexp(state.history)) { | ||
state.regexp = true; | ||
state.regexpStart = true; | ||
} else if (character === '\'') { | ||
state.singleQuote = true; | ||
} else if (character === '"') { | ||
state.doubleQuote = true; | ||
} else if (character === '(') { | ||
state.roundDepth++; | ||
} else if (character === ')') { | ||
state.roundDepth--; | ||
} else if (character === '{') { | ||
state.curlyDepth++; | ||
} else if (character === '}') { | ||
state.curlyDepth--; | ||
} else if (character === '[') { | ||
state.squareDepth++; | ||
} else if (character === ']') { | ||
state.squareDepth--; | ||
switch (state.current()) { | ||
case TOKEN_TYPES.LINE_COMMENT: | ||
if (character === '\n') { | ||
state.stack.pop(); | ||
} | ||
break; | ||
case TOKEN_TYPES.BLOCK_COMMENT: | ||
if (state.lastChar === '*' && character === '/') { | ||
state.stack.pop(); | ||
} | ||
break; | ||
case TOKEN_TYPES.SINGLE_QUOTE: | ||
if (character === '\'' && !state.escaped) { | ||
state.stack.pop(); | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
break; | ||
case TOKEN_TYPES.DOUBLE_QUOTE: | ||
if (character === '"' && !state.escaped) { | ||
state.stack.pop(); | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
break; | ||
case TOKEN_TYPES.TEMPLATE_QUOTE: | ||
if (character === '`' && !state.escaped) { | ||
state.stack.pop(); | ||
state.hasDollar = false; | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
state.hasDollar = false; | ||
} else if (character === '$' && !state.escaped) { | ||
state.hasDollar = true; | ||
} else if (character === '{' && state.hasDollar) { | ||
state.stack.push(BRACKETS[character]); | ||
} else { | ||
state.escaped = false; | ||
state.hasDollar = false; | ||
} | ||
break; | ||
case TOKEN_TYPES.REGEXP: | ||
if (character === '/' && !state.escaped) { | ||
state.stack.pop(); | ||
} else if (character === '\\' && !state.escaped) { | ||
state.escaped = true; | ||
} else { | ||
state.escaped = false; | ||
} | ||
break; | ||
default: | ||
if (character in BRACKETS) { | ||
state.stack.push(BRACKETS[character]); | ||
} else if (character in BRACKETS_REVERSED) { | ||
if (state.current() !== character) { | ||
var err = new SyntaxError('Mismatched Bracket: ' + character); | ||
err.code = 'CHARACTER_PARSER:MISMATCHED_BRACKET'; | ||
throw err; | ||
}; | ||
state.stack.pop(); | ||
} else if (lastChar === '/' && character === '/') { | ||
// Don't include comments in history | ||
state.history = state.history.substr(1); | ||
state.stack.push(TOKEN_TYPES.LINE_COMMENT); | ||
} else if (lastChar === '/' && character === '*') { | ||
// Don't include comment in history | ||
state.history = state.history.substr(1); | ||
state.stack.push(TOKEN_TYPES.BLOCK_COMMENT); | ||
} else if (character === '/' && isRegexp(state.history)) { | ||
state.stack.push(TOKEN_TYPES.REGEXP); | ||
// N.B. if the next character turns out to be a `*` or a `/` | ||
// then this isn't actually a regexp | ||
state.regexpStart = true; | ||
} else if (character === '\'') { | ||
state.stack.push(TOKEN_TYPES.SINGLE_QUOTE); | ||
} else if (character === '"') { | ||
state.stack.push(TOKEN_TYPES.DOUBLE_QUOTE); | ||
} else if (character === '`') { | ||
state.stack.push(TOKEN_TYPES.TEMPLATE_QUOTE); | ||
} | ||
break; | ||
} | ||
if (!state.blockComment && !state.lineComment && !wasComment) state.history = character + state.history; | ||
if (!state.isComment() && !wasComment) { | ||
state.history = character + state.history; | ||
} | ||
state.lastChar = character; // store last character for ending block comments | ||
@@ -140,26 +179,35 @@ return state; | ||
function State() { | ||
this.lineComment = false; | ||
this.blockComment = false; | ||
this.stack = []; | ||
this.singleQuote = false; | ||
this.doubleQuote = false; | ||
this.regexp = false; | ||
this.regexpStart = false; | ||
this.escaped = false; | ||
this.hasDollar = false; | ||
this.roundDepth = 0; | ||
this.curlyDepth = 0; | ||
this.squareDepth = 0; | ||
this.src = ''; | ||
this.history = '' | ||
this.lastChar = '' | ||
} | ||
State.prototype.current = function () { | ||
return this.stack[this.stack.length - 1]; | ||
}; | ||
State.prototype.isString = function () { | ||
return this.singleQuote || this.doubleQuote; | ||
return ( | ||
this.current() === TOKEN_TYPES.SINGLE_QUOTE || | ||
this.current() === TOKEN_TYPES.DOUBLE_QUOTE || | ||
this.current() === TOKEN_TYPES.TEMPLATE_QUOTE | ||
); | ||
} | ||
State.prototype.isComment = function () { | ||
return this.lineComment || this.blockComment; | ||
return this.current() === TOKEN_TYPES.LINE_COMMENT || this.current() === TOKEN_TYPES.BLOCK_COMMENT; | ||
} | ||
State.prototype.isNesting = function () { | ||
return this.isString() || this.isComment() || this.regexp || this.roundDepth > 0 || this.curlyDepth > 0 || this.squareDepth > 0 | ||
State.prototype.isNesting = function (opts) { | ||
if ( | ||
opts && opts.ignoreLineComment && | ||
this.stack.length === 1 && this.stack[0] === TOKEN_TYPES.LINE_COMMENT | ||
) { | ||
// if we are only inside a line comment, and line comments are ignored | ||
// don't count it as nesting | ||
return false; | ||
} | ||
return !!this.stack.length; | ||
} | ||
@@ -206,2 +254,3 @@ | ||
} | ||
exports.isKeyword = isKeyword | ||
@@ -208,0 +257,0 @@ function isKeyword(id) { |
{ | ||
"name": "character-parser", | ||
"version": "1.2.2", | ||
"version": "2.0.0", | ||
"description": "Parse JavaScript one character at a time to look for snippets in Templates. This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "mocha -R spec" | ||
"coverage": "istanbul cover test/index.js", | ||
"test": "node test/index.js" | ||
}, | ||
@@ -26,5 +27,5 @@ "repository": { | ||
"devDependencies": { | ||
"better-assert": "~1.0.0", | ||
"mocha": "~1.9.0" | ||
"istanbul": "~0.3.22", | ||
"testit": "~2.0.2" | ||
} | ||
} | ||
} |
131
README.md
@@ -13,2 +13,4 @@ # character-parser | ||
### Parsing | ||
Work out how much depth changes: | ||
@@ -18,31 +20,8 @@ | ||
var state = parse('foo(arg1, arg2, {\n foo: [a, b\n'); | ||
assert(state.roundDepth === 1); | ||
assert(state.curlyDepth === 1); | ||
assert(state.squareDepth === 1); | ||
assert.deepEqual(state.stack, [')', '}', ']']); | ||
parse(' c, d]\n })', state); | ||
assert(state.squareDepth === 0); | ||
assert(state.curlyDepth === 0); | ||
assert(state.roundDepth === 0); | ||
assert.deepEqual(state.stack, []); | ||
``` | ||
### Bracketed Expressions | ||
Find all the contents of a bracketed expression: | ||
```js | ||
var section = parser.parseMax('foo="(", bar="}") bing bong'); | ||
assert(section.start === 0); | ||
assert(section.end === 16);//exclusive end of string | ||
assert(section.src = 'foo="(", bar="}"'); | ||
var section = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1}); | ||
assert(section.start === 1); | ||
assert(section.end === 17);//exclusive end of string | ||
assert(section.src = 'foo="(", bar="}"'); | ||
``` | ||
The bracketed expression parsing simply parses up to but excluding the first unmatched closed bracket (`)`, `}`, `]`). It is clever enough to ignore brackets in comments or strings. | ||
### Custom Delimited Expressions | ||
@@ -53,5 +32,6 @@ | ||
```js | ||
// EJS-style | ||
var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>'); | ||
assert(section.start === 0); | ||
assert(section.end === 17);//exclusive end of string | ||
assert(section.end === 17); // exclusive end of string | ||
assert(section.src === 'foo.bar("%>").baz'); | ||
@@ -61,4 +41,19 @@ | ||
assert(section.start === 2); | ||
assert(section.end === 19);//exclusive end of string | ||
assert(section.end === 19); // exclusive end of string | ||
assert(section.src === 'foo.bar("%>").baz'); | ||
// Jade-style | ||
var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2}) | ||
assert(section.start === 2); | ||
assert(section.end === 14); // exclusive end of string | ||
assert(section.src === 'p= [1, 2][i]') | ||
// Dumb parsing | ||
// Stop at first delimiter encountered, doesn't matter if it's nested or not | ||
// This is the character-parser@1 default behavior. | ||
var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2, ignoreNesting: true}) | ||
assert(section.start === 2); | ||
assert(section.end === 10); // exclusive end of string | ||
assert(section.src === 'p= [1, 2') | ||
'' | ||
``` | ||
@@ -70,2 +65,4 @@ | ||
All methods may throw an exception in the case of syntax errors. The exception carries an additional `code` property that always starts with `CHARACTER_PARSER:` and is unique to the kind of error. | ||
### parse(str, state = defaultState(), options = {start: 0, end: src.length}) | ||
@@ -79,17 +76,11 @@ | ||
### parseMax(src, options = {start: 0}) | ||
### parseUntil(src, delimiter, options = {start: 0, ignoreLineComment: false, ignoreNesting: false}) | ||
Parses the source until the first unmatched close bracket (any of `)`, `}`, `]`). It returns an object with the structure: | ||
Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. | ||
```js | ||
{ | ||
start: 0,//index of first character of string | ||
end: 13,//index of first character after the end of string | ||
src: 'source string' | ||
} | ||
``` | ||
If `ignoreLineComment` is `true`, it will still count if the delimiter occurs in a line comment. | ||
### parseUntil(src, delimiter, options = {start: 0, includeLineComment: false}) | ||
If `ignoreNesting` is `true`, it will stop at the first occurrence of the delimiter, regardless of whether that delimiter is part of a nested bracket pair. See the example above. | ||
Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. If `includeLineComment` is `true`, it will still count if the delimiter occurs in a line comment, but not in a block comment. It returns an object with the structure: | ||
It returns an object with the structure: | ||
@@ -120,2 +111,6 @@ ```js | ||
### TOKEN_TYPES & BRACKETS | ||
Objects whose values can be a frame in the `stack` property of a State (documented below). | ||
## State | ||
@@ -127,24 +122,60 @@ | ||
{ | ||
lineComment: false, //true if inside a line comment | ||
blockComment: false, //true if inside a block comment | ||
stack: [], // stack of detected brackets; the outermost is [0] | ||
regexpStart: false, // true if a slash is just encountered and a REGEXP state has just been added to the stack | ||
singleQuote: false, //true if inside a single quoted string | ||
doubleQuote: false, //true if inside a double quoted string | ||
regexp: false, //true if inside a regular expression | ||
escaped: false, //true if in a string and the last character was an escape character | ||
escaped: false, // true if in a string and the last character was an escape character | ||
hasDollar: false, // true if in a template string and the last character was a dollar sign | ||
roundDepth: 0, //number of un-closed open `(` brackets | ||
curlyDepth: 0, //number of un-closed open `{` brackets | ||
squareDepth: 0 //number of un-closed open `[` brackets | ||
src: '', // the concatenated source string | ||
history: '', // reversed `src` | ||
lastChar: '' // last parsed character | ||
} | ||
``` | ||
`stack` property can contain any of the following: | ||
- Any of the property values of `characterParser.TOKEN_TYPES` | ||
- Any of the property values of `characterParser.BRACKETS` (the end bracket, not the starting bracket) | ||
It also has the following useful methods: | ||
- `.current()` returns the innermost bracket (i.e. the last stack frame). | ||
- `.isString()` returns `true` if the current location is inside a string. | ||
- `.isComment()` returns `true` if the current location is inside a comment. | ||
- `isNesting()` returns `true` if the current location is anything but at the top level, i.e. with no nesting. | ||
- `.isNesting([opts])` returns `true` if the current location is not at the top level, i.e. if the stack is not empty. If `opts.ignoreLineComment` is `true`, line comments are not counted as a level, so for `// a` it will still return false. | ||
## Transition from v1 | ||
In character-parser@2, we have changed the APIs quite a bit. These are some notes that will help you transition to the new version. | ||
### State Object Changes | ||
Instead of keeping depths of different brackets, we are now keeping a stack. We also removed some properties: | ||
```js | ||
state.lineComment → state.current() === parser.TOKEN_TYPES.LINE_COMMENT | ||
state.blockComment → state.current() === parser.TOKEN_TYPES.BLOCK_COMMENT | ||
state.singleQuote → state.current() === parser.TOKEN_TYPES.SINGLE_QUOTE | ||
state.doubleQuote → state.current() === parser.TOKEN_TYPES.DOUBLE_QUOTE | ||
state.regexp → state.current() === parser.TOKEN_TYPES.REGEXP | ||
``` | ||
### `parseMax` | ||
This function has been removed because its usefulness has been questioned. You should find that `parseUntil` is a better choice for your task. | ||
### `parseUntil` | ||
The default behavior when the delimiter is a bracket has been changed so that nesting is taken into account to determine if the end is reached. | ||
To preserve the original behavior, pass `ignoreNesting: true` as an option. | ||
To see the difference between the new and old behaviors, see the "Usage" section earlier. | ||
### `parseMaxBracket` | ||
This function has been merged into `parseUntil`. You can directly rename the function call without any repercussions. | ||
## License | ||
MIT | ||
MIT |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain riskA new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
16739
260
174
1