Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

character-parser

Package Overview
Dependencies
Maintainers
2
Versions
17
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

character-parser - npm Package Compare versions

Comparing version 1.2.2 to 2.0.0

267

index.js

@@ -0,2 +1,25 @@

'use strict';
// Make `parse` the module's export and point `exports` at it, so that every
// `exports.x = ...` below attaches a property to the exported function itself.
exports = (module.exports = parse);
// Markers that parseChar pushes onto `state.stack` while the parser is inside
// a comment, a string, a template string or a regular expression.
var TOKEN_TYPES = exports.TOKEN_TYPES = {
LINE_COMMENT: '//',
BLOCK_COMMENT: '/**/',
SINGLE_QUOTE: '\'',
DOUBLE_QUOTE: '"',
TEMPLATE_QUOTE: '`',
REGEXP: '//g'
}
// Opening bracket -> matching closing bracket. parseChar pushes the closing
// bracket (the value) onto `state.stack` when it meets the opening one.
var BRACKETS = exports.BRACKETS = {
'(': ')',
'{': '}',
'[': ']'
};
// Closing bracket -> matching opening bracket. Not exported; parseChar uses
// key membership (`character in BRACKETS_REVERSED`) to recognise a closing bracket.
var BRACKETS_REVERSED = {
')': '(',
'}': '{',
']': '['
};
// Also expose the function under the explicit name `parse`.
exports.parse = parse;

@@ -10,5 +33,2 @@ function parse(src, state, options) {

while (index < end) {
if (state.roundDepth < 0 || state.curlyDepth < 0 || state.squareDepth < 0) {
throw new SyntaxError('Mismatched Bracket: ' + src[index - 1]);
}
exports.parseChar(src[index++], state);

@@ -19,116 +39,135 @@ }

exports.parseMax = parseMax;
/**
 * Scan `src` from `options.start` (default 0) until one of the state's
 * bracket depth counters (round, curly or square) drops below zero, i.e.
 * until a closing bracket without a matching opening bracket is consumed.
 *
 * @param {string} src - source text to scan
 * @param {{start: (number|undefined)}} [options] - optional scan settings
 * @returns {{start: number, end: number, src: string}} the scanned section;
 *   `end` is exclusive and is the index of the character that made a depth
 *   counter negative
 * @throws {Error} when `src` is exhausted before any depth counter goes negative
 */
function parseMax(src, options) {
  var settings = options || {};
  var start = settings.start || 0;
  var state = exports.defaultState();
  var cursor = start;
  for (;;) {
    if (state.roundDepth < 0 || state.curlyDepth < 0 || state.squareDepth < 0) {
      break;
    }
    if (cursor >= src.length) {
      throw new Error('The end of the string was reached with no closing bracket found.');
    }
    exports.parseChar(src[cursor], state);
    cursor += 1;
  }
  // The cursor has already moved past the unmatched bracket; step back onto it.
  var end = cursor - 1;
  return {
    start: start,
    end: end,
    src: src.substring(start, end)
  };
}
exports.parseUntil = parseUntil;
function parseUntil(src, delimiter, options) {
options = options || {};
var includeLineComment = options.includeLineComment || false;
var start = options.start || 0;
var index = start;
var state = exports.defaultState();
while (state.isString() || state.regexp || state.blockComment ||
(!includeLineComment && state.lineComment) || !startsWith(src, delimiter, index)) {
while (
index < src.length
) {
if ((options.ignoreNesting || !state.isNesting(options)) && startsWith(src, delimiter, index)) {
var end = index;
return {
start: start,
end: end,
src: src.substring(start, end)
};
}
exports.parseChar(src[index++], state);
}
var end = index;
return {
start: start,
end: end,
src: src.substring(start, end)
};
var err = new Error('The end of the string was reached with no closing bracket found.');
err.code = 'CHARACTER_PARSER:END_OF_STRING_REACHED';
throw err;
}
exports.parseChar = parseChar;
function parseChar(character, state) {
if (character.length !== 1) throw new Error('Character must be a string of length 1');
if (character.length !== 1) {
var err = new Error('Character must be a string of length 1');
err.name = 'InvalidArgumentError';
err.code = 'CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE';
throw err;
}
state = state || exports.defaultState();
state.src = state.src || '';
state.src += character;
var wasComment = state.blockComment || state.lineComment;
var wasComment = state.isComment();
var lastChar = state.history ? state.history[0] : '';
if (state.regexpStart) {
if (character === '/' || character == '*') {
state.regexp = false;
state.stack.pop();
}
state.regexpStart = false;
}
if (state.lineComment) {
if (character === '\n') {
state.lineComment = false;
}
} else if (state.blockComment) {
if (state.lastChar === '*' && character === '/') {
state.blockComment = false;
}
} else if (state.singleQuote) {
if (character === '\'' && !state.escaped) {
state.singleQuote = false;
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
} else {
state.escaped = false;
}
} else if (state.doubleQuote) {
if (character === '"' && !state.escaped) {
state.doubleQuote = false;
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
} else {
state.escaped = false;
}
} else if (state.regexp) {
if (character === '/' && !state.escaped) {
state.regexp = false;
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
} else {
state.escaped = false;
}
} else if (lastChar === '/' && character === '/') {
state.history = state.history.substr(1);
state.lineComment = true;
} else if (lastChar === '/' && character === '*') {
state.history = state.history.substr(1);
state.blockComment = true;
} else if (character === '/' && isRegexp(state.history)) {
state.regexp = true;
state.regexpStart = true;
} else if (character === '\'') {
state.singleQuote = true;
} else if (character === '"') {
state.doubleQuote = true;
} else if (character === '(') {
state.roundDepth++;
} else if (character === ')') {
state.roundDepth--;
} else if (character === '{') {
state.curlyDepth++;
} else if (character === '}') {
state.curlyDepth--;
} else if (character === '[') {
state.squareDepth++;
} else if (character === ']') {
state.squareDepth--;
switch (state.current()) {
case TOKEN_TYPES.LINE_COMMENT:
if (character === '\n') {
state.stack.pop();
}
break;
case TOKEN_TYPES.BLOCK_COMMENT:
if (state.lastChar === '*' && character === '/') {
state.stack.pop();
}
break;
case TOKEN_TYPES.SINGLE_QUOTE:
if (character === '\'' && !state.escaped) {
state.stack.pop();
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
} else {
state.escaped = false;
}
break;
case TOKEN_TYPES.DOUBLE_QUOTE:
if (character === '"' && !state.escaped) {
state.stack.pop();
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
} else {
state.escaped = false;
}
break;
case TOKEN_TYPES.TEMPLATE_QUOTE:
if (character === '`' && !state.escaped) {
state.stack.pop();
state.hasDollar = false;
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
state.hasDollar = false;
} else if (character === '$' && !state.escaped) {
state.hasDollar = true;
} else if (character === '{' && state.hasDollar) {
state.stack.push(BRACKETS[character]);
} else {
state.escaped = false;
state.hasDollar = false;
}
break;
case TOKEN_TYPES.REGEXP:
if (character === '/' && !state.escaped) {
state.stack.pop();
} else if (character === '\\' && !state.escaped) {
state.escaped = true;
} else {
state.escaped = false;
}
break;
default:
if (character in BRACKETS) {
state.stack.push(BRACKETS[character]);
} else if (character in BRACKETS_REVERSED) {
if (state.current() !== character) {
var err = new SyntaxError('Mismatched Bracket: ' + character);
err.code = 'CHARACTER_PARSER:MISMATCHED_BRACKET';
throw err;
};
state.stack.pop();
} else if (lastChar === '/' && character === '/') {
// Don't include comments in history
state.history = state.history.substr(1);
state.stack.push(TOKEN_TYPES.LINE_COMMENT);
} else if (lastChar === '/' && character === '*') {
// Don't include comment in history
state.history = state.history.substr(1);
state.stack.push(TOKEN_TYPES.BLOCK_COMMENT);
} else if (character === '/' && isRegexp(state.history)) {
state.stack.push(TOKEN_TYPES.REGEXP);
// N.B. if the next character turns out to be a `*` or a `/`
// then this isn't actually a regexp
state.regexpStart = true;
} else if (character === '\'') {
state.stack.push(TOKEN_TYPES.SINGLE_QUOTE);
} else if (character === '"') {
state.stack.push(TOKEN_TYPES.DOUBLE_QUOTE);
} else if (character === '`') {
state.stack.push(TOKEN_TYPES.TEMPLATE_QUOTE);
}
break;
}
if (!state.blockComment && !state.lineComment && !wasComment) state.history = character + state.history;
if (!state.isComment() && !wasComment) {
state.history = character + state.history;
}
state.lastChar = character; // store last character for ending block comments

@@ -140,26 +179,35 @@ return state;

function State() {
this.lineComment = false;
this.blockComment = false;
this.stack = [];
this.singleQuote = false;
this.doubleQuote = false;
this.regexp = false;
this.regexpStart = false;
this.escaped = false;
this.hasDollar = false;
this.roundDepth = 0;
this.curlyDepth = 0;
this.squareDepth = 0;
this.src = '';
this.history = ''
this.lastChar = ''
}
// Return the innermost frame, i.e. the last element of `this.stack`;
// yields `undefined` when the stack is empty.
State.prototype.current = function () {
  var frames = this.stack;
  var innermost = frames.length - 1;
  return frames[innermost];
};
State.prototype.isString = function () {
return this.singleQuote || this.doubleQuote;
return (
this.current() === TOKEN_TYPES.SINGLE_QUOTE ||
this.current() === TOKEN_TYPES.DOUBLE_QUOTE ||
this.current() === TOKEN_TYPES.TEMPLATE_QUOTE
);
}
State.prototype.isComment = function () {
return this.lineComment || this.blockComment;
return this.current() === TOKEN_TYPES.LINE_COMMENT || this.current() === TOKEN_TYPES.BLOCK_COMMENT;
}
State.prototype.isNesting = function () {
return this.isString() || this.isComment() || this.regexp || this.roundDepth > 0 || this.curlyDepth > 0 || this.squareDepth > 0
State.prototype.isNesting = function (opts) {
if (
opts && opts.ignoreLineComment &&
this.stack.length === 1 && this.stack[0] === TOKEN_TYPES.LINE_COMMENT
) {
// if we are only inside a line comment, and line comments are ignored
// don't count it as nesting
return false;
}
return !!this.stack.length;
}

@@ -206,2 +254,3 @@

}
exports.isKeyword = isKeyword

@@ -208,0 +257,0 @@ function isKeyword(id) {

{
"name": "character-parser",
"version": "1.2.2",
"version": "2.0.0",
"description": "Parse JavaScript one character at a time to look for snippets in Templates. This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.",
"main": "index.js",
"scripts": {
"test": "mocha -R spec"
"coverage": "istanbul cover test/index.js",
"test": "node test/index.js"
},

@@ -26,5 +27,5 @@ "repository": {

"devDependencies": {
"better-assert": "~1.0.0",
"mocha": "~1.9.0"
"istanbul": "~0.3.22",
"testit": "~2.0.2"
}
}
}

@@ -13,2 +13,4 @@ # character-parser

### Parsing
Work out how much depth changes:

@@ -18,31 +20,8 @@

var state = parse('foo(arg1, arg2, {\n foo: [a, b\n');
assert(state.roundDepth === 1);
assert(state.curlyDepth === 1);
assert(state.squareDepth === 1);
assert.deepEqual(state.stack, [')', '}', ']']);
parse(' c, d]\n })', state);
assert(state.squareDepth === 0);
assert(state.curlyDepth === 0);
assert(state.roundDepth === 0);
assert.deepEqual(state.stack, []);
```
### Bracketed Expressions
Find all the contents of a bracketed expression:
```js
var section = parser.parseMax('foo="(", bar="}") bing bong');
assert(section.start === 0);
assert(section.end === 16);//exclusive end of string
assert(section.src = 'foo="(", bar="}"');
var section = parser.parseMax('{foo="(", bar="}"} bing bong', {start: 1});
assert(section.start === 1);
assert(section.end === 17);//exclusive end of string
assert(section.src = 'foo="(", bar="}"');
```
The bracketed expression parsing simply parses up to but excluding the first unmatched closed bracket (`)`, `}`, `]`). It is clever enough to ignore brackets in comments or strings.
### Custom Delimited Expressions

@@ -53,5 +32,6 @@

```js
// EJS-style
var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>');
assert(section.start === 0);
assert(section.end === 17);//exclusive end of string
assert(section.end === 17); // exclusive end of string
assert(section.src === 'foo.bar("%>").baz');

@@ -61,4 +41,19 @@

assert(section.start === 2);
assert(section.end === 19);//exclusive end of string
assert(section.end === 19); // exclusive end of string
assert(section.src === 'foo.bar("%>").baz');
// Jade-style
var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2})
assert(section.start === 2);
assert(section.end === 14); // exclusive end of string
assert(section.src === 'p= [1, 2][i]')
// Dumb parsing
// Stop at first delimiter encountered, doesn't matter if it's nested or not
// This is the character-parser@1 default behavior.
var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2, ignoreNesting: true})
assert(section.start === 2);
assert(section.end === 10); // exclusive end of string
assert(section.src === 'p= [1, 2')
''
```

@@ -70,2 +65,4 @@

All methods may throw an exception in the case of syntax errors. The exception contains an additional `code` property that always starts with `CHARACTER_PARSER:` that is unique for the error.
### parse(str, state = defaultState(), options = {start: 0, end: src.length})

@@ -79,17 +76,11 @@

### parseMax(src, options = {start: 0})
### parseUntil(src, delimiter, options = {start: 0, ignoreLineComment: false, ignoreNesting: false})
Parses the source until the first unmatched close bracket (any of `)`, `}`, `]`). It returns an object with the structure:
Parses the source until the first occurrence of `delimiter` which is not in a string or a comment.
```js
{
start: 0,//index of first character of string
end: 13,//index of first character after the end of string
src: 'source string'
}
```
If `ignoreLineComment` is `true`, it will still count if the delimiter occurs in a line comment.
### parseUntil(src, delimiter, options = {start: 0, includeLineComment: false})
If `ignoreNesting` is `true`, it will stop at the first occurrence of the delimiter, regardless of whether that bracket is part of a nested pair. See the example above.
Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. If `includeLineComment` is `true`, it will still count if the delimiter occurs in a line comment, but not in a block comment. It returns an object with the structure:
It returns an object with the structure:

@@ -120,2 +111,6 @@ ```js

### TOKEN_TYPES & BRACKETS
Objects whose values can be a frame in the `stack` property of a State (documented below).
## State

@@ -127,24 +122,60 @@

{
lineComment: false, //true if inside a line comment
blockComment: false, //true if inside a block comment
stack: [], // stack of detected brackets; the outermost is [0]
regexpStart: false, // true if a slash is just encountered and a REGEXP state has just been added to the stack
singleQuote: false, //true if inside a single quoted string
doubleQuote: false, //true if inside a double quoted string
regexp: false, //true if inside a regular expression
escaped: false, //true if in a string and the last character was an escape character
escaped: false, // true if in a string and the last character was an escape character
hasDollar: false, // true if in a template string and the last character was a dollar sign
roundDepth: 0, //number of un-closed open `(` brackets
curlyDepth: 0, //number of un-closed open `{` brackets
squareDepth: 0 //number of un-closed open `[` brackets
src: '', // the concatenated source string
history: '', // reversed `src`
lastChar: '' // last parsed character
}
```
`stack` property can contain any of the following:
- Any of the property values of `characterParser.TOKEN_TYPES`
- Any of the property values of `characterParser.BRACKETS` (the end bracket, not the starting bracket)
It also has the following useful methods:
- `.current()` returns the innermost bracket (i.e. the last stack frame).
- `.isString()` returns `true` if the current location is inside a string.
- `.isComment()` returns `true` if the current location is inside a comment.
- `isNesting()` returns `true` if the current location is anything but at the top level, i.e. with no nesting.
- `.isNesting([opts])` returns `true` if the current location is not at the top level, i.e. if the stack is not empty. If `opts.ignoreLineComment` is `true`, line comments are not counted as a level, so for `// a` it will still return false.
## Transition from v1
In character-parser@2, we have changed the APIs quite a bit. These are some notes that will help you transition to the new version.
### State Object Changes
Instead of keeping depths of different brackets, we are now keeping a stack. We also removed some properties:
```js
state.lineComment → state.current() === parser.TOKEN_TYPES.LINE_COMMENT
state.blockComment → state.current() === parser.TOKEN_TYPES.BLOCK_COMMENT
state.singleQuote → state.current() === parser.TOKEN_TYPES.SINGLE_QUOTE
state.doubleQuote → state.current() === parser.TOKEN_TYPES.DOUBLE_QUOTE
state.regexp → state.current() === parser.TOKEN_TYPES.REGEXP
```
### `parseMax`
This function has been removed since the usefulness of this function has been questioned. You should find that `parseUntil` is a better choice for your task.
### `parseUntil`
The default behavior when the delimiter is a bracket has been changed so that nesting is taken into account to determine if the end is reached.
To preserve the original behavior, pass `ignoreNesting: true` as an option.
To see the difference between the new and old behaviors, see the "Usage" section earlier.
### `parseMaxBracket`
This function has been merged into `parseUntil`. You can directly rename the function call without any repercussions.
## License
MIT
MIT

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc