Comparing version 2.8.0 to 2.9.0
@@ -49,4 +49,6 @@ #!/usr/bin/env node
var grammar = new require(require('path').resolve(opts.file));
var parser = new nearley.Parser(grammar.ParserRules, opts.start ? opts.start : grammar.ParserStart, {
var filename = require('path').resolve(opts.file);
var grammar = nearley.Grammar.fromCompiled(require(filename));
if (opts.start) grammar.start = opts.start
var parser = new nearley.Parser(grammar, {
keepHistory: true,
@@ -53,0 +55,0 @@ });
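For reference, a hedged sketch of the 2.9.0 calling convention the updated bin script uses above (`Grammar.fromCompiled` plus a `Parser` that takes the Grammar directly). The module path and input string are placeholders, not part of the diff:

```js
// Hedged sketch, assuming nearley is installed from npm and
// "./my-grammar.js" is a module produced by nearleyc (placeholder name).
var nearley = require('nearley');

var compiled = require('./my-grammar.js');            // exports {Lexer, ParserRules, ParserStart}
var grammar = nearley.Grammar.fromCompiled(compiled);
// grammar.start = 'main';                            // optional override, as the bin script does with opts.start

var parser = new nearley.Parser(grammar, { keepHistory: true });
parser.feed('some input');
console.log(parser.results);                          // array of parsings (empty if the input is incomplete)
```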
(function(root, factory) {
// if (typeof define === 'function' && define.amd) {
// define(['nearley'], factory);
if (typeof module === 'object' && module.exports) {
@@ -123,3 +121,3 @@ module.exports = factory(require('./nearley'));
}
if (token.literal.length === 1) {
if (token.literal.length === 1 || result.config.lexer) {
return token;
@@ -130,2 +128,7 @@ }
if (token.token) {
if (result.config.lexer) {
var name = token.token;
var expr = result.config.lexer + ".has(" + JSON.stringify(name) + ") ? {type: " + JSON.stringify(name) + "} : " + name;
return {token: "(" + expr + ")"};
}
return token;
@@ -132,0 +135,0 @@ }
(function(root, factory) {
// if (typeof define === 'function' && define.amd) {
// define(['nearley'], factory);
if (typeof module === 'object' && module.exports) {
@@ -117,2 +115,3 @@ module.exports = factory(require('./nearley'));
output += "var grammar = {\n";
output += " Lexer: " + parser.config.lexer + ",\n";
output += " ParserRules: " +
@@ -147,2 +146,3 @@ serializeRules(parser.rules, generate.javascript.builtinPostprocessors)
output += " grammar = {\n";
output += " Lexer: " + parser.config.lexer + ",\n";
output += " ParserRules: " +
@@ -180,2 +180,3 @@ tabulateString(
output += "export var grammar : NearleyGrammar = {\n";
output += " Lexer: " + parser.config.lexer + ",\n";
output += " ParserRules: " + serializeRules(parser.rules, generate.typescript.builtinPostprocessors) + "\n";
@@ -182,0 +183,0 @@ output += " , ParserStart: " + JSON.stringify(parser.start) + "\n";
@@ -5,20 +5,4 @@ // Generated automatically by nearley
function id(x) {return x[0]; }
function nth(n) {
return function(d) {
return d[n];
};
}
function $(o) {
return function(d) {
var ret = {};
Object.keys(o).forEach(function(k) {
ret[k] = d[o[k]];
});
return ret;
};
}
var grammar = {
Lexer: undefined,
ParserRules: [
@@ -25,0 +9,0 @@ {"name": "dqstring$ebnf$1", "symbols": []},
(function(root, factory) {
// if (typeof define === 'function' && define.amd) {
// define([], factory);
if (typeof module === 'object' && module.exports) {
@@ -22,4 +20,4 @@ module.exports = factory();
function stringifySymbolSequence (e) {
return (e.literal) ? JSON.stringify(e.literal)
: e.toString();
return e.literal ? JSON.stringify(e.literal) :
e.type ? '%' + e.type : e.toString();
}
@@ -49,6 +47,6 @@ var symbolSequence = (typeof withCursorAt === "undefined")
State.prototype.nextState = function(data) {
State.prototype.nextState = function(child) {
var state = new State(this.rule, this.dot + 1, this.reference, this.wantedBy);
state.left = this;
state.right = data;
state.right = child;
if (state.isComplete) {
@@ -64,3 +62,3 @@ state.data = state.build();
do {
children.push(node.right);
children.push(node.right.data);
node = node.left;
@@ -109,3 +107,3 @@ } while (node.left);
if (state.reference === this.index) {
// make sure future predictors of this rule get completed.
// make sure future predictors of this rule get completed.
var exp = state.rule.name;
@@ -157,3 +155,3 @@ (this.completed[exp] = this.completed[exp] || []).push(state);
if (left.rule.symbols[left.dot] === inp) {
var copy = left.nextState(right.data);
var copy = left.nextState(right);
this.states.push(copy);
@@ -178,2 +176,3 @@ }
Grammar.fromCompiled = function(rules, start) {
var lexer = rules.Lexer;
if (rules.ParserStart) {
@@ -184,6 +183,56 @@ start = rules.ParserStart;
var rules = rules.map(function (r) { return (new Rule(r.name, r.symbols, r.postprocess)); });
return new Grammar(rules, start);
var g = new Grammar(rules, start);
g.lexer = lexer; // nb. storing lexer on Grammar is iffy, but unavoidable
return g;
}
function StreamLexer() {
this.reset("");
}
StreamLexer.prototype.reset = function(data, state) {
this.buffer = data;
this.index = 0;
this.line = state ? state.line : 1;
this.lastLineBreak = state ? -state.col : 0;
}
StreamLexer.prototype.next = function() {
if (this.index < this.buffer.length) {
var ch = this.buffer[this.index++];
if (ch === '\n') {
this.line += 1;
this.lastLineBreak = this.index;
}
return {value: ch};
}
}
StreamLexer.prototype.save = function() {
return {
line: this.line,
col: this.index - this.lastLineBreak,
}
}
StreamLexer.prototype.formatError = function(token, message) {
// nb. this gets called after consuming the offending token,
// so the culprit is index-1
var buffer = this.buffer;
if (typeof buffer === 'string') {
var nextLineBreak = buffer.indexOf('\n', this.index);
if (nextLineBreak === -1) nextLineBreak = buffer.length;
var line = buffer.substring(this.lastLineBreak, nextLineBreak)
var col = this.index - this.lastLineBreak;
message += " at line " + this.line + " col " + col + ":\n\n";
message += " " + line + "\n"
message += " " + Array(col).join(" ") + "^"
return message;
} else {
return message + " at index " + (this.index - 1);
}
}
function Parser(rules, start, options) {
@@ -201,3 +250,3 @@ if (rules instanceof Grammar) {
keepHistory: false,
// rewindable: false,
lexer: grammar.lexer || new StreamLexer,
};
@@ -207,4 +256,7 @@ for (var key in (options || {})) {
}
// if (this.options.rewindable) { this.options.keepHistory = true; }
// Setup lexer
this.lexer = this.options.lexer;
this.lexerState = undefined;
// Setup a table
@@ -219,3 +271,3 @@ var column = new Column(grammar, 0);
column.process();
this.current = 0;
this.current = 0; // token index
}
@@ -227,12 +279,15 @@
Parser.prototype.feed = function(chunk) {
for (var chunkPos = 0; chunkPos < chunk.length; chunkPos++) {
var lexer = this.lexer;
lexer.reset(chunk, this.lexerState);
while (token = lexer.next()) {
// We add new states to table[current+1]
var column = this.table[this.current + chunkPos];
var column = this.table[this.current];
// GC unused states
if (!this.options.keepHistory) {
delete this.table[this.current + chunkPos - 1];
delete this.table[this.current - 1];
}
var n = this.current + chunkPos + 1;
var n = this.current + 1;
var nextColumn = new Column(this.grammar, n);
@@ -242,5 +297,4 @@ this.table.push(nextColumn);
// Advance all tokens that expect the symbol
// So for each state in the previous row,
var token = chunk[chunkPos];
var literal = token.value;
var value = lexer.constructor === StreamLexer ? token.value : token;
var scannable = column.scannable;
@@ -252,5 +306,7 @@ for (var w = scannable.length; w--; ) {
// either regex or literal
if (expect.test ? expect.test(token) : expect.literal === token) {
if (expect.test ? expect.test(value) :
expect.type ? expect.type === token.type
: expect.literal === literal) {
// Add it
var next = state.nextState(token);
var next = state.nextState({data: value, token: token, isToken: true});
nextColumn.states.push(next);
@@ -273,13 +329,21 @@ }
// No states at all! This is not good.
var err = new Error(
"nearley: No possible parsings (@" + (this.current + chunkPos)
+ ": '" + chunk[chunkPos] + "')."
);
err.offset = this.current + chunkPos;
var message = this.lexer.formatError(token, "invalid syntax") + "\n";
message += "Unexpected " + (token.type ? token.type + " token: " : "");
message += JSON.stringify(token.value !== undefined ? token.value : token) + "\n";
var err = new Error(message);
err.offset = this.current;
throw err;
}
// maybe save lexer state
if (this.options.keepHistory) {
column.lexerState = lexer.save()
}
this.current++;
}
if (column) {
this.lexerState = lexer.save()
}
this.current += chunkPos;
// Incrementally keep track of results
@@ -292,17 +356,14 @@ this.results = this.finish();
Parser.prototype.rewind = function(index) {
if (!this.options.keepHistory) {
throw new Error('set option `keepHistory` to enable rewinding')
}
if (this.current < this.index) {
// TODO: api -- consider silently succeeding?
throw new Error('cannot rewind forward!')
}
/*
* recall column (table) indicies fall between token indicies.
*
* col 0 -- token 0 -- col 1
*/
Parser.prototype.save = function() {
var column = this.table[this.current];
column.lexerState = this.lexerState;
return column;
};
Parser.prototype.restore = function(column) {
var index = column.index;
this.current = index;
this.table[index] = column;
this.table.splice(index + 1);
this.current = index;
this.lexerState = column.lexerState;
@@ -313,2 +374,12 @@ // Incrementally keep track of results
// nb. deprecated: use save/restore instead!
Parser.prototype.rewind = function(index) {
if (!this.options.keepHistory) {
throw new Error('set option `keepHistory` to enable rewinding')
}
// nb. recall column (table) indicies fall between token indicies.
// col 0 -- token 0 -- col 1
this.restore(this.table[index]);
};
Parser.prototype.finish = function() {
@@ -315,0 +386,0 @@ // Return the possible parsings
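Since the hunk above introduces `Parser#save()` and `Parser#restore()` (with `rewind()` kept as a deprecated wrapper), here is a hedged usage sketch. The `grammar` variable is a placeholder, and the input strings are borrowed from the tosh test further down:

```js
// Hedged sketch of checkpointing a parse with the new save()/restore() API.
var parser = new nearley.Parser(grammar, { keepHistory: true });

parser.feed("say 'hello'");
var checkpoint = parser.save();   // the current Column; the lexer state is attached to it

parser.feed(' for 2 secs');       // explore one continuation
parser.restore(checkpoint);       // roll the table and lexer state back

parser.feed(' and wave');         // resume from the checkpoint instead
```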
{
"name": "nearley",
"version": "2.8.0",
"version": "2.9.0",
"description": "Simple, fast, powerful parser toolkit for JavaScript.",
@@ -43,4 +43,5 @@ "main": "lib/nearley.js",
"microtime": "^2.1.2",
"mocha": "^2.3.4"
"mocha": "^2.3.4",
"moo": "^0.3.1"
}
}
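The new `moo` devDependency relates to the lexer support added in this release. A hedged sketch of how a moo lexer is typically plugged in follows; the token names are made up, `grammar` is a placeholder, and moo's exact rule options may differ between versions:

```js
// Hedged sketch: hand a moo lexer to nearley via the Parser's `lexer` option.
// (A grammar can also export one with the @lexer directive; see the README section below.)
var moo = require('moo');
var nearley = require('nearley');

var lexer = moo.compile({
    ws:     /[ \t]+/,
    number: /[0-9]+/,
    word:   /[a-z]+/
});

// `grammar` stands in for a compiled grammar whose rules use %word / %number specifiers.
var parser = new nearley.Parser(grammar, { lexer: lexer });
parser.feed('add 2 apples');
```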
@@ -57,3 +57,3 @@ ```
nearley is used by [artificial
intelligence](https://github.com/AI-course-TIN172-DIT410/shrdlite-course-project)
intelligence](https://github.com/ChalmersGU-AI-course/shrdlite-course-project)
and [computational
@@ -126,6 +126,5 @@ linguistics](https://wiki.eecs.yorku.ca/course_archive/2014-15/W/6339/useful_handouts)
Alternatively, to use a generated grammar in a browser runtime, include the
`nearley.js` file as a `<script>`. You can hardlink this script from Github if
you want; this will guarantee automatic updates.
`nearley.js` file in a `<script>` tag.
<script src="https://raw.githubusercontent.com/Hardmath123/nearley/master/lib/nearley.js"></script>
<script src="nearley.js"></script>
<script src="my-generated-grammar.js"></script>
@@ -289,2 +288,15 @@
### Custom lexers
You can pass a `lexer` instance to Parser, which must have the following interface:
* `reset(chunk, Info)`: set the internal buffer to `chunk`, and restore line/col/state info taken from `save()`.
* `next() -> Token` return e.g. `{type, value, line, col, …}`. Only the `value` attribute is required.
* `save() -> Info` -> return an object describing the current line/col etc. This allows us to preserve this information between `feed()` calls, and also to support `Parser#rewind()`. The exact structure is lexer-specific; nearley doesn't care what's in it.
* `formatError(token)` -> return a string with an error message describing the line/col of the offending token. You might like to include a preview of the line in question.
* `has(tokenType)` -> return true if the lexer can emit tokens with that name. Used to resolve `%`-specifiers in compiled nearley grammars.
If Parser isn't given a lexer option, it will look for a `.lexer` attribute on its Grammar. The `@lexer` directive allows exporting a lexer object from your `.ne` grammar file. (See `json.ne` for an example.)
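To make the interface listed above concrete, here is a hedged sketch of a hand-rolled lexer that splits input into words and numbers. `WordLexer` and the token types `word`/`number` are illustrative names, not part of nearley, and error handling is kept minimal:

```js
// Hedged sketch of a custom lexer satisfying the interface above.
function WordLexer() {
    this.reset("");
}
WordLexer.prototype.reset = function(chunk, info) {
    this.buffer = chunk;
    this.index = 0;
    this.line = info ? info.line : 1;   // restore state produced by save()
};
WordLexer.prototype.next = function() {
    // skip whitespace, counting line breaks as we go
    while (this.index < this.buffer.length && /\s/.test(this.buffer[this.index])) {
        if (this.buffer[this.index] === "\n") this.line += 1;
        this.index++;
    }
    if (this.index >= this.buffer.length) return; // undefined ends Parser#feed's loop
    var start = this.index;
    while (this.index < this.buffer.length && !/\s/.test(this.buffer[this.index])) {
        this.index++;
    }
    var value = this.buffer.slice(start, this.index);
    return {
        type: /^[0-9]+$/.test(value) ? "number" : "word",
        value: value,
        line: this.line
    };
};
WordLexer.prototype.save = function() {
    return { line: this.line };          // handed back to reset() on the next feed()
};
WordLexer.prototype.formatError = function(token, message) {
    // nearley calls this as formatError(token, "invalid syntax")
    return message + " at line " + (token && token.line ? token.line : this.line);
};
WordLexer.prototype.has = function(name) {
    return name === "word" || name === "number"; // resolves %word / %number specifiers
};

// Usage sketch (the grammar variable is hypothetical):
// var parser = new nearley.Parser(grammar, { lexer: new WordLexer() });
// parser.feed("add 2 apples");
```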
### Custom tokens
@@ -442,5 +454,5 @@
*This section lists some tools created by others. These are not distributed
with nearley, so if you have problems, please contact the respective author for
support.*
*This section lists nearley tooling created by other developers. These tools
are not distributed with nearley, so if you have problems, please contact the
respective author for support instead of opening an issue with nearley.*
@@ -456,8 +468,13 @@ Atom users can write nearley grammars with [this
Visual Studio Code users can use [this
extension](https://github.com/karyfoundation/nearley-vscode) by Pouya Kary.
Python users can convert nearley grammars to Python using
[lark](https://github.com/erezsh/lark) by Erez (currently experimental).
[lark](https://github.com/erezsh/lark#how-to-use-nearley-grammars-in-lark) by
Erez.
Browser users can use
[nearley-playground](https://omrelli.ug/nearley-playground/) by Guillermo
Webster to explore nearley interactively in the browser.
Webster to explore nearley interactively in the browser. There is also a [Mac
app](https://github.com/pmkary/nearley-playground-mac) by Pouya Kary.
@@ -464,0 +481,0 @@ Webpack users can use
var fs = require('fs');
var path = require('path');
@@ -29,7 +30,14 @@ var nearley = require('../lib/nearley.js');
function requireFromString(source) {
var filename = '.'
var Module = module.constructor;
var m = new Module();
m.paths = Module._nodeModulePaths(path.dirname(filename))
m._compile(source, filename);
return m.exports;
}
function evalGrammar(compiledGrammar) {
var f = new Function('module', compiledGrammar);
var m = {exports: {}};
f(m);
return new nearley.Grammar.fromCompiled(m.exports);
var exports = requireFromString(compiledGrammar);
return new nearley.Grammar.fromCompiled(exports);
}
@@ -36,0 +44,0 @@
@@ -62,4 +62,3 @@
addTest('json', makeParser('examples/json.ne'), [
// Example.read('test/test1.json'),
Example.read('test/test2.json'),
Example.read('test/sample1k.json'),
]);
@@ -66,0 +65,0 @@
@@ -151,4 +151,36 @@ var child_process = require('child_process')
let testGrammar = compile(`
y -> x:+
x -> [a-z0-9] | "\\n"
`)
it('shows line number in errors', function() {
(() => parse(testGrammar, 'abc\n12!')).should.throw(
'invalid syntax at line 2 col 3:\n' +
'\n' +
' 12!\n' +
' ^'
)
})
it('shows token index in errors', function() {
(() => parse(testGrammar, ['1', '2', '!'])).should.throw(
'invalid syntax at index 2'
)
})
var tosh = compile(read("examples/tosh.ne"));
it('can save state', function() {
let first = "say 'hello'";
let second = " for 2 secs";
let p = new nearley.Parser(tosh, { keepHistory: true });
p.feed(first);
p.current.should.equal(11)
p.table.length.should.equal(12)
var col = p.save();
col.index.should.equal(11)
col.lexerState.col.should.equal(first.length)
});
it('can rewind', function() {
@@ -165,2 +197,3 @@ let first = "say 'hello'";
p.rewind(first.length);
p.current.should.equal(11)
@@ -177,2 +210,33 @@ p.table.length.should.equal(12)
it('restores line numbers', function() {
let p = new nearley.Parser(testGrammar);
p.feed('abc\n')
p.save().lexerState.line.should.equal(2)
p.feed('123\n')
var col = p.save();
col.lexerState.line.should.equal(3)
p.feed('q')
p.restore(col);
p.lexer.line.should.equal(3)
p.feed('z')
});
it('restores column number', function() {
let p = new nearley.Parser(testGrammar);
p.feed('foo\nbar')
var col = p.save();
col.lexerState.line.should.equal(2)
col.lexerState.col.should.equal(3)
p.feed('123');
p.lexerState.col.should.equal(6)
p.restore(col);
p.lexerState.line.should.equal(2)
p.lexerState.col.should.equal(3)
p.feed('456')
p.lexerState.col.should.equal(6)
});
// TODO: moo save/restore
});