stream-json
Advanced tools
Comparing version 0.2.2 to 0.3.0
@@ -13,3 +13,3 @@ "use strict"; | ||
function ClassicParser(options){ | ||
function Parser(options){ | ||
Transform.call(this, options); | ||
@@ -27,5 +27,5 @@ this._writableState.objectMode = false; | ||
} | ||
util.inherits(ClassicParser, Transform); | ||
util.inherits(Parser, Transform); | ||
ClassicParser.prototype._transform = function transform(chunk, encoding, callback){ | ||
Parser.prototype._transform = function transform(chunk, encoding, callback){ | ||
this._scanner.addBuffer(chunk.toString()); | ||
@@ -35,3 +35,3 @@ this._processInput(callback); | ||
ClassicParser.prototype._flush = function flush(callback){ | ||
Parser.prototype._flush = function flush(callback){ | ||
this._scanner.addBuffer("", true); | ||
@@ -41,3 +41,3 @@ this._processInput(callback); | ||
ClassicParser.prototype._processInput = function processInput(callback){ | ||
Parser.prototype._processInput = function processInput(callback){ | ||
try{ | ||
@@ -76,2 +76,2 @@ if(this._expected === null){ | ||
module.exports = ClassicParser; | ||
module.exports = Parser; |
{ | ||
"name": "stream-json", | ||
"version": "0.2.2", | ||
"description": "stream-json is a SAX-insired stream components with a minimal memory footprint to parse huge JSON files. Includes utilities to stream Django-like JSON database dumps.", | ||
"version": "0.3.0", | ||
"description": "stream-json is a SAX-inspired stream components with a minimal memory footprint to parse huge JSON files. Includes utilities to stream Django-like JSON database dumps.", | ||
"homepage": "http://github.com/uhop/stream-json", | ||
@@ -6,0 +6,0 @@ "bugs": "http://github.com/uhop/stream-json/issues", |
1022
Parser.js
@@ -8,2 +8,40 @@ "use strict"; | ||
// Parser states. One of these is stored in this._state and selects the branch
// of the per-character switch that handles the next input character.
var EXPECTING_NOTHING = 0, | ||
EXPECTING_VALUE = 1, | ||
// object | ||
// EXPECTING_KEY_FIRST is set right after "{" and is the only key state in
// which "}" is accepted; EXPECTING_KEY is set after "," inside an object.
EXPECTING_KEY_FIRST = 2, | ||
EXPECTING_KEY = 3, | ||
EXPECTING_KEY_COLON = 4, | ||
EXPECTING_OBJECT_STOP = 5, | ||
// array | ||
// EXPECTING_ARRAY_FIRST is set right after "[" and is the only value state in
// which "]" is accepted.
EXPECTING_ARRAY_FIRST = 6, | ||
EXPECTING_ARRAY_STOP = 7, | ||
// key | ||
EXPECTING_KEY_VALUE = 8, | ||
// string | ||
EXPECTING_STRING_VALUE = 9, | ||
// numbers | ||
// EXPECTING_FRACTION is set after a leading "0": another digit is rejected
// there, while ".", "e" and "E" are accepted.
EXPECTING_NUMBER_START = 10, | ||
EXPECTING_NUMBER_DIGIT = 11, | ||
EXPECTING_FRACTION = 12, | ||
EXPECTING_FRAC_START = 13, | ||
EXPECTING_FRAC_DIGIT = 14, | ||
EXPECTING_EXP_SIGN = 15, | ||
EXPECTING_EXP_START = 16, | ||
EXPECTING_EXP_DIGIT = 17; | ||
// Kind of the enclosing container. Stored in this._parent; the previous value
// is pushed on this._stack when "{" or "[" opens a nested container.
var PARSING_NOTHING = 0, | ||
PARSING_OBJECT = 1, | ||
PARSING_ARRAY = 2; | ||
// LITERALS maps the first character of a keyword literal to its full text.
// ESCAPED_CHAR and HEXADECIMALS are marker values stored in this._literal when
// a chunk ends in the middle of a backslash escape or of the hex digits of a
// "\u" escape, so that matching can resume with the next chunk.
var LITERALS = {t: "true", f: "false", n: "null"}, | ||
ESCAPED_CHAR = "e", HEXADECIMALS = "h"; | ||
// Lookup table of the characters accepted as hexadecimal digits in "\u" escapes.
var hex = { | ||
"0": 1, "1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "6": 1, "7": 1, "8": 1, "9": 1, | ||
"a": 1, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, | ||
"A": 1, "B": 1, "C": 1, "D": 1, "E": 1, "F": 1 | ||
}; | ||
function Parser(options){ | ||
@@ -14,418 +52,608 @@ Transform.call(this, options); | ||
this._buffer = ""; | ||
this._done = false; | ||
this._expect = "value"; | ||
this._state = EXPECTING_VALUE; | ||
this._parent = PARSING_NOTHING; | ||
this._stack = []; | ||
this._parent = ""; | ||
this._literal = null; | ||
this._literalFrom = 0; | ||
this._stash = ""; | ||
this._chunk = null; | ||
this._line = this._pos = 1; | ||
this._lastChar = ""; | ||
} | ||
util.inherits(Parser, Transform); | ||
Parser.prototype._transform = function transform(chunk, encoding, callback){ | ||
this._buffer += chunk.toString(); | ||
this._processInput(callback); | ||
}; | ||
var s = chunk.toString(), i = 0, j, k, n = s.length; | ||
Parser.prototype._flush = function flush(callback){ | ||
this._done = true; | ||
this._processInput(callback); | ||
}; | ||
var value1 = /^(?:[\"\{\[\]\-0-9]|true\b|false\b|null\b|\s{1,256})/, | ||
string = /^(?:[^\"\\]{1,256}|\\[bfnrt\"\\\/]|\\u[0-9a-fA-F]{4}|\")/, | ||
number0 = /^[0-9]/, | ||
number1 = /^\d{0,256}/, | ||
number2 = /^[\.eE]/, | ||
number3 = number0, | ||
number4 = number1, | ||
number5 = /^[eE]/, | ||
number6 = /^[-+]/, | ||
number7 = number0, | ||
number8 = number1, | ||
key1 = /^(?:[\"\}]|\s{1,256})/, | ||
colon = /^(?:\:|\s{1,256})/, | ||
comma = /^(?:[\,\]\}]|\s{1,256})/, | ||
ws = /^\s{1,256}/; | ||
Parser.prototype._processInput = function(callback){ | ||
try{ | ||
var match, value; | ||
main: for(;;){ | ||
switch(this._expect){ | ||
case "value1": | ||
case "value": | ||
match = value1.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
if(this._done){ | ||
throw Error("Parser cannot parse input: expected a value"); | ||
} | ||
try { | ||
main: do{ | ||
if(this._literal){ | ||
switch(this._literal){ | ||
case ESCAPED_CHAR: | ||
switch(s[0]){ | ||
case "\"": case "/": case "b": case "f": | ||
case "\\": case "n": case "r": case "t": | ||
this.push({id: "escapedChars", value: "\\" + s[0], line: this._line, pos: this._pos}); | ||
++i; | ||
++this._pos; | ||
break; | ||
case "u": | ||
k = Math.min(5, n); | ||
for(j = 1, ++i; i < k; ++j, ++i){ | ||
if(!hex[s[i]]) { | ||
throw Error("While matching hexadecimals encountered '" + s[i] + "'"); | ||
} | ||
this._stash += s[i]; | ||
} | ||
if(j < 5){ | ||
this._literal = HEXADECIMALS; | ||
this._literalFrom = j; | ||
break main; | ||
} | ||
this.push({id: "escapedChars", value: "\\u" + this._stash, line: this._line, pos: this._pos}); | ||
this._stash = ""; | ||
this._pos += 5; | ||
break; | ||
default: | ||
throw Error("Wrong escaped symbol '" + c + "'"); | ||
} | ||
if(this._done){ | ||
throw Error("Parser has expected a value"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
switch(value){ | ||
case "\"": | ||
this.push({id: value, value: value}); | ||
this._expect = "string"; | ||
break; | ||
case "{": | ||
this.push({id: value, value: value}); | ||
this._stack.push(this._parent); | ||
this._parent = "object"; | ||
this._expect = "key1"; | ||
break; | ||
case "[": | ||
this.push({id: value, value: value}); | ||
this._stack.push(this._parent); | ||
this._parent = "array"; | ||
this._expect = "value1"; | ||
break; | ||
case "]": | ||
if(this._expect !== "value1"){ | ||
throw Error("Parser cannot parse input: unexpected token ']'"); | ||
break; | ||
case HEXADECIMALS: | ||
k = Math.min(5 - this._literalFrom, n); | ||
for(j = this._literalFrom; i < k; ++j, ++i){ | ||
if(!hex[s[i]]) { | ||
throw Error("While matching hexadecimals encountered '" + s[i] + "'"); | ||
} | ||
this.push({id: value, value: value}); | ||
this._parent = this._stack.pop(); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
case "-": | ||
this.push({id: value, value: value}); | ||
this._expect = "number0"; | ||
break; | ||
case "0": | ||
this.push({id: value, value: value}); | ||
this._expect = "number2"; | ||
break; | ||
case "1": | ||
case "2": | ||
case "3": | ||
case "4": | ||
case "5": | ||
case "6": | ||
case "7": | ||
case "8": | ||
case "9": | ||
this.push({id: "nonZero", value: value}); | ||
this._expect = "number1"; | ||
break; | ||
case "true": | ||
case "false": | ||
case "null": | ||
if(this._buffer.length === value.length && !this._done){ | ||
// wait for more input | ||
break main; | ||
} | ||
this.push({id: value, value: value}); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
// default: // ws | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "keyVal": | ||
case "string": | ||
match = string.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
if(this._done || this._buffer.length >= 6){ | ||
throw Error("Parser cannot parse input: escaped characters"); | ||
} | ||
this._stash += s[i]; | ||
} | ||
if(this._done){ | ||
throw Error("Parser has expected a string value"); | ||
if(j < 5){ | ||
this._literalFrom = j; | ||
break main; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "\""){ | ||
this.push({id: value, value: value}); | ||
if(this._expect === "keyVal"){ | ||
this._expect = "colon"; | ||
}else{ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
this.push({id: "escapedChars", value: "\\u" + this._stash, line: this._line, pos: this._pos}); | ||
this._stash = ""; | ||
this._pos += 5; | ||
break; | ||
default: | ||
k = Math.min(this._literal.length - this._literalFrom, n); | ||
for(j = this._literalFrom; i < k; ++j, ++i){ | ||
if(this._literal[j] !== s[i]) { | ||
throw Error("While matching '" + this._literal + "' encountered '" + s[j] + "' instead of '" + LITERAL_TRUE[j - i] + "'"); | ||
} | ||
} | ||
}else if(value.length > 1 && value.charAt(0) === "\\"){ | ||
this.push({id: "escapedChars", value: value}); | ||
}else{ | ||
this.push({id: "plainChunk", value: value}); | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
// number chunks | ||
case "number0": // [0-9] | ||
match = number0.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected a digit"); | ||
if(j < this._literal.length){ | ||
this._literalFrom = j; | ||
break main; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "0"){ | ||
this.push({id: value, value: value}); | ||
this._expect = "number2"; | ||
}else{ | ||
this.push({id: "nonZero", value: value}); | ||
this._expect = "number1"; | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number1": // [0-9]* | ||
match = number1.exec(this._buffer); | ||
value = match[0]; | ||
if(value){ | ||
this.push({id: "numericChunk", value: value}); | ||
this._buffer = this._buffer.substring(value.length); | ||
}else{ | ||
if(this._buffer){ | ||
this._expect = "number2"; | ||
break; | ||
this.push({id: this._literal, value: this._literal, line: this._line, pos: this._pos}); | ||
this._pos += this._literal.length; | ||
// end of value | ||
switch(this._parent){ | ||
case PARSING_OBJECT: | ||
this._state = EXPECTING_OBJECT_STOP; | ||
break; | ||
case PARSING_ARRAY: | ||
this._state = EXPECTING_ARRAY_STOP; | ||
break; | ||
default: | ||
this._state = EXPECTING_NOTHING; | ||
break; | ||
} | ||
if(this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
break; | ||
} | ||
this._literal = null; | ||
} | ||
for(; i < n; ++i, ++this._pos){ | ||
var c = s[i]; | ||
// calculate (line, pos) | ||
switch(c){ | ||
case "\r": | ||
++this._line; | ||
this._pos = 1; | ||
break; | ||
case "\n": | ||
if(this._lastChar !== "\r"){ | ||
++this._line; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
break; | ||
case "number2": // [\.eE]? | ||
match = number2.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
this._pos = 1; | ||
break; | ||
} | ||
this._lastChar = c; | ||
// process a character | ||
switch(this._state){ | ||
case EXPECTING_NOTHING: | ||
switch(c){ | ||
case " ": case "\t": case "\r": case "\n": // ws | ||
if(this._chunk && this._chunk.id !== "ws"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "ws", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
default: | ||
throw Error("Expected whitespace"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "."){ | ||
this.push({id: value, value: value}); | ||
this._expect = "number3"; | ||
}else{ | ||
this.push({id: "exponent", value: value}); | ||
this._expect = "number6"; | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number3": // [0-9] | ||
match = number3.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected a fractional part of a number"); | ||
break; | ||
case EXPECTING_VALUE: | ||
case EXPECTING_ARRAY_FIRST: | ||
switch(c){ | ||
case "{": // object | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_KEY_FIRST; | ||
this._stack.push(this._parent); | ||
this._parent = PARSING_OBJECT; | ||
continue; | ||
case "[": // array | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_ARRAY_FIRST; | ||
this._stack.push(this._parent); | ||
this._parent = PARSING_ARRAY; | ||
continue; | ||
case "\"": // string | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_STRING_VALUE; | ||
continue; | ||
case "-": // number | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_NUMBER_START; | ||
continue; | ||
case "0": // number | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_FRACTION; | ||
continue; | ||
case "1": case "2": case "3": case "4": case "5": case "6": case "7": case "8": case "9": // number | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: "nonZero", value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_NUMBER_DIGIT; | ||
continue; | ||
case "t": // true | ||
case "f": // false | ||
case "n": // null | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this._literal = LITERALS[c]; | ||
k = Math.min(this._literal.length + i, n); | ||
for(j = 1, ++i; i < k; ++j, ++i){ | ||
if(this._literal[j] !== s[i]) { | ||
throw Error("While matching '" + this._literal + "' encountered '" + s[i] + "' instead of '" + this._literal[j] + "'"); | ||
} | ||
} | ||
if(j < this._literal.length){ | ||
this._literalFrom = j; | ||
break main; | ||
} | ||
this.push({id: this._literal, value: this._literal, line: this._line, pos: this._pos}); | ||
--i; | ||
this._pos += this._literal.length - 1; | ||
this._literal = null; | ||
break; | ||
case " ": case "\t": case "\r": case "\n": // ws | ||
if(this._chunk && this._chunk.id !== "ws"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "ws", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
case "]": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(this._state !== EXPECTING_ARRAY_FIRST){ | ||
throw Error("Expected a value but got ']' instead"); | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._parent = this._stack.pop(); | ||
break; | ||
default: | ||
throw Error("Expected a value"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: "numericChunk", value: value}); | ||
this._expect = "number4"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number4": // [0-9]* | ||
match = number4.exec(this._buffer); | ||
value = match[0]; | ||
if(value){ | ||
this.push({id: "numericChunk", value: value}); | ||
this._buffer = this._buffer.substring(value.length); | ||
}else{ | ||
if(this._buffer){ | ||
this._expect = "number5"; | ||
break; | ||
break; | ||
case EXPECTING_KEY_FIRST: | ||
case EXPECTING_KEY: | ||
switch(c){ | ||
case "}": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(this._state !== EXPECTING_KEY_FIRST){ | ||
throw Error("Expected a key value"); | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._parent = this._stack.pop(); | ||
break; | ||
case "\"": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_KEY_VALUE; | ||
continue; | ||
case " ": case "\t": case "\r": case "\n": // ws | ||
if(this._chunk && this._chunk.id !== "ws"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "ws", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
default: | ||
throw Error("Expected a key"); | ||
} | ||
if(this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
break; | ||
case EXPECTING_KEY_COLON: | ||
switch(c){ | ||
case ":": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_VALUE; | ||
continue; | ||
case " ": case "\t": case "\r": case "\n": // ws | ||
if(this._chunk && this._chunk.id !== "ws"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "ws", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
default: | ||
throw Error("Expected ':'"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
break; | ||
case "number5": // [eE]? | ||
match = number5.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
break; | ||
case EXPECTING_OBJECT_STOP: | ||
switch(c){ | ||
case "}": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._parent = this._stack.pop(); | ||
break; | ||
case ",": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_KEY; | ||
continue; | ||
case " ": case "\t": case "\r": case "\n": // ws | ||
if(this._chunk && this._chunk.id !== "ws"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "ws", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
default: | ||
throw Error("Expected ','"); | ||
} | ||
if(this._done){ | ||
this._expect = "done"; | ||
break; | ||
break; | ||
case EXPECTING_ARRAY_STOP: | ||
switch(c){ | ||
case "]": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._parent = this._stack.pop(); | ||
break; | ||
case ",": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_VALUE; | ||
continue; | ||
case " ": case "\t": case "\r": case "\n": // ws | ||
if(this._chunk && this._chunk.id !== "ws"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "ws", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
default: | ||
throw Error("Expected ','"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: "exponent", value: value}); | ||
this._expect = "number6"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number6": // [-+]? | ||
match = number6.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
this._expect = "number7"; | ||
break; | ||
break; | ||
case EXPECTING_KEY_VALUE: | ||
case EXPECTING_STRING_VALUE: | ||
switch(c){ | ||
case "\"": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
if(this._state === EXPECTING_KEY_VALUE){ | ||
this._state = EXPECTING_KEY_COLON; | ||
continue; | ||
} | ||
break; | ||
case "\\": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(i + 1 < n){ | ||
c = s[++i]; | ||
switch(c){ | ||
case "\"": case "/": case "b": case "f": | ||
case "\\": case "n": case "r": case "t": | ||
this.push({id: "escapedChars", value: "\\" + c, line: this._line, pos: this._pos}); | ||
++this._pos; | ||
continue; | ||
case "u": | ||
k = Math.min(i + 5, n); | ||
for(j = 1, ++i; i < k; ++j, ++i){ | ||
if(!hex[s[i]]) { | ||
throw Error("While matching hexadecimals encountered '" + s[i] + "'"); | ||
} | ||
} | ||
if(j < 5){ | ||
// emit this._literal | ||
this._literal = HEXADECIMALS; | ||
this._literalFrom = j; | ||
break main; | ||
} | ||
this.push({id: "escapedChars", value: "\\u" + s.substr(i - 4, 4), | ||
line: this._line, pos: this._pos}); | ||
--i; | ||
this._pos += 5; | ||
continue; | ||
default: | ||
throw Error("Wrong escaped symbol '" + c + "'"); | ||
} | ||
} | ||
this._literal = ESCAPED_CHAR; | ||
break main; | ||
default: | ||
if(this._chunk && this._chunk.id !== "plainChunk"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "plainChunk", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
} | ||
if(this._done){ | ||
throw Error("Parser has expected an exponent value of a number"); | ||
break; | ||
case EXPECTING_NUMBER_START: | ||
switch(c){ | ||
case "0": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_FRACTION; | ||
continue; | ||
case "1": case "2": case "3": | ||
case "4": case "5": case "6": | ||
case "7": case "8": case "9": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: "nonZero", value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_NUMBER_DIGIT; | ||
continue; | ||
default: | ||
throw Error("Expected a digit"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: value, value: value}); | ||
this._expect = "number7"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number7": // [0-9] | ||
match = number7.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected an exponent part of a number"); | ||
break; | ||
case EXPECTING_NUMBER_DIGIT: | ||
case EXPECTING_FRACTION: | ||
case EXPECTING_FRAC_DIGIT: | ||
switch(c){ | ||
case "0": case "1": case "2": case "3": case "4": | ||
case "5": case "6": case "7": case "8": case "9": | ||
if(this._chunk && this._chunk.id !== "numericChunk"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(this._state === EXPECTING_FRACTION){ | ||
throw Error("Expected '.' or 'e'"); | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "numericChunk", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
case ".": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(this._state === EXPECTING_FRAC_DIGIT){ | ||
throw Error("Expected a digit"); | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_FRAC_START; | ||
continue; | ||
case "e": case "E": | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: "exponent", value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_EXP_SIGN; | ||
continue; | ||
default: | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
--i; | ||
--this._pos; | ||
break; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: "numericChunk", value: value}); | ||
this._expect = "number8"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number8": // [0-9]* | ||
match = number8.exec(this._buffer); | ||
value = match[0]; | ||
if(value){ | ||
this.push({id: "numericChunk", value: value}); | ||
this._buffer = this._buffer.substring(value.length); | ||
}else{ | ||
if(this._buffer || this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
break; | ||
case EXPECTING_FRAC_START: | ||
switch(c){ | ||
case "0": case "1": case "2": case "3": case "4": | ||
case "5": case "6": case "7": case "8": case "9": | ||
if(this._chunk && this._chunk.id !== "numericChunk"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "numericChunk", value: i, line: this._line, pos: this._pos}; | ||
} | ||
this._state = EXPECTING_FRAC_DIGIT; | ||
continue; | ||
default: | ||
throw Error("Expected a digit"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
break; | ||
case "key1": | ||
case "key": | ||
match = key1.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected an object key"); | ||
break; | ||
case EXPECTING_EXP_SIGN: | ||
case EXPECTING_EXP_START: | ||
switch(c){ | ||
case "-": case "+": | ||
if(this._state === EXPECTING_EXP_START){ | ||
throw Error("Expected a digit"); | ||
} | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
this.push({id: c, value: c, line: this._line, pos: this._pos}); | ||
this._state = EXPECTING_EXP_START; | ||
continue; | ||
case "0": case "1": case "2": case "3": case "4": | ||
case "5": case "6": case "7": case "8": case "9": | ||
if(this._chunk && this._chunk.id !== "numericChunk"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "numericChunk", value: i, line: this._line, pos: this._pos}; | ||
} | ||
this._state = EXPECTING_EXP_DIGIT; | ||
continue; | ||
default: | ||
throw Error("Expected a digit"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "\""){ | ||
this.push({id: value, value: value}); | ||
this._expect = "keyVal"; | ||
}else if(value === "}"){ | ||
if(this._expect !== "key1"){ | ||
throw Error("Parser cannot parse input: unexpected token '}'"); | ||
break; | ||
case EXPECTING_EXP_DIGIT: | ||
switch(c){ | ||
case "0": case "1": case "2": case "3": case "4": | ||
case "5": case "6": case "7": case "8": case "9": | ||
if(this._chunk && this._chunk.id !== "numericChunk"){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
if(!this._chunk){ | ||
this._chunk = {id: "numericChunk", value: i, line: this._line, pos: this._pos}; | ||
} | ||
continue; | ||
default: | ||
--i; | ||
--this._pos; | ||
break; | ||
} | ||
this.push({id: value, value: value}); | ||
this._parent = this._stack.pop(); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "colon": | ||
match = colon.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected ':'"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === ":"){ | ||
this.push({id: value, value: value}); | ||
this._expect = "value"; | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "aComma": | ||
case "oComma": | ||
match = comma.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected ','"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === ","){ | ||
this.push({id: value, value: value}); | ||
this._expect = this._expect === "aComma" ? "value" : "key"; | ||
}else if(value === "}" || value === "]"){ | ||
this.push({id: value, value: value}); | ||
this._parent = this._stack.pop(); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "done": | ||
match = ws.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
throw Error("Parser cannot parse input: unexpected characters"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
this._buffer = this._buffer.substring(match[0].length); | ||
break; | ||
break; | ||
default: | ||
throw Error("Unexpected this._state: " + this._state); | ||
} | ||
// end of value | ||
switch(this._parent){ | ||
case PARSING_OBJECT: | ||
this._state = EXPECTING_OBJECT_STOP; | ||
break; | ||
case PARSING_ARRAY: | ||
this._state = EXPECTING_ARRAY_STOP; | ||
break; | ||
default: | ||
this._state = EXPECTING_NOTHING; | ||
break; | ||
} | ||
} | ||
} | ||
if(this._chunk){ | ||
this._chunk.value = s.substring(this._chunk.value, i); | ||
this.push(this._chunk); | ||
this._chunk = null; | ||
} | ||
}while(false); | ||
}catch(err){ | ||
@@ -435,5 +663,21 @@ callback(err); | ||
} | ||
callback(); | ||
} | ||
}; | ||
/**
 * Transform `_flush` hook: decides whether the stream ended on a complete
 * JSON document.
 *
 * The end of input is acceptable only when no container is open
 * (`this._parent === PARSING_NOTHING`) and the parser is either idle
 * (`EXPECTING_NOTHING`) or inside a number whose remaining parts are
 * optional. The container check matters for the number states: input such as
 * `[1` ends in `EXPECTING_NUMBER_DIGIT` with the array still open, and must
 * be reported as unfinished rather than accepted.
 *
 * @param {function(Error=)} callback - called exactly once, with no arguments
 *   on a clean end, or with an Error when the input stopped mid-document.
 */
Parser.prototype._flush = function flush(callback){
	var complete = false;
	switch(this._state){
		// normal end
		case EXPECTING_NOTHING:
		// optional number parts
		case EXPECTING_NUMBER_DIGIT:
		case EXPECTING_FRACTION:
		case EXPECTING_FRAC_DIGIT:
		case EXPECTING_EXP_DIGIT:
			// a trailing number finishes the document only at the top level
			complete = this._parent === PARSING_NOTHING;
			break;
	}
	if(complete){
		callback();
		return;
	}
	callback(new Error("Parser didn't finish, yet the stream has ended."));
};
module.exports = Parser; |
@@ -13,4 +13,6 @@ # stream-json | ||
* Streaming JSON `Parser` implemented manually to improve speed over `ClassicParser`. | ||
* Streaming JSON `ClassicParser` based on [parser-toolkit](http://github.com/uhop/parser-toolkit). | ||
* Streaming JSON parsers: | ||
* Streaming JSON `Parser` implemented manually to improve speed over `ClassicParser`. | ||
* Streaming JSON `ClassicParser` based on [parser-toolkit](http://github.com/uhop/parser-toolkit). | ||
* Streaming JSON `AltParser` is manually implemented based on `RegExp`. | ||
* `Streamer`, which converts tokens into SAX-like event stream. | ||
@@ -323,6 +325,14 @@ * `Packer`, which can assemble numbers, strings, and object keys from individual chunks. It is useful, when user knows that individual data items can fit the available memory. Overall, it makes the API simpler. | ||
It is a drop-in replacement for `Parser`, but it can emit whitespace, yet it is slower than the main parser. | ||
It is a drop-in replacement for `Parser` that can also emit whitespace, but it is slower than the main parser. It was the main parser for 0.1.x versions. | ||
The test file for `ClassicParser`: `tests/test_classic.js`. | ||
### AltParser | ||
It is another drop-in replacement for `Parser`, which completely skips whitespace. It is generally faster than `ClassicParser`, but can be slower than the main parser. It was the main parser for 0.2.x versions. | ||
In general, its speed depends heavily on the implementation of regular expressions by node.js. When node.js switched from interpreted regular expressions to JIT-compiled ones, both `ClassicParser` and `AltParser` got a nice performance boost. Yet even the latest JIT compiler (as of 0.12) uses a simple yet non-linear algorithm to implement regular expressions instead of [NFA](http://en.wikipedia.org/wiki/Nondeterministic_finite_automaton) and/or [DFA](http://en.wikipedia.org/wiki/Deterministic_finite_automaton). Future enhancements to node.js could make `RegExp`-based parsers faster, potentially overtaking manually written JavaScript-only implementations. | ||
The test file for `AltParser`: `tests/test_alternative.js`. | ||
### utils/Assembler | ||
@@ -596,2 +606,3 @@ | ||
- 0.2.2 *refreshed dependencies.* | ||
- 0.2.1 *added utilities to filter objects on the fly.* | ||
@@ -598,0 +609,0 @@ - 0.2.0 *new faster parser, formal unit tests, added utilities to assemble objects on the fly.* |
@@ -112,2 +112,5 @@ "use strict"; | ||
Streamer.prototype._flush = function flush(callback){ | ||
if(this._state === "number"){ | ||
this.push({name: "endNumber"}); | ||
} | ||
callback(); | ||
@@ -114,0 +117,0 @@ }; |
@@ -5,5 +5,6 @@ var util = require("util"); | ||
function ReadString(string, options){ | ||
function ReadString(string, quant, options){ | ||
Readable.call(this, options); | ||
this._string = string; | ||
this._quant = quant; | ||
} | ||
@@ -13,3 +14,9 @@ util.inherits(ReadString, Readable); | ||
ReadString.prototype._read = function read(size){ | ||
this.push(this._string, "utf8"); | ||
if(isNaN(this._quant)){ | ||
this.push(this._string, "utf8"); | ||
}else{ | ||
for(var i = 0; i < this._string.length; i += this._quant){ | ||
this.push(this._string.substr(i, this._quant), "utf8"); | ||
} | ||
} | ||
this.push(null); | ||
@@ -16,0 +23,0 @@ }; |
@@ -22,3 +22,3 @@ "use strict"; | ||
pipeline.on("end", function(){ | ||
eval(t.ASSERT("result.length === 23")); | ||
eval(t.ASSERT("result.length === 28")); | ||
eval(t.TEST("result[0].id === '{' && result[0].val === '{'")); | ||
@@ -29,20 +29,25 @@ eval(t.TEST("result[1].id === '\"' && result[1].val === '\"'")); | ||
eval(t.TEST("result[4].id === ':' && result[4].val === ':'")); | ||
eval(t.TEST("result[5].id === 'nonZero' && result[5].val === '1'")); | ||
eval(t.TEST("result[6].id === ',' && result[6].val === ','")); | ||
eval(t.TEST("result[7].id === '\"' && result[7].val === '\"'")); | ||
eval(t.TEST("result[8].id === 'plainChunk' && result[8].val === 'b'")); | ||
eval(t.TEST("result[5].id === 'ws' && result[5].val === ' '")); | ||
eval(t.TEST("result[6].id === 'nonZero' && result[6].val === '1'")); | ||
eval(t.TEST("result[7].id === ',' && result[7].val === ','")); | ||
eval(t.TEST("result[8].id === 'ws' && result[8].val === ' '")); | ||
eval(t.TEST("result[9].id === '\"' && result[9].val === '\"'")); | ||
eval(t.TEST("result[10].id === ':' && result[10].val === ':'")); | ||
eval(t.TEST("result[11].id === 'true' && result[11].val === 'true'")); | ||
eval(t.TEST("result[12].id === ',' && result[12].val === ','")); | ||
eval(t.TEST("result[13].id === '\"' && result[13].val === '\"'")); | ||
eval(t.TEST("result[14].id === 'plainChunk' && result[14].val === 'c'")); | ||
eval(t.TEST("result[15].id === '\"' && result[15].val === '\"'")); | ||
eval(t.TEST("result[16].id === ':' && result[16].val === ':'")); | ||
eval(t.TEST("result[17].id === '[' && result[17].val === '['")); | ||
eval(t.TEST("result[18].id === '\"' && result[18].val === '\"'")); | ||
eval(t.TEST("result[19].id === 'plainChunk' && result[19].val === 'd'")); | ||
eval(t.TEST("result[20].id === '\"' && result[20].val === '\"'")); | ||
eval(t.TEST("result[21].id === ']' && result[21].val === ']'")); | ||
eval(t.TEST("result[22].id === '}' && result[22].val === '}'")); | ||
eval(t.TEST("result[10].id === 'plainChunk' && result[10].val === 'b'")); | ||
eval(t.TEST("result[11].id === '\"' && result[11].val === '\"'")); | ||
eval(t.TEST("result[12].id === ':' && result[12].val === ':'")); | ||
eval(t.TEST("result[13].id === 'ws' && result[13].val === ' '")); | ||
eval(t.TEST("result[14].id === 'true' && result[14].val === 'true'")); | ||
eval(t.TEST("result[15].id === ',' && result[15].val === ','")); | ||
eval(t.TEST("result[16].id === 'ws' && result[16].val === ' '")); | ||
eval(t.TEST("result[17].id === '\"' && result[17].val === '\"'")); | ||
eval(t.TEST("result[18].id === 'plainChunk' && result[18].val === 'c'")); | ||
eval(t.TEST("result[19].id === '\"' && result[19].val === '\"'")); | ||
eval(t.TEST("result[20].id === ':' && result[20].val === ':'")); | ||
eval(t.TEST("result[21].id === 'ws' && result[21].val === ' '")); | ||
eval(t.TEST("result[22].id === '[' && result[22].val === '['")); | ||
eval(t.TEST("result[23].id === '\"' && result[23].val === '\"'")); | ||
eval(t.TEST("result[24].id === 'plainChunk' && result[24].val === 'd'")); | ||
eval(t.TEST("result[25].id === '\"' && result[25].val === '\"'")); | ||
eval(t.TEST("result[26].id === ']' && result[26].val === ']'")); | ||
eval(t.TEST("result[27].id === '}' && result[27].val === '}'")); | ||
async.done(); | ||
@@ -49,0 +54,0 @@ }); |
@@ -7,2 +7,3 @@ "use strict"; | ||
require("./test_classic"); | ||
require("./test_alternative"); | ||
require("./test_parser"); | ||
@@ -16,2 +17,4 @@ require("./test_streamer"); | ||
require("./test_assembler"); | ||
require("./test_primitives"); | ||
require("./test_sliding"); | ||
require("./test_array"); | ||
@@ -18,0 +21,0 @@ require("./test_filtered_array"); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
178608
50
2838
622
168