stream-json
Advanced tools
Comparing version 0.1.0 to 0.2.0
@@ -0,1 +1,4 @@ | ||
"use strict"; | ||
var util = require("util"); | ||
@@ -2,0 +5,0 @@ var Writable = require("stream").Writable; |
@@ -0,1 +1,4 @@ | ||
"use strict"; | ||
var util = require("util"); | ||
@@ -2,0 +5,0 @@ var Transform = require("stream").Transform; |
@@ -0,1 +1,4 @@ | ||
"use strict"; | ||
var Parser = require("./Parser"); | ||
@@ -2,0 +5,0 @@ var Streamer = require("./Streamer"); |
{ | ||
"name": "stream-json", | ||
"version": "0.1.0", | ||
"description": "stream-json is a collection of node.js 0.10 stream components for creating custom standard-compliant JSON processors, which requires a minimal memory footprint. It can parse JSON files far exceeding available memory. Even individual data items are streamed piece-wise. Streaming SAX-inspired event-based API is included as well.", | ||
"version": "0.2.0", | ||
"description": "stream-json is a collection of node.js stream components for creating custom standard-compliant JSON processors, which requires a minimal memory footprint. It can parse JSON files far exceeding available memory. Even individual data items are streamed piece-wise. Streaming SAX-inspired event-based API is included as well.", | ||
"homepage": "http://github.com/uhop/stream-json", | ||
@@ -14,5 +14,7 @@ "bugs": "http://github.com/uhop/stream-json/issues", | ||
}, | ||
"devDependencies": {}, | ||
"devDependencies": { | ||
"heya-unit": "^0.1.11" | ||
}, | ||
"scripts": { | ||
"test": "node tests/test_main.js" | ||
"test": "node tests/tests.js" | ||
}, | ||
@@ -19,0 +21,0 @@ "github": "http://github.com/uhop/stream-json", |
@@ -0,1 +1,4 @@ | ||
"use strict"; | ||
var util = require("util"); | ||
@@ -2,0 +5,0 @@ var Transform = require("stream").Transform; |
437
Parser.js
@@ -1,5 +0,3 @@ | ||
var Scanner = require("parser-toolkit/Scanner"); | ||
var JsonParser = require("parser-toolkit/topDown/Parser"); | ||
"use strict"; | ||
var json = require("./Grammar"); | ||
@@ -15,9 +13,7 @@ var util = require("util"); | ||
this._scanner = new Scanner(); | ||
this._parser = new JsonParser(json); | ||
var self = this; | ||
this._parser.onToken = function onToken(token){ | ||
self.push(token); | ||
}; | ||
this._buffer = ""; | ||
this._done = false; | ||
this._expect = "value"; | ||
this._stack = []; | ||
this._parent = ""; | ||
} | ||
@@ -27,3 +23,3 @@ util.inherits(Parser, Transform); | ||
Parser.prototype._transform = function transform(chunk, encoding, callback){ | ||
this._scanner.addBuffer(chunk.toString()); | ||
this._buffer += chunk.toString(); | ||
this._processInput(callback); | ||
@@ -33,32 +29,404 @@ }; | ||
Parser.prototype._flush = function flush(callback){ | ||
this._scanner.addBuffer("", true); | ||
this._done = true; | ||
this._processInput(callback); | ||
}; | ||
Parser.prototype._processInput = function processInput(callback){ | ||
var value1 = /^(?:[\"\{\[\]\-0-9]|true\b|false\b|null\b|\s{1,256})/, | ||
string = /^(?:[^\"\\]{1,256}|\\[bfnrt\"\\\/]|\\u[0-9a-fA-F]{4}|\")/, | ||
number0 = /^[0-9]/, | ||
number1 = /^\d{0,256}/, | ||
number2 = /^[\.eE]/, | ||
number3 = number0, | ||
number4 = number1, | ||
number5 = /^[eE]/, | ||
number6 = /^[-+]/, | ||
number7 = number0, | ||
number8 = number1, | ||
key1 = /^(?:[\"\}]|\s{1,256})/, | ||
colon = /^(?:\:|\s{1,256})/, | ||
comma = /^(?:[\,\]\}]|\s{1,256})/, | ||
ws = /^\s{1,256}/; | ||
Parser.prototype._processInput = function(callback){ | ||
try{ | ||
if(this._expected === null){ | ||
throw Error("Unexpected input after parser has finished."); | ||
} | ||
if(typeof this._expected == "undefined"){ | ||
this._expected = this._parser.getExpectedState(); | ||
} | ||
if(this._expected){ | ||
for(;;){ | ||
var token = this._scanner.getToken(this._expected); | ||
if(token === true){ | ||
// need more input | ||
var match, value; | ||
main: for(;;){ | ||
switch(this._expect){ | ||
case "value1": | ||
case "value": | ||
match = value1.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
if(this._done){ | ||
throw Error("Parser cannot parse input: expected a value"); | ||
} | ||
} | ||
if(this._done){ | ||
throw Error("Parser has expected a value"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
switch(value){ | ||
case "\"": | ||
this.push({id: value, value: value}); | ||
this._expect = "string"; | ||
break; | ||
case "{": | ||
this.push({id: value, value: value}); | ||
this._stack.push(this._parent); | ||
this._parent = "object"; | ||
this._expect = "key1"; | ||
break; | ||
case "[": | ||
this.push({id: value, value: value}); | ||
this._stack.push(this._parent); | ||
this._parent = "array"; | ||
this._expect = "value1"; | ||
break; | ||
case "]": | ||
if(this._expect !== "value1"){ | ||
throw Error("Parser cannot parse input: unexpected token ']'"); | ||
} | ||
this.push({id: value, value: value}); | ||
this._parent = this._stack.pop(); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
case "-": | ||
this.push({id: value, value: value}); | ||
this._expect = "number0"; | ||
break; | ||
case "0": | ||
this.push({id: value, value: value}); | ||
this._expect = "number2"; | ||
break; | ||
case "1": | ||
case "2": | ||
case "3": | ||
case "4": | ||
case "5": | ||
case "6": | ||
case "7": | ||
case "8": | ||
case "9": | ||
this.push({id: "nonZero", value: value}); | ||
this._expect = "number1"; | ||
break; | ||
case "true": | ||
case "false": | ||
case "null": | ||
if(this._buffer.length === value.length && !this._done){ | ||
// wait for more input | ||
break main; | ||
} | ||
this.push({id: value, value: value}); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
// default: // ws | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
} | ||
this._parser.putToken(token, this._scanner); | ||
this._expected = this._parser.getExpectedState(); | ||
if(!this._expected){ | ||
// we are done | ||
case "keyVal": | ||
case "string": | ||
match = string.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
if(this._done || this._buffer.length >= 6){ | ||
throw Error("Parser cannot parse input: escaped characters"); | ||
} | ||
} | ||
if(this._done){ | ||
throw Error("Parser has expected a string value"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "\""){ | ||
this.push({id: value, value: value}); | ||
if(this._expect === "keyVal"){ | ||
this._expect = "colon"; | ||
}else{ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
} | ||
}else if(value.length > 1 && value.charAt(0) === "\\"){ | ||
this.push({id: "escapedChars", value: value}); | ||
}else{ | ||
this.push({id: "plainChunk", value: value}); | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
} | ||
// number chunks | ||
case "number0": // [0-9] | ||
match = number0.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected a digit"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "0"){ | ||
this.push({id: value, value: value}); | ||
this._expect = "number2"; | ||
}else{ | ||
this.push({id: "nonZero", value: value}); | ||
this._expect = "number1"; | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number1": // [0-9]* | ||
match = number1.exec(this._buffer); | ||
value = match[0]; | ||
if(value){ | ||
this.push({id: "numericChunk", value: value}); | ||
this._buffer = this._buffer.substring(value.length); | ||
}else{ | ||
if(this._buffer){ | ||
this._expect = "number2"; | ||
break; | ||
} | ||
if(this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
break; | ||
case "number2": // [\.eE]? | ||
match = number2.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "."){ | ||
this.push({id: value, value: value}); | ||
this._expect = "number3"; | ||
}else{ | ||
this.push({id: "exponent", value: value}); | ||
this._expect = "number6"; | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number3": // [0-9] | ||
match = number3.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected a fractional part of a number"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: "numericChunk", value: value}); | ||
this._expect = "number4"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number4": // [0-9]* | ||
match = number4.exec(this._buffer); | ||
value = match[0]; | ||
if(value){ | ||
this.push({id: "numericChunk", value: value}); | ||
this._buffer = this._buffer.substring(value.length); | ||
}else{ | ||
if(this._buffer){ | ||
this._expect = "number5"; | ||
break; | ||
} | ||
if(this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
break; | ||
case "number5": // [eE]? | ||
match = number5.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
} | ||
if(this._done){ | ||
this._expect = "done"; | ||
break; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: "exponent", value: value}); | ||
this._expect = "number6"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number6": // [-+]? | ||
match = number6.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
this._expect = "number7"; | ||
break; | ||
} | ||
if(this._done){ | ||
throw Error("Parser has expected an exponent value of a number"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: value, value: value}); | ||
this._expect = "number7"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number7": // [0-9] | ||
match = number7.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected an exponent part of a number"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
this.push({id: "numericChunk", value: value}); | ||
this._expect = "number8"; | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "number8": // [0-9]* | ||
match = number8.exec(this._buffer); | ||
value = match[0]; | ||
if(value){ | ||
this.push({id: "numericChunk", value: value}); | ||
this._buffer = this._buffer.substring(value.length); | ||
}else{ | ||
if(this._buffer || this._done){ | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
break; | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
break; | ||
case "key1": | ||
case "key": | ||
match = key1.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected an object key"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === "\""){ | ||
this.push({id: value, value: value}); | ||
this._expect = "keyVal"; | ||
}else if(value === "}"){ | ||
if(this._expect !== "key1"){ | ||
throw Error("Parser cannot parse input: unexpected token '}'"); | ||
} | ||
this.push({id: value, value: value}); | ||
this._parent = this._stack.pop(); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "colon": | ||
match = colon.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected ':'"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === ":"){ | ||
this.push({id: value, value: value}); | ||
this._expect = "value"; | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "aComma": | ||
case "oComma": | ||
match = comma.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer || this._done){ | ||
throw Error("Parser cannot parse input: expected ','"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
value = match[0]; | ||
if(value === ","){ | ||
this.push({id: value, value: value}); | ||
this._expect = this._expect === "aComma" ? "value" : "key"; | ||
}else if(value === "}" || value === "]"){ | ||
this.push({id: value, value: value}); | ||
this._parent = this._stack.pop(); | ||
if(this._parent){ | ||
this._expect = this._parent === "object" ? "oComma" : "aComma"; | ||
}else{ | ||
this._expect = "done"; | ||
} | ||
} | ||
this._buffer = this._buffer.substring(value.length); | ||
break; | ||
case "done": | ||
match = ws.exec(this._buffer); | ||
if(!match){ | ||
if(this._buffer){ | ||
throw Error("Parser cannot parse input: unexpected characters"); | ||
} | ||
// wait for more input | ||
break main; | ||
} | ||
this._buffer = this._buffer.substring(match[0].length); | ||
break; | ||
} | ||
} | ||
if(this._expected === null && !this._scanner.isFinished()){ | ||
throw Error("Scanner has unprocessed symbols."); | ||
} | ||
}catch(err){ | ||
@@ -69,5 +437,4 @@ callback(err); | ||
callback(); | ||
}; | ||
} | ||
module.exports = Parser; |
170
README.md
@@ -1,8 +0,15 @@ | ||
# stream-json [](http://travis-ci.org/uhop/stream-json) | ||
# stream-json | ||
`stream-json` is a collection of node.js 0.10 stream components for creating custom standard-compliant JSON processors, which requires a minimal memory footprint. It can parse JSON files far exceeding available memory. Even individual data items are streamed piece-wise. Streaming SAX-inspired event-based API is included as well. | ||
[![Build status][travis-image]][travis-url] | ||
[![Dependencies][deps-image]][deps-url] | ||
[![devDependencies][dev-deps-image]][dev-deps-url] | ||
[![NPM version][npm-image]][npm-url] | ||
`stream-json` is a collection of node.js stream components for creating custom standard-compliant JSON processors, which requires a minimal memory footprint. It can parse JSON files far exceeding available memory. Even individual primitive data items (keys, strings, and numbers) can be streamed piece-wise. Streaming SAX-inspired event-based API is included as well. | ||
Available components: | ||
* Streaming JSON `Parser` based on [parser-toolkit](http://github.com/uhop/parser-toolkit). | ||
* Streaming JSON `Parser` implemented manually to improve speed over `ClassicParser`. | ||
* Streaming JSON `ClassicParser` based on [parser-toolkit](http://github.com/uhop/parser-toolkit). | ||
* `Streamer`, which converts tokens into SAX-like event stream. | ||
@@ -13,2 +20,5 @@ * `Packer`, which can assemble numbers, strings, and object keys from individual chunks. It is useful, when user knows that individual data items can fit the available memory. Overall, it makes the API simpler. | ||
* `Source`, which is a helper that connects streams using `pipe()` and converts an event stream on the end of pipe into events, similar to `Emitter`. | ||
* Various utilities: | ||
* `Assembler` to assemble full objects from an event stream. | ||
* `StreamArray` handles a frequent use case: a huge array of relatively small objects. It streams array components individually taking care of assembling them automatically. | ||
@@ -68,3 +78,3 @@ Additionally a helper function is available in the main file, which creates a `Source` object with a default set of stream components. | ||
The test file for `Parser` can be found in `tests/test_parser.js`. Actually all test files in `tests/` use `Parser`. | ||
The test files for `Parser`: `tests/test_parser.js`, `tests/manual/test_parser.js`. Actually all test files in `tests/` use `Parser`. | ||
@@ -127,3 +137,3 @@ If you want to catch parsing errors, attach an error listener directly to a parser component — unlike data errors do not travel through stream pipes. | ||
The test file for `Streamer` can be found in `tests/test_streamer.js`. | ||
The test files for `Streamer`: `tests/test_streamer.js` and `tests/manual/test_streamer.js`. | ||
@@ -147,3 +157,3 @@ ### Packer | ||
* `packKeys` can be `true` or `false`. If `true`, a key value is returned as a new event: | ||
* `packKeys` can be `true` or `false` (the default). If `true`, a key value is returned as a new event: | ||
@@ -155,3 +165,3 @@ ```js | ||
`keyValue` event always follows `endKey`. | ||
* `packStrings` can be `true` or `false`. If `true`, a string value is returned as a new event: | ||
* `packStrings` can be `true` or `false` (the default). If `true`, a string value is returned as a new event: | ||
@@ -163,3 +173,3 @@ ```js | ||
`stringValue` event always follows `endString`. | ||
* `packNumbers` can be `true` or `false`. If `true`, a number value is returned as a new event: | ||
* `packNumbers` can be `true` or `false` (the default). If `true`, a number value is returned as a new event: | ||
@@ -177,5 +187,7 @@ ```js | ||
The test files for `Packer`: `tests/test_packer.js` and `tests/manual/test_packer.js`. | ||
### Emitter | ||
`Emitter` is a writeable stream, which consumes a stream of events, and emits them on itself. | ||
`Emitter` is a writeable stream, which consumes a stream of events, and emits them on itself. The standard `finish` event is used to indicate the end of a stream. | ||
@@ -196,2 +208,5 @@ It operates in an [objectMode](http://nodejs.org/api/stream.html#stream_object_mode). | ||
}); | ||
emitter.on("finish", function(){ | ||
console.log("done"); | ||
}); | ||
@@ -204,2 +219,4 @@ fs.createReadStream(fname). | ||
The test file for `Emitter`: `tests/test_emitter.js`. | ||
### Filter | ||
@@ -232,3 +249,3 @@ | ||
The test file for `Filter` can be found in `tests/test_filter.js`. | ||
The test files for `Filter`: `tests/test_filter.js` and `tests/manual/test_filter.js`. | ||
@@ -257,3 +274,3 @@ #### Path examples | ||
`Source` is a convenience object. It connects individual streams with pipes, and attaches itself to the end emitting all events on itself (just like `Emitter`). | ||
`Source` is a convenience object. It connects individual streams with pipes, and attaches itself to the end emitting all events on itself (just like `Emitter`). The standard `end` event is used to indicate the end of a stream. | ||
@@ -280,6 +297,10 @@ ```js | ||
When a stream ends, `Source` produces an event `end` without parameters. | ||
`Source` exposes three public properties: | ||
The test file for `Source` can be found in `tests/test_source.js`. | ||
* `streams` — an array of streams so you can inspect them individually, if needed. They are connected sequentially in the array order. | ||
* `input` — the beginning of a pipeline, which should be used as an input for a JSON stream. | ||
* `output` — the end of a pipeline, which can be used to pipe the resulting stream of objects for further processing. | ||
The test files for `Source`: `tests/test_source.js` and `tests/manual/test_source.js`. | ||
### main: createSource() | ||
@@ -317,7 +338,99 @@ | ||
The test files for `Source` are `tests/test_main.js`, and `tests/test_chunk.js`. | ||
The test files for `createSource()` are `tests/test_source.js`, `tests/manual/test_main.js`, and `tests/manual/test_chunk.js`. | ||
### ClassicParser | ||
It is a drop-in replacement for `Parser`, but it can emit whitespace, yet it is slower than the main parser. | ||
The test file for `ClassicParser`: `tests/test_classic.js`. | ||
### utils/Assembler | ||
A helper class to convert a JSON stream to a fully assembled JS object. It can be used to assemble sub-objects. | ||
```js | ||
var createSource = require("stream-json"); | ||
var Assembler = require("stream-json/utils/Assembler"); | ||
var source = createSource(options), | ||
assembler = new Assembler(); | ||
// Example of use: | ||
source.output.on("data", function(chunk){ | ||
assembler[chunk.name] && assembler[chunk.name](chunk.value); | ||
}); | ||
source.output.on("end", function(){ | ||
// here is our fully assembled object: | ||
console.log(assembler.current); | ||
}); | ||
fs.createReadStream(fname).pipe(source.input); | ||
``` | ||
`Assembler` is a simple state machine with an explicit stack. It exposes three properties: | ||
* `current` — an object we are working with at the moment. It can be either an object or an array. | ||
* Initial value is `null`. | ||
* If top-level object is a primitive value (`null`, `true`, `false`, a number, or a string), it will be placed in `current` too. | ||
* `key` — is a key value (a string) for a currently processed value, or `null`, if not expected. | ||
* If `current` is an object, a primitive value will be added directly to it using a current value of `key`. | ||
* After use `key` is assigned `null` to prevent memory leaks. | ||
* If `current` is an array, a primitive value will be added directly to it by `push()`. | ||
* `stack` — an array of parent objects. | ||
* `stack` always grows/shrinks by two items: a value of `current` and a value of `key`. | ||
* When an object or an array is closed, it is added to its parent, which is removed from the stack to become a current object again. | ||
* While adding to a parent a saved key is used if needed. Otherwise the second value is ignored. | ||
* When an object or an array is started, the `current` object and `key` are saved to `stack`. | ||
Obviously `Assembler` should be used only when you are sure that the result will fit into memory. It automatically means that all primitive values (strings or numbers) are small enough to fit in memory too. As such `Assembler` is meant to be used after `Packer`, which reconstructs keys, strings, and numbers from possible chunks. | ||
On the other hand, we use `stream-json` when JSON streams are big, and `JSON.parse()` is not an option. But we use `Assembler` to assemble sub-objects. One way to do it is to start directing calls to `Assembler` when we already selected a sub-object with `Filter`. Another way is shown in `StreamArray`. | ||
The test file for `Assembler`: `tests/test_assembler.js`. | ||
### utils/StreamArray | ||
This utility deals with a frequent use case: our JSON is an array of various sub-objects. The assumption is that while individual array items fit in memory, the array itself does not. Such files are frequently produced by various database dump utilities, e.g., [Django's dumpdata](https://docs.djangoproject.com/en/1.8/ref/django-admin/#dumpdata-app-label-app-label-app-label-model). | ||
`StreamArray` produces a stream of objects in the following format: | ||
```js | ||
{index, value} | ||
``` | ||
Where `index` is a numeric index in the array starting from 0, and `value` is a corresponding value. All objects are produced strictly sequentially. | ||
```js | ||
var createSource = require("stream-json"); | ||
var StreamArray = require("stream-json/utils/StreamArray"); | ||
var source = createSource(options), | ||
stream = StreamArray.make(); | ||
// Example of use: | ||
stream.output.on("data", function(object){ | ||
console.log(object.index, object.value); | ||
}); | ||
stream.output.on("end", function(){ | ||
console.log("done"); | ||
}); | ||
fs.createReadStream(fname).pipe(stream.input); | ||
``` | ||
`StreamArray` is a constructor, which optionally takes one object: `options`. `options` can contain some technical parameters, and it rarely needs to be specified. You can find it thoroughly documented in [node.js' Stream documentation](http://nodejs.org/api/stream.html). | ||
Directly on `StreamArray` there is a class-level helper function `make()`, which helps to construct a proper pipeline. It is similar to `createSource()` and takes the same argument `options`. Internally it creates and connects `Parser`, `Streamer`, `Packer`, and `StreamArray`, and returns an object with three properties: | ||
* `streams` — an array of streams so you can inspect them individually, if needed. They are connected sequentially in the array order. | ||
* `input` — the beginning of a pipeline, which should be used as an input for a JSON stream. | ||
* `output` — the end of a pipeline, which can be used for events, or to pipe the resulting stream of objects for further processing. | ||
The test file for `StreamArray`: `tests/test_array.js`. | ||
## Advanced use | ||
The whole library is organized as set of small components, which can be combined to produce the most effective pipeline. All components are based on node.js 0.10 [streams](http://nodejs.org/api/stream.html), and [events](http://nodejs.org/api/events.html). It is easy to add your own components to solve your unique tasks. | ||
The whole library is organized as a set of small components, which can be combined to produce the most effective pipeline. All components are based on node.js [streams](http://nodejs.org/api/stream.html), and [events](http://nodejs.org/api/events.html). It is easy to add your own components to solve your unique tasks. | ||
@@ -330,4 +443,8 @@ The code of all components are compact and simple. Please take a look at their source code to see how things are implemented, so you can produce your own components in no time. | ||
The test file `tests/sample.json.gz` is copied as is from an open source project [json-simple](https://code.google.com/p/json-simple/) under Apache License 2.0 and compressed with gzip. | ||
The test file `tests/sample.json.gz` is a combination of several publicly available datasets merged and compressed with gzip: | ||
* a snapshot of publicly available [Japanese statistics on birth and marriage in JSON](http://dataforjapan.org/dataset/birth-stat/resource/42799d3c-ecee-4b35-9f5a-7fec30596aa2). | ||
* a snapshot of publicly available [US Department of Housing and Urban Development - HUD's published metadata catalog (Schema Version 1.1)](https://catalog.data.gov/dataset/data-catalog). | ||
* a small fake sample made up by me featuring non-ASCII keys, non-ASCII strings, and primitive data missing in the other two samples. | ||
## Appendix A: tokens | ||
@@ -348,3 +465,3 @@ | ||
* `ws`: white spaces, usually ignored. | ||
* `ws`: white spaces, usually ignored. (Produced only by `ClassicParser`.) | ||
* `-`: a unary negation used in a negative number either to start a number, or as an exponent sign. | ||
@@ -369,1 +486,20 @@ * `+`: used as an exponent sign. | ||
* `:`: separates a key and its value in an object literal. | ||
## Release History | ||
- 0.2.0 *new faster parser, formal unit tests, added utilities to assemble objects on the fly.* | ||
- 0.1.0 *bug fixes, more documentation.* | ||
- 0.0.5 *bug fixes.* | ||
- 0.0.4 *improved grammar.* | ||
- 0.0.3 *the technical release.* | ||
- 0.0.2 *bug fixes.* | ||
- 0.0.1 *the initial release.* | ||
[npm-image]: https://img.shields.io/npm/v/stream-json.svg | ||
[npm-url]: https://npmjs.org/package/stream-json | ||
[deps-image]: https://img.shields.io/david/uhop/stream-json.svg | ||
[deps-url]: https://david-dm.org/uhop/stream-json | ||
[dev-deps-image]: https://img.shields.io/david/dev/uhop/stream-json.svg | ||
[dev-deps-url]: https://david-dm.org/uhop/stream-json#info=devDependencies | ||
[travis-image]: https://img.shields.io/travis/uhop/stream-json.svg | ||
[travis-url]: https://travis-ci.org/uhop/stream-json |
@@ -0,1 +1,4 @@ | ||
"use strict"; | ||
var util = require("util"); | ||
@@ -12,2 +15,4 @@ var EventEmitter = require("events").EventEmitter; | ||
this.streams = streams; | ||
// connect pipes | ||
@@ -14,0 +19,0 @@ var input = this.input = streams[0], output = input; |
@@ -0,1 +1,4 @@ | ||
"use strict"; | ||
var util = require("util"); | ||
@@ -2,0 +5,0 @@ var Transform = require("stream").Transform; |
@@ -1,81 +0,36 @@ | ||
var ReadString = require("./ReadString"); | ||
var Parser = require("../Parser"); | ||
var Streamer = require("../Streamer"); | ||
var Packer = require("../Packer"); | ||
var StreamPrinter = require("./StreamPrinter"); | ||
"use strict"; | ||
var Source = require("../Source"); | ||
var unit = require("heya-unit"); | ||
var object = { | ||
stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...", | ||
anArray: [1, 2, true, "tabs?\t\t\t\u0001\u0002\u0003", false] | ||
}; | ||
var input = JSON.stringify(object); | ||
var Assembler = require("../utils/Assembler"); | ||
var Parser = require("../Parser"); | ||
var Streamer = require("../Streamer"); | ||
var Packer = require("../Packer"); | ||
var stream = new ReadString(input); | ||
var parser = new Parser(); | ||
var streamer = new Streamer(); | ||
var packer = new Packer({packKeys: true, packStrings: true, packNumbers: true}); | ||
var printer = new StreamPrinter(); | ||
var ReadString = require("./ReadString"); | ||
var source = new Source([parser, streamer, packer/*, printer*/]); | ||
unit.add(module, [ | ||
function test_escaped(t){ | ||
var async = t.startAsync("test_escaped"); | ||
// reconstruct an object | ||
var object = { | ||
stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...", | ||
anArray: [1, 2, true, "tabs?\t\t\t\u0001\u0002\u0003", false] | ||
}, | ||
input = JSON.stringify(object), | ||
pipeline = new ReadString(input).pipe(new Parser()).pipe(new Streamer()). | ||
pipe(new Packer({packKeys: true, packStrings: true, packNumbers: true})), | ||
assembler = new Assembler(); | ||
var current, key, stack = []; | ||
function startObject(newValue){ | ||
if(current !== undefined){ | ||
stack.push(current, key); | ||
key = undefined; | ||
pipeline.on("data", function(chunk){ | ||
assembler[chunk.name] && assembler[chunk.name](chunk.value); | ||
}); | ||
pipeline.on("end", function(){ | ||
eval(t.TEST("t.unify(assembler.current, object)")); | ||
async.done(); | ||
}); | ||
} | ||
current = newValue; | ||
console.log("new object: ", JSON.stringify(current)); | ||
console.log("stack: ", JSON.stringify(stack)); | ||
} | ||
function endObject(){ | ||
if(stack.length){ | ||
var value = current; | ||
key = stack.pop(); | ||
current = stack.pop(); | ||
addValue(value); | ||
} | ||
console.log("old object: ", JSON.stringify(current)); | ||
console.log("stack: ", JSON.stringify(stack)); | ||
} | ||
function addValue(value){ | ||
if(current instanceof Array){ | ||
current.push(value); | ||
}else{ | ||
current[key] = value; | ||
key = undefined; | ||
} | ||
console.log("updated object: ", JSON.stringify(current)); | ||
} | ||
source.on("startObject", function(){ startObject({}); }); | ||
source.on("startArray", function(){ startObject([]); }); | ||
source.on("endObject", endObject); | ||
source.on("endArray", endObject); | ||
source.on("keyValue", function(value){ key = value; }); | ||
source.on("stringValue", addValue); | ||
source.on("numberValue", function(value){ addValue(+value); }); | ||
source.on("nullValue", function(){ addValue(null); }); | ||
source.on("trueValue", function(){ addValue(true); }); | ||
source.on("falseValue", function(){ addValue(false); }); | ||
source.on("end", function(){ | ||
console.log("in: ", input); | ||
console.log("out: ", JSON.stringify(current)); | ||
}); | ||
console.log(input); | ||
stream.pipe(source.input); | ||
]); |
@@ -0,1 +1,6 @@ | ||
"use strict"; | ||
var unit = require("heya-unit"); | ||
var ReadString = require("./ReadString"); | ||
@@ -5,15 +10,36 @@ var Parser = require("../Parser"); | ||
var Filter = require("../Filter"); | ||
var StreamPrinter = require("./StreamPrinter") | ||
var input = '{"a": 1, "b": true, "c": ["d"]}'; | ||
unit.add(module, [ | ||
function test_filter(t){ | ||
var async = t.startAsync("test_filter"); | ||
var input = '{"a": 1, "b": true, "c": ["d"]}', | ||
pipeline = new ReadString(input).pipe(new Parser()).pipe(new Streamer()). | ||
pipe(new Filter({filter: /^(|a|c)$/})), | ||
result = []; | ||
var stream = new ReadString(input); | ||
var parser = new Parser(); | ||
var streamer = new Streamer(); | ||
var filter = new Filter({filter: /^(|a|c)$/}); | ||
var printer = new StreamPrinter(); | ||
console.log(input); | ||
stream.pipe(parser).pipe(streamer).pipe(filter).pipe(printer); | ||
pipeline.on("data", function(chunk){ | ||
result.push({name: chunk.name, val: chunk.value}); | ||
}); | ||
pipeline.on("end", function(){ | ||
eval(t.ASSERT("result.length === 15")); | ||
eval(t.TEST("result[0].name === 'startObject'")); | ||
eval(t.TEST("result[1].name === 'startKey'")); | ||
eval(t.TEST("result[2].name === 'stringChunk' && result[2].val === 'a'")); | ||
eval(t.TEST("result[3].name === 'endKey'")); | ||
eval(t.TEST("result[4].name === 'keyValue' && result[4].val === 'a'")); | ||
eval(t.TEST("result[5].name === 'startNumber'")); | ||
eval(t.TEST("result[6].name === 'numberChunk' && result[6].val === '1'")); | ||
eval(t.TEST("result[7].name === 'endNumber'")); | ||
eval(t.TEST("result[8].name === 'startKey'")); | ||
eval(t.TEST("result[9].name === 'stringChunk' && result[9].val === 'c'")); | ||
eval(t.TEST("result[10].name === 'endKey'")); | ||
eval(t.TEST("result[11].name === 'keyValue' && result[11].val === 'c'")); | ||
eval(t.TEST("result[12].name === 'startArray'")); | ||
eval(t.TEST("result[13].name === 'endArray'")); | ||
eval(t.TEST("result[14].name === 'endObject'")); | ||
async.done(); | ||
}); | ||
} | ||
]); |
@@ -0,1 +1,6 @@ | ||
"use strict"; | ||
var unit = require("heya-unit"); | ||
var ReadString = require("./ReadString"); | ||
@@ -5,15 +10,46 @@ var Parser = require("../Parser"); | ||
var Packer = require("../Packer"); | ||
var StreamPrinter = require("./StreamPrinter") | ||
var input = '{"a": 1, "b": true, "c": ["d"]}'; | ||
unit.add(module, [ | ||
function test_packer(t){ | ||
var async = t.startAsync("test_packer"); | ||
var input = '{"a": 1, "b": true, "c": ["d"]}', | ||
pipeline = new ReadString(input).pipe(new Parser()).pipe(new Streamer()). | ||
pipe(new Packer({packKeys: true, packStrings: true, packNumbers: true})), | ||
result = []; | ||
var stream = new ReadString(input); | ||
var parser = new Parser(); | ||
var streamer = new Streamer(); | ||
var packer = new Packer({packKeys: true, packStrings: true, packNumbers: true}); | ||
var printer = new StreamPrinter(); | ||
console.log(input); | ||
stream.pipe(parser).pipe(streamer).pipe(packer).pipe(printer); | ||
pipeline.on("data", function(chunk){ | ||
result.push({name: chunk.name, val: chunk.value}); | ||
}); | ||
pipeline.on("end", function(){ | ||
eval(t.ASSERT("result.length === 25")); | ||
eval(t.TEST("result[0].name === 'startObject'")); | ||
eval(t.TEST("result[1].name === 'startKey'")); | ||
eval(t.TEST("result[2].name === 'stringChunk' && result[2].val === 'a'")); | ||
eval(t.TEST("result[3].name === 'endKey'")); | ||
eval(t.TEST("result[4].name === 'keyValue' && result[4].val === 'a'")); | ||
eval(t.TEST("result[5].name === 'startNumber'")); | ||
eval(t.TEST("result[6].name === 'numberChunk' && result[6].val === '1'")); | ||
eval(t.TEST("result[7].name === 'endNumber'")); | ||
eval(t.TEST("result[8].name === 'numberValue' && result[8].val === '1'")); | ||
eval(t.TEST("result[9].name === 'startKey'")); | ||
eval(t.TEST("result[10].name === 'stringChunk' && result[10].val === 'b'")); | ||
eval(t.TEST("result[11].name === 'endKey'")); | ||
eval(t.TEST("result[12].name === 'keyValue' && result[12].val === 'b'")); | ||
eval(t.TEST("result[13].name === 'trueValue' && result[13].val === true")); | ||
eval(t.TEST("result[14].name === 'startKey'")); | ||
eval(t.TEST("result[15].name === 'stringChunk' && result[15].val === 'c'")); | ||
eval(t.TEST("result[16].name === 'endKey'")); | ||
eval(t.TEST("result[17].name === 'keyValue' && result[17].val === 'c'")); | ||
eval(t.TEST("result[18].name === 'startArray'")); | ||
eval(t.TEST("result[19].name === 'startString'")); | ||
eval(t.TEST("result[20].name === 'stringChunk' && result[20].val === 'd'")); | ||
eval(t.TEST("result[21].name === 'endString'")); | ||
eval(t.TEST("result[22].name === 'stringValue' && result[22].val === 'd'")); | ||
eval(t.TEST("result[23].name === 'endArray'")); | ||
eval(t.TEST("result[24].name === 'endObject'")); | ||
async.done(); | ||
}); | ||
} | ||
]); |
@@ -0,14 +1,49 @@ | ||
"use strict"; | ||
var unit = require("heya-unit"); | ||
var ReadString = require("./ReadString"); | ||
var Parser = require("../Parser"); | ||
var TokenPrinter = require("./TokenPrinter") | ||
var input = '{"a": 1, "b": true, "c": ["d"]}'; | ||
unit.add(module, [ | ||
function test_parser(t) { | ||
var async = t.startAsync("test_parser"); | ||
var input = '{"a": 1, "b": true, "c": ["d"]}', | ||
pipeline = new ReadString(input).pipe(new Parser()), | ||
result = []; | ||
var stream = new ReadString(input); | ||
var parser = new Parser(); | ||
var tokens = new TokenPrinter(); | ||
console.log(input); | ||
stream.pipe(parser).pipe(tokens); | ||
pipeline.on("data", function(chunk){ | ||
result.push({id: chunk.id, val: chunk.value}); | ||
}); | ||
pipeline.on("end", function(){ | ||
eval(t.ASSERT("result.length === 23")); | ||
eval(t.TEST("result[0].id === '{' && result[0].val === '{'")); | ||
eval(t.TEST("result[1].id === '\"' && result[1].val === '\"'")); | ||
eval(t.TEST("result[2].id === 'plainChunk' && result[2].val === 'a'")); | ||
eval(t.TEST("result[3].id === '\"' && result[3].val === '\"'")); | ||
eval(t.TEST("result[4].id === ':' && result[4].val === ':'")); | ||
eval(t.TEST("result[5].id === 'nonZero' && result[5].val === '1'")); | ||
eval(t.TEST("result[6].id === ',' && result[6].val === ','")); | ||
eval(t.TEST("result[7].id === '\"' && result[7].val === '\"'")); | ||
eval(t.TEST("result[8].id === 'plainChunk' && result[8].val === 'b'")); | ||
eval(t.TEST("result[9].id === '\"' && result[9].val === '\"'")); | ||
eval(t.TEST("result[10].id === ':' && result[10].val === ':'")); | ||
eval(t.TEST("result[11].id === 'true' && result[11].val === 'true'")); | ||
eval(t.TEST("result[12].id === ',' && result[12].val === ','")); | ||
eval(t.TEST("result[13].id === '\"' && result[13].val === '\"'")); | ||
eval(t.TEST("result[14].id === 'plainChunk' && result[14].val === 'c'")); | ||
eval(t.TEST("result[15].id === '\"' && result[15].val === '\"'")); | ||
eval(t.TEST("result[16].id === ':' && result[16].val === ':'")); | ||
eval(t.TEST("result[17].id === '[' && result[17].val === '['")); | ||
eval(t.TEST("result[18].id === '\"' && result[18].val === '\"'")); | ||
eval(t.TEST("result[19].id === 'plainChunk' && result[19].val === 'd'")); | ||
eval(t.TEST("result[20].id === '\"' && result[20].val === '\"'")); | ||
eval(t.TEST("result[21].id === ']' && result[21].val === ']'")); | ||
eval(t.TEST("result[22].id === '}' && result[22].val === '}'")); | ||
async.done(); | ||
}); | ||
} | ||
]); |
@@ -1,34 +0,47 @@ | ||
var Source = require("../Source"); | ||
var Parser = require("../Parser"); | ||
var Streamer = require("../Streamer"); | ||
"use strict"; | ||
var unit = require("heya-unit"); | ||
var fs = require("fs"), path = require("path"), zlib = require("zlib"); | ||
var createSource = require("../main"); | ||
var Counter = require("./Counter"); | ||
var source = new Source([new Parser(), new Streamer()]); | ||
var objectCounter = 0, arrayCounter = 0, stringCounter = 0, numberCounter = 0, | ||
nullCounter = 0, trueCounter = 0, falseCounter = 0, keyCounter = 0; | ||
unit.add(module, [ | ||
function test_source(t){ | ||
var async = t.startAsync("test_source"); | ||
source.on("startObject", function(){ ++objectCounter; }); | ||
source.on("startArray", function(){ ++arrayCounter; }); | ||
source.on("startKey", function(){ ++keyCounter; }); | ||
source.on("startString", function(){ ++stringCounter; }); | ||
source.on("startNumber", function(){ ++numberCounter; }); | ||
source.on("nullValue", function(){ ++nullCounter; }); | ||
source.on("trueValue", function(){ ++trueCounter; }); | ||
source.on("falseValue", function(){ ++falseCounter; }); | ||
var plainCounter = new Counter(), | ||
streamCounter = new Counter(), | ||
source = createSource(); | ||
source.on("end", function(){ | ||
console.log("objects:", objectCounter); | ||
console.log("arrays:", arrayCounter); | ||
console.log("keys:", keyCounter); | ||
console.log("strings:", stringCounter); | ||
console.log("numbers:", numberCounter); | ||
console.log("nulls:", nullCounter); | ||
console.log("trues:", trueCounter); | ||
console.log("falses:", falseCounter); | ||
}); | ||
source.on("startObject", function(){ ++streamCounter.objects; }); | ||
source.on("keyValue", function(){ ++streamCounter.keys; }); | ||
source.on("startArray", function(){ ++streamCounter.arrays; }); | ||
source.on("nullValue", function(){ ++streamCounter.nulls; }); | ||
source.on("trueValue", function(){ ++streamCounter.trues; }); | ||
source.on("falseValue", function(){ ++streamCounter.falses; }); | ||
source.on("numberValue", function(){ ++streamCounter.numbers; }); | ||
source.on("stringValue", function(){ ++streamCounter.strings; }); | ||
fs.createReadStream(path.resolve(__dirname, "sample.json.gz")). | ||
pipe(zlib.createGunzip()).pipe(source.input); | ||
source.on("end", function(){ | ||
eval(t.TEST("t.unify(plainCounter, streamCounter)")); | ||
async.done(); | ||
}); | ||
fs.readFile(path.resolve(__dirname, "./sample.json.gz"), function(err, data){ | ||
if(err){ throw err; } | ||
zlib.gunzip(data, function(err, data){ | ||
if(err){ throw err; } | ||
var o = JSON.parse(data); | ||
Counter.walk(o, plainCounter); | ||
fs.createReadStream(path.resolve(__dirname, "./sample.json.gz")). | ||
pipe(zlib.createGunzip()).pipe(source.input); | ||
}); | ||
}); | ||
} | ||
]); |
@@ -0,16 +1,47 @@ | ||
"use strict"; | ||
var unit = require("heya-unit"); | ||
var ReadString = require("./ReadString"); | ||
var Parser = require("../Parser"); | ||
var Streamer = require("../Streamer"); | ||
var StreamPrinter = require("./StreamPrinter") | ||
var input = '{"a": 1, "b": true, "c": ["d"]}'; | ||
unit.add(module, [ | ||
function test_streamer(t){ | ||
var async = t.startAsync("test_streamer"); | ||
var input = '{"a": 1, "b": true, "c": ["d"]}', | ||
pipeline = new ReadString(input).pipe(new Parser()).pipe(new Streamer()), | ||
result = []; | ||
var stream = new ReadString(input); | ||
var parser = new Parser(); | ||
var streamer = new Streamer(); | ||
var printer = new StreamPrinter(); | ||
console.log(input); | ||
stream.pipe(parser).pipe(streamer).pipe(printer); | ||
pipeline.on("data", function(chunk){ | ||
result.push({name: chunk.name, val: chunk.value}); | ||
}); | ||
pipeline.on("end", function(){ | ||
eval(t.ASSERT("result.length === 20")); | ||
eval(t.TEST("result[0].name === 'startObject'")); | ||
eval(t.TEST("result[1].name === 'startKey'")); | ||
eval(t.TEST("result[2].name === 'stringChunk' && result[2].val === 'a'")); | ||
eval(t.TEST("result[3].name === 'endKey'")); | ||
eval(t.TEST("result[4].name === 'startNumber'")); | ||
eval(t.TEST("result[5].name === 'numberChunk' && result[5].val === '1'")); | ||
eval(t.TEST("result[6].name === 'endNumber'")); | ||
eval(t.TEST("result[7].name === 'startKey'")); | ||
eval(t.TEST("result[8].name === 'stringChunk' && result[8].val === 'b'")); | ||
eval(t.TEST("result[9].name === 'endKey'")); | ||
eval(t.TEST("result[10].name === 'trueValue' && result[10].val === true")); | ||
eval(t.TEST("result[11].name === 'startKey'")); | ||
eval(t.TEST("result[12].name === 'stringChunk' && result[12].val === 'c'")); | ||
eval(t.TEST("result[13].name === 'endKey'")); | ||
eval(t.TEST("result[14].name === 'startArray'")); | ||
eval(t.TEST("result[15].name === 'startString'")); | ||
eval(t.TEST("result[16].name === 'stringChunk' && result[16].val === 'd'")); | ||
eval(t.TEST("result[17].name === 'endString'")); | ||
eval(t.TEST("result[18].name === 'endArray'")); | ||
eval(t.TEST("result[19].name === 'endObject'")); | ||
async.done(); | ||
}); | ||
} | ||
]); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Uses eval
Supply chain risk: Package uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
42
1719
490
134435
1
9
125