What is JSONStream?
The JSONStream npm package is a Node.js module that provides a streaming JSON.parse and stringify. It allows you to parse large JSON files or streams chunk by chunk without loading the entire file into memory, and to stringify large objects into JSON streams.
What are JSONStream's main functionalities?
Parsing JSON Streams
This feature allows you to parse JSON data that is being streamed. You can use JSONStream.parse() to process a stream of JSON data and emit JavaScript objects.
{"_readableState":{"objectMode":true,"highWaterMark":16,"buffer":{"head":null,"tail":null,"length":0},"length":0,"pipes":null,"pipesCount":0,"flowing":null,"ended":false,"endEmitted":false,"reading":false,"sync":true,"needReadable":false,"emittedReadable":false,"readableListening":false,"resumeScheduled":false,"paused":true,"emitClose":true,"autoDestroy":true,"destroyed":false,"defaultEncoding":"utf8","awaitDrain":0,"readingMore":false,"decoder":null,"encoding":null},"readable":true,"_events":{},"_eventsCount":0,"_writableState":{"objectMode":true,"highWaterMark":16,"finalCalled":false,"needDrain":false,"ending":false,"ended":false,"finished":false,"destroyed":false,"decodeStrings":false,"defaultEncoding":"utf8","length":0,"writing":false,"corked":0,"sync":false,"bufferProcessing":false,"writecb":null,"writelen":0,"bufferedRequest":null,"lastBufferedRequest":null,"pendingcb":0,"prefinished":false,"errorEmitted":false,"emitClose":true,"autoDestroy":true,"bufferedRequestCount":0,"corkedRequestsFree":{"next":null,"entry":null}},"writable":true,"allowHalfOpen":true,"_transformState":{"needTransform":false,"transforming":false,"writecb":null,"writechunk":null,"writeencoding":null}}
Stringifying Objects to JSON Streams
This feature allows you to convert JavaScript objects into a JSON formatted stream. You can use JSONStream.stringify() to create a stream that outputs JSON data.
{"_readableState":{"objectMode":true,"highWaterMark":16,"buffer":{"head":null,"tail":null,"length":0},"length":0,"pipes":null,"pipesCount":0,"flowing":null,"ended":false,"endEmitted":false,"reading":false,"sync":true,"needReadable":false,"emittedReadable":false,"readableListening":false,"resumeScheduled":false,"paused":true,"emitClose":true,"autoDestroy":true,"destroyed":false,"defaultEncoding":"utf8","awaitDrain":0,"readingMore":false,"decoder":null,"encoding":null},"readable":true,"_events":{},"_eventsCount":0,"_writableState":{"objectMode":true,"highWaterMark":16,"finalCalled":false,"needDrain":false,"ending":false,"ended":false,"finished":false,"destroyed":false,"decodeStrings":false,"defaultEncoding":"utf8","length":0,"writing":false,"corked":0,"sync":false,"bufferProcessing":false,"writecb":null,"writelen":0,"bufferedRequest":null,"lastBufferedRequest":null,"pendingcb":0,"prefinished":false,"errorEmitted":false,"emitClose":true,"autoDestroy":true,"bufferedRequestCount":0,"corkedRequestsFree":{"next":null,"entry":null}},"writable":true,"allowHalfOpen":true,"_transformState":{"needTransform":false,"transforming":false,"writecb":null,"writechunk":null,"writeencoding":null}}
Other packages similar to JSONStream
stream-json
stream-json is a package similar to JSONStream that provides a streaming JSON parser and a set of stream components for different use cases. It is designed to be a more modular and flexible solution than JSONStream, allowing users to assemble the functionality they need.
big-json
big-json provides a way to parse and stringify large JSON files in a streaming fashion, similar to JSONStream. It uses JSONStream internally but adds a simpler API for reading and writing JSON files directly from and to the filesystem.
JSONStream
streaming JSON.parse and stringify
<img src=https://secure.travis-ci.org/dominictarr/JSONStream.png?branch=master>
example
var request = require('request')
, JSONStream = require('JSONStream')
, es = require('event-stream')
request({url: 'http://isaacs.couchone.com/registry/_all_docs'})
.pipe(JSONStream.parse('rows.*'))
.pipe(es.mapSync(function (data) {
console.error(data)
return data
}))
JSONStream.parse(path)
parse stream of values that match a path
JSONStream.parse('rows.*.doc')
The ..
operator is the recursive descent operator from JSONPath, which will match a child at any depth (see examples below).
If your keys have keys that include .
or *
etc, use an array instead.
['row', true, /^doc/]
.
If you use an array, RegExp
s, booleans, and/or functions. The ..
operator is also available in array representation, using {recurse: true}
.
any object that matches the path will be emitted as 'data' (and pipe
d down stream)
If path
is empty or null, no 'data' events are emitted.
Examples
query a couchdb view:
curl -sS localhost:5984/tests/_all_docs&include_docs=true
you will get something like this:
{"total_rows":129,"offset":0,"rows":[
{ "id":"change1_0.6995461115147918"
, "key":"change1_0.6995461115147918"
, "value":{"rev":"1-e240bae28c7bb3667f02760f6398d508"}
, "doc":{
"_id": "change1_0.6995461115147918"
, "_rev": "1-e240bae28c7bb3667f02760f6398d508","hello":1}
},
{ "id":"change2_0.6995461115147918"
, "key":"change2_0.6995461115147918"
, "value":{"rev":"1-13677d36b98c0c075145bb8975105153"}
, "doc":{
"_id":"change2_0.6995461115147918"
, "_rev":"1-13677d36b98c0c075145bb8975105153"
, "hello":2
}
},
]}
we are probably most interested in the rows.*.docs
create a Stream
that parses the documents from the feed like this:
var stream = JSONStream.parse(['rows', true, 'doc'])
stream.on('data', function(data) {
console.log('received:', data);
});
stream.on('root', function(root, count) {
if (!count) {
console.log('no matches found:', root);
}
});
awesome!
recursive patterns (..)
JSONStream.parser('docs..value')
(or JSONStream.parser(['docs', {recurse: true}, 'value'])
using an array)
will emit every value
object that is a child, grand-child, etc. of the
docs
object. In this example, it will match exactly 5 times at various depth
levels, emitting 0, 1, 2, 3 and 4 as results.
{
"total": 5,
"docs": [
{
"key": {
"value": 0,
"some": "property"
}
},
{"value": 1},
{"value": 2},
{"blbl": [{}, {"a":0, "b":1, "value":3}, 10]},
{"value": 4}
]
}
JSONStream.parse(pattern, map)
provide a function that can be used to map or filter
the json output. map
is passed the value at that node of the pattern,
if map
return non-nullish (anything but null
or undefined
)
that value will be emitted in the stream. If it returns a nullish value,
nothing will be emitted.
JSONStream.stringify(open, sep, close)
Create a writable stream.
you may pass in custom open
, close
, and seperator
strings.
But, by default, JSONStream.stringify()
will create an array,
(with default options open='[\n', sep='\n,\n', close='\n]\n'
)
If you call JSONStream.stringify(false)
the elements will only be seperated by a newline.
If you only write one item this will be valid JSON.
If you write many items,
you can use a RegExp
to split it into valid chunks.
JSONStream.stringifyObject(open, sep, close)
Very much like JSONStream.stringify
,
but creates a writable stream for objects instead of arrays.
Accordingly, open='{\n', sep='\n,\n', close='\n}\n'
.
When you .write()
to the stream you must supply an array with [ key, data ]
as the first argument.
unix tool
query npm to see all the modules that browserify has ever depended on.
curl https://registry.npmjs.org/browserify | JSONStream 'versions.*.dependencies'
numbers
There are occasional problems parsing and unparsing very precise numbers.
I have opened an issue here:
https://github.com/creationix/jsonparse/issues/2
+1
Acknowlegements
this module depends on https://github.com/creationix/jsonparse
by Tim Caswell
and also thanks to Florent Jaby for teaching me about parsing with:
https://github.com/Floby/node-json-streams
license
MIT / APACHE2