What is JSONStream?
The JSONStream npm package is a Node.js module that provides a streaming JSON.parse and stringify. It allows you to parse large JSON files or streams chunk by chunk without loading the entire file into memory, and to stringify large objects into JSON streams.
What are JSONStream's main functionalities?
Parsing JSON Streams
This feature allows you to parse JSON data that is being streamed. You can use JSONStream.parse() to process a stream of JSON data and emit JavaScript objects.
{"_readableState":{"objectMode":true,"highWaterMark":16,"buffer":{"head":null,"tail":null,"length":0},"length":0,"pipes":null,"pipesCount":0,"flowing":null,"ended":false,"endEmitted":false,"reading":false,"sync":true,"needReadable":false,"emittedReadable":false,"readableListening":false,"resumeScheduled":false,"paused":true,"emitClose":true,"autoDestroy":true,"destroyed":false,"defaultEncoding":"utf8","awaitDrain":0,"readingMore":false,"decoder":null,"encoding":null},"readable":true,"_events":{},"_eventsCount":0,"_writableState":{"objectMode":true,"highWaterMark":16,"finalCalled":false,"needDrain":false,"ending":false,"ended":false,"finished":false,"destroyed":false,"decodeStrings":false,"defaultEncoding":"utf8","length":0,"writing":false,"corked":0,"sync":false,"bufferProcessing":false,"writecb":null,"writelen":0,"bufferedRequest":null,"lastBufferedRequest":null,"pendingcb":0,"prefinished":false,"errorEmitted":false,"emitClose":true,"autoDestroy":true,"bufferedRequestCount":0,"corkedRequestsFree":{"next":null,"entry":null}},"writable":true,"allowHalfOpen":true,"_transformState":{"needTransform":false,"transforming":false,"writecb":null,"writechunk":null,"writeencoding":null}}
Stringifying Objects to JSON Streams
This feature allows you to convert JavaScript objects into a JSON formatted stream. You can use JSONStream.stringify() to create a stream that outputs JSON data.
{"_readableState":{"objectMode":true,"highWaterMark":16,"buffer":{"head":null,"tail":null,"length":0},"length":0,"pipes":null,"pipesCount":0,"flowing":null,"ended":false,"endEmitted":false,"reading":false,"sync":true,"needReadable":false,"emittedReadable":false,"readableListening":false,"resumeScheduled":false,"paused":true,"emitClose":true,"autoDestroy":true,"destroyed":false,"defaultEncoding":"utf8","awaitDrain":0,"readingMore":false,"decoder":null,"encoding":null},"readable":true,"_events":{},"_eventsCount":0,"_writableState":{"objectMode":true,"highWaterMark":16,"finalCalled":false,"needDrain":false,"ending":false,"ended":false,"finished":false,"destroyed":false,"decodeStrings":false,"defaultEncoding":"utf8","length":0,"writing":false,"corked":0,"sync":false,"bufferProcessing":false,"writecb":null,"writelen":0,"bufferedRequest":null,"lastBufferedRequest":null,"pendingcb":0,"prefinished":false,"errorEmitted":false,"emitClose":true,"autoDestroy":true,"bufferedRequestCount":0,"corkedRequestsFree":{"next":null,"entry":null}},"writable":true,"allowHalfOpen":true,"_transformState":{"needTransform":false,"transforming":false,"writecb":null,"writechunk":null,"writeencoding":null}}
Other packages similar to JSONStream
stream-json
stream-json is a package similar to JSONStream that provides a streaming JSON parser and a set of stream components for different use cases. It is designed to be a more modular and flexible solution than JSONStream, allowing users to assemble the functionality they need.
big-json
big-json provides a way to parse and stringify large JSON files in a streaming fashion, similar to JSONStream. It uses JSONStream internally but adds a simpler API for reading and writing JSON files directly from and to the filesystem.
JSONStream
streaming JSON.parse and stringify
example
in node v0.4.x
var request = require('request')
, JSONStream = require('JSONStream')
, es = require('event-stream')
var parser = JSONStream.parse(['rows', /./])
, req = request({url: 'http://isaacs.couchone.com/registry/_all_docs'})
in node 0.4.x
req.pipe(parser)
parser.pipe(es.log(''))
in node v0.5.x
req.pipe(parser).pipe(es.log(''))
JSONStream.parse(path)
usually, a json API will return a list of objects.
path
should be an array of property names and/or RegExp
s.
any object that matches the path will be emitted as 'data' (and pipe()
d down stream)
if path
is empty or null, or if no matches are made:
JSONStream.parse will only emit 'data' once, emitting the root object.
for example, couchdb returns views like this:
curl -sS localhost:5984/tests/_all_docs
returns this:
{"total_rows":129,"offset":0,"rows":[
{ "id":"change1_0.6995461115147918"
, "key":"change1_0.6995461115147918"
, "value":{"rev":"1-e240bae28c7bb3667f02760f6398d508"}
, "doc":{
"_id": "change1_0.6995461115147918"
, "_rev": "1-e240bae28c7bb3667f02760f6398d508","hello":1}
},
{"id":"change2_0.6995461115147918","key":"change2_0.6995461115147918","value":{"rev":"1-13677d36b98c0c075145bb8975105153"},"doc":{"_id":"change2_0.6995461115147918","_rev":"1-13677d36b98c0c075145bb8975105153","hello":2}},
...
]}
we are probably interested in the rows.*.doc
create a Stream
that parses the documents from the feed like this:
JSONStream.parse(['rows', /./, 'doc'])
awesome!
todo
Acknowledgements
this module depends on https://github.com/creationix/jsonparse
by Tim Caswell
and also thanks to Florent Jaby for teaching me about parsing with:
https://github.com/Floby/node-json-streams