json-to-jsonl
Advanced tools
Comparing version 1.0.0 to 1.1.0
@@ -16,3 +16,9 @@ 'use strict'; | ||
var writeJSONLines = function writeJSONLines(jsonFilename, getArray) { | ||
var defaultGetArray = function defaultGetArray(x) { | ||
return x; | ||
}; | ||
var writeJSONLines = function writeJSONLines(jsonFilename) { | ||
var getArray = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : defaultGetArray; | ||
var json = getJSON(jsonFilename); | ||
@@ -26,5 +32,8 @@ var arr = validateArray(getArray(json)); | ||
writeStream.end(); | ||
return arr.length; | ||
return { | ||
lines: arr.length, | ||
file: jsonlFilename | ||
}; | ||
}; | ||
module.exports = writeJSONLines; |
{ | ||
"name": "json-to-jsonl", | ||
"version": "1.0.0", | ||
"version": "1.1.0", | ||
"description": "Convert an array in a .json file to a newline-delimited .jsonl file.", | ||
@@ -5,0 +5,0 @@ "main": "./build/index.js", |
@@ -0,6 +1,68 @@ | ||
# JSON to JSON Lines | ||
You have a JSON file containing a very large array of objects you want to analyse. | ||
`json-to-jsonl` is a lightweight package that rewrites the array as a JSON Lines file - one line per object. | ||
node --max-old-space-size=4096 index.js | ||
## JSON Lines Format | ||
[JSON Lines](http://jsonlines.org/) is a convenient format for storing structured data that may be processed one record at a time. It works well with unix-style text processing tools and shell pipelines. Here's how easy it is to create a document database (in MongoDB) from a JSON Lines file: | ||
``` | ||
mongoimport --db my-db-name --collection my-collection-name --file /path/to/my-file.jsonl | ||
``` | ||
`mongoimport --db charity-base-v240-june-2018 --collection grants --file /Users/dan/Documents/Repositories/charity-base-data/data/grantnav-20180731084014.jsonl` | ||
## Installation | ||
``` | ||
$ npm install --save json-to-jsonl | ||
``` | ||
## Examples | ||
```json5 | ||
// my-file-1.json | ||
[ | ||
{ "a" : 1 }, | ||
{ "a" : 2 }, | ||
{ "a" : 3 } | ||
] | ||
// my-file-2.json | ||
{ | ||
"name" : "Some JSON Object", | ||
"longList" : [ | ||
"one", | ||
"two", | ||
"three", | ||
"four" | ||
] | ||
} | ||
``` | ||
```js | ||
// convert.js | ||
const jsonTojsonl = require('json-to-jsonl') | ||
try { | ||
// The array is top-level in my-file-1.json so we don't have to specify a getArray func: | ||
const response1 = jsonTojsonl('my-file-1.json') | ||
// { lines: 3, file: 'my-file-1.jsonl' } | ||
// The array is a value in the my-file-2.json object so we have to specify a getArray func: | ||
const response2 = jsonTojsonl('my-file-2.json', x => x.longList) | ||
// { lines: 4, file: 'my-file-2.jsonl' } | ||
} | ||
catch (e) { | ||
... | ||
} | ||
``` | ||
## Docs | ||
```js | ||
jsonTojsonl(jsonFilename, getArray=function(x) {return x}) | ||
``` | ||
Writes a new file with the same name (but with the `.json` extension replaced by `.jsonl`). The optional argument `getArray` allows you to define where the JSON array is (if it isn't top-level). | ||
Returns an object with `lines` and `file` properties (number of lines written and name of new file). | ||
## Memory Limit | ||
If your JSON file is very big, you might encounter a FATAL ERROR caused by the JavaScript heap running out of memory. You may need to increase Node's memory limit when running your script, e.g. to 4 GB: | ||
``` | ||
$ node --max-old-space-size=4096 convert.js | ||
``` |
3900
29
69