csvtojson - npm Package Compare versions

Comparing version 0.4.2 to 0.4.3

test/data/noheadercsv

15

bin/csvtojson.js

@@ -70,3 +70,10 @@ function csvtojson() {

};
function parseObject(val){
    try {
        return JSON.parse(val);
    }catch(e){
        console.error(e);
        process.exit(1);
    }
}
function parseBool(str, optName) {

@@ -100,4 +107,5 @@ str = str.toLowerCase();

parsedCmd.options[key] = parseFloat(val);
}
else {
} else if (type ==="object"){
parsedCmd.options[key] = parseObject(val);
} else {
throw ({

@@ -108,3 +116,2 @@ name: "UnimplementedException",

}
parsedCmd.options[key] = val;
} else if (cmds[item]) {

@@ -111,0 +118,0 @@ parsedCmd.cmd = item;

@@ -31,2 +31,10 @@ {

"type": "number"
},
"--noheader":{
"desc":"Indicating csv data has no header row and first row is data row. Default is false",
"type":"boolean"
},
"--headers":{
"desc":"An array to specify the headers of CSV data. If --noheader is false, this value will override CSV header. Default: null. Example: [\"my field\",\"name\"]",
"type":"object"
}

@@ -33,0 +41,0 @@ },

@@ -22,3 +22,5 @@ var util = require("util");

workerNum: 1, //number of parallel workers. On a multi-core CPU, increasing this number gives better performance for large csv data.
fork: false //use another CPU core to convert the csv stream
fork: false, //use another CPU core to convert the csv stream
noheader:false, //indicate if first line of CSV file is header or not.
headers:null //an array of header strings. If noheader is false and headers is array, csv header will be ignored.
};

@@ -25,0 +27,0 @@ if (params && typeof params === "object") {

@@ -11,4 +11,9 @@ module.exports = {

function parseParamType (type, item) {
if (type === 'number' && !isNaN(item)) {
    return parseFloat(item);
if (type === 'number') {
    var rtn = parseFloat(item);
    if (isNaN(rtn)){
        return 0;
    }else{
        return rtn;
    }
} else if (type === '') {

@@ -32,3 +37,3 @@ try {

}
index = match[1]; //get index where json object should stay
pointer = pointer[headStr.replace(match[0], '')];

@@ -55,3 +60,3 @@ if (index === '') { //if its dynamic array index, push to the end

match = key.match(arrReg);
if (match) { // the last element is an array, we need to check and treat it as an array.
key = key.replace(match[0], '');

@@ -58,0 +63,0 @@ if (!pointer[key] || !(pointer[key] instanceof Array)) {

@@ -42,10 +42,37 @@ /**

var row = utils.rowSplit(csvRow, this.param.delimiter, this.param.quote, this.param.trim);
this.processHeadRow(row, cb);
async.each(this.workers, function(worker, scb) {
    if (this.param.headers && this.param.headers instanceof Array){
        var counter = 1;
        while (this.param.headers.length < row.length){
            this.param.headers.push("field" + counter++);
        }
        while (this.param.headers.length > row.length){
            this.param.headers.pop();
        }
        row = this.param.headers;
    }
    if (this.param.noheader && !this.param.headers) {
        worker.genConstHeadRow(row.length, scb);
    } else {
        worker.processHeadRow(row, scb);
    }
}.bind(this), function() {
    //console.log(arguments);
    if (this.param.noheader){
        this.recordNumber++;
        rowProcess.call(this);
    }else{
        cb();
    }
}.bind(this));
} else { //pass the data to worker
    rowProcess.call(this);
}
function rowProcess(){
    this.runningWorker++;
    this.rowProcess(data.toString("utf8"), function(err, resultRow, row, index) {
        if (err) {
            this.emit("error", err);
        } else {
            this.emit("record_parsed", resultRow, row, index - 1);
            //this.push(JSON.stringify([resultRow,row,obj.rowIndex]),"utf8");

@@ -102,18 +129,18 @@ }

}
Processor.prototype.rowProcess = function(data, cb) {
    var worker;
    if (this.workers.length > 1) { // if multi-worker enabled
        if (this.workers.length > 2) { // for 2+ workers, the host process concentrates on csv parsing while the workers convert csv lines to JSON.
            worker = this.workers[(this.recordNumber % (this.workers.length - 1)) + 1];
        } else { // for 2 workers, weight the load since the first worker spends roughly 50% CPU on csv parsing; the weight pattern is 0,1,1,0,1,1,0
            var index = this.recordNumber % 3;
            if (index > 1) {
                index = 1;
            }
            worker = this.workers[index];
        }
    } else { // if only 1 worker
        worker = this.workers[0];
    }
    worker.processRow(data, this.recordNumber, cb);
}

@@ -120,0 +147,0 @@ Processor.prototype.checkAndFlush = function() {

@@ -21,2 +21,3 @@ /**

this.sync = sync ? true : false;
this.funcs = require("./workerRunner")();
if (!this.sync) {

@@ -34,35 +35,10 @@ this.child = require("child_process").fork(__dirname + "/workerRunner.js");

}
Worker.prototype.processRow = function(data,index, cb) {
    if (this.sync) {
        var i, item, parser, head,
            row = utils.rowSplit(data, this.param.delimiter, this.param.quote, this.param.trim);
        var resultRow = {};
        for (i = 0; i < this.parseRules.length; i++) {
            item = row[i];
            if (this.param.ignoreEmpty && item === '') {
                continue;
            }
            parser = this.parseRules[i];
            head = this.headRow[i];
            parser.parse({
                head: head,
                item: item,
                itemIndex: i,
                rawRow: row,
                resultRow: resultRow,
                rowIndex: index,
                config: this.param || {}
            });
        }
        cb(null, resultRow, row, index);
    } else {
        this.send({
            action: this.genAction("processRow"),
            data: data,
            index: index,
            param: this.param
        }, function(err, res){
            cb(null, res.resultRow, res.row, res.index);
        });
    }
Worker.prototype.processRow = function(data, index, cb) {
    this.send({
        action: "processRow",
        data: data,
        index: index
    }, function(err, res) {
        cb(null, res.resultRow, res.row, res.index);
    });
}

@@ -77,23 +53,28 @@ Worker.prototype.onChildMsg = function(m) {

} else {
//None register child action
}
}
Worker.prototype.send = function(msg, cb) {
var action = msg.action;
this.childCallbacks[action] = cb;
this.child.send(msg);
}
Worker.prototype.processHeadRow = function(headRow, cb) {
msg.param=this.param;
if (this.sync) {
this.headRow = headRow;
this.parseRules = parserMgr.initParsers(headRow, this.param.checkType);
cb();
this.funcs[msg.action](msg, cb);
} else {
this.send({
action: this.genAction("processHeadRow"),
row: headRow,
param: this.param
}, cb);
var action = this.genAction(msg.action);
msg.action=action;
this.childCallbacks[action] = cb;
this.child.send(msg);
}
}
Worker.prototype.genConstHeadRow = function(number, cb) {
    this.send({
        action: "genConstHeadRow",
        number: number
    }, cb);
}
Worker.prototype.processHeadRow = function(headRow, cb) {
    this.send({
        action: "processHeadRow",
        row: headRow
    }, cb);
}
Worker.prototype.genAction = function(action) {

@@ -100,0 +81,0 @@ var d = "" + new Date().getTime() + Math.round(Math.random() * 1000000);

var parserMgr = require("./parserMgr.js");
var utils = require("./utils.js");
var actions = {
    processHeadRow: processHeadRow,
    processRow: processRow
if (process.send) {
    var inst = init();
    process.on("message", function(m) {
        var action = getAction(m.action);
        inst[action](m, function(err, res) {
            if (err) {
                //error handling
            }
            if (!res){
                res = {};
            }
            res.action = m.action;
            process.send(res);
        });
    });
}
var headRow, parseRules;
process.on("message", function(m) {
    var action = getAction(m.action);
    actions[action](m, function(err, res) {
        if (err) {
            //error handling
        }
        res.action = m.action;
        process.send(res);
    });
});
function getAction(action) {

@@ -24,39 +24,65 @@ return action.split("_")[0];

function processHeadRow(msg, cb) {
    headRow = msg.row;
    var param = msg.param;
    parseRules = parserMgr.initParsers(headRow, param.checkType);
    cb(null, {});
}
function init() {
    var headRow, parseRules;
function processRow(msg, cb) {
    var i, item, parser, head,
        data = msg.data,
        param = msg.param,
        index = msg.index;
    var row = utils.rowSplit(data, param.delimiter, param.quote, param.trim);
    var resultRow = {};
    for (i = 0; i < parseRules.length; i++) {
        item = row[i];
        if (param.ignoreEmpty && item === '') {
            continue;
    function genConstHeadRow(msg, cb){
        var Parser = require("./parser");
        var number = msg.number;
        parseRules = [];
        headRow = [];
        while (number > 0){
            var p = new Parser("field" + number, /.*/, function(params){
                var name = this.getName();
                params.resultRow[name] = params.item;
            }, true);
            parseRules.unshift(p);
            headRow.unshift(p.getName());
            number--;
        }
        cb();
    }
    function processHeadRow(msg, cb) {
        headRow = msg.row;
        var param = msg.param;
        parseRules = parserMgr.initParsers(headRow, param.checkType);
        cb(null, {});
    }
    function processRow(msg, cb) {
        var i, item, parser, head,
            data = msg.data,
            param = msg.param,
            index = msg.index;
        var row = utils.rowSplit(data, param.delimiter, param.quote, param.trim);
        var resultRow = {};
        for (i = 0; i < parseRules.length; i++) {
            item = row[i];
            if (param.ignoreEmpty && item === '') {
                continue;
            }
            parser = parseRules[i];
            head = headRow[i];
            parser.parse({
                head: head,
                item: item,
                itemIndex: i,
                rawRow: row,
                resultRow: resultRow,
                rowIndex: index,
                config: param || {}
            });
        }
        parser = parseRules[i];
        head = headRow[i];
        parser.parse({
            head: head,
            item: item,
            itemIndex: i,
            rawRow: row,
        cb(null, {
            resultRow: resultRow,
            rowIndex: index,
            config: param || {}
            row: row,
            index: index
        });
    }
    cb(null, {
        resultRow: resultRow,
        row: row,
        index: index
    });
    return {
        processHeadRow: processHeadRow,
        processRow: processRow,
        genConstHeadRow: genConstHeadRow
    }
}
module.exports = init;

@@ -21,3 +21,3 @@ {

],
"version": "0.4.2",
"version": "0.4.3",
"keywords": [

@@ -24,0 +24,0 @@ "csv",

#CSVTOJSON
An all-you-need Node.js CSV-to-JSON converter.
* Large CSV data

@@ -12,33 +12,3 @@ * Command Line Tool and Node.JS Lib

#IMPORTANT!!
Since version 0.3, the core class of csvtojson inherits from the stream.Transform class. It therefore behaves like a normal Stream object, and the previous CSV-specific features are no longer available. Usage is now:
```js
//Converter Class
var fs = require("fs");
var Converter = require("csvtojson").Converter;
var fileStream = fs.createReadStream("./file.csv");
//new converter instance
var converter = new Converter({constructResult:true});
//end_parsed will be emitted once parsing finished
converter.on("end_parsed", function (jsonObj) {
console.log(jsonObj); //here is your result json object
});
//read from file
fileStream.pipe(converter);
```
To convert from a string, previously the code was:
```js
csvConverter.from(csvString);
```
Now it is:
```js
csvConverter.fromString(csvString, callback);
```
The callback function above is optional. See [Parse String](#parse-string).
From version 0.3 on, csvtojson requires Node 0.10 or above.
##Menu

@@ -65,2 +35,3 @@ * [Installation](#installation)

* [Multi-Core / Fork Process](#multi-cpu-core)
* [Header Configuration](#header-configuration)
* [Change Log](#change-log)

@@ -71,17 +42,52 @@

##Installation
>npm install -g csvtojson
>npm install csvtojson --save
##Features
##Usage
* Powerful library for your Node.js applications processing csv data.
* Multi cpu core support
* Extremely straightforward
* Multiple input support: CSV File, Readable Stream, CSV String etc.
* Highly extensible with your own rules and parsers for outputs.
* Multiple interfaces (webservice, command line)
### library
**From File**
```js
//Converter Class
var Converter = require("csvtojson").Converter;
var converter = new Converter({});
//end_parsed will be emitted once parsing finished
converter.on("end_parsed", function (jsonArray) {
console.log(jsonArray); //here is your result jsonarray
});
//read from file
require("fs").createReadStream("./file.csv").pipe(converter);
```
**From Web Server**
```js
//Converter Class
var Converter = require("csvtojson").Converter;
var converter = new Converter({constructResult:false}); //for big csv data
//record_parsed will be emitted as each csv row is processed
converter.on("record_parsed", function (jsonObj) {
console.log(jsonObj); //here is your result json object
});
require("request").get("http://csvwebserver").pipe(converter);
```
**From String**
```js
var Converter = require("csvtojson").Converter;
var converter = new Converter({});
converter.fromString(csvString, function(err,result){
//your code here
});
```
###Command Line Tools

@@ -107,70 +113,28 @@

### WebService
Once the web server is initialised, you can HTTP POST CSV data as the request body.
For example, we start web server with default configuration:
>csvtojson startserver
# Params
And then we use curl to perform a web request:
>curl -X POST -d "date,\*json\*employee.name,\*json\*employee.age,\*json\*employee.number,\*array\*address,\*array\*address,\*jsonarray\*employee.key,\*jsonarray\*employee.key,\*omit\*id
>
>2012-02-12,Eric,31,51234,Dunno Street,Kilkeny Road,key1,key2,2
>
>2012-03-06,Ted,28,51289,Cambridge Road,Tormore,key3,key4,4" http://127.0.0.1:8801/parseCSV
The csv Converter constructor accepts the following parameters:
#Demo Product
To write a demo app, simply use the csvtojson web interface. Paste the following code into index.js:
```js
var server = require("csvtojson").interfaces.web;
server.startWebServer({
"port":8801
var converter=new require("csvtojson").Converter({
constructResult:false,
workerNum:4,
noheader:true
});
```
Then run the app:
```
node ./index.js
```
Now you can post any csv data to http://localhost:8801/parseCSV
It uses the HTTP request as a readable stream and the HTTP response as a writable stream.
Following parameters are supported (a combined example follows the list):
# Quick Start
Use the csvtojson library in your own project.
* **constructResult**: true/false. Whether to construct the final json object in memory, which is populated in the "end_parsed" event. Set to false when dealing with huge csv data. Default: true.
* **delimiter**: delimiter used for separating columns. Default: ","
* **quote**: If a column contains the delimiter, a quote character can surround the column content, e.g. "hello, world" won't be split into two columns while parsing. Default: " (double quote)
* **trim**: Indicates whether the parser trims off spaces surrounding column content, e.g. " content " will be trimmed to "content". Default: true
* **checkType**: Turns field type checking on or off. Default is true. See [Field type](#field-type)
* **toArrayString**: Stringify the stream output to a JSON array. This is useful when piping output to a file which expects a JSON array. Default is false, and only JSON is pushed downstream.
* **ignoreEmpty**: Ignore empty values in CSV columns. If a column value is not given, set this to true to skip it. Default: false.
* **workerNum**: Number of worker processes. The worker processes use multiple cores to help process CSV data. Set to the number of cores to improve performance on large csv files. Keep it at 1 for small csv files. Default: 1.
* **fork**: Use another CPU core to process the CSV stream.
* **noheader**: Indicates the csv data has no header row and the first row is a data row. Default is false. See [header configuration](#header-configuration)
* **headers**: An array to specify the headers of CSV data. If --noheader is false, this value will override the CSV header row. Default: null. Example: ["my field","name"]. See [header configuration](#header-configuration)
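As a quick illustration, several of these options can be combined in a single constructor call (a sketch; the option values here are arbitrary):
```js
var Converter = require("csvtojson").Converter;
//illustrative values only; every option used is documented in the list above
var converter = new Converter({
    constructResult: false, //stream records instead of building one big result object
    delimiter: ";",         //columns separated by semicolons
    trim: true,             //strip spaces around column content
    ignoreEmpty: true,      //skip empty column values
    workerNum: 2            //use two worker processes
});
```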
Add csvtojson to your package.json or install it through npm:
>npm install csvtojson
~~The core of the tool is the Converter class. It is based on the node-csv library (version 0.3.6) and therefore has all features of [node-csv](http://www.adaltas.com/projects/node-csv/).~~ To start a parse, simply use the following code:
```js
//Converter Class
var fs = require("fs");
var Converter = require("csvtojson").Converter;
var fileStream = fs.createReadStream("./file.csv");
//new converter instance
var param={};
var converter = new Converter(param);
//end_parsed will be emitted once parsing finished
converter.on("end_parsed", function (jsonObj) {
console.log(jsonObj); //here is your result json object
});
//read from file
fileStream.pipe(converter);
```
# Params
The parameters for the Converter constructor are:
* constructResult: true/false. Whether to construct the final json object in memory, which is populated in the "end_parsed" event. Set to false when dealing with huge csv data. Default: true.
* delimiter: delimiter used for separating columns. Default: ","
* quote: If a column contains the delimiter, a quote character can surround the column content, e.g. "hello, world" won't be split into two columns while parsing. Default: " (double quote)
* trim: Indicates whether the parser trims off spaces surrounding column content, e.g. " content " will be trimmed to "content". Default: true
* checkType: Turns field type checking on or off. Default is true. See [Field type](#field-type)
* toArrayString: Stringify the stream output to a JSON array. This is useful when piping output to a file which expects a JSON array. Default is false, and only JSON is pushed downstream.
* ignoreEmpty: Ignore empty values in CSV columns. If a column value is not given, set this to true to skip it. Default: false.
* workerNum: Number of worker processes. The worker processes use multiple cores to help process CSV data. Set to the number of cores to improve performance on large csv files. Keep it at 1 for small csv files. Default: 1.
* fork: Use another CPU core to convert the CSV stream
# Parser

@@ -286,7 +250,7 @@ CSVTOJSON allows adding customised parsers which concentrate on what to parse and how to parse.

Original data:
```csv
date,*json*employee.name,*json*employee.age,*json*employee.number,*array*address,*array*address,*jsonarray*employee.key,*jsonarray*employee.key,*omit*id
2012-02-12,Eric,31,51234,Dunno Street,Kilkeny Road,key1,key2,2
2012-03-06,Ted,28,51289,Cambridge Road,Tormore,key3,key4,4
```
Output data:

@@ -399,2 +363,3 @@

```csv
TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED

@@ -406,2 +371,3 @@ 1395426422,n,10028,1213,5461

1395426422,n,10022,15506,72125
```

@@ -450,3 +416,3 @@ It will be converted to:

```
```csv
fieldA.title, fieldA.children[0].name, fieldA.children[0].id,fieldA.children[1].name, fieldA.children[1].employee[].name,fieldA.children[1].employee[].name, fieldA.address[],fieldA.address[], description

@@ -596,3 +562,3 @@ Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory

```
csvtojson --workerNum=4 ./myfile.csv
```

@@ -612,4 +578,64 @@ It is worth mentioning that for small CSV files it actually costs more time to create the processes and keep the communication between them. Therefore, use fewer workers for small CSV files.

### Header configuration
CSV header row can be configured programmatically.
The *noheader* parameter indicates whether the first row of the CSV is a header row or a data row. e.g. CSV data:
```
CC102-PDMI-001,eClass_5.1.3,10/3/2014,12,40,green,40
CC200-009-001,eClass_5.1.3,11/3/2014,5,3,blue,38
```
With noheader=true
```
csvtojson ./test/data/noheadercsv --noheader=true
```
we get the following result:
```json
[
{"field1":"CC102-PDMI-001","field2":"eClass_5.1.3","field3":"10/3/2014","field4":"12","field5":"40","field6":"green","field7":"40"},
{"field1":"CC200-009-001","field2":"eClass_5.1.3","field3":"11/3/2014","field4":"5","field5":"3","field6":"blue","field7":"38"}
]
```
Or we can use it in code:
```js
var converter=new require("csvtojson").Converter({noheader:true});
```
The *headers* parameter specifies the header row as an array. If *noheader* is false, this value will override the CSV header row. With the CSV data above, run:
```
csvtojson ./test/data/noheadercsv --noheader=true --headers='["hello","csv"]'
```
we get the following result:
```json
[
{"hello":"CC102-PDMI-001","csv":"eClass_5.1.3","field1":"10/3/2014","field2":12,"field3":40,"field4":"green","field5":40},
{"hello":"CC200-009-001","csv":"eClass_5.1.3","field1":"11/3/2014","field2":5,"field3":3,"field4":"blue","field5":38}
]
```
If the headers array is shorter than the number of CSV columns, the converter automatically fills the remaining columns with "field*" names; if it is longer, the extra entries are dropped.
We can also use it in code:
```js
var converter=new require("csvtojson").Converter({headers:["my header1","hello world"]});
```
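Because *headers* overrides an existing header row when *noheader* is false, a file that already contains a header can be re-keyed in code. A sketch, where ./mydata.csv is an illustrative file whose first row is a real header:
```js
var Converter = require("csvtojson").Converter;
var converter = new Converter({headers: ["id", "name"]}); //the file's own header row is overridden by these keys
converter.on("end_parsed", function (jsonArray) {
    console.log(jsonArray); //records keyed by "id" and "name"
});
require("fs").createReadStream("./mydata.csv").pipe(converter);
```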
#Change Log
##0.4.3
* Added header configuration
* Refactored worker code
* **A number-type field now returns 0 when parseFloat yields NaN for the field value; previously the original string value was returned.** (See the sketch below.)
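A sketch of the new number handling (mirroring the parseParamType change above):
```js
//0.4.2 kept the original string when a number-typed value failed to parse;
//0.4.3 returns 0 instead:
var rtn = parseFloat("abcd");     //NaN
var value = isNaN(rtn) ? 0 : rtn; //0 under 0.4.3; previously "abcd" was kept
```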
##0.4.0

@@ -652,1 +678,31 @@ * Added Multi-core CPU support to increase performance

#IMPORTANT!!
Since version 0.3, the core class of csvtojson inherits from the stream.Transform class. It therefore behaves like a normal Stream object, and the previous CSV-specific features are no longer available. Usage is now:
```js
//Converter Class
var fs = require("fs");
var Converter = require("csvtojson").Converter;
var fileStream = fs.createReadStream("./file.csv");
//new converter instance
var converter = new Converter({constructResult:true});
//end_parsed will be emitted once parsing finished
converter.on("end_parsed", function (jsonObj) {
console.log(jsonObj); //here is your result json object
});
//read from file
fileStream.pipe(converter);
```
To convert from a string, previously the code was:
```js
csvConverter.from(csvString);
```
Now it is:
```js
csvConverter.fromString(csvString, callback);
```
The callback function above is optional. See [Parse String](#parse-string).
From version 0.3 on, csvtojson requires Node 0.10 or above.

@@ -187,3 +187,3 @@ var Converter = require("../libs/core/Converter.js");

assert(d.column7 === "1234");
assert(d.column8 === "abcd");
assert(d.column8 === 0);
assert(d.column9 === true);

@@ -278,2 +278,44 @@ });

});
it ("should allow no header",function(done){
var testData = __dirname + "/data/noheadercsv";
var rs = fs.createReadStream(testData);
var st = rs.pipe(new Converter({noheader:true}));
st.on("end_parsed",function (res){
var j = res[0];
assert(res.length===5);
assert(j.field1==="CC102-PDMI-001");
assert(j.field2==="eClass_5.1.3");
done();
});
})
it ("should allow customised header",function(done){
var testData = __dirname + "/data/noheadercsv";
var rs = fs.createReadStream(testData);
var st = rs.pipe(new Converter({
noheader:true,
headers:["a","b"]
}));
st.on("end_parsed",function (res){
var j = res[0];
assert(res.length===5);
assert(j.a==="CC102-PDMI-001");
assert(j.b==="eClass_5.1.3");
assert(j.field1==="10/3/2014");
done();
});
})
it ("should allow customised header to override existing header",function(done){
var testData = __dirname + "/data/complexJSONCSV";
var rs = fs.createReadStream(testData);
var st = rs.pipe(new Converter({
headers:[]
}));
st.on("end_parsed",function (res){
var j = res[0];
assert(res.length===2);
assert(j.field1==="Food Factory");
assert(j.field2==="Oscar");
done();
});
})
});