Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

csvtojson

Package Overview
Dependencies
Maintainers
1
Versions
82
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

csvtojson - npm Package Compare versions

Comparing version 0.4.8 to 0.5.0

bin/benchmark.js

7

bin/csvtojson.js

@@ -9,2 +9,3 @@ function csvtojson() {

var exps = options.examples;
var pkg=require("../package.json");
/**

@@ -23,2 +24,4 @@ *{

errno = typeof errno === "number" ? errno : 0;
console.log("csvtojson: Convert csv to JSON format");
console.log("version:",pkg.version);
console.log("Usage: csvtojson [<command>] [<options>] filepath\n");

@@ -60,3 +63,5 @@ console.log("Commands: ");

web.startWebServer(options);
} else {
} else if (cmd ==="version"){
console.log(pkg.version);
}else {
console.log("unknown command %s.", cmd);

@@ -63,0 +68,0 @@ _showHelp(1);

3

bin/options.json
{
"commands": {
"parse": "(Default)Parse a csv file to json",
"startserver": "(Deprecated)Start a web server"
"startserver": "(Deprecated)Start a web server",
"version": "Show version of current csvtojson"
},

@@ -6,0 +7,0 @@ "options": {

@@ -9,3 +9,4 @@ var util = require("util");

var Worker = require("./Worker.js");
var CSVLine = require("./CSVLine.js");
var utils = require("./utils.js");
var async = require("async");

@@ -45,8 +46,9 @@ function Converter(params) {

this.recordNum = 0;
this.runningProcess = 0;
//this._pipe(this.lineParser).pipe(this.processor);
if (this.param.fork) {
this.initFork();
} else {
this.initNoFork();
this.param.fork=false;
this.param.workerNum=2;
}
this.initNoFork();
this.flushCb = null;

@@ -63,98 +65,101 @@ this.processEnd = false;

util.inherits(Converter, Transform);
Converter.prototype.initFork = function() {
var env = process.env;
env.params = JSON.stringify(this.param);
this.child = require("child_process").fork(__dirname + "/fork.js", {
env: env,
silent: true
});
this.child.stdout.on("data", function(d, e) {
// this.push(d, e);
// this.emit("record_parsed");
}.bind(this));
this.child.on("message", function(msg) {
// console.log("aa",msg);
if (msg.action === "record_parsed") {
//var recs = msg.arguments;
var args = msg.arguments;
//console.log(recs);
//var recs=args[0];
//for (var i=0;i<recs.length;i++){
//this.emit("record_parsed", recs[i][0], recs[i][1], recs[i][2]);
//}
this.emit("record_parsed", args[0], args[1], args[2]);
} else if (msg.action === "data") {
var args = msg.arguments;
this.push(new Buffer(args[0]), args[1]);
} else if (msg.action === "error") {
var args = msg.arguments;
args.unshift("error");
this.hasError=true;
this.emit.apply(this, args);
}else{
this.push(msg);
}
}.bind(this));
this._transform = this._transformFork;
this._flush = this._flushFork;
//child.on("message",function(msg){
//var syncLock=false;
//if (msg.action=="record_parsed"){
//this.sequenceBuffer[msg.index]=msg;
//if
//}
//}.bind(this));
//child.on("exit",function(code){
//this.processEnd=true;
//this.flushBuffer();
//this.checkAndFlush();
//}.bind(this));
}
// Converter.prototype.initFork = function() {
// var env = process.env;
// env.params = JSON.stringify(this.param);
// this.child = require("child_process").fork(__dirname + "/fork.js", {
// env: env,
// silent: true
// });
// this.child.stderr.on("data", function(d, e) {
// process.stderr.write(d, e);
// // this.push(d, e);
// // this.emit("record_parsed");
// }.bind(this));
// // this.child.stdout.on("data",function(d){
// // this.push(d.toString("utf8"));
// // }.bind(this));
// this.child.on("message", function(msg) {
// if (msg.action === "record_parsed") {
// //var recs = msg.arguments;
// var args = msg.arguments;
// //console.log(recs);
// //var recs=args[0];
// //for (var i=0;i<recs.length;i++){
// //this.emit("record_parsed", recs[i][0], recs[i][1], recs[i][2]);
// //}
// this.emit("record_parsed", args[0], args[1], args[2]);
// } else if (msg.action === "data") {
// var args = msg.arguments;
// this.push(new Buffer(args[0]), args[1]);
// } else if (msg.action === "error") {
// var args = msg.arguments;
// args.unshift("error");
// this.hasError = true;
// this.emit.apply(this, args);
// }
// }.bind(this));
// this._transform = this._transformFork;
// this._flush = this._flushFork;
// //child.on("message",function(msg){
// //var syncLock=false;
// //if (msg.action=="record_parsed"){
// //this.sequenceBuffer[msg.index]=msg;
// //if
// //}
// //}.bind(this));
// //child.on("exit",function(code){
// //this.processEnd=true;
// //this.flushBuffer();
// //this.checkAndFlush();
// //}.bind(this));
// }
Converter.prototype.initNoFork = function() {
function onError() {
var args = Array.prototype.slice.call(arguments, 0);
args.unshift("error");
this.hasError = true;
this.emit.apply(this, args);
};
this.lineParser = new CSVLine(this.param);
this.lineParser.on("error", onError.bind(this));
// function onError() {
// var args = Array.prototype.slice.call(arguments, 0);
// args.unshift("error");
// this.hasError = true;
// this.emit.apply(this, args);
// };
this._lineBuffer = "";
this._csvLineBuffer = "";
// this.lineParser = new CSVLine(this.param);
// this.lineParser.on("error", onError.bind(this));
this.processor = new Processor(this.param);
this.processor.on("error", onError.bind(this));
var syncWorker = new Worker(this.param, true);
// syncWorker.on("error",onError);
this.processor.addWorker(syncWorker);
if (this.param.workerNum > 1) {
for (var i = 1; i < this.param.workerNum; i++) {
var worker = new Worker(this.param, false);
// worker.on("error",onError);
this.processor.addWorker(worker);
}
} else if (this.param.workerNum < 1) {
this.param.workerNum = 1;
}
// this.processor.on("error", onError.bind(this));
// var syncWorker = new Worker(this.param, true);
// // syncWorker.on("error",onError);
// this.processor.addWorker(syncWorker);
// if (this.param.workerNum > 1) {
// for (var i = 1; i < this.param.workerNum; i++) {
// var worker = new Worker(this.param, false);
// // worker.on("error",onError);
// this.processor.addWorker(worker);
// }
// } else if (this.param.workerNum < 1) {
// this.param.workerNum = 1;
// }
if (!this.param.constructResult) {
this.resultObject.disableConstruct();
}
this.lineParser.pipe(this.processor);
// this.lineParser.pipe(this.processor);
var syncLock = false;
this.processor.on("record_parsed", function(resultRow, row, index) {
// this.emit("record_parsed", resultRow, row, index);
this.sequenceBuffer[index] = {
resultRow: resultRow,
row: row,
index: index
};
//critical area
if (!syncLock) {
syncLock = true;
this.flushBuffer();
syncLock = false;
}
}.bind(this));
this.processor.on("end_parse", function() {
this.processEnd = true;
this.flushBuffer();
this.checkAndFlush();
}.bind(this));
// this.processor.on("record_parsed", function(resultRow, row, index) {
// // this.emit("record_parsed", resultRow, row, index);
// this.sequenceBuffer[index] = {
// resultRow: resultRow,
// row: row,
// index: index
// };
// //critical area
// if (!syncLock) {
// syncLock = true;
// this.flushBuffer();
// syncLock = false;
// }
// }.bind(this));
// this.processor.on("end_parse", function() {
// this.processEnd = true;
// this.flushBuffer();
// this.checkAndFlush();
// }.bind(this));
this._transform = this._transformNoFork;

@@ -168,49 +173,138 @@ this._flush = this._flushNoFork;

this.sequenceBuffer[index] = undefined;
if (obj.resultRow === null){ // empty. skip
this.recordNum++;
continue;
}
var resultRow = obj.resultRow;
var resultJSONStr = obj.resultJSONStr;
var resultRow = JSON.parse(resultJSONStr)
var row = obj.row;
this.emit("record_parsed", resultRow, row, index);
if (this.param.toArrayString && this.recordNum > 0) {
this.push("," + this.getEol());
this.push("," + eol);
}
this.push(JSON.stringify(resultRow), "utf8");
this.push(resultJSONStr, "utf8");
this.recordNum++;
}
this.checkAndFlush();
}
var size = 0;
Converter.prototype._transformNoFork = function(data, encoding, cb) {
// console.log("con",data.length);
size += data.length;
if (this.param.toArrayString && this.started === false) {
this.started = true;
this.push("[" + this.getEol(), "utf8");
this.push("[" + eol, "utf8");
}
this.lineParser.write(data, encoding, cb);
var lines = this.toCSVLines(this.toLines(data, encoding)); //lines of csv
this.processCSVLines(lines, cb);
// async.eachLimit(lines,1,function(line,scb){
// this.push(line.data);
// scb();
// }.bind(this),function(err){
// cb();
// });
//this.push(data,encoding);
// cb();
};
/**
 * Hands a batch of complete CSV records to the processor and stores the
 * parsed results in `sequenceBuffer`, keyed by record index.
 *
 * `runningProcess` is incremented before the batch is dispatched and
 * decremented when the processor reports back, so it counts batches that
 * are still in flight.
 *
 * @param {Array} csvLines - CSV records to convert.
 * @param {Function} cb - Forwarded unchanged as the third argument of
 *   `processor.rows`.
 */
Converter.prototype.processCSVLines = function(csvLines, cb) {
  var self = this;

  // Invoked by the processor once the whole batch has been handled.
  function onBatchDone(err, resArr) {
    self.runningProcess--;
    if (err) {
      self.emit("error", "row_process", err);
      return;
    }
    for (var pos = 0; pos < resArr.length; pos++) {
      var parsed = resArr[pos];
      self.sequenceBuffer[parsed.index] = {
        resultJSONStr: parsed.jsonRaw,
        row: parsed.row,
        index: parsed.index
      };
    }
    self.flushBuffer();
  }

  self.runningProcess++;
  self.processor.rows(csvLines, onBatchDone, cb);
};
/**
 * Decodes an incoming chunk, prepends the text left over from the previous
 * chunk (`_lineBuffer`) and splits the result on the line terminator
 * reported by `getEol`.
 *
 * NOTE(review): every chunk is decoded on its own, so a multi-byte character
 * split across two chunks would presumably be decoded incorrectly - confirm.
 *
 * @param {Buffer|string} data - Chunk received by the transform.
 * @param {string} encoding - Chunk encoding; "buffer" is treated as "utf8".
 * @returns {string[]} The pieces between line terminators; the last piece
 *   may be an incomplete line.
 */
Converter.prototype.toLines = function(data, encoding) {
  var charset = encoding === "buffer" ? "utf8" : encoding;
  var text = this._lineBuffer + data.toString(charset);
  var separator = this.getEol(text);
  return text.split(separator);
};
/**
 * Folds physical file lines into complete CSV records.
 *
 * Every element of `fileLines` except the last is passed to `_line`, which
 * returns either an empty array (the record is still open) or a one-element
 * array holding the completed record. The last element may be an incomplete
 * line, so it is stored in `_lineBuffer` for the next chunk.
 *
 * `fileLines` is consumed in place: on return it holds only its last element.
 *
 * @param {string[]} fileLines - Output of `toLines`.
 * @param {boolean} [last] - When truthy, an "unclosed_quote" error is emitted
 *   if a partial record is still buffered after processing.
 * @returns {string[]} The completed CSV records found in this batch.
 */
Converter.prototype.toCSVLines = function(fileLines, last) {
  var lines = [];
  while (fileLines.length > 1) {
    // Append in place rather than re-copying the accumulator with concat()
    // on every iteration.
    Array.prototype.push.apply(lines, this._line(fileLines.shift()));
  }
  this._lineBuffer = fileLines[0];
  if (last && this._csvLineBuffer.length > 0) {
    this.emit("error", "unclosed_quote", this._csvLineBuffer);
  }
  return lines;
};
/**
 * Appends one physical line to the pending record buffer and reports
 * whether that completes a CSV record.
 *
 * A record is complete when `utils.isToogleQuote` reports no unclosed quote
 * in the buffer. Otherwise the line terminator is appended to the buffer so
 * the next physical line continues the same record.
 *
 * If `param.maxRowLength` is set and the buffer grows past it, `hasError` is
 * set and a "row_exceed" error is emitted; processing then continues.
 *
 * @param {string} line - One physical line, without its terminator.
 * @returns {string[]} `[record]` when a record was completed, otherwise `[]`.
 */
Converter.prototype._line = function(line) {
  var lines = [];
  this._csvLineBuffer += line;
  if (this.param.maxRowLength && this._csvLineBuffer.length > this.param.maxRowLength) {
    this.hasError = true;
    this.emit("error", "row_exceed", this._csvLineBuffer);
  }
  if (!utils.isToogleQuote(this._csvLineBuffer, this.param.quote)) {
    // Complete record: hand it out and reset the buffer. The record is kept
    // in a declared local; assigning to an undeclared name would create a
    // global (or throw in strict mode).
    var record = this._csvLineBuffer;
    this._csvLineBuffer = '';
    lines.push(record);
  } else {
    // Quote still open: keep the line break that belongs to the field.
    this._csvLineBuffer += this.getEol();
  }
  return lines;
};
Converter.prototype._flushNoFork = function(cb) {
this.lineParser.end();
var self = this;
this.flushCb = cb;
this.checkAndFlush();
//cb();
if (this._lineBuffer.length > 0) {
var lines = this._line(this._lineBuffer);
this.processCSVLines(lines, function() {
this.checkAndFlush();
}.bind(this));
} else {
this.checkAndFlush();
}
return;
};
Converter.prototype._transformFork = function(data, encoding, cb) {
this.child.stdin.write(data, encoding, cb);
}
Converter.prototype._flushFork = function(cb) {
this.child.stdin.end();
this.child.on("exit", cb);
}
// Converter.prototype._transformFork = function(data, encoding, cb) {
// this.child.stdin.write(data, encoding, cb);
// }
// Converter.prototype._flushFork = function(cb) {
// this.child.stdin.end();
// this.child.on("exit", cb);
// }
Converter.prototype.checkAndFlush = function() {
if (this.processEnd && this.flushCb) {
if (this.runningProcess === 0 && this.flushCb) {
if (this._csvLineBuffer.length !== 0) {
this.emit("error", "unclosed_quote", this._csvLineBuffer);
}
if (this.param.toArrayString) {
this.push(this.getEol() + "]", "utf8");
this.push(eol + "]", "utf8");
}
this.flushCb();
this.processor.releaseWorker();
this.flushCb = null;
}
}
Converter.prototype.getEol = function() {
return this.eol ? this.eol : eol;
Converter.prototype.getEol = function(data) {
function contains(str, subString) {
return str.lastIndexOf(subString) > -1;
}
if (!this.param.eol && data) {
this.param.eol = contains(data, '\r\n') ? '\r\n' :
contains(data, '\n') ? '\n' :
contains(data, '\r') ? '\r' :
eol;
}
return this.param.eol;
};

@@ -223,3 +317,3 @@ Converter.prototype.fromFile = function(filePath, cb) {

rs.pipe(this);
this.wrapCallback(cb,function(){
this.wrapCallback(cb, function() {
fs.destroy();

@@ -250,3 +344,3 @@ });

if (cb && typeof cb === "function") {
this.wrapCallback(cb,function(){
this.wrapCallback(cb, function() {
rs.pause();

@@ -257,3 +351,3 @@ });

};
Converter.prototype.wrapCallback = function(cb,clean) {
Converter.prototype.wrapCallback = function(cb, clean) {
this.once("end_parsed", function(res) {

@@ -265,7 +359,8 @@ if (!this.hasError) {

this.once("error", function(err) {
this.hasError=true;
cb(Array.prototype.join.call(arguments, ", "));
clean();
});
}.bind(this));
}
module.exports = Converter;
var arrReg = /\[([0-9]*)\]/;
var numReg=/^[-+]?[0-9]*\.?[0-9]+$/;
/**
 * Converts a raw CSV cell according to the column type.
 *
 * - type "number": result of parseFloat, or 0 when that is NaN.
 * - type "" (implicit): the trimmed text is tried, in order, as a plain
 *   decimal number (per `numReg`), the words "false" / "true" in any letter
 *   case, and a JSON object literal wrapped in braces. If none applies, or
 *   the JSON is invalid, the original untrimmed item is returned.
 * - any other type: the item is returned untouched.
 *
 * @param {string} type - Column type taken from the header.
 * @param {string} item - Raw cell content.
 * @returns {*} The converted value.
 */
function parseParamType(type, item) {
  if (type === 'number') {
    var asNumber = parseFloat(item);
    return isNaN(asNumber) ? 0 : asNumber;
  }
  if (type !== '') {
    return item;
  }

  var text = item.trim();
  if (numReg.test(text)) {
    return parseFloat(text);
  }
  if (text.length === 5 && text.toLowerCase() === "false") {
    return false;
  }
  if (text.length === 4 && text.toLowerCase() === "true") {
    return true;
  }
  if (text.charAt(0) === "{" && text.charAt(text.length - 1) === "}") {
    try {
      return JSON.parse(text);
    } catch (e) {
      // Not valid JSON after all: fall back to the raw cell.
      return item;
    }
  }
  return item;
}
/**
 * Walks (and creates where missing) the nested containers named by every
 * header segment except the last one, and returns the container that the
 * last segment should be written into.
 *
 * Segments matching `arrReg` (for example "list[2]" or "list[]") address an
 * element of an array stored under the name without the bracket part; an
 * empty index means "append". All other segments address a plain object.
 * `headArr` is consumed in place: on return only its last segment is left.
 *
 * @param {Object} pointer - Root object to start from.
 * @param {string[]} headArr - Header path segments.
 * @param {RegExp} arrReg - Pattern recognising an array segment; capture
 *   group 1 is the index.
 * @param {boolean} flatKeys - When truthy, array notation is not interpreted.
 * @returns {Object} The container for the final segment.
 */
function processHead(pointer, headArr, arrReg, flatKeys) {
  var cursor = pointer;
  while (headArr.length > 1) {
    var segment = headArr.shift();
    var arrayMatch = flatKeys ? false : segment.match(arrReg);

    if (!arrayMatch) {
      // Plain object level: create it on first use, then descend.
      if (cursor[segment] === undefined) {
        cursor[segment] = {};
      }
      cursor = cursor[segment];
      continue;
    }

    // Array level: the property name is the segment minus its "[n]" part.
    var listKey = segment.replace(arrayMatch[0], '');
    if (cursor[listKey] === undefined) {
      cursor[listKey] = [];
    }
    var list = cursor[listKey];
    var slot = arrayMatch[1];
    if (slot === '') {
      // Dynamic index: append to the end.
      slot = list.length;
    }
    if (!list[slot]) {
      list[slot] = {};
    }
    cursor = list[slot];
  }
  return cursor;
}
module.exports = {
"name": "json",
"processSafe":true,
"processSafe": true,
"regExp": /^\*json\*/,
"parserFunc": function parser_json (params) {
"parserFunc": function parser_json(params) {
var fieldStr = this.getHeadStr();
var headArr = (params.config && params.config.flatKeys) ? [fieldStr] : fieldStr.split('.');
var match, index, key, pointer;
function parseParamType (type, item) {
if (type === 'number') {
var rtn=parseFloat(item);
if (isNaN(rtn)){
return 0;
}else{
return rtn;
}
} else if (type === '') {
try {
return JSON.parse(item);
} catch (e) {
return item;
}
}
return item;
}
function processHead (pointer, headArr, arrReg, flatKeys) {
var headStr, match, index;
while (headArr.length > 1) {
headStr = headArr.shift();
// match = headStr.match(arrReg);
match = flatKeys ? false : headStr.match(arrReg);
if (match) { //if its array, we need add an empty json object into specified index.
if (pointer[headStr.replace(match[0], '')] === undefined) {
pointer[headStr.replace(match[0], '')] = [];
}
index = match[1]; //get index where json object should stay
pointer = pointer[headStr.replace(match[0], '')];
if (index === '') { //if its dynamic array index, push to the end
index = pointer.length;
}
if (!pointer[index]) { //current index in the array is empty. we need create a new json object.
pointer[index] = {};
}
pointer = pointer[index];
} else { //not array, just normal JSON object. we get the reference of it
if (pointer[headStr] === undefined) {
pointer[headStr] = {};
}
pointer = pointer[headStr];
}
}
return pointer;
}
//now the pointer is pointing the position to add a key/value pair.

@@ -70,5 +85,5 @@ pointer = processHead(params.resultRow, headArr, arrReg, params.config && params.config.flatKeys);

} else {
pointer[key] = params.config && params.config.checkType ? parseParamType(this.type, params.item) : params.item;
pointer[key] = params.config && params.config.checkType ? parseParamType(this.type, params.item) : params.item;
}
}
};

@@ -16,13 +16,16 @@ var params = JSON.parse(process.env.params);

converter.on("end_parsed", function() {
process.exit(0);
process.send({
action: "end_parsed"
});
process.exit(0);
});
var count = 0;
converter.pipe(process.stdout);
// converter.pipe(process.stdout);
converter.on("data", function(d) {
process.send(d.toString("utf8"));
// var args = Array.prototype.slice.call(arguments, 0)
// process.send({
// action: "data",
// arguments: args
// });
// process.send(d.toString("utf8"));
var args = Array.prototype.slice.call(arguments, 0)
process.send({
action: "data",
arguments: args
});
});

@@ -29,0 +32,0 @@ converter.on("error",function(){

@@ -24,4 +24,9 @@ function Parser(name, regExp, parser, processSafe) {

Parser.prototype.getHeadStr = function() {
var head= this.head;
return head.replace(this.regExp,'');
if (this.headStr) {
return this.headStr;
} else {
var head = this.head;
this.headStr = head.replace(this.regExp, '');
return this.getHeadStr();
}
};

@@ -31,8 +36,7 @@ Parser.prototype.getHead = function() {

};
Parser.prototype.get
Parser.prototype.clone = function() {
var obj=Object.create(this);
var newParser=new Parser();
for (var key in obj){
newParser[key]=obj[key];
var obj = Object.create(this);
var newParser = new Parser();
for (var key in obj) {
newParser[key] = obj[key];
}

@@ -39,0 +43,0 @@ return newParser;

@@ -7,3 +7,2 @@ /**

module.exports = Processor;
var Transform = require("stream").Transform;
var util = require("util");

@@ -13,6 +12,6 @@ var utils = require("./utils.js");

var parserMgr = require("./parserMgr.js");
var Worker = require('./Worker');
var async = require("async");
function Processor(params) {
Transform.call(this);
var _param = {

@@ -23,3 +22,4 @@ delimiter: ",",

checkType: true,
ignoreEmpty: false
ignoreEmpty: false,
workerNum: 1
}

@@ -37,61 +37,61 @@ for (var key in params) {

this.flushCb = null;
if (this.param.workerNum > 1) {
for (var i = 0; i < this.param.workerNum; i++) {
var worker = new Worker(this.param, false);
// worker.on("error",onError);
this.addWorker(worker);
}
} else{
this.param.workerNum = 1;
this.addWorker(new Worker(this.param,true));
}
}
util.inherits(Processor, Transform);
Processor.prototype._transform = function(data, encoding, cb) {
// console.log("pro",data.length);
this.recordNumber++;
if (this.recordNumber === 0) { //router handle header processing
var csvRow = data.toString("utf8");
var row = utils.rowSplit(csvRow, this.param.delimiter, this.param.quote, this.param.trim);
async.each(this.workers, function(worker, scb) {
if (this.param.headers && this.param.headers instanceof Array){
var counter=1;
while (this.param.headers.length<row.length){
this.param.headers.push("field"+counter++);
}
while (this.param.headers.length>row.length){
this.param.headers.pop();
}
row=this.param.headers;
}
if (this.param.noheader && !this.param.headers) {
worker.genConstHeadRow(row.length,scb);
} else {
worker.processHeadRow(row, scb);
}
}.bind(this), function() {
//console.log(arguments);
if (this.param.noheader){
this.recordNumber++;
rowProcess.call(this);
}else{
cb();
}
Processor.prototype.rows = function(csvRows, cb,valvCb) {
if (csvRows.length === 0) {
cb(null, []);
valvCb();
return;
}
var count = csvRows.length;
var rtn = [];
var _err = null;
if (this.recordNumber === -1) {
var headRow="";
if (!this.param.noheader){
headRow = csvRows.shift();
}
this.processHead(headRow, function() {
this.recordNumber++;
this.rows(csvRows, cb,valvCb);
}.bind(this));
} else { //pass the data to worker
rowProcess.call(this);
} else {
var worker=this.getFreeWorker();
worker.processRows(csvRows,this.recordNumber,function(err,res){
this.addWorker(worker);
this.releaseValve();
cb(err,res);
}.bind(this));
this.recordNumber+=csvRows.length;
this.valve(valvCb);
}
function rowProcess(){
this.runningWorker++;
this.rowProcess(data.toString("utf8"), function(err, resultRow, row, index) {
if (err) {
this.emit("error","row_process", err);
} else {
if (resultRow){
this.emit("record_parsed", resultRow, row, index - 1);
}else{
this.emit("record_parsed",null,row,index -1);
//Empty row detedted. skip
}
//this.push(JSON.stringify([resultRow,row,obj.rowIndex]),"utf8");
}
this.runningWorker--;
this.releaseValve();
this.checkAndFlush();
}.bind(this)); //wait until one row processing finished
this.valve(cb);
}
// console.log(csvRows, csvRows.length);
}
/**
 * Passes the header row to every worker currently in `this.workers` and
 * continues once all of them have reported back.
 *
 * When `param.noheader` is set, the same row is then passed to `rowProcess`
 * with index 0 and `cb` as its callback; otherwise `cb` is called directly
 * with no arguments.
 *
 * @param {string} row - Raw header line.
 * @param {Function} cb - Continuation.
 */
Processor.prototype.processHead = function(row, cb) {
  var self = this;

  function announceHead(worker, done) {
    worker.processHeadRow(row, done);
  }

  function afterAllWorkers() {
    if (!self.param.noheader) {
      cb();
      return;
    }
    self.rowProcess(row, 0, cb); // wait until one row processing finished
  }

  async.each(self.workers, announceHead, afterAllWorkers);
};
// this.recordNumber++;
// }
Processor.prototype.valve = function(cb) {
if (this.runningWorker < this.workers.length) {
// console.log(this.workers.length);
if (this.workers.length>0) {
cb();

@@ -109,2 +109,3 @@ } else {

Processor.prototype.releaseWorker = function() {
this.released=true;
this.workers.forEach(function(worker) {

@@ -115,4 +116,11 @@ worker.release();

Processor.prototype.addWorker = function(worker) {
this.workers.push(worker);
// if (this.released){
// worker.release();
// }else{
this.workers.push(worker);
// }
}
/**
 * Removes and returns the worker at the front of `this.workers`.
 *
 * @returns {*} The first worker, or undefined when the list is empty.
 */
Processor.prototype.getFreeWorker = function() {
  var pool = this.workers;
  return pool.shift();
};
Processor.prototype.processHeadRow = function(headRow, cb) {

@@ -139,9 +147,10 @@ this.parseRules = parserMgr.initParsers(headRow, this.param.checkType);

}
Processor.prototype.rowProcess = function(data, cb) {
Processor.prototype.rowProcess = function(data, curIndex, cb) {
var worker;
if (this.workers.length > 1) { // if multi-worker enabled
// console.log(curIndex,data);
if (this.workers.length > 2) { // for 2+ workers, host process will concentrate on csv parsing while workers will convert csv lines to JSON.
worker = this.workers[(this.recordNumber % (this.workers.length - 1)) + 1];
worker = this.workers[(curIndex % (this.workers.length - 1)) + 1];
} else { //for 2 workers, leverage it as first worker has like 50% cpu used for csv parsing. the weight would be like 0,1,1,0,1,1,0
var index = this.recordNumber % 3;
var index = curIndex % 3;
if (index > 1) {

@@ -155,3 +164,3 @@ index = 1;

}
worker.processRow(data, this.recordNumber, cb);
worker.processRow(data, curIndex, cb);
}

@@ -158,0 +167,0 @@ Processor.prototype.checkAndFlush = function() {

var Writable = require("stream").Writable;
var util = require("util");
var eol=require("os").EOL;
function Result(csvParser) {

@@ -8,3 +8,3 @@ Writable.call(this);

this.param = csvParser.param;
this.buffer = this.param.toArrayString?"":"["+csvParser.getEol();
this.buffer = this.param.toArrayString?"":"["+eol;
this.started = false;

@@ -14,3 +14,3 @@ var self = this;

if (!self.param.toArrayString){
self.buffer += self.parser.getEol() + "]";
self.buffer += eol+ "]";
}

@@ -28,3 +28,3 @@ });

if (this.started) {
this.buffer += "," + this.parser.getEol();
this.buffer += "," + eol;
} else {

@@ -31,0 +31,0 @@ this.started = true;

@@ -16,3 +16,4 @@ /**

var quoteBuff = '';
rowArr.forEach(function(e) {
for (var i=0;i<rowArr.length;i++){
var e=rowArr[i];
if (isToogleQuote(e, quote)) { //if current col has odd quotes, switch quote status

@@ -22,3 +23,3 @@ if (inquote) { //if currently in open quote status, close it and output data

quoteBuff += twoDoubleQuote(e.substr(0, e.length - 1), quote);
row.push(trim ? quoteBuff.toString().trim() : quoteBuff);
row.push(trim ? quoteBuff.trim() : quoteBuff);
quoteBuff = '';

@@ -37,7 +38,7 @@ } else { // currently not in open quote status, open it

if (trim) {
e = e.toString().trim();
e = e.trim();
}
row.push(twoDoubleQuote(e, quote));
}
});
}
return row;

@@ -60,3 +61,3 @@ }

var reg = _getRegExpObj(quote).single;
var match = segment.toString().match(reg);
var match = segment.match(reg);
return match && match.length % 2 !== 0;

@@ -67,3 +68,3 @@ }

var regExp = _getRegExpObj(quote).double;
return segment.toString().replace(regExp, quote);
return segment.replace(regExp, quote);
}

@@ -8,2 +8,3 @@ /**

var utils = require("./utils.js");
function Worker(params, sync) {

@@ -21,6 +22,13 @@ var _param = {

this.sync = sync ? true : false;
this.funcs = require("./workerRunner")();
this.cmdCounter = 0;
if (!this.sync) {
this.child = require("child_process").fork(__dirname + "/workerRunner.js");
this.child = require("child_process").fork(__dirname + "/workerRunner.js",[JSON.stringify(this.param)], {
silent: true,
env:{
child:true
}
});
this.child.on("message", this.onChildMsg.bind(this));
} else {
this.funcs = require("./workerRunner")(this.param);
}

@@ -35,19 +43,22 @@ this.childCallbacks = {};

}
Worker.prototype.processRow = function(data, index, cb) {
Worker.prototype.processRows = function(csvRows, startIndex, cb) {
this.send({
action: "processRow",
data: data,
index: index
action: "processRows",
csvRows: csvRows,
startIndex: startIndex
}, function(err, res) {
if (err){
if (err) {
cb(err);
}else{
if (res){
cb(null, res.resultRow, res.row, res.index);
}else{
cb(null,null,res.row,res.index);
}
} else {
cb(null, res.data);
}
});
}
/**
 * Sends a single-row "processRow" request through `this.send`.
 *
 * @param {string} data - Raw CSV record.
 * @param {number} index - Record index.
 * @param {Function} cb - Passed to `send` as the reply callback.
 */
Worker.prototype.processRow = function(data, index, cb) {
  var request = {
    action: "processRow",
    data: data,
    index: index
  };
  this.send(request, cb);
};
Worker.prototype.onChildMsg = function(m) {

@@ -65,3 +76,2 @@ var action = m.action;

Worker.prototype.send = function(msg, cb) {
msg.param=this.param;
if (this.sync) {

@@ -71,3 +81,3 @@ this.funcs[msg.action](msg, cb);

var action = this.genAction(msg.action);
msg.action=action;
msg.action = action;
this.childCallbacks[action] = cb;

@@ -77,8 +87,2 @@ this.child.send(msg);

}
Worker.prototype.genConstHeadRow = function(number, cb) {
this.send({
action: "genConstHeadRow",
number: number
}, cb);
}
Worker.prototype.processHeadRow = function(headRow, cb) {

@@ -91,4 +95,4 @@ this.send({

Worker.prototype.genAction = function(action) {
var d = "" + new Date().getTime() + Math.round(Math.random() * 1000000);
var d = this.cmdCounter++;
return action + "_" + d;
}
var parserMgr = require("./parserMgr.js");
var utils = require("./utils.js");
if (process.send) {
var inst = init();
var async=require("async");
var Parser = require("./parser");
if (process.env.child) {
var inst = init(JSON.parse(process.argv[2]));
process.on("message", function(m) {

@@ -23,8 +26,13 @@ var action = getAction(m.action);

}
/**
 * Builds the fallback parser for a column without a usable header. The
 * parser is named "field<number>", matches any head, and copies the cell
 * into the result row under the parser's own name.
 *
 * @param {number} number - Column number used in the generated name.
 * @returns {Parser} A new parser instance.
 */
function getConstParser(number) {
  var fieldName = "field" + number;
  var matchAnyHead = /.*/;
  return new Parser(fieldName, matchAnyHead, function(params) {
    var key = this.getName();
    params.resultRow[key] = params.item;
  }, true);
}
function init(param) {
var headRow;
var parseRules=[];
function init() {
var headRow, parseRules;
function genConstHeadRow(msg, cb) {
var Parser = require("./parser");
var number = msg.number;

@@ -34,6 +42,3 @@ parseRules = [];

while (number > 0) {
var p = new Parser("field" + number, /.*/, function(params) {
var name = this.getName();
params.resultRow[name] = params.item;
}, true);
var p =getConstParser(number);
parseRules.unshift(p);

@@ -47,12 +52,44 @@ headRow.unshift(p.getName());

function processHeadRow(msg, cb) {
headRow = msg.row;
var param = msg.param;
parseRules = parserMgr.initParsers(headRow, param.checkType);
// headRow = msg.row;
var row=[];
if (param.headers){
row=param.headers;
}else if(msg.row.length>0){
row=utils.rowSplit(msg.row, param.delimiter, param.quote, param.trim);
}
headRow=row;
if (row.length>0){
parseRules = parserMgr.initParsers(row, param.checkType);
}
cb(null, {});
}
/**
 * Converts a batch of CSV records by calling `processRow` once per record.
 *
 * `msg` is reused as the per-row message: its `data` and `index` fields are
 * overwritten for every record. Indexes start at `msg.startIndex`; a record
 * for which `processRow` yields no result does not consume an index.
 *
 * The error check directly after each `processRow` call only sees errors
 * that `processRow` reports before returning.
 *
 * @param {Object} msg - Carries `csvRows` (the records) and `startIndex`.
 * @param {Function} cb - Called with `(err)` on the first failure, otherwise
 *   with `(null, {data: results})`.
 */
function processRows(msg, cb) {
  var rows = msg.csvRows;
  var nextIndex = msg.startIndex;
  var result = { data: [] };
  var failure = null;

  function collect(err, parsed) {
    if (err) {
      failure = err;
      return;
    }
    if (parsed) {
      result.data.push(parsed);
      return;
    }
    // No result for this record: give its index back.
    nextIndex--;
  }

  for (var pos = 0; pos < rows.length; pos++) {
    msg.data = rows[pos];
    msg.index = nextIndex;
    nextIndex++;
    processRow(msg, collect);
    if (failure) {
      return cb(failure);
    }
  }
  cb(null, result);
}
function processRow(msg, cb) {
var i, item, parser, head,
data = msg.data,
param = msg.param,
index = msg.index;

@@ -65,3 +102,3 @@ var row = utils.rowSplit(data, param.delimiter, param.quote, param.trim);

var hasValue = false;
for (i = 0; i < parseRules.length; i++) {
for (i = 0; i < row.length; i++) {
item = row[i];

@@ -73,3 +110,10 @@ if (param.ignoreEmpty && item === '') {

parser = parseRules[i];
if (!parser){
parser=parseRules[i]=getConstParser(i+1);
}
head = headRow[i];
if (!head || head===""){
head=headRow[i]="field"+(i+1);
parser.head=head;
}
parser.parse({

@@ -87,3 +131,3 @@ head: head,

cb(null, {
resultRow: resultRow,
jsonRaw: JSON.stringify(resultRow),
row: row,

@@ -93,6 +137,3 @@ index: index

} else {
cb(null,{
row:row,
index:index
});
cb(null,null);
}

@@ -104,5 +145,6 @@

processRow: processRow,
genConstHeadRow: genConstHeadRow
genConstHeadRow: genConstHeadRow,
processRows:processRows
}
}
module.exports = init;

@@ -21,5 +21,6 @@ {

],
"version": "0.4.8",
"version": "0.5.0",
"keywords": [
"csv",
"csvtojson",
"json",

@@ -26,0 +27,0 @@ "convert",

@@ -12,6 +12,13 @@ # CSVTOJSON

# Version 0.5
Version 0.5 contains a big refactor, especially for performance. The parser is about **7 times** faster than version 0.4.
## Menu
* [Installation](#installation)
* [Usage](#usage)
* [Library](#library)
* [Convert from a file](#from-file)
* [Convert from a web resource / Readable stream](#from-web)
* [Convert from CSV string](#from-string)
* [Parameters](#params)

@@ -45,9 +52,10 @@ * [Customised Parser](#parser)

**From File**
#### From File
You can use File stream
```js
//Converter Class
var Converter = require("csvtojson").Converter;
var converter = new Converter({
checkType:false //turn off auto type check to increase performance
});
var converter = new Converter({});

@@ -63,3 +71,3 @@ //end_parsed will be emitted once parsing finished

**From Web Server**
Or use fromFile convenient function

@@ -69,2 +77,15 @@ ```js

var Converter = require("csvtojson").Converter;
var converter = new Converter({});
converter.fromFile("./file.csv",function(err,result){
});
```
#### From Web
To convert CSV data from any readable stream, simply pipe in the data.
```js
//Converter Class
var Converter = require("csvtojson").Converter;
var converter = new Converter({constructResult:false}); //for big csv data

@@ -81,3 +102,3 @@

**From String**
#### From String

@@ -98,3 +119,3 @@ ```js

>csvtojson ./myCSVFile
>csvtojson ./myCSVFile <option1=value>

@@ -105,5 +126,5 @@ Or use pipe:

To start a webserver
Check current version:
>csvtojson startserver [options]
>csvtojson version

@@ -132,7 +153,7 @@ Advanced usage with parameters support, check help:

* **trim**: Indicate if parser trim off spaces surrounding column content. e.g. " content " will be trimmed to "content". Default: true
* **checkType**: This parameter turns on and off weather check field type. default is true. Change to false to increase performance. See [Field type](#field-type)
* **checkType**: This parameter turns field type checking on or off. Default: true. See [Field type](#field-type)
* **toArrayString**: Stringify the stream output to JSON array. This is useful when pipe output to a file which expects stringified JSON array. default is false and only stringified JSON (without []) will be pushed to downstream.
* **ignoreEmpty**: Ignore empty values in CSV columns. If a column value is not given, set this to true to skip it. Default: false.
* **workerNum**: Number of worker processes. The worker process will use multi-cores to help process CSV data. Set to number of Core to improve the performance of processing large csv file. Keep 1 for small csv files. Default 1.
* **fork**: Use another CPU core to process the CSV stream.
* **fork(Deprecated, same as workerNum=2)**: Use another CPU core to process the CSV stream.
* **noheader**:Indicating csv data has no header row and first row is data row. Default is false. See [header configuration](#header-configuration)

@@ -487,3 +508,2 @@ * **headers**: An array to specify the headers of CSV data. If --noheader is false, this value will override CSV header row. Default: null. Example: ["my field","name"]. See [header configuration](#header-configuration)

Implict type check will consume a significant amount of CPU power which will lead the CSV conversion like 2-3 times slower than turning it off. If this is not needed, simply set "checkType:false".

@@ -503,4 +523,4 @@ ## Explicit Type

```csv
string#appNumber, string#finished, startDate
201401010002, true, 2014-01-01
string#appNumber, string#finished, string#msg
201401010002, true, {"hello":"world","total":23}
```

@@ -513,3 +533,3 @@ The data will be converted to:

"finished":"true",
"startDate":"2014-01-01"
"msg":"{\"hello\":\"world\",\"total\":23}"
}

@@ -535,8 +555,9 @@ ```

```
csvtojson --workerNum=4 ./myfile.csv
csvtojson --workerNum=3 ./myfile.csv
```
It is worth mentioning that for small CSV files it actually costs more time to create the processes and keep up the communication between them. Therefore, use fewer workers for small CSV files.
### Fork Process
Node.JS is running on single thread. You will not want to convert a large csv file on the same process where your node.js webserver is running. csvtojson gives an option to fork the whole conversion process to a new system process while the origin process will only pipe the input and result in and out. It very simple to enable this feature:
### Fork Process (Deprecated since 0.5.0)
*Node.js runs on a single thread. You will not want to convert a large CSV file in the same process where your Node.js web server is running. csvtojson gives an option to fork the whole conversion into a new system process, while the original process only pipes the input in and the result out. It is very simple to enable this feature:

@@ -549,4 +570,6 @@ ```js

```
Same as multi-workers, fork a new process will cause extra cost on process communication and life cycle management. Use it wisely.
As with multiple workers, forking a new process incurs extra cost for inter-process communication and life cycle management. Use it wisely.*
Since 0.5.0, fork=true is the same as workerNum=2.
### Header configuration

@@ -560,3 +583,3 @@

CC102-PDMI-001,eClass_5.1.3,10/3/2014,12,40,green,40
CC200-009-001,eClass_5.1.3,11/3/2014,5,3,blue,38
CC200-009-001,eClass_5.1.3,11/3/2014,5,3,blue,38,extra field!
```

@@ -575,3 +598,3 @@

{"field1":"CC102-PDMI-001","field2":"eClass_5.1.3","field3":"10/3/2014","field4":"12","field5":"40","field6":"green","field7":"40"},
{"field1":"CC200-009-001","field2":"eClass_5.1.3","field3":"11/3/2014","field4":"5","field5":"3","field6":"blue","field7":"38"}
{"field1":"CC200-009-001","field2":"eClass_5.1.3","field3":"11/3/2014","field4":"5","field5":"3","field6":"blue","field7":"38","field8":"extra field!"}
]

@@ -596,8 +619,8 @@ ```

[
{"hello":"CC102-PDMI-001","csv":"eClass_5.1.3","field1":"10/3/2014","field2":12,"field3":40,"field4":"green","field5":40},
{"hello":"CC200-009-001","csv":"eClass_5.1.3","field1":"11/3/2014","field2":5,"field3":3,"field4":"blue","field5":38}
{"hell":"CC102-PDMI-001","csv":"eClass_5.1.3","field3":"10/3/2014","field4":"12","field5":"40","field6":"green","field7":"40"},
{"hell":"CC200-009-001","csv":"eClass_5.1.3","field3":"11/3/2014","field4":"5","field5":"3","field6":"blue","field7":"38","field8":"extra field!"}
]
```
If length of headers array is smaller than the column of csv, converter will automatically fill the column with "field*".
If the length of the headers array is smaller than the number of columns in the CSV, the converter will automatically name the remaining columns "field*", where * is the current column index starting from 1.

@@ -631,2 +654,9 @@ Also we can use it in code:

## 0.5.0
* Fixed some bugs
* Performance improvement
* **Implicit type detection for numbers now uses the RegExp `/^[-+]?[0-9]*\.?[0-9]+$/`. Previously 00131 was treated as a string; now it is recognised as a number.**
* **If a column has no header, its current column index is now used as the column name: 'field*'. Previously the parser used a fixed index starting from 1. E.g. for CSV data 'aa,bb,cc' with header 'a,b', it previously converted to {'a':'aa','b':'bb','field1':'cc'}; now it converts to {'a':'aa','b':'bb','field3':'cc'}.**
## 0.4.7

@@ -633,0 +663,0 @@ * ignoreEmpty now ignores empty rows as well

@@ -313,3 +313,3 @@ var Converter = require("../libs/core/Converter.js");

assert(j.b==="eClass_5.1.3");
assert(j.field1==="10/3/2014");
assert(j.field3==="10/3/2014");
done();

@@ -316,0 +316,0 @@ });

@@ -17,3 +17,2 @@ var Converter = require("../libs/core/Converter.js");

it("should set eol ", function(done) {

@@ -57,3 +56,3 @@

conv.fromString(rs,function(err,json){
assert.equal(json[0].field1,40);
assert.equal(json[0].field7,40);
assert.equal(json[0].a,"CC102-PDMI-001");

@@ -74,2 +73,25 @@ done();

});
// Parses the "noheaderWithVaryColumnNum" fixture with noheader:true and
// expects two rows, the second of which exposes a "field4" value equal to 7.
it ("should parse no header with dynamic column number", function (done) {
  var fixturePath = __dirname + "/data/noheaderWithVaryColumnNum";
  var csvText = fs.readFileSync(fixturePath, "utf8");
  var converter = new Converter({ noheader: true });

  converter.fromString(csvText, function (err, rows) {
    assert.equal(rows.length, 2);
    assert.equal(rows[1].field4, 7);
    done();
  });
});
// Parses the "tabsv" fixture using a tab delimiter and expects the first
// row's "Idevise" column to equal the empty string.
it ("should parse tabsv data with dynamic columns", function (done) {
  var fixturePath = __dirname + "/data/tabsv";
  var tabSeparatedText = fs.readFileSync(fixturePath, "utf8");
  var converter = new Converter({ delimiter: "\t" });

  converter.fromString(tabSeparatedText, function (err, rows) {
    assert.equal(rows[0].Idevise, "");
    done();
  });
});
// it ("should convert big csv",function(done){

@@ -76,0 +98,0 @@ // // var rs=fs.createReadStream(__dirname+"/data/large-csv-sample.csv");

@@ -1,35 +0,35 @@

var CSVLine=require("../libs/core/CSVLine");
var assert=require("assert");
var fs=require("fs");
// CSVLine suite: each fixture file is piped into a CSVLine instance and the
// number of "data" events it emits is compared with the expected count once
// the stream ends.
describe("CSVLine", function () {
  it ("should break data into csv lines", function (done) {
    var source = fs.createReadStream(__dirname + "/data/large-csv-sample.csv");
    var lineStream = new CSVLine({});
    var emitted = 0;

    source.pipe(lineStream);
    lineStream.on("data", function () {
      emitted += 1;
      // Print a progress marker every 10000 "data" events.
      if (emitted % 10000 === 0) {
        console.log(emitted);
      }
    });
    lineStream.on("end", function () {
      assert(emitted === 5291);
      done();
    });
  });

  it ("should handle long rows", function (done) {
    var source = fs.createReadStream(__dirname + "/data/dataWithLongRow");
    var lineStream = new CSVLine({});
    var emitted = 0;

    source.pipe(lineStream);
    lineStream.on("data", function (data) {
      emitted += 1;
    });
    lineStream.on("end", function () {
      assert(emitted === 2);
      done();
    });
  });
});
// var CSVLine=require("../libs/core/CSVLine");
// var assert=require("assert");
// var fs=require("fs");
// describe("CSVLine",function(){
// it ("should break data into csv lines",function(done){
// var rs=fs.createReadStream(__dirname+"/data/large-csv-sample.csv");
// // var rs=fs.createReadStream("/Users/kxiang/tmp/csvdata");
// var conv=new CSVLine({});
// rs.pipe(conv);
// var count=0;
// conv.on("data",function(){
// count++;
// if (count % 10000 === 0){
// console.log(count);
// }
// });
// conv.on("end",function(){
// assert(count ===5291);
// done();
// })
// });
// it ("should handle long rows",function(done){
// var rs=fs.createReadStream(__dirname+"/data/dataWithLongRow");
// var conv=new CSVLine({});
// rs.pipe(conv);
// var count=0;
// conv.on("data",function(data){
// count++;
// });
// conv.on("end",function(){
// assert(count === 2);
// done();
// })
// });
// })

@@ -144,3 +144,3 @@ var assert = require("assert");

assert(res[0].fieldA.children[0].name === "Oscar");
assert(res[0].fieldA.children[0].id === "0023");
assert(res[0].fieldA.children[0].id === 23);
assert(res[0].fieldA.children[1].name === "Tikka");

@@ -147,0 +147,0 @@ assert(res[0].fieldA.children[1].employee.length === 2);

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc