csvtojson - npm Package Compare versions

Comparing version 0.5.0 to 0.5.1

libs/core/Transformer.js


bin/csvtojson.js

@@ -19,3 +19,2 @@ function csvtojson() {

var parsedCmd;
function _showHelp(errno) {

@@ -22,0 +21,0 @@ var key;

@@ -52,2 +52,6 @@ {

"type": "boolean"
},
"--eol": {
"desc": "Explicitly specify the end of line character to use.",
"type": "string"
}

@@ -54,0 +58,0 @@ },
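The new `--eol` flag corresponds to the converter's `eol` parameter. A minimal sketch of the programmatic equivalent (an assumption based on the 0.5.1 API, where constructor options mirror the CLI flags):

```js
var Converter = require("csvtojson").Converter;

// Explicitly set the end-of-line character instead of relying on auto-detection.
var csvConverter = new Converter({
  eol: "\r\n"
});
csvConverter.fromString("a,b\r\n1,2", function(err, result) {
  if (!err) {
    console.log(result); // e.g. [{ "a": 1, "b": 2 }]
  }
});
```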

@@ -173,2 +173,6 @@ var util = require("util");

var row = obj.row;
if (this.transform && typeof this.transform === "function") {
  this.transform(resultRow, row, index);
  resultJSONStr = JSON.stringify(resultRow);
}
this.emit("record_parsed", resultRow, row, index);

@@ -296,10 +300,17 @@ if (this.param.toArrayString && this.recordNum > 0) {

Converter.prototype.getEol = function(data) {
  function contains(str, subString) {
    return str.lastIndexOf(subString) > -1;
  }
  if (!this.param.eol && data) {
    // removed in 0.5.1: searched the whole chunk, preferring \r\n
    this.param.eol = contains(data, '\r\n') ? '\r\n' :
      contains(data, '\n') ? '\n' :
      contains(data, '\r') ? '\r' :
      eol;
    // added in 0.5.1: use the first line break that appears in the data
    for (var i = 0; i < data.length; i++) {
      if (data[i] === "\r") {
        if (data[i + 1] === "\n") {
          this.param.eol = "\r\n";
        } else {
          this.param.eol = "\r";
        }
        return this.param.eol;
      } else if (data[i] === "\n") {
        this.param.eol = "\n";
        return this.param.eol;
      }
    }
    this.param.eol = eol;
  }
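The behavioral change: 0.5.0 preferred `\r\n` whenever it appeared anywhere in the chunk, while 0.5.1 takes whichever line break occurs first. A standalone sketch of the new rule (a hypothetical helper, not part of the library):

```js
// Return the first line break found in data, or fallback if none is found.
function detectEol(data, fallback) {
  for (var i = 0; i < data.length; i++) {
    if (data[i] === "\r") {
      return data[i + 1] === "\n" ? "\r\n" : "\r";
    }
    if (data[i] === "\n") {
      return "\n";
    }
  }
  return fallback;
}

detectEol("a,b\nc,d\r\n", "\n"); // "\n": the first break wins, even though \r\n appears later
```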

@@ -306,0 +317,0 @@

var arrReg = /\[([0-9]*)\]/;
var numReg = /^[-+]?[0-9]*\.?[0-9]+$/;
function parseParamType(type, item) {
  if (type === 'number') {
    var rtn = parseFloat(item);
    if (isNaN(rtn)) {
      return 0;
    } else {
      return rtn;
    }
  } else if (type === '') {
    var trimed = item.trim();
    if (numReg.test(trimed)) {
      return parseFloat(trimed);
    } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
      return false;
    } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
      return true;
    } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}") {
      try {
        return JSON.parse(trimed);
      } catch (e) {
        return item;
      }
    } else {
      return item;
    }
  }
  return item;
}
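For illustration, here is how `parseParamType` above treats a few sample cells (a sketch; the return values follow the branches shown):

```js
parseParamType('number', 'abc');    // 0, parseFloat yields NaN so the number type falls back to 0
parseParamType('number', '12.5');   // 12.5
parseParamType('', ' 23 ');         // 23, numeric string is trimmed and parsed
parseParamType('', 'TRUE');         // true, 4-letter case-insensitive boolean
parseParamType('', '{"a": 1}');     // { a: 1 }, brace-wrapped valid JSON is parsed
parseParamType('', '{"a": }');      // '{"a": }', JSON.parse throws and the original string is returned
```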

@@ -85,5 +55,5 @@ function processHead(pointer, headArr, arrReg, flatKeys) {

} else {
  pointer[key] = params.config && params.config.checkType ? parseParamType(this.type, params.item) : params.item;
  pointer[key] = params.item;
}
}
};
var parserMgr = require("./parserMgr.js");
var utils = require("./utils.js");
var async=require("async");
var async = require("async");
var Parser = require("./parser");

@@ -26,11 +26,13 @@ if (process.env.child) {

}
function getConstParser(number){
  return new Parser("field" + number, /.*/, function(params) {
    var name = this.getName();
    params.resultRow[name] = params.item;
  }, true);
function getConstParser(number) {
  return new Parser("field" + number, /.*/, function(params) {
    var name = this.getName();
    params.resultRow[name] = params.item;
  }, true);
}
function init(param) {
  var headRow;
  var parseRules=[];
  var parseRules = [];

@@ -42,3 +44,3 @@ function genConstHeadRow(msg, cb) {

while (number > 0) {
  var p =getConstParser(number);
  var p = getConstParser(number);
  parseRules.unshift(p);

@@ -53,10 +55,10 @@ headRow.unshift(p.getName());

// headRow = msg.row;
var row=[];
if (param.headers){
  row=param.headers;
}else if(msg.row.length>0){
  row=utils.rowSplit(msg.row, param.delimiter, param.quote, param.trim);
var row = [];
if (param.headers) {
  row = param.headers;
} else if (msg.row.length > 0) {
  row = utils.rowSplit(msg.row, param.delimiter, param.quote, param.trim);
}
headRow=row;
if (row.length>0){
headRow = row;
if (row.length > 0) {
  parseRules = parserMgr.initParsers(row, param.checkType);

@@ -66,28 +68,32 @@ }

}
function processRows(msg,cb){
  var csvRows=msg.csvRows;
  var startIndex=msg.startIndex;
  var res={data:[]};
  var count=csvRows.length;
  var _err=null;
  for (var i=0;i<csvRows.length;i++){
    msg.data=csvRows[i];
    msg.index=startIndex++;
    processRow(msg,function(err,r){
      if (err){
        _err=err;
      }else{
        if (r){
          res.data.push(r);
        }else{
          startIndex--;
        }
      }
    })
    if (_err){
      return cb(_err);
    }
  }
  cb(null,res);
}
function processRows(msg, cb) {
  var csvRows = msg.csvRows;
  var startIndex = msg.startIndex;
  var res = {
    data: []
  };
  var count = csvRows.length;
  var _err = null;
  for (var i = 0; i < csvRows.length; i++) {
    msg.data = csvRows[i];
    msg.index = startIndex++;
    processRow(msg, function(err, r) {
      if (err) {
        _err = err;
      } else {
        if (r) {
          res.data.push(r);
        } else {
          startIndex--;
        }
      }
    })
    if (_err) {
      return cb(_err);
    }
  }
  cb(null, res);
}
function processRow(msg, cb) {

@@ -110,10 +116,13 @@ var i, item, parser, head,

parser = parseRules[i];
if (!parser){
  parser=parseRules[i]=getConstParser(i+1);
if (!parser) {
  parser = parseRules[i] = getConstParser(i + 1);
}
head = headRow[i];
if (!head || head===""){
  head=headRow[i]="field"+(i+1);
  parser.head=head;
if (!head || head === "") {
  head = headRow[i] = "field" + (i + 1);
  parser.head = head;
}
if (param.checkType) {
  item = parseParamType(parser.type, item, param);
}
parser.parse({

@@ -136,6 +145,36 @@ head: head,

} else {
  cb(null,null);
  cb(null, null);
}
}
var numReg = /^[-+]?[0-9]*\.?[0-9]+$/;
function parseParamType(type, item) {
  if (type === 'number') {
    var rtn = parseFloat(item);
    if (isNaN(rtn)) {
      return 0;
    } else {
      return rtn;
    }
  } else if (type === '') {
    var trimed = item.trim();
    if (numReg.test(trimed)) {
      return parseFloat(trimed);
    } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
      return false;
    } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
      return true;
    } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}") {
      try {
        return JSON.parse(trimed);
      } catch (e) {
        return item;
      }
    } else {
      return item;
    }
  }
  return item;
}
return {

@@ -145,5 +184,5 @@ processHeadRow: processHeadRow,

genConstHeadRow: genConstHeadRow,
processRows:processRows
processRows: processRows
}
}
module.exports = init;

@@ -21,3 +21,3 @@ {

],
"version": "0.5.0",
"version": "0.5.1",
"keywords": [

@@ -24,0 +24,0 @@ "csv",

@@ -14,3 +14,3 @@ # CSVTOJSON

Version 0.5 contains big refactor expecially for performance. The parser is like **7 times** faster than version 0.4.
Version 0.5 contains a big refactor, especially for performance. The parser is about **7 times** faster than version 0.4.

@@ -25,9 +25,10 @@ ## Menu

* [Parameters](#params)
* [Customised Parser](#parser)
* [Webserver](#webserver)
* [Result Transform](#result-transform)
* [Synchronous Transformer](#synchronous-transformer)
* [Asynchronous Transformer](#asynchronous-transformer)
* [Convert to other data type](#convert-to-other-data-type)
* [Events](#events)
* [Built-in Parsers](#default-parsers)
* [Flags](#flags)
* [Big CSV File Streaming](#big-csv-file)
* [Process Big CSV File in CLI](#convert-big-csv-file-with-command-line-tool)
* [Column Array](#column-array)
* [Parse String](#parse-string)

@@ -39,2 +40,3 @@ * [Empowered JSON Parser](#empowered-json-parser)

* [Error Handling](#error-handling)
* [Customised Parser](#parser)
* [Change Log](#change-log)

@@ -145,3 +147,3 @@

* **constructResult**: true/false. Whether to constrcut final json object in memory which will be populated in "end_parsed" event. Set to false if deal with huge csv data. default: true.
* **constructResult**: true/false. Whether to construct the final json object in memory, which will be populated in the "end_parsed" event. Set to false when dealing with huge csv data. default: true.
* **delimiter**: delimiter used for separating columns. default: ","
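A minimal sketch combining these two parameters (parameter names as documented above; the file path is hypothetical):

```js
var fs = require("fs");
var Converter = require("csvtojson").Converter;

// Stream a large semicolon-delimited file without building the full result in memory.
var csvConverter = new Converter({
  constructResult: false, // don't accumulate the final JSON array
  delimiter: ";"
});
csvConverter.on("record_parsed", function(resultRow) {
  // handle each row as it is parsed
});
fs.createReadStream("path/to/huge.csv").pipe(csvConverter);
```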

@@ -162,72 +164,144 @@ * **quote**: If a column contains delimiter, it is able to use quote character to surround the column content. e.g. "hello, world" wont be split into two columns while parsing. default: " (double quote)

All parameters can be used in the Command Line tool. See:
```
csvtojson --help
```
# Result Transform
To transform the JSON result (e.g. change the value of one column), simply add a 'transform handler'.
## Synchronous Transformer
```js
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({});
csvConverter.transform = function(json, row, index) {
  json["rowIndex"] = index;
  /* some other examples:
  delete json["myfield"]; // remove a field
  json["dateOfBirth"] = new Date(json["dateOfBirth"]); // convert a field type
  */
};
csvConverter.fromString(csvString, function(err, result) {
  // every result row will have a field 'rowIndex' indicating its row number in the csv data:
  /*
  [{
    field1: value1,
    rowIndex: 0
  }]
  */
});
```
As shown in the example above, any changes applied to the result json will be pushed downstream and to the "record_parsed" event.
## Asynchronous Transformer
Asynchronous transformation can be achieved either through the "record_parsed" event or by creating a Writable stream.
### Use record_parsed
To transform data asynchronously, it is suggested to use csvtojson with [Async Queue](https://github.com/caolan/async#queue).
This is mainly used when the transformation of each csv row needs to be mashed with data retrieved from an external source such as a database, server, or file system.
However, this approach will **not** change the json result pushed downstream.
Here is an example:
```js
var Conv = require("csvtojson").Converter;
var async = require("async");
var rs = require("fs").createReadStream("path/to/csv"); // or any readable stream to csv data.
var q = async.queue(function(json, callback) {
  // process the json asynchronously.
  require("request").get("http://myserver/user/" + json.userId, function(err, user) {
    // do the data mash here
    json.user = user;
    callback();
  });
}, 10); // 10 concurrent workers at the same time
q.saturated = function() {
  rs.pause(); // if the queue is full, it is suggested to pause the readstream so csvtojson will suspend populating json data. It is ok not to do so if the CSV data is not very large.
};
q.empty = function() {
  rs.resume(); // resume the paused readable stream. You may need to check whether the readable stream isPaused() (since node 0.12) or has finished.
};
var conv = new Conv({construct: false});
conv.transform = function(json) {
  q.push(json);
};
conv.on("end_parsed", function() {
  q.drain = function() {
    // code to run when the queue has finished processing.
  };
});
rs.pipe(conv);
```
In the example above, the transformation happens as each csv row is processed. The related user info is pulled from a web server and mashed into the json result.
There will be at most 10 data transformation workers running concurrently, with the help of Async Queue.
### Use Stream
It is possible to create a Writable (or Transform) stream which processes data asynchronously. See [Here](https://nodejs.org/dist/latest-v4.x/docs/api/stream.html#stream_class_stream_transform) for more details.
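A minimal sketch of that approach using Node's standard stream API (the async work is a hypothetical placeholder; the chunk format is whatever the upstream converter pushes, JSON text in 0.5.x):

```js
var Transform = require("stream").Transform;
var util = require("util");

// A Transform stream whose _transform callback completes asynchronously.
function AsyncTransform(options) {
  Transform.call(this, options);
}
util.inherits(AsyncTransform, Transform);

AsyncTransform.prototype._transform = function(chunk, encoding, done) {
  var self = this;
  // Simulate async work (e.g. a database lookup) before pushing data downstream.
  setTimeout(function() {
    self.push(chunk);
    done();
  }, 10);
};

// Usage sketch: pipe the converter's output through the async stage.
// converter.pipe(new AsyncTransform()).pipe(process.stdout);
```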
# WebServer
It is able to start the web server through code.
```js
var webServer = require("csvtojson").interfaces.web;
var server = webServer.startWebServer({
  "port": "8801",
  "urlpath": "/parseCSV"
});
```
~~It will return an [expressjs](http://expressjs.com/) Application. You can add your own web app content there.~~ It will return an http.Server object.
## Convert to other data type
Below is an example of result transformation which converts csv data to a column array rather than a JSON object.
```js
var Converter = require("csvtojson").Converter;
var columArrData = __dirname + "/data/columnArray";
var rs = fs.createReadStream(columArrData);
var result = {};
var csvConverter = new Converter();
// end_parsed will be emitted once parsing has finished
csvConverter.on("end_parsed", function(jsonObj) {
  console.log(result);
  console.log("Finished parsing");
  done();
});
// record_parsed will be emitted each time a row has been parsed
csvConverter.on("record_parsed", function(resultRow, rawRow, rowIndex) {
  for (var key in resultRow) {
    if (!result[key] || !(result[key] instanceof Array)) {
      result[key] = [];
    }
    result[key][rowIndex] = resultRow[key];
  }
});
rs.pipe(csvConverter);
```
Here is an example:
```csv
TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED
1395426422,n,10028,1213,5461
1395426422,n,10013,9954,13560
1395426422,n,10109,221391500,141836
1395426422,n,10007,53448,308549
1395426422,n,10022,15506,72125
```
It will be converted to:
```json
{
  "TIMESTAMP": ["1395426422", "1395426422", "1395426422", "1395426422", "1395426422"],
  "UPDATE": ["n", "n", "n", "n", "n"],
  "UID": ["10028", "10013", "10109", "10007", "10022"],
  "BYTES SENT": ["1213", "9954", "221391500", "53448", "15506"],
  "BYTES RCVED": ["5461", "13560", "141836", "308549", "72125"]
}
```
# Events

@@ -257,14 +331,15 @@

# Default Parsers
There are default parsers in the library:
**JSON**: Any valid JSON structure (array, nested json) is supported. See [Empowered JSON Parser](#empowered-json-parser)
**Omitted column**: For column heads starting with "\*omit\*" e.g. "\*omit\*id", the parser will omit the column's data.
**Flat JSON**: Mark a head column as the key of its JSON result. e.g.
```csv
*flat*user.name, user.age
Joe , 40
```
# Flags
There are flags in the library:
\*omit\*: Omit a column. The values in the column will not be built into the JSON result.
\*flat\*: Mark a head column as the key of its JSON result.
Example:
```csv
*flat*user.name, user.age, *omit*user.gender
Joe , 40, Male
```
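For illustration, assuming the default checkType behaviour, the flagged sample above would be expected to produce something like the following: the \*flat\* key stays literal, the default head nests on ".", and the \*omit\* column is dropped.

```json
[{
  "user.name": "Joe",
  "user": {
    "age": 40
  }
}]
```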

@@ -318,54 +393,4 @@

# Column Array
To convert csv data to a column array, you have to construct the result in memory. See the example below.
```js
var columArrData = __dirname + "/data/columnArray";
var rs = fs.createReadStream(columArrData);
var result = {};
var csvConverter = new CSVAdv();
// end_parsed will be emitted once parsing has finished
csvConverter.on("end_parsed", function(jsonObj) {
  console.log(result);
  console.log("Finished parsing");
  done();
});
// record_parsed will be emitted each time a row has been parsed
csvConverter.on("record_parsed", function(resultRow, rawRow, rowIndex) {
  for (var key in resultRow) {
    if (!result[key] || !(result[key] instanceof Array)) {
      result[key] = [];
    }
    result[key][rowIndex] = resultRow[key];
  }
});
rs.pipe(csvConverter);
```
Here is an example:
```csv
TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED
1395426422,n,10028,1213,5461
1395426422,n,10013,9954,13560
1395426422,n,10109,221391500,141836
1395426422,n,10007,53448,308549
1395426422,n,10022,15506,72125
```
It will be converted to:
```json
{
"TIMESTAMP": ["1395426422", "1395426422", "1395426422", "1395426422", "1395426422"],
"UPDATE": ["n", "n", "n", "n", "n"],
"UID": ["10028", "10013", "10109", "10007", "10022"],
"BYTES SENT": ["1213", "9954", "221391500", "53448", "15506"],
"BYTES RCVED": ["5461", "13560", "141836", "308549", "72125"]
}
```
# Parse String

@@ -643,2 +668,66 @@ To parse a string, simply call fromString(csvString,callback) method. The callback parameter is optional.
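A minimal sketch of `fromString` (whether numbers come back typed depends on the checkType parameter):

```js
var Converter = require("csvtojson").Converter;

var csvString = "name,age\nJoe,40\nAnn,25";
var csvConverter = new Converter({});
csvConverter.fromString(csvString, function(err, result) {
  if (err) {
    return console.error(err);
  }
  console.log(result); // e.g. [{ "name": "Joe", "age": 40 }, { "name": "Ann", "age": 25 }]
});
```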

# Parser
**Parser will be replaced by [Result Transform](#result-transform) and [Flags](#flags).**
This feature will be disabled in the future.
CSVTOJSON allows adding customised parsers which concentrate on what to parse and how to parse.
It is the main power of the tool: the developer only needs to concentrate on how to deal with the data, while other concerns like streaming, memory, web, cli etc. are handled automatically.
How to add a customised parser:
```js
//Parser Manager
var parserMgr = require("csvtojson").parserMgr;
parserMgr.addParser("myParserName", /^\*parserRegExp\*/, function(params) {
  var columnTitle = params.head; // params.head is like: *parserRegExp*ColumnName
  var fieldName = columnTitle.replace(this.regExp, ""); // this.regExp is the regular expression above.
  params.resultRow[fieldName] = "Hello my parser" + params.item;
});
```
parserMgr's addParser function takes three parameters:
1. parser name: the name of your parser. It should be unique.
2. Regular Expression: It is used to test whether a column of CSV data should use this parser. In the example above, any column whose first row starts with *parserRegExp* will use it.
3. Parse function callback: It is where the parsing happens. The converter works row by row, so the function is called each time a cell of CSV data needs to be parsed.
The parameter of the parse function is a JSON object. It contains the following fields:
**head**: The column's first-row data. It generally contains field information. e.g. *array*items
**item**: The data inside the current cell. e.g. item1
**itemIndex**: the index of the current cell in a row. e.g. 0
**rawRow**: the reference of the current row in array format. e.g. ["item1", 23, "hello"]
**resultRow**: the reference of the result row in JSON format. e.g. {"name":"Joe"}
**rowIndex**: the index of the current row in the CSV data. It starts from 1 since 0 is the head. e.g. 1
**resultObject**: the reference of the result object in JSON format. It always has a field called csvRows which is in Array format. It changes as parsing goes on. e.g.
```json
{
  "csvRows": [
    {
      "itemName": "item1",
      "number": 10
    },
    {
      "itemName": "item2",
      "number": 4
    }
  ]
}
```
# Change Log

@@ -645,0 +734,0 @@

@@ -198,2 +198,4 @@ var Converter = require("../libs/core/Converter.js");

assert(d.column9 === true);
assert(d.column10[0]===23);
assert(d.column10[1]===31);
});

@@ -221,2 +223,4 @@ csvConverter.on("end_parsed",function (){

assert(d.column9 === "true");
assert(d.column10[0]==="23");
assert(d.column10[1]==="31");
});

@@ -223,0 +227,0 @@ csvConverter.on("end_parsed",function (){

@@ -94,2 +94,29 @@ var Converter = require("../libs/core/Converter.js");

});
it ("should use first line break as eol",function(done){
var testData = __dirname + "/data/testEol";
var conv=new Converter({
noheader:true
});
conv.fromFile(testData,function(err,json){
assert(!err);
done();
});
})
it ("should use sync transform",function(done){
var testData = __dirname + "/data/complexJSONCSV";
var rs = fs.createReadStream(testData);
var conv=new Converter({});
conv.transform=function(json,row,index){
json.rowNum=index;
}
conv.on("record_parsed",function(j){
assert(j.rowNum>=0);
});
conv.on("end_parsed",function(res){
assert(res[0].rowNum===0);
assert(res[1].rowNum===1);
done();
});
rs.pipe(conv);
});
// it ("should convert big csv",function(done){

@@ -96,0 +123,0 @@ // // var rs=fs.createReadStream(__dirname+"/data/large-csv-sample.csv");

