csvtojson - npm Package Compare versions

Comparing version 0.5.0 to 0.5.1

libs/core/Transformer.js


bin/csvtojson.js

@@ -19,3 +19,2 @@ function csvtojson() {

var parsedCmd;
function _showHelp(errno) {

@@ -22,0 +21,0 @@ var key;

@@ -52,2 +52,6 @@ {

"type": "boolean"
},
"--eol": {
"desc": "Explicitly specify the end of line character to use.",
"type": "string"
}

@@ -54,0 +58,0 @@ },
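The new `--eol` flag corresponds to the converter's `eol` parameter. A minimal sketch of the programmatic equivalent (an assumption based on the 0.5.1 API, where constructor options mirror the CLI flags):

```js
var Converter = require("csvtojson").Converter;

// Explicitly set the end-of-line character instead of relying on auto-detection.
var csvConverter = new Converter({
  eol: "\r\n"
});
csvConverter.fromString("a,b\r\n1,2", function(err, result) {
  if (!err) {
    console.log(result); // e.g. [{ "a": 1, "b": 2 }]
  }
});
```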

@@ -173,2 +173,6 @@ var util = require("util");

var row = obj.row;
if (this.transform && typeof this.transform === "function") {
  this.transform(resultRow, row, index);
  resultJSONStr = JSON.stringify(resultRow);
}
this.emit("record_parsed", resultRow, row, index);

@@ -296,10 +300,17 @@ if (this.param.toArrayString && this.recordNum > 0) {

Converter.prototype.getEol = function(data) {
  function contains(str, subString) {
    return str.lastIndexOf(subString) > -1;
  }
  if (!this.param.eol && data) {
    // removed in 0.5.1: searched the whole chunk, preferring \r\n
    this.param.eol = contains(data, '\r\n') ? '\r\n' :
      contains(data, '\n') ? '\n' :
      contains(data, '\r') ? '\r' :
      eol;
    // added in 0.5.1: use the first line break that appears in the data
    for (var i = 0; i < data.length; i++) {
      if (data[i] === "\r") {
        if (data[i + 1] === "\n") {
          this.param.eol = "\r\n";
        } else {
          this.param.eol = "\r";
        }
        return this.param.eol;
      } else if (data[i] === "\n") {
        this.param.eol = "\n";
        return this.param.eol;
      }
    }
    this.param.eol = eol;
  }
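The behavioral change: 0.5.0 preferred `\r\n` whenever it appeared anywhere in the chunk, while 0.5.1 takes whichever line break occurs first. A standalone sketch of the new rule (a hypothetical helper, not part of the library):

```js
// Return the first line break found in data, or fallback if none is found.
function detectEol(data, fallback) {
  for (var i = 0; i < data.length; i++) {
    if (data[i] === "\r") {
      return data[i + 1] === "\n" ? "\r\n" : "\r";
    }
    if (data[i] === "\n") {
      return "\n";
    }
  }
  return fallback;
}

detectEol("a,b\nc,d\r\n", "\n"); // "\n": the first break wins, even though \r\n appears later
```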

@@ -306,0 +317,0 @@

var arrReg = /\[([0-9]*)\]/;
var numReg = /^[-+]?[0-9]*\.?[0-9]+$/;
function parseParamType(type, item) {
  if (type === 'number') {
    var rtn = parseFloat(item);
    if (isNaN(rtn)) {
      return 0;
    } else {
      return rtn;
    }
  } else if (type === '') {
    var trimed = item.trim();
    if (numReg.test(trimed)) {
      return parseFloat(trimed);
    } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
      return false;
    } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
      return true;
    } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}") {
      try {
        return JSON.parse(trimed);
      } catch (e) {
        return item;
      }
    } else {
      return item;
    }
  }
  return item;
}
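For illustration, here is how `parseParamType` above treats a few sample cells (a sketch; the return values follow the branches shown):

```js
parseParamType('number', 'abc');    // 0, parseFloat yields NaN so the number type falls back to 0
parseParamType('number', '12.5');   // 12.5
parseParamType('', ' 23 ');         // 23, numeric string is trimmed and parsed
parseParamType('', 'TRUE');         // true, 4-letter case-insensitive boolean
parseParamType('', '{"a": 1}');     // { a: 1 }, brace-wrapped valid JSON is parsed
parseParamType('', '{"a": }');      // '{"a": }', JSON.parse throws and the original string is returned
```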

@@ -85,5 +55,5 @@ function processHead(pointer, headArr, arrReg, flatKeys) {

} else {
  pointer[key] = params.config && params.config.checkType ? parseParamType(this.type, params.item) : params.item;
  pointer[key] = params.item;
}
}
};
var parserMgr = require("./parserMgr.js");
var utils = require("./utils.js");
var async=require("async");
var async = require("async");
var Parser = require("./parser");

@@ -26,11 +26,13 @@ if (process.env.child) {

}
function getConstParser(number){
  return new Parser("field" + number, /.*/, function(params) {
    var name = this.getName();
    params.resultRow[name] = params.item;
  }, true);
function getConstParser(number) {
  return new Parser("field" + number, /.*/, function(params) {
    var name = this.getName();
    params.resultRow[name] = params.item;
  }, true);
}
function init(param) {
  var headRow;
  var parseRules=[];
  var parseRules = [];

@@ -42,3 +44,3 @@ function genConstHeadRow(msg, cb) {

while (number > 0) {
  var p =getConstParser(number);
  var p = getConstParser(number);
  parseRules.unshift(p);

@@ -53,10 +55,10 @@ headRow.unshift(p.getName());

// headRow = msg.row;
var row=[];
if (param.headers){
  row=param.headers;
}else if(msg.row.length>0){
  row=utils.rowSplit(msg.row, param.delimiter, param.quote, param.trim);
var row = [];
if (param.headers) {
  row = param.headers;
} else if (msg.row.length > 0) {
  row = utils.rowSplit(msg.row, param.delimiter, param.quote, param.trim);
}
headRow=row;
if (row.length>0){
headRow = row;
if (row.length > 0) {
  parseRules = parserMgr.initParsers(row, param.checkType);

@@ -66,28 +68,32 @@ }

}
function processRows(msg,cb){
  var csvRows=msg.csvRows;
  var startIndex=msg.startIndex;
  var res={data:[]};
  var count=csvRows.length;
  var _err=null;
  for (var i=0;i<csvRows.length;i++){
    msg.data=csvRows[i];
    msg.index=startIndex++;
    processRow(msg,function(err,r){
      if (err){
        _err=err;
      }else{
        if (r){
          res.data.push(r);
        }else{
          startIndex--;
        }
      }
    })
    if (_err){
      return cb(_err);
    }
  }
  cb(null,res);
}
function processRows(msg, cb) {
  var csvRows = msg.csvRows;
  var startIndex = msg.startIndex;
  var res = {
    data: []
  };
  var count = csvRows.length;
  var _err = null;
  for (var i = 0; i < csvRows.length; i++) {
    msg.data = csvRows[i];
    msg.index = startIndex++;
    processRow(msg, function(err, r) {
      if (err) {
        _err = err;
      } else {
        if (r) {
          res.data.push(r);
        } else {
          startIndex--;
        }
      }
    })
    if (_err) {
      return cb(_err);
    }
  }
  cb(null, res);
}
function processRow(msg, cb) {

@@ -110,10 +116,13 @@ var i, item, parser, head,

parser = parseRules[i];
if (!parser){
  parser=parseRules[i]=getConstParser(i+1);
if (!parser) {
  parser = parseRules[i] = getConstParser(i + 1);
}
head = headRow[i];
if (!head || head===""){
  head=headRow[i]="field"+(i+1);
  parser.head=head;
if (!head || head === "") {
  head = headRow[i] = "field" + (i + 1);
  parser.head = head;
}
if (param.checkType) {
  item = parseParamType(parser.type, item, param);
}
parser.parse({

@@ -136,6 +145,36 @@ head: head,

} else {
  cb(null,null);
  cb(null, null);
}
}
var numReg = /^[-+]?[0-9]*\.?[0-9]+$/;
function parseParamType(type, item) {
  if (type === 'number') {
    var rtn = parseFloat(item);
    if (isNaN(rtn)) {
      return 0;
    } else {
      return rtn;
    }
  } else if (type === '') {
    var trimed = item.trim();
    if (numReg.test(trimed)) {
      return parseFloat(trimed);
    } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
      return false;
    } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
      return true;
    } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}") {
      try {
        return JSON.parse(trimed);
      } catch (e) {
        return item;
      }
    } else {
      return item;
    }
  }
  return item;
}
return {

@@ -145,5 +184,5 @@ processHeadRow: processHeadRow,

genConstHeadRow: genConstHeadRow,
processRows:processRows
processRows: processRows
}
}
module.exports = init;

@@ -21,3 +21,3 @@ {

],
"version": "0.5.0",
"version": "0.5.1",
"keywords": [

@@ -24,0 +24,0 @@ "csv",

@@ -14,3 +14,3 @@ # CSVTOJSON

Version 0.5 contains big refactor expecially for performance. The parser is like **7 times** faster than version 0.4.
Version 0.5 contains a big refactor, especially for performance. The parser is about **7 times** faster than version 0.4.

@@ -25,9 +25,10 @@ ## Menu

* [Parameters](#params)
* [Customised Parser](#parser)
* [Webserver](#webserver)
* [Result Transform](#result-transform)
* [Synchronous Transformer](#synchronous-transformer)
* [Asynchronous Transformer](#asynchronous-transformer)
* [Convert to other data type](#convert-to-other-data-type)
* [Events](#events)
* [Built-in Parsers](#default-parsers)
* [Flags](#flags)
* [Big CSV File Streaming](#big-csv-file)
* [Process Big CSV File in CLI](#convert-big-csv-file-with-command-line-tool)
* [Column Array](#column-array)
* [Parse String](#parse-string)

@@ -39,2 +40,3 @@ * [Empowered JSON Parser](#empowered-json-parser)

* [Error Handling](#error-handling)
* [Customised Parser](#parser)
* [Change Log](#change-log)

@@ -145,3 +147,3 @@

* **constructResult**: true/false. Whether to constrcut final json object in memory which will be populated in "end_parsed" event. Set to false if deal with huge csv data. default: true.
* **constructResult**: true/false. Whether to construct the final json object in memory, which will be populated in the "end_parsed" event. Set to false when dealing with huge csv data. default: true.
* **delimiter**: delimiter used for separating columns. default: ","
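A minimal sketch combining these two parameters (parameter names as documented above; the file path is hypothetical):

```js
var fs = require("fs");
var Converter = require("csvtojson").Converter;

// Stream a large semicolon-delimited file without building the full result in memory.
var csvConverter = new Converter({
  constructResult: false, // don't accumulate the final JSON array
  delimiter: ";"
});
csvConverter.on("record_parsed", function(resultRow) {
  // handle each row as it is parsed
});
fs.createReadStream("path/to/huge.csv").pipe(csvConverter);
```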

@@ -162,72 +164,144 @@ * **quote**: If a column contains delimiter, it is able to use quote character to surround the column content. e.g. "hello, world" wont be split into two columns while parsing. default: " (double quote)

All parameters can be used in the Command Line tool. See:
```
csvtojson --help
```
# Result Transform
To transform the JSON result (e.g. change the value of one column), simply add a 'transform handler'.
## Synchronous Transformer
```js
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({});
csvConverter.transform = function(json, row, index) {
  json["rowIndex"] = index;
  /* some other examples:
  delete json["myfield"]; // remove a field
  json["dateOfBirth"] = new Date(json["dateOfBirth"]); // convert a field type
  */
};
csvConverter.fromString(csvString, function(err, result) {
  // every result row will have a field 'rowIndex' indicating its row number in the csv data:
  /*
  [{
    field1: value1,
    rowIndex: 0
  }]
  */
});
```
As shown in the example above, any changes applied to the result json will be pushed downstream and to the "record_parsed" event.
## Asynchronous Transformer
Asynchronous transformation can be achieved either through the "record_parsed" event or by creating a Writable stream.
### Use record_parsed
To transform data asynchronously, it is suggested to use csvtojson with [Async Queue](https://github.com/caolan/async#queue).
This is mainly used when the transformation of each csv row needs to be mashed with data retrieved from an external source such as a database, server, or file system.
However, this approach will **not** change the json result pushed downstream.
Here is an example:
```js
var Conv = require("csvtojson").Converter;
var async = require("async");
var rs = require("fs").createReadStream("path/to/csv"); // or any readable stream to csv data.
var q = async.queue(function(json, callback) {
  // process the json asynchronously.
  require("request").get("http://myserver/user/" + json.userId, function(err, user) {
    // do the data mash here
    json.user = user;
    callback();
  });
}, 10); // 10 concurrent workers at the same time
q.saturated = function() {
  rs.pause(); // if the queue is full, it is suggested to pause the readstream so csvtojson will suspend populating json data. It is ok not to do so if the CSV data is not very large.
};
q.empty = function() {
  rs.resume(); // resume the paused readable stream. You may need to check whether the readable stream isPaused() (since node 0.12) or has finished.
};
var conv = new Conv({construct: false});
conv.transform = function(json) {
  q.push(json);
};
conv.on("end_parsed", function() {
  q.drain = function() {
    // code to run when the queue has finished processing.
  };
});
rs.pipe(conv);
```
In the example above, the transformation happens as each csv row is processed. The related user info is pulled from a web server and mashed into the json result.
There will be at most 10 data transformation workers running concurrently, with the help of Async Queue.
### Use Stream
It is possible to create a Writable (or Transform) stream which processes data asynchronously. See [Here](https://nodejs.org/dist/latest-v4.x/docs/api/stream.html#stream_class_stream_transform) for more details.
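A minimal sketch of that approach using Node's standard stream API (the async work is a hypothetical placeholder; the chunk format is whatever the upstream converter pushes, JSON text in 0.5.x):

```js
var Transform = require("stream").Transform;
var util = require("util");

// A Transform stream whose _transform callback completes asynchronously.
function AsyncTransform(options) {
  Transform.call(this, options);
}
util.inherits(AsyncTransform, Transform);

AsyncTransform.prototype._transform = function(chunk, encoding, done) {
  var self = this;
  // Simulate async work (e.g. a database lookup) before pushing data downstream.
  setTimeout(function() {
    self.push(chunk);
    done();
  }, 10);
};

// Usage sketch: pipe the converter's output through the async stage.
// converter.pipe(new AsyncTransform()).pipe(process.stdout);
```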
# WebServer
It is able to start the web server through code.
```js
var webServer = require("csvtojson").interfaces.web;
var server = webServer.startWebServer({
  "port": "8801",
  "urlpath": "/parseCSV"
});
```
~~It will return an [expressjs](http://expressjs.com/) Application. You can add your own web app content there.~~ It will return an http.Server object.
## Convert to other data type
Below is an example of result transformation which converts csv data to a column array rather than a JSON object.
```js
var Converter = require("csvtojson").Converter;
var columArrData = __dirname + "/data/columnArray";
var rs = fs.createReadStream(columArrData);
var result = {};
var csvConverter = new Converter();
// end_parsed will be emitted once parsing has finished
csvConverter.on("end_parsed", function(jsonObj) {
  console.log(result);
  console.log("Finished parsing");
  done();
});
// record_parsed will be emitted each time a row has been parsed
csvConverter.on("record_parsed", function(resultRow, rawRow, rowIndex) {
  for (var key in resultRow) {
    if (!result[key] || !(result[key] instanceof Array)) {
      result[key] = [];
    }
    result[key][rowIndex] = resultRow[key];
  }
});
rs.pipe(csvConverter);
```
Here is an example:
```csv
TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED
1395426422,n,10028,1213,5461
1395426422,n,10013,9954,13560
1395426422,n,10109,221391500,141836
1395426422,n,10007,53448,308549
1395426422,n,10022,15506,72125
```
It will be converted to:
```json
{
  "TIMESTAMP": ["1395426422", "1395426422", "1395426422", "1395426422", "1395426422"],
  "UPDATE": ["n", "n", "n", "n", "n"],
  "UID": ["10028", "10013", "10109", "10007", "10022"],
  "BYTES SENT": ["1213", "9954", "221391500", "53448", "15506"],
  "BYTES RCVED": ["5461", "13560", "141836", "308549", "72125"]
}
```
# Events

@@ -257,14 +331,15 @@

# Default Parsers
There are default parsers in the library:
**JSON**: Any valid JSON structure (array, nested json) is supported. See [Empowered JSON Parser](#empowered-json-parser)
**Omitted column**: For column heads starting with "\*omit\*" e.g. "\*omit\*id", the parser will omit the column's data.
**Flat JSON**: Mark a head column as the key of its JSON result. e.g.
```csv
*flat*user.name, user.age
Joe , 40
```
# Flags
There are flags in the library:
\*omit\*: Omit a column. The values in the column will not be built into the JSON result.
\*flat\*: Mark a head column as the key of its JSON result.
Example:
```csv
*flat*user.name, user.age, *omit*user.gender
Joe , 40, Male
```
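For illustration, assuming the default checkType behaviour, the flagged sample above would be expected to produce something like the following: the \*flat\* key stays literal, the default head nests on ".", and the \*omit\* column is dropped.

```json
[{
  "user.name": "Joe",
  "user": {
    "age": 40
  }
}]
```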

@@ -318,54 +393,4 @@

# Column Array
To convert csv data to a column array, you have to construct the result in memory. See the example below.
```js
var columArrData = __dirname + "/data/columnArray";
var rs = fs.createReadStream(columArrData);
var result = {};
var csvConverter = new CSVAdv();
// end_parsed will be emitted once parsing has finished
csvConverter.on("end_parsed", function(jsonObj) {
  console.log(result);
  console.log("Finished parsing");
  done();
});
// record_parsed will be emitted each time a row has been parsed
csvConverter.on("record_parsed", function(resultRow, rawRow, rowIndex) {
  for (var key in resultRow) {
    if (!result[key] || !(result[key] instanceof Array)) {
      result[key] = [];
    }
    result[key][rowIndex] = resultRow[key];
  }
});
rs.pipe(csvConverter);
```
Here is an example:
```csv
TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED
1395426422,n,10028,1213,5461
1395426422,n,10013,9954,13560
1395426422,n,10109,221391500,141836
1395426422,n,10007,53448,308549
1395426422,n,10022,15506,72125
```
It will be converted to:
```json
{
"TIMESTAMP": ["1395426422", "1395426422", "1395426422", "1395426422", "1395426422"],
"UPDATE": ["n", "n", "n", "n", "n"],
"UID": ["10028", "10013", "10109", "10007", "10022"],
"BYTES SENT": ["1213", "9954", "221391500", "53448", "15506"],
"BYTES RCVED": ["5461", "13560", "141836", "308549", "72125"]
}
```
# Parse String

@@ -643,2 +668,66 @@ To parse a string, simply call fromString(csvString,callback) method. The callback parameter is optional.
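A minimal sketch of `fromString` (whether numbers come back typed depends on the checkType parameter):

```js
var Converter = require("csvtojson").Converter;

var csvString = "name,age\nJoe,40\nAnn,25";
var csvConverter = new Converter({});
csvConverter.fromString(csvString, function(err, result) {
  if (err) {
    return console.error(err);
  }
  console.log(result); // e.g. [{ "name": "Joe", "age": 40 }, { "name": "Ann", "age": 25 }]
});
```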

# Parser
**Parser will be replaced by [Result Transform](#result-transform) and [Flags](#flags).**
This feature will be disabled in the future.
CSVTOJSON allows adding customised parsers which concentrate on what to parse and how to parse.
It is the main power of the tool: the developer only needs to concentrate on how to deal with the data, while other concerns like streaming, memory, web, cli etc. are handled automatically.
How to add a customised parser:
```js
//Parser Manager
var parserMgr = require("csvtojson").parserMgr;
parserMgr.addParser("myParserName", /^\*parserRegExp\*/, function(params) {
  var columnTitle = params.head; // params.head is like: *parserRegExp*ColumnName
  var fieldName = columnTitle.replace(this.regExp, ""); // this.regExp is the regular expression above.
  params.resultRow[fieldName] = "Hello my parser" + params.item;
});
```
parserMgr's addParser function takes three parameters:
1. parser name: the name of your parser. It should be unique.
2. Regular Expression: It is used to test whether a column of CSV data should use this parser. In the example above, any column whose first row starts with *parserRegExp* will use it.
3. Parse function callback: It is where the parsing happens. The converter works row by row, so the function is called each time a cell of CSV data needs to be parsed.
The parameter of the parse function is a JSON object. It contains the following fields:
**head**: The column's first-row data. It generally contains field information. e.g. *array*items
**item**: The data inside the current cell. e.g. item1
**itemIndex**: the index of the current cell in a row. e.g. 0
**rawRow**: the reference of the current row in array format. e.g. ["item1", 23, "hello"]
**resultRow**: the reference of the result row in JSON format. e.g. {"name":"Joe"}
**rowIndex**: the index of the current row in the CSV data. It starts from 1 since 0 is the head. e.g. 1
**resultObject**: the reference of the result object in JSON format. It always has a field called csvRows which is in Array format. It changes as parsing goes on. e.g.
```json
{
  "csvRows": [
    {
      "itemName": "item1",
      "number": 10
    },
    {
      "itemName": "item2",
      "number": 4
    }
  ]
}
```
# Change Log

@@ -645,0 +734,0 @@

@@ -198,2 +198,4 @@ var Converter = require("../libs/core/Converter.js");

assert(d.column9 === true);
assert(d.column10[0]===23);
assert(d.column10[1]===31);
});

@@ -221,2 +223,4 @@ csvConverter.on("end_parsed",function (){

assert(d.column9 === "true");
assert(d.column10[0]==="23");
assert(d.column10[1]==="31");
});

@@ -223,0 +227,0 @@ csvConverter.on("end_parsed",function (){

@@ -94,2 +94,29 @@ var Converter = require("../libs/core/Converter.js");

});
it ("should use first line break as eol",function(done){
var testData = __dirname + "/data/testEol";
var conv=new Converter({
noheader:true
});
conv.fromFile(testData,function(err,json){
assert(!err);
done();
});
})
it ("should use sync transform",function(done){
var testData = __dirname + "/data/complexJSONCSV";
var rs = fs.createReadStream(testData);
var conv=new Converter({});
conv.transform=function(json,row,index){
json.rowNum=index;
}
conv.on("record_parsed",function(j){
assert(j.rowNum>=0);
});
conv.on("end_parsed",function(res){
assert(res[0].rowNum===0);
assert(res[1].rowNum===1);
done();
});
rs.pipe(conv);
});
// it ("should convert big csv",function(done){

@@ -96,0 +123,0 @@ // // var rs=fs.createReadStream(__dirname+"/data/large-csv-sample.csv");

