Comparing version 0.0.19 to 0.2.0
module.exports = require('./lib/csv'); | ||
csv = require('./lib/csv'); | ||
csv.generator = require('./lib/generator'); | ||
module.exports = csv; |
762
lib/csv.js
@@ -1,476 +0,308 @@ | ||
// Module CSV - Copyright David Worms <open@adaltas.com> (BSD Licensed) | ||
// Generated by CoffeeScript 1.3.3 | ||
/* | ||
var EventEmitter = require('events').EventEmitter, | ||
fs = require('fs'); | ||
Node CSV | ||
======== | ||
// Utils function | ||
var merge = function(obj1, obj2){ | ||
var r = obj1||{}; | ||
for(var key in obj2){ | ||
r[key] = obj2[key]; | ||
} | ||
return r; | ||
} | ||
This project provides CSV parsing and has been tested and used | ||
on a large input file (over 2Gb). | ||
module.exports = function(){ | ||
var state = { | ||
count: 0, | ||
countWriten: 0, | ||
field: '', | ||
line: [], | ||
lastC: '', | ||
quoted: false, | ||
commented: false, | ||
buffer: null, | ||
bufferPosition: 0 | ||
} | ||
// Are we currently inside the transform callback? If so, | ||
// we shouldn't increment `state.count` which count provided lines | ||
var transforming = false; | ||
* Follow the NodeJs streaming API | ||
* Async and event based | ||
* Support delimiters, quotes and escape characters | ||
* Line breaks discovery: line breaks in source are detected and reported to destination | ||
* Data transformation | ||
* Support for large datasets | ||
* Complete test coverage as sample and inspiration | ||
Important, this documentation cover the current version of the node | ||
csv parser. The documentation for the current version 0.1.0 is | ||
available [here](https://github.com/wdavidw/node-csv-parser/tree/v0.1). | ||
Quick example | ||
------------- | ||
The following example illustrate 4 usages of the library: | ||
1. Plug a readable stream by defining a file path | ||
2. Direct output to a file path | ||
3. Transform the data (optional) | ||
4. Listen to events (optional) | ||
// Defined Class | ||
var CSV = function(){ | ||
// Set options | ||
this.readOptions = { | ||
delimiter: ',', | ||
quote: '"', | ||
escape: '"', | ||
columns: null, | ||
flags: 'r', | ||
encoding: 'utf8', | ||
bufferSize: 8 * 1024 * 1024, | ||
trim: false, | ||
ltrim: false, | ||
rtrim: false | ||
}; | ||
this.writeOptions = { | ||
delimiter: null, | ||
quote: null, | ||
quoted: false, | ||
escape: null, | ||
columns: null, | ||
header: false, | ||
lineBreaks: null, | ||
flags: 'w', | ||
encoding: 'utf8', | ||
bufferSize: null, | ||
newColumns: false, | ||
end: true // Call `end()` on close | ||
}; | ||
// A boolean that is true by default, but turns false after an 'error' occurred, | ||
// the stream came to an 'end', or destroy() was called. | ||
this.readable = true; | ||
// A boolean that is true by default, but turns false after an 'error' occurred | ||
// or end() / destroy() was called. | ||
this.writable = true; | ||
} | ||
CSV.prototype.__proto__ = EventEmitter.prototype; | ||
// Reading API | ||
CSV.prototype.from = function(data, options){ | ||
if(options) merge(this.readOptions,options); | ||
var self = this; | ||
process.nextTick(function(){ | ||
if(data instanceof Array){ | ||
if( csv.writeOptions.lineBreaks === null ){ | ||
csv.writeOptions.lineBreaks = "\n"; | ||
} | ||
for(var i=0; i<data.length; i++){ | ||
state.line = data[i]; | ||
flush(); | ||
} | ||
}else{ | ||
try{ | ||
parse(data); | ||
}catch(e){ | ||
return self.emit('error', e); | ||
} | ||
} | ||
self.end(); | ||
}); | ||
return this; | ||
} | ||
CSV.prototype.fromStream = function(readStream, options){ | ||
// node samples/sample.js | ||
var csv = require('csv'); | ||
if(options) merge(this.readOptions,options); | ||
var self = this; | ||
readStream.on('data', function(data) { | ||
try{ | ||
parse(data); | ||
}catch(e){ | ||
self.emit('error', e); | ||
// Destroy the input stream | ||
readStream.destroy(); | ||
} | ||
}); | ||
readStream.on('error', function(error) { self.emit('error', error) }); | ||
readStream.on('end', function() { | ||
self.end(); | ||
}); | ||
this.readStream = readStream; | ||
return this; | ||
csv() | ||
.from.stream(fs.createReadStream(__dirname+'/sample.in') | ||
.to.path(__dirname+'/sample.out') | ||
.transform( function(data){ | ||
data.unshift(data.pop()); | ||
return data; | ||
}) | ||
.on('record', function(data,index){ | ||
console.log('#'+index+' '+JSON.stringify(data)); | ||
}) | ||
.on('end', function(count){ | ||
console.log('Number of lines: '+count); | ||
}) | ||
.on('error', function(error){ | ||
console.log(error.message); | ||
}); | ||
// Print sth like: | ||
// #0 ["2000-01-01","20322051544","1979.0","8.8017226E7","ABC","45"] | ||
// #1 ["2050-11-27","28392898392","1974.0","8.8392926E7","DEF","23"] | ||
// Number of lines: 2 | ||
Pipe example | ||
------------ | ||
The module follow a Stream architecture | ||
|-----------| |---------|---------| |---------| | ||
| | | | | | | | ||
| | | CSV | | | | ||
| | | | | | | | ||
| Stream | | Writer | Reader | | Stream | | ||
| Reader |.pipe(| API | API |).pipe(| Writer |) | ||
| | | | | | | | ||
| | | | | | | | ||
|-----------| |---------|---------| |---------| | ||
in = fs.createReadStream('./in') | ||
out = fs.createWriteStream('./out') | ||
in.pipe(csv()).pipe(out) | ||
Installing | ||
---------- | ||
Via [npm](http://github.com/isaacs/npm): | ||
```bash | ||
npm install csv | ||
``` | ||
Via git (or downloaded tarball): | ||
```bash | ||
git clone http://github.com/wdavidw/node-csv-parser.git | ||
``` | ||
Events | ||
------ | ||
By extending the Node `EventEmitter` class, the library provides | ||
a few useful events: | ||
* *record* | ||
Emitted by the stringifier when a new row is parsed and transformed. The data is | ||
the value returned by the user `transform` callback if any. Note however that the event won't | ||
be called if transform return `null` since the record is skipped. | ||
The callback provides two arguments. `data` is the CSV line being processed (an array or an object) | ||
and `index` is the index number of the line starting at zero | ||
* *data* | ||
Emitted by the stringifier on each line once the data has been transformed and stringified. | ||
* *drain* | ||
* *end* | ||
Emitted when the CSV content has been parsed. | ||
* *close* | ||
Emitted when the underlying resource has been closed. For example, when writting to a file with `csv().to.path()`, the event will be called once the writing process is complete and the file closed. | ||
* *error* | ||
Thrown whenever an error occured. | ||
Columns | ||
------- | ||
Columns names may be provided or discovered in the first line with | ||
the read options `columns`. If defined as an array, the order must | ||
match the one of the input source. If set to `true`, the fields are | ||
expected to be present in the first line of the input source. | ||
You can define a different order and even different columns in the | ||
read options and in the write options. If the `columns` is not defined | ||
in the write options, it will default to the one present in the read options. | ||
When working with fields, the `transform` method and the `data` | ||
events receive their `data` parameter as an object instead of an | ||
array where the keys are the field names. | ||
// node samples/column.js | ||
var csv = require('csv'); | ||
csv() | ||
.from.path(__dirname+'/columns.in', { | ||
columns: true | ||
}) | ||
.to.stream(process.stdout, { | ||
columns: ['id', 'name'] | ||
}) | ||
.transform(function(data){ | ||
data.name = data.firstname + ' ' + data.lastname | ||
return data; | ||
}); | ||
// Print sth like: | ||
// 82,Zbigniew Preisner | ||
// 94,Serge Gainsbourg | ||
*/ | ||
var CSV, from, options, parser, state, stream, stringifier, to, transformer; | ||
stream = require('stream'); | ||
state = require('./state'); | ||
options = require('./options'); | ||
from = require('./from'); | ||
to = require('./to'); | ||
stringifier = require('./stringifier'); | ||
parser = require('./parser'); | ||
transformer = require('./transformer'); | ||
CSV = function() { | ||
this.readable = true; | ||
this.writable = true; | ||
this.state = state(); | ||
this.options = options(); | ||
this.from = from(this); | ||
this.to = to(this); | ||
this.parser = parser(this); | ||
this.parser.on('row', (function(row) { | ||
return this.transformer.transform(row); | ||
}).bind(this)); | ||
this.parser.on('end', (function() { | ||
this.emit('end', this.state.count); | ||
return this.readable = false; | ||
}).bind(this)); | ||
this.parser.on('error', (function(e) { | ||
return this.error(e); | ||
}).bind(this)); | ||
this.stringifier = stringifier(this); | ||
this.transformer = transformer(this); | ||
return this; | ||
}; | ||
CSV.prototype.__proto__ = stream.prototype; | ||
/* | ||
`pause()` | ||
--------- | ||
Implementation of the Readable Stream API, requesting that no further data | ||
be sent until resume() is called. | ||
*/ | ||
CSV.prototype.pause = function() { | ||
return this.paused = true; | ||
}; | ||
/* | ||
`resume()` | ||
---------- | ||
Implementation of the Readable Stream API, resuming the incoming 'data' | ||
events after a pause() | ||
*/ | ||
CSV.prototype.resume = function() { | ||
this.paused = false; | ||
return this.emit('drain'); | ||
}; | ||
/* | ||
`write(data, [preserve])` | ||
------------------------- | ||
Implementation of the Writable Stream API with a larger signature. Data | ||
may be a string, a buffer, an array or an object. | ||
If data is a string or a buffer, it could span multiple lines. If data | ||
is an object or an array, it must represent a single line. | ||
Preserve is for line which are not considered as CSV data. | ||
*/ | ||
CSV.prototype.write = function(data, preserve) { | ||
if (!this.writable) { | ||
return false; | ||
} | ||
if (typeof data === 'string' && !preserve) { | ||
this.parser.parse(data); | ||
} else if (Array.isArray(data) && !this.state.transforming) { | ||
this.transformer.transform(data); | ||
} else { | ||
if (this.state.count === 0 && this.options.to.header === true) { | ||
this.stringifier.write(this.options.to.columns || this.options.from.columns); | ||
} | ||
CSV.prototype.fromPath = function(path, options){ | ||
if(options) merge(this.readOptions,options); | ||
var stream = fs.createReadStream(path, this.readOptions); | ||
stream.setEncoding(this.readOptions.encoding); | ||
return this.fromStream(stream, null); | ||
this.stringifier.write(data, preserve); | ||
if (!this.state.transforming && !preserve) { | ||
this.state.count++; | ||
} | ||
// Writting API | ||
/** | ||
* Write data. | ||
* Data may be string in which case it could span multiple lines. If data | ||
* is an object or an array, it must represent a single line. | ||
* Preserve is for line which are not considered as CSV data. | ||
*/ | ||
CSV.prototype.write = function(data, preserve){ | ||
if(typeof data === 'string' && !preserve){ | ||
return parse(data); | ||
}else if(Array.isArray(data) && !transforming){ | ||
state.line = data; | ||
return flush(); | ||
} | ||
if(state.count === 0 && csv.writeOptions.header === true){ | ||
write(csv.writeOptions.columns || csv.readOptions.columns); | ||
} | ||
write(data, preserve); | ||
if(!transforming && !preserve){ | ||
state.count++; | ||
} | ||
} | ||
CSV.prototype.end = function(){ | ||
if (state.quoted) { | ||
return csv.emit('error', new Error('Quoted field not terminated')); | ||
} | ||
// dump open record | ||
if (state.field || state.lastC === this.readOptions.delimiter || state.lastC === this.readOptions.quote) { | ||
if(csv.readOptions.trim || csv.readOptions.rtrim){ | ||
state.field = state.field.trimRight(); | ||
} | ||
state.line.push(state.field); | ||
state.field = ''; | ||
} | ||
if (state.line.length > 0) { | ||
flush(); | ||
} | ||
if(csv.writeStream){ | ||
if(state.bufferPosition !== 0){ | ||
csv.writeStream.write(state.buffer.slice(0, state.bufferPosition)); | ||
} | ||
if(this.writeOptions.end){ | ||
csv.writeStream.end(); | ||
}else{ | ||
csv.emit('end', state.count); | ||
csv.readable = false; | ||
} | ||
}else{ | ||
csv.emit('end', state.count); | ||
csv.readable = false; | ||
} | ||
} | ||
CSV.prototype.toStream = function(writeStream, options){ | ||
if(options) merge(this.writeOptions,options); | ||
var self = this; | ||
switch(this.writeOptions.lineBreaks){ | ||
case 'auto': | ||
this.writeOptions.lineBreaks = null; | ||
break; | ||
case 'unix': | ||
this.writeOptions.lineBreaks = "\n"; | ||
break; | ||
case 'mac': | ||
this.writeOptions.lineBreaks = "\r"; | ||
break; | ||
case 'windows': | ||
this.writeOptions.lineBreaks = "\r\n"; | ||
break; | ||
case 'unicode': | ||
this.writeOptions.lineBreaks = "\u2028"; | ||
break; | ||
} | ||
writeStream.on('close', function(){ | ||
self.emit('end', state.count); | ||
self.readable = false; | ||
self.writable = false; | ||
}) | ||
this.writeStream = writeStream; | ||
state.buffer = new Buffer(this.writeOptions.bufferSize||this.readOptions.bufferSize); | ||
state.bufferPosition = 0; | ||
return this; | ||
} | ||
CSV.prototype.toPath = function(path, options){ | ||
// Merge user provided options | ||
if(options) merge(this.writeOptions,options); | ||
// clone options | ||
var options = merge({},this.writeOptions); | ||
// Delete end property which otherwise overwrite `WriteStream.end()` | ||
delete options.end; | ||
// Create the write stream | ||
var stream = fs.createWriteStream(path, options); | ||
return this.toStream(stream, null); | ||
} | ||
// Transform API | ||
CSV.prototype.transform = function(callback){ | ||
this.transformer = callback; | ||
return this; | ||
} | ||
var csv = new CSV(); | ||
// Private API | ||
/** | ||
* Parse a string which may hold multiple lines. | ||
* Private state object is enriched on each character until | ||
* flush is called on a new line | ||
*/ | ||
function parse(chars){ | ||
chars = '' + chars; | ||
for (var i = 0, l = chars.length; i < l; i++) { | ||
var c = chars.charAt(i); | ||
switch (c) { | ||
case csv.readOptions.escape: | ||
case csv.readOptions.quote: | ||
if( state.commented ) break; | ||
var isEscape = false; | ||
if (c === csv.readOptions.escape) { | ||
// Make sure the escape is really here for escaping: | ||
// if escape is same as quote, and escape is first char of a field and it's not quoted, then it is a quote | ||
// next char should be an escape or a quote | ||
var nextChar = chars.charAt(i + 1); | ||
if( !( csv.readOptions.escape === csv.readOptions.quote && !state.field && !state.quoted ) | ||
&& ( nextChar === csv.readOptions.escape || nextChar === csv.readOptions.quote ) ) { | ||
i++; | ||
isEscape = true; | ||
c = chars.charAt(i); | ||
state.field += c; | ||
} | ||
} | ||
if (!isEscape && c === csv.readOptions.quote) { | ||
if (state.field && !state.quoted) { | ||
// Treat quote as a regular character | ||
state.field += c; | ||
break; | ||
} | ||
if (state.quoted) { | ||
// Make sure a closing quote is followed by a delimiter | ||
var nextChar = chars.charAt(i + 1); | ||
if (nextChar && nextChar != '\r' && nextChar != '\n' && nextChar !== csv.readOptions.delimiter) { | ||
throw new Error('Invalid closing quote; found "' + nextChar + '" instead of delimiter "' + csv.readOptions.delimiter + '"'); | ||
} | ||
state.quoted = false; | ||
} else if (state.field === '') { | ||
state.quoted = true; | ||
} | ||
} | ||
break; | ||
case csv.readOptions.delimiter: | ||
if( state.commented ) break; | ||
if( state.quoted ) { | ||
state.field += c; | ||
}else{ | ||
if(csv.readOptions.trim || csv.readOptions.rtrim){ | ||
state.field = state.field.trimRight(); | ||
} | ||
state.line.push(state.field); | ||
state.field = ''; | ||
} | ||
break; | ||
case '\n': | ||
if(state.quoted) { | ||
state.field += c; | ||
break; | ||
} | ||
if( !csv.readOptions.quoted && state.lastC === '\r' ){ | ||
break; | ||
} | ||
case '\r': | ||
if(state.quoted) { | ||
state.field += c; | ||
break; | ||
} | ||
if( csv.writeOptions.lineBreaks === null ){ | ||
// Auto-discovery of linebreaks | ||
csv.writeOptions.lineBreaks = c + ( c === '\r' && chars.charAt(i+1) === '\n' ? '\n' : '' ); | ||
} | ||
if(csv.readOptions.trim || csv.readOptions.rtrim){ | ||
state.field = state.field.trimRight(); | ||
} | ||
state.line.push(state.field); | ||
state.field = ''; | ||
flush(); | ||
break; | ||
case ' ': | ||
case '\t': | ||
if(state.quoted || (!csv.readOptions.trim && !csv.readOptions.ltrim ) || state.field) { | ||
state.field += c; | ||
break; | ||
} | ||
break; | ||
default: | ||
if(state.commented) break; | ||
state.field += c; | ||
} | ||
state.lastC = c; | ||
} | ||
} | ||
/** | ||
* Called by the `parse` function on each line. It is responsible for | ||
* transforming the data and finally calling `write`. | ||
*/ | ||
function flush(){ | ||
if(csv.readOptions.columns){ | ||
if(state.count === 0 && csv.readOptions.columns === true){ | ||
csv.readOptions.columns = state.line; | ||
state.line = []; | ||
state.lastC = ''; | ||
return; | ||
} | ||
var line = {}; | ||
for(var i=0; i<csv.readOptions.columns.length; i++){ | ||
var column = csv.readOptions.columns[i]; | ||
line[column] = state.line[i]||null; | ||
} | ||
state.line = line; | ||
line = null; | ||
} | ||
var line; | ||
if(csv.transformer){ | ||
transforming = true; | ||
try{ | ||
line = csv.transformer(state.line, state.count); | ||
}catch(e){ | ||
return error(e); | ||
} | ||
if (csv.writeOptions.newColumns && !csv.writeOptions.columns && typeof line === 'object' && !Array.isArray(line)) { | ||
Object.keys(line) | ||
.filter(function(column) { return csv.readOptions.columns.indexOf(column) === -1; }) | ||
.forEach(function(column) { csv.readOptions.columns.push(column); }); | ||
} | ||
} | ||
return !this.paused; | ||
}; | ||
transforming = false; | ||
}else{ | ||
line = state.line; | ||
} | ||
if(state.count === 0 && csv.writeOptions.header === true){ | ||
write(csv.writeOptions.columns || csv.readOptions.columns); | ||
} | ||
write(line); | ||
state.count++; | ||
state.line = []; | ||
state.lastC = ''; | ||
} | ||
/** | ||
* Write a line to the written stream. | ||
* Line may be an object, an array or a string | ||
* Preserve is for line which are not considered as CSV data | ||
*/ | ||
function write(line, preserve){ | ||
if(typeof line === 'undefined' || line === null){ | ||
return; | ||
} | ||
if(!preserve){ | ||
try { | ||
csv.emit('data', line, state.count); | ||
}catch(e){ | ||
return error(e); | ||
} | ||
} | ||
if(typeof line === 'object'){ | ||
if(!(line instanceof Array)){ | ||
var columns = csv.writeOptions.columns || csv.readOptions.columns; | ||
var _line = []; | ||
if(columns){ | ||
for(var i=0; i<columns.length; i++){ | ||
var column = columns[i]; | ||
_line[i] = (typeof line[column] === 'undefined' || line[column] === null) ? '' : line[column]; | ||
} | ||
}else{ | ||
for(var column in line){ | ||
_line.push(line[column]); | ||
} | ||
} | ||
line = _line; | ||
_line = null; | ||
}else if(csv.writeOptions.columns){ | ||
// We are getting an array but the user want specified output columns. In | ||
// this case, we respect the columns indexes | ||
line.splice(csv.writeOptions.columns.length); | ||
} | ||
if(line instanceof Array){ | ||
var newLine = state.countWriten ? csv.writeOptions.lineBreaks || "\n" : ''; | ||
for(var i=0; i<line.length; i++){ | ||
var field = line[i]; | ||
if(typeof field === 'string'){ | ||
// fine 99% of the cases, keep going | ||
}else if(typeof field === 'number'){ | ||
// Cast number to string | ||
field = '' + field; | ||
}else if(typeof field === 'boolean'){ | ||
// Cast boolean to string | ||
field = field ? '1' : ''; | ||
}else if(field instanceof Date){ | ||
// Cast date to timestamp string | ||
field = '' + field.getTime(); | ||
} | ||
if(field){ | ||
var containsdelimiter = field.indexOf(csv.writeOptions.delimiter || csv.readOptions.delimiter) >= 0; | ||
var containsQuote = field.indexOf(csv.writeOptions.quote || csv.readOptions.quote) >= 0; | ||
var containsLinebreak = field.indexOf("\r") >= 0 || field.indexOf("\n") >= 0; | ||
if(containsQuote){ | ||
field = field.replace( | ||
new RegExp(csv.writeOptions.quote || csv.readOptions.quote,'g') | ||
, (csv.writeOptions.escape || csv.readOptions.escape) | ||
+ (csv.writeOptions.quote || csv.readOptions.quote)); | ||
} | ||
/* | ||
if(containsQuote || containsdelimiter || containsLinebreak || csv.writeOptions.quoted){ | ||
field = (csv.writeOptions.quote || csv.readOptions.quote) + field + (csv.writeOptions.quote || csv.readOptions.quote); | ||
} | ||
newLine += field; | ||
} | ||
if(i!==line.length-1){ | ||
newLine += csv.writeOptions.delimiter || csv.readOptions.delimiter; | ||
} | ||
} | ||
line = newLine; | ||
} | ||
}else if(typeof line == 'number'){ | ||
line = ''+line; | ||
} | ||
if(state.buffer){ | ||
if(state.bufferPosition + Buffer.byteLength(line, csv.writeOptions.encoding) > csv.readOptions.bufferSize){ | ||
csv.writeStream.write(state.buffer.slice(0, state.bufferPosition)); | ||
state.buffer = new Buffer(csv.readOptions.bufferSize); | ||
state.bufferPosition = 0; | ||
} | ||
state.bufferPosition += state.buffer.write(line, state.bufferPosition, csv.writeOptions.encoding); | ||
} | ||
if(!preserve){ | ||
state.countWriten++; | ||
} | ||
return true; | ||
} | ||
`end()` | ||
------- | ||
function error(e){ | ||
csv.readable = false; | ||
csv.writable = false; | ||
csv.emit('error', e); | ||
// Destroy the input stream | ||
if(csv.readStream) csv.readStream.destroy(); | ||
} | ||
return csv; | ||
Terminate the parsing. Call this method when no more csv data is | ||
to be parsed. It implement the StreamWriter API by setting the `writable` | ||
property to "false" and emitting the `end` event. | ||
*/ | ||
CSV.prototype.end = function() { | ||
if (!this.writable) { | ||
return; | ||
} | ||
return this.parser.end(); | ||
}; | ||
/* | ||
`transform(callback)` | ||
--------------------- | ||
Register the transformer callback. The callback is a user provided | ||
function call on each line to filter, enrich or modify the | ||
dataset. More information in the "transforming data" section. | ||
*/ | ||
CSV.prototype.transform = function(callback) { | ||
this.transformer.callback = callback; | ||
return this; | ||
}; | ||
/* | ||
`error(error)` | ||
-------------- | ||
Unified mechanism to handle error, emit the error and mark the | ||
stream as non readable and non writable. | ||
*/ | ||
CSV.prototype.error = function(e) { | ||
this.readable = false; | ||
this.writable = false; | ||
this.emit('error', e); | ||
if (this.readStream) { | ||
this.readStream.destroy(); | ||
} | ||
return this; | ||
}; | ||
module.exports = function() { | ||
return new CSV; | ||
}; |
{ | ||
"name": "csv", | ||
"version": "0.0.19", | ||
"version": "0.2.0", | ||
"description": "CSV parser with simple api, full of options and tested against large datasets.", | ||
@@ -25,3 +25,6 @@ "author": "David Worms <david@adaltas.com>", | ||
"mocha": "latest", | ||
"should": "latest" | ||
"should": "latest", | ||
"each": "latest", | ||
"mecano": "latest", | ||
"glob": "latest" | ||
}, | ||
@@ -28,0 +31,0 @@ "dependencies": {}, |
271
readme.md
@@ -11,259 +11,54 @@ <pre> | ||
This project provides CSV parsing and has been tested and used on a large source file (over 2Gb). | ||
[Documentation is for the parser is available here](http://localhost:4000/projects/node-csv/). | ||
- Support delimiters, quotes and escape characters | ||
- Line breaks discovery: line breaks in source are detected and reported to destination | ||
- Data transformation | ||
- Async and event based | ||
- Support for large datasets | ||
- Complete test coverage as sample and inspiration | ||
Important | ||
--------- | ||
Quick example | ||
------------- | ||
This readme cover the current version 0.2.x of the node | ||
csv parser. | ||
Using the library is a 4 steps process: | ||
The documentation for the current version 0.1.0 is | ||
available [here](https://github.com/wdavidw/node-csv-parser/tree/v0.1). | ||
1. Create a source | ||
2. Create a destination (optional) | ||
3. Transform the data (optional) | ||
4. Listen to events (optional) | ||
Migration | ||
--------- | ||
Here is a example: | ||
The functions 'from*' and 'to*' are now rewritten as 'from.*' and 'to.*'. The 'data' | ||
event is now the 'record' event. The 'data' now recieved a stringified version of | ||
the 'record' event. | ||
```javascript | ||
// node samples/sample.js | ||
var csv = require('csv'); | ||
Development | ||
----------- | ||
csv() | ||
.fromPath(__dirname+'/sample.in') | ||
.toPath(__dirname+'/sample.out') | ||
.transform(function(data){ | ||
data.unshift(data.pop()); | ||
return data; | ||
}) | ||
.on('data',function(data,index){ | ||
console.log('#'+index+' '+JSON.stringify(data)); | ||
}) | ||
.on('end',function(count){ | ||
console.log('Number of lines: '+count); | ||
}) | ||
.on('error',function(error){ | ||
console.log(error.message); | ||
}); | ||
Tests are executed with mocha. To install it, simple run `npm install`, it will install | ||
mocha and its dependencies in your project "node_modules" directory. | ||
// Print sth like: | ||
// #0 ["2000-01-01","20322051544","1979.0","8.8017226E7","ABC","45"] | ||
// #1 ["2050-11-27","28392898392","1974.0","8.8392926E7","DEF","23"] | ||
// Number of lines: 2 | ||
``` | ||
Installing | ||
---------- | ||
Via git (or downloaded tarball): | ||
To run the tests: | ||
```bash | ||
git clone http://github.com/wdavidw/node-csv-parser.git | ||
npm test | ||
``` | ||
Then, simply copy or link the ./lib/csv.js file into your $HOME/.node_libraries folder or inside a declared path folder. | ||
The tests run against the CoffeeScript source files. | ||
Via [npm](http://github.com/isaacs/npm): | ||
To generate the JavaScript files: | ||
```bash | ||
npm install csv | ||
make build | ||
``` | ||
Reading API | ||
----------- | ||
The following method are available: | ||
- *fromPath(data, options)* | ||
Take a file path as first argument and optionally on object of options as a second argument. | ||
- *fromStream(readStream, options)* | ||
Take a readable stream as first argument and optionally on object of options as a second argument. | ||
- *from(data, options)* | ||
Take a string, a buffer, an array or an object as first argument and optionally some options as a second argument. | ||
Options are: | ||
- *delimiter* | ||
Set the field delimiter, one character only, defaults to comma. | ||
- *quote* | ||
Set the field delimiter, one character only, defaults to double quotes. | ||
- *escape* | ||
Set the field delimiter, one character only, defaults to double quotes. | ||
- *columns* | ||
List of fields or true if autodiscovered in the first CSV line, impact the `transform` argument and the `data` event by providing an object instead of an array, order matters, see the transform and the columns sections below. | ||
- *encoding* | ||
Defaults to 'utf8', applied when a readable stream is created. | ||
- *trim* | ||
If true, ignore whitespace immediately around the delimiter, defaults to false. | ||
- *ltrim* | ||
If true, ignore whitespace immediately following the delimiter (i.e. left-trim all fields), defaults to false. | ||
- *rtrim* | ||
If true, ignore whitespace immediately preceding the delimiter (i.e. right-trim all fields), defaults to false. | ||
Writing API | ||
----------- | ||
The following methods are available: | ||
- *write(data, preserve)* | ||
Take a string, an array or an object, implementation of the StreamWriter API. | ||
- *end()* | ||
Terminate the stream, implementation of the StreamWriter API. | ||
- *toPath(path, options)* | ||
Take a file path as first argument and optionally on object of options as a second argument. | ||
- *toStream(writeStream, options)* | ||
Take a readable stream as first argument and optionally on object of options as a second argument. | ||
Options are: | ||
- *delimiter* | ||
Defaults to the delimiter read option. | ||
- *quote* | ||
Defaults to the quote read option. | ||
- *quoted* | ||
Boolean, default to false, quote all the fields even if not required. | ||
- *escape* | ||
Defaults to the escape read option. | ||
- *columns* | ||
List of fields, applied when `transform` returns an object, order matters, see the transform and the columns sections below. | ||
- *encoding* | ||
Defaults to 'utf8', applied when a writable stream is created. | ||
- *header* | ||
Display the column names on the first line if the columns option is provided. | ||
- *lineBreaks* | ||
String used to delimit record rows or a special value; special values are 'auto', 'unix', 'mac', 'windows', 'unicode'; defaults to 'auto' (discovered in source or 'unix' if no source is specified). | ||
- *flags* | ||
Defaults to 'w', 'w' to create or overwrite an file, 'a' to append to a file. Applied when using the `toPath` method. | ||
- *bufferSize* | ||
Internal buffer holding data before being flushed into a stream. Applied when destination is a stream. | ||
- *end* | ||
Prevent calling `end` on the destination, so that destination is no longer writable, similar to passing `{end: false}` option in `stream.pipe()`. | ||
- *newColumns* | ||
If the `columns` option is not specified (which means columns will be taken from the reader | ||
options, will automatically append new columns if they are added during `transform()`. | ||
Transforming data | ||
----------------- | ||
- *transform(callback)* | ||
User provided function call on each line to filter, enrich or modify the dataset. The callback is called asynchronously. | ||
The contract is quite simple, you receive an array of fields for each record and return the transformed record. The return value may be an array, an associative array, a string or null. If null, the record will simply be skipped. | ||
Unless you specify the `columns` read option, `data` are provided as arrays, otherwise they are objects with keys matching columns names. | ||
When the returned value is an array, the fields are merged in order. When the returned value is an object, it will search for the `columns` property in the write or in the read options and smartly order the values. If no `columns` options are found, it will merge the values in their order of appearance. When the returned value is a string, it is directly sent to the destination source and it is your responsibility to delimit, quote, escape or define line breaks. | ||
Example of transform returning a string | ||
```javascript | ||
// node samples/transform.js | ||
var csv = require('csv'); | ||
csv() | ||
.fromPath(__dirname+'/transform.in') | ||
.toStream(process.stdout) | ||
.transform(function(data,index){ | ||
return (index>0 ? ',' : '') + data[0] + ":" + data[2] + ' ' + data[1]; | ||
}); | ||
// Print sth like: | ||
// 82:Zbigniew Preisner,94:Serge Gainsbourg | ||
``` | ||
Events | ||
------ | ||
By extending the Node `EventEmitter` class, the library provides a few useful events: | ||
- *data* (function(data, index){}) | ||
Thrown when a new row is parsed after the `transform` callback and with the data being the value returned by `transform`. Note however that the event won't be called if transform return `null` since the record is skipped. | ||
The callback provide two arguments: | ||
`data` is the CSV line being processed (by default as an array) | ||
`index` is the index number of the line starting at zero | ||
- *end* | ||
In case your redirecting the output to a file using the `toPath` method, the event will be called once the writing process is complete and the file closed. | ||
- *error* | ||
Thrown whenever an error is captured. | ||
Columns | ||
------- | ||
Columns names may be provided or discovered in the first line with the read options `columns`. If defined as an array, the order must match the one of the input source. If set to `true`, the fields are expected to be present in the first line of the input source. | ||
You can define a different order and even different columns in the read options and in the write options. If the `columns` is not defined in the write options, it will default to the one present in the read options. | ||
When working with fields, the `transform` method and the `data` events receive their `data` parameter as an object instead of an array where the keys are the field names. | ||
```javascript | ||
// node samples/column.js | ||
var csv = require('csv'); | ||
csv() | ||
.fromPath(__dirname+'/columns.in',{ | ||
columns: true | ||
}) | ||
.toStream(process.stdout,{ | ||
columns: ['id', 'name'] | ||
}) | ||
.transform(function(data){ | ||
data.name = data.firstname + ' ' + data.lastname | ||
return data; | ||
}); | ||
// Print sth like: | ||
// 82,Zbigniew Preisner | ||
// 94,Serge Gainsbourg | ||
``` | ||
Running the tests | ||
----------------- | ||
Tests are executed with expresso. To install it, simply run `npm install -g expresso`.
To run the tests | ||
```bash | ||
expresso test | ||
``` | ||
Contributors | ||
------------ | ||
* David Worms : <https://github.com/wdavidw>
* Will White : <https://github.com/willwhite>
* Justin Latimer : <https://github.com/justinlatimer>
* jonseymour : <https://github.com/jonseymour>
* pascalopitz : <https://github.com/pascalopitz>
* Josh Pschorr : <https://github.com/jpschorr>
* Elad Ben-Israel: <https://github.com/eladb> | ||
* Philippe Plantier: <https://github.com/phipla> | ||
* Tim Oxley: <https://github.com/timoxley> | ||
* Damon Oehlman: <https://github.com/DamonOehlman> | ||
* Alexandru Topliceanu: <https://github.com/topliceanu> | ||
* Visup: <https://github.com/visup> | ||
* Edmund von der Burg: <https://github.com/evdb> | ||
@@ -270,0 +65,0 @@ Related projects |
// CSV sample - Copyright David Worms <open@adaltas.com> (BSD Licensed)
// node samples/column.js
// NOTE(review): this span is diff residue — the pre- and post-refactor
// copies of samples/column.js are concatenated below. Both copies read
// columns.in (column names taken from the first line via `columns: true`),
// write only the `id` and `name` columns to stdout, and synthesize the
// `name` field inside the transform callback.
var csv = require('..');
csv()
.fromPath(__dirname+'/columns.in',{
columns: true
})
.toStream(process.stdout, {
columns: ['id', 'name'],
end: false
})
.transform(function(data){
// Build the synthetic `name` column from the two source columns.
data.name = data.firstname + ' ' + data.lastname
return data;
});
// Will print sth like:
// 82,Zbigniew Preisner
// 94,Serge Gainsbourg
// node samples/column.js
// Second (post-refactor) copy of the same sample — identical logic.
var csv = require('..');
csv()
.fromPath(__dirname+'/columns.in', {
columns: true
})
.toStream(process.stdout, {
columns: ['id', 'name'],
end: false
})
.transform(function(data){
data.name = data.firstname + ' ' + data.lastname
return data;
});
/*
`node samples/columns.js`
82,Zbigniew Preisner
94,Serge Gainsbourg
*/
@@ -13,32 +13,35 @@ // CSV sample - Copyright jon seymour jon.seymour@gmail.com | ||
// NOTE(review): diff residue — old and new copies of
// samples/duplicate-header.js are interleaved by the diff renderer
// (the `var csv`/`var header` declarations and the csv() pipeline each
// appear twice). The sample echoes stdin to stdout, repeating the CSV
// header line before every data record — see the expected output below.
var csv = require('..');
var header;
var csv = require('..');
var header;
process.stdin.resume();
csv()
.fromStream(process.stdin)
.toStream(process.stdout, {end: false})
.transform(function(data){
if (header) {
// Header already captured: re-emit it before this data row.
this.write(header);
} else {
// First row: cache it as the header and skip it (return null
// drops the record from the output stream).
header=data;
return null;
}
return data;
})
.on('end',function(error){
process.stdout.write("\n");
})
.on('error',function(error){
console.log(error.message);
});
process.stdin.resume();
//
// expected output
//
//ts,year,ms,chars,age,date
//20322051544,1979.0,8.8017226E7,ABC,45,2000-01-01
//ts,year,ms,chars,age,date
//28392898392,1974.0,8.8392926E7,DEF,23,2050-11-27
// Second (post-refactor) copy of the same pipeline — identical logic.
csv()
.fromStream(process.stdin)
.toStream(process.stdout, {end: false})
.transform(function(data){
if (header) {
this.write(header);
} else {
header=data;
return null;
}
return data;
})
.on('end',function(error){
process.stdout.write("\n");
})
.on('error',function(error){
console.log(error.message);
});
/*
`node samples/duplicate-header.js`
ts,year,ms,chars,age,date
20322051544,1979.0,8.8017226E7,ABC,45,2000-01-01
ts,year,ms,chars,age,date
28392898392,1974.0,8.8392926E7,DEF,23,2050-11-27
*/
// CSV sample - Copyright David Worms <open@adaltas.com> (BSD Licensed)
// node samples/column.js
// NOTE(review): diff residue — two copies of samples/new-columns.js are
// concatenated below. With `newColumns: true` in the write options, the
// `name` column added by the transform callback is appended to the
// output in addition to the original columns (see expected output).
var csv = require('..');
csv()
.fromPath(__dirname+'/columns.in',{
columns: true
})
.toStream(process.stdout, {
newColumns: true,
end: false
})
.transform(function(data){
// Add a new `name` field; newColumns lets it flow through to output.
data.name = data.firstname + ' ' + data.lastname
return data;
});
// Will print sth like:
// 82,Preisner,Zbigniew,Zbigniew Preisner
// 94,Gainsbourg,Serge,Serge Gainsbourg
// node samples/column.js
// Second (post-refactor) copy of the same sample — identical logic.
var csv = require('..');
csv()
.fromPath(__dirname+'/columns.in',{
columns: true
})
.toStream(process.stdout, {
newColumns: true,
end: false
})
.transform(function(data){
data.name = data.firstname + ' ' + data.lastname
return data;
});
/*
`node samples/new-columns.js`
82,Preisner,Zbigniew,Zbigniew Preisner
94,Gainsbourg,Serge,Serge Gainsbourg
*/
// CSV sample - Copyright David Worms <open@adaltas.com> (BSD Licensed)
// cat samples/sample.in | node samples/sample-stdin.js
// NOTE(review): diff residue — the old-API and new-API versions of
// samples/sample-stdin.js are concatenated below. The 0.2.0 rewrite
// renames fromStream/toPath to from.stream/to.path and the per-row
// 'data' event to 'record'. Both versions pipe stdin to sample.out,
// rotating the last field of each row to the front.
var csv = require('..');
process.stdin.resume();
csv()
.fromStream(process.stdin)
.toPath(__dirname+'/sample.out')
.transform(function(data){
// Move the last field to the front of the row.
data.unshift(data.pop());
return data;
})
.on('data',function(data,index){
console.log('#'+index+' '+JSON.stringify(data));
})
.on('end',function(count){
console.log('Number of lines: '+count);
})
.on('error',function(error){
console.log(error.message);
});
// Print sth like:
// #0 ["2000-01-01","20322051544","1979.0","8.8017226E7","ABC","45"]
// #1 ["2050-11-27","28392898392","1974.0","8.8392926E7","DEF","23"]
// Number of lines: 2
// Post-refactor copy using the 0.2.0 API (from.stream / to.path,
// 'record' event) — same behavior as above.
var csv = require('..');
process.stdin.resume();
csv()
.from.stream(process.stdin)
.to.path(__dirname+'/sample.out')
.transform(function(data){
data.unshift(data.pop());
return data;
})
.on('record',function(record, index){
console.log('#'+index+' '+JSON.stringify(record));
})
.on('end',function(count){
console.log('Number of lines: '+count);
})
.on('error',function(error){
console.log(error.message);
});
/*
`cat samples/sample.in | node samples/sample-stdin.js`
#0 ["2000-01-01","20322051544","1979.0","8.8017226E7","ABC","45"]
#1 ["2050-11-27","28392898392","1974.0","8.8392926E7","DEF","23"]
Number of lines: 2
*/
// NOTE(review): syntactically broken diff residue — old (`var arr`,
// `.from(arr)`, `.toStream(...)`) and new (`var data`, `.from.array(data)`,
// `.to.stream(...)`) lines of samples/sample-stdout.js are interleaved,
// and a raw `@@` hunk header is embedded mid-span. Kept byte-identical;
// do not attempt to run this fragment as-is.
var csv = require('..');
var arr = [
var data = [
[1,2,3,4,5],
@@ -9,3 +9,12 @@ [2,4,6,8,10]
csv()
.from(arr)
.toStream(process.stdout, {end: false}); // throws on csv.js line 150
.from.array(data)
.to.stream(process.stdout, {end: false}); // throws on csv.js line 150
/*
`node samples/sample-stdout.js`
1,2,3,4,5
2,4,6,8,10
*/
// CSV sample - Copyright David Worms <open@adaltas.com> (BSD Licensed)
// node samples/sample.js
// NOTE(review): diff residue — two copies of samples/sample.js are
// concatenated below; the only code change is require('csv') →
// require('..'). Both read sample.in, write sample.out, and rotate the
// last field of each row to the front in the transform callback.
var csv = require('csv');
csv()
.fromPath(__dirname+'/sample.in')
.toPath(__dirname+'/sample.out')
.transform(function(data){
// Move the last field to the front of the row.
data.unshift(data.pop());
return data;
})
.on('data',function(data,index){
console.log('#'+index+' '+JSON.stringify(data));
})
.on('end',function(count){
console.log('Number of lines: '+count);
})
.on('error',function(error){
console.log(error.message);
});
// Print sth like:
// #0 ["2000-01-01","20322051544","1979.0","8.8017226E7","ABC","45"]
// #1 ["2050-11-27","28392898392","1974.0","8.8392926E7","DEF","23"]
// Number of lines: 2
// Second (post-refactor) copy — identical logic, local require path.
var csv = require('..');
csv()
.fromPath(__dirname+'/sample.in')
.toPath(__dirname+'/sample.out')
.transform(function(data){
data.unshift(data.pop());
return data;
})
.on('data',function(data,index){
console.log('#'+index+' '+JSON.stringify(data));
})
.on('end',function(count){
console.log('Number of lines: '+count);
})
.on('error',function(error){
console.log(error.message);
});
/*
`node samples/sample.js`
#0 ["2000-01-01","20322051544","1979.0","8.8017226E7","ABC","45"]
#1 ["2050-11-27","28392898392","1974.0","8.8392926E7","DEF","23"]
Number of lines: 2
*/
// CSV sample - Copyright David Worms <open@adaltas.com> (BSD Licensed)
// node samples/transform.js
// NOTE(review): diff residue — two copies of samples/transform.js are
// concatenated below; the only code change is require('csv') →
// require('..'). The transform returns a string, so the callback is
// responsible for its own delimiters: it emits `id:firstname lastname`
// pairs joined by commas on a single line.
var csv = require('csv');
csv()
.fromPath(__dirname+'/transform.in')
.toStream(process.stdout)
.transform(function(data,index){
// Prefix a comma for every record after the first.
return (index>0 ? ',' : '') + data[0] + ":" + data[2] + ' ' + data[1];
});
// Print sth like:
// 82:Zbigniew Preisner,94:Serge Gainsbourg
// node samples/transform.js
// Second (post-refactor) copy — identical logic, local require path.
var csv = require('..');
csv()
.fromPath(__dirname+'/transform.in')
.toStream(process.stdout)
.transform(function(data,index){
return (index>0 ? ',' : '') + data[0] + ":" + data[2] + ' ' + data[1];
});
/*
`node samples/transform.js`
82:Zbigniew Preisner,94:Serge Gainsbourg
*/
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
268379
139
1294
6
70
3
1