Comparing version 0.3.2 to 0.3.3
@@ -17,8 +17,7 @@ #!/usr/bin/env node | ||
'Parser options:', | ||
' --peek 10 infer columns from first ten lines of input', | ||
' --in-delimiter field separator (inferred if unspecified)', | ||
' --in-quotechar " ', | ||
// ' --escapechar \\ escape quotechars when quoted', | ||
'', | ||
'Stringifier options:', | ||
' --peek 10 infer columns from first ten objects of input', | ||
' --out-delimiter , field separator', | ||
@@ -50,3 +49,2 @@ ' --out-quotechar " marks beginning and end of fields containing delimiter', | ||
var parser_opts = { | ||
peek: argv.peek, | ||
delimiter: argv['in-delimiter'], | ||
@@ -58,2 +56,3 @@ quotechar: argv['in-quotechar'], | ||
quotechar: argv['out-quotechar'], | ||
peek: argv.peek, | ||
filter: argv.filter, | ||
@@ -65,3 +64,3 @@ omit: argv.omit, | ||
// func: function (stream, filename, parser_opts, stringifier_opts, callback) { ... } | ||
// func: function(stream, filename, parser_opts, stringifier_opts, callback) { ... } | ||
var func = argv.describe ? sv.describe : sv.transform; | ||
@@ -68,0 +67,0 @@ var exit = function(err) { |
@@ -32,3 +32,3 @@ var sv = require('./'); // require('sv') elsewhere | ||
var parser = new sv.Parser({peek: 3}); | ||
var parser = new sv.Parser(); | ||
parser.on('data', function(row) { | ||
@@ -35,0 +35,0 @@ console.log(row); |
@@ -58,4 +58,5 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
for (var candidate, j = 0; (candidate = candidates[j]); j++) { | ||
if (counts[candidate] > 0) | ||
if (counts[candidate] > 0) { | ||
return candidate; | ||
} | ||
} | ||
@@ -62,0 +63,0 @@ }; |
{ | ||
"name": "sv", | ||
"version": "0.3.2", | ||
"version": "0.3.3", | ||
"description": "Any separated values.", | ||
@@ -22,3 +22,3 @@ "keywords": [ | ||
}, | ||
"author": "Christopher Brown <io@henrian.com>", | ||
"author": "Christopher Brown <io@henrian.com> (http://henrian.com)", | ||
"license": "MIT", | ||
@@ -25,0 +25,0 @@ "dependencies": { |
@@ -7,14 +7,22 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
/* Parser class | ||
new Parser(); | ||
- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed. | ||
- `_cells_buffer` is a list of strings that have yet to be processed. | ||
- `delimiter` is the field separator used for incoming strings. | ||
- `columns` is an array of strings used as object keys. | ||
* They are inferred if they are missing once the headers have been inferred. | ||
- `missing` is the value we use for 'time' when we have | ||
`columns = ['index', 'time']` and `write({index: 90})` is called. | ||
at before inferring headers and flushing | ||
*/ | ||
var Parser = module.exports = function(opts) { | ||
/** new Parser(opts) | ||
Options (`opts`): | ||
- `encoding` for converting to strings. | ||
- `missing` is the value we use for 'time' when we have `columns = ['index', 'time']` and `write({index: 90})` is called. | ||
- `columns` is an array of strings used as object keys. Inferred by default (but after inferring `delimiter`) | ||
- `delimiter` is the field separator used for incoming strings, ',' for csv, '\t' for tsv, etc. | ||
- `quote` is the value that designates a string in which there might be delimiters to ignore. Defaults to '"' | ||
- `escape` is the character that escapes special characters in a quoted field | ||
Private values: | ||
- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed (and sent to output). | ||
- `_cells_buffer` is a list of strings that have yet to be processed (and sent to output). | ||
- etc. | ||
*/ | ||
stream.Transform.call(this, { | ||
@@ -24,7 +32,7 @@ decodeStrings: true, // Writable option, ensure _transform always gets a Buffer | ||
}); | ||
// this._readableState.objectMode = true; // default | ||
// this._readableState.objectMode = true; // default, good | ||
// decodeStrings: true, dammit! () | ||
// stream.Transform({decodeStrings: true}) is not honored if objectMode: true, | ||
// because objectMode: true (intended for the Readable) overrides the decodeStrings: true | ||
// if this gets fixed, you can remove the setting below. | ||
// if this gets fixed, you can remove the private field setting below. | ||
// Issue at https://github.com/joyent/node/issues/5580 | ||
@@ -34,15 +42,23 @@ this._writableState.objectMode = false; | ||
if (opts === undefined) opts = {}; | ||
this.missing = opts.missing || ''; // should be a string | ||
this.delimiter = opts.delimiter; | ||
// arbitrary settings (non-inferrable, but with sane & safe defaults) | ||
this.encoding = opts.encoding; | ||
this.missing_string = opts.missing || ''; // should be a string | ||
this.columns = opts.columns; | ||
this.encoding = opts.encoding; | ||
this.escapechar = opts.escapechar || '\\'; | ||
this.escapebyte = this.escapechar.charCodeAt(0); | ||
this.quotechar = opts.quotechar; | ||
if (this.quotechar) { | ||
this.quotecharquotechar_regex = new RegExp(this.quotechar + this.quotechar, 'g'); | ||
this.escapequotechar_regex = new RegExp('\\\\' + this.quotechar, 'g'); | ||
this.quotebyte = this.quotechar.charCodeAt(0); | ||
} | ||
// special demarcating characters | ||
// 1. delimiter | ||
this._delimiter_byte = opts.delimiter ? opts.delimiter.charCodeAt(0) : null; | ||
// 2. quote | ||
this._quote_string = opts.quote || '"'; | ||
this._quote_byte = this._quote_string.charCodeAt(0); | ||
this._quotequote_regex = new RegExp(this._quote_string + this._quote_string, 'g'); | ||
// 3. escape | ||
var escape_string = opts.escape || '\\'; | ||
this._escape_byte = escape_string.charCodeAt(0); | ||
this._escapequote_regex = new RegExp('\\' + escape_string + this._quote_string, 'g'); | ||
// private storage | ||
this._bytes_buffer = new Buffer(0); | ||
@@ -59,3 +75,3 @@ this._cells_buffer = []; | ||
else { | ||
this.push(inference.zip(this.columns, cells, this.missing)); | ||
this.push(inference.zip(this.columns, cells, this.missing_string)); | ||
} | ||
@@ -68,5 +84,5 @@ }; | ||
if (!this.delimiter) { | ||
if (!this._delimiter_byte) { | ||
// should we wait for some minimum amount of data? | ||
this.delimiter = inference.delimiter(buffer); | ||
this._delimiter_byte = inference.delimiter(buffer); | ||
} | ||
@@ -88,14 +104,14 @@ | ||
// if we are on an escape char, simply skip over it (++) and the (default) | ||
if (!eos && buffer[i] == this.escapebyte) { | ||
if (!eos && buffer[i] == this._escape_byte) { | ||
// excel is bizarre. An escape before a quotebyte doesn't count, | ||
// so we only increment if the next character is not a quotebyte | ||
// unless we are not inside quotes, in which case we do skip over it. | ||
if (!inside_quote || buffer[i+1] !== this.quotebyte) { | ||
if (!inside_quote || buffer[i+1] !== this._quote_byte) { | ||
i++; | ||
} | ||
} | ||
else if (!eos && buffer[i] === this.quotebyte && inside_quote) { | ||
else if (!eos && buffer[i] === this._quote_byte && inside_quote) { | ||
// if we are inside, and on a " | ||
// handle excel dialect: double quotebyte => single literal quotebyte | ||
if (buffer[i+1] === this.quotebyte) { | ||
if (buffer[i+1] === this._quote_byte) { | ||
// double quotebyte | ||
@@ -113,3 +129,3 @@ // we just advance over it for now, so that we can put this back on the buffer, if needed. | ||
} | ||
else if (!eos && buffer[i] === this.quotebyte && !inside_quote && i == start) { | ||
else if (!eos && buffer[i] === this._quote_byte && !inside_quote && i == start) { | ||
// if we are not already inside, and on a " | ||
@@ -124,3 +140,3 @@ inside_quote = true; | ||
// OR, we push a new cell whenever we hit a delimiter (say, tab) and are not inside a quote | ||
(!inside_quote && (buffer[i] == 13 || buffer[i] == 10 || buffer[i] == this.delimiter)) | ||
(!inside_quote && (buffer[i] == 13 || buffer[i] == 10 || buffer[i] == this._delimiter_byte)) | ||
) { | ||
@@ -135,4 +151,4 @@ | ||
var trimmed_cell = buffer.toString(this.encoding, start + 1, i - 1); | ||
if (this.quotecharquotechar_regex) { | ||
trimmed_cell = trimmed_cell.replace(this.quotecharquotechar_regex, this.quotechar); | ||
if (this._quotequote_regex) { | ||
trimmed_cell = trimmed_cell.replace(this._quotequote_regex, this._quote_string); | ||
} | ||
@@ -145,4 +161,4 @@ // is this good enough? | ||
var cell = buffer.toString(this.encoding, start, i); | ||
if (this.escapequotechar_regex) { | ||
cell = cell.replace(this.escapequotechar_regex, this.quotechar); | ||
if (this._escapequote_regex) { | ||
cell = cell.replace(this._escapequote_regex, this._quote_string); | ||
} | ||
@@ -155,3 +171,3 @@ cells.push(cell); | ||
// we flush the row, also, if we are at the end and this is the final chunk | ||
if (eos || (buffer[i] != this.delimiter)) { | ||
if (eos || (buffer[i] != this._delimiter_byte)) { | ||
// add these cells to the emit queue | ||
@@ -158,0 +174,0 @@ this._row(cells); |
@@ -10,3 +10,3 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
- `peek` is an integer (or undefined / null) describing how many rows we | ||
should peek at before inferring headers and flushing. | ||
should look at before inferring headers and flushing. | ||
- `columns` is an array of strings once the headers have been inferred | ||
@@ -13,0 +13,0 @@ - `encoding` is the encoding that the stream's read function will use. |
'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var fs = require('fs'); | ||
var tap = require('tap'); | ||
var streaming = require('streaming'); | ||
var sv = require('..'); | ||
tap.test('import', function (t) { | ||
tap.test('import', function(t) { | ||
t.ok(sv !== undefined, 'sv should load from the current directory'); | ||
@@ -12,3 +13,3 @@ t.end(); | ||
tap.test('parser', function (t) { | ||
tap.test('parser', function(t) { | ||
var input = [ | ||
@@ -23,8 +24,4 @@ 'index name time', | ||
var rows = []; | ||
var parser = new sv.Parser({quotechar: '"'}); | ||
parser.on('data', function(obj) { | ||
rows.push(obj); | ||
}); | ||
parser.end(input, function() { | ||
var parser = new sv.Parser(); | ||
streaming.readToEnd(parser, function(err, rows) { | ||
t.ok(rows[2], 'There should be a third row'); | ||
@@ -34,5 +31,6 @@ t.equal(rows[2].name, 'lewis', 'The name attribute of the third row should be "lewis"'); | ||
}); | ||
parser.end(input); | ||
}); | ||
tap.test('stringify', function (t) { | ||
tap.test('stringify', function(t) { | ||
var expected = [ | ||
@@ -49,8 +47,4 @@ 'index,name,time', | ||
var stringifier = new sv.Stringifier({peek: 2, missing: 'NA'}); | ||
var string = ''; | ||
stringifier.on('data', function(chunk) { | ||
string += chunk.toString(); | ||
}); | ||
stringifier.on('end', function() { | ||
t.equal(string, expected, 'Stringify output should equal expected.'); | ||
streaming.readToEnd(stringifier, function(err, chunks) { | ||
t.equal(chunks.join(''), expected, 'Stringify output should equal expected.'); | ||
t.end(); | ||
@@ -57,0 +51,0 @@ }); |
@@ -7,3 +7,3 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
tap.test('excel dialect parser', function (t) { | ||
tap.test('excel dialect parser', function(t) { | ||
var input = [ | ||
@@ -18,3 +18,3 @@ 'index\tname\ttime', | ||
var rows = []; | ||
var parser = new sv.Parser({quotechar: '"'}); | ||
var parser = new sv.Parser(); | ||
parser.on('data', function(obj) { | ||
@@ -33,3 +33,3 @@ rows.push(obj); | ||
tap.test('quoted newlines', function (t) { | ||
tap.test('quoted newlines', function(t) { | ||
var input = [ | ||
@@ -42,3 +42,3 @@ 'index name time', | ||
var rows = []; | ||
var parser = new sv.Parser({quotechar: '"'}); | ||
var parser = new sv.Parser(); | ||
parser.on('data', function(obj) { | ||
@@ -45,0 +45,0 @@ rows.push(obj); |
@@ -1,2 +0,2 @@ | ||
'use strict'; /*jslint node: true, es5: true, indent: 2, multistr: true */ | ||
'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var fs = require('fs'); | ||
@@ -7,14 +7,14 @@ var tap = require('tap'); | ||
var input = [ | ||
{ index: '1', name: 'chris', time: '1:29' }, | ||
{ index: '2', name: 'daniel', time: '1:17' }, | ||
{ index: '3', name: 'lewis', time: '1:30' }, | ||
{ index: '4', name: 'stephen', time: '1:16' }, | ||
{ index: '5', name: 'larry', time: '1:31' }, | ||
]; | ||
tap.test('passthrough', function(t) { | ||
var input = [ | ||
{ index: '1', name: 'chris', time: '1:29' }, | ||
{ index: '2', name: 'daniel', time: '1:17' }, | ||
{ index: '3', name: 'lewis', time: '1:30' }, | ||
{ index: '4', name: 'stephen', time: '1:16' }, | ||
{ index: '5', name: 'larry', time: '1:31' }, | ||
]; | ||
tap.test('passthrough', function(t) { | ||
var stringifier = new sv.Stringifier({peek: 2, missing: 'NA'}); | ||
var parser = new sv.Parser({encoding: stringifier.encoding, quotechar: '"'}); | ||
var parser = stringifier.pipe(new sv.Parser()); | ||
streaming.readToEnd(parser, function(err, output) { | ||
@@ -25,3 +25,2 @@ t.equivalent(output, input, 'Throughput should be transparent.'); | ||
stringifier.pipe(parser); | ||
input.forEach(function(record) { | ||
@@ -34,3 +33,3 @@ stringifier.write(record); | ||
tap.test('filter', function(t) { | ||
var parser = new sv.Parser({quotechar: '"'}); | ||
var parser = new sv.Parser(); | ||
var filter = new streaming.Filter(['index', 'name']); | ||
@@ -58,3 +57,3 @@ parser.pipe(filter); | ||
tap.test('omitter', function(t) { | ||
var parser = new sv.Parser({quotechar: '"'}); | ||
var parser = new sv.Parser(); | ||
var omitter = new streaming.Omitter(['index', 'name']); | ||
@@ -61,0 +60,0 @@ parser.pipe(omitter); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
45023
25
1007
9