@@ -17,8 +17,7 @@ #!/usr/bin/env node
		'Parser options:',
		' --peek 10 infer columns from first ten lines of input',
		' --in-delimiter field separator (inferred if unspecified)',
		' --in-quotechar " ',
		// ' --escapechar \\ escape quotechars when quoted',
		'',
		'Stringifier options:',
		' --peek 10 infer columns from first ten objects of input',
		' --out-delimiter , field separator',
		@@ -50,3 +49,2 @@ ' --out-quotechar " marks beginning and end of fields containing delimiter',
		var parser_opts = {
		peek: argv.peek,
		delimiter: argv['in-delimiter'],
		@@ -58,2 +56,3 @@ quotechar: argv['in-quotechar'],
		quotechar: argv['out-quotechar'],
		peek: argv.peek,
		filter: argv.filter,
		@@ -65,3 +64,3 @@ omit: argv.omit,

		// func: function (stream, filename, parser_opts, stringifier_opts, callback) { ... }
		// func: function(stream, filename, parser_opts, stringifier_opts, callback) { ... }
		var func = argv.describe ? sv.describe : sv.transform;
		@@ -68,0 +67,0 @@ var exit = function(err) {

example.js

		@@ -32,3 +32,3 @@ var sv = require('./'); // require('sv') elsewhere

		var parser = new sv.Parser({peek: 3});
		var parser = new sv.Parser();
		parser.on('data', function(row) {
		@@ -35,0 +35,0 @@ console.log(row);

inference.js

		@@ -58,4 +58,5 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */
		for (var candidate, j = 0; (candidate = candidates[j]); j++) {
		if (counts[candidate] > 0)
		if (counts[candidate] > 0) {
		return candidate;
		}
		}
		@@ -62,0 +63,0 @@ };

package.json

		{
		"name": "sv",
		"version": "0.3.2",
		"version": "0.3.3",
		"description": "Any separated values.",
		@@ -22,3 +22,3 @@ "keywords": [
		},
		"author": "Christopher Brown <io@henrian.com>",
		"author": "Christopher Brown <io@henrian.com> (http://henrian.com)",
		"license": "MIT",
		@@ -25,0 +25,0 @@ "dependencies": {

parser.js

		@@ -7,14 +7,22 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */

		/* Parser class
		new Parser();
		- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed.
		- `_cells_buffer` is a list of strings that have yet to be processed.
		- `delimiter` is the field separator used for incoming strings.
		- `columns` is an array of strings used as object keys.
		* They are inferred if they are missing once the headers have been inferred.
		- `missing` is the value we use for 'time' when we have
		`columns = ['index', 'time']` and `write({index: 90})` is called.
		at before inferring headers and flushing
		*/
		var Parser = module.exports = function(opts) {
		/** new Parser(opts)

		Options (`opts`):

		- `encoding` for converting to strings.
		- `missing` is the value we use for 'time' when we have `columns = ['index', 'time']` and `write({index: 90})` is called.
		- `columns` is an array of strings used as object keys. Inferred by default (but after inferring `delimiter`)

		- `delimiter` is the field separator used for incoming strings, ',' for csv, '\t' for tsv, etc.
		- `quote` is the value that designates a string in which there might be delimiters to ignore. Defaults to '"'
		- `escape` is the character that escapes special characters in a quoted field

		Private values:

		- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed (and sent to output).
		- `_cells_buffer` is a list of strings that have yet to be processed (and sent to output).
		- etc.

		*/
		stream.Transform.call(this, {
		@@ -24,7 +32,7 @@ decodeStrings: true, // Writable option, ensure _transform always gets a Buffer
		});
		// this._readableState.objectMode = true; // default
		// this._readableState.objectMode = true; // default, good
		// decodeStrings: true has to be forced manually (see the issue linked below):
		// stream.Transform({decodeStrings: true}) is not honored if objectMode: true,
		// because objectMode: true (intended for the Readable) overrides the decodeStrings: true
		// if this gets fixed, you can remove the setting below.
		// if this gets fixed, you can remove the private field setting below.
		// Issue at https://github.com/joyent/node/issues/5580
		@@ -34,15 +42,23 @@ this._writableState.objectMode = false;
		if (opts === undefined) opts = {};
		this.missing = opts.missing \|\| ''; // should be a string
		this.delimiter = opts.delimiter;

		// arbitrary settings (non-inferrable, but with sane & safe defaults)
		this.encoding = opts.encoding;
		this.missing_string = opts.missing \|\| ''; // should be a string
		this.columns = opts.columns;
		this.encoding = opts.encoding;
		this.escapechar = opts.escapechar \|\| '\\';
		this.escapebyte = this.escapechar.charCodeAt(0);
		this.quotechar = opts.quotechar;
		if (this.quotechar) {
		this.quotecharquotechar_regex = new RegExp(this.quotechar + this.quotechar, 'g');
		this.escapequotechar_regex = new RegExp('\\\\' + this.quotechar, 'g');
		this.quotebyte = this.quotechar.charCodeAt(0);
		}

		// special demarcating characters
		// 1. delimiter
		this._delimiter_byte = opts.delimiter ? opts.delimiter.charCodeAt(0) : null;

		// 2. quote
		this._quote_string = opts.quote \|\| '"';
		this._quote_byte = this._quote_string.charCodeAt(0);
		this._quotequote_regex = new RegExp(this._quote_string + this._quote_string, 'g');

		// 3. escape
		var escape_string = opts.escape \|\| '\\';
		this._escape_byte = escape_string.charCodeAt(0);
		this._escapequote_regex = new RegExp('\\' + escape_string + this._quote_string, 'g');

		// private storage
		this._bytes_buffer = new Buffer(0);
		@@ -59,3 +75,3 @@ this._cells_buffer = [];
		else {
		this.push(inference.zip(this.columns, cells, this.missing));
		this.push(inference.zip(this.columns, cells, this.missing_string));
		}
		@@ -68,5 +84,5 @@ };

		if (!this.delimiter) {
		if (!this._delimiter_byte) {
		// should we wait for some minimum amount of data?
		this.delimiter = inference.delimiter(buffer);
		this._delimiter_byte = inference.delimiter(buffer);
		}
		@@ -88,14 +104,14 @@
		// if we are on an escape char, simply skip over it (++) and the (default)
		if (!eos && buffer[i] == this.escapebyte) {
		if (!eos && buffer[i] == this._escape_byte) {
		// excel is bizarre. An escape before a quotebyte doesn't count,
		// so we only increment if the next character is not a quotebyte
		// unless we are not inside quotes, in which case we do skip over it.
		if (!inside_quote \|\| buffer[i+1] !== this.quotebyte) {
		if (!inside_quote \|\| buffer[i+1] !== this._quote_byte) {
		i++;
		}
		}
		else if (!eos && buffer[i] === this.quotebyte && inside_quote) {
		else if (!eos && buffer[i] === this._quote_byte && inside_quote) {
		// if we are inside, and on a "
		// handle excel dialect: double quotebyte => single literal quotebyte
		if (buffer[i+1] === this.quotebyte) {
		if (buffer[i+1] === this._quote_byte) {
		// double quotebyte
		@@ -113,3 +129,3 @@ // we just advance over it for now, so that we can put this back on the buffer, if needed.
		}
		else if (!eos && buffer[i] === this.quotebyte && !inside_quote && i == start) {
		else if (!eos && buffer[i] === this._quote_byte && !inside_quote && i == start) {
		// if we are not already inside, and on a "
		@@ -124,3 +140,3 @@ inside_quote = true;
		// OR, we push a new cell whenever we hit a delimiter (say, tab) and are not inside a quote
		(!inside_quote && (buffer[i] == 13 \|\| buffer[i] == 10 \|\| buffer[i] == this.delimiter))
		(!inside_quote && (buffer[i] == 13 \|\| buffer[i] == 10 \|\| buffer[i] == this._delimiter_byte))
		) {
		@@ -135,4 +151,4 @@
		var trimmed_cell = buffer.toString(this.encoding, start + 1, i - 1);
		if (this.quotecharquotechar_regex) {
		trimmed_cell = trimmed_cell.replace(this.quotecharquotechar_regex, this.quotechar);
		if (this._quotequote_regex) {
		trimmed_cell = trimmed_cell.replace(this._quotequote_regex, this._quote_string);
		}
		@@ -145,4 +161,4 @@ // is this good enough?
		var cell = buffer.toString(this.encoding, start, i);
		if (this.escapequotechar_regex) {
		cell = cell.replace(this.escapequotechar_regex, this.quotechar);
		if (this._escapequote_regex) {
		cell = cell.replace(this._escapequote_regex, this._quote_string);
		}
		@@ -155,3 +171,3 @@ cells.push(cell);
		// we flush the row, also, if we are at the end and this is the final chunk
		if (eos \|\| (buffer[i] != this.delimiter)) {
		if (eos \|\| (buffer[i] != this._delimiter_byte)) {
		// add these cells to the emit queue
		@@ -158,0 +174,0 @@ this._row(cells);

stringifier.js

		@@ -10,3 +10,3 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */
		- `peek` is an integer (or undefined / null) describing how many rows we
		should peek at before inferring headers and flushing.
		should look at before inferring headers and flushing.
		- `columns` is an array of strings once the headers have been inferred
		@@ -13,0 +13,0 @@ - `encoding` is the encoding that the stream's read function will use.

test/basic.js

		'use strict'; /*jslint node: true, es5: true, indent: 2 */
		var fs = require('fs');
		var tap = require('tap');
		var streaming = require('streaming');

		var sv = require('..');

		tap.test('import', function (t) {
		tap.test('import', function(t) {
		t.ok(sv !== undefined, 'sv should load from the current directory');
		@@ -12,3 +13,3 @@ t.end();

		tap.test('parser', function (t) {
		tap.test('parser', function(t) {
		var input = [
		@@ -23,8 +24,4 @@ 'index name time',

		var rows = [];
		var parser = new sv.Parser({quotechar: '"'});
		parser.on('data', function(obj) {
		rows.push(obj);
		});
		parser.end(input, function() {
		var parser = new sv.Parser();
		streaming.readToEnd(parser, function(err, rows) {
		t.ok(rows[2], 'There should be a third row');
		@@ -34,5 +31,6 @@ t.equal(rows[2].name, 'lewis', 'The name attribute of the third row should be "lewis"');
		});
		parser.end(input);
		});

		tap.test('stringify', function (t) {
		tap.test('stringify', function(t) {
		var expected = [
		@@ -49,8 +47,4 @@ 'index,name,time',
		var stringifier = new sv.Stringifier({peek: 2, missing: 'NA'});
		var string = '';
		stringifier.on('data', function(chunk) {
		string += chunk.toString();
		});
		stringifier.on('end', function() {
		t.equal(string, expected, 'Stringify output should equal expected.');
		streaming.readToEnd(stringifier, function(err, chunks) {
		t.equal(chunks.join(''), expected, 'Stringify output should equal expected.');
		t.end();
		@@ -57,0 +51,0 @@ });

test/quotes.js

		@@ -7,3 +7,3 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */

		tap.test('excel dialect parser', function (t) {
		tap.test('excel dialect parser', function(t) {
		var input = [
		@@ -18,3 +18,3 @@ 'index\tname\ttime',
		var rows = [];
		var parser = new sv.Parser({quotechar: '"'});
		var parser = new sv.Parser();
		parser.on('data', function(obj) {
		@@ -33,3 +33,3 @@ rows.push(obj);

		tap.test('quoted newlines', function (t) {
		tap.test('quoted newlines', function(t) {
		var input = [
		@@ -42,3 +42,3 @@ 'index name time',
		var rows = [];
		var parser = new sv.Parser({quotechar: '"'});
		var parser = new sv.Parser();
		parser.on('data', function(obj) {
		@@ -45,0 +45,0 @@ rows.push(obj);

test/throughput.js

		@@ -1,2 +0,2 @@
		'use strict'; /*jslint node: true, es5: true, indent: 2, multistr: true */
		'use strict'; /*jslint node: true, es5: true, indent: 2 */
		var fs = require('fs');
		@@ -7,14 +7,14 @@ var tap = require('tap');

		var input = [
		{ index: '1', name: 'chris', time: '1:29' },
		{ index: '2', name: 'daniel', time: '1:17' },
		{ index: '3', name: 'lewis', time: '1:30' },
		{ index: '4', name: 'stephen', time: '1:16' },
		{ index: '5', name: 'larry', time: '1:31' },
		];
		tap.test('passthrough', function(t) {
		var input = [
		{ index: '1', name: 'chris', time: '1:29' },
		{ index: '2', name: 'daniel', time: '1:17' },
		{ index: '3', name: 'lewis', time: '1:30' },
		{ index: '4', name: 'stephen', time: '1:16' },
		{ index: '5', name: 'larry', time: '1:31' },
		];

		tap.test('passthrough', function(t) {
		var stringifier = new sv.Stringifier({peek: 2, missing: 'NA'});
		var parser = new sv.Parser({encoding: stringifier.encoding, quotechar: '"'});

		var parser = stringifier.pipe(new sv.Parser());
		streaming.readToEnd(parser, function(err, output) {
		@@ -25,3 +25,2 @@ t.equivalent(output, input, 'Throughput should be transparent.');

		stringifier.pipe(parser);
		input.forEach(function(record) {
		@@ -34,3 +33,3 @@ stringifier.write(record);
		tap.test('filter', function(t) {
		var parser = new sv.Parser({quotechar: '"'});
		var parser = new sv.Parser();
		var filter = new streaming.Filter(['index', 'name']);
		@@ -58,3 +57,3 @@ parser.pipe(filter);
		tap.test('omitter', function(t) {
		var parser = new sv.Parser({quotechar: '"'});
		var parser = new sv.Parser();
		var omitter = new streaming.Omitter(['index', 'name']);
		@@ -61,0 +60,0 @@ parser.pipe(omitter);

sv - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics