sv - npm Package Compare versions

Comparing version 0.2.3 to 0.2.4

merge.js

test/quotes.js

index.js

		@@ -10,10 +10,2 @@ #!/usr/bin/env node

		// function logEvents(emitter, prefix, names) {
		// names.forEach(function(name) {
		// emitter.on(name, function(/*...*/) {
		// console.error(prefix + ':' + name, arguments);
		// });
		// });
		// }

		var JSONStringifier = function(opts) {
		@@ -20,0 +12,0 @@ stream.Transform.call(this, {objectMode: true});

inference.js

		'use strict'; /*jslint node: true, es5: true, indent: 2 */
		var fs = require('fs');
		var util = require('util');
		@@ -61,1 +62,35 @@
		};


		/**
		 * Compute the longest leading substring shared by every string in `filepaths`.
		 *
		 * The comparison is purely character-by-character; it does not know about
		 * path separators, so the result may end in the middle of a path segment.
		 *
		 * @param {string[]} filepaths - strings to compare.
		 * @returns {string|undefined} the shared prefix ('' when the strings have
		 *   nothing in common), or `undefined` when `filepaths` is empty.
		 */
		var commonPrefix = exports.commonPrefix = function(filepaths) {
		  var prefix = filepaths[0];
		  // Iterate by index rather than by truthiness of the entry, so that an
		  // empty-string entry still narrows the prefix (to '') instead of silently
		  // ending the scan. Stop early once the prefix is already empty.
		  for (var i = 1; i < filepaths.length && prefix.length; i++) {
		    var filepath = filepaths[i];
		    // Bound the scan by the shorter string: without the bound, two identical
		    // strings compare `undefined == undefined` past their end and never stop.
		    var limit = Math.min(prefix.length, filepath.length);
		    var c = 0;
		    while (c < limit && prefix[c] === filepath[c]) {
		      c++;
		    }
		    prefix = prefix.slice(0, c);
		  }
		  return prefix;
		};


		/**
		 * Count the line breaks in a file by streaming it with fs.createReadStream.
		 *
		 * Universal newlines: `\r` (13), `\r\n` (13, 10) and `\n` (10) each count as
		 * a single line break.
		 *
		 * @param {string} filepath - file to read.
		 * @param {function(Error, number)} callback - called as
		 *   `callback(null, number_of_lines)` when the stream ends, or as
		 *   `callback(err, count_so_far)` if the stream emits an error.
		 */
		var lc = exports.lc = function(filepath, callback) {
		  var count = 0;
		  // Remembers that the previous byte was \r. This has to live outside the
		  // 'data' handler: a \r\n pair can be split across two chunks, and peeking
		  // at buffer[i+1] cannot see into the next chunk.
		  var pendingCR = false;
		  fs.createReadStream(filepath).on('data', function(buffer) {
		    for (var i = 0; i < buffer.length; i++) {
		      var byte = buffer[i];
		      if (byte === 10 && pendingCR) {
		        // second half of a \r\n pair; the \r was already counted
		        pendingCR = false;
		        continue;
		      }
		      pendingCR = (byte === 13);
		      if (byte === 13 || byte === 10) {
		        count++;
		      }
		    }
		  }).on('end', function() {
		    callback(null, count);
		  }).on('error', function(err) {
		    callback(err, count);
		  });
		};

package.json

		{
		"name": "sv",
		"version": "0.2.3",
		"version": "0.2.4",
		"description": "Any separated values.",
		@@ -25,2 +25,3 @@ "keywords": [
		"dependencies": {
		"async": "*",
		"optimist": "0.5.2"
		@@ -27,0 +28,0 @@ },

129

parser.js

		@@ -8,4 +8,4 @@ 'use strict'; /jslint node: true, es5: true, indent: 2 /
		new Parser();
		- `_byte_buffer` is a buffer (of bytes) that have yet to be processed.
		- `_cell_buffer` is a list of strings that have yet to be processed.
		- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed.
		- `_cells_buffer` is a list of strings that have yet to be processed.
		- `delimiter` is the field separator used for incoming strings.
		@@ -38,5 +38,6 @@ - `columns` is an array of strings used as object keys.
		this.quotechar = (opts.quotechar \|\| '"').charCodeAt(0);
		this.double_quotechar_regex = new RegExp(String.fromCharCode(this.quotechar) + String.fromCharCode(this.quotechar), 'g');

		this._byte_buffer = new Buffer(0);
		this._cell_buffer = [];
		this._bytes_buffer = new Buffer(0);
		this._cells_buffer = [];
		};
		@@ -56,4 +57,4 @@ util.inherits(Parser, stream.Transform);
		Parser.prototype._flush = function(callback, nonfinal) {
		var buffer = this._byte_buffer;
		var cells = this._cell_buffer;
		var buffer = this._bytes_buffer;
		var cells = this._cells_buffer;

		@@ -63,3 +64,2 @@ if (!this.delimiter) {
		this.delimiter = inference.delimiter(buffer);
		console.error('Using delimiter:', this.delimiter);
		}
		@@ -69,59 +69,78 @@
		var end = buffer.length;
		var inside = false; // i.e., inside quotes = inside cell
		var inside_quote = false;
		// outside_quote reminds us to remove the quotes later (in pushCell)
		var outside_quote = false;

		for (var i = 0; i < end; i++) {
		var eos = !nonfinal && i + 1 == end;
		// var snippet = buffer.toString('utf8', 0, i) +
		// '\x1b[7m' + buffer.toString('utf8', i, i + 1) + '\x1b[0m' +
		// buffer.toString('utf8', i + 1, end);
		// console.error(snippet.replace(/\n/g, 'N').replace(/\t/g, 'T'), inside_quote ? 'inside_quote' : '');

		// if we are on an escape char, simply skip over it (++) and the (default)
		if (buffer[i] === this.escapechar) {
		// excel is bizarre. An escape before a quotechar doesn't count.
		if (buffer[i+1] !== this.quotechar) {
		if (!eos && buffer[i] == this.escapechar) {
		// excel is bizarre. An escape before a quotechar doesn't count,
		// so we only increment if the next character is not a quotechar
		if (buffer[i+1] != this.quotechar) {
		i++;
		}
		}
		// if we are not current inside, and on a "
		else if (!inside && buffer[i] === this.quotechar) {
		inside = true;
		start = i + 1;
		}
		// if we are inside a quote, and on a "
		else if (inside && buffer[i] === this.quotechar) {
		// handle excel dialect: double quotechar => single literal quotechar
		if (buffer[i+1] === this.quotechar) {
		// double quotechar
		// `inside` remains true
		// we need to collapse out the current index. this might be optimized somehow
		// buffer.copy(targetBuffer, [targetStart], [sourceStart], [sourceEnd])#
		buffer.copy(buffer, i, i+1);
		end--;
		else if (!eos && buffer[i] == this.quotechar) {
		// if we are inside, and on a "
		if (inside_quote) {
		// handle excel dialect: double quotechar => single literal quotechar
		if (buffer[i+1] == this.quotechar) {
		// double quotechar
		// we just advance over it for now, so that we can put this back on the buffer, if needed.
		i++;
		}
		else {
		// lone quotechar -> don't assume that they're always followed by a delimiter.
		// they might be followed by a newline
		// and we advance so that buffer[i] skips over the delimiter
		inside_quote = false;
		outside_quote = true;
		}
		}
		// if we are not inside, and on a "
		else {
		// otherwise, assume that an end quotechar is always followed by a delimiter.
		// advance so that buffer[i] == '\t'
		inside = false;
		cells.push(buffer.toString(this.encoding, start, i));
		start = i + 2;
		inside_quote = true;
		}
		i++;
		}
		// otherwise we just wait for the delimiter
		else if (!inside && buffer[i] === this.delimiter) {
		cells.push(buffer.toString(this.encoding, start, i));
		start = i + 1;
		}
		// handle \r, \r\n, or \n (but not \n\n) as one line break
		// '\r' == 13, '\n' == 10
		else if (!inside && (buffer[i] == 13 \|\| buffer[i] == 10)) {
		// we may have consumed the last field, already, if it was quoted.
		if (start < i) {
		else if (
		// if we are at the very end of the input and this is the final chunk (ignoring any sort of state)
		eos \|\|
		// OR, we push a new cell whenever we hit a delimiter (say, tab) and are not inside a quote
		(!inside_quote && (buffer[i] == 13 \|\| buffer[i] == 10 \|\| buffer[i] == this.delimiter))
		) {
		// add the unprocessed buffer to our cells
		// inside_quote might be true if the file ends on a quote
		if (eos) i++;

		if (inside_quote \|\| outside_quote) {
		var trimmed_cell = buffer.toString(this.encoding, start + 1, i - 1);
		// is this good enough?
		cells.push(trimmed_cell.replace(this.double_quotechar_regex, String.fromCharCode(this.quotechar)));
		outside_quote = false;
		}
		else {
		cells.push(buffer.toString(this.encoding, start, i));
		}

		// add these cells to the emit queue
		this._row(cells);
		// handle \r, \r\n, or \n (but not \n\n) as one line break
		// '\r' == 13, '\n' == 10
		// we flush the row, also, if we are at the end and this is the final chunk
		if (eos \|\| (buffer[i] != this.delimiter)) {
		// add these cells to the emit queue
		this._row(cells);

		// and reset them
		cells = [];
		// and reset them
		cells = [];

		// also consume a following \n, if there is one.
		if (buffer[i] == 13 && buffer[i+1] == 10) {
		i++;
		// also consume a following \n, if this was \r, and there is one.
		if (buffer[i] == 13 && buffer[i+1] == 10) {
		i++;
		}
		}
		@@ -132,13 +151,5 @@ start = i + 1;

		if (!nonfinal && start < end) {
		// this is the final flush call, wrap up any loose ends!
		// add the unprocessed buffer to our cells
		cells.push(buffer.toString(this.encoding, start, end));
		this._row(cells);
		cells = []; // but doesn't really matter
		}

		// save whatever we have yet to process
		this._byte_buffer = buffer.slice(start, end);
		this._cell_buffer = cells;
		this._bytes_buffer = buffer.slice(start, end);
		this._cells_buffer = cells;

		@@ -156,3 +167,3 @@ // if there was a trailing newline, this._buffer.length = 0
		// collect unused buffer and new chunk into a single buffer
		this._byte_buffer = this._byte_buffer.length ? Buffer.concat([this._byte_buffer, chunk]) : chunk;
		this._bytes_buffer = this._bytes_buffer.length ? Buffer.concat([this._bytes_buffer, chunk]) : chunk;

		@@ -159,0 +170,0 @@ // do all the processing

README.md

		@@ -113,4 +113,14 @@ # sv

		## Debugging helper:

		/**
		 * Debugging helper: log every occurrence of the named events to stderr.
		 *
		 * @param {EventEmitter} emitter - object to attach listeners to (via `.on`).
		 * @param {string} prefix - label printed before each event name.
		 * @param {string[]} names - event names to listen for.
		 */
		function logEvents(emitter, prefix, names) {
		  names.forEach(function(name) {
		    // the listener takes any number of arguments and logs them all
		    emitter.on(name, function(/*...*/) {
		      console.error(prefix + ':' + name, arguments);
		    });
		  });
		}

		## License

		Copyright © 2013 Christopher Brown. [MIT Licensed](LICENSE).

stringifier.js

		@@ -80,5 +80,10 @@ 'use strict'; /jslint node: true, es5: true, indent: 2 /
		var value = obj[j].toString();
		if (value.indexOf(this.delimiter) > -1) {
		if (value.indexOf(this.quotechar) > -1) {
		value = value.replace(this.quotechar_regex, '\\' + this.quotechar);
		var contains_newline = value.indexOf('\n') > -1 \|\| value.indexOf('\r') > -1;
		var contains_quotechar = value.indexOf(this.quotechar) > -1;
		if (value.indexOf(this.delimiter) > -1 \|\| contains_newline \|\| contains_quotechar) {
		if (contains_quotechar) {
		// serialize into the excel dialect, currently
		value = value.replace(this.quotechar_regex, this.quotechar + this.quotechar);
		// serialize with escapes:
		// value = value.replace(this.quotechar_regex, '\\' + this.quotechar);
		}
		@@ -85,0 +90,0 @@ value = this.quotechar + value + this.quotechar;

test/basic.js

		@@ -6,2 +6,3 @@ 'use strict'; /jslint node: true, es5: true, indent: 2 /
		var sv = require('..');

		test('import', function (t) {
		@@ -62,40 +63,1 @@ t.ok(sv !== undefined, 'sv should load from the current directory');
		});

		// Doubled quotechars inside a quoted cell should come out of the parser as
		// one literal quotechar.
		test('excel dialect parser', function (t) {
		  var lines = [
		    'index name time',
		    '1 "chris ""breezy"" brown" 1:18',
		    '2 "stephen" 1:16',
		  ];
		  var input = lines.join('\n');

		  // every object the parser emits is collected here
		  var collected = [];
		  function collect(record) {
		    collected.push(record);
		  }

		  // runs once the parser has accepted the whole input
		  function verify() {
		    t.equal(collected.length, 2, 'There should be two rows.');
		    t.equal(collected[0].name, 'chris "breezy" brown', 'The paired double quotes should be interpreted as just one double quote.');
		    t.end();
		  }

		  var parser = new sv.Parser();
		  parser.on('data', collect);
		  parser.end(input, verify);
		});


		// Newlines that appear inside a quoted cell should stay in the cell's value
		// rather than starting a new row.
		test('quoted newlines', function (t) {
		  var lines = [
		    'index name time',
		    '1 "chris\ngrant\nbrown" 1:18',
		    '2 "stephen\nhodgins" 1:16',
		  ];
		  var input = lines.join('\n');

		  // every object the parser emits is collected here
		  var collected = [];
		  function collect(record) {
		    collected.push(record);
		  }

		  // runs once the parser has accepted the whole input
		  function verify() {
		    t.equal(collected.length, 2, 'There should be exactly two rows.');
		    t.equal(collected[0].name, 'chris\ngrant\nbrown', 'Newlines should be retained.');
		    t.end();
		  }

		  var parser = new sv.Parser();
		  parser.on('data', collect);
		  parser.end(input, verify);
		});

test/throughput.js

		@@ -25,3 +25,3 @@ 'use strict'; /jslint node: true, es5: true, indent: 2, multistr: true /
		parser.on('end', function() {
		t.similar(input, output, 'Throughput should be transparent.');
		t.similar(output, input, 'Throughput should be transparent.');
		t.end();
		@@ -28,0 +28,0 @@ });

sv - npm Package Compare versions

New alerts

Improved metrics

Worsened metrics

Dependency changes