Comparing version 0.2.3 to 0.2.4
@@ -10,10 +10,2 @@ #!/usr/bin/env node | ||
// function logEvents(emitter, prefix, names) { | ||
// names.forEach(function(name) { | ||
// emitter.on(name, function(/*...*/) { | ||
// console.error(prefix + ':' + name, arguments); | ||
// }); | ||
// }); | ||
// } | ||
var JSONStringifier = function(opts) { | ||
@@ -20,0 +12,0 @@ stream.Transform.call(this, {objectMode: true}); |
'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var fs = require('fs'); | ||
var util = require('util'); | ||
@@ -61,1 +62,35 @@ | ||
}; | ||
/**
 * Compute the longest leading substring shared by every string in `filepaths`.
 *
 * filepaths: array of strings (typically file paths).
 * Returns the shared prefix, which is '' when the strings have nothing in
 * common. Because the result is seeded from `filepaths[0]`, an empty array
 * yields `undefined`.
 */
var commonPrefix = exports.commonPrefix = function(filepaths) {
  var prefix = filepaths[0];
  // iterate by index rather than by truthiness of the element, so that an
  // empty-string entry is compared (and empties the prefix) instead of
  // silently ending the scan early.
  for (var i = 1; i < filepaths.length && prefix.length; i++) {
    var filepath = filepaths[i];
    // never look past the end of the shorter string: indexing beyond both
    // ends yields undefined on each side, which would compare as equal and
    // keep the scan running forever when the two strings are identical.
    var limit = Math.min(prefix.length, filepath.length);
    var c = 0;
    while (c < limit && prefix[c] === filepath[c]) {
      c++;
    }
    prefix = prefix.slice(0, c);
  }
  return prefix;
};
/**
 * Count the line breaks in the file at `filepath` by streaming its bytes.
 *
 * Universal newlines: \r (13), \r\n (13, 10) and \n (10) each count as a
 * single line break.
 *
 * filepath: path handed to fs.createReadStream.
 * callback: function(err, number_of_lines). Called with (null, count) when the
 *   stream ends, or with (err, count_so_far) if the stream emits an error.
 */
var lc = exports.lc = function(filepath, callback) {
  var count = 0;
  // remembers whether the previous byte was \r. It lives outside the 'data'
  // handler so that a \r\n pair split across two chunks is still recognised
  // as one line break instead of being counted twice.
  var previous_was_cr = false;
  fs.createReadStream(filepath).on('data', function(buffer) {
    for (var i = 0; i < buffer.length; i++) {
      var byte = buffer[i];
      if (byte === 13) {
        count++;
        previous_was_cr = true;
      }
      else {
        // a \n directly after \r belongs to the break that was already counted
        if (byte === 10 && !previous_was_cr) {
          count++;
        }
        previous_was_cr = false;
      }
    }
  }).on('end', function() {
    callback(null, count);
  }).on('error', function(err) {
    callback(err, count);
  });
};
{ | ||
"name": "sv", | ||
"version": "0.2.3", | ||
"version": "0.2.4", | ||
"description": "Any separated values.", | ||
@@ -25,2 +25,3 @@ "keywords": [ | ||
"dependencies": { | ||
"async": "*", | ||
"optimist": "0.5.2" | ||
@@ -27,0 +28,0 @@ }, |
129
parser.js
@@ -8,4 +8,4 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
new Parser(); | ||
- `_byte_buffer` is a buffer (of bytes) that have yet to be processed. | ||
- `_cell_buffer` is a list of strings that have yet to be processed. | ||
- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed. | ||
- `_cells_buffer` is a list of strings that have yet to be processed. | ||
- `delimiter` is the field separator used for incoming strings. | ||
@@ -38,5 +38,6 @@ - `columns` is an array of strings used as object keys. | ||
this.quotechar = (opts.quotechar || '"').charCodeAt(0); | ||
this.double_quotechar_regex = new RegExp(String.fromCharCode(this.quotechar) + String.fromCharCode(this.quotechar), 'g'); | ||
this._byte_buffer = new Buffer(0); | ||
this._cell_buffer = []; | ||
this._bytes_buffer = new Buffer(0); | ||
this._cells_buffer = []; | ||
}; | ||
@@ -56,4 +57,4 @@ util.inherits(Parser, stream.Transform); | ||
Parser.prototype._flush = function(callback, nonfinal) { | ||
var buffer = this._byte_buffer; | ||
var cells = this._cell_buffer; | ||
var buffer = this._bytes_buffer; | ||
var cells = this._cells_buffer; | ||
@@ -63,3 +64,2 @@ if (!this.delimiter) { | ||
this.delimiter = inference.delimiter(buffer); | ||
console.error('Using delimiter:', this.delimiter); | ||
} | ||
@@ -69,59 +69,78 @@ | ||
var end = buffer.length; | ||
var inside = false; // i.e., inside quotes = inside cell | ||
var inside_quote = false; | ||
// outside_quote reminds us to remove the quotes later (in pushCell) | ||
var outside_quote = false; | ||
for (var i = 0; i < end; i++) { | ||
var eos = !nonfinal && i + 1 == end; | ||
// var snippet = buffer.toString('utf8', 0, i) + | ||
// '\x1b[7m' + buffer.toString('utf8', i, i + 1) + '\x1b[0m' + | ||
// buffer.toString('utf8', i + 1, end); | ||
// console.error(snippet.replace(/\n/g, 'N').replace(/\t/g, 'T'), inside_quote ? 'inside_quote' : ''); | ||
// if we are on an escape char, simply skip over it (++) and the (default) | ||
if (buffer[i] === this.escapechar) { | ||
// excel is bizarre. An escape before a quotechar doesn't count. | ||
if (buffer[i+1] !== this.quotechar) { | ||
if (!eos && buffer[i] == this.escapechar) { | ||
// excel is bizarre. An escape before a quotechar doesn't count, | ||
// so we only increment if the next character is not a quotechar | ||
if (buffer[i+1] != this.quotechar) { | ||
i++; | ||
} | ||
} | ||
// if we are not current inside, and on a " | ||
else if (!inside && buffer[i] === this.quotechar) { | ||
inside = true; | ||
start = i + 1; | ||
} | ||
// if we are inside a quote, and on a " | ||
else if (inside && buffer[i] === this.quotechar) { | ||
// handle excel dialect: double quotechar => single literal quotechar | ||
if (buffer[i+1] === this.quotechar) { | ||
// double quotechar | ||
// `inside` remains true | ||
// we need to collapse out the current index. this might be optimized somehow | ||
// buffer.copy(targetBuffer, [targetStart], [sourceStart], [sourceEnd])# | ||
buffer.copy(buffer, i, i+1); | ||
end--; | ||
else if (!eos && buffer[i] == this.quotechar) { | ||
// if we are inside, and on a " | ||
if (inside_quote) { | ||
// handle excel dialect: double quotechar => single literal quotechar | ||
if (buffer[i+1] == this.quotechar) { | ||
// double quotechar | ||
// we just advance over it for now, so that we can put this back on the buffer, if needed. | ||
i++; | ||
} | ||
else { | ||
// lone quotechar -> don't assume that they're always followed by a delimiter. | ||
// they might be followed by a newline | ||
// and we advance so that buffer[i] skips over the delimiter | ||
inside_quote = false; | ||
outside_quote = true; | ||
} | ||
} | ||
// if we are not inside, and on a " | ||
else { | ||
// otherwise, assume that an end quotechar is always followed by a delimiter. | ||
// advance so that buffer[i] == '\t' | ||
inside = false; | ||
cells.push(buffer.toString(this.encoding, start, i)); | ||
start = i + 2; | ||
inside_quote = true; | ||
} | ||
i++; | ||
} | ||
// otherwise we just wait for the delimiter | ||
else if (!inside && buffer[i] === this.delimiter) { | ||
cells.push(buffer.toString(this.encoding, start, i)); | ||
start = i + 1; | ||
} | ||
// handle \r, \r\n, or \n (but not \n\n) as one line break | ||
// '\r' == 13, '\n' == 10 | ||
else if (!inside && (buffer[i] == 13 || buffer[i] == 10)) { | ||
// we may have consumed the last field, already, if it was quoted. | ||
if (start < i) { | ||
else if ( | ||
// if we are at the very end of the input and this is the final chunk (ignoring any sort of state) | ||
eos || | ||
// OR, we push a new cell whenever we hit a delimiter (say, tab) and are not inside a quote | ||
(!inside_quote && (buffer[i] == 13 || buffer[i] == 10 || buffer[i] == this.delimiter)) | ||
) { | ||
// add the unprocessed buffer to our cells | ||
// inside_quote might be true if the file ends on a quote | ||
if (eos) i++; | ||
if (inside_quote || outside_quote) { | ||
var trimmed_cell = buffer.toString(this.encoding, start + 1, i - 1); | ||
// is this good enough? | ||
cells.push(trimmed_cell.replace(this.double_quotechar_regex, String.fromCharCode(this.quotechar))); | ||
outside_quote = false; | ||
} | ||
else { | ||
cells.push(buffer.toString(this.encoding, start, i)); | ||
} | ||
// add these cells to the emit queue | ||
this._row(cells); | ||
// handle \r, \r\n, or \n (but not \n\n) as one line break | ||
// '\r' == 13, '\n' == 10 | ||
// we flush the row, also, if we are at the end and this is the final chunk | ||
if (eos || (buffer[i] != this.delimiter)) { | ||
// add these cells to the emit queue | ||
this._row(cells); | ||
// and reset them | ||
cells = []; | ||
// and reset them | ||
cells = []; | ||
// also consume a following \n, if there is one. | ||
if (buffer[i] == 13 && buffer[i+1] == 10) { | ||
i++; | ||
// also consume a following \n, if this was \r, and there is one. | ||
if (buffer[i] == 13 && buffer[i+1] == 10) { | ||
i++; | ||
} | ||
} | ||
@@ -132,13 +151,5 @@ start = i + 1; | ||
if (!nonfinal && start < end) { | ||
// this is the final flush call, wrap up any loose ends! | ||
// add the unprocessed buffer to our cells | ||
cells.push(buffer.toString(this.encoding, start, end)); | ||
this._row(cells); | ||
cells = []; // but doesn't really matter | ||
} | ||
// save whatever we have yet to process | ||
this._byte_buffer = buffer.slice(start, end); | ||
this._cell_buffer = cells; | ||
this._bytes_buffer = buffer.slice(start, end); | ||
this._cells_buffer = cells; | ||
@@ -156,3 +167,3 @@ // if there was a trailing newline, this._buffer.length = 0 | ||
// collect unused buffer and new chunk into a single buffer | ||
this._byte_buffer = this._byte_buffer.length ? Buffer.concat([this._byte_buffer, chunk]) : chunk; | ||
this._bytes_buffer = this._bytes_buffer.length ? Buffer.concat([this._bytes_buffer, chunk]) : chunk; | ||
@@ -159,0 +170,0 @@ // do all the processing |
@@ -113,4 +113,14 @@ # sv | ||
## Debugging helper: | ||
/**
 * Debugging aid: subscribe to every event listed in `names` on `emitter` and
 * echo each emission to stderr.
 *
 * emitter: object exposing `.on(name, listener)`.
 * prefix: label printed in front of the event name, joined with ':'.
 * names: array of event names to watch.
 */
function logEvents(emitter, prefix, names) {
  var watch = function(eventName) {
    // the listener's own `arguments` object is logged as-is
    var report = function(/*...*/) {
      console.error(prefix + ':' + eventName, arguments);
    };
    emitter.on(eventName, report);
  };
  names.forEach(watch);
}
## License | ||
Copyright © 2013 Christopher Brown. [MIT Licensed](LICENSE). |
@@ -80,5 +80,10 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var value = obj[j].toString(); | ||
if (value.indexOf(this.delimiter) > -1) { | ||
if (value.indexOf(this.quotechar) > -1) { | ||
value = value.replace(this.quotechar_regex, '\\' + this.quotechar); | ||
var contains_newline = value.indexOf('\n') > -1 || value.indexOf('\r') > -1; | ||
var contains_quotechar = value.indexOf(this.quotechar) > -1; | ||
if (value.indexOf(this.delimiter) > -1 || contains_newline || contains_quotechar) { | ||
if (contains_quotechar) { | ||
// serialize into the excel dialect, currently | ||
value = value.replace(this.quotechar_regex, this.quotechar + this.quotechar); | ||
// serialize with escapes: | ||
// value = value.replace(this.quotechar_regex, '\\' + this.quotechar); | ||
} | ||
@@ -85,0 +90,0 @@ value = this.quotechar + value + this.quotechar; |
@@ -6,2 +6,3 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var sv = require('..'); | ||
test('import', function (t) { | ||
@@ -62,40 +63,1 @@ t.ok(sv !== undefined, 'sv should load from the current directory'); | ||
}); | ||
// Excel-dialect quoting: a doubled quotechar inside a quoted cell should come
// out of the parser as one literal quotechar.
test('excel dialect parser', function (t) {
  var parser = new sv.Parser();
  var received = [];
  // a header line followed by two records
  var lines = [
    'index name time',
    '1 "chris ""breezy"" brown" 1:18',
    '2 "stephen" 1:16',
  ];
  parser.on('data', function(row) {
    received.push(row);
  });
  parser.end(lines.join('\n'), function() {
    t.equal(received.length, 2, 'There should be two rows.');
    t.equal(received[0].name, 'chris "breezy" brown', 'The paired double quotes should be interpreted as just one double quote.');
    t.end();
  });
});
// Newlines inside a quoted cell belong to the cell's value and must not start
// a new row.
test('quoted newlines', function (t) {
  var parser = new sv.Parser();
  var received = [];
  // a header line followed by two records whose name cells contain \n
  var lines = [
    'index name time',
    '1 "chris\ngrant\nbrown" 1:18',
    '2 "stephen\nhodgins" 1:16',
  ];
  parser.on('data', function(row) {
    received.push(row);
  });
  parser.end(lines.join('\n'), function() {
    t.equal(received.length, 2, 'There should be exactly two rows.');
    t.equal(received[0].name, 'chris\ngrant\nbrown', 'Newlines should be retained.');
    t.end();
  });
});
@@ -25,3 +25,3 @@ 'use strict'; /*jslint node: true, es5: true, indent: 2, multistr: true */ | ||
parser.on('end', function() { | ||
t.similar(input, output, 'Throughput should be transparent.'); | ||
t.similar(output, input, 'Throughput should be transparent.'); | ||
t.end(); | ||
@@ -28,0 +28,0 @@ }); |
Wildcard dependency
Quality: Package has a dependency with a floating version range. This can cause issues if the dependency publishes a new major version.
Found 1 instance in 1 package
32782
14
782
126
2
1
5
+ Added async@*
+ Added async@3.2.6 (transitive)