Comparing version 0.3.7 to 0.3.8
201
index.js
@@ -1,79 +0,138 @@ | ||
'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var os = require('os'); | ||
var stream = require('stream'); | ||
var streaming = require('streaming'); | ||
var util = require('util'); | ||
var inference = require('./inference'); | ||
var Parser = exports.Parser = require('./parser'); | ||
var Stringifier = exports.Stringifier = require('./stringifier'); | ||
var json_1 = require('streaming/json'); | ||
var property_1 = require('streaming/property'); | ||
var fs_1 = require('fs'); | ||
var async_1 = require('async'); | ||
// import * as optimist from 'optimist'; | ||
var optimist = require('optimist'); | ||
var parser_1 = require('./parser'); | ||
exports.Parser = parser_1.Parser; | ||
var stringifier_1 = require('./stringifier'); | ||
exports.Stringifier = stringifier_1.Stringifier; | ||
function pluck(xs, prop) { | ||
return xs.map(function(x) { return x[prop]; }); | ||
return xs.map(function (x) { return x[prop]; }); | ||
} | ||
var whitespace_literals = { | ||
'\r': '\\r', | ||
'\n': '\\n', | ||
'\t': '\\t', | ||
'\r': '\\r', | ||
'\n': '\\n', | ||
'\t': '\\t', | ||
}; | ||
function escapeWhitespace(s) { | ||
return whitespace_literals[s]; | ||
return whitespace_literals[s]; | ||
} | ||
var describe = exports.describe = function(input, filename, parser_opts, stringifier_opts, callback) { | ||
if (filename) { | ||
console.log(filename); | ||
} | ||
var rows = []; | ||
var parser = input.pipe(new Parser(parser_opts)); | ||
parser.on('data', function(row) { | ||
rows.push(row); | ||
if (rows.length > 10) { | ||
parser.pause(); | ||
for (var i = 0, l = parser.columns.length; i < l; i++) { | ||
var name = parser.columns[i]; | ||
console.log('[' + i + '] ' + name + ':'); | ||
var cells = pluck(rows, name).join(', ').replace(/\r|\n|\t/g, escapeWhitespace); | ||
var segment = stringifier_opts.width - 2; | ||
for (var start = 0, end = cells.length; start < end; start += segment) { | ||
console.log(' ' + cells.slice(start, start + segment)); | ||
function transform(input, parserConfig, stringifierConfig, callback) { | ||
// if (filename) { | ||
// console.error('Transforming ' + filename); | ||
// } | ||
var transforms = [ | ||
parserConfig.json ? new json_1.Parser() : new parser_1.Parser(parserConfig), | ||
]; | ||
if (stringifierConfig.omit) { | ||
transforms.push(new property_1.Omitter(stringifierConfig.omit.split(/,/g))); | ||
} | ||
if (stringifierConfig.filter) { | ||
transforms.push(new property_1.Picker(stringifierConfig.filter.split(/,/g))); | ||
} | ||
var stringifier = stringifierConfig.json ? new json_1.Stringifier() : new stringifier_1.Stringifier(stringifierConfig); | ||
transforms.push(stringifier); | ||
var output = transforms.reduce(function (outputStream, transform) { return outputStream.pipe(transform); }, input).pipe(process.stdout); | ||
output.on('finish', callback); | ||
output.on('error', function (error) { | ||
// panic! (lets us quit faster, actually) | ||
input.unpipe(); | ||
// output.unpipe(); | ||
callback(error); | ||
}); | ||
} | ||
exports.transform = transform; | ||
function main() { | ||
var argvparser = optimist | ||
.usage([ | ||
'Consolidate any tabular format.', | ||
'', | ||
'Usage: <sprints.txt sv [options] > sprints.csv', | ||
' or: sv [options] ~/Desktop/**/*.csv > ~/all.csv', | ||
'', | ||
'Parser options:', | ||
' --in-delimiter field separator (inferred if unspecified)', | ||
' --in-quotechar " ', | ||
' --in-json parse input as JSON (one object per row)', | ||
'', | ||
'Stringifier options:', | ||
' --peek 10 infer columns from first ten objects of input', | ||
' --out-delimiter , field separator', | ||
' --out-quotechar " marks beginning and end of fields containing delimiter', | ||
' --filter a,b keep only fields a and b in the results', | ||
' --omit c,d leave out fields x and y from the results (processed before filter)', | ||
' -j, --json write one JSON object per row', | ||
'', | ||
'Other options:', | ||
' --version print version and quit', | ||
' -v --verbose turn up the verbosity (still all on STDERR)', | ||
'', | ||
'STDIN, if supplied, will be coerced to utf8', | ||
].join('\n')) | ||
.string(['delimiter', 'quotechar', 'escapechar']) | ||
.boolean(['json', 'verbose', 'version', 'in-json']) | ||
.alias({ | ||
j: 'json', | ||
v: 'verbose', | ||
}) | ||
.default({ | ||
width: process.stdout['columns'] || 80, | ||
}); | ||
var argv = argvparser.argv; | ||
var parser_opts = { | ||
delimiter: argv['in-delimiter'], | ||
quotechar: argv['in-quotechar'], | ||
json: argv['in-json'], | ||
}; | ||
var stringifier_opts = { | ||
delimiter: argv['out-delimiter'], | ||
quotechar: argv['out-quotechar'], | ||
peek: argv.peek, | ||
filter: argv.filter, | ||
omit: argv.omit, | ||
json: argv.json, | ||
width: argv.width, | ||
}; | ||
function exit(err) { | ||
if (err && err.code != 'EPIPE') { | ||
throw err; | ||
} | ||
} | ||
callback(); | ||
// if err.code == 'EPIPE' that just means that someone down | ||
// the line cut us short with a | head or something | ||
if (argv.verbose) { | ||
console.error('Done.'); | ||
} | ||
// process.exit(); // wait for stdout to finish, actually. | ||
} | ||
}); | ||
}; | ||
var transform = exports.transform = function(input, filename, parser_opts, stringifier_opts, callback) { | ||
if (filename) { | ||
console.error('Transforming ' + filename); | ||
} | ||
var parser = input.pipe((parser_opts.json) ? new streaming.json.Parser() : new Parser(parser_opts)); | ||
if (stringifier_opts.omit) { | ||
parser = parser.pipe(new streaming.property.Omitter(stringifier_opts.omit.split(/,/g))); | ||
} | ||
if (stringifier_opts.filter) { | ||
parser = parser.pipe(new streaming.property.Filter(stringifier_opts.filter.split(/,/g))); | ||
} | ||
var stringifier = stringifier_opts.json ? new streaming.json.Stringifier() : new Stringifier(stringifier_opts); | ||
parser.pipe(stringifier); | ||
var output = stringifier.pipe(process.stdout); | ||
output.on('finish', callback); | ||
output.on('error', function(err) { | ||
// panic! (lets us quit faster, actually) | ||
input.unpipe(); | ||
output.unpipe(); | ||
callback(err); | ||
}); | ||
}; | ||
if (argv.help) { | ||
argvparser.showHelp(); | ||
console.log('ARGV: ' + process.argv.join(' ')); | ||
if (argv.verbose) { | ||
console.log(' argv: ' + JSON.stringify(argv, null, ' ').replace(/\n/g, '\n ')); | ||
} | ||
console.log(' parser options: ' + JSON.stringify(parser_opts, null, ' ').replace(/\n/g, '\n ')); | ||
console.log(' stringifier options: ' + JSON.stringify(stringifier_opts, null, ' ').replace(/\n/g, '\n ')); | ||
} | ||
else if (argv.version) { | ||
console.log(require('../package').version); | ||
} | ||
else if (!process.stdin['isTTY']) { | ||
// process.stdin.setEncoding('utf8'); | ||
transform(process.stdin, parser_opts, stringifier_opts, exit); | ||
} | ||
else if (argv._.length) { | ||
var filepaths = argv._; | ||
async_1.eachSeries(filepaths, function (filepath, callback) { | ||
var stream = fs_1.createReadStream(filepath); | ||
transform(stream, parser_opts, stringifier_opts, callback); | ||
console.error(''); // newline | ||
}, exit); | ||
} | ||
else { | ||
argvparser.showHelp(); | ||
console.error('You must supply data via STDIN or as unflagged command line arguments.'); | ||
} | ||
} | ||
exports.main = main; |
{ | ||
"name": "sv", | ||
"version": "0.3.7", | ||
"version": "0.3.8", | ||
"description": "Any separated values.", | ||
@@ -14,9 +14,6 @@ "keywords": [ | ||
], | ||
"bin": { | ||
"sv": "bin/sv.js" | ||
}, | ||
"homepage": "https://github.com/chbrown/sv", | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/chbrown/sv.git" | ||
"url": "https://github.com/chbrown/sv.git" | ||
}, | ||
@@ -28,10 +25,16 @@ "author": "Christopher Brown <io@henrian.com> (http://henrian.com)", | ||
"optimist": "*", | ||
"streaming": ">=0.3.7" | ||
"streaming": "*" | ||
}, | ||
"devDependencies": { | ||
"tap": "*" | ||
"babel-core": "^5.0.0", | ||
"declarations": "*", | ||
"mocha": "*", | ||
"typescript": "next" | ||
}, | ||
"scripts": { | ||
"test": "tap test" | ||
"test": "mocha test" | ||
}, | ||
"bin": { | ||
"sv": "bin/sv" | ||
} | ||
} |
378
parser.js
@@ -1,221 +0,171 @@ | ||
'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var fs = require('fs'); | ||
var util = require('util'); | ||
var stream = require('stream'); | ||
var inference = require('./inference'); | ||
var Parser = module.exports = function(opts) { | ||
/** new Parser(opts) | ||
Options (`opts`): | ||
- `encoding` for converting to strings. | ||
- `missing` is the value we use for 'time' when we have `columns = ['index', 'time']` and `write({index: 90})` is called. | ||
- `columns` is an array of strings used as object keys. Inferred by default (but after inferring `delimiter`) | ||
- `delimiter` is the field separator used for incoming strings, ',' for csv, '\t' for tsv, etc. | ||
- `quote` is the value that designates a string in which there might be delimiters to ignore. Defaults to '"' | ||
- `escape` is the character that escapes special characters in a quoted field | ||
Private values: | ||
- `_bytes_buffer` is a buffer (of bytes) that have yet to be processed (and sent to output). | ||
- `_cells_buffer` is a list of strings that have yet to be processed (and sent to output). | ||
- etc. | ||
*/ | ||
stream.Transform.call(this, { | ||
decodeStrings: true, // Writable option, ensure _transform always gets a Buffer | ||
objectMode: true, // Readable option, .read(n) should return a single value, rather than a Buffer | ||
}); | ||
// this._readableState.objectMode = true; // default, good | ||
// decodeStrings: true, dammit! () | ||
// stream.Transform({decodeStrings: true}) is not honored if objectMode: true, | ||
// because objectMode: true (intended for the Readable) overrides the decodeStrings: true | ||
// if this gets fixed, you can remove the private field setting below. | ||
// Issue at https://github.com/joyent/node/issues/5580 | ||
this._writableState.objectMode = false; | ||
if (opts === undefined) opts = {}; | ||
// arbitrary settings (non-inferrable, but with sane & safe defaults) | ||
this.encoding = opts.encoding; | ||
this.missing_string = opts.missing || ''; // should be a string | ||
this.columns = opts.columns; | ||
// special demarcating characters | ||
// 1. delimiter | ||
this._delimiter_byte = opts.delimiter ? opts.delimiter.charCodeAt(0) : null; | ||
// 2. quote | ||
this._quote_string = opts.quote || '"'; | ||
this._quote_byte = this._quote_string.charCodeAt(0); | ||
this._quotequote_regex = new RegExp(this._quote_string + this._quote_string, 'g'); | ||
// 3. escape | ||
var escape_string = opts.escape || '\\'; | ||
this._escape_byte = escape_string.charCodeAt(0); | ||
this._escapequote_regex = new RegExp('\\' + escape_string + this._quote_string, 'g'); | ||
// private storage | ||
this._bytes_buffer = new Buffer(0); | ||
this._cells_buffer = []; | ||
var __extends = (this && this.__extends) || function (d, b) { | ||
for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
util.inherits(Parser, stream.Transform); | ||
Parser.prototype._row = function(cells) { | ||
if (!this.columns) { | ||
// we don't emit the column names as data | ||
this.columns = cells; | ||
} | ||
else { | ||
this.push(inference.zip(this.columns, cells, this.missing_string)); | ||
} | ||
var stream_1 = require('stream'); | ||
var common_1 = require('./common'); | ||
exports.defaultParserConfiguration = { | ||
encoding: 'utf8', | ||
missing: '', | ||
newline: '\n', | ||
// omit delimiter so that it gets inferred | ||
quotechar: '"', | ||
escape: '\\', | ||
}; | ||
/** | ||
- `byteBuffer` is a buffer (of bytes) that have yet to be processed (and sent to output). | ||
- `cellBuffer` is a list of strings that have yet to be processed (and sent to output). | ||
Parser.prototype.flush = function(callback, nonfinal) { | ||
var buffer = this._bytes_buffer; | ||
var cells = this._cells_buffer; | ||
if (!this._delimiter_byte) { | ||
// should we wait for some minimum amount of data? | ||
this._delimiter_byte = inference.delimiter(buffer); | ||
} | ||
var start = 0; | ||
var end = buffer.length; | ||
var inside_quote = false; | ||
// outside_quote reminds us to remove the quotes later (in pushCell) | ||
var outside_quote = false; | ||
for (var i = 0; i < end; i++) { | ||
var eos = !nonfinal && i + 1 == end; | ||
// var snippet = buffer.toString('utf8', 0, i) + | ||
// '\x1b[7m' + buffer.toString('utf8', i, i + 1) + '\x1b[0m' + | ||
// buffer.toString('utf8', i + 1, end); | ||
// console.error(snippet.replace(/\n/g, 'N').replace(/\t/g, 'T'), inside_quote ? 'inside_quote' : ''); | ||
// if we are on an escape char, simply skip over it (++) and the (default) | ||
if (!eos && buffer[i] == this._escape_byte) { | ||
// excel is bizarre. An escape before a quotebyte doesn't count, | ||
// so we only increment if the next character is not a quotebyte | ||
// unless we are not inside quotes, in which case we do skip over it. | ||
if (!inside_quote || buffer[i+1] !== this._quote_byte) { | ||
i++; | ||
} | ||
*/ | ||
var Parser = (function (_super) { | ||
__extends(Parser, _super); | ||
function Parser(config) { | ||
if (config === void 0) { config = {}; } | ||
_super.call(this, { | ||
decodeStrings: true, | ||
objectMode: true, | ||
}); | ||
this.byteBuffer = new Buffer(0); | ||
this.cellBuffer = []; | ||
// this._readableState.objectMode = true; // default, good | ||
// decodeStrings: true, dammit! () | ||
// stream.Transform({decodeStrings: true}) is not honored if objectMode: true, | ||
// because objectMode: true (intended for the Readable) overrides the decodeStrings: true | ||
// if this gets fixed, you can remove the private field setting below. | ||
// Issue at https://github.com/joyent/node/issues/5580 | ||
this['_writableState'].objectMode = false; | ||
// merge defaults | ||
this.config = common_1.merge(config, exports.defaultParserConfiguration); | ||
// special demarcating characters | ||
// 1. delimiter | ||
if (this.config.delimiter) { | ||
this.delimiterByte = this.config.delimiter.charCodeAt(0); | ||
} | ||
// 2. quote | ||
this.quoteByte = this.config.quotechar.charCodeAt(0); | ||
this.quotequoteRegExp = new RegExp(this.config.quotechar + this.config.quotechar, 'g'); | ||
// 3. escape | ||
this.escapeByte = this.config.escape.charCodeAt(0); | ||
this.escapeQuoteRegExp = new RegExp('\\' + this.config.escape + this.config.quotechar, 'g'); | ||
} | ||
else if (!eos && buffer[i] === this._quote_byte && inside_quote) { | ||
// if we are inside, and on a " | ||
// handle excel dialect: double quotebyte => single literal quotebyte | ||
if (buffer[i+1] === this._quote_byte) { | ||
// double quotebyte | ||
// we just advance over it for now, so that we can put this back on the buffer, if needed. | ||
i++; | ||
} | ||
else { | ||
// lone quotebyte -> don't assume that they're always followed by a delimiter. | ||
// they might be followed by a newline | ||
// and we advance so that buffer[i] skips over the delimiter | ||
inside_quote = false; | ||
outside_quote = true; | ||
} | ||
} | ||
else if (!eos && buffer[i] === this._quote_byte && !inside_quote && i == start) { | ||
// if we are not already inside, and on a " | ||
inside_quote = true; | ||
// we can only enter a quote at the edge of the cell (thus, i == start) | ||
} | ||
// otherwise we just wait for the delimiter | ||
else if ( | ||
// if we are at the very end of the input and this is the final chunk (ignoring any sort of state) | ||
eos || | ||
// OR, we push a new cell whenever we hit a delimiter (say, tab) and are not inside a quote | ||
(!inside_quote && (buffer[i] == 13 || buffer[i] == 10 || buffer[i] == this._delimiter_byte)) | ||
) { | ||
// this generally won't hurt, since it will only go to the end of the buffer anyway. | ||
if (eos) i++; | ||
// add the unprocessed buffer to our cells | ||
// inside_quote might be true if the file ends on a quote | ||
if (inside_quote || outside_quote) { | ||
var trimmed_cell = buffer.toString(this.encoding, start + 1, i - 1); | ||
if (this._quotequote_regex) { | ||
trimmed_cell = trimmed_cell.replace(this._quotequote_regex, this._quote_string); | ||
Parser.prototype.writeRow = function (cells) { | ||
if (!this.config.columns) { | ||
// we don't emit the column names as data | ||
this.config.columns = cells; | ||
} | ||
// is this good enough? | ||
cells.push(trimmed_cell); | ||
outside_quote = inside_quote = false; | ||
} | ||
else { | ||
var cell = buffer.toString(this.encoding, start, i); | ||
if (this._escapequote_regex) { | ||
cell = cell.replace(this._escapequote_regex, this._quote_string); | ||
else { | ||
this.push(common_1.zip(this.config.columns, cells, this.config.missing)); | ||
} | ||
cells.push(cell); | ||
} | ||
// handle \r, \r\n, or \n (but not \n\n) as one line break | ||
// '\r' == 13, '\n' == 10 | ||
// we flush the row, also, if we are at the end and this is the final chunk | ||
if (eos || (buffer[i] != this._delimiter_byte)) { | ||
// add these cells to the emit queue | ||
this._row(cells); | ||
// and reset them | ||
cells = []; | ||
// also consume a following \n, if this was \r, and there is one. | ||
if (buffer[i] == 13 && buffer[i+1] == 10) { | ||
i++; | ||
}; | ||
Parser.prototype.flush = function (callback, nonfinal) { | ||
var buffer = this.byteBuffer; | ||
var cells = this.cellBuffer; | ||
if (!this.delimiterByte) { | ||
// should we wait for some minimum amount of data? | ||
this.delimiterByte = common_1.inferDelimiter(buffer); | ||
} | ||
} | ||
start = i + 1; | ||
} | ||
} | ||
// save whatever we have yet to process | ||
this._bytes_buffer = buffer.slice(start, end); | ||
this._cells_buffer = cells; | ||
// if there was a trailing newline, this._buffer.length = 0 | ||
callback(); | ||
}; | ||
// Parser.prototype._flush = Parser.prototype.flush; | ||
Parser.prototype._flush = function(callback) { | ||
return this.flush(callback, false); | ||
}; | ||
Parser.prototype._transform = function(chunk, encoding, callback) { | ||
// we'll assume that we always get chunks with the same encoding. | ||
if (!this.encoding && encoding != 'buffer') { | ||
this.encoding = encoding; | ||
} | ||
// collect unused buffer and new chunk into a single buffer | ||
this._bytes_buffer = this._bytes_buffer.length ? Buffer.concat([this._bytes_buffer, chunk]) : chunk; | ||
// do all the processing | ||
this.flush(callback, true); | ||
}; | ||
Parser.readToEnd = function(filename, opts, callback) { | ||
// `opts` is optional, `callback` is required | ||
// callback signature: function(err, rows) --> rows is a list of objects | ||
if (callback === undefined) { | ||
callback = opts; | ||
opts = undefined; | ||
} | ||
var rows = []; | ||
var filepath = filename.replace(/^~/, process.env.HOME); | ||
return fs.createReadStream(filepath, opts).pipe(new Parser(opts)) | ||
.on('error', function(err) { | ||
callback(err); | ||
}) | ||
.on('data', function(row) { | ||
rows.push(row); | ||
}) | ||
.on('end', function() { | ||
callback(null, rows); | ||
}); | ||
}; | ||
var start = 0; | ||
var end = buffer.length; | ||
var inside_quote = false; | ||
// outside_quote reminds us to remove the quotes later (in pushCell) | ||
var outside_quote = false; | ||
for (var i = 0; i < end; i++) { | ||
var eos = !nonfinal && i + 1 == end; | ||
// var snippet = buffer.toString('utf8', 0, i) + | ||
// '\x1b[7m' + buffer.toString('utf8', i, i + 1) + '\x1b[0m' + | ||
// buffer.toString('utf8', i + 1, end); | ||
// console.error(snippet.replace(/\n/g, 'N').replace(/\t/g, 'T'), inside_quote ? 'inside_quote' : ''); | ||
// if we are on an escape char, simply skip over it (++) and the (default) | ||
if (!eos && buffer[i] == this.escapeByte) { | ||
// excel is bizarre. An escape before a quotebyte doesn't count, | ||
// so we only increment if the next character is not a quotebyte | ||
// unless we are not inside quotes, in which case we do skip over it. | ||
if (!inside_quote || buffer[i + 1] !== this.quoteByte) { | ||
i++; | ||
} | ||
} | ||
else if (!eos && buffer[i] === this.quoteByte && inside_quote) { | ||
// if we are inside, and on a " | ||
// handle excel dialect: double quotebyte => single literal quotebyte | ||
if (buffer[i + 1] === this.quoteByte) { | ||
// double quotebyte | ||
// we just advance over it for now, so that we can put this back on the buffer, if needed. | ||
i++; | ||
} | ||
else { | ||
// lone quotebyte -> don't assume that they're always followed by a delimiter. | ||
// they might be followed by a newline | ||
// and we advance so that buffer[i] skips over the delimiter | ||
inside_quote = false; | ||
outside_quote = true; | ||
} | ||
} | ||
else if (!eos && buffer[i] === this.quoteByte && !inside_quote && i == start) { | ||
// if we are not already inside, and on a " | ||
inside_quote = true; | ||
} | ||
else if ( | ||
// if we are at the very end of the input and this is the final chunk (ignoring any sort of state) | ||
eos || | ||
// OR, we push a new cell whenever we hit a delimiter (say, tab) and are not inside a quote | ||
(!inside_quote && (buffer[i] == 13 || buffer[i] == 10 || buffer[i] == this.delimiterByte))) { | ||
// this generally won't hurt, since it will only go to the end of the buffer anyway. | ||
if (eos) | ||
i++; | ||
// add the unprocessed buffer to our cells | ||
// inside_quote might be true if the file ends on a quote | ||
if (inside_quote || outside_quote) { | ||
var trimmed_cell = buffer.toString(this.config.encoding, start + 1, i - 1); | ||
if (this.quotequoteRegExp) { | ||
trimmed_cell = trimmed_cell.replace(this.quotequoteRegExp, this.config.quotechar); | ||
} | ||
// is this good enough? | ||
cells.push(trimmed_cell); | ||
outside_quote = inside_quote = false; | ||
} | ||
else { | ||
var cell = buffer.toString(this.config.encoding, start, i); | ||
if (this.escapeQuoteRegExp) { | ||
cell = cell.replace(this.escapeQuoteRegExp, this.config.quotechar); | ||
} | ||
cells.push(cell); | ||
} | ||
// handle \r, \r\n, or \n (but not \n\n) as one line break | ||
// '\r' == 13, '\n' == 10 | ||
// we flush the row, also, if we are at the end and this is the final chunk | ||
if (eos || (buffer[i] != this.delimiterByte)) { | ||
// add these cells to the emit queue | ||
this.writeRow(cells); | ||
// and reset them | ||
cells = []; | ||
// also consume a following \n, if this was \r, and there is one. | ||
if (buffer[i] == 13 && buffer[i + 1] == 10) { | ||
i++; | ||
} | ||
} | ||
start = i + 1; | ||
} | ||
} | ||
// save whatever we have yet to process | ||
this.byteBuffer = buffer.slice(start, end); | ||
this.cellBuffer = cells; | ||
// if there was a trailing newline, this._buffer.length = 0 | ||
callback(); | ||
}; | ||
Parser.prototype._flush = function (callback) { | ||
return this.flush(callback, false); | ||
}; | ||
Parser.prototype._transform = function (chunk, encoding, callback) { | ||
// we'll assume that we always get chunks with the same encoding. | ||
if (!this.config.encoding && encoding != 'buffer') { | ||
this.config.encoding = encoding; | ||
} | ||
// collect unused buffer and new chunk into a single buffer | ||
this.byteBuffer = this.byteBuffer.length ? Buffer.concat([this.byteBuffer, chunk]) : chunk; | ||
// do all the processing | ||
this.flush(callback, true); | ||
}; | ||
return Parser; | ||
})(stream_1.Transform); | ||
exports.Parser = Parser; |
@@ -98,6 +98,42 @@ # sv | ||
Also see the [notes](NOTES.md) for more development comments. | ||
# Development notes | ||
## Character codes | ||
Line separators: | ||
* `\n` = 10 (newline) | ||
* `\r` = 13 (return) | ||
Field separators: | ||
* `\t` = 9 (tab) | ||
* ` ` = 32 (space) | ||
* `,` = 44 (comma) | ||
* `;` = 59 (semicolon) | ||
Field quotations: | ||
* `"` = 34 (double quote) | ||
* `'` = 39 (single quote) | ||
* <code>`</code> = 96 (backtick) | ||
Escapes: | ||
* `\` = 92 (backslash) | ||
## Debugging helper: | ||
function logEvents(emitter, prefix, names) { | ||
names.forEach(function(name) { | ||
emitter.on(name, function(/*...*/) { | ||
console.error(prefix + ':' + name, arguments); | ||
}); | ||
}); | ||
} | ||
## License | ||
Copyright © 2013-2015 Christopher Brown. [MIT Licensed](LICENSE). | ||
Copyright 2013-2015 Christopher Brown. [MIT Licensed](http://chbrown.github.io/licenses/MIT/#2013-2015). |
@@ -1,8 +0,18 @@ | ||
'use strict'; /*jslint node: true, es5: true, indent: 2 */ | ||
var os = require('os'); | ||
var stream = require('stream'); | ||
var util = require('util'); | ||
var inference = require('./inference'); | ||
/* Stringifier class | ||
var __extends = (this && this.__extends) || function (d, b) { | ||
for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
var stream_1 = require('stream'); | ||
var common_1 = require('./common'); | ||
exports.defaultStringifierConfiguration = { | ||
encoding: 'utf8', | ||
missing: '', | ||
newline: '\n', | ||
delimiter: ',', | ||
quotechar: '"', | ||
escape: '\\', | ||
peek: 1, | ||
}; | ||
/** Stringifier class | ||
new Stringifier(); | ||
@@ -22,130 +32,112 @@ - `peek` is an integer (or undefined / null) describing how many rows we | ||
*/ | ||
var Stringifier = module.exports = function(opts) { | ||
stream.Transform.call(this, { | ||
objectMode: true, | ||
}); | ||
// we want: | ||
// Readable({objectMode: false}) | ||
// Writable({objectMode: true}) | ||
this._readableState.objectMode = false; | ||
if (opts === undefined) opts = {}; | ||
this.encoding = opts.encoding || 'utf8'; | ||
this.peek = opts.peek || 1; // should this even be 1? (ignored if opts.columns) | ||
this.missing = opts.missing || ''; // should be a string | ||
this.newline = opts.newline || os.EOL; | ||
this.delimiter = opts.delimiter || ','; | ||
this.quotechar = opts.quotechar || '"'; | ||
this.quotechar_regex = new RegExp(this.quotechar, 'ig'); | ||
// this.escapechar = opts.escapechar || '\\'; | ||
if (opts.columns) { | ||
if (!util.isArray(opts.columns)) { | ||
throw new Error("Stringifier's `columns` must be an array"); | ||
var Stringifier = (function (_super) { | ||
__extends(Stringifier, _super); | ||
function Stringifier(config) { | ||
if (config === void 0) { config = {}; } | ||
_super.call(this, { objectMode: true }); | ||
this.rowBuffer = []; | ||
// we want: | ||
// Readable({objectMode: false}) | ||
// Writable({objectMode: true}) | ||
this['_readableState'].objectMode = false; | ||
this.config = common_1.merge(config, exports.defaultStringifierConfiguration); | ||
this.quotecharRegExp = new RegExp(this.config.quotechar, 'ig'); | ||
if (this.config.columns) { | ||
// maybe we should write the columns even if we don't get any data? | ||
this.rowBuffer = [this.config.columns]; | ||
} | ||
else { | ||
this.rowBuffer = []; | ||
} | ||
} | ||
this.columns = opts.columns; | ||
// maybe we should write the columns even if we don't get any data? | ||
this._buffer = [this.columns]; | ||
} | ||
else { | ||
this._buffer = []; | ||
} | ||
}; | ||
util.inherits(Stringifier, stream.Transform); | ||
Stringifier.prototype._line = function(obj) { | ||
// _write is already a thing, so don't use it. | ||
// this.columns must be set! | ||
if (typeof(obj) === 'string') { | ||
// raw string | ||
this.push(obj + this.newline, this.encoding); | ||
} | ||
else { | ||
// if obj is an array, we ignore this.columns | ||
var length = obj.length; | ||
if (!util.isArray(obj)) { | ||
// object | ||
length = this.columns.length; | ||
// pull properties off the given object in proper column order | ||
var list = new Array(length); | ||
for (var i = 0; i < length; i++) { | ||
var column_value = obj[this.columns[i]]; | ||
list[i] = (column_value === undefined) ? this.missing : column_value; | ||
} | ||
obj = list; | ||
} | ||
// obj is definitely an array now, but the fields aren't quoted. | ||
for (var j = 0; j < length; j++) { | ||
// assume minimal quoting (don't quote unless the cell contains the delimiter) | ||
var value = obj[j].toString(); | ||
var contains_newline = value.indexOf('\n') > -1 || value.indexOf('\r') > -1; | ||
var contains_quotechar = value.indexOf(this.quotechar) > -1; | ||
if (value.indexOf(this.delimiter) > -1 || contains_newline || contains_quotechar) { | ||
if (contains_quotechar) { | ||
// serialize into the excel dialect, currently | ||
value = value.replace(this.quotechar_regex, this.quotechar + this.quotechar); | ||
// serialize with escapes: | ||
// value = value.replace(this.quotechar_regex, '\\' + this.quotechar); | ||
Stringifier.prototype.writeObject = function (object) { | ||
// _write is already a thing, so don't use it. | ||
// this.columns must be set! | ||
if (typeof (object) === 'string') { | ||
// raw string | ||
this.push(object + this.config.newline, this.config.encoding); | ||
} | ||
value = this.quotechar + value + this.quotechar; | ||
} | ||
obj[j] = value; | ||
} | ||
this.push(obj.join(this.delimiter) + this.newline, this.encoding); | ||
} | ||
}; | ||
Stringifier.prototype._lines = function(objs) { | ||
for (var i = 0, l = objs.length; i < l; i++) { | ||
this._line(objs[i]); | ||
} | ||
}; | ||
Stringifier.prototype.flush = function(callback, nonfinal) { | ||
// called when we're done peeking (nonfinal = true) or when end() is | ||
// called (nonfinal = false), in which case we are done peeking, but for a | ||
// different reason. In either case, we need to flush the peeked columns. | ||
if (!this.columns) { | ||
// infer columns | ||
this.columns = inference.columns(this._buffer); | ||
this._line(this.columns); | ||
} | ||
if (this._buffer) { | ||
// flush the _buffer | ||
this._lines(this._buffer); | ||
// a null _buffer means we're done peeking and won't be buffering any more rows | ||
this._buffer = null; | ||
} | ||
// this.push(null); // inferred | ||
callback(); | ||
}; | ||
// the docs decree that we shouldn't call _flush directly | ||
// Stringifier.prototype._flush = Stringifier.prototype.flush; | ||
Stringifier.prototype._flush = function(callback) { | ||
return this.flush(callback, false); | ||
}; | ||
Stringifier.prototype._transform = function(chunk, encoding, callback) { | ||
// objectMode: true, so chunk is an object (and encoding is always 'utf8'?) | ||
if (this.columns) { | ||
// flush the _buffer, if needed | ||
if (this._buffer) { | ||
this._lines(this._buffer); | ||
this._buffer = null; | ||
} | ||
this._line(chunk); | ||
callback(); | ||
} | ||
else { | ||
// if set {peek: 10}, column inference will be called when write(obj) is called the 10th time | ||
this._buffer.push(chunk); | ||
if (this._buffer.length >= this.peek) { | ||
this.flush(callback, true); | ||
} | ||
else { | ||
callback(); | ||
} | ||
} | ||
}; | ||
else { | ||
// if object is an array, we ignore this.columns | ||
var length = object.length; | ||
if (!Array.isArray(object)) { | ||
// object | ||
length = this.config.columns.length; | ||
// pull properties off the given object in proper column order | ||
var list = new Array(length); | ||
for (var i = 0; i < length; i++) { | ||
var column_value = object[this.config.columns[i]]; | ||
list[i] = (column_value === undefined) ? this.config.missing : column_value; | ||
} | ||
object = list; | ||
} | ||
// obj is definitely an array now, but the fields aren't quoted. | ||
for (var j = 0; j < length; j++) { | ||
// assume minimal quoting (don't quote unless the cell contains the delimiter) | ||
var value = object[j].toString(); | ||
var contains_newline = value.indexOf('\n') > -1 || value.indexOf('\r') > -1; | ||
var contains_quotechar = value.indexOf(this.config.quotechar) > -1; | ||
if (value.indexOf(this.config.delimiter) > -1 || contains_newline || contains_quotechar) { | ||
if (contains_quotechar) { | ||
// serialize into the excel dialect, currently | ||
value = value.replace(this.quotecharRegExp, this.config.quotechar + this.config.quotechar); | ||
} | ||
value = this.config.quotechar + value + this.config.quotechar; | ||
} | ||
object[j] = value; | ||
} | ||
this.push(object.join(this.config.delimiter) + this.config.newline, this.config.encoding); | ||
} | ||
}; | ||
Stringifier.prototype.writeObjects = function (objects) { | ||
for (var i = 0, l = objects.length; i < l; i++) { | ||
this.writeObject(objects[i]); | ||
} | ||
}; | ||
Stringifier.prototype.flush = function (callback, nonfinal) { | ||
// called when we're done peeking (nonfinal = true) or when end() is | ||
// called (nonfinal = false), in which case we are done peeking, but for a | ||
// different reason. In either case, we need to flush the peeked columns. | ||
if (!this.config.columns) { | ||
// infer columns | ||
this.config.columns = common_1.inferColumns(this.rowBuffer); | ||
this.writeObject(this.config.columns); | ||
} | ||
if (this.rowBuffer) { | ||
// flush the _buffer | ||
this.writeObjects(this.rowBuffer); | ||
// a null _buffer means we're done peeking and won't be buffering any more rows | ||
this.rowBuffer = null; | ||
} | ||
// this.push(null); // inferred | ||
callback(); | ||
}; | ||
// the docs decree that we shouldn't call _flush directly | ||
Stringifier.prototype._flush = function (callback) { | ||
return this.flush(callback, false); | ||
}; | ||
Stringifier.prototype._transform = function (chunk, encoding, callback) { | ||
// objectMode: true, so chunk is an object (and encoding is always 'utf8'?) | ||
if (this.config.columns) { | ||
// flush the _buffer, if needed | ||
if (this.rowBuffer) { | ||
this.writeObjects(this.rowBuffer); | ||
this.rowBuffer = null; | ||
} | ||
this.writeObject(chunk); | ||
callback(); | ||
} | ||
else { | ||
// if set {peek: 10}, column inference will be called when write(obj) is called the 10th time | ||
this.rowBuffer.push(chunk); | ||
if (this.rowBuffer.length >= this.config.peek) { | ||
this.flush(callback, true); | ||
} | ||
else { | ||
callback(); | ||
} | ||
} | ||
}; | ||
return Stringifier; | ||
})(stream_1.Transform); | ||
exports.Stringifier = Stringifier; |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Wildcard dependency
Quality: Package has a dependency with a floating version range. This can cause issues if the dependency publishes a new major version.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
139
1
31304
4
12
658
3
1
Updated streaming@*