Comparing version 0.0.3 to 0.0.4
37
index.js
@@ -8,2 +8,3 @@ #!/usr/bin/env nodejs | ||
var cluster = require('cluster'); | ||
var csv = require('fast-csv'); | ||
var numCPUs = require('os').cpus().length; | ||
@@ -69,5 +70,5 @@ | ||
outputBuffers[name] = ''; | ||
outputBuffers[name] += columnNames.join(program.outputDelimiter)+'\n'; | ||
outputBuffers[name] += csv.writeToString([columnNames], {delimiter: program.outputDelimiter}); | ||
} | ||
outputBuffers[name] += data.join(program.outputDelimiter)+'\n'; | ||
outputBuffers[name] += '\n'+csv.writeToString([data], {delimiter: program.delimiter}); | ||
if (outputBuffers[name].length > 1000000) { | ||
@@ -82,3 +83,2 @@ outputStreams[name].write(outputBuffers[name]); | ||
var i = 0; | ||
var columnNames; | ||
@@ -88,23 +88,16 @@ | ||
var stream = fs.createReadStream(inputFile, 'LATIN1'); | ||
var workingBuffer = ''; | ||
stream.on('data', function(chunk){ | ||
workingBuffer += chunk; | ||
var lines = workingBuffer.split('\n'); | ||
while (lines.length > 1) { | ||
var line = lines.shift(); | ||
var data = line.split(program.delimiter); | ||
var stream = csv.fromPath(inputFile); | ||
if (i === 0) { | ||
columnNames = data; | ||
targetIndex = columnNames.indexOf(program.column); | ||
if (targetIndex == -1) { | ||
throw new Error('Column "'+program.column+'" not found'); | ||
} | ||
} else { | ||
writeFile('tmp/'+inputFile, columnNames, data[targetIndex], data); | ||
var i = 0; | ||
stream.on('record', function(data){ | ||
if (i == 0) { | ||
columnNames = data; | ||
targetIndex = columnNames.indexOf(program.column); | ||
if (targetIndex == -1) { | ||
throw new Error('Column "'+program.column+'" not found'); | ||
} | ||
i++; | ||
} else { | ||
writeFile('tmp/'+inputFile, columnNames, data[targetIndex], data); | ||
} | ||
workingBuffer = lines[0]; | ||
i++; | ||
}); | ||
@@ -126,4 +119,4 @@ | ||
} | ||
process.exit(0); | ||
cluster.worker.disconnect(); | ||
}); | ||
} |
{ | ||
"name": "csvkiller", | ||
"description": "Segment CSV files by any column", | ||
"version": "0.0.3", | ||
"version": "0.0.4", | ||
"maintainers": [ | ||
@@ -20,6 +20,7 @@ { | ||
"dependencies": { | ||
"async": "~0.9.0", | ||
"commander": "~2.2.0", | ||
"async": "~0.9.0", | ||
"fast-csv": "^0.4.1", | ||
"mkdirp": "~0.5.0" | ||
} | ||
} |
@@ -28,5 +28,5 @@ csvkiller | ||
-c, --column [name] Which column to segment by | ||
-o, --output-directory [directory] The directory where output goes [output] | ||
-d, --delimiter [delimiter] How to split up lines in the input file [,] | ||
-od, --output-delimiter [delimiter] How to split up lines in the output files [,] | ||
-d, --delimiter [delimiter] How to split up lines in the input file (use TAB for tab-delimited) [,] | ||
-od, --output-delimiter [delimiter] How to split up lines in the output files (use TAB for tab-delimited) [,] | ||
-b, --buffer-size [characters] Number of characters that can be held in the in-memory file buffer before it is written to disk [1000000] | ||
``` | ||
@@ -33,0 +33,0 @@ |
Sorry, the diff of this file is not supported yet
6487
4
100
+ Added fast-csv@^0.4.1
+ Added arguments-extended@0.0.3 (transitive)
+ Added array-extended@0.0.11 (transitive)
+ Added date-extended@0.0.6 (transitive)
+ Added declare.js@0.0.8 (transitive)
+ Added extended@0.0.6 (transitive)
+ Added extender@0.0.10 (transitive)
+ Added fast-csv@0.4.4 (transitive)
+ Added is-extended@0.0.10 (transitive)
+ Added object-extended@0.0.7 (transitive)
+ Added string-extended@0.0.8 (transitive)