Comparing version 0.2.0 to 0.3.0
35
cli.js
@@ -5,12 +5,35 @@ #! /usr/bin/env node | ||
const csvnorm = require('.') | ||
const csvFilePath = process.argv[2] | ||
const {stdin, stdout, argv} = process | ||
if (csvFilePath) { | ||
csvnorm({filePath: path.resolve(csvFilePath)}) | ||
/**
 * Prints the two header lines that introduce the meta information output.
 *
 * Each line is written with `console.info` and followed by a separate
 * newline argument. `main` only calls this when stdout is a TTY.
 */
function logMetaInfos () {
  const headerLines = [
    '=== Following meta infos won\'t be printed in non tty environments ===',
    'The input was interpreted in following way:',
  ]

  for (const headerLine of headerLines) {
    console.info(headerLine, '\n')
  }
}
else { | ||
/**
 * CLI entry point.
 *
 * - With a file argument: normalizes that file.
 * - Without an argument and with an interactive stdin: prints the usage hint.
 * - Otherwise: normalizes the data piped in on stdin and writes to stdout.
 *
 * The meta info header is printed before normalizing, but only when stdout
 * is a TTY.
 *
 * @param {string[]} args - Command line arguments (the caller passes
 *   `argv.slice(2)`); only the first entry is used, as the input file path.
 */
function main (args) {
  const inputPath = args[0]
  const announceMetaInfos = () => {
    if (stdout.isTTY) logMetaInfos()
  }

  if (inputPath) {
    announceMetaInfos()
    csvnorm({filePath: path.resolve(inputPath)})
  }
  else if (stdin.isTTY) {
    // Nothing is piped in and no file was given
    console.info('Usage: csvnorm $input_file > $output_path')
  }
  else {
    announceMetaInfos()
    csvnorm({
      readableStream: stdin,
      writableStream: stdout,
    })
  }
}
main(argv.slice(2)) |
241
index.js
@@ -8,4 +8,80 @@ const fs = require('fs') | ||
const iconv = require('iconv-lite') | ||
const tempfile = require('tempfile') | ||
/**
 * Converts a date string to the ISO 8601 form `YYYY-MM-DD`.
 *
 * Recognized input formats:
 *   - `MM/DD/YYYY`
 *   - `DD.MM.YYYY`
 *   - `DD.MM.YY` (the two-digit year is prefixed with `20`)
 *
 * @param {*} value - Value to convert.
 * @returns {string|null|undefined} The ISO formatted date, `null` if `value`
 *   is not a string, or `undefined` if it matches none of the formats.
 */
function formatDate (value) {
  if (typeof value !== 'string') return null

  const mmddyyyy = /^([0-9]{2})\/([0-9]{2})\/([0-9]{4})$/
  if (mmddyyyy.test(value)) {
    return value.replace(mmddyyyy, '$3-$1-$2')
  }

  const ddmmyyyy = /^([0-9]{2})\.([0-9]{2})\.([0-9]{4})$/
  if (ddmmyyyy.test(value)) {
    return value.replace(ddmmyyyy, '$3-$2-$1')
  }

  // Month must be 01-12; the stricter check keeps values such as `1.13.17`
  // style version strings or decimals from being mistaken for dates
  const ddmmyy = /^([0-3][0-9])\.(0[1-9]|1[0-2])\.([0-9]{2})$/
  if (ddmmyy.test(value)) {
    return value.replace(ddmmyy, '20$3-$2-$1')
  }
}
/**
 * Parses a localized number string into a JavaScript number.
 *
 * Handled notations:
 *   - plain digits with optional sign characters (`42`, `-7`)
 *   - `,` as thousands separator only (`1,000`, `1,000,000`)
 *   - `.` as thousands separator and `,` as decimal mark (`1.000.000,50`)
 *   - `,` as decimal mark with one or two decimals (`12,5`)
 *
 * @param {*} value - Value to convert.
 * @returns {number|null|undefined} The parsed number, `null` if `value` is
 *   not a string or contains characters other than digits, signs and
 *   separators, or `undefined` if the separators match none of the handled
 *   notations (e.g. `1.5`).
 */
function formatNumber (value) {
  if (typeof value !== 'string') return null

  const containsANumber = /^[0-9+\-.,]+$/.test(value)
  if (!containsANumber) return null

  const containsASeparator = /[.,]/.test(value)
  if (!containsASeparator) return Number(value)

  // One or more groups of three digits, each introduced by a comma
  const containsOnlyThousands = /^[0-9]{1,3}(,[0-9]{3})+$/.test(value)
  if (containsOnlyThousands) return Number(value.replace(/,/g, ''))

  const separatorChars = value.replace(/[^,.]/g, '')
  const firstSeparator = separatorChars[0]
  const lastSeparator = separatorChars[separatorChars.length - 1]

  if ( // thousandSep is `.` and decimal mark is `,`
    firstSeparator === '.' &&
    lastSeparator === ','
  ) {
    return Number(
      value
        .replace(/\./g, '') // Drop every thousands separator, not only the first
        .replace(',', '.')
    )
  }

  const commaAsDecimalMark = /^[0-9+-]+,[0-9]{1,2}$/.test(value)
  if (commaAsDecimalMark) return Number(value.replace(',', '.'))
}
/**
 * Normalizes a monetary value to the form `<amount> <currency>`.
 *
 * A value counts as monetary if it consists of exactly one supported
 * currency marker (EUR, €, HUF, SEK, $, USD) surrounded only by digits,
 * signs, separators and spaces. The amount is normalized with
 * `formatNumber`; if that yields no usable number, the amount is kept as
 * written.
 *
 * @param {*} value - Value to convert.
 * @returns {*} The normalized string, the trimmed input if it is a string
 *   without a currency, or `value` unchanged if it is not a string.
 */
function formatCurrency (value) {
  // Same guard as the sibling formatters: only strings can be trimmed
  if (typeof value !== 'string') return value

  const trimmedInput = value.trim()
  const currencies = /EUR|€|HUF|SEK|\$|USD/
  const currencyPattern = `^[0-9+-., ]*(${currencies.source})[0-9+-., ]*$`

  const match = new RegExp(currencyPattern).exec(trimmedInput)
  if (!match) return trimmedInput

  const currency = match[1]
  const trimmedValue = trimmedInput
    .replace(currency, '')
    .trim()
  const formattedNumber = formatNumber(trimmedValue)

  // Explicit check instead of `||` so that an amount of 0 is not discarded
  const isUsableNumber =
    typeof formattedNumber === 'number' &&
    !Number.isNaN(formattedNumber)

  return `${isUsableNumber ? formattedNumber : trimmedValue} ${currency}`
}
module.exports = (options = {}) => { | ||
@@ -15,15 +91,34 @@ const { | ||
writableStream, | ||
filePath, | ||
} = options | ||
const { filePath } = options | ||
const config = {} | ||
const delimiterHistogram = { | ||
',': 0, | ||
';': 0, | ||
'\t': 0, | ||
'|': 0, | ||
/**
 * Reads the given file and pipes it through charset decoding, CSV parsing,
 * the formatter and CSV stringification into the output stream.
 *
 * Errors emitted by the parser or the stringifier are logged with
 * `console.error`.
 *
 * @param {Object} params
 * @param {Object} params.configGenerator - Its `mostFrequentDelimter`
 *   property is used as the delimiter for parsing.
 * @param {string} params.inputFilePath - Path of the file to read.
 */
function printCsv ({configGenerator, inputFilePath}) {
  const {mostFrequentDelimter} = configGenerator

  const csvParser = csvParse({delimiter: mostFrequentDelimter})
  csvParser.on('error', console.error)

  const csvStringifier = csvStringify()
  csvStringifier.on('error', console.error)

  const source = fs.createReadStream(inputFilePath)
  const decoder = iconv.decodeStream(config.encoding)
  // Falls back to stdout if no writable stream was passed in the options
  const target = writableStream || process.stdout

  source
    .pipe(decoder)
    .pipe(csvParser)
    .pipe(formatter)
    .pipe(csvStringifier)
    .pipe(target)
}
class ConfigGenerator extends stream.Writable { | ||
constructor (opts) { | ||
super(opts) | ||
this.delimiterHistogram = { | ||
',': 0, | ||
';': 0, | ||
'\t': 0, | ||
'|': 0, | ||
} | ||
} | ||
@@ -43,4 +138,4 @@ | ||
if (delimiterHistogram.hasOwnProperty(char)) { | ||
delimiterHistogram[char]++ | ||
if (this.delimiterHistogram.hasOwnProperty(char)) { | ||
this.delimiterHistogram[char]++ | ||
} | ||
@@ -51,71 +146,16 @@ } | ||
} | ||
} | ||
function formatDate (value) { | ||
if (typeof value !== 'string') return null | ||
const yyyymmdd = /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/ | ||
if (yyyymmdd.test(value)) return null | ||
const mmddyyyy = /^([0-9]{2})\/([0-9]{2})\/([0-9]{4})$/ | ||
if (mmddyyyy.test(value)) { | ||
return value.replace(mmddyyyy, '$3-$1-$2') | ||
_final (done) { | ||
this.mostFrequentDelimter = Array | ||
.from(Object.entries(this.delimiterHistogram)) | ||
.sort((itemA, itemB) => | ||
itemB[1] - itemA[1] | ||
)[0][0] // [first entry of delimiter list][key of entry] | ||
done() | ||
} | ||
const ddmmyyyy = /^([0-9]{2})\.([0-9]{2})\.([0-9]{4})$/ | ||
if (ddmmyyyy.test(value)) { | ||
return value.replace(ddmmyyyy, '$3-$2-$1') | ||
} | ||
} | ||
function formatNumber (value) { | ||
if (typeof value !== 'string') return null | ||
const configGenerator = new ConfigGenerator() | ||
const containsANumber = /^[0-9+-.,]+$/.test(value) | ||
if (!containsANumber) return null | ||
const containsASeparator = /[.,]/.test(value) | ||
if (!containsASeparator) return Number(value) | ||
const containsOnlyThousands = /^[0-9]{1,3}(,[0-9]{3})$/.test(value) | ||
if (containsOnlyThousands) return Number(value.replace(/,/g, '')) | ||
const separatorChars = value | ||
.replace(/[^,.]/g, '') | ||
.split('') | ||
if ( // thousandSep is `.` and decimal mark is `,` | ||
separatorChars.shift() === '.' && | ||
separatorChars.pop() === ',' | ||
) { | ||
return Number( | ||
value | ||
.replace('.', '') | ||
.replace(',', '.') | ||
) | ||
} | ||
const commaAsDecimalMark = /^[0-9+-]+,[0-9]{1,2}$/.test(value) | ||
if (commaAsDecimalMark) return Number(value.replace(/,(.+?)/, '.$1')) | ||
} | ||
function formatCurrency (value) { | ||
value = value.trim() | ||
const currencies = /\$|USD|€|EUR/ | ||
const containsACurrency = /^[0-9+-., ]*[$€][0-9+-., ]*$/.test(value) | ||
if (!containsACurrency) return value | ||
const match = value.match(currencies) | ||
if (match) { | ||
const currency = match[0] | ||
const formattedNumber = formatNumber( | ||
value | ||
.replace(currency, '') | ||
.trim() | ||
) | ||
return `${formattedNumber} ${currency}` | ||
} | ||
} | ||
class Formatter extends stream.Transform { | ||
@@ -147,27 +187,5 @@ constructor (opts = {}) { | ||
const configGenerator = new ConfigGenerator() | ||
const formatter = new Formatter() | ||
configGenerator.on('finish', () => { | ||
const mostFrequentDelimter = Array | ||
.from(Object.entries(delimiterHistogram)) | ||
.sort((itemA, itemB) => | ||
itemB[1] - itemA[1] | ||
)[0][0] // [first entry of delimiter list][key of entry] | ||
const parser = csvParse({delimiter: mostFrequentDelimter}) | ||
parser.on('error', console.error) | ||
const stringifier = csvStringify() | ||
stringifier.on('error', console.error) | ||
fs | ||
.createReadStream(filePath) | ||
.pipe(iconv.decodeStream(config.encoding)) | ||
.pipe(parser) | ||
.pipe(formatter) | ||
.pipe(stringifier) | ||
.pipe(process.stdout) | ||
}) | ||
if (filePath) { | ||
@@ -177,12 +195,35 @@ fs | ||
.pipe(configGenerator) | ||
configGenerator.on('finish', () => { | ||
printCsv({ | ||
configGenerator, | ||
inputFilePath: filePath, | ||
}) | ||
}) | ||
return | ||
} | ||
else { | ||
const parser = csvParse() | ||
const stringifier = csvStringify() | ||
readableStream | ||
.pipe(parser) | ||
.pipe(stringifier) | ||
.pipe(writableStream) | ||
const temporaryFilePath = tempfile('.csv') | ||
const writableTempFile = fs.createWriteStream(temporaryFilePath) | ||
let firstStreamFinished = false | ||
function syncStreams () { | ||
if (!firstStreamFinished) { | ||
firstStreamFinished = true | ||
return | ||
} | ||
printCsv({ | ||
configGenerator, | ||
inputFilePath: temporaryFilePath, | ||
}) | ||
} | ||
configGenerator.on('finish', syncStreams) | ||
readableStream | ||
.pipe(configGenerator) | ||
writableTempFile.on('finish', syncStreams) | ||
readableStream | ||
.pipe(writableTempFile) | ||
} |
{ | ||
"name": "csvnorm", | ||
"version": "0.2.0", | ||
"version": "0.3.0", | ||
"main": "index.js", | ||
@@ -9,3 +9,3 @@ "bin": { | ||
"scripts": { | ||
"test": "node tests/main.js" | ||
"test": "find ./tests -iname '*.js' -exec node {} \\;" | ||
}, | ||
@@ -16,3 +16,4 @@ "dependencies": { | ||
"iconv-lite": "^0.4.15", | ||
"jschardet": "^1.4.2" | ||
"jschardet": "^1.4.2", | ||
"tempfile": "^2.0.0" | ||
}, | ||
@@ -19,0 +20,0 @@ "author": "Adrian Sieber <mail@adriansieber.com>", |
@@ -14,2 +14,3 @@ # Csvnorm | ||
## Installation | ||
@@ -31,1 +32,10 @@ | ||
``` | ||
```sh | ||
cat data.csv | csvnorm | ||
``` | ||
## TODO | ||
- [ ] Print debugging info in TTY mode |
const fs = require('fs') | ||
const path = require('path') | ||
const assert = require('assert') | ||
const execute = require('child_process').execFile | ||
const {execFile} = require('child_process') | ||
const expectedOutput = fs.readFileSync( | ||
@@ -12,3 +12,3 @@ path.join(__dirname, 'banking/expected-output.csv'), | ||
execute( | ||
execFile( | ||
'./cli.js', | ||
@@ -15,0 +15,0 @@ ['tests/banking/input-latin1.csv'], |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
10955
9
249
40
5
4
3
+ Added tempfile@^2.0.0
+ Added temp-dir@1.0.0 (transitive)
+ Added tempfile@2.0.0 (transitive)
+ Added uuid@3.4.0 (transitive)