csv-parser
Advanced tools
Comparing version 2.3.0 to 2.3.1
235
index.js
const { Transform } = require('stream') | ||
const genobj = require('generate-object-property') | ||
const genfun = require('generate-function') | ||
const bufferFrom = require('buffer-from') | ||
@@ -32,80 +30,42 @@ const bufferAlloc = require('buffer-alloc') | ||
this.customNewline = options.newline !== defaults.newline | ||
options.customNewline = options.newline !== defaults.newline | ||
for (const key of Object.keys(options)) { | ||
if (['newline', 'quote', 'separator'].includes(key)) { | ||
for (const key of ['newline', 'quote', 'separator']) { | ||
if (typeof options[key] !== 'undefined') { | ||
([options[key]] = bufferFrom(options[key])) | ||
} | ||
// legacy codebase support | ||
this[key] = options[key] | ||
} | ||
// if escape is not defined on the passed options, use the end value of quote | ||
this.escape = (opts || {}).escape ? bufferFrom(options.escape)[0] : options.quote | ||
options.escape = (opts || {}).escape ? bufferFrom(options.escape)[0] : options.quote | ||
if (this.headers === false) { | ||
// enforce, as the column length check will fail if headers:false | ||
this.strict = false | ||
this.state = { | ||
empty: options.raw ? bufferAlloc(0) : '', | ||
escaped: false, | ||
first: true, | ||
lineNumber: 0, | ||
previousEnd: 0, | ||
rowLength: 0, | ||
quoted: false | ||
} | ||
this._prev = null | ||
this._prevEnd = 0 | ||
this._first = true | ||
this._quoted = false | ||
this._escaped = false | ||
this._empty = this._raw ? bufferAlloc(0) : '' | ||
this._Row = null | ||
this._currentRowBytes = 0 | ||
this._line = 0 | ||
if (this.headers || this.headers === false) { | ||
this._first = false | ||
this._compile() | ||
if (options.headers === false) { | ||
// enforce, as the column length check will fail if headers:false | ||
options.strict = false | ||
} | ||
} | ||
_compile () { | ||
if (this._Row) return | ||
const Row = genfun()('function Row (cells) {') | ||
if (this.headers) { | ||
this.headers.forEach((header, index) => { | ||
const newHeader = this.mapHeaders({ header, index }) | ||
if (newHeader) { | ||
Row('%s = cells[%d]', genobj('this', newHeader), index) | ||
} | ||
}) | ||
} else { | ||
// -> false | ||
Row(` | ||
for (const [index, value] of cells.entries()) { | ||
this[index] = value | ||
} | ||
`) | ||
if (options.headers || options.headers === false) { | ||
this.state.first = false | ||
} | ||
Row('}') | ||
this._Row = Row.toFunction() | ||
Object.defineProperty(this._Row.prototype, 'headers', { | ||
enumerable: false, | ||
value: this.headers | ||
}) | ||
this.options = options | ||
this.headers = options.headers | ||
} | ||
_emit (Row, cells) { | ||
this.push(new Row(cells)) | ||
} | ||
_flush (cb) { | ||
if (this._escaped || !this._prev) return cb() | ||
this._online(this._prev, this._prevEnd, this._prev.length + 1) // plus since online -1s | ||
cb() | ||
} | ||
_oncell (buf, start, end) { | ||
parseCell (buffer, start, end) { | ||
const { escape, quote } = this.options | ||
// remove quotes from quoted cells | ||
if (buf[start] === this.quote && buf[end - 1] === this.quote) { | ||
if (buffer[start] === quote && buffer[end - 1] === quote) { | ||
start++ | ||
@@ -119,16 +79,24 @@ end-- | ||
// check for escape characters and skip them | ||
if (buf[i] === this.escape && i + 1 < end && buf[i + 1] === this.quote) i++ | ||
if (y !== i) buf[y] = buf[i] | ||
if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) { | ||
i++ | ||
} | ||
if (y !== i) { | ||
buffer[y] = buffer[i] | ||
} | ||
y++ | ||
} | ||
const value = this._onvalue(buf, start, y) | ||
return value | ||
return this.parseValue(buffer, start, y) | ||
} | ||
_online (buf, start, end) { | ||
parseLine (buffer, start, end) { | ||
const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipComments, skipLines } = this.options | ||
end-- // trim newline | ||
if (!this.customNewline && buf.length && buf[end - 1] === cr) end-- | ||
if (!customNewline && buffer.length && buffer[end - 1] === cr) { | ||
end-- | ||
} | ||
const comma = this.separator | ||
const comma = separator | ||
const cells = [] | ||
@@ -138,6 +106,5 @@ let isQuoted = false | ||
const { skipComments } = this | ||
if (skipComments) { | ||
const char = typeof skipComments === 'string' ? skipComments : '#' | ||
if (buf[start] === bufferFrom(char)[0]) { | ||
if (buffer[start] === bufferFrom(char)[0]) { | ||
return | ||
@@ -148,3 +115,3 @@ } | ||
const mapValue = (value) => { | ||
if (this._first) { | ||
if (this.state.first) { | ||
return value | ||
@@ -156,9 +123,9 @@ } | ||
return this.mapValues({ header, index, value }) | ||
return mapValues({ header, index, value }) | ||
} | ||
for (let i = start; i < end; i++) { | ||
const isStartingQuote = !isQuoted && buf[i] === this.quote | ||
const isEndingQuote = isQuoted && buf[i] === this.quote && i + 1 <= end && buf[i + 1] === comma | ||
const isEscape = isQuoted && buf[i] === this.escape && i + 1 < end && buf[i + 1] === this.quote | ||
const isStartingQuote = !isQuoted && buffer[i] === quote | ||
const isEndingQuote = isQuoted && buffer[i] === quote && i + 1 <= end && buffer[i + 1] === comma | ||
const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote | ||
@@ -173,4 +140,4 @@ if (isStartingQuote || isEndingQuote) { | ||
if (buf[i] === comma && !isQuoted) { | ||
let value = this._oncell(buf, offset, i) | ||
if (buffer[i] === comma && !isQuoted) { | ||
let value = this.parseCell(buffer, offset, i) | ||
value = mapValue(value) | ||
@@ -183,3 +150,3 @@ cells.push(value) | ||
if (offset < end) { | ||
let value = this._oncell(buf, offset, end) | ||
let value = this.parseCell(buffer, offset, end) | ||
value = mapValue(value) | ||
@@ -189,13 +156,13 @@ cells.push(value) | ||
if (buf[end - 1] === comma) { | ||
cells.push(mapValue(this._empty)) | ||
if (buffer[end - 1] === comma) { | ||
cells.push(mapValue(this.state.empty)) | ||
} | ||
const skip = this.skipLines && this.skipLines > this._line | ||
this._line++ | ||
const skip = skipLines && skipLines > this.state.lineNumber | ||
this.state.lineNumber++ | ||
if (this._first && !skip) { | ||
this._first = false | ||
this.headers = cells | ||
this._compile(cells) | ||
if (this.state.first && !skip) { | ||
this.state.first = false | ||
this.headers = cells.map((header, index) => mapHeaders({ header, index })) | ||
this.emit('headers', this.headers) | ||
@@ -205,47 +172,75 @@ return | ||
if (this.strict && cells.length !== this.headers.length) { | ||
if (this.options.strict && cells.length !== this.headers.length) { | ||
const e = new RangeError('Row length does not match headers') | ||
this.emit('error', e) | ||
} else { | ||
if (!skip) this._emit(this._Row, cells) | ||
if (!skip) this.writeRow(cells) | ||
} | ||
} | ||
_onvalue (buf, start, end) { | ||
if (this._raw) return buf.slice(start, end) | ||
return buf.toString('utf-8', start, end) | ||
parseValue (buffer, start, end) { | ||
if (this.options.raw) { | ||
return buffer.slice(start, end) | ||
} | ||
return buffer.toString('utf-8', start, end) | ||
} | ||
writeRow (cells) { | ||
if (this.headers === false || cells.length > this.headers.length) { | ||
this.headers = cells.map((value, index) => index) | ||
} | ||
const row = cells.reduce((o, cell, index) => { | ||
const header = this.headers[index] | ||
if (header !== null) { | ||
o[header] = cell | ||
} | ||
return o | ||
}, {}) | ||
this.push(row) | ||
} | ||
_flush (cb) { | ||
if (this.state.escaped || !this._prev) return cb() | ||
this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1) // plus since online -1s | ||
cb() | ||
} | ||
_transform (data, enc, cb) { | ||
if (typeof data === 'string') data = bufferFrom(data) | ||
if (typeof data === 'string') { | ||
data = bufferFrom(data) | ||
} | ||
const { escape, quote } = this.options | ||
let start = 0 | ||
let buf = data | ||
let buffer = data | ||
if (this._prev) { | ||
start = this._prev.length | ||
buf = Buffer.concat([this._prev, data]) | ||
buffer = Buffer.concat([this._prev, data]) | ||
this._prev = null | ||
} | ||
const bufLen = buf.length | ||
const bufferLength = buffer.length | ||
for (let i = start; i < bufLen; i++) { | ||
const chr = buf[i] | ||
const nextChr = i + 1 < bufLen ? buf[i + 1] : null | ||
for (let i = start; i < bufferLength; i++) { | ||
const chr = buffer[i] | ||
const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null | ||
this._currentRowBytes++ | ||
if (this._currentRowBytes > this.maxRowBytes) { | ||
this.state.rowLength++ | ||
if (this.state.rowLength > this.options.maxRowBytes) { | ||
return cb(new Error('Row exceeds the maximum size')) | ||
} | ||
if (!this._escaped && chr === this.escape && nextChr === this.quote && i !== start) { | ||
this._escaped = true | ||
if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) { | ||
this.state.escaped = true | ||
continue | ||
} else if (chr === this.quote) { | ||
if (this._escaped) { | ||
this._escaped = false | ||
} else if (chr === quote) { | ||
if (this.state.escaped) { | ||
this.state.escaped = false | ||
// non-escaped quote (quoting the cell) | ||
} else { | ||
this._quoted = !this._quoted | ||
this.state.quoted = !this.state.quoted | ||
} | ||
@@ -255,9 +250,9 @@ continue | ||
if (!this._quoted) { | ||
if (this._first && !this.customNewline) { | ||
if (!this.state.quoted) { | ||
if (this.state.first && !this.options.customNewline) { | ||
if (chr === nl) { | ||
this.newline = nl | ||
this.options.newline = nl | ||
} else if (chr === cr) { | ||
if (nextChr !== nl) { | ||
this.newline = cr | ||
this.options.newline = cr | ||
} | ||
@@ -267,6 +262,6 @@ } | ||
if (chr === this.newline) { | ||
this._online(buf, this._prevEnd, i + 1) | ||
this._prevEnd = i + 1 | ||
this._currentRowBytes = 0 | ||
if (chr === this.options.newline) { | ||
this.parseLine(buffer, this.state.previousEnd, i + 1) | ||
this.state.previousEnd = i + 1 | ||
this.state.rowLength = 0 | ||
} | ||
@@ -276,14 +271,14 @@ } | ||
if (this._prevEnd === bufLen) { | ||
this._prevEnd = 0 | ||
if (this.state.previousEnd === bufferLength) { | ||
this.state.previousEnd = 0 | ||
return cb() | ||
} | ||
if (bufLen - this._prevEnd < data.length) { | ||
if (bufferLength - this.state.previousEnd < data.length) { | ||
this._prev = data | ||
this._prevEnd -= (bufLen - data.length) | ||
this.state.previousEnd -= (bufferLength - data.length) | ||
return cb() | ||
} | ||
this._prev = buf | ||
this._prev = buffer | ||
cb() | ||
@@ -290,0 +285,0 @@ } |
{ | ||
"name": "csv-parser", | ||
"version": "2.3.0", | ||
"version": "2.3.1", | ||
"description": "Streaming CSV parser that aims for maximum speed as well as compatibility with the csv-spectrum test suite", | ||
@@ -23,3 +23,3 @@ "license": "MIT", | ||
"engines": { | ||
"node": ">= 6.14.0" | ||
"node": ">= 8.16.0" | ||
}, | ||
@@ -29,11 +29,12 @@ "scripts": { | ||
"commitlint": "commitlint", | ||
"coverage": "nyc npm run test && nyc report --reporter=text-lcov > coverage.lcov", | ||
"lint": "eslint .", | ||
"lint-staged": "lint-staged", | ||
"security": "npm audit", | ||
"test": "ava && tsd" | ||
}, | ||
"dependencies": { | ||
"@hapi/joi": "^16.1.4", | ||
"buffer-alloc": "^1.1.0", | ||
"buffer-from": "^1.0.0", | ||
"execa": "^1.0.0", | ||
"generate-function": "^1.0.1", | ||
"generate-object-property": "^1.0.0", | ||
@@ -44,6 +45,6 @@ "minimist": "^1.2.0", | ||
"devDependencies": { | ||
"@commitlint/cli": "^7.6.1", | ||
"@commitlint/config-conventional": "^7.6.0", | ||
"@commitlint/cli": "^8.2.0", | ||
"@commitlint/config-conventional": "^8.0.0", | ||
"@types/node": "^12.0.0", | ||
"ava": "^1.4.1", | ||
"ava": "^2.4.0", | ||
"bops": "^1.0.0", | ||
@@ -53,18 +54,19 @@ "chalk": "^2.4.2", | ||
"csv-spectrum": "^1.0.0", | ||
"eslint": "^5.16.0", | ||
"eslint-config-standard": "^12.0.0", | ||
"eslint-plugin-import": "^2.17.2", | ||
"eslint-plugin-node": "^9.0.1", | ||
"eslint": "^6.4.0", | ||
"eslint-config-standard": "^14.1.0", | ||
"eslint-plugin-import": "^2.18.2", | ||
"eslint-plugin-node": "^10.0.0", | ||
"eslint-plugin-promise": "^4.1.1", | ||
"eslint-plugin-standard": "^4.0.0", | ||
"globby": "^9.2.0", | ||
"husky": "^2.2.0", | ||
"lint-staged": "^8.1.6", | ||
"execa": "^2.0.4", | ||
"globby": "^10.0.1", | ||
"husky": "^3.0.0", | ||
"lint-staged": "^9.0.2", | ||
"loud-rejection": "^2.1.0", | ||
"nyc": "^14.1.1", | ||
"pre-commit": "^1.2.2", | ||
"standard-version": "^6.0.1", | ||
"strip-ansi": "^5.2.0", | ||
"text-table": "^0.2.0", | ||
"time-span": "^3.1.0", | ||
"tsd": "^0.7.3" | ||
"tsd": "^0.8.0" | ||
}, | ||
@@ -81,3 +83,13 @@ "directories": { | ||
], | ||
"pre-commit": "lint-staged", | ||
"ava": { | ||
"files": [ | ||
"!**/fixtures/**", | ||
"!**/helpers/**" | ||
] | ||
}, | ||
"husky": { | ||
"hooks": { | ||
"commit-msg": "commitlint -e $HUSKY_GIT_PARAMS" | ||
} | ||
}, | ||
"lint-staged": { | ||
@@ -89,7 +101,3 @@ "*.js": [ | ||
}, | ||
"husky": { | ||
"hooks": { | ||
"commit-msg": "commitlint -e $HUSKY_GIT_PARAMS" | ||
} | ||
} | ||
"pre-commit": "lint-staged" | ||
} |
@@ -28,3 +28,3 @@ [tests]: http://img.shields.io/travis/mafintosh/csv-parser.svg | ||
_Note: This module requires Node v6.14.0 or higher._ | ||
_Note: This module requires Node v8.16.0 or higher._ | ||
@@ -38,23 +38,28 @@ ## Benchmarks | ||
Filename Rows Parsed Duration | ||
comma_in_quotes.csv 1 4.8ms | ||
custom_escape_character.csv 3 0.69ms | ||
custom_quote_and_escape_character.csv 3 0.85ms | ||
custom_quote_character.csv 2 0.71ms | ||
custom_quote_character_default_escape.csv 3 0.78ms | ||
dummy.csv 1 0.75ms | ||
escaped_quotes.csv 3 0.77ms | ||
empty_columns.csv 1 0.83ms | ||
junk_rows.csv 3 0.83ms | ||
mac_newlines.csv 2 0.67ms | ||
newlines.csv 3 0.61ms | ||
process_all_rows.csv 7268 78ms | ||
quotes_and_newlines.csv 3 1.1ms | ||
test_geojson.csv 3 2.6ms | ||
test_latin1.csv 2 0.76ms | ||
test_strict.csv 3 0.70ms | ||
test_utf16_big.csv 2 1.0ms | ||
test_utf16_little.csv 2 0.59ms | ||
test_utf8.csv 2 0.59ms | ||
Filename Rows Parsed Duration | ||
backtick.csv 2 3.5ms | ||
bad-data.csv 3 0.55ms | ||
basic.csv 1 0.26ms | ||
comma-in-quote.csv 1 0.29ms | ||
comment.csv 2 0.40ms | ||
empty-columns.csv 1 0.40ms | ||
escape-quotes.csv 3 0.38ms | ||
geojson.csv 3 0.46ms | ||
large-dataset.csv 7268 73ms | ||
newlines.csv 3 0.35ms | ||
no-headers.csv 3 0.26ms | ||
option-comment.csv 2 0.24ms | ||
option-escape.csv 3 0.25ms | ||
option-maxRowBytes.csv 4577 39ms | ||
option-newline.csv 0 0.47ms | ||
option-quote-escape.csv 3 0.33ms | ||
option-quote-many.csv 3 0.38ms | ||
option-quote.csv 2 0.22ms | ||
quotes+newlines.csv 3 0.20ms | ||
strict.csv 3 0.22ms | ||
latin.csv 2 0.38ms | ||
mac-newlines.csv 2 0.28ms | ||
utf16-big.csv 2 0.33ms | ||
utf16.csv 2 0.26ms | ||
utf8.csv 2 0.24ms | ||
``` | ||
@@ -135,3 +140,3 @@ | ||
##### escape | ||
#### escape | ||
@@ -144,3 +149,3 @@ Type: `String`<br> | ||
##### headers | ||
#### headers | ||
@@ -154,3 +159,3 @@ Type: `Array[String] | Boolean` | ||
If `false`, specifies that the first row in a data file does _not_ contain | ||
headers, and instructs the parser to use the row index as the key for each row. | ||
headers, and instructs the parser to use the column index as the key for each column. | ||
Using `headers: false` with the same `data.csv` example from above would yield: | ||
@@ -165,8 +170,7 @@ | ||
##### mapHeaders | ||
#### mapHeaders | ||
Type: `Function` | ||
A function that can be used to modify the values of each header. Return `null` | ||
to remove the header, and its column, from the results. | ||
A function that can be used to modify the values of each header. Return a `String` to modify the header. Return `null` to remove the header, and its column, from the results. | ||
@@ -179,7 +183,12 @@ ```js | ||
##### mapValues | ||
##### Parameters | ||
**header** _String_ The current column header.<br/> | ||
**index** _Number_ The current column index. | ||
#### mapValues | ||
Type: `Function` | ||
A function that can be used to modify the value of each column value. | ||
A function that can be used to modify the content of each column. The return value will replace the current column content. | ||
@@ -192,2 +201,8 @@ ```js | ||
##### Parameters | ||
**header** _String_ The current column header.<br/> | ||
**index** _Number_ The current column index.<br/> | ||
**value** _String_ The current column value (or content). | ||
#### newline | ||
@@ -200,3 +215,3 @@ | ||
##### quote | ||
#### quote | ||
@@ -208,3 +223,3 @@ Type: `String`<br> | ||
##### raw | ||
#### raw | ||
@@ -215,3 +230,3 @@ Type: `Boolean`<br> | ||
##### separator | ||
#### separator | ||
@@ -223,3 +238,3 @@ Type: `String`<br> | ||
##### skipComments | ||
#### skipComments | ||
@@ -231,3 +246,3 @@ Type: `Boolean | String`<br> | ||
##### skipLines | ||
#### skipLines | ||
@@ -240,3 +255,3 @@ Type: `Number`<br> | ||
##### maxRowBytes | ||
#### maxRowBytes | ||
@@ -248,3 +263,3 @@ Type: `Number`<br> | ||
##### strict | ||
#### strict | ||
@@ -251,0 +266,0 @@ Type: `Boolean`<br> |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
27100
6
353
25
356
+ Added@hapi/joi@^16.1.4
+ Added@hapi/address@2.1.4(transitive)
+ Added@hapi/formula@1.2.0(transitive)
+ Added@hapi/hoek@8.5.1(transitive)
+ Added@hapi/joi@16.1.8(transitive)
+ Added@hapi/pinpoint@1.0.2(transitive)
+ Added@hapi/topo@3.1.6(transitive)
- Removedexeca@^1.0.0
- Removedgenerate-function@^1.0.1
- Removedcross-spawn@6.0.5(transitive)
- Removedend-of-stream@1.4.4(transitive)
- Removedexeca@1.0.0(transitive)
- Removedgenerate-function@1.1.0(transitive)
- Removedget-stream@4.1.0(transitive)
- Removedis-stream@1.1.0(transitive)
- Removedisexe@2.0.0(transitive)
- Removednice-try@1.0.5(transitive)
- Removednpm-run-path@2.0.2(transitive)
- Removedonce@1.4.0(transitive)
- Removedp-finally@1.0.0(transitive)
- Removedpath-key@2.0.1(transitive)
- Removedpump@3.0.2(transitive)
- Removedsemver@5.7.2(transitive)
- Removedshebang-command@1.2.0(transitive)
- Removedshebang-regex@1.0.0(transitive)
- Removedsignal-exit@3.0.7(transitive)
- Removedstrip-eof@1.0.0(transitive)
- Removedwhich@1.3.1(transitive)
- Removedwrappy@1.0.2(transitive)