Socket
Socket
Sign inDemoInstall

csv-parse

Package Overview
Dependencies
Maintainers
1
Versions
141
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

csv-parse - npm Package Compare versions

Comparing version 5.0.4 to 5.1.0

lib/api/CsvError.js

1233

lib/index.js

@@ -10,562 +10,17 @@

import { Transform } from 'stream';
import ResizeableBuffer from './ResizeableBuffer.js';
import {is_object} from './utils/is_object.js';
import {transform} from './api/index.js';
import {CsvError} from './api/CsvError.js';
// white space characters
// https://en.wikipedia.org/wiki/Whitespace_character
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#Types
// \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff
const tab = 9; // \t, 0x09 in hexadecimal, 9 in decimal
const nl = 10; // \n, 0x0A in hexadecimal, 10 in decimal
const np = 12; // \f (form feed), 0x0C in hexadecimal, 12 in decimal
const cr = 13; // \r, 0x0D in hexadecimal, 13 in decimal
const space = 32; // ' ', 0x20 in hexadecimal, 32 in decimal
// Byte order marks, keyed by the name of the encoding they announce
const boms = {
// Note, the following are equal:
// Buffer.from("\ufeff")
// Buffer.from([239, 187, 191])
// Buffer.from('EFBBBF', 'hex')
'utf8': Buffer.from([239, 187, 191]),
// Note, the following are equal:
// Buffer.from("\ufeff", 'utf16le')
// Buffer.from([255, 254])
'utf16le': Buffer.from([255, 254])
};
/**
 * Error raised by the parser, tagged with a machine-readable `code`.
 *
 * @param {string} code - Identifier exposed as `error.code`.
 * @param {string|string[]} message - Error message; an array is joined
 *   with single spaces.
 * @param {object} [options] - Parser options. Only `encoding` is read, to
 *   decode Buffer values found in `contexts`. May be omitted.
 * @param {...object} contexts - Objects whose enumerable properties are
 *   copied onto the error instance.
 */
class CsvError extends Error {
  constructor(code, message, options, ...contexts) {
    if(Array.isArray(message)) message = message.join(' ');
    super(message);
    if(Error.captureStackTrace !== undefined){
      Error.captureStackTrace(this, CsvError);
    }
    this.code = code;
    // Some callers do not pass `options`; do not fail on the property
    // access, `toString` falls back to its default encoding in that case.
    const encoding = options == null ? undefined : options.encoding;
    for(const context of contexts){
      for(const key in context){
        const value = context[key];
        if(Buffer.isBuffer(value)){
          this[key] = value.toString(encoding);
        }else if(value == null){
          this[key] = value;
        }else{
          // JSON round trip: store a detached copy, not a live reference
          this[key] = JSON.parse(JSON.stringify(value));
        }
      }
    }
  }
}
// Convert a camelCase name to snake_case: every ASCII upper-case letter
// is lower-cased and prefixed with an underscore.
const underscore = function(str){
  return str.replace(/([A-Z])/g, (whole, upper) => `_${upper.toLowerCase()}`);
};
// True for any non-null value of type "object" that is not an array.
const isObject = function(obj){
  if(obj === null || Array.isArray(obj)){
    return false;
  }
  return typeof obj === 'object';
};
// A record is empty when each of its fields is either null/undefined or
// has a string representation made only of white space.
const isRecordEmpty = function(record){
  for(const field of record){
    if(field == null){
      continue;
    }
    if(field.toString && field.toString().trim() === ''){
      continue;
    }
    return false;
  }
  return true;
};
// Turn a user supplied `columns` array into an array of column objects:
// - undefined, null or false becomes `{ disabled: true }`
// - a string becomes `{ name: <string> }`
// - an object literal is kept as-is but must carry a string `name`
// Anything else throws a CsvError.
const normalizeColumnsArray = function(columns){
  return Array.from(columns, (definition, position) => {
    if(definition === undefined || definition === null || definition === false){
      return { disabled: true };
    }
    if(typeof definition === 'string'){
      return { name: definition };
    }
    if(!isObject(definition)){
      throw new CsvError('CSV_INVALID_COLUMN_DEFINITION', [
        'Invalid column definition:',
        'expect a string or a literal object,',
        `got ${JSON.stringify(definition)} at position ${position}`
      ]);
    }
    if(typeof definition.name !== 'string'){
      throw new CsvError('CSV_OPTION_COLUMNS_MISSING_NAME', [
        'Option columns missing name:',
        `property "name" is required at position ${position}`,
        'when column is an object literal'
      ]);
    }
    return definition;
  });
};
class Parser extends Transform {
// Create the Transform stream and normalize the user options.
// `readableObjectMode` defaults to true but can be overridden by `opts`,
// while `encoding` is always forced to null on the underlying stream.
constructor(opts = {}){
super({...{readableObjectMode: true}, ...opts, encoding: null});
// Keep the untouched user options: `__parse` normalizes them again with
// a new `encoding` when it detects a BOM.
this.__originalOptions = opts;
this.__normalizeOptions(opts);
}
__normalizeOptions(opts){
const options = {};
// Merge with user options
for(const opt in opts){
options[underscore(opt)] = opts[opt];
}
// Normalize option `encoding`
// Note: defined first because other options depends on it
// to convert chars/strings into buffers.
if(options.encoding === undefined || options.encoding === true){
options.encoding = 'utf8';
}else if(options.encoding === null || options.encoding === false){
options.encoding = null;
}else if(typeof options.encoding !== 'string' && options.encoding !== null){
throw new CsvError('CSV_INVALID_OPTION_ENCODING', [
'Invalid option encoding:',
'encoding must be a string or null to return a buffer,',
`got ${JSON.stringify(options.encoding)}`
], options);
}
// Normalize option `bom`
if(options.bom === undefined || options.bom === null || options.bom === false){
options.bom = false;
}else if(options.bom !== true){
throw new CsvError('CSV_INVALID_OPTION_BOM', [
'Invalid option bom:', 'bom must be true,',
`got ${JSON.stringify(options.bom)}`
], options);
}
// Normalize option `cast`
let fnCastField = null;
if(options.cast === undefined || options.cast === null || options.cast === false || options.cast === ''){
options.cast = undefined;
}else if(typeof options.cast === 'function'){
fnCastField = options.cast;
options.cast = true;
}else if(options.cast !== true){
throw new CsvError('CSV_INVALID_OPTION_CAST', [
'Invalid option cast:', 'cast must be true or a function,',
`got ${JSON.stringify(options.cast)}`
], options);
}
// Normalize option `cast_date`
// Accepted values:
// - undefined, null, false or '': date casting is disabled
// - true: use the default `Date.parse` based conversion
// - a function: kept as-is, as promised by the error message below
if(options.cast_date === undefined || options.cast_date === null || options.cast_date === false || options.cast_date === ''){
  options.cast_date = false;
}else if(options.cast_date === true){
  options.cast_date = function(value){
    const date = Date.parse(value);
    // Leave the field untouched when it is not a parsable date
    return !isNaN(date) ? new Date(date) : value;
  };
}else if(typeof options.cast_date !== 'function'){
  throw new CsvError('CSV_INVALID_OPTION_CAST_DATE', [
    'Invalid option cast_date:', 'cast_date must be true or a function,',
    `got ${JSON.stringify(options.cast_date)}`
  ], options);
}
// Normalize option `columns`
let fnFirstLineToHeaders = null;
if(options.columns === true){
// Fields in the first line are converted as-is to columns
fnFirstLineToHeaders = undefined;
}else if(typeof options.columns === 'function'){
fnFirstLineToHeaders = options.columns;
options.columns = true;
}else if(Array.isArray(options.columns)){
options.columns = normalizeColumnsArray(options.columns);
}else if(options.columns === undefined || options.columns === null || options.columns === false){
options.columns = false;
}else{
throw new CsvError('CSV_INVALID_OPTION_COLUMNS', [
'Invalid option columns:',
'expect an array, a function or true,',
`got ${JSON.stringify(options.columns)}`
], options);
}
// Normalize option `group_columns_by_name`
if(options.group_columns_by_name === undefined || options.group_columns_by_name === null || options.group_columns_by_name === false){
options.group_columns_by_name = false;
}else if(options.group_columns_by_name !== true){
throw new CsvError('CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME', [
'Invalid option group_columns_by_name:',
'expect an boolean,',
`got ${JSON.stringify(options.group_columns_by_name)}`
], options);
}else if(options.columns === false){
throw new CsvError('CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME', [
'Invalid option group_columns_by_name:',
'the `columns` mode must be activated.'
], options);
}
// Normalize option `comment`
if(options.comment === undefined || options.comment === null || options.comment === false || options.comment === ''){
options.comment = null;
}else{
if(typeof options.comment === 'string'){
options.comment = Buffer.from(options.comment, options.encoding);
}
if(!Buffer.isBuffer(options.comment)){
throw new CsvError('CSV_INVALID_OPTION_COMMENT', [
'Invalid option comment:',
'comment must be a buffer or a string,',
`got ${JSON.stringify(options.comment)}`
], options);
}
}
// Normalize option `delimiter`
const delimiter_json = JSON.stringify(options.delimiter);
if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter];
if(options.delimiter.length === 0){
throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [
'Invalid option delimiter:',
'delimiter must be a non empty string or buffer or array of string|buffer,',
`got ${delimiter_json}`
], options);
}
options.delimiter = options.delimiter.map(function(delimiter){
if(delimiter === undefined || delimiter === null || delimiter === false){
return Buffer.from(',', options.encoding);
}
if(typeof delimiter === 'string'){
delimiter = Buffer.from(delimiter, options.encoding);
}
if(!Buffer.isBuffer(delimiter) || delimiter.length === 0){
throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [
'Invalid option delimiter:',
'delimiter must be a non empty string or buffer or array of string|buffer,',
`got ${delimiter_json}`
], options);
}
return delimiter;
});
// Normalize option `escape`
if(options.escape === undefined || options.escape === true){
options.escape = Buffer.from('"', options.encoding);
}else if(typeof options.escape === 'string'){
options.escape = Buffer.from(options.escape, options.encoding);
}else if (options.escape === null || options.escape === false){
options.escape = null;
}
if(options.escape !== null){
if(!Buffer.isBuffer(options.escape)){
throw new Error(`Invalid Option: escape must be a buffer, a string or a boolean, got ${JSON.stringify(options.escape)}`);
}
}
// Normalize option `from`
// Accepts an integer >= 0 or a string holding one; defaults to 1.
if(options.from === undefined || options.from === null){
  options.from = 1;
}else{
  // Snapshot the supplied value before coercion so that error messages
  // report what the user passed, not the result of `parseInt`.
  const from_json = JSON.stringify(options.from);
  if(typeof options.from === 'string' && /\d+/.test(options.from)){
    options.from = parseInt(options.from, 10);
  }
  if(!Number.isInteger(options.from)){
    throw new Error(`Invalid Option: from must be an integer, got ${from_json}`);
  }
  if(options.from < 0){
    throw new Error(`Invalid Option: from must be a positive integer, got ${from_json}`);
  }
}
// Normalize option `from_line`
// Accepts an integer > 0 or a string holding one; defaults to 1.
if(options.from_line === undefined || options.from_line === null){
  options.from_line = 1;
}else{
  // Snapshot the supplied value from the normalized options: the raw
  // `opts` object may only know the camelCase key (`fromLine`), in which
  // case `opts.from_line` is undefined and the message would be useless.
  const from_line_json = JSON.stringify(options.from_line);
  if(typeof options.from_line === 'string' && /\d+/.test(options.from_line)){
    options.from_line = parseInt(options.from_line, 10);
  }
  if(!Number.isInteger(options.from_line)){
    throw new Error(`Invalid Option: from_line must be an integer, got ${from_line_json}`);
  }
  if(options.from_line <= 0){
    throw new Error(`Invalid Option: from_line must be a positive integer greater than 0, got ${from_line_json}`);
  }
}
// Normalize options `ignore_last_delimiters`
if(options.ignore_last_delimiters === undefined || options.ignore_last_delimiters === null){
options.ignore_last_delimiters = false;
}else if(typeof options.ignore_last_delimiters === 'number'){
options.ignore_last_delimiters = Math.floor(options.ignore_last_delimiters);
if(options.ignore_last_delimiters === 0){
options.ignore_last_delimiters = false;
}
}else if(typeof options.ignore_last_delimiters !== 'boolean'){
throw new CsvError('CSV_INVALID_OPTION_IGNORE_LAST_DELIMITERS', [
'Invalid option `ignore_last_delimiters`:',
'the value must be a boolean value or an integer,',
`got ${JSON.stringify(options.ignore_last_delimiters)}`
], options);
}
if(options.ignore_last_delimiters === true && options.columns === false){
throw new CsvError('CSV_IGNORE_LAST_DELIMITERS_REQUIRES_COLUMNS', [
'The option `ignore_last_delimiters`',
'requires the activation of the `columns` option'
], options);
}
// Normalize option `info`
if(options.info === undefined || options.info === null || options.info === false){
options.info = false;
}else if(options.info !== true){
throw new Error(`Invalid Option: info must be true, got ${JSON.stringify(options.info)}`);
}
// Normalize option `max_record_size`
// Accepts an integer >= 0 or a string holding one; undefined, null and
// false all mean 0.
if(options.max_record_size === undefined || options.max_record_size === null || options.max_record_size === false){
  options.max_record_size = 0;
}else{
  // Snapshot the supplied value before coercion for the error message
  const max_record_size_json = JSON.stringify(options.max_record_size);
  if(typeof options.max_record_size === 'string' && /\d+/.test(options.max_record_size)){
    options.max_record_size = parseInt(options.max_record_size, 10);
  }
  // Validate after the string conversion too: "-5" or "abc5" must not
  // slip through as -5 or NaN.
  if(!Number.isInteger(options.max_record_size) || options.max_record_size < 0){
    throw new Error(`Invalid Option: max_record_size must be a positive integer, got ${max_record_size_json}`);
  }
}
// Normalize option `objname`
if(options.objname === undefined || options.objname === null || options.objname === false){
options.objname = undefined;
}else if(Buffer.isBuffer(options.objname)){
if(options.objname.length === 0){
throw new Error(`Invalid Option: objname must be a non empty buffer`);
}
if(options.encoding === null){
// Don't call `toString`, leave objname as a buffer
}else{
options.objname = options.objname.toString(options.encoding);
}
}else if(typeof options.objname === 'string'){
if(options.objname.length === 0){
throw new Error(`Invalid Option: objname must be a non empty string`);
}
// Great, nothing to do
}else if(typeof options.objname === 'number'){
// if(options.objname.length === 0){
// throw new Error(`Invalid Option: objname must be a non empty string`);
// }
// Great, nothing to do
}else{
throw new Error(`Invalid Option: objname must be a string or a buffer, got ${options.objname}`);
}
if(options.objname !== undefined){
if(typeof options.objname === 'number'){
if(options.columns !== false){
throw Error('Invalid Option: objname index cannot be combined with columns or be defined as a field');
}
}else{ // A string or a buffer
if(options.columns === false){
throw Error('Invalid Option: objname field must be combined with columns or be defined as an index');
}
}
}
// Normalize option `on_record`
if(options.on_record === undefined || options.on_record === null){
options.on_record = undefined;
}else if(typeof options.on_record !== 'function'){
throw new CsvError('CSV_INVALID_OPTION_ON_RECORD', [
'Invalid option `on_record`:',
'expect a function,',
`got ${JSON.stringify(options.on_record)}`
], options);
}
// Normalize option `quote`
if(options.quote === null || options.quote === false || options.quote === ''){
options.quote = null;
}else{
if(options.quote === undefined || options.quote === true){
options.quote = Buffer.from('"', options.encoding);
}else if(typeof options.quote === 'string'){
options.quote = Buffer.from(options.quote, options.encoding);
}
if(!Buffer.isBuffer(options.quote)){
throw new Error(`Invalid Option: quote must be a buffer or a string, got ${JSON.stringify(options.quote)}`);
}
}
// Normalize option `raw`
if(options.raw === undefined || options.raw === null || options.raw === false){
options.raw = false;
}else if(options.raw !== true){
throw new Error(`Invalid Option: raw must be true, got ${JSON.stringify(options.raw)}`);
}
// Normalize option `record_delimiter`
if(options.record_delimiter === undefined){
options.record_delimiter = [];
}else if(typeof options.record_delimiter === 'string' || Buffer.isBuffer(options.record_delimiter)){
if(options.record_delimiter.length === 0){
throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
'Invalid option `record_delimiter`:',
'value must be a non empty string or buffer,',
`got ${JSON.stringify(options.record_delimiter)}`
], options);
}
options.record_delimiter = [options.record_delimiter];
}else if(!Array.isArray(options.record_delimiter)){
throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
'Invalid option `record_delimiter`:',
'value must be a string, a buffer or array of string|buffer,',
`got ${JSON.stringify(options.record_delimiter)}`
], options);
}
options.record_delimiter = options.record_delimiter.map(function(rd, i){
if(typeof rd !== 'string' && ! Buffer.isBuffer(rd)){
throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
'Invalid option `record_delimiter`:',
'value must be a string, a buffer or array of string|buffer',
`at index ${i},`,
`got ${JSON.stringify(rd)}`
], options);
}else if(rd.length === 0){
throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
'Invalid option `record_delimiter`:',
'value must be a non empty string or buffer',
`at index ${i},`,
`got ${JSON.stringify(rd)}`
], options);
}
if(typeof rd === 'string'){
rd = Buffer.from(rd, options.encoding);
}
return rd;
});
// Normalize option `relax_column_count`
if(typeof options.relax_column_count === 'boolean'){
// Great, nothing to do
}else if(options.relax_column_count === undefined || options.relax_column_count === null){
options.relax_column_count = false;
}else{
throw new Error(`Invalid Option: relax_column_count must be a boolean, got ${JSON.stringify(options.relax_column_count)}`);
}
if(typeof options.relax_column_count_less === 'boolean'){
// Great, nothing to do
}else if(options.relax_column_count_less === undefined || options.relax_column_count_less === null){
options.relax_column_count_less = false;
}else{
throw new Error(`Invalid Option: relax_column_count_less must be a boolean, got ${JSON.stringify(options.relax_column_count_less)}`);
}
if(typeof options.relax_column_count_more === 'boolean'){
// Great, nothing to do
}else if(options.relax_column_count_more === undefined || options.relax_column_count_more === null){
options.relax_column_count_more = false;
}else{
throw new Error(`Invalid Option: relax_column_count_more must be a boolean, got ${JSON.stringify(options.relax_column_count_more)}`);
}
// Normalize option `relax_quotes`
if(typeof options.relax_quotes === 'boolean'){
// Great, nothing to do
}else if(options.relax_quotes === undefined || options.relax_quotes === null){
options.relax_quotes = false;
}else{
throw new Error(`Invalid Option: relax_quotes must be a boolean, got ${JSON.stringify(options.relax_quotes)}`);
}
// Normalize option `skip_empty_lines`
if(typeof options.skip_empty_lines === 'boolean'){
// Great, nothing to do
}else if(options.skip_empty_lines === undefined || options.skip_empty_lines === null){
options.skip_empty_lines = false;
}else{
throw new Error(`Invalid Option: skip_empty_lines must be a boolean, got ${JSON.stringify(options.skip_empty_lines)}`);
}
// Normalize option `skip_records_with_empty_values`
if(typeof options.skip_records_with_empty_values === 'boolean'){
// Great, nothing to do
}else if(options.skip_records_with_empty_values === undefined || options.skip_records_with_empty_values === null){
options.skip_records_with_empty_values = false;
}else{
throw new Error(`Invalid Option: skip_records_with_empty_values must be a boolean, got ${JSON.stringify(options.skip_records_with_empty_values)}`);
}
// Normalize option `skip_records_with_error`
if(typeof options.skip_records_with_error === 'boolean'){
// Great, nothing to do
}else if(options.skip_records_with_error === undefined || options.skip_records_with_error === null){
options.skip_records_with_error = false;
}else{
throw new Error(`Invalid Option: skip_records_with_error must be a boolean, got ${JSON.stringify(options.skip_records_with_error)}`);
}
// Normalize option `rtrim`
if(options.rtrim === undefined || options.rtrim === null || options.rtrim === false){
options.rtrim = false;
}else if(options.rtrim !== true){
throw new Error(`Invalid Option: rtrim must be a boolean, got ${JSON.stringify(options.rtrim)}`);
}
// Normalize option `ltrim`
if(options.ltrim === undefined || options.ltrim === null || options.ltrim === false){
options.ltrim = false;
}else if(options.ltrim !== true){
throw new Error(`Invalid Option: ltrim must be a boolean, got ${JSON.stringify(options.ltrim)}`);
}
// Normalize option `trim`
if(options.trim === undefined || options.trim === null || options.trim === false){
options.trim = false;
}else if(options.trim !== true){
throw new Error(`Invalid Option: trim must be a boolean, got ${JSON.stringify(options.trim)}`);
}
// Normalize options `trim`, `ltrim` and `rtrim`
if(options.trim === true && opts.ltrim !== false){
options.ltrim = true;
}else if(options.ltrim !== true){
options.ltrim = false;
}
if(options.trim === true && opts.rtrim !== false){
options.rtrim = true;
}else if(options.rtrim !== true){
options.rtrim = false;
}
// Normalize option `to`
// Accepts an integer > 0 or a string holding one; -1 means "not set".
if(options.to === undefined || options.to === null){
  options.to = -1;
}else{
  // Snapshot the supplied value before coercion for the error messages
  const to_json = JSON.stringify(options.to);
  if(typeof options.to === 'string' && /\d+/.test(options.to)){
    options.to = parseInt(options.to, 10);
  }
  if(!Number.isInteger(options.to)){
    throw new Error(`Invalid Option: to must be an integer, got ${to_json}`);
  }
  if(options.to <= 0){
    throw new Error(`Invalid Option: to must be a positive integer greater than 0, got ${to_json}`);
  }
}
// Normalize option `to_line`
// Accepts an integer > 0 or a string holding one; -1 means "not set".
if(options.to_line === undefined || options.to_line === null){
  options.to_line = -1;
}else{
  // Snapshot the supplied value from the normalized options: the raw
  // `opts` object may only know the camelCase key (`toLine`), in which
  // case `opts.to_line` is undefined and the message would be useless.
  const to_line_json = JSON.stringify(options.to_line);
  if(typeof options.to_line === 'string' && /\d+/.test(options.to_line)){
    options.to_line = parseInt(options.to_line, 10);
  }
  if(!Number.isInteger(options.to_line)){
    throw new Error(`Invalid Option: to_line must be an integer, got ${to_line_json}`);
  }
  if(options.to_line <= 0){
    throw new Error(`Invalid Option: to_line must be a positive integer greater than 0, got ${to_line_json}`);
  }
}
this.info = {
bytes: 0,
comment_lines: 0,
empty_lines: 0,
invalid_field_length: 0,
lines: 1,
records: 0
this.api = transform(opts);
this.api.options.on_skip = (err, chunk) => {
this.emit('skip', err, chunk);
};
this.options = options;
this.state = {
bomSkipped: false,
bufBytesStart: 0,
castField: fnCastField,
commenting: false,
// Current error encountered by a record
error: undefined,
enabled: options.from_line === 1,
escaping: false,
escapeIsQuote: Buffer.isBuffer(options.escape) && Buffer.isBuffer(options.quote) && Buffer.compare(options.escape, options.quote) === 0,
// columns can be `false`, `true`, `Array`
expectedRecordLength: Array.isArray(options.columns) ? options.columns.length : undefined,
field: new ResizeableBuffer(20),
firstLineToHeaders: fnFirstLineToHeaders,
needMoreDataSize: Math.max(
// Skip if the remaining buffer smaller than comment
options.comment !== null ? options.comment.length : 0,
// Skip if the remaining buffer can be delimiter
...options.delimiter.map((delimiter) => delimiter.length),
// Skip if the remaining buffer can be escape sequence
options.quote !== null ? options.quote.length : 0,
),
previousBuf: undefined,
quoting: false,
stop: false,
rawBuffer: new ResizeableBuffer(100),
record: [],
recordHasError: false,
record_length: 0,
recordDelimiterMaxLength: options.record_delimiter.length === 0 ? 2 : Math.max(...options.record_delimiter.map((v) => v.length)),
trimChars: [Buffer.from(' ', options.encoding)[0], Buffer.from('\t', options.encoding)[0]],
wasQuoting: false,
wasRowDelimiter: false
};
// Backward compatibility
this.state = this.api.state;
this.options = this.api.options;
this.info = this.api.info;
}

@@ -577,3 +32,7 @@ // Implementation of `Transform._transform`

}
const err = this.__parse(buf, false);
const err = this.api.parse(buf, false, (record) => {
this.push.call(this, record);
}, () => {
this.push.call(this, null);
});
if(err !== undefined){

@@ -589,655 +48,9 @@ this.state.stop = true;

}
const err = this.__parse(undefined, true);
const err = this.api.parse(undefined, true, (record) => {
this.push.call(this, record);
}, () => {
this.push.call(this, null);
});
callback(err);
}
// Central parser implementation
// Walks the available bytes one position at a time, accumulating the
// current field and record in `this.state` and handing them over to
// `__onField` and `__onRecord`.
// - nextBuf: newly received bytes, or undefined when there are none; it
//   is appended to whatever `this.state.previousBuf` still holds.
// - end: true for the final call. The trailing record is then flushed and
//   an unclosed quote is reported. Otherwise the bytes which were not
//   consumed are stored in `this.state.previousBuf` for the next call.
// Returns the error obtained from `__error`, `__onField` or `__onRecord`
// when there is one, undefined otherwise.
__parse(nextBuf, end){
const {bom, comment, escape, from_line, ltrim, max_record_size, quote, raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options;
let {record_delimiter} = this.options;
const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state;
let buf;
if(previousBuf === undefined){
if(nextBuf === undefined){
// Handle empty string
this.push(null);
return;
}else{
buf = nextBuf;
}
}else if(previousBuf !== undefined && nextBuf === undefined){
// No new data, only the bytes left over from the previous call
buf = previousBuf;
}else{
buf = Buffer.concat([previousBuf, nextBuf]);
}
// Handle UTF BOM
if(bomSkipped === false){
if(bom === false){
this.state.bomSkipped = true;
}else if(buf.length < 3){
// No enough data
if(end === false){
// Wait for more data
this.state.previousBuf = buf;
return;
}
}else{
for(const encoding in boms){
if(boms[encoding].compare(buf, 0, boms[encoding].length) === 0){
// Skip BOM
const bomLength = boms[encoding].length;
this.state.bufBytesStart += bomLength;
buf = buf.slice(bomLength);
// Renormalize original options with the new encoding
this.__normalizeOptions({...this.__originalOptions, encoding: encoding});
break;
}
}
this.state.bomSkipped = true;
}
}
const bufLen = buf.length;
let pos;
for(pos = 0; pos < bufLen; pos++){
// Ensure we get enough space to look ahead
// There should be a way to move this out of the loop
if(this.__needMoreData(pos, bufLen, end)){
break;
}
// The line counter is incremented on the byte following a line break
if(this.state.wasRowDelimiter === true){
this.info.lines++;
this.state.wasRowDelimiter = false;
}
if(to_line !== -1 && this.info.lines > to_line){
this.state.stop = true;
this.push(null);
return;
}
// Auto discovery of record_delimiter, unix, mac and windows supported
if(this.state.quoting === false && record_delimiter.length === 0){
const record_delimiterCount = this.__autoDiscoverRecordDelimiter(buf, pos);
if(record_delimiterCount){
record_delimiter = this.options.record_delimiter;
}
}
const chr = buf[pos];
if(raw === true){
rawBuffer.append(chr);
}
if((chr === cr || chr === nl) && this.state.wasRowDelimiter === false){
this.state.wasRowDelimiter = true;
}
// Previous char was a valid escape char
// treat the current char as a regular char
if(this.state.escaping === true){
this.state.escaping = false;
}else{
// Escape is only active inside quoted fields
// We are quoting, the char is an escape chr and there is a chr to escape
// if(escape !== null && this.state.quoting === true && chr === escape && pos + 1 < bufLen){
if(escape !== null && this.state.quoting === true && this.__isEscape(buf, pos, chr) && pos + escape.length < bufLen){
if(escapeIsQuote){
if(this.__isQuote(buf, pos+escape.length)){
this.state.escaping = true;
// `- 1` because the loop increment moves past the last byte
pos += escape.length - 1;
continue;
}
}else{
this.state.escaping = true;
pos += escape.length - 1;
continue;
}
}
// Not currently escaping and chr is a quote
// TODO: need to compare bytes instead of single char
if(this.state.commenting === false && this.__isQuote(buf, pos)){
if(this.state.quoting === true){
// Look at what follows the quote to decide if it closes the field
const nextChr = buf[pos+quote.length];
const isNextChrTrimable = rtrim && this.__isCharTrimable(nextChr);
const isNextChrComment = comment !== null && this.__compareBytes(comment, buf, pos+quote.length, nextChr);
const isNextChrDelimiter = this.__isDelimiter(buf, pos+quote.length, nextChr);
const isNextChrRecordDelimiter = record_delimiter.length === 0 ? this.__autoDiscoverRecordDelimiter(buf, pos+quote.length) : this.__isRecordDelimiter(nextChr, buf, pos+quote.length);
// Escape a quote
// Treat next char as a regular character
if(escape !== null && this.__isEscape(buf, pos, chr) && this.__isQuote(buf, pos + escape.length)){
pos += escape.length - 1;
}else if(!nextChr || isNextChrDelimiter || isNextChrRecordDelimiter || isNextChrComment || isNextChrTrimable){
this.state.quoting = false;
this.state.wasQuoting = true;
pos += quote.length - 1;
continue;
}else if(relax_quotes === false){
const err = this.__error(
new CsvError('CSV_INVALID_CLOSING_QUOTE', [
'Invalid Closing Quote:',
`got "${String.fromCharCode(nextChr)}"`,
`at line ${this.info.lines}`,
'instead of delimiter, record delimiter, trimable character',
'(if activated) or comment',
], this.options, this.__infoField())
);
if(err !== undefined) return err;
}else{
// relax_quotes is active: leave the quoting mode and hand the quote
// bytes to `field.prepend`
this.state.quoting = false;
this.state.wasQuoting = true;
this.state.field.prepend(quote);
pos += quote.length - 1;
}
}else{
if(this.state.field.length !== 0){
// In relax_quotes mode, treat opening quote preceded by chrs as regular
if(relax_quotes === false){
const err = this.__error(
new CsvError('INVALID_OPENING_QUOTE', [
'Invalid Opening Quote:',
`a quote is found inside a field at line ${this.info.lines}`,
], this.options, this.__infoField(), {
field: this.state.field,
})
);
if(err !== undefined) return err;
}
}else{
this.state.quoting = true;
pos += quote.length - 1;
continue;
}
}
}
if(this.state.quoting === false){
const recordDelimiterLength = this.__isRecordDelimiter(chr, buf, pos);
if(recordDelimiterLength !== 0){
// Do not emit comments which take a full line
const skipCommentLine = this.state.commenting && (this.state.wasQuoting === false && this.state.record.length === 0 && this.state.field.length === 0);
if(skipCommentLine){
this.info.comment_lines++;
// Skip full comment line
}else{
// Activate records emition if above from_line
if(this.state.enabled === false && this.info.lines + (this.state.wasRowDelimiter === true ? 1: 0) >= from_line){
this.state.enabled = true;
this.__resetField();
this.__resetRecord();
pos += recordDelimiterLength - 1;
continue;
}
// Skip if line is empty and skip_empty_lines activated
if(skip_empty_lines === true && this.state.wasQuoting === false && this.state.record.length === 0 && this.state.field.length === 0){
this.info.empty_lines++;
pos += recordDelimiterLength - 1;
continue;
}
// `info.bytes` first points before the record delimiter for the field,
// then after it for the record
this.info.bytes = this.state.bufBytesStart + pos;
const errField = this.__onField();
if(errField !== undefined) return errField;
this.info.bytes = this.state.bufBytesStart + pos + recordDelimiterLength;
const errRecord = this.__onRecord();
if(errRecord !== undefined) return errRecord;
if(to !== -1 && this.info.records >= to){
this.state.stop = true;
this.push(null);
return;
}
}
this.state.commenting = false;
pos += recordDelimiterLength - 1;
continue;
}
// Inside a comment, every byte up to the record delimiter is dropped
if(this.state.commenting){
continue;
}
const commentCount = comment === null ? 0 : this.__compareBytes(comment, buf, pos, chr);
if(commentCount !== 0){
this.state.commenting = true;
continue;
}
const delimiterLength = this.__isDelimiter(buf, pos, chr);
if(delimiterLength !== 0){
this.info.bytes = this.state.bufBytesStart + pos;
const errField = this.__onField();
if(errField !== undefined) return errField;
pos += delimiterLength - 1;
continue;
}
}
}
if(this.state.commenting === false){
if(max_record_size !== 0 && this.state.record_length + this.state.field.length > max_record_size){
const err = this.__error(
new CsvError('CSV_MAX_RECORD_SIZE', [
'Max Record Size:',
'record exceed the maximum number of tolerated bytes',
`of ${max_record_size}`,
`at line ${this.info.lines}`,
], this.options, this.__infoField())
);
if(err !== undefined) return err;
}
}
// With ltrim, a trimable char is dropped only when it is unquoted and
// the field is still empty
const lappend = ltrim === false || this.state.quoting === true || this.state.field.length !== 0 || !this.__isCharTrimable(chr);
// rtrim in non quoting is handle in __onField
const rappend = rtrim === false || this.state.wasQuoting === false;
if(lappend === true && rappend === true){
this.state.field.append(chr);
}else if(rtrim === true && !this.__isCharTrimable(chr)){
const err = this.__error(
new CsvError('CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE', [
'Invalid Closing Quote:',
'found non trimable byte after quote',
`at line ${this.info.lines}`,
], this.options, this.__infoField())
);
if(err !== undefined) return err;
}
}
if(end === true){
// Ensure we are not ending in a quoting state
if(this.state.quoting === true){
const err = this.__error(
new CsvError('CSV_QUOTE_NOT_CLOSED', [
'Quote Not Closed:',
`the parsing is finished with an opening quote at line ${this.info.lines}`,
], this.options, this.__infoField())
);
if(err !== undefined) return err;
}else{
// Skip last line if it has no characters
if(this.state.wasQuoting === true || this.state.record.length !== 0 || this.state.field.length !== 0){
this.info.bytes = this.state.bufBytesStart + pos;
const errField = this.__onField();
if(errField !== undefined) return errField;
const errRecord = this.__onRecord();
if(errRecord !== undefined) return errRecord;
}else if(this.state.wasRowDelimiter === true){
this.info.empty_lines++;
}else if(this.state.commenting === true){
this.info.comment_lines++;
}
}
}else{
// Keep the bytes which were not consumed for the next call and advance
// the absolute offset accordingly
this.state.bufBytesStart += pos;
this.state.previousBuf = buf.slice(pos);
}
if(this.state.wasRowDelimiter === true){
this.info.lines++;
this.state.wasRowDelimiter = false;
}
}
__onRecord(){
const {columns, group_columns_by_name, encoding, info, from, relax_column_count, relax_column_count_less, relax_column_count_more, raw, skip_records_with_empty_values} = this.options;
const {enabled, record} = this.state;
if(enabled === false){
return this.__resetRecord();
}
// Convert the first line into column names
const recordLength = record.length;
if(columns === true){
if(skip_records_with_empty_values === true && isRecordEmpty(record)){
this.__resetRecord();
return;
}
return this.__firstLineToColumns(record);
}
if(columns === false && this.info.records === 0){
this.state.expectedRecordLength = recordLength;
}
if(recordLength !== this.state.expectedRecordLength){
const err = columns === false ?
new CsvError('CSV_RECORD_INCONSISTENT_FIELDS_LENGTH', [
'Invalid Record Length:',
`expect ${this.state.expectedRecordLength},`,
`got ${recordLength} on line ${this.info.lines}`,
], this.options, this.__infoField(), {
record: record,
})
:
new CsvError('CSV_RECORD_INCONSISTENT_COLUMNS', [
'Invalid Record Length:',
`columns length is ${columns.length},`, // rename columns
`got ${recordLength} on line ${this.info.lines}`,
], this.options, this.__infoField(), {
record: record,
});
if(relax_column_count === true ||
(relax_column_count_less === true && recordLength < this.state.expectedRecordLength) ||
(relax_column_count_more === true && recordLength > this.state.expectedRecordLength)){
this.info.invalid_field_length++;
this.state.error = err;
// Error is undefined with skip_records_with_error
}else{
const finalErr = this.__error(err);
if(finalErr) return finalErr;
}
}
if(skip_records_with_empty_values === true && isRecordEmpty(record)){
this.__resetRecord();
return;
}
if(this.state.recordHasError === true){
this.__resetRecord();
this.state.recordHasError = false;
return;
}
this.info.records++;
if(from === 1 || this.info.records >= from){
const {objname} = this.options;
// With columns, records are object
if(columns !== false){
const obj = {};
// Transform record array to an object
for(let i = 0, l = record.length; i < l; i++){
if(columns[i] === undefined || columns[i].disabled) continue;
// Turn duplicate columns into an array
if (group_columns_by_name === true && obj[columns[i].name] !== undefined) {
if (Array.isArray(obj[columns[i].name])) {
obj[columns[i].name] = obj[columns[i].name].concat(record[i]);
} else {
obj[columns[i].name] = [obj[columns[i].name], record[i]];
}
} else {
obj[columns[i].name] = record[i];
}
}
// Without objname (default)
if(raw === true || info === true){
const extRecord = Object.assign(
{record: obj},
(raw === true ? {raw: this.state.rawBuffer.toString(encoding)}: {}),
(info === true ? {info: this.__infoRecord()}: {})
);
const err = this.__push(
objname === undefined ? extRecord : [obj[objname], extRecord]
);
if(err){
return err;
}
}else{
const err = this.__push(
objname === undefined ? obj : [obj[objname], obj]
);
if(err){
return err;
}
}
// Without columns, records are array
}else{
if(raw === true || info === true){
const extRecord = Object.assign(
{record: record},
raw === true ? {raw: this.state.rawBuffer.toString(encoding)}: {},
info === true ? {info: this.__infoRecord()}: {}
);
const err = this.__push(
objname === undefined ? extRecord : [record[objname], extRecord]
);
if(err){
return err;
}
}else{
const err = this.__push(
objname === undefined ? record : [record[objname], record]
);
if(err){
return err;
}
}
}
}
this.__resetRecord();
}
__firstLineToColumns(record){
const {firstLineToHeaders} = this.state;
try{
const headers = firstLineToHeaders === undefined ? record : firstLineToHeaders.call(null, record);
if(!Array.isArray(headers)){
return this.__error(
new CsvError('CSV_INVALID_COLUMN_MAPPING', [
'Invalid Column Mapping:',
'expect an array from column function,',
`got ${JSON.stringify(headers)}`
], this.options, this.__infoField(), {
headers: headers,
})
);
}
const normalizedHeaders = normalizeColumnsArray(headers);
this.state.expectedRecordLength = normalizedHeaders.length;
this.options.columns = normalizedHeaders;
this.__resetRecord();
return;
}catch(err){
return err;
}
}
__resetRecord(){
if(this.options.raw === true){
this.state.rawBuffer.reset();
}
this.state.error = undefined;
this.state.record = [];
this.state.record_length = 0;
}
__onField(){
const {cast, encoding, rtrim, max_record_size} = this.options;
const {enabled, wasQuoting} = this.state;
// Short circuit for the from_line options
if(enabled === false){
return this.__resetField();
}
let field = this.state.field.toString(encoding);
if(rtrim === true && wasQuoting === false){
field = field.trimRight();
}
if(cast === true){
const [err, f] = this.__cast(field);
if(err !== undefined) return err;
field = f;
}
this.state.record.push(field);
// Increment record length if record size must not exceed a limit
if(max_record_size !== 0 && typeof field === 'string'){
this.state.record_length += field.length;
}
this.__resetField();
}
__resetField(){
this.state.field.reset();
this.state.wasQuoting = false;
}
__push(record){
const {on_record} = this.options;
if(on_record !== undefined){
const info = this.__infoRecord();
try{
record = on_record.call(null, record, info);
}catch(err){
return err;
}
if(record === undefined || record === null){ return; }
}
this.push(record);
}
// Return a tuple with the error and the casted value
__cast(field){
const {columns, relax_column_count} = this.options;
const isColumns = Array.isArray(columns);
// Dont loose time calling cast
// because the final record is an object
// and this field can't be associated to a key present in columns
if(isColumns === true && relax_column_count && this.options.columns.length <= this.state.record.length){
return [undefined, undefined];
}
if(this.state.castField !== null){
try{
const info = this.__infoField();
return [undefined, this.state.castField.call(null, field, info)];
}catch(err){
return [err];
}
}
if(this.__isFloat(field)){
return [undefined, parseFloat(field)];
}else if(this.options.cast_date !== false){
const info = this.__infoField();
return [undefined, this.options.cast_date.call(null, field, info)];
}
return [undefined, field];
}
// Helper to test if a character is a space or a line delimiter
__isCharTrimable(chr){
return chr === space || chr === tab || chr === cr || chr === nl || chr === np;
}
// Keep it in case we implement the `cast_int` option
// __isInt(value){
// // return Number.isInteger(parseInt(value))
// // return !isNaN( parseInt( obj ) );
// return /^(\-|\+)?[1-9][0-9]*$/.test(value)
// }
__isFloat(value){
return (value - parseFloat(value) + 1) >= 0; // Borrowed from jquery
}
__compareBytes(sourceBuf, targetBuf, targetPos, firstByte){
if(sourceBuf[0] !== firstByte) return 0;
const sourceLength = sourceBuf.length;
for(let i = 1; i < sourceLength; i++){
if(sourceBuf[i] !== targetBuf[targetPos+i]) return 0;
}
return sourceLength;
}
__needMoreData(i, bufLen, end){
if(end) return false;
const {quote} = this.options;
const {quoting, needMoreDataSize, recordDelimiterMaxLength} = this.state;
const numOfCharLeft = bufLen - i - 1;
const requiredLength = Math.max(
needMoreDataSize,
// Skip if the remaining buffer smaller than record delimiter
recordDelimiterMaxLength,
// Skip if the remaining buffer can be record delimiter following the closing quote
// 1 is for quote.length
quoting ? (quote.length + recordDelimiterMaxLength) : 0,
);
return numOfCharLeft < requiredLength;
}
__isDelimiter(buf, pos, chr){
const {delimiter, ignore_last_delimiters} = this.options;
if(ignore_last_delimiters === true && this.state.record.length === this.options.columns.length - 1){
return 0;
}else if(ignore_last_delimiters !== false && typeof ignore_last_delimiters === 'number' && this.state.record.length === ignore_last_delimiters - 1){
return 0;
}
loop1: for(let i = 0; i < delimiter.length; i++){
const del = delimiter[i];
if(del[0] === chr){
for(let j = 1; j < del.length; j++){
if(del[j] !== buf[pos+j]) continue loop1;
}
return del.length;
}
}
return 0;
}
__isRecordDelimiter(chr, buf, pos){
const {record_delimiter} = this.options;
const recordDelimiterLength = record_delimiter.length;
loop1: for(let i = 0; i < recordDelimiterLength; i++){
const rd = record_delimiter[i];
const rdLength = rd.length;
if(rd[0] !== chr){
continue;
}
for(let j = 1; j < rdLength; j++){
if(rd[j] !== buf[pos+j]){
continue loop1;
}
}
return rd.length;
}
return 0;
}
__isEscape(buf, pos, chr){
const {escape} = this.options;
if(escape === null) return false;
const l = escape.length;
if(escape[0] === chr){
for(let i = 0; i < l; i++){
if(escape[i] !== buf[pos+i]){
return false;
}
}
return true;
}
return false;
}
__isQuote(buf, pos){
const {quote} = this.options;
if(quote === null) return false;
const l = quote.length;
for(let i = 0; i < l; i++){
if(quote[i] !== buf[pos+i]){
return false;
}
}
return true;
}
__autoDiscoverRecordDelimiter(buf, pos){
const {encoding} = this.options;
const chr = buf[pos];
if(chr === cr){
if(buf[pos+1] === nl){
this.options.record_delimiter.push(Buffer.from('\r\n', encoding));
this.state.recordDelimiterMaxLength = 2;
return 2;
}else{
this.options.record_delimiter.push(Buffer.from('\r', encoding));
this.state.recordDelimiterMaxLength = 1;
return 1;
}
}else if(chr === nl){
this.options.record_delimiter.push(Buffer.from('\n', encoding));
this.state.recordDelimiterMaxLength = 1;
return 1;
}
return 0;
}
__error(msg){
const {encoding, raw, skip_records_with_error} = this.options;
const err = typeof msg === 'string' ? new Error(msg) : msg;
if(skip_records_with_error){
this.state.recordHasError = true;
this.emit('skip', err, raw ? this.state.rawBuffer.toString(encoding) : undefined);
return undefined;
}else{
return err;
}
}
__infoDataSet(){
return {
...this.info,
columns: this.options.columns
};
}
__infoRecord(){
const {columns, raw, encoding} = this.options;
return {
...this.__infoDataSet(),
error: this.state.error,
header: columns === true,
index: this.state.record.length,
raw: raw ? this.state.rawBuffer.toString(encoding) : undefined
};
}
__infoField(){
const {columns} = this.options;
const isColumns = Array.isArray(columns);
return {
...this.__infoRecord(),
column: isColumns === true ?
(columns.length > this.state.record.length ?
columns[this.state.record.length].name :
null
) :
this.state.record.length,
quoting: this.state.wasQuoting,
};
}
}

@@ -1252,3 +65,3 @@

data = argument;
}else if(options === undefined && isObject(argument)){
}else if(options === undefined && is_object(argument)){
options = argument;

@@ -1278,6 +91,6 @@ }else if(callback === undefined && type === 'function'){

parser.on('error', function(err){
callback(err, undefined, parser.__infoDataSet());
callback(err, undefined, parser.api.__infoDataSet());
});
parser.on('end', function(){
callback(undefined, records, parser.__infoDataSet());
callback(undefined, records, parser.api.__infoDataSet());
});

@@ -1284,0 +97,0 @@ }

import { Parser } from './index.js';
import {CsvError, transform} from './api/index.js';
const parse = function(data, options={}){
const parse = function(data, opts={}){
if(typeof data === 'string'){
data = Buffer.from(data);
}
const records = options && options.objname ? {} : [];
const parser = new Parser(options);
parser.push = function(record){
if(record === null){
return;
}
if(options.objname === undefined)
const records = opts && opts.objname ? {} : [];
const parser = transform(opts);
const push = (record) => {
if(parser.options.objname === undefined)
records.push(record);

@@ -20,5 +17,6 @@ else{

};
const err1 = parser.__parse(data, false);
const close = () => {};
const err1 = parser.parse(data, false, push, close);
if(err1 !== undefined) throw err1;
const err2 = parser.__parse(undefined, true);
const err2 = parser.parse(undefined, true, push, close);
if(err2 !== undefined) throw err2;

@@ -30,2 +28,2 @@ return records;

export { parse };
export { CsvError } from './index.js';
export { CsvError };
{
"version": "5.0.4",
"version": "5.1.0",
"name": "csv-parse",

@@ -45,21 +45,21 @@ "description": "CSV parsing implementing the Node.js `stream.Transform` API",

"devDependencies": {
"@rollup/plugin-eslint": "^8.0.1",
"@rollup/plugin-node-resolve": "^13.0.6",
"@types/mocha": "^9.0.0",
"@types/node": "^16.11.7",
"@rollup/plugin-eslint": "^8.0.2",
"@rollup/plugin-node-resolve": "^13.3.0",
"@types/mocha": "^9.1.1",
"@types/node": "^17.0.35",
"coffeelint": "^2.1.0",
"coffeescript": "^2.6.1",
"csv-generate": "^4.0.4",
"coffeescript": "^2.7.0",
"csv-generate": "^4.1.0",
"csv-spectrum": "^1.0.0",
"each": "^1.2.2",
"eslint": "^8.2.0",
"mocha": "^9.1.3",
"eslint": "^8.16.0",
"mocha": "^10.0.0",
"pad": "^3.2.0",
"rollup": "^2.60.0",
"rollup": "^2.74.1",
"rollup-plugin-node-builtins": "^2.1.2",
"rollup-plugin-node-globals": "^1.4.0",
"should": "^13.2.3",
"stream-transform": "^3.0.4",
"ts-node": "^10.4.0",
"typescript": "^4.4.4"
"stream-transform": "^3.1.0",
"ts-node": "^10.8.0",
"typescript": "^4.6.4"
},

@@ -75,3 +75,3 @@ "files": [

"inline-diffs": true,
"loader": "./test/loaders/all.mjs",
"loader": "./test/loaders/all.js",
"recursive": true,

@@ -101,3 +101,3 @@ "reporter": "spec",

"test": "mocha 'test/**/*.{coffee,ts}'",
"test:legacy": "mocha --loader=./test/loaders/legacy/all.mjs 'test/**/*.{coffee,ts}'"
"test:legacy": "mocha --ignore test/api.web_stream.coffee --loader=./test/loaders/legacy/all.js 'test/**/*.{coffee,ts}'"
},

@@ -122,3 +122,3 @@ "type": "module",

},
"gitHead": "bab8d89a6eb3bc073233e27b7af0a50284b1590f"
"gitHead": "59cf7a4333c08020a029fa6922483f058bec04ab"
}

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc