Comparing version 0.5.3 to 0.6.0
/** | ||
* Copyright (c) 2023, Leon Sorokin | ||
* Copyright (c) 2024, Leon Sorokin | ||
* All rights reserved. (MIT Licensed) | ||
@@ -7,3 +7,3 @@ * | ||
* A small, fast CSV parser | ||
* https://github.com/leeoniya/uDSV (v0.5.3) | ||
* https://github.com/leeoniya/uDSV (v0.6.0) | ||
*/ | ||
@@ -19,3 +19,3 @@ | ||
const ISO8601 = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3,})?(?:Z|[-+]\d{2}:?\d{2})$/; | ||
const ISO8601 = /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d{3,})?(?:Z|[-+]\d{2}:?\d{2}))?$/; | ||
const BOOL_RE = /^(?:t(?:rue)?|f(?:alse)?|y(?:es)?|n(?:o)?|0|1)$/i; | ||
@@ -220,3 +220,3 @@ | ||
rowDelim ??= firstRowMatch[2]; | ||
colDelim ??= COL_DELIMS.find(delim => firstRowStr.indexOf(delim) > -1) ?? ''; | ||
colDelim ??= COL_DELIMS.find(delim => firstRowStr.indexOf(delim) > -1) ?? comma; | ||
@@ -274,2 +274,3 @@ const schema = { | ||
let _toDeep = null; | ||
let _toObjsS = null; | ||
@@ -333,2 +334,15 @@ let _toCols = null; | ||
const stringObjs = gen(initRows, addRows, () => { | ||
_toObjsS ??= genToTypedRows(cols.map(col => ({ | ||
...col, | ||
type: 's', | ||
repl: { | ||
...col.repl, | ||
empty: void 0, | ||
} | ||
})), true, false); | ||
return _toObjsS; | ||
}); | ||
const typedArrs = gen(initRows, addRows, () => { | ||
@@ -360,2 +374,4 @@ _toArrs ??= genToTypedRows(cols, false, false); | ||
stringArrs, | ||
stringObjs, | ||
typedArrs, | ||
@@ -399,2 +415,4 @@ typedObjs, | ||
let replEsc = `${escEncl}${colEncl}`; | ||
let numCols = _maxCols || schema.cols.length; | ||
@@ -482,3 +500,3 @@ | ||
if (withEOF && colIdx === lastColIdx) | ||
if (withEOF && colIdx === lastColIdx && filledColIdx > -1) | ||
--skip < 0 && rows.push(row); | ||
@@ -500,5 +518,7 @@ | ||
let v = ""; | ||
let c; | ||
let v = ''; | ||
let c = 0; | ||
let pos0 = pos; | ||
while (pos <= endPos) { | ||
@@ -511,2 +531,3 @@ c = csvStr.charCodeAt(pos); | ||
pos += 1; | ||
pos0 = pos; | ||
@@ -531,3 +552,3 @@ if (pos > endPos) | ||
pos += 1; | ||
v = ""; | ||
v = ''; | ||
@@ -560,4 +581,2 @@ if (c === rowDelimChar) { | ||
break; | ||
c = csvStr.charCodeAt(pos); | ||
} | ||
@@ -575,2 +594,5 @@ else { | ||
if (inCol === 2) { | ||
let shouldRep = false; | ||
let posTo = 0; | ||
while (true) { | ||
@@ -580,2 +602,3 @@ if (c === colEnclChar) { | ||
if (pos + 1 > endPos) { // TODO: test with chunk ending in closing ", even at EOL but not EOF | ||
posTo = pos; | ||
pos = endPos + 1; | ||
@@ -591,3 +614,3 @@ break; | ||
// MACRO START | ||
v += colEncl; | ||
shouldRep = true; | ||
if (pos > endPos) | ||
@@ -600,2 +623,3 @@ break; | ||
inCol = 0; | ||
posTo = pos; | ||
pos += 1; | ||
@@ -612,3 +636,3 @@ break; | ||
// MACRO START | ||
v += colEncl; | ||
shouldRep = true; | ||
if (pos > endPos) | ||
@@ -621,2 +645,3 @@ break; | ||
inCol = 0; | ||
posTo = pos; | ||
pos += 1; | ||
@@ -635,3 +660,2 @@ break; | ||
v += csvStr.slice(pos, colEnclChar === escEnclChar ? pos2 : pos2 - 1); | ||
pos = pos2; | ||
@@ -641,2 +665,8 @@ c = colEnclChar; | ||
} | ||
if (inCol === 0 || pos > endPos) { | ||
v = shouldRep ? | ||
csvStr.slice(pos0, posTo).replaceAll(replEsc, colEncl) : | ||
csvStr.slice(pos0, posTo); | ||
} | ||
} | ||
@@ -657,3 +687,3 @@ else if (inCol === 1) { | ||
pos += 1; | ||
v = ""; | ||
v = ''; | ||
@@ -690,3 +720,3 @@ if (c === rowDelimChar) { | ||
let m = takeToCommaOrEOL.exec(csvStr)[0]; | ||
v += m; | ||
v = m; | ||
pos += m.length; // rowdelim when - 1 | ||
@@ -701,3 +731,3 @@ } | ||
let s = csvStr.slice(pos, pos2); | ||
v += trim ? s.trim() : s; | ||
v = trim ? s.trim() : s; | ||
pos = pos2; | ||
@@ -704,0 +734,0 @@ } |
@@ -7,3 +7,3 @@ type DeepReadonly<T> = { | ||
/** should return an array whose length is how many header rows to skip, and should include a string[] to use for col names */ | ||
header?: (rows: string[]) => (string[] | null)[]; // default: rows => [rows[0]] | ||
header?: (rows: string[][]) => (string[] | null)[]; // default: rows => [rows[0]] | ||
@@ -29,19 +29,18 @@ /** column delimiter (null = infer, ',' = comma) */ | ||
export const enum SchemaColumnType { | ||
String = 's', | ||
Number = 'n', | ||
Date = 'd', | ||
JSON = 'j', | ||
Boolean_1 = 'b:1', | ||
Boolean_t = 'b:t', | ||
Boolean_T = 'b:T', | ||
Boolean_true = 'b:true', | ||
Boolean_True = 'b:True', | ||
Boolean_TRUE = 'b:TRUE', | ||
Boolean_y = 'b:y', | ||
Boolean_Y = 'b:Y', | ||
Boolean_yes = 'b:yes', | ||
Boolean_Yes = 'b:Yes', | ||
Boolean_YES = 'b:YES', | ||
} | ||
export type SchemaColumnType = | ||
| /** String */ 's' | ||
| /** Number */ 'n' | ||
| /** Date */ 'd' | ||
| /** JSON */ 'j' | ||
| /** Boolean_1 */ 'b:1' | ||
| /** Boolean_t */ 'b:t' | ||
| /** Boolean_T */ 'b:T' | ||
| /** Boolean_true */ 'b:true' | ||
| /** Boolean_True */ 'b:True' | ||
| /** Boolean_TRUE */ 'b:TRUE' | ||
| /** Boolean_y */ 'b:y' | ||
| /** Boolean_Y */ 'b:Y' | ||
| /** Boolean_yes */ 'b:yes' | ||
| /** Boolean_Yes */ 'b:Yes' | ||
| /** Boolean_YES */ 'b:YES'; | ||
@@ -98,3 +97,4 @@ export interface SchemaColumn { | ||
type record = Record<string, unknown>; | ||
type record = Record<string, unknown>; | ||
type stringRec = Record<string, string>; | ||
@@ -110,2 +110,5 @@ export interface Parser { | ||
/** parses to string objects */ | ||
stringObjs: <T extends stringRec = {}>(csvStr: string, onData?: OnDataFn<T>) => T[]; | ||
/** parses to typed tuples */ | ||
@@ -112,0 +115,0 @@ typedArrs: <T extends unknown[] = []>(csvStr: string, onData?: OnDataFn<T>) => T[]; |
/** | ||
* Copyright (c) 2023, Leon Sorokin | ||
* Copyright (c) 2024, Leon Sorokin | ||
* All rights reserved. (MIT Licensed) | ||
@@ -7,3 +7,3 @@ * | ||
* A small, fast CSV parser | ||
* https://github.com/leeoniya/uDSV (v0.5.3) | ||
* https://github.com/leeoniya/uDSV (v0.6.0) | ||
*/ | ||
@@ -20,3 +20,3 @@ | ||
const ISO8601 = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3,})?(?:Z|[-+]\d{2}:?\d{2})$/; | ||
const ISO8601 = /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d{3,})?(?:Z|[-+]\d{2}:?\d{2}))?$/; | ||
const BOOL_RE = /^(?:t(?:rue)?|f(?:alse)?|y(?:es)?|n(?:o)?|0|1)$/i; | ||
@@ -221,3 +221,3 @@ | ||
rowDelim ??= firstRowMatch[2]; | ||
colDelim ??= COL_DELIMS.find(delim => firstRowStr.indexOf(delim) > -1) ?? ''; | ||
colDelim ??= COL_DELIMS.find(delim => firstRowStr.indexOf(delim) > -1) ?? comma; | ||
@@ -275,2 +275,3 @@ const schema = { | ||
let _toDeep = null; | ||
let _toObjsS = null; | ||
@@ -334,2 +335,15 @@ let _toCols = null; | ||
const stringObjs = gen(initRows, addRows, () => { | ||
_toObjsS ??= genToTypedRows(cols.map(col => ({ | ||
...col, | ||
type: 's', | ||
repl: { | ||
...col.repl, | ||
empty: void 0, | ||
} | ||
})), true, false); | ||
return _toObjsS; | ||
}); | ||
const typedArrs = gen(initRows, addRows, () => { | ||
@@ -361,2 +375,4 @@ _toArrs ??= genToTypedRows(cols, false, false); | ||
stringArrs, | ||
stringObjs, | ||
typedArrs, | ||
@@ -400,2 +416,4 @@ typedObjs, | ||
let replEsc = `${escEncl}${colEncl}`; | ||
let numCols = _maxCols || schema.cols.length; | ||
@@ -483,3 +501,3 @@ | ||
if (withEOF && colIdx === lastColIdx) | ||
if (withEOF && colIdx === lastColIdx && filledColIdx > -1) | ||
--skip < 0 && rows.push(row); | ||
@@ -501,5 +519,7 @@ | ||
let v = ""; | ||
let c; | ||
let v = ''; | ||
let c = 0; | ||
let pos0 = pos; | ||
while (pos <= endPos) { | ||
@@ -512,2 +532,3 @@ c = csvStr.charCodeAt(pos); | ||
pos += 1; | ||
pos0 = pos; | ||
@@ -532,3 +553,3 @@ if (pos > endPos) | ||
pos += 1; | ||
v = ""; | ||
v = ''; | ||
@@ -561,4 +582,2 @@ if (c === rowDelimChar) { | ||
break; | ||
c = csvStr.charCodeAt(pos); | ||
} | ||
@@ -576,2 +595,5 @@ else { | ||
if (inCol === 2) { | ||
let shouldRep = false; | ||
let posTo = 0; | ||
while (true) { | ||
@@ -581,2 +603,3 @@ if (c === colEnclChar) { | ||
if (pos + 1 > endPos) { // TODO: test with chunk ending in closing ", even at EOL but not EOF | ||
posTo = pos; | ||
pos = endPos + 1; | ||
@@ -592,3 +615,3 @@ break; | ||
// MACRO START | ||
v += colEncl; | ||
shouldRep = true; | ||
if (pos > endPos) | ||
@@ -601,2 +624,3 @@ break; | ||
inCol = 0; | ||
posTo = pos; | ||
pos += 1; | ||
@@ -613,3 +637,3 @@ break; | ||
// MACRO START | ||
v += colEncl; | ||
shouldRep = true; | ||
if (pos > endPos) | ||
@@ -622,2 +646,3 @@ break; | ||
inCol = 0; | ||
posTo = pos; | ||
pos += 1; | ||
@@ -636,3 +661,2 @@ break; | ||
v += csvStr.slice(pos, colEnclChar === escEnclChar ? pos2 : pos2 - 1); | ||
pos = pos2; | ||
@@ -642,2 +666,8 @@ c = colEnclChar; | ||
} | ||
if (inCol === 0 || pos > endPos) { | ||
v = shouldRep ? | ||
csvStr.slice(pos0, posTo).replaceAll(replEsc, colEncl) : | ||
csvStr.slice(pos0, posTo); | ||
} | ||
} | ||
@@ -658,3 +688,3 @@ else if (inCol === 1) { | ||
pos += 1; | ||
v = ""; | ||
v = ''; | ||
@@ -691,3 +721,3 @@ if (c === rowDelimChar) { | ||
let m = takeToCommaOrEOL.exec(csvStr)[0]; | ||
v += m; | ||
v = m; | ||
pos += m.length; // rowdelim when - 1 | ||
@@ -702,3 +732,3 @@ } | ||
let s = csvStr.slice(pos, pos2); | ||
v += trim ? s.trim() : s; | ||
v = trim ? s.trim() : s; | ||
pos = pos2; | ||
@@ -705,0 +735,0 @@ } |
@@ -1,2 +0,2 @@ | ||
/*! https://github.com/leeoniya/uDSV (v0.5.3) */ | ||
var uDSV=function(e){"use strict";const t=/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3,})?(?:Z|[-+]\d{2}:?\d{2})$/,l=/^(?:t(?:rue)?|f(?:alse)?|y(?:es)?|n(?:o)?|0|1)$/i,n=["\t","|",";",","],r="d",i="n",s="j",c="b";const o=JSON.stringify,f=e=>"string"==typeof e?o(e):e;function u(e,t){let{type:l}=t,n=`r[${e}]`,u=l===r?`new Date(${n})`:l===s?`JSON.parse(${n})`:l===i?"+"+n:l[0]===c?`${n} === ${o(l.slice(2))} ? true : false`:n,{repl:a}=t,h=void 0!==a.NaN&&l===i?`${n} === 'NaN' ? ${f(a.NaN)} : `:"",d=void 0!==a.null?`${n} === 'null' || ${n} === 'NULL' ? ${f(a.null)} : `:"";return`${void 0!==a.empty?`${n} === '' ? ${f(a.empty)} : `:""} ${d} ${h} ${u}`}const a=/\w+(?:\[|\]?[\.\[]?|$)/gm;function h(e,t=!1,l=!1){let n="";if(t&&l){let t={},l=0,r=e.map((e=>e.name.replace(/\.(\d+)\.?/gi,"[$1]")));do{let e=r.shift(),n=/\s/.test(e)?[e]:[...e.matchAll(a)].flatMap((e=>e.map((e=>e.replace("]",""))))),i=t;do{let e=n.shift(),t=e,r=e.at(-1);"."==r||"["==r?(t=e.slice(0,-1),i=i[t]=i[t]??("."==r?{}:[])):i[t]=`¦${l}¦`}while(n.length>0);l++}while(r.length>0);n=o(t).replace(/"¦(\d+)¦"/g,((t,l)=>u(+l,e[+l])))}else!t&&e.every((e=>"s"===e.type))?n="r":(n=t?"{":"[",e.forEach(((e,l)=>{n+=t?o(e.name)+":":"";let r=u(l,e);n+=r+","})),n+=t?"}":"]");return Function("rows",` let arr = Array(rows.length); for (let i = 0; i < rows.length; i++) { let r = rows[i]; arr[i] = ${n}; } return arr; `)}function d(e,t,l,n=0,r=!0,i=1e3,s=null,c=null){let{row:o,col:f,encl:u,esc:a,trim:h}=t;u??=e.indexOf('"')>-1?'"':"",a??=u;let d=c||t.cols.length,p=null!=s,g=null!=c&&p,$=o.length,y=f.length,m=u.charCodeAt(0),A=a.charCodeAt(0),N=o.charCodeAt(0),k=f.charCodeAt(0),w=0,C=0,b=e.length-1,O=0,x=[],v=Array(d).fill(""),E=v.slice(),S=0,L=d-1,T=-1;if(""===u){for(;b>=C;)if(S===L){let t=e.indexOf(o,C);if(-1===t){if(!r)break;t=b+1}let c=e.slice(C,t);if(E[S]=h?c.trim():c,0>--n&&x.push(E),x.length===i){let e=!1===l(x,"");if(x=[],e||p&&++w===s)return}E=v.slice(),S=0,T=-1,C=t+$,O=C}else if(0===S&&e.charCodeAt(C)===N)C+=$;else{let 
t=e.indexOf(f,C);if(-1===t&&!r)break;let l=e.slice(C,t);E[S]=h?l.trim():l,C=t+y,T=S++}return r&&S===L&&0>--n&&x.push(E),void(r&&0>=x.length||l(x,r?"":e.slice(O)))}const j=g?RegExp(`[^${f}${o}]+`,"my"):null;let R,Y=0,D="";for(;b>=C;){if(R=e.charCodeAt(C),0===Y)if(R===m){if(Y=2,C+=1,C>b)break;R=e.charCodeAt(C)}else if(R===k||R===N){if(R===N&&0===S){C+=$;continue}if(E[S]=D,T=S,S+=1,C+=1,D="",R===N){if(g&&L>T&&0===x.length&&(E.length=v.length=T+1,L=T),0>--n&&x.push(E),x.length===i){let e=!1===l(x,"");if(x=[],e||p&&++w===s)return}E=v.slice(),S=0,T=-1,C+=$-1,O=C}if(C>b)break;R=e.charCodeAt(C)}else if(h&&32===R)for(;32===R;)R=e.charCodeAt(++C);else Y=1;if(2===Y)for(;;)if(R===m)if(m===A){if(C+1>b){C=b+1;break}if(e.charCodeAt(C+1)!==m){Y=0,C+=1;break}if(C+=2,D+=u,C>b)break;R=e.charCodeAt(C)}else{if(e.charCodeAt(C-1)!==A){Y=0,C+=1;break}if(C+=1,D+=u,C>b)break;R=e.charCodeAt(C)}else{let t=e.indexOf(u,C);if(-1===t){C=b+1;break}D+=e.slice(C,m===A?t:t-1),C=t,R=m}else if(1===Y)if(R===k||R===N){if(R===N&&0===S){C+=$;continue}if(E[S]=D,T=S,S+=1,C+=1,D="",R===N){if(g&&L>T&&0===x.length&&(E.length=v.length=T+1,L=T),0>--n&&x.push(E),x.length===i){let e=!1===l(x,"");if(x=[],e||p&&++w===s)return}E=v.slice(),S=0,T=-1,C+=$-1,O=C}Y=0}else if(g){j.lastIndex=C;let t=j.exec(e)[0];D+=t,C+=t.length}else{let t=e.indexOf(S===L?o:f,C);-1===t&&(t=b+1);let l=e.slice(C,t);D+=h?l.trim():l,C=t}}r&&S===L&&(E[S]=D,0>--n&&x.push(E),Y=0),l(x,r||0===Y&&!(-1===T?""!==D:L>T)?"":e.slice(O))}return e.inferSchema=function(e,o,f){let{header:u,col:a,row:h,encl:p,esc:g,trim:$=!1}=o??{};u??=e=>[e[0]],f??=10;const y=RegExp(`(.*)(${h??"\r\n|\r|\n"})`),m=e.match(y),A=m[1];h??=m[2],a??=n.find((e=>A.indexOf(e)>-1))??"";const N={skip:1,col:a,row:h,encl:p,esc:g,trim:$,cols:[]},k=A.split(a).length,w=[];d(e,N,(e=>{w.push(...e)}),0,!0,f,1,k);let C=u(w)??[],b=N.skip=C.length,O=C.find((e=>null!=e))??[...Array(w[0].length).keys()];return w.splice(0,b),O.forEach(((e,n)=>{let o=function(e,n){let 
o=n.findLast((t=>""!==t[e]&&"null"!==t[e]&&"NULL"!==t[e]&&"NaN"!==t[e])),f="s";if(null!=o){let n=o[e];f=t.test(n)?r:+n==+n?i:l.test(n)?c+":"+function(e){let[t,l=""]=e;return"1"==t||"0"==t?"1":"t"==t||"f"==t?""==l?"t":"true":"T"==t||"F"==t?""==l?"T":"R"==l||"A"===l?"TRUE":"True":"y"==t||"n"==t?""==l?"y":"yes":"Y"==t||"N"==t?""==l?"Y":"E"==l||"O"===l?"YES":"Yes":""}(n):function(e){if("["===e[0]||"{"===e[0])try{return JSON.parse(e),!0}catch{}return!1}(n)?s:f}return f}(n,w);N.cols.push({name:e,type:o,repl:{empty:null,NaN:void 0,null:void 0}})})),N},e.initParser=function(e,t){let{skip:l,cols:n}=e,r=null,i=null,s=null,c=null,o=null,f=0,u=0,a=null,p=null,g="",$="",y=null;function m(){u=f=0,$=g="",a=p=y=null}let A=(e,t)=>{t(e)},N=()=>[],k=e=>{y.push(...e)};function w(n,r,i){let s=null;return(c,o=A)=>{s??=i(),y??=n();let a=y,h=0===u||2===u;return d(c,e,((e,t)=>{$=t;let l=o(s(e),r);return!1===l&&0!==u&&m(),l}),0===f?l:0,h,t),h&&(y=null),a}}const C=w(N,k,(()=>(r??=e=>e,r))),b=w(N,k,(()=>(i??=h(n,!1,!1),i))),O=w(N,k,(()=>(s??=h(n,!0,!1),s))),x=w(N,k,(()=>(c??=h(n,!0,!0),c))),v=w((()=>n.map((()=>[]))),(e=>{e.forEach(((e,t)=>{y[t].push(...e)}))}),(()=>(i??=h(n,!1,!1),o??=function(e){return Function("rows",` let cols = [${e.map((()=>"Array(rows.length)")).join(",")}]; for (let i = 0; i < rows.length; i++) { let r = rows[i]; ${e.map(((e,t)=>`cols[${t}][i] = r[${t}]`)).join(";")}; } return cols; `)}(n),e=>o(i(e)))));return{schema:e,stringArrs:C,typedArrs:b,typedObjs:O,typedDeep:x,typedCols:v,chunk(e,t=C,l=A){a??=t,p??=l,1===u&&(a($+g,p),f++),g=e,u=1},end(){u=2;let e=a($+g,p);return m(),e}}},e}({}); | ||
/*! https://github.com/leeoniya/uDSV (v0.6.0) */ | ||
var uDSV=function(e){"use strict";const l=/^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d{3,})?(?:Z|[-+]\d{2}:?\d{2}))?$/,t=/^(?:t(?:rue)?|f(?:alse)?|y(?:es)?|n(?:o)?|0|1)$/i,n=["\t","|",";",","],r="d",i="n",s="j",c="b";const o=JSON.stringify,f=e=>"string"==typeof e?o(e):e;function u(e,l){let{type:t}=l,n=`r[${e}]`,u=t===r?`new Date(${n})`:t===s?`JSON.parse(${n})`:t===i?"+"+n:t[0]===c?`${n} === ${o(t.slice(2))} ? true : false`:n,{repl:a}=l,h=void 0!==a.NaN&&t===i?`${n} === 'NaN' ? ${f(a.NaN)} : `:"",d=void 0!==a.null?`${n} === 'null' || ${n} === 'NULL' ? ${f(a.null)} : `:"";return`${void 0!==a.empty?`${n} === '' ? ${f(a.empty)} : `:""} ${d} ${h} ${u}`}const a=/\w+(?:\[|\]?[\.\[]?|$)/gm;function h(e,l=!1,t=!1){let n="";if(l&&t){let l={},t=0,r=e.map((e=>e.name.replace(/\.(\d+)\.?/gi,"[$1]")));do{let e=r.shift(),n=/\s/.test(e)?[e]:[...e.matchAll(a)].flatMap((e=>e.map((e=>e.replace("]",""))))),i=l;do{let e=n.shift(),l=e,r=e.at(-1);"."==r||"["==r?(l=e.slice(0,-1),i=i[l]=i[l]??("."==r?{}:[])):i[l]=`¦${t}¦`}while(n.length>0);t++}while(r.length>0);n=o(l).replace(/"¦(\d+)¦"/g,((l,t)=>u(+t,e[+t])))}else!l&&e.every((e=>"s"===e.type))?n="r":(n=l?"{":"[",e.forEach(((e,t)=>{n+=l?o(e.name)+":":"";let r=u(t,e);n+=r+","})),n+=l?"}":"]");return Function("rows",` let arr = Array(rows.length); for (let i = 0; i < rows.length; i++) { let r = rows[i]; arr[i] = ${n}; } return arr; `)}function d(e,l,t,n=0,r=!0,i=1e3,s=null,c=null){let{row:o,col:f,encl:u,esc:a,trim:h}=l;u??=e.indexOf('"')>-1?'"':"",a??=u;let d=`${a}${u}`,p=c||l.cols.length,g=null!=s,$=null!=c&&g,y=o.length,m=f.length,A=u.charCodeAt(0),N=a.charCodeAt(0),k=o.charCodeAt(0),w=f.charCodeAt(0),b=0,C=0,O=e.length-1,x=0,v=[],E=Array(p).fill(""),S=E.slice(),j=0,L=p-1,T=-1;if(""===u){for(;O>=C;)if(j===L){let l=e.indexOf(o,C);if(-1===l){if(!r)break;l=O+1}let c=e.slice(C,l);if(S[j]=h?c.trim():c,0>--n&&v.push(S),v.length===i){let e=!1===t(v,"");if(v=[],e||g&&++b===s)return}S=E.slice(),j=0,T=-1,C=l+y,x=C}else 
if(0===j&&e.charCodeAt(C)===k)C+=y;else{let l=e.indexOf(f,C);if(-1===l&&!r)break;let t=e.slice(C,l);S[j]=h?t.trim():t,C=l+m,T=j++}return r&&j===L&&T>-1&&0>--n&&v.push(S),void(r&&0>=v.length||t(v,r?"":e.slice(x)))}const R=$?RegExp(`[^${f}${o}]+`,"my"):null;let Y=0,D="",F=0,J=C;for(;O>=C;){if(F=e.charCodeAt(C),0===Y)if(F===A){if(Y=2,C+=1,J=C,C>O)break;F=e.charCodeAt(C)}else if(F===w||F===k){if(F===k&&0===j){C+=y;continue}if(S[j]=D,T=j,j+=1,C+=1,D="",F===k){if($&&L>T&&0===v.length&&(S.length=E.length=T+1,L=T),0>--n&&v.push(S),v.length===i){let e=!1===t(v,"");if(v=[],e||g&&++b===s)return}S=E.slice(),j=0,T=-1,C+=y-1,x=C}if(C>O)break}else if(h&&32===F)for(;32===F;)F=e.charCodeAt(++C);else Y=1;if(2===Y){let l=!1,t=0;for(;;)if(F===A)if(A===N){if(C+1>O){t=C,C=O+1;break}if(e.charCodeAt(C+1)!==A){Y=0,t=C,C+=1;break}if(C+=2,l=!0,C>O)break;F=e.charCodeAt(C)}else{if(e.charCodeAt(C-1)!==N){Y=0,t=C,C+=1;break}if(C+=1,l=!0,C>O)break;F=e.charCodeAt(C)}else{let l=e.indexOf(u,C);if(-1===l){C=O+1;break}C=l,F=A}(0===Y||C>O)&&(D=l?e.slice(J,t).replaceAll(d,u):e.slice(J,t))}else if(1===Y)if(F===w||F===k){if(F===k&&0===j){C+=y;continue}if(S[j]=D,T=j,j+=1,C+=1,D="",F===k){if($&&L>T&&0===v.length&&(S.length=E.length=T+1,L=T),0>--n&&v.push(S),v.length===i){let e=!1===t(v,"");if(v=[],e||g&&++b===s)return}S=E.slice(),j=0,T=-1,C+=y-1,x=C}Y=0}else if($){R.lastIndex=C;let l=R.exec(e)[0];D=l,C+=l.length}else{let l=e.indexOf(j===L?o:f,C);-1===l&&(l=O+1);let t=e.slice(C,l);D=h?t.trim():t,C=l}}r&&j===L&&(S[j]=D,0>--n&&v.push(S),Y=0),t(v,r||0===Y&&!(-1===T?""!==D:L>T)?"":e.slice(x))}return e.inferSchema=function(e,o,f){let{header:u,col:a,row:h,encl:p,esc:g,trim:$=!1}=o??{};u??=e=>[e[0]],f??=10;const y=RegExp(`(.*)(${h??"\r\n|\r|\n"})`),m=e.match(y),A=m[1];h??=m[2],a??=n.find((e=>A.indexOf(e)>-1))??",";const N={skip:1,col:a,row:h,encl:p,esc:g,trim:$,cols:[]},k=A.split(a).length,w=[];d(e,N,(e=>{w.push(...e)}),0,!0,f,1,k);let 
b=u(w)??[],C=N.skip=b.length,O=b.find((e=>null!=e))??[...Array(w[0].length).keys()];return w.splice(0,C),O.forEach(((e,n)=>{let o=function(e,n){let o=n.findLast((l=>""!==l[e]&&"null"!==l[e]&&"NULL"!==l[e]&&"NaN"!==l[e])),f="s";if(null!=o){let n=o[e];f=l.test(n)?r:+n==+n?i:t.test(n)?c+":"+function(e){let[l,t=""]=e;return"1"==l||"0"==l?"1":"t"==l||"f"==l?""==t?"t":"true":"T"==l||"F"==l?""==t?"T":"R"==t||"A"===t?"TRUE":"True":"y"==l||"n"==l?""==t?"y":"yes":"Y"==l||"N"==l?""==t?"Y":"E"==t||"O"===t?"YES":"Yes":""}(n):function(e){if("["===e[0]||"{"===e[0])try{return JSON.parse(e),!0}catch{}return!1}(n)?s:f}return f}(n,w);N.cols.push({name:e,type:o,repl:{empty:null,NaN:void 0,null:void 0}})})),N},e.initParser=function(e,l){let{skip:t,cols:n}=e,r=null,i=null,s=null,c=null,o=null,f=null,u=0,a=0,p=null,g=null,$="",y="",m=null;function A(){a=u=0,y=$="",p=g=m=null}let N=(e,l)=>{l(e)},k=()=>[],w=e=>{m.push(...e)};function b(n,r,i){let s=null;return(c,o=N)=>{s??=i(),m??=n();let f=m,h=0===a||2===a;return d(c,e,((e,l)=>{y=l;let t=o(s(e),r);return!1===t&&0!==a&&A(),t}),0===u?t:0,h,l),h&&(m=null),f}}const C=b(k,w,(()=>(r??=e=>e,r))),O=b(k,w,(()=>(o??=h(n.map((e=>({...e,type:"s",repl:{...e.repl,empty:void 0}}))),!0,!1),o))),x=b(k,w,(()=>(i??=h(n,!1,!1),i))),v=b(k,w,(()=>(s??=h(n,!0,!1),s))),E=b(k,w,(()=>(c??=h(n,!0,!0),c))),S=b((()=>n.map((()=>[]))),(e=>{e.forEach(((e,l)=>{m[l].push(...e)}))}),(()=>(i??=h(n,!1,!1),f??=function(e){return Function("rows",` let cols = [${e.map((()=>"Array(rows.length)")).join(",")}]; for (let i = 0; i < rows.length; i++) { let r = rows[i]; ${e.map(((e,l)=>`cols[${l}][i] = r[${l}]`)).join(";")}; } return cols; `)}(n),e=>f(i(e)))));return{schema:e,stringArrs:C,stringObjs:O,typedArrs:x,typedObjs:v,typedDeep:E,typedCols:S,chunk(e,l=C,t=N){p??=l,g??=t,1===a&&(p(y+$,g),u++),$=e,a=1},end(){a=2;let e=p(y+$,g);return A(),e}}},e}({}); |
{ | ||
"name": "udsv", | ||
"version": "0.5.3", | ||
"version": "0.6.0", | ||
"description": "A small, fast CSV parser", | ||
@@ -10,3 +10,3 @@ "main": "./dist/uDSV.cjs.js", | ||
"build": "rollup -c --bundleConfigAsCjs", | ||
"test": "node ./test/parse.spec.mjs && node ./test/api.spec.mjs" | ||
"test": "node ./test/parse.spec.mjs" | ||
}, | ||
@@ -35,7 +35,7 @@ "repository": { | ||
"devDependencies": { | ||
"@rollup/plugin-terser": "^0.4.3", | ||
"@rollup/plugin-terser": "^0.4.4", | ||
"papaparse": "^5.4.1", | ||
"rollup": "^3.29.0", | ||
"rollup": "^4.24.3", | ||
"rollup-plugin-re": "^1.0.7" | ||
} | ||
} |
@@ -38,8 +38,7 @@ ## 𝌠 μDSV | ||
On a Ryzen 7 ThinkPad, Linux v6.4.11, and NodeJS v20.6.0, a diverse set of benchmarks show a 1x-5x performance boost relative to [Papa Parse](https://www.papaparse.com/). | ||
Papa Parse is used as a reference not because it's the fastest, but due to its [outsized popularity](https://github.com/search?q=csv+parser&type=repositories&s=stars&o=desc), battle-testedness, and [some external validation](https://leanylabs.com/blog/js-csv-parsers-benchmarks/) of its performance claims. | ||
Most CSV parsers have one happy/fast path -- the one without quoted values, without value typing, and only when using the default settings & output format. | ||
Once you're off that path, you can generally throw any self-promoting benchmarks in the trash. | ||
In contrast, uDSV remains fast with any datasets and all options; its happy path is _every path_. | ||
Most CSV parsers have one happy/fast path -- the one without quoted values, without value typing, and using the default settings & output format. | ||
Once you're off that path, you can generally throw their self-promoting benchmarks in the trash. | ||
In contrast, uDSV remains fast with all datasets and options; its happy path is _every path_. | ||
On a Ryzen 7 ThinkPad, Linux v6.4.11, and NodeJS v20.6.0, a diverse set of benchmarks show a 1x-5x performance boost relative to the [popular](https://github.com/search?q=csv+parser&type=repositories&s=stars&o=desc), [proven-fast](https://leanylabs.com/blog/js-csv-parsers-benchmarks/), [Papa Parse](https://www.papaparse.com/). | ||
@@ -115,2 +114,4 @@ For _way too many_ synthetic and real-world benchmarks, head over to [/bench](/bench)...and don't forget your coffee! | ||
let typedCols = parser.typedCols(csvStr); // [ [1, 4], [2, 5], [3, 6] ] | ||
let stringObjs = parser.stringObjs(csvStr); // [ {a: '1', b: '2', c: '3'}, {a: '4', b: '5', c: '6'} ] | ||
``` | ||
@@ -117,0 +118,0 @@ |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
71496
1879
230