Comparing version 1.1.1 to 1.2.0
{ | ||
"name": "hyparquet", | ||
"version": "1.1.1", | ||
"version": "1.2.0", | ||
"description": "parquet file parser for javascript", | ||
@@ -29,8 +29,8 @@ "keywords": [ | ||
"devDependencies": { | ||
"@types/node": "22.1.0", | ||
"@typescript-eslint/eslint-plugin": "8.0.0", | ||
"@types/node": "22.2.0", | ||
"@typescript-eslint/eslint-plugin": "8.1.0", | ||
"@vitest/coverage-v8": "2.0.5", | ||
"eslint": "8.57.0", | ||
"eslint-plugin-import": "2.29.1", | ||
"eslint-plugin-jsdoc": "48.11.0", | ||
"eslint-plugin-jsdoc": "50.2.1", | ||
"http-server": "14.1.1", | ||
@@ -37,0 +37,0 @@ "hyparquet-compressors": "0.1.4", |
@@ -106,3 +106,4 @@ # hyparquet | ||
Hyparquet is designed to load only the minimal amount of data needed to fulfill a query. | ||
You can filter rows by number, or columns by name: | ||
You can filter rows by number, or columns by name, | ||
and columns will be returned in the same order they were requested: | ||
@@ -114,3 +115,3 @@ ```js | ||
file, | ||
columns: ['colA', 'colB'], // include columns colA and colB | ||
columns: ['colB', 'colA'], // include columns colB and colA | ||
rowStart: 100, | ||
@@ -122,2 +123,17 @@ rowEnd: 200, | ||
## Column names | ||
By default, the data passed to the `onComplete` function is an array of rows, where each row is an array of column values. | ||
If you would like each row to be an object keyed by column name instead, set the `rowFormat` option to `'object'`. | ||
```js | ||
import { parquetRead } from 'hyparquet' | ||
await parquetRead({ | ||
file, | ||
rowFormat: 'object', | ||
onComplete: data => console.log(data), | ||
}) | ||
``` | ||
## Advanced Usage | ||
@@ -124,0 +140,0 @@ |
@@ -20,2 +20,3 @@ import type { AsyncBuffer, Compressors, FileMetaData, SchemaTree } from './types.d.ts' | ||
* @param {string[]} [options.columns] columns to read, all columns if undefined | ||
* @param {string} [options.rowFormat] desired format of each row passed to the onComplete function | ||
* @param {number} [options.rowStart] first requested row index (inclusive) | ||
@@ -115,6 +116,7 @@ * @param {number} [options.rowEnd] last requested row index (exclusive) | ||
columns?: string[] // columns to read, all columns if undefined | ||
rowFormat?: string // format of each row passed to the onComplete function | ||
rowStart?: number // inclusive | ||
rowEnd?: number // exclusive | ||
onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may be outside the requested range. | ||
onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed | ||
onComplete?: (rows: any[][] | Record<string, any>[]) => void // called when all requested rows and columns are parsed | ||
compressors?: Compressors // custom decompressors | ||
@@ -121,0 +123,0 @@ utf8?: boolean // decode byte arrays as utf8 strings (default true) |
@@ -1,2 +0,1 @@ | ||
import { assembleNested } from './assemble.js' | ||
@@ -27,6 +26,7 @@ import { getColumnRange, readColumn } from './column.js' | ||
* @param {string[]} [options.columns] columns to read, all columns if undefined | ||
* @param {string} [options.rowFormat] format of each row passed to the onComplete function | ||
* @param {number} [options.rowStart] first requested row index (inclusive) | ||
* @param {number} [options.rowEnd] last requested row index (exclusive) | ||
* @param {(chunk: ColumnData) => void} [options.onChunk] called when a column chunk is parsed. chunks may include row data outside the requested range. | ||
* @param {(rows: any[][]) => void} [options.onComplete] called when all requested rows and columns are parsed | ||
* @param {(rows: any[][] | Record<string, any>[]) => void} [options.onComplete] called when all requested rows and columns are parsed | ||
* @param {Compressors} [options.compressors] custom decompressors | ||
@@ -78,4 +78,5 @@ * @returns {Promise<void>} resolves when all requested rows and columns are parsed | ||
* @param {string[]} [options.columns] columns to read, all columns if undefined | ||
* @param {string} [options.rowFormat] format of each row passed to the onComplete function | ||
* @param {(chunk: ColumnData) => void} [options.onChunk] called when a column chunk is parsed. chunks may include row data outside the requested range. | ||
* @param {(rows: any[][]) => void} [options.onComplete] called when all requested rows and columns are parsed | ||
* @param {(rows: any[][] | Record<string, any>[]) => void} [options.onComplete] called when all requested rows and columns are parsed | ||
* @param {Compressors} [options.compressors] | ||
@@ -191,8 +192,22 @@ * @param {RowGroup} rowGroup row group to read | ||
const groupData = new Array(rowLimit) | ||
const includedColumns = children | ||
const includedColumnNames = children | ||
.map(child => child.element.name) | ||
.filter(name => !columns || columns.includes(name)) | ||
.map(name => subcolumnData.get(name)) | ||
const columnOrder = columns || includedColumnNames | ||
const includedColumns = columnOrder | ||
.map(name => includedColumnNames.includes(name) ? subcolumnData.get(name) : undefined) | ||
for (let row = 0; row < rowLimit; row++) { | ||
groupData[row] = includedColumns.map(column => column[row]) | ||
if (options.rowFormat === 'object') { | ||
// return each row as an object | ||
/** @type {Record<string, any>} */ | ||
const rowData = {} | ||
columnOrder.forEach((name, index) => { | ||
rowData[name] = includedColumns[index]?.[row] | ||
}) | ||
groupData[row] = rowData | ||
} else { | ||
// return each row as an array | ||
groupData[row] = includedColumns.map(column => column?.[row]) | ||
} | ||
} | ||
@@ -199,0 +214,0 @@ return groupData |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
106185
2766
255