Comparing version 0.7.7 to 0.7.8
 {
   "name": "hyparquet",
-  "version": "0.7.7",
+  "version": "0.7.8",
   "description": "parquet file parser for javascript",
@@ -37,3 +37,3 @@ "keywords": [
     "http-server": "14.1.1",
-    "hysnappy": "0.3.0",
+    "hysnappy": "0.3.1",
     "typescript": "5.4.5",
@@ -40,0 +40,0 @@ "vitest": "1.5.2"
@@ -205,4 +205,5 @@ # hyparquet
 - https://github.com/dask/fastparquet
 - https://github.com/duckdb/duckdb
+- https://github.com/google/snappy
 - https://github.com/ironSource/parquetjs
 - https://github.com/zhipeng-jia/snappyjs
@@ -5,9 +5,8 @@ /**
  * Reconstructs a complex nested structure from flat arrays of definition and repetition levels,
- * according to Dremel encoding. This simplified version focuses on arrays and scalar values,
- * with optional support for null values.
+ * according to Dremel encoding.
  *
- * @param {number[] | undefined} definitionLevels definition levels, max 3
- * @param {number[]} repetitionLevels repetition levels, max 1
+ * @param {number[] | undefined} definitionLevels definition levels
+ * @param {number[]} repetitionLevels repetition levels
  * @param {ArrayLike<any>} values values to process
- * @param {boolean} isNull can an entry be null?
+ * @param {boolean} isNullable can entries be null?
  * @param {number} maxDefinitionLevel definition level that corresponds to non-null
@@ -18,3 +17,3 @@ * @param {number} maxRepetitionLevel repetition level that corresponds to a new row
 export function assembleObjects(
-  definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel, maxRepetitionLevel
+  definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
 ) {
@@ -41,32 +40,23 @@ let valueIndex = 0
       currentContainer = containerStack.at(-1)
-      if (def) {
-        for (let j = rep; j < maxRepetitionLevel; j++) {
-          /** @type {any[]} */
-          const newList = []
-          currentContainer.push(newList)
-          currentContainer = newList
-          containerStack.push(newList)
-        }
-      }
     }
+    // Add lists up to definition level
+    const targetDepth = isNullable ? (def + 1) / 2 : maxRepetitionLevel + 1
+    for (let j = containerStack.length; j < targetDepth; j++) {
+      /** @type {any[]} */
+      const newList = []
+      currentContainer.push(newList)
+      currentContainer = newList
+      containerStack.push(newList)
+    }
     // Add value or null based on definition level
     if (def === maxDefinitionLevel) {
-      if (!currentContainer) {
-        throw new Error('parquet assembleObjects: currentContainer is undefined')
-      }
       currentContainer.push(values[valueIndex++])
-    } else if (isNull) {
-      if (def) {
-        // TODO: Go up maxDefinitionLevel - def - 1 levels to add null
-        for (let j = def; j < maxDefinitionLevel - 1; j++) {
-          containerStack.pop()
-          // @ts-expect-error won't be empty
-          currentContainer = containerStack.at(-1)
-        }
-        if (def > 1) {
-          currentContainer.push(undefined)
-        }
+    } else if (isNullable) {
+      // TODO: actually depends on level required or not
+      if (def % 2 === 0) {
+        currentContainer.push(undefined)
       } else {
-        currentContainer.push(undefined)
+        currentContainer.push([])
       }
@@ -73,0 +63,0 @@ }
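
The assemble.js change above replaces the repetition-level list construction with a single definition-level `targetDepth` pass and renames `isNull` to `isNullable`. As background for readers new to Dremel encoding, the sketch below (a hypothetical helper, not hyparquet's code) shows the core idea for the simplest case of a single repeated leaf column: repetition level 0 starts a new row, and the maximum definition level marks a present leaf value.

```js
// Standalone sketch: assemble a single repeated leaf column, where
// maxRepetitionLevel = 1 and maxDefinitionLevel = 1, so definition
// level 0 encodes an empty list.
function assembleLists(definitionLevels, repetitionLevels, values, maxDefinitionLevel) {
  const rows = []
  let valueIndex = 0
  for (let i = 0; i < repetitionLevels.length; i++) {
    // repetition level 0 starts a new row
    if (repetitionLevels[i] === 0) rows.push([])
    const currentRow = rows[rows.length - 1]
    // the max definition level marks a present leaf value
    if (definitionLevels[i] === maxDefinitionLevel) {
      currentRow.push(values[valueIndex++])
    }
  }
  return rows
}

// [[1, 2, 3], [], [4, 5]] round-trips from its levels:
console.log(assembleLists(
  [1, 1, 1, 0, 1, 1], // definition levels
  [0, 1, 1, 0, 0, 1], // repetition levels
  [1, 2, 3, 4, 5],    // leaf values (non-null only)
  1
))
```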
@@ -172,7 +172,5 @@ import { assembleObjects } from './assemble.js'
 /**
- * @typedef {import('./types.js').PageHeader} PageHeader
- * @typedef {import('./types.js').CompressionCodec} CompressionCodec
  * @param {Uint8Array} compressedBytes
  * @param {number} uncompressed_page_size
- * @param {CompressionCodec} codec
+ * @param {import('./types.js').CompressionCodec} codec
  * @param {Compressors | undefined} compressors
@@ -179,0 +177,0 @@ * @returns {Uint8Array}
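
This hunk only inlines the `CompressionCodec` import type in the `decompressPage` JSDoc. For context on what the parameters feed into, a rough codec dispatch is sketched below; it assumes `compressors` maps a codec name to a `(bytes, outputLength) => Uint8Array` function, which is an assumption about the shape rather than hyparquet's documented API.

```js
// Rough sketch of a codec dispatch (illustrative, not hyparquet's exact code).
function decompressPageSketch(compressedBytes, uncompressed_page_size, codec, compressors) {
  if (codec === 'UNCOMPRESSED') return compressedBytes
  const decompress = compressors && compressors[codec]
  if (!decompress) {
    throw new Error(`parquet unsupported compression codec: ${codec}`)
  }
  const page = decompress(compressedBytes, uncompressed_page_size)
  if (page.length !== uncompressed_page_size) {
    throw new Error(`parquet decompressed page size mismatch: ${page.length}`)
  }
  return page
}
```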
 /**
- * @typedef {import('./types.js').ParquetType} ParquetTypeType
- * @type {ParquetTypeType[]}
+ * @type {import('./types.js').ParquetType[]}
  */
@@ -36,4 +35,3 @@ export const ParquetType = [
 /**
- * @typedef {import('./types.js').ConvertedType} ConvertedTypeType
- * @type {ConvertedTypeType[]}
+ * @type {import('./types.js').ConvertedType[]}
  */
@@ -66,4 +64,3 @@ export const ConvertedType = [
 /**
- * @typedef {import('./types.js').LogicalTypeType} LogicalTypeType
- * @type {LogicalTypeType[]}
+ * @type {import('./types.js').LogicalTypeType[]}
  */
@@ -100,4 +97,3 @@ export const logicalTypeType = [
 /**
- * @typedef {import('./types.js').PageType} PageType
- * @type {PageType[]}
+ * @type {import('./types.js').PageType[]}
  */
@@ -104,0 +100,0 @@ export const PageType = [
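
These constants.js edits are purely JSDoc: each array keeps its `@type` annotation but references the union type directly via an inline import. The arrays themselves act as lookup tables from the integer enum values in the thrift metadata to readable names. A small illustration follows; the enum order matches the parquet-format spec, and `physicalTypeName` is a hypothetical helper, not part of hyparquet.

```js
// Illustration only: map a thrift enum integer to its readable type name.
const ParquetType = [
  'BOOLEAN', 'INT32', 'INT64', 'INT96',
  'FLOAT', 'DOUBLE', 'BYTE_ARRAY', 'FIXED_LEN_BYTE_ARRAY',
]

/** @param {number} enumValue @returns {string} */
function physicalTypeName(enumValue) {
  const name = ParquetType[enumValue]
  if (name === undefined) throw new Error(`parquet unknown type: ${enumValue}`)
  return name
}

console.log(physicalTypeName(1)) // 'INT32'
```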
@@ -1,7 +0,3 @@
-/**
- * @typedef {import('./types.js').SchemaElement} SchemaElement
- */
-const dayMillis = 86400000000000 // 1 day in ms
+const dayMillis = 86400000000000 // 1 day in milliseconds
 /**
@@ -11,3 +7,3 @@ * Convert known types from primitive to rich.
  * @param {any[]} data series of primitive types
- * @param {SchemaElement} schemaElement schema element for the data
+ * @param {import('./types.js').SchemaElement} schemaElement schema element for the data
  * @returns {any[]} series of rich types
@@ -14,0 +10,0 @@ */
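
convert.js drops its local `SchemaElement` typedef in favor of an inline import, and the `dayMillis` comment is spelled out. To illustrate the kind of primitive-to-rich conversion this module performs, here is a standalone sketch; it is not hyparquet's implementation, the `converted_type` strings follow the parquet-format spec, and `MILLIS_PER_DAY` is written out explicitly as 24 × 60 × 60 × 1000.

```js
// Illustrative sketch: convert primitive DATE and JSON values to richer
// JavaScript types based on the schema element.
const MILLIS_PER_DAY = 86400000 // 24 * 60 * 60 * 1000

function convertSketch(data, schemaElement) {
  const ctype = schemaElement.converted_type // assumed string form, e.g. 'DATE'
  if (ctype === 'DATE') {
    // DATE is stored as int32 days since the Unix epoch
    return data.map(days => new Date(days * MILLIS_PER_DAY))
  }
  if (ctype === 'JSON') {
    // JSON is stored as UTF-8 encoded bytes
    const decoder = new TextDecoder()
    return data.map(bytes => JSON.parse(decoder.decode(bytes)))
  }
  return data // pass through types that need no conversion
}

console.log(convertSketch([19723], { converted_type: 'DATE' })) // [ 2024-01-01T00:00:00.000Z ]
```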
@@ -13,6 +13,5 @@ import { decompressPage } from './column.js'
  * @typedef {import("./types.d.ts").DataPageHeaderV2} DataPageHeaderV2
- * @typedef {import("./types.d.ts").PageHeader} PageHeader
  * @typedef {import("./types.d.ts").SchemaElement} SchemaElement
  * @param {Uint8Array} compressedBytes raw page data (should already be decompressed)
- * @param {PageHeader} ph page header
+ * @param {import("./types.d.ts").PageHeader} ph page header
  * @param {SchemaElement[]} schema schema for the file
@@ -19,0 +18,0 @@ * @param {ColumnMetaData} columnMetadata metadata for the column
@@ -171,5 +171,4 @@ import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, ParquetType } from './constants.js'
  *
- * @typedef {import("./types.d.ts").SchemaTree} SchemaTree
  * @param {FileMetaData} metadata parquet metadata object
- * @returns {SchemaTree} tree of schema elements
+ * @returns {import("./types.d.ts").SchemaTree} tree of schema elements
  */
@@ -176,0 +175,0 @@ export function parquetSchema(metadata) {
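
`parquetSchema`'s JSDoc now references `SchemaTree` inline. Since the function's job is to rebuild a tree from Parquet's flattened metadata (a depth-first list where group elements carry `num_children`), a compact sketch of that reconstruction may help; the `{ element, children, count }` shape is assumed for illustration and is not necessarily hyparquet's exact `SchemaTree` type.

```js
// Sketch of rebuilding a tree from the flattened schema element list.
function schemaTreeSketch(schemaElements, index = 0) {
  const element = schemaElements[index]
  const children = []
  let count = 1 // number of flattened elements consumed, including this one
  for (let i = 0; i < (element.num_children || 0); i++) {
    const child = schemaTreeSketch(schemaElements, index + count)
    children.push(child)
    count += child.count
  }
  return { element, children, count }
}

// Usage: const tree = schemaTreeSketch(metadata.schema)
```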
@@ -185,6 +185,10 @@
       if (Array.isArray(keys[i][j])) {
-        // TODO: key should not be an array, this is an assemble bug
+        // TODO: key should not be an array, this is an assemble bug?
         keys[i][j] = keys[i][j][0]
         values[i][j] = values[i][j][0]
       }
+      if (keys[i][j] instanceof Uint8Array) {
+        // decode utf-8 keys
+        keys[i][j] = new TextDecoder().decode(keys[i][j])
+      }
       if (!keys[i][j]) continue
@@ -191,0 +195,0 @@ obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]
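
This is the main functional change of the release: map keys that arrive as raw UTF-8 bytes are now decoded with `TextDecoder` before being used as object keys, where a `Uint8Array` would otherwise be coerced to a comma-separated byte string. A tiny standalone demonstration of the decoding step (not the diffed code):

```js
const rawKey = new Uint8Array([0x6e, 0x61, 0x6d, 0x65]) // 'name' in UTF-8
const key = rawKey instanceof Uint8Array ? new TextDecoder().decode(rawKey) : rawKey
const obj = {}
obj[key] = 'hyparquet'
console.log(obj) // { name: 'hyparquet' }
```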
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package