Comparing version 0.2.5 to 0.2.6
{ | ||
"name": "hyparquet", | ||
"version": "0.2.5", | ||
"version": "0.2.6", | ||
"description": "parquet file parser for javascript", | ||
@@ -30,8 +30,8 @@ "keywords": [ | ||
"devDependencies": { | ||
"@types/node": "20.11.16", | ||
"@typescript-eslint/eslint-plugin": "6.20.0", | ||
"@types/node": "20.11.17", | ||
"@typescript-eslint/eslint-plugin": "6.21.0", | ||
"@vitest/coverage-v8": "1.2.2", | ||
"eslint": "8.56.0", | ||
"eslint-plugin-import": "2.29.1", | ||
"eslint-plugin-jsdoc": "48.0.4", | ||
"eslint-plugin-jsdoc": "48.0.6", | ||
"http-server": "14.1.1", | ||
@@ -38,0 +38,0 @@ "typescript": "5.3.3", |
@@ -19,3 +19,4 @@ import { CompressionCodec, ConvertedType, Encoding, PageType } from './constants.js' | ||
* | ||
* @param {ArrayBufferLike} arrayBuffer parquet file contents | ||
* @param {ArrayBuffer} arrayBuffer parquet file contents | ||
* @param {number} columnOffset offset to start reading from | ||
* @param {RowGroup} rowGroup row group metadata | ||
@@ -26,6 +27,3 @@ * @param {ColumnMetaData} columnMetadata column metadata | ||
*/ | ||
export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) { | ||
// find start of column data | ||
const columnOffset = getColumnOffset(columnMetadata) | ||
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schema) { | ||
// parse column data | ||
@@ -42,9 +40,11 @@ let valuesSeen = 0 | ||
byteOffset += headerLength | ||
if (!header || header.compressed_page_size === undefined) throw new Error('parquet header is undefined') | ||
if (header.compressed_page_size === undefined) { | ||
throw new Error(`parquet compressed page size is undefined in column '${columnMetadata.path_in_schema}'`) | ||
} | ||
// read compressed_page_size bytes starting at offset | ||
const compressedBytes = new Uint8Array(arrayBuffer.slice( | ||
const compressedBytes = new Uint8Array(arrayBuffer).subarray( | ||
columnOffset + byteOffset, | ||
columnOffset + byteOffset + header.compressed_page_size | ||
)) | ||
) | ||
// decompress bytes | ||
@@ -51,0 +51,0 @@ /** @type {Uint8Array | undefined} */ |
@@ -16,3 +16,3 @@ import { deserializeTCompactProtocol } from './thrift.js' | ||
* @typedef {import("./types.d.ts").PageHeader} PageHeader | ||
* @param {ArrayBufferLike} arrayBuffer parquet file contents | ||
* @param {ArrayBuffer} arrayBuffer parquet file contents | ||
* @param {number} offset offset to start reading from | ||
@@ -22,4 +22,3 @@ * @returns {Decoded<PageHeader>} metadata object and bytes read | ||
export function parquetHeader(arrayBuffer, offset) { | ||
const headerBuffer = arrayBuffer.slice(offset) | ||
const { value: header, byteLength } = deserializeTCompactProtocol(headerBuffer) | ||
const { value: header, byteLength } = deserializeTCompactProtocol(arrayBuffer, offset) | ||
@@ -26,0 +25,0 @@ // Parse parquet header from thrift data |
@@ -93,4 +93,3 @@ import { schemaTree } from './schema.js' | ||
const metadataOffset = metadataLengthOffset - metadataLength | ||
const metadataBuffer = view.buffer.slice(metadataOffset, metadataLengthOffset) | ||
const { value: metadata } = deserializeTCompactProtocol(metadataBuffer) | ||
const { value: metadata } = deserializeTCompactProtocol(view.buffer, view.byteOffset + metadataOffset) | ||
@@ -97,0 +96,0 @@ // Parse parquet metadata from thrift data |
import { offsetArrayBuffer } from './asyncbuffer.js' | ||
import { getColumnOffset, readColumn } from './column.js' | ||
@@ -97,7 +96,7 @@ import { parquetMetadataAsync } from './metadata.js' | ||
// if row group size is less than 128mb, pre-load in one read | ||
let groupBuffer = undefined | ||
let groupBuffer | ||
if (groupEndByte - groupStartByte <= 1 << 27) { | ||
// pre-load row group byte data in one big read, | ||
// otherwise read column data individually | ||
groupBuffer = offsetArrayBuffer(await file.slice(groupStartByte, groupEndByte), groupStartByte) | ||
groupBuffer = await file.slice(groupStartByte, groupEndByte) | ||
} | ||
@@ -126,8 +125,8 @@ | ||
let buffer | ||
if (!groupBuffer) { | ||
buffer = file.slice(columnStartByte, columnEndByte).then(arrayBuffer => { | ||
return offsetArrayBuffer(arrayBuffer, columnStartByte) | ||
}) | ||
let bufferOffset = 0 | ||
if (groupBuffer) { | ||
buffer = Promise.resolve(groupBuffer) | ||
bufferOffset = columnStartByte - groupStartByte | ||
} else { | ||
buffer = Promise.resolve(groupBuffer) | ||
buffer = file.slice(columnStartByte, columnEndByte) | ||
} | ||
@@ -138,3 +137,3 @@ | ||
// TODO: extract SchemaElement for this column | ||
const columnData = readColumn(arrayBuffer, rowGroup, columnMetadata, metadata.schema) | ||
const columnData = readColumn(arrayBuffer, bufferOffset, rowGroup, columnMetadata, metadata.schema) | ||
if (columnData.length !== Number(rowGroup.num_rows)) { | ||
@@ -141,0 +140,0 @@ throw new Error('parquet column length does not match row group length') |
@@ -0,1 +1,7 @@ | ||
/** | ||
* The MIT License (MIT) | ||
* Copyright (c) 2016 Zhipeng Jia | ||
* https://github.com/zhipeng-jia/snappyjs | ||
*/ | ||
const WORD_MASK = [0, 0xff, 0xffff, 0xffffff, 0xffffffff] | ||
@@ -2,0 +8,0 @@ |
@@ -25,6 +25,7 @@ // TCompactProtocol types | ||
* @param {ArrayBuffer} arrayBuffer | ||
* @param {number} byteOffset offset into the buffer | ||
* @returns {Decoded<Record<string, any>>} | ||
*/ | ||
export function deserializeTCompactProtocol(arrayBuffer) { | ||
const view = new DataView(arrayBuffer) | ||
export function deserializeTCompactProtocol(arrayBuffer, byteOffset) { | ||
const view = new DataView(arrayBuffer, byteOffset) | ||
let byteLength = 0 | ||
@@ -35,3 +36,3 @@ let lastFid = 0 | ||
while (byteLength < arrayBuffer.byteLength) { | ||
while (byteLength < arrayBuffer.byteLength - byteOffset) { | ||
// Parse each field based on its type and add to the result object | ||
@@ -82,3 +83,3 @@ const [type, fid, newIndex, newLastFid] = readFieldBegin(view, byteLength, lastFid) | ||
const [stringLength, stringIndex] = readVarInt(view, index) | ||
const strBytes = new Uint8Array(view.buffer, stringIndex, stringLength) | ||
const strBytes = new Uint8Array(view.buffer, view.byteOffset + stringIndex, stringLength) | ||
return [new TextDecoder().decode(strBytes), stringIndex + stringLength] | ||
@@ -85,0 +86,0 @@ } |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
74399
17
2012