Comparing version 0.2.3 to 0.2.4
```diff
 {
   "name": "hyparquet",
-  "version": "0.2.3",
+  "version": "0.2.4",
   "description": "parquet file parser for javascript",
@@ -30,4 +30,4 @@ "keywords": [
   "devDependencies": {
-    "@types/node": "20.11.8",
-    "@typescript-eslint/eslint-plugin": "6.19.1",
+    "@types/node": "20.11.16",
+    "@typescript-eslint/eslint-plugin": "6.20.0",
     "@vitest/coverage-v8": "1.2.2",
@@ -34,0 +34,0 @@ "eslint": "8.56.0",
```
```diff
@@ -48,15 +48,15 @@ import { CompressionCodec, ConvertedType, Encoding, PageType } from './constants.js'
 // decompress bytes
 /** @type {Uint8Array | undefined} */
 let page
 const uncompressed_page_size = Number(header.uncompressed_page_size)
 const { codec } = columnMetadata
-if (codec === CompressionCodec.GZIP) {
-  throw new Error('parquet gzip compression not supported')
-} else if (codec === CompressionCodec.SNAPPY) {
+if (codec === CompressionCodec.SNAPPY) {
   page = new Uint8Array(uncompressed_page_size)
   snappyUncompress(compressedBytes, page)
-} else if (codec === CompressionCodec.LZO) {
-  throw new Error('parquet lzo compression not supported')
+} else {
+  const compressor = Object.entries(CompressionCodec).find(([, value]) => value === codec)
+  throw new Error(`parquet unsupported compression codec: ${codec} ${compressor?.[0]}`)
 }
-if (!page || page.length !== uncompressed_page_size) {
-  throw new Error('parquet decompressed page size does not match header')
+if (page?.length !== uncompressed_page_size) {
+  throw new Error(`parquet decompressed page length ${page?.length} does not match header ${uncompressed_page_size}`)
 }
@@ -63,0 +63,0 @@
```
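The new fallback branch reverse-looks-up the codec's name from the `CompressionCodec` enum map, so the error message names the offending codec instead of a generic "not supported". A small standalone illustration of that lookup; the numeric values follow the parquet-format Thrift definition and are an assumption for this example, not lines from the diff:

```js
// Reverse lookup: find the enum key whose numeric value matches the codec
// read from the column metadata. Values assume the parquet-format Thrift enum.
const CompressionCodec = { UNCOMPRESSED: 0, SNAPPY: 1, GZIP: 2, LZO: 3, BROTLI: 4, LZ4: 5, ZSTD: 6 }

const codec = 2 // e.g. a gzip-compressed page
const compressor = Object.entries(CompressionCodec).find(([, value]) => value === codec)
console.log(`parquet unsupported compression codec: ${codec} ${compressor?.[0]}`)
// logs: "parquet unsupported compression codec: 2 GZIP"
```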
```diff
@@ -10,2 +10,9 @@ import { schemaTree } from './schema.js'
  *
+ * You must provide the byteLength of the buffer, typically from a HEAD request.
+ *
+ * In theory, you could use suffix-range requests to fetch the end of the file,
+ * and save a round trip. But in practice, this doesn't work because chrome
+ * deems suffix-range requests as a not-safe-listed header, and will require
+ * a pre-flight. So the byteLength is required.
+ *
  * To make this efficient, we initially request the last 512kb of the file,
@@ -16,3 +23,3 @@ * which is likely to contain the metadata. If the metadata length exceeds the
  * This ensures that we either make one 512kb initial request for the metadata,
- * or two requests for exactly the metadata size.
+ * or a second request for up to the metadata size.
  *
```
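The added comment explains why byteLength must be supplied up front: a suffix range request (`bytes=-N`) triggers a CORS pre-flight in Chrome, so the reader sticks to explicit ranges. A minimal sketch of an AsyncBuffer built that way, assuming byteLength is taken from a HEAD request; the `{ byteLength, slice }` shape matches the AsyncBuffer typedef referenced in the next hunk, while the helper name and fetch details are illustrative:

```js
// Sketch: an AsyncBuffer backed by HTTP range requests.
// byteLength comes from a HEAD request, so no suffix-range request
// (and no Chrome CORS pre-flight) is ever needed.
async function asyncBufferFromUrl(url) {
  const head = await fetch(url, { method: 'HEAD' })
  const byteLength = Number(head.headers.get('Content-Length'))
  return {
    byteLength,
    async slice(start, end) {
      // slice uses an exclusive end; the Range header end is inclusive
      const rangeEnd = end === undefined ? '' : end - 1
      const res = await fetch(url, { headers: { Range: `bytes=${start}-${rangeEnd}` } })
      return res.arrayBuffer()
    },
  }
}
```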
```diff
@@ -27,3 +34,3 @@ * @typedef {import("./types.d.ts").AsyncBuffer} AsyncBuffer
 // fetch last bytes (footer) of the file
-const footerOffset = asyncBuffer.byteLength - initialFetchSize
+const footerOffset = Math.max(0, asyncBuffer.byteLength - initialFetchSize)
 const footerBuffer = await asyncBuffer.slice(footerOffset)
@@ -37,3 +44,3 @@ // check if metadata size fits inside the initial fetch
 const metadataBuffer = await asyncBuffer.slice(metadataOffset, footerOffset)
-// combine the buffers
+// combine initial fetch with the new slice
 const combinedBuffer = new ArrayBuffer(metadataLength + 8)
@@ -40,0 +47,0 @@ const combinedView = new Uint8Array(combinedBuffer)
```
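Taken together, these hunks describe the fetch strategy: read the last initialFetchSize bytes (now clamped so small files don't produce a negative offset), and only when the metadata does not fit, issue a second range request and stitch the two slices into one contiguous buffer. A rough sketch of that flow; the footer parsing (a 4-byte little-endian metadata length just before the trailing "PAR1" magic) and the fits-in-initial-fetch check are inferred from the parquet footer layout, not lines from this diff:

```js
// Sketch: fetch the parquet footer, then the remainder of the metadata if needed.
async function fetchMetadataBytes(asyncBuffer, initialFetchSize = 1 << 19 /* 512kb */) {
  // clamp so files smaller than initialFetchSize don't yield a negative offset
  const footerOffset = Math.max(0, asyncBuffer.byteLength - initialFetchSize)
  const footerBuffer = await asyncBuffer.slice(footerOffset)

  // last 8 bytes of the file: 4-byte metadata length, then the "PAR1" magic
  const footerView = new DataView(footerBuffer)
  const metadataLength = footerView.getUint32(footerBuffer.byteLength - 8, true)

  if (metadataLength + 8 <= footerBuffer.byteLength) {
    return footerBuffer // metadata already fits inside the initial fetch
  }

  // second request for exactly the bytes still missing before footerOffset
  const metadataOffset = asyncBuffer.byteLength - metadataLength - 8
  const metadataBuffer = await asyncBuffer.slice(metadataOffset, footerOffset)

  // combine initial fetch with the new slice into one contiguous buffer
  const combinedBuffer = new ArrayBuffer(metadataLength + 8)
  const combinedView = new Uint8Array(combinedBuffer)
  combinedView.set(new Uint8Array(metadataBuffer))
  combinedView.set(new Uint8Array(footerBuffer), metadataBuffer.byteLength)
  return combinedBuffer
}
```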
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package