Comparing version 0.2.4 to 0.2.5
{ | ||
"name": "hyparquet", | ||
"version": "0.2.4", | ||
"version": "0.2.5", | ||
"description": "parquet file parser for javascript", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
@@ -8,3 +8,3 @@ # hyparquet | ||
[![mit license](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) | ||
![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet) | ||
[![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/hyparquet?activeTab=dependencies) | ||
@@ -11,0 +11,0 @@ JavaScript parser for [Apache Parquet](https://parquet.apache.org) files. |
@@ -32,2 +32,9 @@ export { AsyncBuffer, FileMetaData, SchemaTree } from './types' | ||
* | ||
* You must provide the byteLength of the buffer, typically from a HEAD request. | ||
* | ||
* In theory, you could use suffix-range requests to fetch the end of the file, | ||
* and save a round trip. But in practice, this doesn't work because Chrome | ||
* treats a suffix-range request as using a non-safelisted header, and will | ||
* require a preflight. So the byteLength is required. | ||
* | ||
* To make this efficient, we initially request the last 512kb of the file, | ||
@@ -38,3 +45,3 @@ * which is likely to contain the metadata. If the metadata length exceeds the | ||
* This ensures that we either make one 512kb initial request for the metadata, | ||
* or two requests for exactly the metadata size. | ||
* or a second request for up to the metadata size. | ||
* | ||
@@ -41,0 +48,0 @@ * @param {AsyncBuffer} asyncBuffer parquet file contents |
@@ -34,5 +34,17 @@ import { schemaTree } from './schema.js' | ||
const footerBuffer = await asyncBuffer.slice(footerOffset) | ||
// check if metadata size fits inside the initial fetch | ||
// Check for parquet magic number "PAR1" | ||
const footerView = new DataView(footerBuffer) | ||
if (footerView.getUint32(footerBuffer.byteLength - 4, true) !== 0x31524150) { | ||
throw new Error('parquet file invalid (footer != PAR1)') | ||
} | ||
// Parquet files store metadata at the end of the file | ||
// Metadata length is 4 bytes before the last PAR1 | ||
const metadataLength = footerView.getUint32(footerBuffer.byteLength - 8, true) | ||
if (metadataLength > asyncBuffer.byteLength - 8) { | ||
throw new Error(`parquet metadata length ${metadataLength} exceeds available buffer ${asyncBuffer.byteLength - 8}`) | ||
} | ||
// check if metadata size fits inside the initial fetch | ||
if (metadataLength + 8 > initialFetchSize) { | ||
@@ -69,3 +81,3 @@ // fetch the rest of the metadata | ||
if (view.getUint32(view.byteLength - 4, true) !== 0x31524150) { | ||
throw new Error('parquet file invalid magic number') | ||
throw new Error('parquet file invalid (footer != PAR1)') | ||
} | ||
@@ -72,0 +84,0 @@ |
75563
2038
2
1
49