hyparquet - npm package version comparison

Comparing version 1.6.2 to 1.6.3

types/assemble.d.ts


package.json
{
"name": "hyparquet",
"version": "1.6.2",
"version": "1.6.3",
"description": "parquet file parser for javascript",

@@ -18,9 +18,12 @@ "keywords": [

"files": [
"types",
"src"
],
"type": "module",
"types": "src/hyparquet.d.ts",
"types": "types/hyparquet.d.ts",
"scripts": {
"build:types": "tsc -p ./tsconfig.build.json",
"coverage": "vitest run --coverage --coverage.include=src",
"lint": "eslint .",
"prepare": "npm run build:types",
"test": "vitest run"

@@ -27,0 +30,0 @@ },

@@ -9,4 +9,3 @@ import { isListLike, isMapLike } from './schema.js'

*
* @typedef {import('./types.d.ts').DecodedArray} DecodedArray
* @typedef {import('./types.d.ts').FieldRepetitionType} FieldRepetitionType
* @import {DecodedArray, FieldRepetitionType} from '../src/types.d.ts'
* @param {any[]} output

@@ -108,3 +107,3 @@ * @param {number[] | undefined} definitionLevels

*
* @typedef {import('./types.d.ts').SchemaTree} SchemaTree
* @import {SchemaTree} from '../src/types.d.ts'
* @param {Map<string, any[]>} subcolumnData

@@ -111,0 +110,0 @@ * @param {SchemaTree} schema top-level schema element

import { assembleLists } from './assemble.js'
import { Encoding, PageType } from './constants.js'
import { convertWithDictionary } from './convert.js'
import { decompressPage, readDataPage, readDictionaryPage } from './datapage.js'
import { readDataPageV2 } from './datapageV2.js'
import { parquetHeader } from './header.js'
import { decompressPage, readDataPage, readDataPageV2, readDictionaryPage } from './datapage.js'
import { getMaxDefinitionLevel } from './schema.js'
import { deserializeTCompactProtocol } from './thrift.js'
import { concat } from './utils.js'

@@ -12,9 +12,7 @@

*
* @typedef {import('./types.js').ColumnMetaData} ColumnMetaData
* @typedef {import('./types.js').DecodedArray} DecodedArray
* @param {import('./types.js').DataReader} reader
* @param {DataReader} reader
* @param {number} rowLimit maximum number of rows to read
* @param {ColumnMetaData} columnMetadata column metadata
* @param {import('./types.js').SchemaTree[]} schemaPath schema path for the column
* @param {import('./hyparquet.js').ParquetReadOptions} options read options
* @param {SchemaTree[]} schemaPath schema path for the column
* @param {ParquetReadOptions} options read options
* @returns {any[]} array of values

@@ -121,1 +119,59 @@ */

}
/**
* Read parquet header from a buffer.
*
* @import {ColumnMetaData, DecodedArray, DataReader, PageHeader, ParquetReadOptions, SchemaTree} from '../src/types.d.ts'
* @param {DataReader} reader - parquet file reader
* @returns {PageHeader} metadata object and bytes read
*/
function parquetHeader(reader) {
const header = deserializeTCompactProtocol(reader)
// Parse parquet header from thrift data
const type = PageType[header.field_1]
const uncompressed_page_size = header.field_2
const compressed_page_size = header.field_3
const crc = header.field_4
const data_page_header = header.field_5 && {
num_values: header.field_5.field_1,
encoding: Encoding[header.field_5.field_2],
definition_level_encoding: Encoding[header.field_5.field_3],
repetition_level_encoding: Encoding[header.field_5.field_4],
statistics: header.field_5.field_5 && {
max: header.field_5.field_5.field_1,
min: header.field_5.field_5.field_2,
null_count: header.field_5.field_5.field_3,
distinct_count: header.field_5.field_5.field_4,
max_value: header.field_5.field_5.field_5,
min_value: header.field_5.field_5.field_6,
},
}
const index_page_header = header.field_6
const dictionary_page_header = header.field_7 && {
num_values: header.field_7.field_1,
encoding: Encoding[header.field_7.field_2],
is_sorted: header.field_7.field_3,
}
const data_page_header_v2 = header.field_8 && {
num_values: header.field_8.field_1,
num_nulls: header.field_8.field_2,
num_rows: header.field_8.field_3,
encoding: Encoding[header.field_8.field_4],
definition_levels_byte_length: header.field_8.field_5,
repetition_levels_byte_length: header.field_8.field_6,
is_compressed: header.field_8.field_7 === undefined ? true : header.field_8.field_7, // default true
statistics: header.field_8.field_8,
}
return {
type,
uncompressed_page_size,
compressed_page_size,
crc,
data_page_header,
index_page_header,
dictionary_page_header,
data_page_header_v2,
}
}

@@ -1,2 +0,2 @@

/** @type {import('./types.js').ParquetType[]} */
/** @type {import('../src/types.d.ts').ParquetType[]} */
export const ParquetType = [

@@ -32,3 +32,3 @@ 'BOOLEAN',

/** @type {import('./types.js').ConvertedType[]} */
/** @type {import('../src/types.d.ts').ConvertedType[]} */
export const ConvertedType = [

@@ -59,3 +59,3 @@ 'UTF8',

/** @type {import('./types.js').LogicalTypeType[]} */
/** @type {import('../src/types.d.ts').LogicalTypeType[]} */
export const logicalTypeType = [

@@ -90,3 +90,3 @@ 'NULL',

/** @type {import('./types.js').PageType[]} */
/** @type {import('../src/types.d.ts').PageType[]} */
export const PageType = [

@@ -99,3 +99,3 @@ 'DATA_PAGE',

/** @type {import('./types.js').BoundaryOrder[]} */
/** @type {import('../src/types.d.ts').BoundaryOrder[]} */
export const BoundaryOrder = [

@@ -102,0 +102,0 @@ 'UNORDERED',

@@ -6,8 +6,7 @@ const dayMillis = 86400000 // 1 day in milliseconds

*
* @typedef {import('./types.js').DecodedArray} DecodedArray
* @typedef {import('./types.js').SchemaElement} SchemaElement
* @import {DecodedArray, Encoding, SchemaElement} from '../src/types.d.ts'
* @param {DecodedArray} data series of primitive types
* @param {DecodedArray | undefined} dictionary
* @param {SchemaElement} schemaElement
* @param {import('./types.js').Encoding} encoding
* @param {Encoding} encoding
* @param {boolean | undefined} utf8 decode bytes as utf8?

@@ -14,0 +13,0 @@ * @returns {DecodedArray} series of rich types

@@ -0,1 +1,2 @@

import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta.js'
import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js'

@@ -9,7 +10,2 @@ import { readPlain } from './plain.js'

*
* @typedef {import("./types.d.ts").DataPage} DataPage
* @typedef {import("./types.d.ts").ColumnMetaData} ColumnMetaData
* @typedef {import("./types.d.ts").DataPageHeader} DataPageHeader
* @typedef {import("./types.d.ts").SchemaTree} SchemaTree
* @typedef {import("./types.d.ts").DecodedArray} DecodedArray
* @param {Uint8Array} bytes raw page data (should already be decompressed)

@@ -62,3 +58,3 @@ * @param {DataPageHeader} daph data page header

* @param {Uint8Array} bytes raw page data
* @param {import("./types.d.ts").DictionaryPageHeader} diph dictionary page header
* @param {DictionaryPageHeader} diph dictionary page header
* @param {ColumnMetaData} columnMetadata

@@ -75,3 +71,3 @@ * @param {number | undefined} typeLength - type_length from schema

/**
* @typedef {import("./types.d.ts").DataReader} DataReader
* @import {ColumnMetaData, CompressionCodec, Compressors, DataPage, DataPageHeader, DataPageHeaderV2, DataReader, DecodedArray, DictionaryPageHeader, PageHeader, SchemaTree} from '../src/types.d.ts'
* @param {DataReader} reader data view for the page

@@ -120,4 +116,4 @@ * @param {DataPageHeader} daph data page header

* @param {number} uncompressed_page_size
* @param {import('./types.js').CompressionCodec} codec
* @param {import('./types.js').Compressors | undefined} compressors
* @param {CompressionCodec} codec
* @param {Compressors | undefined} compressors
* @returns {Uint8Array}

@@ -144,1 +140,108 @@ */

}
/**
* Read a data page from the given Uint8Array.
*
* @param {Uint8Array} compressedBytes raw page data
* @param {PageHeader} ph page header
* @param {SchemaTree[]} schemaPath
* @param {ColumnMetaData} columnMetadata
* @param {Compressors | undefined} compressors
* @returns {DataPage} definition levels, repetition levels, and array of values
*/
export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, compressors) {
const view = new DataView(compressedBytes.buffer, compressedBytes.byteOffset, compressedBytes.byteLength)
const reader = { view, offset: 0 }
const { codec, type } = columnMetadata
const daph2 = ph.data_page_header_v2
if (!daph2) throw new Error('parquet data page header v2 is undefined')
// repetition levels
const repetitionLevels = readRepetitionLevelsV2(reader, daph2, schemaPath)
reader.offset = daph2.repetition_levels_byte_length // readVarInt() => len for boolean v2?
// definition levels
const definitionLevels = readDefinitionLevelsV2(reader, daph2, schemaPath)
// assert(reader.offset === daph2.repetition_levels_byte_length + daph2.definition_levels_byte_length)
const uncompressedPageSize = ph.uncompressed_page_size - daph2.definition_levels_byte_length - daph2.repetition_levels_byte_length
let page = compressedBytes.subarray(reader.offset)
if (daph2.is_compressed !== false) {
page = decompressPage(page, uncompressedPageSize, codec, compressors)
}
const pageView = new DataView(page.buffer, page.byteOffset, page.byteLength)
const pageReader = { view: pageView, offset: 0 }
// read values based on encoding
/** @type {DecodedArray} */
let dataPage
const nValues = daph2.num_values - daph2.num_nulls
if (daph2.encoding === 'PLAIN') {
const { type_length } = schemaPath[schemaPath.length - 1].element
dataPage = readPlain(pageReader, type, nValues, type_length)
} else if (daph2.encoding === 'RLE') {
// assert(columnMetadata.type === 'BOOLEAN')
dataPage = new Array(nValues)
readRleBitPackedHybrid(pageReader, 1, 0, dataPage)
dataPage = dataPage.map(x => !!x)
} else if (
daph2.encoding === 'PLAIN_DICTIONARY' ||
daph2.encoding === 'RLE_DICTIONARY'
) {
const bitWidth = pageView.getUint8(pageReader.offset++)
dataPage = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize - 1, dataPage)
} else if (daph2.encoding === 'DELTA_BINARY_PACKED') {
const int32 = type === 'INT32'
dataPage = int32 ? new Int32Array(nValues) : new BigInt64Array(nValues)
deltaBinaryUnpack(pageReader, nValues, dataPage)
} else if (daph2.encoding === 'DELTA_LENGTH_BYTE_ARRAY') {
dataPage = new Array(nValues)
deltaLengthByteArray(pageReader, nValues, dataPage)
} else if (daph2.encoding === 'DELTA_BYTE_ARRAY') {
dataPage = new Array(nValues)
deltaByteArray(pageReader, nValues, dataPage)
} else if (daph2.encoding === 'BYTE_STREAM_SPLIT') {
const { type_length } = schemaPath[schemaPath.length - 1].element
dataPage = byteStreamSplit(reader, nValues, type, type_length)
} else {
throw new Error(`parquet unsupported encoding: ${daph2.encoding}`)
}
return { definitionLevels, repetitionLevels, dataPage }
}
/**
* @param {DataReader} reader
* @param {DataPageHeaderV2} daph2 data page header v2
* @param {SchemaTree[]} schemaPath
* @returns {any[]} repetition levels
*/
function readRepetitionLevelsV2(reader, daph2, schemaPath) {
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
if (!maxRepetitionLevel) return []
const values = new Array(daph2.num_values)
readRleBitPackedHybrid(
reader, bitWidth(maxRepetitionLevel), daph2.repetition_levels_byte_length, values
)
return values
}
/**
* @param {DataReader} reader
* @param {DataPageHeaderV2} daph2 data page header v2
* @param {SchemaTree[]} schemaPath
* @returns {number[] | undefined} definition levels
*/
function readDefinitionLevelsV2(reader, daph2, schemaPath) {
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
if (maxDefinitionLevel) {
// V2 we know the length
const values = new Array(daph2.num_values)
readRleBitPackedHybrid(reader, bitWidth(maxDefinitionLevel), daph2.definition_levels_byte_length, values)
return values
}
}
import { readVarInt, readZigZagBigInt } from './thrift.js'
/**
* @typedef {import('./types.d.ts').DataReader} DataReader
* @import {DataReader} from '../src/types.d.ts'
* @param {DataReader} reader

@@ -6,0 +6,0 @@ * @param {number} count number of values to read

@@ -18,4 +18,2 @@ import { readVarInt } from './thrift.js'

*
* @typedef {import("./types.d.ts").DataReader} DataReader
* @typedef {import("./types.d.ts").DecodedArray} DecodedArray
* @param {DataReader} reader

@@ -121,3 +119,2 @@ * @param {number} width - width of each bit-packed group

/**
* @typedef {import("./types.d.ts").ParquetType} ParquetType
* @param {DataReader} reader

@@ -154,2 +151,3 @@ * @param {number} count

/**
* @import {DataReader, DecodedArray, ParquetType} from '../src/types.d.ts'
* @param {ParquetType} type

@@ -156,0 +154,0 @@ * @param {number | undefined} typeLength

@@ -10,10 +10,8 @@ export { parquetMetadata, parquetMetadataAsync, parquetSchema } from './metadata.js'

export { asyncBufferFromFile, asyncBufferFromUrl, byteLengthFromUrl, toJson } from './utils.js'
export { asyncBufferFromFile, asyncBufferFromUrl, byteLengthFromUrl, cachedAsyncBuffer, toJson } from './utils.js'
export { cachedAsyncBuffer } from './asyncBuffer.js'
/**
* @param {import('./hyparquet.js').ParquetReadOptions} options
* @returns {Promise<Array<Record<string, any>>>}
*/
* @param {ParquetReadOptions} options
* @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed
*/
export function parquetReadObjects(options) {

@@ -28,1 +26,39 @@ return new Promise((onComplete, reject) => {

}
/**
* Explicitly export types for use in downstream typescript projects through
* `import { ParquetReadOptions } from 'hyparquet'` for example.
*
* @template {any} T
* @typedef {import('../src/types.d.ts').Awaitable<T>} Awaitable<T>
*/
/**
* @typedef {import('../src/types.d.ts').AsyncBuffer} AsyncBuffer
* @typedef {import('../src/types.d.ts').DataReader} DataReader
* @typedef {import('../src/types.d.ts').FileMetaData} FileMetaData
* @typedef {import('../src/types.d.ts').SchemaTree} SchemaTree
* @typedef {import('../src/types.d.ts').SchemaElement} SchemaElement
* @typedef {import('../src/types.d.ts').ParquetType} ParquetType
* @typedef {import('../src/types.d.ts').FieldRepetitionType} FieldRepetitionType
* @typedef {import('../src/types.d.ts').ConvertedType} ConvertedType
* @typedef {import('../src/types.d.ts').TimeUnit} TimeUnit
* @typedef {import('../src/types.d.ts').LogicalType} LogicalType
* @typedef {import('../src/types.d.ts').LogicalTypeType} LogicalTypeType
* @typedef {import('../src/types.d.ts').RowGroup} RowGroup
* @typedef {import('../src/types.d.ts').ColumnChunk} ColumnChunk
* @typedef {import('../src/types.d.ts').ColumnMetaData} ColumnMetaData
* @typedef {import('../src/types.d.ts').Encoding} Encoding
* @typedef {import('../src/types.d.ts').CompressionCodec} CompressionCodec
* @typedef {import('../src/types.d.ts').Compressors} Compressors
* @typedef {import('../src/types.d.ts').Statistics} Statistics
* @typedef {import('../src/types.d.ts').PageType} PageType
* @typedef {import('../src/types.d.ts').PageHeader} PageHeader
* @typedef {import('../src/types.d.ts').DataPageHeader} DataPageHeader
* @typedef {import('../src/types.d.ts').DictionaryPageHeader} DictionaryPageHeader
* @typedef {import('../src/types.d.ts').DecodedArray} DecodedArray
* @typedef {import('../src/types.d.ts').OffsetIndex} OffsetIndex
* @typedef {import('../src/types.d.ts').ColumnIndex} ColumnIndex
* @typedef {import('../src/types.d.ts').BoundaryOrder} BoundaryOrder
* @typedef {import('../src/types.d.ts').ColumnData} ColumnData
* @typedef {import('../src/types.d.ts').ParquetReadOptions} ParquetReadOptions
*/
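The typedef block above re-exports the package's types so downstream TypeScript projects can import them directly. A minimal sketch of that usage, assuming the published type declarations (types/hyparquet.d.ts) are picked up by the compiler:

import { parquetReadObjects } from 'hyparquet'
import type { AsyncBuffer, ParquetReadOptions } from 'hyparquet'

// sketch: only `file` is required; all other read options are optional
async function readAll(file: AsyncBuffer): Promise<Record<string, any>[]> {
  const options: ParquetReadOptions = { file }
  return parquetReadObjects(options)
}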

@@ -6,6 +6,5 @@ import { BoundaryOrder } from './constants.js'

/**
* @typedef {import('./types.d.ts').DataReader} DataReader
* @param {DataReader} reader
* @param {import('./types.d.ts').SchemaElement} schema
* @returns {import('./types.d.ts').ColumnIndex}
* @param {SchemaElement} schema
* @returns {ColumnIndex}
*/

@@ -27,3 +26,3 @@ export function readColumnIndex(reader, schema) {

* @param {DataReader} reader
* @returns {import('./types.d.ts').OffsetIndex}
* @returns {OffsetIndex}
*/

@@ -39,4 +38,5 @@ export function readOffsetIndex(reader) {

/**
* @import {ColumnIndex, DataReader, OffsetIndex, PageLocation, SchemaElement} from '../src/types.d.ts'
* @param {any} loc
* @returns {import('./types.d.ts').PageLocation}
* @returns {PageLocation}
*/

@@ -43,0 +43,0 @@ function pageLocation(loc) {

@@ -26,7 +26,4 @@ import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js'

*
* @typedef {import("./types.d.ts").AsyncBuffer} AsyncBuffer
* @typedef {import("./types.d.ts").FileMetaData} FileMetaData
* @typedef {import("./types.d.ts").SchemaElement} SchemaElement
* @param {AsyncBuffer} asyncBuffer parquet file contents
* @param {number} initialFetchSize initial fetch size in bytes
* @param {number} initialFetchSize initial fetch size in bytes (default 512kb)
* @returns {Promise<FileMetaData>} parquet metadata object

@@ -194,3 +191,3 @@ */

* @param {FileMetaData} metadata parquet metadata object
* @returns {import("./types.d.ts").SchemaTree} tree of schema elements
* @returns {SchemaTree} tree of schema elements
*/

@@ -203,3 +200,3 @@ export function parquetSchema(metadata) {

* @param {any} logicalType
* @returns {import("./types.d.ts").LogicalType | undefined}
* @returns {LogicalType | undefined}
*/

@@ -242,3 +239,3 @@ function logicalType(logicalType) {

* @param {any} unit
* @returns {import("./types.d.ts").TimeUnit}
* @returns {TimeUnit}
*/

@@ -255,5 +252,6 @@ function timeUnit(unit) {

*
* @import {AsyncBuffer, FileMetaData, LogicalType, MinMaxType, SchemaElement, SchemaTree, Statistics, TimeUnit} from '../src/types.d.ts'
* @param {any} stats
* @param {SchemaElement} schema
* @returns {import("./types.d.ts").Statistics}
* @returns {Statistics}
*/

@@ -276,3 +274,3 @@ function convertStats(stats, schema) {

* @param {SchemaElement} schema
* @returns {import('./types.d.ts').MinMaxType | undefined}
* @returns {MinMaxType | undefined}
*/

@@ -279,0 +277,0 @@ export function convertMetadata(value, schema) {

/**
* Read `count` values of the given type from the reader.view.
*
* @typedef {import("./types.d.ts").DataReader} DataReader
* @typedef {import("./types.d.ts").DecodedArray} DecodedArray
* @typedef {import("./types.d.ts").ParquetType} ParquetType
* @import {DataReader, DecodedArray, ParquetType} from '../src/types.d.ts'
* @param {DataReader} reader - buffer to read data from

@@ -8,0 +6,0 @@ * @param {ParquetType} type - parquet type of the data

@@ -6,8 +6,8 @@ import { parquetReadObjects } from './hyparquet.js'

* Wraps parquetRead with orderBy support.
* This is a parquet-aware query engine that can read a subset of rows,
* with an optional orderBy clause.
* This is a parquet-aware query engine that can read a subset of rows and columns.
* Accepts an optional orderBy column name to sort the results.
* Note that using orderBy may SIGNIFICANTLY increase the query time.
*
* @typedef {import('./hyparquet.js').ParquetReadOptions} ParquetReadOptions
* @param {ParquetReadOptions & { orderBy?: string }} options
* @returns {Promise<Record<string, any>[]>}
* @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed
*/

@@ -40,2 +40,3 @@ export async function parquetQuery(options) {

* Returns a sparse array of rows.
* @import {ParquetReadOptions} from '../src/types.d.ts'
* @param {ParquetReadOptions & { rows: number[] }} options

@@ -42,0 +43,0 @@ * @returns {Promise<Record<string, any>[]>}
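The reworked parquetQuery docs above describe a parquet-aware query engine with an optional orderBy column. A hedged usage sketch (file path, column names, and row range are illustrative, and parquetQuery is assumed to be exported from the package entry point):

import { asyncBufferFromFile, parquetQuery } from 'hyparquet'

// sketch: read a row/column subset, sorted by a hypothetical 'timestamp' column
const file = await asyncBufferFromFile('data.parquet') // hypothetical path
const rows = await parquetQuery({
  file,
  columns: ['id', 'timestamp'],
  rowStart: 0,
  rowEnd: 100,
  orderBy: 'timestamp', // optional; sorting may significantly increase query time
})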

@@ -18,6 +18,2 @@ import { assembleNested } from './assemble.js'

*
* @typedef {import('./hyparquet.js').ColumnData} ColumnData
* @typedef {import('./types.js').Compressors} Compressors
* @typedef {import('./types.js').AsyncBuffer} AsyncBuffer
* @typedef {import('./types.js').FileMetaData} FileMetaData
* @param {object} options read options

@@ -27,3 +23,3 @@ * @param {AsyncBuffer} options.file file-like object containing parquet data

* @param {string[]} [options.columns] columns to read, all columns if undefined
* @param {string} [options.rowFormat] format of each row passed to the onComplete function
* @param {string} [options.rowFormat] desired format of each row passed to the onComplete function
* @param {number} [options.rowStart] first requested row index (inclusive)

@@ -74,3 +70,2 @@ * @param {number} [options.rowEnd] last requested row index (exclusive)

*
* @typedef {import('./types.js').RowGroup} RowGroup
* @param {object} options read options

@@ -223,3 +218,4 @@ * @param {AsyncBuffer} options.file file-like object containing parquet data

*
* @param {import('./types.js').SchemaTree} schema
* @import {AsyncBuffer, ColumnData, Compressors, FileMetaData, RowGroup, SchemaTree} from '../src/types.d.ts'
* @param {SchemaTree} schema
* @param {string[]} output

@@ -226,0 +222,0 @@ * @returns {string[]}

/**
* Build a tree from the schema elements.
*
* @typedef {import('./types.js').SchemaElement} SchemaElement
* @typedef {import('./types.js').SchemaTree} SchemaTree
* @import {SchemaElement, SchemaTree} from '../src/types.d.ts'
* @param {SchemaElement[]} schema

@@ -7,0 +6,0 @@ * @param {number} rootIndex index of the root element

@@ -22,3 +22,3 @@ // TCompactProtocol types

*
* @typedef {import("./types.d.ts").DataReader} DataReader
* @import {DataReader} from '../src/types.d.ts'
* @param {DataReader} reader

@@ -25,0 +25,0 @@ * @returns {Record<string, any>}

@@ -327,1 +327,27 @@ export type Awaitable<T> = T | Promise<T>

export type BoundaryOrder = 'UNORDERED' | 'ASCENDING' | 'DESCENDING'
/**
* A run of column data
*/
export interface ColumnData {
columnName: string
columnData: ArrayLike<any>
rowStart: number
rowEnd: number
}
/**
* Parquet query options for reading data
*/
export interface ParquetReadOptions {
file: AsyncBuffer // file-like object containing parquet data
metadata?: FileMetaData // parquet metadata, will be parsed if not provided
columns?: string[] // columns to read, all columns if undefined
rowFormat?: string // format of each row passed to the onComplete function
rowStart?: number // first requested row index (inclusive)
rowEnd?: number // last requested row index (exclusive)
onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may be outside the requested range.
onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
compressors?: Compressors // custom decompressors
utf8?: boolean // decode byte arrays as utf8 strings (default true)
}
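A hedged sketch of these read options in use with parquetRead (file path and column names are illustrative):

import { asyncBufferFromFile, parquetRead } from 'hyparquet'

// sketch: stream column chunks as they are parsed, then receive all requested rows
const file = await asyncBufferFromFile('data.parquet') // hypothetical path
await parquetRead({
  file,
  columns: ['name', 'score'], // hypothetical column names
  rowEnd: 1000,
  onChunk: chunk => console.log(chunk.columnName, chunk.rowStart, chunk.rowEnd),
  onComplete: rows => console.log('parsed', rows.length, 'rows'),
})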
/**
* Replace bigint, date, etc with legal JSON types.
* When parsing parquet files, bigints are used to represent 64-bit integers.
* However, JSON does not support bigints, so it's helpful to convert to numbers.
*

@@ -28,3 +30,2 @@ * @param {any} obj object to convert
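The toJson helper described above normalizes parser output for JSON serialization. A hedged example of the intended behavior (exact handling of dates and typed arrays may differ):

import { toJson } from 'hyparquet'

// sketch: bigints from 64-bit parquet columns become plain numbers
const row = { id: 123n, name: 'alice' }
JSON.stringify(toJson(row)) // expected: '{"id":123,"name":"alice"}'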

*
* @typedef {import('./types.js').DecodedArray} DecodedArray
* @param {any[]} aaa first array

@@ -42,2 +43,3 @@ * @param {DecodedArray} bbb second array

* Get the byte length of a URL using a HEAD request.
* If requestInit is provided, it will be passed to fetch.
*

@@ -60,4 +62,5 @@ * @param {string} url

* Construct an AsyncBuffer for a URL.
* If byteLength is not provided, will make a HEAD request to get the file size.
* If requestInit is provided, it will be passed to fetch.
*
* @typedef {import('./types.js').AsyncBuffer} AsyncBuffer
* @param {object} options

@@ -125,1 +128,55 @@ * @param {string} options.url

}
/**
* Returns a cached layer on top of an AsyncBuffer. For caching slices of a file
* that are read multiple times, possibly over a network.
*
* @param {AsyncBuffer} file file-like object to cache
* @returns {AsyncBuffer} cached file-like object
*/
export function cachedAsyncBuffer({ byteLength, slice }) {
const cache = new Map()
return {
byteLength,
/**
* @param {number} start
* @param {number} [end]
* @returns {Awaitable<ArrayBuffer>}
*/
slice(start, end) {
const key = cacheKey(start, end, byteLength)
const cached = cache.get(key)
if (cached) return cached
// cache miss, read from file
const promise = slice(start, end)
cache.set(key, promise)
return promise
},
}
}
/**
* Returns canonical cache key for a byte range 'start,end'.
* Normalize int-range and suffix-range requests to the same key.
*
* @import {AsyncBuffer, Awaitable, DecodedArray} from '../src/types.d.ts'
* @param {number} start start byte of range
* @param {number} [end] end byte of range, or undefined for suffix range
* @param {number} [size] size of file, or undefined for suffix range
* @returns {string}
*/
function cacheKey(start, end, size) {
if (start < 0) {
if (end !== undefined) throw new Error(`invalid suffix range [${start}, ${end}]`)
if (size === undefined) return `${start},`
return `${size + start},${size}`
} else if (end !== undefined) {
if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`)
return `${start},${end}`
} else if (size === undefined) {
return `${start},`
} else {
return `${start},${size}`
}
}
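cachedAsyncBuffer above memoizes byte-range reads, which is mainly useful for remote files fetched over HTTP. A hedged sketch combining it with asyncBufferFromUrl (URL and column name are illustrative):

import { asyncBufferFromUrl, cachedAsyncBuffer, parquetReadObjects } from 'hyparquet'

// sketch: repeated slices of the same remote parquet file are served from the cache
const remote = await asyncBufferFromUrl({ url: 'https://example.com/data.parquet' })
const file = cachedAsyncBuffer(remote)
const rows = await parquetReadObjects({ file, columns: ['id'] })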