Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
0
Versions
57
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 1.2.1 to 1.3.0

src/indexes.js

6

package.json
{
"name": "hyparquet",
"version": "1.2.1",
"version": "1.3.0",
"description": "parquet file parser for javascript",

@@ -29,4 +29,4 @@ "keywords": [

"devDependencies": {
"@types/node": "22.3.0",
"@typescript-eslint/eslint-plugin": "8.1.0",
"@types/node": "22.4.1",
"@typescript-eslint/eslint-plugin": "8.2.0",
"@vitest/coverage-v8": "2.0.5",

@@ -33,0 +33,0 @@ "eslint": "8.57.0",

@@ -1,4 +0,2 @@

/**
* @type {import('./types.js').ParquetType[]}
*/
/** @type {import('./types.js').ParquetType[]} */
export const ParquetType = [

@@ -34,5 +32,3 @@ 'BOOLEAN',

/**
* @type {import('./types.js').ConvertedType[]}
*/
/** @type {import('./types.js').ConvertedType[]} */
export const ConvertedType = [

@@ -63,5 +59,3 @@ 'UTF8',

/**
* @type {import('./types.js').LogicalTypeType[]}
*/
/** @type {import('./types.js').LogicalTypeType[]} */
export const logicalTypeType = [

@@ -96,5 +90,3 @@ 'NULL',

/**
* @type {import('./types.js').PageType[]}
*/
/** @type {import('./types.js').PageType[]} */
export const PageType = [

@@ -106,1 +98,8 @@ 'DATA_PAGE',

]
/**
 * Names of the boundary orderings, in declaration order.
 * NOTE(review): presumably looked up by numeric enum value like the other
 * constant arrays in this file, so the element order must not change — confirm.
 *
 * @type {import('./types.js').BoundaryOrder[]}
 */
export const BoundaryOrder = ['UNORDERED', 'ASCENDING', 'DESCENDING']

@@ -31,2 +31,16 @@ import type { AsyncBuffer, Compressors, FileMetaData, SchemaTree } from './types.d.ts'

/**
 * Read parquet data and return a Promise of object-oriented row data.
 *
 * @param {object} options read options
 * @param {AsyncBuffer} options.file file-like object containing parquet data
 * @param {FileMetaData} [options.metadata] parquet file metadata
 * @param {string[]} [options.columns] columns to read, all columns if undefined
 * @param {number} [options.rowStart] first requested row index (inclusive)
 * @param {number} [options.rowEnd] last requested row index (exclusive)
 * @param {Compressors} [options.compressor] custom decompressors
 * @returns {Promise<Array<Record<string, any>>>} resolves with the parsed rows as an array of records
 */
export function parquetReadObjects(options: ParquetReadOptions): Promise<Array<Record<string, any>>>
/**
* Read parquet metadata from an async buffer.

@@ -33,0 +47,0 @@ *

@@ -12,1 +12,15 @@ import { parquetMetadata, parquetMetadataAsync, parquetSchema } from './metadata.js'

export { asyncBufferFromFile, asyncBufferFromUrl, toJson }
/**
 * Promise-returning wrapper around parquetRead.
 *
 * The returned promise resolves with whatever parquetRead passes to its
 * onComplete callback, and rejects if the promise returned by parquetRead
 * rejects. `rowFormat` defaults to 'object' but a `rowFormat` supplied in
 * `options` takes precedence; any `onComplete` supplied in `options` is
 * replaced by the promise's resolver.
 *
 * @param {import('./hyparquet.js').ParquetReadOptions} options
 * @returns {Promise<Array<Record<string, any>>>}
 */
export function parquetReadObjects(options) {
  return new Promise((resolve, reject) => {
    /** @type {import('./hyparquet.js').ParquetReadOptions} */
    const readOptions = { rowFormat: 'object', ...options, onComplete: resolve }
    parquetRead(readOptions).catch(reject)
  })
}

@@ -140,3 +140,3 @@ import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js'

dictionary_page_offset: column.field_3.field_11,
statistics: columnStats(column.field_3.field_12, columnSchema[columnIndex]),
statistics: convertStats(column.field_3.field_12, columnSchema[columnIndex]),
encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({

@@ -256,28 +256,10 @@ page_type: PageType[encodingStat.field_1],

*/
function columnStats(stats, schema) {
const { type, converted_type, logical_type } = schema
function convert(/** @type {Uint8Array} */ value) {
if (value === undefined) return value
if (type === 'BOOLEAN') return value[0] === 1
if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value)
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
if (type === 'FLOAT') return view.getFloat32(0, true)
if (type === 'DOUBLE') return view.getFloat64(0, true)
if (type === 'INT32' && converted_type === 'DATE') return new Date(view.getInt32(0, true) * 86400000)
if (type === 'INT64' && converted_type === 'TIMESTAMP_MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n))
if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS') return new Date(Number(view.getBigInt64(0, true)))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true)))
if (type === 'INT32') return view.getInt32(0, true)
if (type === 'INT64') return view.getBigInt64(0, true)
if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0))
if (logical_type?.type === 'FLOAT16') return parseFloat16(value)
return value
}
function convertStats(stats, schema) {
return stats && {
max: convert(stats.field_1),
min: convert(stats.field_2),
max: convertMetadata(stats.field_1, schema),
min: convertMetadata(stats.field_2, schema),
null_count: stats.field_3,
distinct_count: stats.field_4,
max_value: convert(stats.field_5),
min_value: convert(stats.field_6),
max_value: convertMetadata(stats.field_5, schema),
min_value: convertMetadata(stats.field_6, schema),
is_max_value_exact: stats.field_7,

@@ -287,1 +269,27 @@ is_min_value_exact: stats.field_8,

}
/**
 * Convert a raw metadata value (such as a statistics min/max) into a
 * JavaScript value, based on the column's schema element.
 *
 * A value that cannot be decoded for the declared type, including a buffer
 * that is empty or too short for that type, is returned unchanged as raw
 * bytes rather than throwing.
 *
 * @param {Uint8Array | undefined} value raw bytes, or undefined when absent
 * @param {SchemaElement} schema schema element describing the value's column
 * @returns {import('./types.d.ts').MinMaxType | undefined}
 */
export function convertMetadata(value, schema) {
  const { type, converted_type, logical_type } = schema
  if (value === undefined) return value
  if (type === 'BOOLEAN') return value[0] === 1
  if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value)
  const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
  if (type === 'FLOAT' && view.byteLength === 4) return view.getFloat32(0, true)
  if (type === 'DOUBLE' && view.byteLength === 8) return view.getFloat64(0, true)
  // The date/timestamp branches read a fixed-width little-endian integer.
  // Guard the length first so an empty or truncated value falls through to
  // the raw-bytes return instead of raising a RangeError from DataView.
  if (type === 'INT32' && view.byteLength >= 4) {
    // the integer is multiplied by the number of milliseconds in a day
    if (converted_type === 'DATE') return new Date(view.getInt32(0, true) * 86400000)
    if (view.byteLength === 4) return view.getInt32(0, true)
  }
  if (type === 'INT64' && view.byteLength >= 8) {
    if (converted_type === 'TIMESTAMP_MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n))
    if (converted_type === 'TIMESTAMP_MILLIS') return new Date(Number(view.getBigInt64(0, true)))
    // NOTE(review): the logical TIMESTAMP value is passed to Date as-is
    // (i.e. as milliseconds); no unit on logical_type is consulted — confirm.
    if (logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true)))
    if (view.byteLength === 8) return view.getBigInt64(0, true)
  }
  if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0))
  if (logical_type?.type === 'FLOAT16') return parseFloat16(value)
  // FIXED_LEN_BYTE_ARRAY and anything not handled above: return the raw bytes
  return value
}

@@ -81,5 +81,6 @@ // TCompactProtocol types

const [elemType, listSize] = readCollectionBegin(reader)
const boolType = elemType === CompactType.TRUE || elemType === CompactType.FALSE
const values = new Array(listSize)
for (let i = 0; i < listSize; i++) {
values[i] = readElement(reader, elemType)
values[i] = boolType ? readElement(reader, CompactType.BYTE) === 1 : readElement(reader, elemType)
}

@@ -207,8 +208,7 @@ return values

let fid // field id
if (delta === 0) {
// not a delta, read zigzag varint field id
fid = readZigZag(reader)
} else {
if (delta) {
// add delta to last field id
fid = lastFid + delta
} else {
throw new Error('non-delta field id not supported')
}

@@ -215,0 +215,0 @@ return [getCompactType(type), fid, fid]

@@ -214,3 +214,3 @@ export type Awaitable<T> = T | Promise<T>

type MinMaxType = bigint | boolean | number | string
type MinMaxType = bigint | boolean | number | string | Date | Uint8Array

@@ -305,1 +305,24 @@ export interface Statistics {

any[]
/**
 * Offset index: a list of page locations plus an optional list of byte counts.
 * NOTE(review): field names appear to mirror the OffsetIndex struct of the
 * Parquet format's page index — confirm against the thrift definition.
 */
export interface OffsetIndex {
/** location records, see PageLocation */
page_locations: PageLocation[]
/** optional; presumably one unencoded byte-array size per page — TODO confirm */
unencoded_byte_array_data_bytes?: bigint[]
}
/**
 * Location record for a single page (not exported from this module).
 * NOTE(review): presumably mirrors the PageLocation struct of the Parquet
 * format — confirm units and origin of each field against the thrift definition.
 */
interface PageLocation {
/** presumably the page's byte offset within the file — TODO confirm */
offset: bigint
/** presumably the page's compressed size in bytes — TODO confirm */
compressed_page_size: number
/** presumably the index of the page's first row — TODO confirm what it is relative to */
first_row_index: bigint
}
/**
 * Column index: parallel lists of null flags and min/max values, a boundary
 * order, and optional counts and histograms.
 * NOTE(review): presumably mirrors the ColumnIndex struct of the Parquet
 * format's page index, with one list entry per page — confirm.
 */
export interface ColumnIndex {
/** presumably true for pages that contain only nulls — TODO confirm */
null_pages: boolean[]
/** minimum values, typed as MinMaxType */
min_values: MinMaxType[]
/** maximum values, typed as MinMaxType */
max_values: MinMaxType[]
/** one of 'UNORDERED' | 'ASCENDING' | 'DESCENDING' */
boundary_order: BoundaryOrder
/** optional null counts */
null_counts?: bigint[]
/** optional repetition level histograms */
repetition_level_histograms?: bigint[]
/** optional definition level histograms */
definition_level_histograms?: bigint[]
}
/** Allowed values of ColumnIndex.boundary_order. */
export type BoundaryOrder = 'UNORDERED' | 'ASCENDING' | 'DESCENDING'
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc