Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.8.0 to 0.9.0

6

package.json
{
"name": "hyparquet",
"version": "0.8.0",
"version": "0.9.0",
"description": "parquet file parser for javascript",

@@ -32,3 +32,3 @@ "keywords": [

"@typescript-eslint/eslint-plugin": "7.8.0",
"@vitest/coverage-v8": "1.5.3",
"@vitest/coverage-v8": "1.6.0",
"eslint": "8.57.0",

@@ -40,4 +40,4 @@ "eslint-plugin-import": "2.29.1",

"typescript": "5.4.5",
"vitest": "1.5.3"
"vitest": "1.6.0"
}
}

@@ -99,2 +99,6 @@ import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, ParquetType } from './constants.js'

const metadata = deserializeTCompactProtocol(reader)
const decoder = new TextDecoder()
/**
 * Turn raw thrift binary bytes into a string using the shared decoder.
 * A falsy input (such as an absent optional field) is handed back unchanged.
 *
 * @param {Uint8Array} value bytes to decode
 */
function decode(value) {
  if (!value) return value
  return decoder.decode(value)
}

@@ -107,3 +111,3 @@ // Parse metadata from thrift data

repetition_type: FieldRepetitionType[field.field_3],
name: field.field_4,
name: decode(field.field_4),
num_children: field.field_5,

@@ -116,6 +120,8 @@ converted_type: ConvertedType[field.field_6],

}))
// @ts-expect-error get types by column index
const columnTypes = schema.map(e => e.type).filter(e => e)
const num_rows = metadata.field_3
const row_groups = metadata.field_4.map((/** @type {any} */ rowGroup) => ({
columns: rowGroup.field_1.map((/** @type {any} */ column) => ({
file_path: column.field_1,
columns: rowGroup.field_1.map((/** @type {any} */ column, /** @type {number} */ columnIndex) => ({
file_path: decode(column.field_1),
file_offset: column.field_2,

@@ -125,3 +131,3 @@ meta_data: column.field_3 && {

encodings: column.field_3.field_2?.map((/** @type {number} */ e) => Encoding[e]),
path_in_schema: column.field_3.field_3,
path_in_schema: column.field_3.field_3.map(decode),
codec: CompressionCodec[column.field_3.field_4],

@@ -135,8 +141,3 @@ num_values: column.field_3.field_5,

dictionary_page_offset: column.field_3.field_11,
statistics: column.field_3.field_12 && {
max: column.field_3.field_12.field_1,
min: column.field_3.field_12.field_2,
null_count: column.field_3.field_12.field_3,
distinct_count: column.field_3.field_12.field_4,
},
statistics: columnStats(column.field_3.field_12, columnTypes[columnIndex]),
encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({

@@ -147,3 +148,16 @@ page_type: encodingStat.field_1,

})),
bloom_filter_offset: column.field_3.field_14,
bloom_filter_length: column.field_3.field_15,
size_statistics: column.field_3.field_16 && {
unencoded_byte_array_data_bytes: column.field_3.field_16.field_1,
repetition_level_histogram: column.field_3.field_16.field_2,
definition_level_histogram: column.field_3.field_16.field_3,
},
},
offset_index_offset: column.field_4,
offset_index_length: column.field_5,
column_index_offset: column.field_6,
column_index_length: column.field_7,
crypto_metadata: column.field_7,
encrypted_column_metadata: column.field_8,
})),

@@ -157,8 +171,11 @@ total_byte_size: rowGroup.field_2,

})),
file_offset: rowGroup.field_5,
total_compressed_size: rowGroup.field_6,
ordinal: rowGroup.field_7,
}))
const key_value_metadata = metadata.field_5?.map((/** @type {any} */ keyValue) => ({
key: keyValue.field_1,
value: keyValue.field_2,
key: decode(keyValue.field_1),
value: decode(keyValue.field_2),
}))
const created_by = metadata.field_6
const created_by = decode(metadata.field_6)

@@ -202,3 +219,3 @@ return {

// TODO: TimestampType
// TOFO: TimeType
// TODO: TimeType
if (logicalType?.field_10) {

@@ -215,1 +232,43 @@ return {

}
/**
* Convert column statistics based on column type.
*
* @param {any} stats
* @param {import("./types.d.ts").ParquetType} type
* @returns {import("./types.d.ts").Statistics}
*/
function columnStats(stats, type) {
function convert(/** @type {Uint8Array} */ value) {
if (value === undefined) return value
if (type === 'BOOLEAN') return value[0] === 1
if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value)
if (type === 'INT32') {
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
return view.getInt32(0, true)
}
if (type === 'INT64') {
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
return view.getBigInt64(0, true)
}
if (type === 'FLOAT') {
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
return view.getFloat32(0, true)
}
if (type === 'DOUBLE') {
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
return view.getFloat64(0, true)
}
return value
}
return stats && {
max: convert(stats.field_1),
min: convert(stats.field_2),
null_count: stats.field_3,
distinct_count: stats.field_4,
max_value: convert(stats.field_5),
min_value: convert(stats.field_6),
is_max_value_exact: stats.field_7,
is_min_value_exact: stats.field_8,
}
}

@@ -73,7 +73,6 @@ // TCompactProtocol types

case CompactType.BINARY: {
// strings are encoded as utf-8, no \0 delimiter
const stringLength = readVarInt(reader)
const strBytes = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, stringLength)
reader.offset += stringLength
return new TextDecoder().decode(strBytes)
return strBytes
}

@@ -80,0 +79,0 @@ case CompactType.LIST: {

@@ -20,6 +20,9 @@ type Awaitable<T> = T | Promise<T>

schema: SchemaElement[]
num_rows: number
num_rows: bigint
row_groups: RowGroup[]
key_value_metadata?: KeyValue[]
created_by?: string
// column_orders?: ColumnOrder[]
// encryption_algorithm?: EncryptionAlgorithm
// footer_signing_key_metadata?: Uint8Array
metadata_length: number

@@ -122,5 +125,8 @@ }

columns: ColumnChunk[]
total_byte_size: number
num_rows: number
total_byte_size: bigint
num_rows: bigint
sorting_columns?: SortingColumn[]
file_offset?: bigint
total_compressed_size?: bigint
ordinal?: number
}

@@ -130,4 +136,10 @@

file_path?: string
file_offset: number
file_offset: bigint
meta_data?: ColumnMetaData
offset_index_offset?: bigint
offset_index_length?: number
column_index_offset?: bigint
column_index_length?: number
crypto_metadata?: ColumnCryptoMetaData
encrypted_column_metadata?: Uint8Array
}

@@ -140,13 +152,18 @@

codec: CompressionCodec
num_values: number
total_uncompressed_size: number
total_compressed_size: number
num_values: bigint
total_uncompressed_size: bigint
total_compressed_size: bigint
key_value_metadata?: KeyValue[]
data_page_offset: number
index_page_offset?: number
dictionary_page_offset?: number
data_page_offset: bigint
index_page_offset?: bigint
dictionary_page_offset?: bigint
statistics?: Statistics
encoding_stats?: PageEncodingStats[]
bloom_filter_offset?: bigint
bloom_filter_length?: number
size_statistics?: SizeStatistics
}
interface ColumnCryptoMetaData {}
export type Encoding =

@@ -182,9 +199,21 @@ 'PLAIN' |

type MinMaxType = bigint | boolean | number | string
export interface Statistics {
max?: string
min?: string
null_count?: number
distinct_count?: number
max?: MinMaxType
min?: MinMaxType
null_count?: bigint
distinct_count?: bigint
max_value?: string
min_value?: string
is_max_value_exact?: boolean
is_min_value_exact?: boolean
}
interface SizeStatistics {
unencoded_byte_array_data_bytes?: bigint
repetition_level_histogram?: bigint[]
definition_level_histogram?: bigint[]
}
interface PageEncodingStats {

@@ -191,0 +220,0 @@ page_type: PageType

@@ -13,2 +13,3 @@ /**

if (Array.isArray(obj)) return obj.map(toJson)
if (obj instanceof Uint8Array) return Array.from(obj)
if (obj instanceof Object) {

@@ -15,0 +16,0 @@ /** @type {Record<string, unknown>} */

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc