Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.9.2 to 0.9.3

8

package.json
{
"name": "hyparquet",
"version": "0.9.2",
"version": "0.9.3",
"description": "parquet file parser for javascript",

@@ -30,8 +30,8 @@ "keywords": [

"devDependencies": {
"@types/node": "20.12.11",
"@typescript-eslint/eslint-plugin": "7.8.0",
"@types/node": "20.12.12",
"@typescript-eslint/eslint-plugin": "7.9.0",
"@vitest/coverage-v8": "1.6.0",
"eslint": "8.57.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.2.4",
"eslint-plugin-jsdoc": "48.2.5",
"http-server": "14.1.1",

@@ -38,0 +38,0 @@ "hysnappy": "0.3.1",

@@ -26,3 +26,3 @@ import { assembleObjects } from './assemble.js'

* @param {Compressors} [compressors] custom decompressors
* @returns {ArrayLike<any>} array of values
* @returns {any[]} array of values
*/

@@ -47,5 +47,4 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schemaPath, compressors) {

// read compressed_page_size bytes starting at offset
const compressedBytes = new Uint8Array(arrayBuffer).subarray(
columnOffset + reader.offset,
columnOffset + reader.offset + header.compressed_page_size
const compressedBytes = new Uint8Array(
arrayBuffer, columnOffset + reader.offset, header.compressed_page_size
)

@@ -65,2 +64,3 @@

valuesSeen += daph.num_values
// assert(!daph.statistics || daph.statistics.null_count === BigInt(daph.num_values - dataPage.length))

@@ -89,7 +89,4 @@ // construct output values: skip nulls and construct lists

}
// assert(BigInt(values.length) === rowGroup.num_rows)
// TODO: check that we are at the end of the page
// values.length !== daph.num_values isn't right. In cases like arrays,
// you need the total number of children, not the number of top-level values.
concat(rowData, values)

@@ -96,0 +93,0 @@ } else if (header.type === 'DICTIONARY_PAGE') {

@@ -15,3 +15,7 @@ const dayMillis = 86400000 // 1 day in milliseconds

const decoder = new TextDecoder()
return data.map(v => v && decoder.decode(v))
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = data[i] && decoder.decode(data[i])
}
return arr
}

@@ -21,20 +25,21 @@ if (ctype === 'DECIMAL') {

const factor = Math.pow(10, -scale)
if (typeof data[0] === 'number') {
if (factor === 1) return data
return Array.from(data).map(v => v * factor)
} else if (typeof data[0] === 'bigint') {
if (factor === 1) return data
return Array.from(data).map(v => Number(v) * factor)
} else {
return Array.from(data).map(v => parseDecimal(v) * factor)
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
if (data[0] instanceof Uint8Array) {
arr[i] = parseDecimal(data[i]) * factor
} else {
arr[i] = Number(data[i]) * factor
}
}
return arr
}
if (ctype === 'DATE') {
return Array.from(data).map(v => new Date(v * dayMillis))
}
if (ctype === undefined && schemaElement.type === 'INT96') {
return Array.from(data).map(parseInt96Date)
}
if (ctype === 'TIME_MILLIS') {
return Array.from(data).map(v => new Date(v))
if (ctype === 'DATE') {
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = new Date(data[i] * dayMillis)
}
return arr
}

@@ -50,2 +55,8 @@ if (ctype === 'JSON') {

}
// TODO: ctype UINT
const logicalType = schemaElement.logical_type?.type
if (logicalType === 'FLOAT16') {
return Array.from(data).map(parseFloat16)
}
// TODO: logical types
return data

@@ -77,1 +88,16 @@ }

}
/**
 * Decode a 16-bit IEEE 754 half-precision float from two little-endian bytes.
 *
 * Bit layout (after assembly): 1 sign bit, 5 exponent bits (bias 15),
 * 10 mantissa bits. Handles subnormals, ±Infinity, and NaN.
 *
 * @param {Uint8Array | undefined} bytes
 * @returns {number | undefined}
 */
export function parseFloat16(bytes) {
  if (!bytes) return undefined
  const bits = bytes[0] | (bytes[1] << 8)
  const sign = bits & 0x8000 ? -1 : 1
  const exponent = (bits >> 10) & 0x1f
  const mantissa = bits & 0x3ff
  // exponent all ones: infinity when mantissa is zero, otherwise NaN
  if (exponent === 0x1f) return mantissa ? NaN : sign * Infinity
  // exponent all zeros: subnormal, no implicit leading 1
  if (exponent === 0) return sign * Math.pow(2, -14) * (mantissa / 1024)
  // normal number: implicit leading 1 on the mantissa
  return sign * Math.pow(2, exponent - 15) * (1 + mantissa / 1024)
}

@@ -61,3 +61,3 @@ import { readVarInt } from './thrift.js'

const width = (bitWidth + 7) >> 3
let value
let value = 0
if (width === 1) {

@@ -69,3 +69,3 @@ value = reader.view.getUint8(reader.offset)

value = reader.view.getUint32(reader.offset, true)
} else {
} else if (width) {
throw new Error(`parquet invalid rle width ${width}`)

@@ -72,0 +72,0 @@ }

import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, ParquetType } from './constants.js'
import { parseFloat16 } from './convert.js'
import { getSchemaPath } from './schema.js'

@@ -27,2 +28,3 @@ import { deserializeTCompactProtocol } from './thrift.js'

* @typedef {import("./types.d.ts").FileMetaData} FileMetaData
* @typedef {import("./types.d.ts").SchemaElement} SchemaElement
* @param {AsyncBuffer} asyncBuffer parquet file contents

@@ -107,2 +109,3 @@ * @param {number} initialFetchSize initial fetch size in bytes

const version = metadata.field_1
/** @type {SchemaElement[]} */
const schema = metadata.field_2.map((/** @type {any} */ field) => ({

@@ -120,4 +123,4 @@ type: ParquetType[field.field_1],

}))
// @ts-expect-error get types by column index
const columnTypes = schema.map(e => e.type).filter(e => e)
// schema element per column index
const columnSchema = schema.filter(e => e.type)
const num_rows = metadata.field_3

@@ -140,3 +143,3 @@ const row_groups = metadata.field_4.map((/** @type {any} */ rowGroup) => ({

dictionary_page_offset: column.field_3.field_11,
statistics: columnStats(column.field_3.field_12, columnTypes[columnIndex]),
statistics: columnStats(column.field_3.field_12, columnSchema[columnIndex]),
encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({

@@ -242,6 +245,7 @@ page_type: encodingStat.field_1,

* @param {any} stats
* @param {import("./types.d.ts").ParquetType} type
* @param {SchemaElement} schema
* @returns {import("./types.d.ts").Statistics}
*/
function columnStats(stats, type) {
function columnStats(stats, schema) {
const { type, logical_type } = schema
function convert(/** @type {Uint8Array} */ value) {

@@ -267,2 +271,5 @@ if (value === undefined) return value

}
if (logical_type?.type === 'FLOAT16') {
return parseFloat16(value)
}
return value

@@ -269,0 +276,0 @@ }

@@ -112,6 +112,5 @@

/** @type {any[][]} */
const groupData = []
const groupColumnData = []
const promises = []
const maps = new Map()
let outputColumnIndex = 0
// read column data

@@ -153,3 +152,3 @@ for (let columnIndex = 0; columnIndex < rowGroup.columns.length; columnIndex++) {

const schemaPath = getSchemaPath(metadata.schema, columnMetadata.path_in_schema)
/** @type {ArrayLike<any> | undefined} */
/** @type {any[] | undefined} */
let columnData = readColumn(

@@ -212,23 +211,12 @@ arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors

})
// add colum data to group data only if onComplete is defined
if (options.onComplete) addColumn(groupData, outputColumnIndex, columnData)
outputColumnIndex++
// save column data only if onComplete is defined
if (options.onComplete) groupColumnData.push(columnData)
}))
}
await Promise.all(promises)
return groupData
}
/**
* Add a column to rows.
*
* @param {any[][]} rows rows to add column data to
* @param {number} columnIndex column index to add
* @param {ArrayLike<any>} columnData column data to add
*/
function addColumn(rows, columnIndex, columnData) {
for (let i = 0; i < columnData.length; i++) {
if (!rows[i]) rows[i] = []
rows[i][columnIndex] = columnData[i]
if (options.onComplete) {
// transpose columns into rows
return groupColumnData[0].map((_, row) => groupColumnData.map(col => col[row]))
}
return []
}

@@ -1,2 +0,2 @@

type Awaitable<T> = T | Promise<T>
export type Awaitable<T> = T | Promise<T>

@@ -47,3 +47,3 @@ /**

field_id?: number
logicalType?: LogicalType
logical_type?: LogicalType
}

@@ -50,0 +50,0 @@

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc