Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.2.5 to 0.2.6

8

package.json
{
"name": "hyparquet",
"version": "0.2.5",
"version": "0.2.6",
"description": "parquet file parser for javascript",

@@ -30,8 +30,8 @@ "keywords": [

"devDependencies": {
"@types/node": "20.11.16",
"@typescript-eslint/eslint-plugin": "6.20.0",
"@types/node": "20.11.17",
"@typescript-eslint/eslint-plugin": "6.21.0",
"@vitest/coverage-v8": "1.2.2",
"eslint": "8.56.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.0.4",
"eslint-plugin-jsdoc": "48.0.6",
"http-server": "14.1.1",

@@ -38,0 +38,0 @@ "typescript": "5.3.3",

@@ -19,3 +19,4 @@ import { CompressionCodec, ConvertedType, Encoding, PageType } from './constants.js'

*
* @param {ArrayBufferLike} arrayBuffer parquet file contents
* @param {ArrayBuffer} arrayBuffer parquet file contents
* @param {number} columnOffset offset to start reading from
* @param {RowGroup} rowGroup row group metadata

@@ -26,6 +27,3 @@ * @param {ColumnMetaData} columnMetadata column metadata

*/
export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
// find start of column data
const columnOffset = getColumnOffset(columnMetadata)
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schema) {
// parse column data

@@ -42,9 +40,11 @@ let valuesSeen = 0

byteOffset += headerLength
if (!header || header.compressed_page_size === undefined) throw new Error('parquet header is undefined')
if (header.compressed_page_size === undefined) {
throw new Error(`parquet compressed page size is undefined in column '${columnMetadata.path_in_schema}'`)
}
// read compressed_page_size bytes starting at offset
const compressedBytes = new Uint8Array(arrayBuffer.slice(
const compressedBytes = new Uint8Array(arrayBuffer).subarray(
columnOffset + byteOffset,
columnOffset + byteOffset + header.compressed_page_size
))
)
// decompress bytes

@@ -51,0 +51,0 @@ /** @type {Uint8Array | undefined} */

@@ -16,3 +16,3 @@ import { deserializeTCompactProtocol } from './thrift.js'

* @typedef {import("./types.d.ts").PageHeader} PageHeader
* @param {ArrayBufferLike} arrayBuffer parquet file contents
* @param {ArrayBuffer} arrayBuffer parquet file contents
* @param {number} offset offset to start reading from

@@ -22,4 +22,3 @@ * @returns {Decoded<PageHeader>} metadata object and bytes read

export function parquetHeader(arrayBuffer, offset) {
const headerBuffer = arrayBuffer.slice(offset)
const { value: header, byteLength } = deserializeTCompactProtocol(headerBuffer)
const { value: header, byteLength } = deserializeTCompactProtocol(arrayBuffer, offset)

@@ -26,0 +25,0 @@ // Parse parquet header from thrift data

@@ -93,4 +93,3 @@ import { schemaTree } from './schema.js'

const metadataOffset = metadataLengthOffset - metadataLength
const metadataBuffer = view.buffer.slice(metadataOffset, metadataLengthOffset)
const { value: metadata } = deserializeTCompactProtocol(metadataBuffer)
const { value: metadata } = deserializeTCompactProtocol(view.buffer, view.byteOffset + metadataOffset)

@@ -97,0 +96,0 @@ // Parse parquet metadata from thrift data

import { offsetArrayBuffer } from './asyncbuffer.js'
import { getColumnOffset, readColumn } from './column.js'

@@ -97,7 +96,7 @@ import { parquetMetadataAsync } from './metadata.js'

// if row group size is less than 128mb, pre-load in one read
let groupBuffer = undefined
let groupBuffer
if (groupEndByte - groupStartByte <= 1 << 27) {
// pre-load row group byte data in one big read,
// otherwise read column data individually
groupBuffer = offsetArrayBuffer(await file.slice(groupStartByte, groupEndByte), groupStartByte)
groupBuffer = await file.slice(groupStartByte, groupEndByte)
}

@@ -126,8 +125,8 @@

let buffer
if (!groupBuffer) {
buffer = file.slice(columnStartByte, columnEndByte).then(arrayBuffer => {
return offsetArrayBuffer(arrayBuffer, columnStartByte)
})
let bufferOffset = 0
if (groupBuffer) {
buffer = Promise.resolve(groupBuffer)
bufferOffset = columnStartByte - groupStartByte
} else {
buffer = Promise.resolve(groupBuffer)
buffer = file.slice(columnStartByte, columnEndByte)
}

@@ -138,3 +137,3 @@

// TODO: extract SchemaElement for this column
const columnData = readColumn(arrayBuffer, rowGroup, columnMetadata, metadata.schema)
const columnData = readColumn(arrayBuffer, bufferOffset, rowGroup, columnMetadata, metadata.schema)
if (columnData.length !== Number(rowGroup.num_rows)) {

@@ -141,0 +140,0 @@ throw new Error('parquet column length does not match row group length')

@@ -0,1 +1,7 @@

/**
* The MIT License (MIT)
* Copyright (c) 2016 Zhipeng Jia
* https://github.com/zhipeng-jia/snappyjs
*/
const WORD_MASK = [0, 0xff, 0xffff, 0xffffff, 0xffffffff]

@@ -2,0 +8,0 @@

@@ -25,6 +25,7 @@ // TCompactProtocol types

* @param {ArrayBuffer} arrayBuffer
* @param {number} byteOffset offset into the buffer
* @returns {Decoded<Record<string, any>>}
*/
export function deserializeTCompactProtocol(arrayBuffer) {
const view = new DataView(arrayBuffer)
export function deserializeTCompactProtocol(arrayBuffer, byteOffset) {
const view = new DataView(arrayBuffer, byteOffset)
let byteLength = 0

@@ -35,3 +36,3 @@ let lastFid = 0

while (byteLength < arrayBuffer.byteLength) {
while (byteLength < arrayBuffer.byteLength - byteOffset) {
// Parse each field based on its type and add to the result object

@@ -82,3 +83,3 @@ const [type, fid, newIndex, newLastFid] = readFieldBegin(view, byteLength, lastFid)

const [stringLength, stringIndex] = readVarInt(view, index)
const strBytes = new Uint8Array(view.buffer, stringIndex, stringLength)
const strBytes = new Uint8Array(view.buffer, view.byteOffset + stringIndex, stringLength)
return [new TextDecoder().decode(strBytes), stringIndex + stringLength]

@@ -85,0 +86,0 @@ }

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc