New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in · Demo · Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
65
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.7.10 to 0.7.11

4

package.json
{
"name": "hyparquet",
"version": "0.7.10",
"version": "0.7.11",
"description": "parquet file parser for javascript",

@@ -30,3 +30,3 @@ "keywords": [

"devDependencies": {
"@types/node": "20.12.7",
"@types/node": "20.12.8",
"@typescript-eslint/eslint-plugin": "7.8.0",

@@ -33,0 +33,0 @@ "@vitest/coverage-v8": "1.5.3",

@@ -59,3 +59,3 @@ import { assembleObjects } from './assemble.js'

)
const { definitionLevels, repetitionLevels, value: dataPage } = readDataPage(page, daph, schemaPath, columnMetadata)
const { definitionLevels, repetitionLevels, dataPage } = readDataPage(page, daph, schemaPath, columnMetadata)
valuesSeen += daph.num_values

@@ -66,3 +66,3 @@

// construct output values: skip nulls and construct lists
/** @type {any[]} */
/** @type {DecodedArray} */
let values

@@ -114,3 +114,3 @@ if (repetitionLevels.length) {

const { definitionLevels, repetitionLevels, value: dataPage } = readDataPageV2(
const { definitionLevels, repetitionLevels, dataPage } = readDataPageV2(
compressedBytes, header, schemaPath, columnMetadata, compressors

@@ -151,4 +151,5 @@ )

*
* @typedef {import('./types.js').DecodedArray} DecodedArray
* @param {ArrayLike<any> | undefined} dictionary
* @param {number[]} dataPage
* @param {DecodedArray} dataPage
*/

@@ -155,0 +156,0 @@ function dereferenceDictionary(dictionary, dataPage) {

@@ -1,2 +0,2 @@

const dayMillis = 86400000000000 // 1 day in ms
const dayMillis = 86400000000000 // 1 day in milliseconds

@@ -6,7 +6,9 @@ /**

*
* @param {any[]} data series of primitive types
* @typedef {import('./types.js').DecodedArray} DecodedArray
* @param {DecodedArray} data series of primitive types
* @param {import('./types.js').SchemaElement} schemaElement schema element for the data
* @returns {any[]} series of rich types
* @returns {DecodedArray} series of rich types
*/
export function convert(data, schemaElement) {
if (!Array.isArray(data)) return data
const ctype = schemaElement.converted_type

@@ -13,0 +15,0 @@ if (ctype === 'UTF8') {

import { readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
import { readPlain } from './plain.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired, skipDefinitionBytes } from './schema.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired } from './schema.js'
const skipNulls = false // TODO
/**
* Read a data page from the given Uint8Array.
*
* @typedef {{ definitionLevels: number[], numNulls: number }} DefinitionLevels
* @typedef {import("./types.d.ts").DataPage} DataPage

@@ -15,2 +12,3 @@ * @typedef {import("./types.d.ts").ColumnMetaData} ColumnMetaData

* @typedef {import("./types.d.ts").SchemaTree} SchemaTree
* @typedef {import("./types.d.ts").DecodedArray} DecodedArray
* @param {Uint8Array} bytes raw page data (should already be decompressed)

@@ -25,4 +23,4 @@ * @param {DataPageHeader} daph data page header

const reader = { view, offset: 0 }
/** @type {any[]} */
let values = []
/** @type {DecodedArray} */
let dataPage = []

@@ -33,14 +31,3 @@ // repetition levels

// definition levels
let definitionLevels = undefined
let numNulls = 0
// let maxDefinitionLevel = -1
// TODO: move into readDefinitionLevels
if (skipNulls && !isRequired(schemaPath)) {
// skip_definition_bytes
reader.offset += skipDefinitionBytes(daph.num_values)
} else {
const dl = readDefinitionLevels(reader, daph, schemaPath)
definitionLevels = dl.definitionLevels
numNulls = dl.numNulls
}
const { definitionLevels, numNulls } = readDefinitionLevels(reader, daph, schemaPath)

@@ -53,3 +40,3 @@ // read values based on encoding

const plainObj = readPlain(reader, columnMetadata.type, nValues, utf8)
values = Array.isArray(plainObj) ? plainObj : Array.from(plainObj)
dataPage = plainObj
} else if (

@@ -70,7 +57,7 @@ daph.encoding === 'PLAIN_DICTIONARY' ||

if (bitWidth) {
values = new Array(nValues)
readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, values)
dataPage = new Array(nValues)
readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, dataPage)
} else {
// nval zeros
values = new Array(nValues).fill(0)
dataPage = new Array(nValues).fill(0)
}

@@ -81,3 +68,3 @@ } else {

return { definitionLevels, repetitionLevels, value: values }
return { definitionLevels, repetitionLevels, dataPage }
}

@@ -128,3 +115,3 @@

* @param {SchemaTree[]} schemaPath
* @returns {DefinitionLevels} definition levels and number of bytes read
* @returns {{ definitionLevels: number[], numNulls: number }} definition levels
*/

@@ -131,0 +118,0 @@ function readDefinitionLevels(reader, daph, schemaPath) {

@@ -26,3 +26,3 @@ import { decompressPage } from './column.js'

/** @type {any} */
let values = []
let dataPage = []

@@ -60,3 +60,3 @@ const daph2 = ph.data_page_header_v2

const pageReader = { view: pageView, offset: 0 }
values = readPlain(pageReader, columnMetadata.type, nValues, utf8)
dataPage = readPlain(pageReader, columnMetadata.type, nValues, utf8)
} else if (daph2.encoding === 'RLE') {

@@ -70,4 +70,4 @@ const page = decompressPage(compressedBytes, uncompressedPageSize, columnMetadata.codec, compressors)

const pageReader = { view: pageView, offset: 4 }
values = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, values)
dataPage = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage)
}

@@ -83,4 +83,4 @@ } else if (

const pageReader = { view: pageView, offset: 1 }
values = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, values)
dataPage = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage)
} else if (daph2.encoding === 'DELTA_BINARY_PACKED') {

@@ -90,3 +90,3 @@ if (daph2.num_nulls) throw new Error('parquet delta-int not supported')

const page = decompressPage(compressedBytes, uncompressedPageSize, codec, compressors)
deltaBinaryUnpack(page, nValues, values)
deltaBinaryUnpack(page, nValues, dataPage)
} else {

@@ -96,3 +96,3 @@ throw new Error(`parquet unsupported encoding: ${daph2.encoding}`)

return { definitionLevels, repetitionLevels, value: values }
return { definitionLevels, repetitionLevels, dataPage }
}

@@ -99,0 +99,0 @@

@@ -19,6 +19,7 @@ import { readVarInt } from './thrift.js'

* @typedef {import("./types.d.ts").DataReader} DataReader
* @typedef {number[]} DecodedArray
* @param {DataReader} reader - buffer to read data from
* @param {number} width - width of each bit-packed group
* @param {number} length - length of the encoded data
* @param {number[]} values - output array
* @param {DecodedArray} values - output array
*/

@@ -56,3 +57,3 @@ export function readRleBitPackedHybrid(reader, width, length, values) {

* @param {number} bitWidth - width of each bit-packed group
* @param {number[]} values - output array
* @param {DecodedArray} values - output array
* @param {number} seen - number of values seen so far

@@ -59,0 +60,0 @@ */

@@ -26,6 +26,11 @@ /**

* @param {number} count - number of values to read
* @returns {number[]} array of int32 values
* @returns {Int32Array} array of int32 values
*/
function readPlainInt32(reader, count) {
const values = new Array(count)
if ((reader.view.byteOffset + reader.offset) % 4 === 0) {
const values = new Int32Array(reader.view.buffer, reader.view.byteOffset + reader.offset, count)
reader.offset += count * 4
return values
}
const values = new Int32Array(count)
for (let i = 0; i < count; i++) {

@@ -43,6 +48,11 @@ values[i] = reader.view.getInt32(reader.offset + i * 4, true)

* @param {number} count - number of values to read
* @returns {bigint[]} array of int64 values
* @returns {BigInt64Array} array of int64 values
*/
function readPlainInt64(reader, count) {
const values = new Array(count)
if ((reader.view.byteOffset + reader.offset) % 8 === 0) {
const values = new BigInt64Array(reader.view.buffer, reader.view.byteOffset + reader.offset, count)
reader.offset += count * 8
return values
}
const values = new BigInt64Array(count)
for (let i = 0; i < count; i++) {

@@ -78,9 +88,6 @@ values[i] = reader.view.getBigInt64(reader.offset + i * 8, true)

* @param {number} count - number of values to read
* @returns {number[]} array of float values
* @returns {Float32Array} array of float values
*/
function readPlainFloat(reader, count) {
const values = new Array(count)
for (let i = 0; i < count; i++) {
values[i] = reader.view.getFloat32(reader.offset + i * 4, true)
}
const values = new Float32Array(reader.view.buffer, reader.view.byteOffset + reader.offset, count)
reader.offset += count * 4

@@ -95,9 +102,6 @@ return values

* @param {number} count - number of values to read
* @returns {number[]} array of double values
* @returns {Float64Array} array of double values
*/
function readPlainDouble(reader, count) {
const values = new Array(count)
for (let i = 0; i < count; i++) {
values[i] = reader.view.getFloat64(reader.offset + i * 8, true)
}
const values = new Float64Array(reader.view.buffer, reader.view.byteOffset + reader.offset, count)
reader.offset += count * 8

@@ -144,2 +148,3 @@ return values

*
* @typedef {import("./types.d.ts").DecodedArray} DecodedArray
* @typedef {import("./types.d.ts").ParquetType} ParquetType

@@ -150,3 +155,3 @@ * @param {DataReader} reader - buffer to read data from

* @param {boolean} utf8 - whether to decode byte arrays as UTF-8
* @returns {ArrayLike<any>} array of values
* @returns {DecodedArray} array of values
*/

@@ -153,0 +158,0 @@ export function readPlain(reader, type, count, utf8) {

@@ -97,18 +97,2 @@ /**

/**
 * Get the number of bytes to skip for definition levels.
 *
 * @param {number} num number of values
 * @returns {number} number of bytes to skip
 */
export function skipDefinitionBytes(num) {
  // Base cost of 6 bytes covers the first 8 bits of num; every further
  // 7-bit group still set adds one more byte to skip.
  let bytes = 6
  for (let remaining = num >>> 8; remaining !== 0; remaining >>>= 7) {
    bytes++
  }
  return bytes
}
/**
* Get the column name as foo.bar and handle list and map like columns.

@@ -115,0 +99,0 @@ *

@@ -245,3 +245,11 @@ type Awaitable<T> = T | Promise<T>

repetitionLevels: number[]
value: any[]
dataPage: DecodedArray
}
export type DecodedArray =
Uint8Array |
Int32Array |
BigInt64Array |
Float32Array |
Float64Array |
any[]

@@ -27,4 +27,6 @@ /**

* Concatenate two arrays fast.
*
* @typedef {import('./types.js').DecodedArray} DecodedArray
* @param {any[]} aaa first array
* @param {any[]} bbb second array
* @param {DecodedArray} bbb second array
*/

@@ -31,0 +33,0 @@ export function concat(aaa, bbb) {

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc