Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.3.2 to 0.3.3

14

package.json
{
"name": "hyparquet",
"version": "0.3.2",
"version": "0.3.3",
"description": "parquet file parser for javascript",

@@ -30,12 +30,12 @@ "keywords": [

"devDependencies": {
"@types/node": "20.11.19",
"@typescript-eslint/eslint-plugin": "7.0.1",
"@vitest/coverage-v8": "1.3.0",
"eslint": "8.56.0",
"@types/node": "20.11.20",
"@typescript-eslint/eslint-plugin": "7.0.2",
"@vitest/coverage-v8": "1.3.1",
"eslint": "8.57.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.1.0",
"eslint-plugin-jsdoc": "48.2.0",
"http-server": "14.1.1",
"typescript": "5.3.3",
"vitest": "1.3.0"
"vitest": "1.3.1"
}
}
# hyparquet
![hyparquet](hyparquet.jpg)
![hyparquet parakeet](hyparquet.jpg)

@@ -5,0 +5,0 @@ [![npm](https://img.shields.io/npm/v/hyparquet)](https://www.npmjs.com/package/hyparquet)

@@ -17,3 +17,3 @@ import { Encoding, PageType } from './constants.js'

/**
* Read a column from the file.
* Parse column data from a buffer.
*

@@ -28,9 +28,9 @@ * @param {ArrayBuffer} arrayBuffer parquet file contents

export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schema) {
// parse column data
/** @type {ArrayLike<any> | undefined} */
let dictionary = undefined
let valuesSeen = 0
let byteOffset = 0 // byteOffset within the column
/** @type {ArrayLike<any> | undefined} */
let dictionary = undefined
const rowIndex = [0] // map/list object index
const rowData = []
while (valuesSeen < rowGroup.num_rows) {

@@ -49,18 +49,2 @@ // parse column header

)
// decompress bytes
/** @type {Uint8Array | undefined} */
let page
const uncompressed_page_size = Number(header.uncompressed_page_size)
const { codec } = columnMetadata
if (codec === 'UNCOMPRESSED') {
page = compressedBytes
} else if (codec === 'SNAPPY') {
page = new Uint8Array(uncompressed_page_size)
snappyUncompress(compressedBytes, page)
} else {
throw new Error(`parquet unsupported compression codec: ${codec}`)
}
if (page?.length !== uncompressed_page_size) {
throw new Error(`parquet decompressed page length ${page?.length} does not match header ${uncompressed_page_size}`)
}

@@ -72,2 +56,3 @@ // parse page data by type

const page = decompressPage(compressedBytes, Number(header.uncompressed_page_size), columnMetadata.codec)
const { definitionLevels, repetitionLevels, value: dataPage } = readDataPage(page, daph, schema, columnMetadata)

@@ -104,2 +89,3 @@ valuesSeen += daph.num_values

let v = dataPage[index++]
// map to dictionary value

@@ -146,2 +132,3 @@ if (dictionary) {

const page = decompressPage(compressedBytes, Number(header.uncompressed_page_size), columnMetadata.codec)
dictionary = readDictionaryPage(page, diph, schema, columnMetadata)

@@ -232,1 +219,26 @@ } else if (header.type === PageType.DATA_PAGE_V2) {

}
/**
 * Decompress a column page into a buffer of the expected size.
 *
 * @typedef {import('./types.js').PageHeader} PageHeader
 * @typedef {import('./types.js').CompressionCodec} CompressionCodec
 * @param {Uint8Array} compressedBytes raw page bytes read from the file
 * @param {number} uncompressed_page_size expected byte length after decompression
 * @param {CompressionCodec} codec compression codec from the column metadata
 * @returns {Uint8Array} decompressed page bytes
 */
function decompressPage(compressedBytes, uncompressed_page_size, codec) {
  /** @type {Uint8Array | undefined} */
  let page
  switch (codec) {
  case 'UNCOMPRESSED':
    // no decompression needed, use the input bytes directly
    page = compressedBytes
    break
  case 'SNAPPY':
    page = new Uint8Array(uncompressed_page_size)
    snappyUncompress(compressedBytes, page)
    break
  default:
    throw new Error(`parquet unsupported compression codec: ${codec}`)
  }
  // sanity-check the decompressed length against the page header
  if (page?.length !== uncompressed_page_size) {
    throw new Error(`parquet decompressed page length ${page?.length} does not match header ${uncompressed_page_size}`)
  }
  return page
}

@@ -135,3 +135,3 @@ import { Encoding, ParquetType } from './constants.js'

/**
* Read `count` fixed length byte array values.
* Read a fixed length byte array.
*

@@ -138,0 +138,0 @@ * @param {DataView} dataView - buffer to read data from

@@ -20,4 +20,4 @@ export { AsyncBuffer, FileMetaData, SchemaTree } from './types'

* @param {number} [options.rowEnd] last requested row index (exclusive)
* @param {(chunk: ColumnData) => void} [options.onChunk] called when a column chunk is parsed. chunks may include row data outside the requested range.
* @param {(rows: any[][]) => void} [options.onComplete] called when all requested rows and columns are parsed
* @param {Function} [options.onChunk] called when a column chunk is parsed. chunks may include row data outside the requested range.
* @param {Function} [options.onComplete] called when all requested rows and columns are parsed
* @returns {Promise<void>} resolves when all requested rows and columns are parsed

@@ -73,7 +73,7 @@ */

*
* @param {Uint8Array} inputArray compressed data
* @param {Uint8Array} outputArray output buffer
* @param {Uint8Array} input compressed data
* @param {Uint8Array} output output buffer
* @returns {boolean} true if successful
*/
export function snappyUncompress(inputArray: Uint8Array, outputArray: Uint8Array): boolean
export function snappyUncompress(input: Uint8Array, output: Uint8Array): boolean

@@ -80,0 +80,0 @@ /**

@@ -112,2 +112,3 @@

if (!columnMetadata) throw new Error('parquet column metadata is undefined')
const columnStartByte = getColumnOffset(columnMetadata)

@@ -118,2 +119,3 @@ const columnEndByte = columnStartByte + Number(columnMetadata.total_compressed_size)

// skip columns larger than 1gb
// TODO: stream process the data, returning only the requested rows
if (columnBytes > 1 << 30) {

@@ -120,0 +122,0 @@ console.warn(`parquet skipping huge column "${columnMetadata.path_in_schema}" ${columnBytes.toLocaleString()} bytes`)

@@ -44,9 +44,9 @@ /**

*
* @param {Uint8Array} inputArray compressed data
* @param {Uint8Array} outputArray output buffer
* @returns {boolean} true if successful
* @param {Uint8Array} input compressed data
* @param {Uint8Array} output output buffer
* @returns {void}
*/
export function snappyUncompress(inputArray, outputArray) {
const inputLength = inputArray.byteLength
export function snappyUncompress(input, output) {
const inputLength = input.byteLength
const outputLength = output.byteLength
let pos = 0

@@ -56,19 +56,22 @@ let outPos = 0

// skip preamble (contains uncompressed length as varint)
let uncompressedLength = 0
let shift = 0
while (pos < inputLength) {
const c = inputArray[pos]
const c = input[pos]
pos += 1
uncompressedLength |= (c & 0x7f) << shift
if (c < 128) {
break
}
shift += 7
}
if (outputLength && pos >= inputLength) {
throw new Error('invalid snappy length header')
}
while (pos < inputLength) {
const c = inputArray[pos]
const c = input[pos]
let len = 0
pos += 1
if (pos >= inputLength) {
throw new Error('missing eof marker')
}
// There are two types of elements, literals and copies (back references)

@@ -81,10 +84,9 @@ if ((c & 0x3) === 0) {

if (pos + 3 >= inputLength) {
console.warn('snappy error literal pos + 3 >= inputLength')
return false
throw new Error('snappy error literal pos + 3 >= inputLength')
}
const lengthSize = len - 60 // length bytes - 1
len = inputArray[pos]
+ (inputArray[pos + 1] << 8)
+ (inputArray[pos + 2] << 16)
+ (inputArray[pos + 3] << 24)
len = input[pos]
+ (input[pos + 1] << 8)
+ (input[pos + 2] << 16)
+ (input[pos + 3] << 24)
len = (len & WORD_MASK[lengthSize]) + 1

@@ -94,5 +96,5 @@ pos += lengthSize

if (pos + len > inputLength) {
return false // literal exceeds input length
throw new Error('snappy error literal exceeds input length')
}
copyBytes(inputArray, pos, outputArray, outPos, len)
copyBytes(input, pos, output, outPos, len)
pos += len

@@ -107,3 +109,3 @@ outPos += len

len = ((c >>> 2) & 0x7) + 4
offset = inputArray[pos] + ((c >>> 5) << 8)
offset = input[pos] + ((c >>> 5) << 8)
pos += 1

@@ -114,6 +116,6 @@ break

if (inputLength <= pos + 1) {
return false // end of input
throw new Error('snappy error end of input')
}
len = (c >>> 2) + 1
offset = inputArray[pos] + (inputArray[pos + 1] << 8)
offset = input[pos] + (input[pos + 1] << 8)
pos += 2

@@ -124,9 +126,9 @@ break

if (inputLength <= pos + 3) {
return false // end of input
throw new Error('snappy error end of input')
}
len = (c >>> 2) + 1
offset = inputArray[pos]
+ (inputArray[pos + 1] << 8)
+ (inputArray[pos + 2] << 16)
+ (inputArray[pos + 3] << 24)
offset = input[pos]
+ (input[pos + 1] << 8)
+ (input[pos + 2] << 16)
+ (input[pos + 3] << 24)
pos += 4

@@ -138,12 +140,15 @@ break

if (offset === 0 || isNaN(offset)) {
return false // invalid offset
throw new Error(`invalid offset ${offset} pos ${pos} inputLength ${inputLength}`)
}
if (offset > outPos) {
return false // cannot copy from before start of buffer
throw new Error('cannot copy from before start of buffer')
}
selfCopyBytes(outputArray, outPos, offset, len)
selfCopyBytes(output, outPos, offset, len)
outPos += len
}
}
return true
if (outPos !== outputLength) {
throw new Error('premature end of input')
}
}
Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc