New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in · Demo · Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
65
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.2.6 to 0.3.0

2

package.json
{
"name": "hyparquet",
"version": "0.2.6",
"version": "0.3.0",
"description": "parquet file parser for javascript",

@@ -5,0 +5,0 @@ "keywords": [

@@ -1,2 +0,2 @@

import { CompressionCodec, ConvertedType, Encoding, PageType } from './constants.js'
import { Encoding, PageType } from './constants.js'
import { assembleObjects, readDataPage, readDictionaryPage } from './datapage.js'

@@ -52,8 +52,9 @@ import { parquetHeader } from './header.js'

const { codec } = columnMetadata
if (codec === CompressionCodec.SNAPPY) {
if (codec === 'UNCOMPRESSED') {
page = compressedBytes
} else if (codec === 'SNAPPY') {
page = new Uint8Array(uncompressed_page_size)
snappyUncompress(compressedBytes, page)
} else {
const compressor = Object.entries(CompressionCodec).find(([, value]) => value === codec)
throw new Error(`parquet unsupported compression codec: ${codec} ${compressor?.[0]}`)
throw new Error(`parquet unsupported compression codec: ${codec}`)
}

@@ -142,2 +143,4 @@ if (page?.length !== uncompressed_page_size) {

dictionary = readDictionaryPage(page, diph, schema, columnMetadata)
} else if (header.type === PageType.DATA_PAGE_V2) {
throw new Error('parquet data page v2 not supported')
} else {

@@ -179,7 +182,7 @@ throw new Error(`parquet unsupported page type: ${header.type}`)

if (ctype === undefined) return data
if (ctype === ConvertedType.UTF8) {
if (ctype === 'UTF8') {
const decoder = new TextDecoder()
return data.map(v => decoder.decode(v))
}
if (ctype === ConvertedType.DECIMAL) {
if (ctype === 'DECIMAL') {
const scaleFactor = Math.pow(10, schemaElement.scale || 0)

@@ -193,15 +196,15 @@ if (typeof data[0] === 'number') {

}
if (ctype === ConvertedType.DATE) {
if (ctype === 'DATE') {
return data.map(v => new Date(v * dayMillis))
}
if (ctype === ConvertedType.TIME_MILLIS) {
if (ctype === 'TIME_MILLIS') {
return data.map(v => new Date(v))
}
if (ctype === ConvertedType.JSON) {
if (ctype === 'JSON') {
return data.map(v => JSON.parse(v))
}
if (ctype === ConvertedType.BSON) {
if (ctype === 'BSON') {
throw new Error('parquet bson not supported')
}
if (ctype === ConvertedType.INTERVAL) {
if (ctype === 'INTERVAL') {
throw new Error('parquet interval not supported')

@@ -208,0 +211,0 @@ }

@@ -12,3 +12,3 @@ export const ParquetType = {

export const ParquetEncoding = {
export const Encoding = {
PLAIN: 0,

@@ -25,43 +25,43 @@ PLAIN_DICTIONARY: 2,

/**
 * Maps parquet repetition-type names to their numeric thrift enum ids.
 * Built from the ordered name list so names and ids stay in lockstep.
 */
export const FieldRepetitionType = Object.fromEntries(
  ['REQUIRED', 'OPTIONAL', 'REPEATED'].map((name, id) => [name, id])
)
/**
 * Repetition-type names indexed by their numeric thrift enum id,
 * so `FieldRepetitionType[id]` yields the string name.
 */
export const FieldRepetitionType = 'REQUIRED OPTIONAL REPEATED'.split(' ')
/**
 * Maps parquet converted-type names to their numeric thrift enum ids.
 * The ids are the zero-based positions in this ordered name list.
 */
export const ConvertedType = Object.fromEntries([
  'UTF8', 'MAP', 'MAP_KEY_VALUE', 'LIST', 'ENUM', 'DECIMAL', 'DATE',
  'TIME_MILLIS', 'TIME_MICROS', 'TIMESTAMP_MILLIS', 'TIMESTAMP_MICROS',
  'UINT_8', 'UINT_16', 'UINT_32', 'UINT_64',
  'INT_8', 'INT_16', 'INT_32', 'INT_64',
  'JSON', 'BSON', 'INTERVAL',
].map((name, id) => [name, id]))
/**
 * Converted-type names indexed by their numeric thrift enum id,
 * so `ConvertedType[id]` yields the string name.
 */
export const ConvertedType = (
  'UTF8 MAP MAP_KEY_VALUE LIST ENUM DECIMAL DATE TIME_MILLIS TIME_MICROS ' +
  'TIMESTAMP_MILLIS TIMESTAMP_MICROS UINT_8 UINT_16 UINT_32 UINT_64 ' +
  'INT_8 INT_16 INT_32 INT_64 JSON BSON INTERVAL'
).split(' ')
/**
 * Maps parquet compression codec names to their numeric thrift enum ids.
 * The ids are the zero-based positions in this ordered name list.
 */
export const CompressionCodec = Object.fromEntries(
  ['UNCOMPRESSED', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD', 'LZ4_RAW']
    .map((name, id) => [name, id])
)
/**
 * Compression codec names indexed by their numeric thrift enum id,
 * so `CompressionCodec[id]` yields the string name.
 */
export const CompressionCodec =
  'UNCOMPRESSED SNAPPY GZIP LZO BROTLI LZ4 ZSTD LZ4_RAW'.split(' ')

@@ -74,13 +74,1 @@ export const PageType = {

}
/**
 * Maps parquet encoding names to their numeric thrift enum ids.
 * Note: id 1 is intentionally absent from this table.
 */
export const Encoding = Object.fromEntries([
  ['PLAIN', 0],
  ['PLAIN_DICTIONARY', 2],
  ['RLE', 3],
  ['BIT_PACKED', 4], // deprecated
  ['DELTA_BINARY_PACKED', 5],
  ['DELTA_LENGTH_BYTE_ARRAY', 6],
  ['DELTA_BYTE_ARRAY', 7],
  ['RLE_DICTIONARY', 8],
  ['BYTE_STREAM_SPLIT', 9],
])

@@ -35,3 +35,5 @@ import { Encoding, ParquetType } from './constants.js'

// repetition levels
const { value: repetitionLevels, byteLength } = readRepetitionLevels(dataView, offset, daph, schema, columnMetadata)
const { value: repetitionLevels, byteLength } = readRepetitionLevels(
dataView, offset, daph, schema, columnMetadata
)
offset += byteLength

@@ -56,5 +58,10 @@

if (daph.encoding === Encoding.PLAIN) {
const plainObj = readPlain(dataView, columnMetadata.type, daph.num_values - numNulls, offset)
const plainObj = readPlain(dataView, columnMetadata.type, nval, offset)
values = plainObj.value
offset += plainObj.byteLength
} else if (daph.encoding === Encoding.PLAIN_DICTIONARY) {
const plainObj = readPlain(dataView, columnMetadata.type, nval, offset)
values = plainObj.value
offset += plainObj.byteLength
// TODO: dictionary decoding
} else if (daph.encoding === Encoding.RLE_DICTIONARY) {

@@ -71,3 +78,5 @@ // bit width is stored as single byte

if (bitWidth) {
const { value, byteLength } = readRleBitPackedHybrid(dataView, offset, bitWidth, dataView.byteLength - offset, daph.num_values - numNulls)
const { value, byteLength } = readRleBitPackedHybrid(
dataView, offset, bitWidth, dataView.byteLength - offset, nval
)
offset += byteLength

@@ -130,3 +139,2 @@ values = value

* Read the definition levels from this page, if any.
* Other implementations read the definition levels and num nulls, but we don't need em.
*

@@ -133,0 +141,0 @@ * @param {DataView} dataView data view for the page

@@ -1,2 +0,2 @@

import { ParquetEncoding, ParquetType } from './constants.js'
import { Encoding, ParquetType } from './constants.js'
import { readVarInt } from './thrift.js'

@@ -206,10 +206,10 @@

let byteLength = 0
if (encoding === ParquetEncoding.RLE) {
if (encoding === Encoding.RLE) {
let seen = 0
while (seen < count) {
const { value: rleValues, byteLength: rleByteLength } = readRleBitPackedHybrid(dataView, offset + byteLength, bitWidth, 0, 1)
if (!rleValues.length) break // EOF
value.push(...rleValues)
seen += rleValues.length
byteLength += rleByteLength
const rle = readRleBitPackedHybrid(dataView, offset + byteLength, bitWidth, 0, count)
if (!rle.value.length) break // EOF
value.push(...rle.value)
seen += rle.value.length
byteLength += rle.byteLength
}

@@ -248,10 +248,12 @@ } else {

// rle
const { value: rleValues, byteLength: rleByteLength } = readRle(dataView, offset + byteLength, header, width)
value.push(...rleValues)
byteLength += rleByteLength
const rle = readRle(dataView, offset + byteLength, header, width)
value.push(...rle.value)
byteLength += rle.byteLength
} else {
// bit-packed
const { value: bitPackedValues, byteLength: bitPackedByteLength } = readBitPacked(dataView, offset + byteLength, header, width, numValues-value.length)
value.push(...bitPackedValues)
byteLength += bitPackedByteLength
const bitPacked = readBitPacked(
dataView, offset + byteLength, header, width, numValues - value.length
)
value.push(...bitPacked.value)
byteLength += bitPacked.byteLength
}

@@ -258,0 +260,0 @@ }

@@ -0,1 +1,2 @@

import { CompressionCodec, ConvertedType, FieldRepetitionType } from './constants.js'
import { schemaTree } from './schema.js'

@@ -100,6 +101,6 @@ import { deserializeTCompactProtocol } from './thrift.js'

type_length: field.field_2,
repetition_type: field.field_3,
repetition_type: FieldRepetitionType[field.field_3],
name: field.field_4,
num_children: field.field_5,
converted_type: field.field_6,
converted_type: ConvertedType[field.field_6],
scale: field.field_7,

@@ -118,3 +119,3 @@ precision: field.field_8,

path_in_schema: column.field_3.field_3,
codec: column.field_3.field_4,
codec: CompressionCodec[column.field_3.field_4],
num_values: column.field_3.field_5,

@@ -121,0 +122,0 @@ total_uncompressed_size: column.field_3.field_6,

@@ -1,3 +0,1 @@

import { FieldRepetitionType } from './constants.js'
/**

@@ -60,3 +58,3 @@ * @typedef {import('./types.js').SchemaElement} SchemaElement

/**
 * Check whether the named schema element is required.
 *
 * @param {SchemaElement[]} schema parquet schema elements
 * @param {string|string[]} name element name (or path) to look up
 * @returns {boolean} true if the element's repetition type is REQUIRED
 */
export function isRequired(schema, name) {
  // Diff residue left two stacked returns here (the old enum-based compare
  // shadowed the new one); keep only the 0.3.0 string-name comparison,
  // since repetition_type is decoded to its string name during metadata parsing.
  return schemaElement(schema, name).repetition_type === 'REQUIRED'
}

@@ -75,3 +73,3 @@

const element = schemaElement(schema, parts.slice(0, i + 1))
if (element.repetition_type === FieldRepetitionType.REPEATED) {
if (element.repetition_type === 'REPEATED') {
maxLevel += 1

@@ -94,3 +92,3 @@ }

const element = schemaElement(schema, parts.slice(0, i + 1))
if (element.repetition_type !== FieldRepetitionType.REQUIRED) {
if (element.repetition_type !== 'REQUIRED') {
maxLevel += 1

@@ -97,0 +95,0 @@ }

@@ -66,32 +66,30 @@ /**

/** Repetition type of a schema element, as the raw numeric thrift enum (0.2.x form). */
export enum FieldRepetitionType {
REQUIRED = 0,
OPTIONAL = 1,
REPEATED = 2,
}
/** Repetition type of a schema element, as its decoded string name (0.3.0 form). */
export type FieldRepetitionType =
'REQUIRED' |
'OPTIONAL' |
'REPEATED'
/** Converted (logical) type of a column, as the raw numeric thrift enum (0.2.x form). */
export enum ConvertedType {
UTF8 = 0,
MAP = 1,
MAP_KEY_VALUE = 2,
LIST = 3,
ENUM = 4,
DECIMAL = 5,
DATE = 6,
TIME_MILLIS = 7,
TIME_MICROS = 8,
TIMESTAMP_MILLIS = 9,
TIMESTAMP_MICROS = 10,
UINT_8 = 11,
UINT_16 = 12,
UINT_32 = 13,
UINT_64 = 14,
INT_8 = 15,
INT_16 = 16,
INT_32 = 17,
INT_64 = 18,
JSON = 19,
BSON = 20,
INTERVAL = 21,
}
/** Converted (logical) type of a column, as its decoded string name (0.3.0 form). */
export type ConvertedType =
'UTF8' |
'MAP' |
'MAP_KEY_VALUE' |
'LIST' |
'ENUM' |
'DECIMAL' |
'DATE' |
'TIME_MILLIS' |
'TIME_MICROS' |
'TIMESTAMP_MILLIS' |
'TIMESTAMP_MICROS' |
'UINT_8' |
'UINT_16' |
'UINT_32' |
'UINT_64' |
'INT_8' |
'INT_16' |
'INT_32' |
'INT_64' |
'JSON' |
'BSON' |
'INTERVAL'

@@ -139,12 +137,11 @@ export interface RowGroup {

/** Compression codec of a column chunk, as the raw numeric thrift enum (0.2.x form). */
export enum CompressionCodec {
UNCOMPRESSED = 0,
SNAPPY = 1,
GZIP = 2,
LZO = 3,
BROTLI = 4,
LZ4 = 5,
ZSTD = 6,
LZ4_RAW = 7,
}
/** Compression codec of a column chunk, as its decoded string name (0.3.0 form). */
export type CompressionCodec =
'UNCOMPRESSED' |
'SNAPPY' |
'GZIP' |
'LZO' |
'BROTLI' |
'LZ4' |
'ZSTD' |
'LZ4_RAW'

@@ -151,0 +148,0 @@ interface KeyValue {

Socket · SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc