Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.7.8 to 0.7.9

8

package.json
{
"name": "hyparquet",
"version": "0.7.8",
"version": "0.7.9",
"description": "parquet file parser for javascript",

@@ -31,4 +31,4 @@ "keywords": [

"@types/node": "20.12.7",
"@typescript-eslint/eslint-plugin": "7.7.1",
"@vitest/coverage-v8": "1.5.2",
"@typescript-eslint/eslint-plugin": "7.8.0",
"@vitest/coverage-v8": "1.5.3",
"eslint": "8.57.0",

@@ -40,4 +40,4 @@ "eslint-plugin-import": "2.29.1",

"typescript": "5.4.5",
"vitest": "1.5.2"
"vitest": "1.5.3"
}
}

@@ -6,3 +6,3 @@ import { assembleObjects } from './assemble.js'

import { parquetHeader } from './header.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired, schemaElement } from './schema.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired } from './schema.js'
import { snappyUncompress } from './snappy.js'

@@ -12,3 +12,3 @@ import { concat } from './utils.js'

/**
* @typedef {import('./types.js').SchemaElement} SchemaElement
* @typedef {import('./types.js').SchemaTree} SchemaTree
* @typedef {import('./types.js').ColumnMetaData} ColumnMetaData

@@ -26,7 +26,7 @@ * @typedef {import('./types.js').Compressors} Compressors

* @param {ColumnMetaData} columnMetadata column metadata
* @param {SchemaElement[]} schema schema for the file
* @param {SchemaTree[]} schemaPath schema path for the column
* @param {Compressors} [compressors] custom decompressors
* @returns {ArrayLike<any>} array of values
*/
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schema, compressors) {
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schemaPath, compressors) {
/** @type {ArrayLike<any> | undefined} */

@@ -38,2 +38,3 @@ let dictionary = undefined

const rowData = []
const { element } = schemaPath[schemaPath.length - 1]

@@ -62,3 +63,3 @@ while (valuesSeen < rowGroup.num_rows) {

)
const { definitionLevels, repetitionLevels, value: dataPage } = readDataPage(page, daph, schema, columnMetadata)
const { definitionLevels, repetitionLevels, value: dataPage } = readDataPage(page, daph, schemaPath, columnMetadata)
valuesSeen += daph.num_values

@@ -74,10 +75,12 @@

// Use repetition levels to construct lists
const isNull = columnMetadata && !isRequired(schema, [columnMetadata.path_in_schema[0]])
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema)
const isNullable = columnMetadata && !isRequired(schemaPath.slice(0, 2))
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
// convert primitive types to rich types
values = convert(dataPage, element)
values = assembleObjects(
definitionLevels, repetitionLevels, dataPage, isNull, maxDefinitionLevel, maxRepetitionLevel
definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
)
} else if (definitionLevels?.length) {
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
// Use definition levels to skip nulls

@@ -89,6 +92,6 @@ values = []

dereferenceDictionary(dictionary, dataPage)
values = convert(dataPage, schemaElement(schema, columnMetadata.path_in_schema).element)
values = convert(dataPage, element)
} else if (Array.isArray(dataPage)) {
// convert primitive types to rich types
values = convert(dataPage, schemaElement(schema, columnMetadata.path_in_schema).element)
values = convert(dataPage, element)
} else {

@@ -111,3 +114,3 @@ values = dataPage // TODO: data page shouldn't be a fixed byte array?

)
dictionary = readDictionaryPage(page, diph, schema, columnMetadata)
dictionary = readDictionaryPage(page, diph, columnMetadata)
} else if (header.type === 'DATA_PAGE_V2') {

@@ -118,8 +121,8 @@ const daph2 = header.data_page_header_v2

const { definitionLevels, repetitionLevels, value: dataPage } = readDataPageV2(
compressedBytes, header, schema, columnMetadata, compressors
compressedBytes, header, schemaPath, columnMetadata, compressors
)
valuesSeen += daph2.num_values
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema)
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
if (repetitionLevels.length) {

@@ -168,3 +171,3 @@ dereferenceDictionary(dictionary, dataPage)

*
* @param {ColumnMetaData} columnMetadata column metadata
* @param {ColumnMetaData} columnMetadata
* @returns {number} byte offset

@@ -171,0 +174,0 @@ */

import { readData, readPlain, readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired, schemaElement, skipDefinitionBytes } from './schema.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired, skipDefinitionBytes } from './schema.js'

@@ -13,10 +13,10 @@ const skipNulls = false // TODO

* @typedef {import("./types.d.ts").DataPageHeader} DataPageHeader
* @typedef {import("./types.d.ts").SchemaElement} SchemaElement
* @typedef {import("./types.d.ts").SchemaTree} SchemaTree
* @param {Uint8Array} bytes raw page data (should already be decompressed)
* @param {DataPageHeader} daph data page header
* @param {SchemaElement[]} schema schema for the file
* @param {ColumnMetaData} columnMetadata metadata for the column
* @param {SchemaTree[]} schemaPath
* @param {ColumnMetaData} columnMetadata
* @returns {DataPage} definition levels, repetition levels, and array of values
*/
export function readDataPage(bytes, daph, schema, columnMetadata) {
export function readDataPage(bytes, daph, schemaPath, columnMetadata) {
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)

@@ -28,5 +28,3 @@ const reader = { view, offset: 0 }

// repetition levels
const repetitionLevels = readRepetitionLevels(
reader, daph, schema, columnMetadata
)
const repetitionLevels = readRepetitionLevels(reader, daph, schemaPath)

@@ -38,7 +36,7 @@ // definition levels

// TODO: move into readDefinitionLevels
if (skipNulls && !isRequired(schema, columnMetadata.path_in_schema)) {
if (skipNulls && !isRequired(schemaPath)) {
// skip_definition_bytes
reader.offset += skipDefinitionBytes(daph.num_values)
} else {
const dl = readDefinitionLevels(reader, daph, schema, columnMetadata.path_in_schema)
const dl = readDefinitionLevels(reader, daph, schemaPath)
definitionLevels = dl.definitionLevels

@@ -51,3 +49,3 @@ numNulls = dl.numNulls

if (daph.encoding === 'PLAIN') {
const { element } = schemaElement(schema, columnMetadata.path_in_schema)
const { element } = schemaPath[schemaPath.length - 1]
const utf8 = element.converted_type === 'UTF8'

@@ -92,7 +90,6 @@ const plainObj = readPlain(reader, columnMetadata.type, nValues, utf8)

* @param {DictionaryPageHeader} diph dictionary page header
* @param {SchemaElement[]} schema schema for the file
* @param {ColumnMetaData} columnMetadata metadata for the column
* @param {ColumnMetaData} columnMetadata
* @returns {ArrayLike<any>} array of values
*/
export function readDictionaryPage(bytes, diph, schema, columnMetadata) {
export function readDictionaryPage(bytes, diph, columnMetadata) {
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)

@@ -109,9 +106,8 @@ const reader = { view, offset: 0 }

* @param {DataPageHeader} daph data page header
* @param {SchemaElement[]} schema schema for the file
* @param {ColumnMetaData} columnMetadata metadata for the column
* @param {SchemaTree[]} schemaPath
* @returns {any[]} repetition levels and number of bytes read
*/
function readRepetitionLevels(reader, daph, schema, columnMetadata) {
if (columnMetadata.path_in_schema.length > 1) {
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema)
function readRepetitionLevels(reader, daph, schemaPath) {
if (schemaPath.length > 1) {
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
if (maxRepetitionLevel) {

@@ -132,9 +128,8 @@ const bitWidth = widthFromMaxInt(maxRepetitionLevel)

* @param {DataPageHeader} daph data page header
* @param {SchemaElement[]} schema schema for the file
* @param {string[]} path_in_schema path in the schema
* @param {SchemaTree[]} schemaPath
* @returns {DefinitionLevels} definition levels and number of bytes read
*/
function readDefinitionLevels(reader, daph, schema, path_in_schema) {
if (!isRequired(schema, path_in_schema)) {
const maxDefinitionLevel = getMaxDefinitionLevel(schema, path_in_schema)
function readDefinitionLevels(reader, daph, schemaPath) {
if (!isRequired(schemaPath)) {
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
const bitWidth = widthFromMaxInt(maxDefinitionLevel)

@@ -141,0 +136,0 @@ if (bitWidth) {

import { decompressPage } from './column.js'
import { readPlain, readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, schemaElement } from './schema.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
import { readVarInt, readZigZag } from './thrift.js'

@@ -13,11 +13,11 @@

* @typedef {import("./types.d.ts").DataPageHeaderV2} DataPageHeaderV2
* @typedef {import("./types.d.ts").SchemaElement} SchemaElement
* @typedef {import("./types.d.ts").SchemaTree} SchemaTree
* @param {Uint8Array} compressedBytes raw page data (should already be decompressed)
* @param {import("./types.d.ts").PageHeader} ph page header
* @param {SchemaElement[]} schema schema for the file
* @param {ColumnMetaData} columnMetadata metadata for the column
* @param {SchemaTree[]} schemaPath
* @param {ColumnMetaData} columnMetadata
* @param {Compressors | undefined} compressors
* @returns {DataPage} definition levels, repetition levels, and array of values
*/
export function readDataPageV2(compressedBytes, ph, schema, columnMetadata, compressors) {
export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, compressors) {
const view = new DataView(compressedBytes.buffer, compressedBytes.byteOffset, compressedBytes.byteLength)

@@ -32,3 +32,3 @@ const reader = { view, offset: 0 }

// repetition levels
const repetitionLevels = readRepetitionLevelsV2(reader, daph2, schema, columnMetadata)
const repetitionLevels = readRepetitionLevelsV2(reader, daph2, schemaPath)

@@ -40,3 +40,3 @@ if (reader.offset !== daph2.repetition_levels_byte_length) {

// definition levels
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
const definitionLevels = readDefinitionLevelsV2(reader, daph2, maxDefinitionLevel)

@@ -53,3 +53,3 @@

if (daph2.encoding === 'PLAIN') {
const { element } = schemaElement(schema, columnMetadata.path_in_schema)
const { element } = schemaPath[schemaPath.length - 1]
const utf8 = element.converted_type === 'UTF8'

@@ -106,8 +106,7 @@ let page = compressedBytes.slice(reader.offset)

* @param {DataPageHeaderV2} daph2 data page header
* @param {SchemaElement[]} schema schema for the file
* @param {ColumnMetaData} columnMetadata metadata for the column
* @param {SchemaTree[]} schemaPath
* @returns {any[]} repetition levels and number of bytes read
*/
export function readRepetitionLevelsV2(reader, daph2, schema, columnMetadata) {
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema)
export function readRepetitionLevelsV2(reader, daph2, schemaPath) {
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
if (!maxRepetitionLevel) return []

@@ -127,3 +126,3 @@

* @param {DataPageHeaderV2} daph2 data page header v2
* @param {number} maxDefinitionLevel maximum definition level for this column
* @param {number} maxDefinitionLevel
* @returns {number[] | undefined} definition levels and number of bytes read

@@ -130,0 +129,0 @@ */

@@ -12,3 +12,3 @@ import { readVarInt } from './thrift.js'

/**
 * Read `count` PLAIN-encoded booleans, bit-packed into bytes.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of booleans to read
 * @returns {boolean[]} array of boolean values
 */
function readPlainBoolean(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    const byteOffset = reader.offset + Math.floor(i / 8)
    // bits are packed LSB-first within each byte (reconstructed from the
    // `1 << bitOffset` mask below — standard parquet PLAIN boolean layout)
    const bitOffset = i % 8
    const byte = reader.view.getUint8(byteOffset)
    values[i] = (byte & (1 << bitOffset)) !== 0
  }
  // advance past the packed bytes, rounding up to a whole byte
  reader.offset += Math.ceil(count / 8)
  return values
}

@@ -33,8 +33,8 @@

/**
 * Read `count` PLAIN-encoded little-endian 32-bit integers.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of integers to read
 * @returns {number[]} array of int32 values
 */
function readPlainInt32(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    values[i] = reader.view.getInt32(reader.offset + i * 4, true)
  }
  reader.offset += count * 4
  return values
}

@@ -50,8 +50,8 @@

/**
 * Read `count` PLAIN-encoded little-endian 64-bit integers as BigInt.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of integers to read
 * @returns {bigint[]} array of int64 values
 */
function readPlainInt64(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    values[i] = reader.view.getBigInt64(reader.offset + i * 8, true)
  }
  reader.offset += count * 8
  return values
}

@@ -67,10 +67,10 @@

/**
 * Read `count` PLAIN-encoded 96-bit values (12 bytes each) as BigInt.
 *
 * NOTE(review): the value is assembled as (high32 << 32) | low64, so the high
 * word is OR-ed over the upper half of the 64-bit low word — this matches the
 * visible upstream code, but confirm the intended int96 semantics.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of values to read
 * @returns {bigint[]} array of int96 values
 */
function readPlainInt96(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    const low = reader.view.getBigInt64(reader.offset + i * 12, true)
    const high = reader.view.getInt32(reader.offset + i * 12 + 8, true)
    values[i] = (BigInt(high) << BigInt(32)) | low
  }
  reader.offset += count * 12
  return values
}

@@ -86,8 +86,8 @@

/**
 * Read `count` PLAIN-encoded little-endian 32-bit floats.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of floats to read
 * @returns {number[]} array of float values
 */
function readPlainFloat(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    values[i] = reader.view.getFloat32(reader.offset + i * 4, true)
  }
  reader.offset += count * 4
  return values
}

@@ -103,8 +103,8 @@

/**
 * Read `count` PLAIN-encoded little-endian 64-bit doubles.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of doubles to read
 * @returns {number[]} array of double values
 */
function readPlainDouble(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    values[i] = reader.view.getFloat64(reader.offset + i * 8, true)
  }
  reader.offset += count * 8
  return values
}

@@ -120,11 +120,10 @@

/**
 * Read `count` PLAIN-encoded byte arrays: each is a 4-byte little-endian
 * length prefix followed by that many raw bytes.
 *
 * @param {{view: DataView, offset: number}} reader data view and current offset
 * @param {number} count number of byte arrays to read
 * @returns {Uint8Array[]} array of byte arrays (views into the source buffer, not copies)
 */
function readPlainByteArray(reader, count) {
  const values = new Array(count)
  for (let i = 0; i < count; i++) {
    const length = reader.view.getInt32(reader.offset, true)
    reader.offset += 4
    // subarray view over the underlying buffer; no copy is made
    values[i] = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, length)
    reader.offset += length
  }
  return values
}

@@ -210,3 +209,3 @@

/** @type {any[]} */
const value = []
const values = []
if (encoding === 'RLE') {

@@ -217,3 +216,3 @@ let seen = 0

if (!rle.length) break // EOF
concat(value, rle)
concat(values, rle)
seen += rle.length

@@ -224,3 +223,3 @@ }

}
return value
return values
}

@@ -247,5 +246,5 @@

/** @type {number[]} */
const value = []
const values = []
const startOffset = reader.offset
while (reader.offset - startOffset < length && value.length < numValues) {
while (reader.offset - startOffset < length && values.length < numValues) {
const [header, newOffset] = readVarInt(reader.view, reader.offset)

@@ -256,13 +255,13 @@ reader.offset = newOffset

const rle = readRle(reader, header, width)
concat(value, rle)
concat(values, rle)
} else {
// bit-packed
const bitPacked = readBitPacked(
reader, header, width, numValues - value.length
reader, header, width, numValues - values.length
)
concat(value, bitPacked)
concat(values, bitPacked)
}
}
return value
return values
}

@@ -284,11 +283,11 @@

const width = (bitWidth + 7) >> 3
let readValue
let value
if (width === 1) {
readValue = reader.view.getUint8(reader.offset)
value = reader.view.getUint8(reader.offset)
reader.offset++
} else if (width === 2) {
readValue = reader.view.getUint16(reader.offset, true)
value = reader.view.getUint16(reader.offset, true)
reader.offset += 2
} else if (width === 4) {
readValue = reader.view.getUint32(reader.offset, true)
value = reader.view.getUint32(reader.offset, true)
reader.offset += 4

@@ -300,7 +299,7 @@ } else {

// repeat value count times
const value = []
const values = new Array(count)
for (let i = 0; i < count; i++) {
value.push(readValue)
values[i] = value
}
return value
return values
}

@@ -321,3 +320,4 @@

let count = (header >> 1) << 3
const mask = maskForBits(bitWidth)
// mask for bitWidth number of bits
const mask = (1 << bitWidth) - 1

@@ -335,3 +335,3 @@ // Sometimes it tries to read outside of available memory, but it will be masked out anyway

/** @type {number[]} */
const value = []
const values = []

@@ -353,3 +353,3 @@ // read values

// emit value by shifting off to the right and masking
value.push((data >> right) & mask)
values.push((data >> right) & mask)
remaining--

@@ -362,13 +362,3 @@ }

return value
return values
}
/**
 * Build an integer bitmask whose lowest `numBits` bits are set.
 *
 * @param {number} numBits - how many low-order bits the mask should cover
 * @returns {number} mask with `numBits` low bits set to 1
 */
function maskForBits(numBits) {
  const mask = (1 << numBits) - 1
  return mask
}
import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, ParquetType } from './constants.js'
import { schemaElement } from './schema.js'
import { getSchemaPath } from './schema.js'
import { deserializeTCompactProtocol } from './thrift.js'

@@ -175,3 +175,3 @@

/**
 * Return the root of the schema tree for the given parquet file metadata.
 *
 * @param {{schema: SchemaElement[]}} metadata parquet file metadata
 * @returns {SchemaTree} root node of the schema tree
 */
export function parquetSchema(metadata) {
  // getSchemaPath with an empty path returns [root]; take the root node
  return getSchemaPath(metadata.schema, [])[0]
}

@@ -178,0 +178,0 @@

import { getColumnOffset, readColumn } from './column.js'
import { parquetMetadataAsync } from './metadata.js'
import { getColumnName, isMapLike } from './schema.js'
import { getColumnName, getSchemaPath, isMapLike } from './schema.js'
import { concat } from './utils.js'

@@ -78,3 +78,3 @@

* @param {(rows: any[][]) => void} [options.onComplete] called when all requested rows and columns are parsed
* @param {Compressors} [options.compressors] custom decompressors
* @param {Compressors} [options.compressors]
* @param {RowGroup} rowGroup row group to read

@@ -92,5 +92,4 @@ * @param {number} groupStart row index of the first row in the group

if (!columnMetadata) throw new Error('parquet column metadata is undefined')
const columnName = getColumnName(metadata.schema, columnMetadata.path_in_schema)
// skip columns that are not requested
if (columns && !columns.includes(columnName)) return
if (columns && !columns.includes(getColumnName(columnMetadata.path_in_schema))) return

@@ -106,5 +105,5 @@ const startByte = getColumnOffset(columnMetadata)

}
// if row group size is less than 128mb, pre-load in one read
// if row group size is less than 32mb, pre-load in one read
let groupBuffer
if (groupEndByte - groupStartByte <= 1 << 27) {
if (groupEndByte - groupStartByte <= 1 << 25) {
// pre-load row group byte data in one big read,

@@ -126,4 +125,3 @@ // otherwise read column data individually

// skip columns that are not requested
const columnName = getColumnName(metadata.schema, columnMetadata.path_in_schema)
// skip columns that are not requested
const columnName = getColumnName(columnMetadata.path_in_schema)
if (columns && !columns.includes(columnName)) continue

@@ -157,6 +155,6 @@

promises.push(buffer.then(arrayBuffer => {
// TODO: extract SchemaElement for this column
const schemaPath = getSchemaPath(metadata.schema, columnMetadata.path_in_schema)
/** @type {ArrayLike<any> | undefined} */
let columnData = readColumn(
arrayBuffer, bufferOffset, rowGroup, columnMetadata, metadata.schema, compressors
arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors
)

@@ -167,3 +165,3 @@ if (columnData.length !== Number(rowGroup.num_rows)) {

if (isMapLike(metadata.schema, columnMetadata.path_in_schema)) {
if (isMapLike(schemaPath)) {
const name = columnMetadata.path_in_schema.slice(0, -2).join('.')

@@ -195,6 +193,2 @@ if (!maps.has(name)) {

}
if (keys[i][j] instanceof Uint8Array) {
// decode utf-8 keys
keys[i][j] = new TextDecoder().decode(keys[i][j])
}
if (!keys[i][j]) continue

@@ -201,0 +195,0 @@ obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]

@@ -31,35 +31,29 @@ /**

/**
 * Get schema elements from the root to the given element name.
 *
 * @param {SchemaElement[]} schema flat list of schema elements for the file
 * @param {string[]} name path to the element
 * @returns {SchemaTree[]} list of schema tree nodes from root to the element
 * @throws {Error} if any path segment is not found in the schema
 */
export function getSchemaPath(schema, name) {
  let tree = schemaTree(schema, 0)
  // walk the tree from the root, collecting each matched node
  const path = [tree]
  for (const part of name) {
    const child = tree.children.find(child => child.element.name === part)
    if (!child) throw new Error(`parquet schema element not found: ${name}`)
    path.push(child)
    tree = child
  }
  return path
}
/**
 * Check if the schema path and all its ancestors are required.
 * An element is required if all of its ancestors are required.
 *
 * @param {SchemaTree[]} schemaPath schema tree nodes from the root to the element
 * @returns {boolean} true if the element is required
 */
export function isRequired(schemaPath) {
  // skip the root element (index 0), which carries no repetition type
  for (const { element } of schemaPath.slice(1)) {
    if (element.repetition_type !== 'REQUIRED') {
      return false
    }
  }
  return true
}

@@ -74,14 +68,12 @@ }

/**
 * Get the maximum repetition level for a schema path.
 *
 * @param {SchemaTree[]} schemaPath schema tree nodes from the root to the element
 * @returns {number} max repetition level
 */
export function getMaxRepetitionLevel(schemaPath) {
  let maxLevel = 0
  // every REPEATED element along the path adds one repetition level
  for (const { element } of schemaPath) {
    if (element.repetition_type === 'REPEATED') {
      maxLevel++
    }
  }
  return maxLevel
}
@@ -93,14 +85,12 @@ }

/**
 * Get the maximum definition level for a schema path.
 *
 * @param {SchemaTree[]} schemaPath schema tree nodes from the root to the element
 * @returns {number} max definition level
 */
export function getMaxDefinitionLevel(schemaPath) {
  let maxLevel = 0
  // skip the root (index 0); every non-REQUIRED element adds one level
  for (const { element } of schemaPath.slice(1)) {
    if (element.repetition_type !== 'REQUIRED') {
      maxLevel++
    }
  }
  return maxLevel
}

@@ -126,13 +116,12 @@ }

/**
 * Get the column name as foo.bar and handle list and map like columns.
 *
 * Strips the parquet wrapper segments (`.list.element`, `.key_value.key`,
 * `.key_value.value`) from the joined dotted path.
 *
 * @param {string[]} path column path segments from the parquet metadata
 * @returns {string} column name
 */
export function getColumnName(path) {
  return path.join('.')
    .replace(/(\.list\.element)+/g, '')
    .replace(/\.key_value\.key/g, '')
    .replace(/\.key_value\.value/g, '')
}

@@ -143,9 +132,8 @@

*
* @param {SchemaElement[]} schemaElements parquet schema elements
* @param {string[]} path column path
* @param {SchemaTree[]} schemaPath
* @returns {boolean} true if map-like
*/
function isListLike(schemaElements, path) {
const schema = schemaElement(schemaElements, path.slice(0, -2))
if (path.length < 3) return false
export function isListLike(schemaPath) {
const schema = schemaPath.at(-3)
if (!schema) return false
if (schema.element.converted_type !== 'LIST') return false

@@ -167,9 +155,8 @@ if (schema.children.length > 1) return false

*
* @param {SchemaElement[]} schemaElements parquet schema elements
* @param {string[]} path column path
* @param {SchemaTree[]} schemaPath
* @returns {boolean} true if map-like
*/
export function isMapLike(schemaElements, path) {
const schema = schemaElement(schemaElements, path.slice(0, -2))
if (path.length < 3) return false
export function isMapLike(schemaPath) {
const schema = schemaPath.at(-3)
if (!schema) return false
if (schema.element.converted_type !== 'MAP') return false

@@ -176,0 +163,0 @@ if (schema.children.length > 1) return false

@@ -87,9 +87,9 @@ // TCompactProtocol types

index = listIndex
const listValues = []
const values = new Array(listSize)
for (let i = 0; i < listSize; i++) {
let listElem
[listElem, index] = readElement(view, elemType, index)
listValues.push(listElem)
values[i] = listElem
}
return [listValues, index]
return [values, index]
}

@@ -96,0 +96,0 @@ case CompactType.STRUCT: {

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc