Comparing version 0.4.0 to 0.4.1
{ | ||
"name": "hyparquet", | ||
"version": "0.4.0", | ||
"version": "0.4.1", | ||
"description": "parquet file parser for javascript", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
@@ -52,3 +52,5 @@ import { Encoding, PageType } from './constants.js' | ||
const page = decompressPage(compressedBytes, Number(header.uncompressed_page_size), columnMetadata.codec) | ||
const page = decompressPage( | ||
compressedBytes, Number(header.uncompressed_page_size), columnMetadata.codec | ||
) | ||
const { definitionLevels, repetitionLevels, value: dataPage } = readDataPage(page, daph, schema, columnMetadata) | ||
@@ -68,3 +70,5 @@ valuesSeen += daph.num_values | ||
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema) | ||
values = assembleObjects(definitionLevels, repetitionLevels, dataPage, isNull, nullValue, maxDefinitionLevel, rowIndex[0]) | ||
values = assembleObjects( | ||
definitionLevels, repetitionLevels, dataPage, isNull, nullValue, maxDefinitionLevel, rowIndex[0] | ||
) | ||
} else if (definitionLevels?.length) { | ||
@@ -91,3 +95,3 @@ const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema) | ||
rowData.push(...Array.from(values)) | ||
rowData.push(...values) | ||
} else if (header.type === PageType.DICTIONARY_PAGE) { | ||
@@ -94,0 +98,0 @@ const diph = header.dictionary_page_header |
@@ -112,5 +112,3 @@ import { Encoding, ParquetType } from './constants.js' | ||
const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength) | ||
// read values based on encoding | ||
const { value } = readPlain(dataView, columnMetadata.type, diph.num_values, 0, false) | ||
return value | ||
return readPlain(dataView, columnMetadata.type, diph.num_values, 0, false).value | ||
} | ||
@@ -117,0 +115,0 @@ |
@@ -101,10 +101,9 @@ import { decompressPage } from './column.js' | ||
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema) | ||
if (maxRepetitionLevel) { | ||
const bitWidth = widthFromMaxInt(maxRepetitionLevel) | ||
// num_values is index 1 for either type of page header | ||
return readRleBitPackedHybrid( | ||
dataView, offset, bitWidth, daph2.repetition_levels_byte_length, daph2.num_values | ||
).value | ||
} | ||
return [] | ||
if (!maxRepetitionLevel) return [] | ||
const bitWidth = widthFromMaxInt(maxRepetitionLevel) | ||
// num_values is index 1 for either type of page header | ||
return readRleBitPackedHybrid( | ||
dataView, offset, bitWidth, daph2.repetition_levels_byte_length, daph2.num_values | ||
).value | ||
} | ||
@@ -162,7 +161,8 @@ | ||
let stop = -bitWidth | ||
// TODO: possible loss of precision | ||
const mask = 0xffffffffffffffff >> (64 - bitWidth) | ||
// only works for bitWidth < 31 | ||
const mask = (1 << bitWidth) - 1 | ||
while (count) { | ||
if (stop < 0) { | ||
data = ((data & 0x00ffffffffffffff) << 8) | dataView.getUint8(offset++) | ||
// fails when data gets too large | ||
data = (data << 8) | dataView.getUint8(offset++) | ||
stop += 8 | ||
@@ -169,0 +169,0 @@ } else { |
@@ -252,3 +252,3 @@ import { Encoding, ParquetType } from './constants.js' | ||
const startByteLength = byteLength | ||
while (offset + byteLength - startByteLength < length) { | ||
while (byteLength - startByteLength < length && value.length < numValues) { | ||
const [header, newOffset] = readVarInt(dataView, offset + byteLength) | ||
@@ -255,0 +255,0 @@ byteLength = newOffset - offset |
2283
85538