Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

hyparquet

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyparquet - npm Package Compare versions

Comparing version 0.3.1 to 0.3.2

12

package.json
{
"name": "hyparquet",
"version": "0.3.1",
"version": "0.3.2",
"description": "parquet file parser for javascript",

@@ -30,12 +30,12 @@ "keywords": [

"devDependencies": {
"@types/node": "20.11.17",
"@typescript-eslint/eslint-plugin": "6.21.0",
"@vitest/coverage-v8": "1.2.2",
"@types/node": "20.11.19",
"@typescript-eslint/eslint-plugin": "7.0.1",
"@vitest/coverage-v8": "1.3.0",
"eslint": "8.56.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.0.6",
"eslint-plugin-jsdoc": "48.1.0",
"http-server": "14.1.1",
"typescript": "5.3.3",
"vitest": "1.2.2"
"vitest": "1.3.0"
}
}

@@ -72,8 +72,32 @@ # hyparquet

## Supported Parquet Files
The parquet format supports a number of different compression and encoding types.
Hyparquet does not support 100% of all parquet files, and probably never will, since supporting every possible compression type would increase the size of the library, and many of them are rarely used in practice.
Compression:
- [X] Uncompressed
- [X] Snappy
- [ ] GZip
- [ ] LZO
- [ ] Brotli
- [ ] LZ4
- [ ] ZSTD
- [ ] LZ4_RAW
Page Type:
- [X] Data Page
- [ ] Index Page
- [X] Dictionary Page
- [ ] Data Page V2
Contributions are welcome!
## References
- https://github.com/apache/parquet-format
- https://github.com/apache/parquet-testing
- https://github.com/apache/thrift
- https://github.com/dask/fastparquet
- https://github.com/apache/thrift
- https://github.com/google/snappy
- https://github.com/zhipeng-jia/snappyjs
import { Encoding, ParquetType } from './constants.js'
import { readData, readPlain, readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired, skipDefinitionBytes } from './schema.js'
import {
getMaxDefinitionLevel,
getMaxRepetitionLevel,
isRequired,
schemaElement,
skipDefinitionBytes,
} from './schema.js'

@@ -57,3 +63,5 @@ const skipNulls = false // TODO

if (daph.encoding === Encoding.PLAIN) {
const plainObj = readPlain(dataView, columnMetadata.type, nval, offset)
const se = schemaElement(schema, columnMetadata.path_in_schema)
const utf8 = se.converted_type === 'UTF8'
const plainObj = readPlain(dataView, columnMetadata.type, nval, offset, utf8)
values = plainObj.value

@@ -104,3 +112,3 @@ offset += plainObj.byteLength

// read values based on encoding
const { value } = readPlain(dataView, columnMetadata.type, diph.num_values)
const { value } = readPlain(dataView, columnMetadata.type, diph.num_values, 0, false)
return value

@@ -107,0 +115,0 @@ }

@@ -156,5 +156,6 @@ import { Encoding, ParquetType } from './constants.js'

* @param {number} offset - offset to start reading from the DataView
* @param {boolean} utf8 - whether to decode byte arrays as UTF-8
* @returns {Decoded<ArrayLike<any>>} array of values
*/
export function readPlain(dataView, type, count, offset = 0) {
export function readPlain(dataView, type, count, offset, utf8) {
if (count === 0) return { value: [], byteLength: 0 }

@@ -174,3 +175,11 @@ if (type === ParquetType.BOOLEAN) {

} else if (type === ParquetType.BYTE_ARRAY) {
return readPlainByteArray(dataView, offset, count)
const byteArray = readPlainByteArray(dataView, offset, count)
if (utf8) {
const decoder = new TextDecoder()
return {
value: byteArray.value.map(bytes => decoder.decode(bytes)),
byteLength: byteArray.byteLength,
}
}
return byteArray
} else if (type === ParquetType.FIXED_LEN_BYTE_ARRAY) {

@@ -177,0 +186,0 @@ return readPlainByteArrayFixed(dataView, offset, count)

@@ -42,5 +42,3 @@ /**

const child = tree.children.find(child => child.element.name === part)
if (!child) {
throw new Error(`parquet schema element not found: ${name}`)
}
if (!child) throw new Error(`parquet schema element not found: ${name}`)
tree = child

@@ -53,2 +51,3 @@ }

* Check if the schema element with the given name is required.
* An element is required if all of its ancestors are required.
*

@@ -60,3 +59,13 @@ * @param {SchemaElement[]} schema

export function isRequired(schema, name) {
return schemaElement(schema, name).repetition_type === 'REQUIRED'
/** @type {SchemaTree | undefined} */
let tree = schemaTree(schema, 0)
for (let i = 0; i < name.length; i++) {
// Find schema child with the given name
tree = tree.children.find(child => child.element.name === name[i])
if (!tree) throw new Error(`parquet schema element not found: ${name}`)
if (tree.element.repetition_type !== 'REQUIRED') {
return false
}
}
return true
}

@@ -63,0 +72,0 @@

@@ -10,16 +10,15 @@ /**

export function toJson(obj) {
if (typeof obj === 'bigint') {
return Number(obj)
} else if (Array.isArray(obj)) {
return obj.map(toJson)
} else if (obj instanceof Object) {
if (obj === undefined) return null
if (typeof obj === 'bigint') return Number(obj)
if (Array.isArray(obj)) return obj.map(toJson)
if (obj instanceof Object) {
/** @type {Record<string, unknown>} */
const newObj = {}
for (const key of Object.keys(obj)) {
if (obj[key] === undefined) continue
newObj[key] = toJson(obj[key])
}
return newObj
} else {
return obj
}
return obj
}
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc