parquets - npm Package Compare versions

Comparing version 0.8.16 to 0.10.1

dev/demo.ts

jest.js
"use strict";
process.argv[1] = "./node_modules/jest/bin/jest";
process.argv[process.argv.length - 1] = process.argv[process.argv.length - 1].replace(".ts", ".js");
console.log("-----------");
console.log(process.argv);
console.log("-----------");
require(process.argv[1]);
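
The new jest.js helper rewrites process.argv in place: argv[1] is pointed at the local Jest binary and the trailing argument's .ts extension is swapped for .js before Jest is loaded in-process. Presumably this lets a runner that is handed a TypeScript source path execute the compiled test instead; e.g. `node jest.js test/rle.ts` would end up running Jest against `test/rle.js` (that invocation path is an assumption, not taken from the diff).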

@@ -6,0 +7,0 @@ // const importLocal = require('import-local');

 /// <reference types="node" />
 import { ParquetType, TODO } from '../declare';
 export interface ParquetCodecImpl {
-    encodeValues(type: ParquetType, values: any, opts?: TODO): Buffer;
+    encodeValues(type: ParquetType, values: TODO, opts?: TODO): Buffer;
     decodeValues(type: ParquetType, cursor: TODO, count: number, opts: TODO): any[];

@@ -6,0 +6,0 @@ }

@@ -146,3 +146,3 @@ "use strict";

 if (values[i].length !== opts.typeLength) {
-    throw new Error('invalid value for FIXED_LEN_BYTE_ARRAY: ' + values[i]);
+    throw new Error(`invalid value for FIXED_LEN_BYTE_ARRAY: ${values[i]}`);
 }

@@ -182,3 +182,3 @@ }

 default:
-    throw new Error('unsupported type: ' + type);
+    throw new Error(`unsupported type: ${type}`);
 }

@@ -206,3 +206,3 @@ }

 default:
-    throw new Error('unsupported type: ' + type);
+    throw new Error(`unsupported type: ${type}`);
 }

@@ -209,0 +209,0 @@ }

@@ -5,4 +5,4 @@ "use strict";

 function encodeRunBitpacked(values, opts) {
-    if (values.length % 8 !== 0) {
-        throw new Error('must be a multiple of 8');
+    for (let i = 0; i < values.length % 8; i++) {
+        values.push(0);
     }
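
A note on why the new padding loop works: the bound `values.length % 8` is re-evaluated after every push, so the loop keeps appending zeros until the length reaches the next multiple of 8, at which point the bound wraps to 0 and the loop exits. A standalone sketch of just that behavior:

// Sketch: the re-evaluated loop bound pads to the next multiple of 8.
const values = [0, 1, 2, 3, 4, 5, 6, 7, 6, 5]; // length 10
for (let i = 0; i < values.length % 8; i++) {
    values.push(0); // each push bumps values.length % 8 by one, until it wraps to 0
}
console.log(values.length); // 16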

@@ -43,34 +43,36 @@ const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8)));

         default:
-            throw new Error('unsupported type: ' + type);
+            throw new Error(`unsupported type: ${type}`);
     }
     let buf = Buffer.alloc(0);
-    const runs = [];
-    for (let cur = 0; cur < values.length; cur += 8) {
-        let repeating = true;
-        for (let i = 1; i < 8; ++i) {
-            if (values[cur + i] !== values[cur]) {
-                repeating = false;
-            }
-        }
-        const append = runs.length > 0 &&
-            (runs[runs.length - 1][1] !== null) === repeating &&
-            (!repeating || runs[runs.length - 1][1] === values[cur]);
-        if (!append) {
-            runs.push([cur, repeating ? values[cur] : null]);
-        }
-    }
-    for (let i = values.length - (values.length % 8); i < values.length; ++i) {
-        runs.push([i, values[i]]);
-    }
-    for (let i = 0; i < runs.length; ++i) {
-        const begin = runs[i][0];
-        const end = i < runs.length - 1 ? runs[i + 1][0] : values.length;
-        const rep = runs[i][1];
-        if (rep === null) {
-            buf = Buffer.concat([buf, encodeRunBitpacked(values.slice(begin, end), opts)]);
-        }
-        else {
-            buf = Buffer.concat([buf, encodeRunRepeated(rep, end - begin, opts)]);
-        }
-    }
+    let run = [];
+    let repeats = 0;
+    for (let i = 0; i < values.length; i++) {
+        // If we are at the beginning of a run and the next value is same we start
+        // collecting repeated values
+        if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {
+            // If we have any data in runs we need to encode them
+            if (run.length) {
+                buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
+                run = [];
+            }
+            repeats = 1;
+        }
+        else if (repeats > 0 && values[i] === values[i - 1]) {
+            repeats += 1;
+        }
+        else {
+            // If values changes we need to post any previous repeated values
+            if (repeats) {
+                buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);
+                repeats = 0;
+            }
+            run.push(values[i]);
+        }
+    }
+    if (repeats) {
+        buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);
+    }
+    else if (run.length) {
+        buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
+    }
     if (opts.disableEnvelope) {
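
The rewrite replaces the old two-pass scheme (index the input into `runs`, then emit) with a single pass that alternates between collecting a bit-packed run and counting repeats, flushing one kind whenever the other starts. A self-contained sketch of the new run-splitting rule, with the actual encoders stubbed out so the emitted runs are visible (names here are illustrative, not the library API):

type Run =
    | { kind: 'repeated'; value: number; count: number }
    | { kind: 'bitpacked'; values: number[] };

// Mirrors the new encodeValues loop, but records runs instead of encoding them.
function splitRuns(values: number[]): Run[] {
    const out: Run[] = [];
    let run: number[] = [];
    let repeats = 0;
    for (let i = 0; i < values.length; i++) {
        // A repeated run may only start on an 8-value boundary of the current
        // bit-packed run, because bit-packed runs hold whole groups of 8.
        if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {
            if (run.length) { out.push({ kind: 'bitpacked', values: run }); run = []; }
            repeats = 1;
        } else if (repeats > 0 && values[i] === values[i - 1]) {
            repeats += 1;
        } else {
            if (repeats) { out.push({ kind: 'repeated', value: values[i - 1], count: repeats }); repeats = 0; }
            run.push(values[i]);
        }
    }
    if (repeats) out.push({ kind: 'repeated', value: values[values.length - 1], count: repeats });
    else if (run.length) out.push({ kind: 'bitpacked', values: run });
    return out;
}

console.log(splitRuns([1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
// -> [ { kind: 'repeated', value: 1, count: 4 },
//      { kind: 'bitpacked', values: [2, 3, 4, 5, 6, 7, 8, 9] } ]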

@@ -116,3 +118,3 @@ return buf;

 }
-const values = [];
+let values = [];
 while (values.length < count) {

@@ -130,2 +132,3 @@ const header = varint.decode(cursor.buffer, cursor.offset);

 }
+values = values.slice(0, count);
 if (values.length !== count) {

@@ -132,0 +135,0 @@ throw new Error('invalid RLE encoding');
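
The new `values = values.slice(0, count)` pairs with the encoder change above: bit-packed runs are now zero-padded to a multiple of 8, so decoding can legitimately yield more values than requested. Truncating to `count` before the length check keeps that padding from triggering the 'invalid RLE encoding' error.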

@@ -45,2 +45,8 @@ /// <reference types="node" />

-columnData?: Record<string, ColumnData>;
+[path: string]: {
+    dlevels: any[];
+    rlevels: any[];
+    values: any[];
+    count: number;
+} | any;
 }

@@ -47,0 +53,0 @@ export interface ColumnData {
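
For orientation, a hypothetical illustration of the widened interface (RecordBuffer in declare.ts, judging by the surrounding hunks): the index signature lets shredded column data sit directly under each column path. The import path and values below are assumptions for the sketch:

import { RecordBuffer } from 'parquets/lib/declare';

// One shredded column keyed by its path; the `| any` arm of the signature
// tolerates the buffer's other bookkeeping members.
const buf: RecordBuffer = {
    name: { dlevels: [0], rlevels: [0], values: ['apples'], count: 1 }
};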

@@ -10,2 +10,3 @@ "use strict";

 const Util = require("./util");
+// import Fs = require('fs');
 /**

@@ -229,3 +230,3 @@ * Parquet File Magic String

 if (!(encoding in codec_1.PARQUET_CODEC)) {
-    throw new Error('invalid encoding: ' + encoding);
+    throw new Error(`invalid encoding: ${encoding}`);
 }

@@ -261,3 +262,3 @@ return codec_1.PARQUET_CODEC[encoding].decodeValues(type, cursor, count, opts);

 default:
-    throw new Error('invalid page type: ' + pageType);
+    throw new Error(`invalid page type: ${pageType}`);
 }

@@ -274,3 +275,28 @@ Array.prototype.push.apply(data.rlevels, pageData.rlevels);

 const valueCount = header.data_page_header.num_values;
+const valueEncoding = Util.getThriftEnum(parquet_types_1.Encoding, header.data_page_header.encoding);
+// const info = {
+//     path: opts.column.path.join('.'),
+//     valueEncoding,
+//     dLevelEncoding,
+//     rLevelEncoding,
+//     cursorOffset: cursor.offset,
+//     cursorEnd,
+//     cusrorSize: cursor.size,
+//     header,
+//     opts,
+//     buffer: cursor.buffer.toJSON(),
+//     values: null as any[],
+//     valBuf: null as any
+// };
+// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));
+/* uncompress page */
+let uncursor = cursor;
+if (opts.compression !== 'UNCOMPRESSED') {
+    const valuesBuf = Compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
+    uncursor = {
+        buffer: valuesBuf,
+        offset: 0,
+        size: valuesBuf.length
+    };
+    cursor.offset = cursorEnd;
+}
 /* read repetition levels */
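
The net effect of this hunk: the page body is inflated once, up front, into `uncursor`, and the repetition levels, definition levels, and values below are all read from that single decompressed buffer. In a v1 data page the level data is compressed together with the values, so the old flow, which only decompressed immediately before reading values, would read levels out of still-compressed bytes whenever a real codec was in use; presumably this is the 'optional' plus compression incompatibility that the README change below marks as resolved.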

@@ -281,5 +307,6 @@ const rLevelEncoding = Util.getThriftEnum(parquet_types_1.Encoding, header.data_page_header.repetition_level_encoding);

 if (opts.rLevelMax > 0) {
-    rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, cursor, valueCount, {
+    rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, uncursor, valueCount, {
         bitWidth: Util.getBitWidth(opts.rLevelMax),
-        disableEnvelope: false
+        disableEnvelope: false,
+        column: opts.column
     });

@@ -295,5 +322,6 @@ }

 if (opts.dLevelMax > 0) {
-    dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, cursor, valueCount, {
+    dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, uncursor, valueCount, {
         bitWidth: Util.getBitWidth(opts.dLevelMax),
-        disableEnvelope: false
+        disableEnvelope: false,
+        column: opts.column
     });

@@ -304,3 +332,2 @@ }

 }
-/* read values */
 let valueCountNonNull = 0;

@@ -313,16 +340,10 @@ for (const dlvl of dLevels) {

-/* read values */
-let valuesBufCursor = cursor;
-if (opts.compression !== 'UNCOMPRESSED') {
-    const valuesBuf = Compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
-    valuesBufCursor = {
-        buffer: valuesBuf,
-        offset: 0,
-        size: valuesBuf.length
-    };
-    cursor.offset = cursorEnd;
-}
-const valueEncoding = Util.getThriftEnum(parquet_types_1.Encoding, header.data_page_header.encoding);
-const values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, {
+const values = decodeValues(opts.type, valueEncoding, uncursor, valueCountNonNull, {
     typeLength: opts.column.typeLength,
     bitWidth: opts.column.typeLength
 });
+// info.valBuf = uncursor.buffer.toJSON();
+// info.values = values;
+// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));
 return {

@@ -329,0 +350,0 @@ dlevels: dLevels,

@@ -100,3 +100,3 @@ "use strict";

 if (!typeDef) {
-    throw new Error('invalid parquet type: ' + opts.type);
+    throw new Error(`invalid parquet type: ${opts.type}`);
 }

@@ -108,3 +108,3 @@ /* field encoding */

 if (!(opts.encoding in codec_1.PARQUET_CODEC)) {
-    throw new Error('unsupported parquet encoding: ' + opts.encoding);
+    throw new Error(`unsupported parquet encoding: ${opts.encoding}`);
 }

@@ -115,3 +115,3 @@ if (!opts.compression) {

 if (!(opts.compression in compression_1.PARQUET_COMPRESSION_METHODS)) {
-    throw new Error('unsupported compression method: ' + opts.compression);
+    throw new Error(`unsupported compression method: ${opts.compression}`);
 }

@@ -118,0 +118,0 @@ /* add to schema */

@@ -78,6 +78,6 @@ "use strict";

 if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {
-    throw new Error('missing required field: ' + field.name);
+    throw new Error(`missing required field: ${field.name}`);
 }
 if (values.length > 1 && field.repetitionType !== 'REPEATED') {
-    throw new Error('too many values for field: ' + field.name);
+    throw new Error(`too many values for field: ${field.name}`);
 }

@@ -84,0 +84,0 @@ // push null

@@ -247,3 +247,3 @@ "use strict";

 if (!(encoding in codec_1.PARQUET_CODEC)) {
-    throw new Error('invalid encoding: ' + encoding);
+    throw new Error(`invalid encoding: ${encoding}`);
 }

@@ -256,7 +256,2 @@ return codec_1.PARQUET_CODEC[encoding].encodeValues(type, values, opts);

 function encodeDataPage(column, valueCount, rowCount, values, rlevels, dlevels, compression) {
-    /* encode values */
-    const valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { typeLength: column.typeLength, bitWidth: column.typeLength });
-    // tslint:disable-next-line:no-parameter-reassignment
-    compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
-    const compressedBuf = Compression.deflate(compression, valuesBuf);
     /* encode repetition and definition levels */

@@ -277,2 +272,12 @@ let rLevelsBuf = Buffer.alloc(0);

 }
+/* encode values */
+const valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { typeLength: column.typeLength, bitWidth: column.typeLength });
+const dataBuf = Buffer.concat([
+    rLevelsBuf,
+    dLevelsBuf,
+    valuesBuf
+]);
+// tslint:disable-next-line:no-parameter-reassignment
+compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
+const compressedBuf = Compression.deflate(compression, dataBuf);
 /* build page header */

@@ -287,4 +292,4 @@ const header = new parquet_types_1.PageHeader({

 }),
-uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
-compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
+uncompressed_page_size: dataBuf.length,
+compressed_page_size: compressedBuf.length
 });

@@ -295,4 +300,2 @@ /* concat page header, repetition and definition levels and values */

 headerBuf,
-rLevelsBuf,
-dLevelsBuf,
 compressedBuf

@@ -299,0 +302,0 @@ ]);
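
In other words, the page body layout moves from [header][rlevels][dlevels][deflate(values)] to [header][deflate(rlevels + dlevels + values)], and uncompressed_page_size / compressed_page_size now describe the whole body rather than mixed sums. This is the writer-side counterpart of the reader's single up-front inflate above: in a v1 data page, levels and values are compressed as one block.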

 {
     "name": "parquets",
     "description": "TypeScript implementation of the Parquet file format, based on parquet.js",
-    "version": "0.8.16",
+    "version": "0.10.1",
     "homepage": "https://github.com/kbajalc/parquets",

@@ -22,7 +22,9 @@ "author": "kbajalc@gmail.com",

"build": "rm -rf ./lib ; rm -rf ./build ; tsc -p src ; tsc -p .",
"watch": "rm -rf ./lib ; tsc -p src --watch",
"test": "tsc -p src && jest --verbose test/*.ts",
"watch": "rm -rf ./lib ; tsc -p . --watch",
"test": "npm run build && jest --verbose test/*.ts",
"tsgen": "thrift-typescript --target apache --rootDir . --sourceDir . --outDir codegen parquet.thrift",
"thrift": "thrift --gen js:node parquet.thrift && thrift --gen js:ts parquet.thrift",
"peer": "npm i brotli lzo lz4js --no-save"
"peer": "npm i brotli lzo lz4js --no-save",
"drill": "../../tools/apache-drill-1.15.0/bin/drill-embedded",
"sql": "echo SELECT CONVERT_FROM(timestamp_field, 'TIMESTAMP_IMPALA') as timestamp_field FROM `dfs.file_with_timestamp.parquet`"
},

@@ -33,8 +35,8 @@ "engines": {

"dependencies": {
"bson": "^1.0.4",
"debug": "^3.1.0",
"bson": "^1.1.0",
"debug": "^4.1.1",
"int53": "^0.2.4",
"object-stream": "0.0.1",
"snappyjs": "^0.6.0",
"thrift": "^0.10.0",
"thrift": "^0.11.0",
"varint": "^5.0.0"

@@ -48,18 +50,18 @@ },

"devDependencies": {
"@creditkarma/thrift-typescript": "^2.0.0",
"@types/bson": "^1.0.9",
"@creditkarma/thrift-typescript": "^2.0.14",
"@types/bson": "^1.0.11",
"@types/debug": "0.0.30",
"@types/jest": "^22.2.3",
"@types/node": "^10.5.1",
"@types/node": "^10.12.29",
"brotli": "^1.3.0",
"chai": "^4.1.2",
"jest": "^22.4.4",
"jest-environment-node": "^23.2.0",
"chai": "^4.2.0",
"jest": "^24.0.0",
"jest-environment-node": "^24.0.0",
"lz4js": "^0.2.0",
"lzo": "^0.4.3",
"ts-jest": "^22.4.6",
"ts-node": "^5.0.1",
"tslint": "^5.10.0",
"tslint-config-airbnb": "^5.9.2",
"typescript": "^2.9.2"
"lzo": "^0.4.10",
"ts-jest": "^24.0.0",
"ts-node": "^8.0.2",
"tslint": "^5.13.1",
"tslint-config-airbnb": "^5.11.1",
"typescript": "^3.3.3333"
},

@@ -74,3 +76,3 @@ "jest": {

"testPathIgnorePatterns": [
"<rootDir>/build"
"<rootDir>/build_"
],

@@ -77,0 +79,0 @@ "moduleFileExtensions": [

@@ -15,3 +15,7 @@ # parquets

-**WARNING**: *There are compatibility issues with the reference implementation when using 'optional' columns! Only GZIP and SNAPPY compressions are working properly. Testing done with [Apache Drill](https://drill.apache.org)*.
+**WARNING**: *There are compatibility issues with the reference implementation [Apache Drill](https://drill.apache.org)*:
+- only GZIP and SNAPPY compressions are compatible
+- the problem with 'optional' columns combined with 'compression' is resolved
+- files with 'repeated' columns cannot be read by Drill
+- columns with nested 'fields' are not compatible
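
Taken together, the compatible subset is flat, non-repeated columns compressed with SNAPPY or GZIP. A minimal sketch of a schema that stays inside it, using the writer API shown in the parquets README (field names are illustrative):

import { ParquetSchema, ParquetWriter } from 'parquets';

// Flat scalar columns only: no 'repeated' columns, no nested fields.
const schema = new ParquetSchema({
    name: { type: 'UTF8', compression: 'SNAPPY' },
    quantity: { type: 'INT64', optional: true, compression: 'SNAPPY' }
});

async function writeDrillFriendly() {
    const writer = await ParquetWriter.openFile(schema, 'fruits.parquet');
    await writer.appendRow({ name: 'apples', quantity: 10 });
    await writer.close();
}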

@@ -18,0 +22,0 @@ **What is Parquet?**: Parquet is a column-oriented file format; it allows you to

 import { ParquetType, TODO } from '../declare';
 export interface ParquetCodecImpl {
-    encodeValues(type: ParquetType, values, opts?: TODO): Buffer;
+    encodeValues(type: ParquetType, values: TODO, opts?: TODO): Buffer;
     decodeValues(type: ParquetType, cursor: TODO, count: number, opts: TODO): any[];

@@ -6,0 +6,0 @@ }

@@ -187,3 +187,3 @@ import { FieldDefinition, ParquetType, TODO } from '../declare';

 if (values[i].length !== opts.typeLength) {
-    throw new Error('invalid value for FIXED_LEN_BYTE_ARRAY: ' + values[i]);
+    throw new Error(`invalid value for FIXED_LEN_BYTE_ARRAY: ${values[i]}`);
 }

@@ -229,3 +229,3 @@ }

 default:
-    throw new Error('unsupported type: ' + type);
+    throw new Error(`unsupported type: ${type}`);
 }

@@ -253,4 +253,4 @@ }

 default:
-    throw new Error('unsupported type: ' + type);
+    throw new Error(`unsupported type: ${type}`);
 }
 }

@@ -5,4 +5,4 @@ import varint = require('varint');

 function encodeRunBitpacked(values: number[], opts: TODO): Buffer {
-    if (values.length % 8 !== 0) {
-        throw new Error('must be a multiple of 8');
+    for (let i = 0; i < values.length % 8; i++) {
+        values.push(0);
     }

@@ -52,41 +52,37 @@

         default:
-            throw new Error('unsupported type: ' + type);
+            throw new Error(`unsupported type: ${type}`);
     }
     let buf = Buffer.alloc(0);
-    const runs = [];
-    for (let cur = 0; cur < values.length; cur += 8) {
-        let repeating = true;
-        for (let i = 1; i < 8; ++i) {
-            if (values[cur + i] !== values[cur]) {
-                repeating = false;
-            }
-        }
-        const append =
-            runs.length > 0 &&
-            (runs[runs.length - 1][1] !== null) === repeating &&
-            (!repeating || runs[runs.length - 1][1] === values[cur]);
-        if (!append) {
-            runs.push([cur, repeating ? values[cur] : null]);
-        }
-    }
-    for (let i = values.length - (values.length % 8); i < values.length; ++i) {
-        runs.push([i, values[i]]);
-    }
-    for (let i = 0; i < runs.length; ++i) {
-        const begin = runs[i][0];
-        const end = i < runs.length - 1 ? runs[i + 1][0] : values.length;
-        const rep = runs[i][1];
-        if (rep === null) {
-            buf = Buffer.concat([buf, encodeRunBitpacked(values.slice(begin, end), opts)]);
-        } else {
-            buf = Buffer.concat([buf, encodeRunRepeated(rep, end - begin, opts)]);
-        }
-    }
+    let run = [];
+    let repeats = 0;
+    for (let i = 0; i < values.length; i++) {
+        // If we are at the beginning of a run and the next value is same we start
+        // collecting repeated values
+        if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {
+            // If we have any data in runs we need to encode them
+            if (run.length) {
+                buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
+                run = [];
+            }
+            repeats = 1;
+        } else if (repeats > 0 && values[i] === values[i - 1]) {
+            repeats += 1;
+        } else {
+            // If values changes we need to post any previous repeated values
+            if (repeats) {
+                buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);
+                repeats = 0;
+            }
+            run.push(values[i]);
+        }
+    }
+    if (repeats) {
+        buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);
+    } else if (run.length) {
+        buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
+    }
     if (opts.disableEnvelope) {

@@ -141,3 +137,3 @@ return buf;

-const values = [];
+let values = [];
 while (values.length < count) {

@@ -154,2 +150,3 @@ const header = varint.decode(cursor.buffer, cursor.offset);

 }
+values = values.slice(0, count);

@@ -156,0 +153,0 @@ if (values.length !== count) {

@@ -76,2 +76,8 @@ import { RowGroup } from './gen/parquet_types';

-columnData?: Record<string, ColumnData>;
+[path: string]: {
+    dlevels: any[],
+    rlevels: any[],
+    values: any[],
+    count: number
+} | any;
 }

@@ -78,0 +84,0 @@

@@ -9,2 +9,3 @@ import { PARQUET_CODEC } from './codec';

 import * as Util from './util';
+// import Fs = require('fs');

@@ -306,3 +307,3 @@ /**

 if (!(encoding in PARQUET_CODEC)) {
-    throw new Error('invalid encoding: ' + encoding);
+    throw new Error(`invalid encoding: ${encoding}`);
 }

@@ -347,3 +348,3 @@

 default:
-    throw new Error('invalid page type: ' + pageType);
+    throw new Error(`invalid page type: ${pageType}`);
 }

@@ -363,7 +364,35 @@

 const valueCount = header.data_page_header.num_values;
+const valueEncoding = Util.getThriftEnum(
+    Encoding,
+    header.data_page_header.encoding
+) as ParquetCodec;
+// const info = {
+//     path: opts.column.path.join('.'),
+//     valueEncoding,
+//     dLevelEncoding,
+//     rLevelEncoding,
+//     cursorOffset: cursor.offset,
+//     cursorEnd,
+//     cusrorSize: cursor.size,
+//     header,
+//     opts,
+//     buffer: cursor.buffer.toJSON(),
+//     values: null as any[],
+//     valBuf: null as any
+// };
+// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));
+/* uncompress page */
+let uncursor = cursor;
+if (opts.compression !== 'UNCOMPRESSED') {
+    const valuesBuf = Compression.inflate(
+        opts.compression,
+        cursor.buffer.slice(cursor.offset, cursorEnd),
+        header.uncompressed_page_size
+    );
+    uncursor = {
+        buffer: valuesBuf,
+        offset: 0,
+        size: valuesBuf.length
+    };
+    cursor.offset = cursorEnd;
+}
 /* read repetition levels */

@@ -374,3 +403,2 @@ const rLevelEncoding = Util.getThriftEnum(

 ) as ParquetCodec;
-// tslint:disable-next-line:prefer-array-literal

@@ -382,7 +410,8 @@ let rLevels = new Array(valueCount);

 rLevelEncoding,
-cursor,
+uncursor,
 valueCount,
 {
     bitWidth: Util.getBitWidth(opts.rLevelMax),
-    disableEnvelope: false
+    disableEnvelope: false,
+    column: opts.column
 }

@@ -399,3 +428,2 @@ );

 ) as ParquetCodec;
-// tslint:disable-next-line:prefer-array-literal

@@ -407,7 +435,8 @@ let dLevels = new Array(valueCount);

 dLevelEncoding,
-cursor,
+uncursor,
 valueCount,
 {
     bitWidth: Util.getBitWidth(opts.dLevelMax),
-    disableEnvelope: false
+    disableEnvelope: false,
+    column: opts.column
 }

@@ -418,4 +447,2 @@ );

 }
-/* read values */
 let valueCountNonNull = 0;

@@ -427,25 +454,12 @@ for (const dlvl of dLevels) {

 }
-/* read values */
-let valuesBufCursor = cursor;
-if (opts.compression !== 'UNCOMPRESSED') {
-    const valuesBuf = Compression.inflate(
-        opts.compression,
-        cursor.buffer.slice(cursor.offset, cursorEnd),
-        header.uncompressed_page_size
-    );
-    valuesBufCursor = {
-        buffer: valuesBuf,
-        offset: 0,
-        size: valuesBuf.length
-    };
-    cursor.offset = cursorEnd;
-}
-const valueEncoding = Util.getThriftEnum(
-    Encoding,
-    header.data_page_header.encoding
-) as ParquetCodec;
 const values = decodeValues(
     opts.type,
     valueEncoding,
-    valuesBufCursor,
+    uncursor,
     valueCountNonNull,

@@ -455,4 +469,9 @@ {

     bitWidth: opts.column.typeLength
-});
+    }
+);
+// info.valBuf = uncursor.buffer.toJSON();
+// info.values = values;
+// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));
 return {

@@ -537,3 +556,4 @@ dlevels: dLevels,

     bitWidth: opts.column.typeLength
-});
+    }
+);

@@ -540,0 +560,0 @@ return {

@@ -139,3 +139,3 @@ import { PARQUET_CODEC } from './codec';

 if (!typeDef) {
-    throw new Error('invalid parquet type: ' + opts.type);
+    throw new Error(`invalid parquet type: ${opts.type}`);
 }

@@ -149,3 +149,3 @@

 if (!(opts.encoding in PARQUET_CODEC)) {
-    throw new Error('unsupported parquet encoding: ' + opts.encoding);
+    throw new Error(`unsupported parquet encoding: ${opts.encoding}`);
 }

@@ -158,3 +158,3 @@

 if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {
-    throw new Error('unsupported compression method: ' + opts.compression);
+    throw new Error(`unsupported compression method: ${opts.compression}`);
 }

@@ -161,0 +161,0 @@

@@ -93,7 +93,7 @@ import { ColumnData, FieldDefinition, ParquetRow, RecordBuffer, TODO } from './declare';

 if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {
-    throw new Error('missing required field: ' + field.name);
+    throw new Error(`missing required field: ${field.name}`);
 }
 if (values.length > 1 && field.repetitionType !== 'REPEATED') {
-    throw new Error('too many values for field: ' + field.name);
+    throw new Error(`too many values for field: ${field.name}`);
 }

@@ -100,0 +100,0 @@

@@ -5,3 +5,6 @@ {

"outDir": "../lib"
}
},
"include": [
"./**/*"
]
}

@@ -334,3 +334,3 @@ import { WriteStream } from 'fs';

 if (!(encoding in PARQUET_CODEC)) {
-    throw new Error('invalid encoding: ' + encoding);
+    throw new Error(`invalid encoding: ${encoding}`);
 }

@@ -353,14 +353,2 @@

 ): { header: PageHeader, headerSize, page: Buffer } {
-    /* encode values */
-    const valuesBuf = encodeValues(
-        column.primitiveType,
-        column.encoding,
-        values,
-        { typeLength: column.typeLength, bitWidth: column.typeLength }
-    );
-    // tslint:disable-next-line:no-parameter-reassignment
-    compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
-    const compressedBuf = Compression.deflate(compression, valuesBuf);
     /* encode repetition and definition levels */

@@ -393,2 +381,20 @@ let rLevelsBuf = Buffer.alloc(0);

+/* encode values */
+const valuesBuf = encodeValues(
+    column.primitiveType,
+    column.encoding,
+    values,
+    { typeLength: column.typeLength, bitWidth: column.typeLength }
+);
+const dataBuf = Buffer.concat([
+    rLevelsBuf,
+    dLevelsBuf,
+    valuesBuf
+]);
+// tslint:disable-next-line:no-parameter-reassignment
+compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
+const compressedBuf = Compression.deflate(compression, dataBuf);
 /* build page header */

@@ -405,4 +411,4 @@ const header = new PageHeader({

 }),
-uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
-compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
+uncompressed_page_size: dataBuf.length,
+compressed_page_size: compressedBuf.length
 });

@@ -414,4 +420,2 @@

 headerBuf,
-rLevelsBuf,
-dLevelsBuf,
 compressedBuf

@@ -418,0 +422,0 @@ ]);

@@ -36,2 +36,32 @@ import chai = require('chai');

+describe('number of values not a multiple of 8', function () {
+    it('should encode bitpacked values', function () {
+        const buf = parquet_codec_rle.encodeValues(
+            'INT32',
+            [0, 1, 2, 3, 4, 5, 6, 7, 6, 5],
+            {
+                disableEnvelope: true,
+                bitWidth: 3
+            });
+        assert.deepEqual(buf, new Buffer([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]));
+    });
+    it('should decode bitpacked values', function () {
+        const vals = parquet_codec_rle.decodeValues(
+            'INT32',
+            {
+                buffer: new Buffer([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]),
+                offset: 0,
+            },
+            10,
+            {
+                disableEnvelope: true,
+                bitWidth: 3
+            });
+        assert.deepEqual(vals, [0, 1, 2, 3, 4, 5, 6, 7, 6, 5]);
+    });
+});
 it('should encode repeated values', function () {
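
Why seven bytes: the ten inputs are zero-padded to sixteen, so the varint run header is ((16 / 8) << 1) | 1 = 0x05 (two bit-packed groups of eight), followed by 16 × 3 bits = 6 bytes of packed data. Decoding then yields sixteen values, which the `slice(0, count)` fix above truncates back to the ten requested.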

@@ -38,0 +68,0 @@ const buf = parquet_codec_rle.encodeValues(

@@ -7,3 +7,3 @@ import chai = require('chai');

-const TEST_NUM_ROWS = 10000;
+const TEST_NUM_ROWS = 1000;
 const TEST_VTIME = Date.now();

@@ -10,0 +10,0 @@

@@ -61,2 +61,3 @@ {

// "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
// "allowSyntheticDefaultImports": true,
// "esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */

@@ -82,2 +83,7 @@ // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */

 },
+"include": [
+    "src/**/*",
+    "dev/**/*",
+    "test/**/*"
+],
 "exclude": [

@@ -84,0 +90,0 @@ "node_modules",
