Comparing version 0.8.16 to 0.10.1
"use strict"; | ||
process.argv[1] = "./node_modules/jest/bin/jest"; | ||
process.argv[process.argv.length - 1] = process.argv[process.argv.length - 1].replace(".ts", ".js"); | ||
console.log("-----------"); | ||
console.log(process.argv); | ||
console.log("-----------"); | ||
require(process.argv[1]); | ||
@@ -6,0 +7,0 @@ // const importLocal = require('import-local'); |
/// <reference types="node" /> | ||
import { ParquetType, TODO } from '../declare'; | ||
export interface ParquetCodecImpl { | ||
encodeValues(type: ParquetType, values: any, opts?: TODO): Buffer; | ||
encodeValues(type: ParquetType, values: TODO, opts?: TODO): Buffer; | ||
decodeValues(type: ParquetType, cursor: TODO, count: number, opts: TODO): any[]; | ||
@@ -6,0 +6,0 @@ } |
@@ -146,3 +146,3 @@ "use strict"; | ||
if (values[i].length !== opts.typeLength) { | ||
throw new Error('invalid value for FIXED_LEN_BYTE_ARRAY: ' + values[i]); | ||
throw new Error(`invalid value for FIXED_LEN_BYTE_ARRAY: ${values[i]}`); | ||
} | ||
@@ -182,3 +182,3 @@ } | ||
default: | ||
throw new Error('unsupported type: ' + type); | ||
throw new Error(`unsupported type: ${type}`); | ||
} | ||
@@ -206,3 +206,3 @@ } | ||
default: | ||
throw new Error('unsupported type: ' + type); | ||
throw new Error(`unsupported type: ${type}`); | ||
} | ||
@@ -209,0 +209,0 @@ } |
@@ -5,4 +5,4 @@ "use strict"; | ||
function encodeRunBitpacked(values, opts) { | ||
if (values.length % 8 !== 0) { | ||
throw new Error('must be a multiple of 8'); | ||
for (let i = 0; i < values.length % 8; i++) { | ||
values.push(0); | ||
} | ||
@@ -43,34 +43,36 @@ const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8))); | ||
default: | ||
throw new Error('unsupported type: ' + type); | ||
throw new Error(`unsupported type: ${type}`); | ||
} | ||
let buf = Buffer.alloc(0); | ||
const runs = []; | ||
for (let cur = 0; cur < values.length; cur += 8) { | ||
let repeating = true; | ||
for (let i = 1; i < 8; ++i) { | ||
if (values[cur + i] !== values[cur]) { | ||
repeating = false; | ||
let run = []; | ||
let repeats = 0; | ||
for (let i = 0; i < values.length; i++) { | ||
// If we are at the beginning of a run and the next value is same we start | ||
// collecting repeated values | ||
if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) { | ||
// If we have any data in runs we need to encode them | ||
if (run.length) { | ||
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]); | ||
run = []; | ||
} | ||
repeats = 1; | ||
} | ||
const append = runs.length > 0 && | ||
(runs[runs.length - 1][1] !== null) === repeating && | ||
(!repeating || runs[runs.length - 1][1] === values[cur]); | ||
if (!append) { | ||
runs.push([cur, repeating ? values[cur] : null]); | ||
else if (repeats > 0 && values[i] === values[i - 1]) { | ||
repeats += 1; | ||
} | ||
} | ||
for (let i = values.length - (values.length % 8); i < values.length; ++i) { | ||
runs.push([i, values[i]]); | ||
} | ||
for (let i = 0; i < runs.length; ++i) { | ||
const begin = runs[i][0]; | ||
const end = i < runs.length - 1 ? runs[i + 1][0] : values.length; | ||
const rep = runs[i][1]; | ||
if (rep === null) { | ||
buf = Buffer.concat([buf, encodeRunBitpacked(values.slice(begin, end), opts)]); | ||
} | ||
else { | ||
buf = Buffer.concat([buf, encodeRunRepeated(rep, end - begin, opts)]); | ||
// If values changes we need to post any previous repeated values | ||
if (repeats) { | ||
buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]); | ||
repeats = 0; | ||
} | ||
run.push(values[i]); | ||
} | ||
} | ||
if (repeats) { | ||
buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]); | ||
} | ||
else if (run.length) { | ||
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]); | ||
} | ||
if (opts.disableEnvelope) { | ||
@@ -116,3 +118,3 @@ return buf; | ||
} | ||
const values = []; | ||
let values = []; | ||
while (values.length < count) { | ||
@@ -130,2 +132,3 @@ const header = varint.decode(cursor.buffer, cursor.offset); | ||
} | ||
values = values.slice(0, count); | ||
if (values.length !== count) { | ||
@@ -132,0 +135,0 @@ throw new Error('invalid RLE encoding'); |
@@ -45,2 +45,8 @@ /// <reference types="node" /> | ||
columnData?: Record<string, ColumnData>; | ||
[path: string]: { | ||
dlevels: any[]; | ||
rlevels: any[]; | ||
values: any[]; | ||
count: number; | ||
} | any; | ||
} | ||
@@ -47,0 +53,0 @@ export interface ColumnData { |
@@ -10,2 +10,3 @@ "use strict"; | ||
const Util = require("./util"); | ||
// import Fs = require('fs'); | ||
/** | ||
@@ -229,3 +230,3 @@ * Parquet File Magic String | ||
if (!(encoding in codec_1.PARQUET_CODEC)) { | ||
throw new Error('invalid encoding: ' + encoding); | ||
throw new Error(`invalid encoding: ${encoding}`); | ||
} | ||
@@ -261,3 +262,3 @@ return codec_1.PARQUET_CODEC[encoding].decodeValues(type, cursor, count, opts); | ||
default: | ||
throw new Error('invalid page type: ' + pageType); | ||
throw new Error(`invalid page type: ${pageType}`); | ||
} | ||
@@ -274,3 +275,28 @@ Array.prototype.push.apply(data.rlevels, pageData.rlevels); | ||
const valueCount = header.data_page_header.num_values; | ||
const valueEncoding = Util.getThriftEnum(parquet_types_1.Encoding, header.data_page_header.encoding); | ||
// const info = { | ||
// path: opts.column.path.join('.'), | ||
// valueEncoding, | ||
// dLevelEncoding, | ||
// rLevelEncoding, | ||
// cursorOffset: cursor.offset, | ||
// cursorEnd, | ||
// cusrorSize: cursor.size, | ||
// header, | ||
// opts, | ||
// buffer: cursor.buffer.toJSON(), | ||
// values: null as any[], | ||
// valBuf: null as any | ||
// }; | ||
// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2)); | ||
/* uncompress page */ | ||
let uncursor = cursor; | ||
if (opts.compression !== 'UNCOMPRESSED') { | ||
const valuesBuf = Compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size); | ||
uncursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
}; | ||
cursor.offset = cursorEnd; | ||
} | ||
/* read repetition levels */ | ||
@@ -281,5 +307,6 @@ const rLevelEncoding = Util.getThriftEnum(parquet_types_1.Encoding, header.data_page_header.repetition_level_encoding); | ||
if (opts.rLevelMax > 0) { | ||
rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, cursor, valueCount, { | ||
rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, uncursor, valueCount, { | ||
bitWidth: Util.getBitWidth(opts.rLevelMax), | ||
disableEnvelope: false | ||
disableEnvelope: false, | ||
column: opts.column | ||
}); | ||
@@ -295,5 +322,6 @@ } | ||
if (opts.dLevelMax > 0) { | ||
dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, cursor, valueCount, { | ||
dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, uncursor, valueCount, { | ||
bitWidth: Util.getBitWidth(opts.dLevelMax), | ||
disableEnvelope: false | ||
disableEnvelope: false, | ||
column: opts.column | ||
}); | ||
@@ -304,3 +332,2 @@ } | ||
} | ||
/* read values */ | ||
let valueCountNonNull = 0; | ||
@@ -313,16 +340,10 @@ for (const dlvl of dLevels) { | ||
/* read values */ | ||
let valuesBufCursor = cursor; | ||
if (opts.compression !== 'UNCOMPRESSED') { | ||
const valuesBuf = Compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size); | ||
valuesBufCursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
}; | ||
cursor.offset = cursorEnd; | ||
} | ||
const values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, { | ||
const valueEncoding = Util.getThriftEnum(parquet_types_1.Encoding, header.data_page_header.encoding); | ||
const values = decodeValues(opts.type, valueEncoding, uncursor, valueCountNonNull, { | ||
typeLength: opts.column.typeLength, | ||
bitWidth: opts.column.typeLength | ||
}); | ||
// info.valBuf = uncursor.buffer.toJSON(); | ||
// info.values = values; | ||
// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2)); | ||
return { | ||
@@ -329,0 +350,0 @@ dlevels: dLevels, |
@@ -100,3 +100,3 @@ "use strict"; | ||
if (!typeDef) { | ||
throw new Error('invalid parquet type: ' + opts.type); | ||
throw new Error(`invalid parquet type: ${opts.type}`); | ||
} | ||
@@ -108,3 +108,3 @@ /* field encoding */ | ||
if (!(opts.encoding in codec_1.PARQUET_CODEC)) { | ||
throw new Error('unsupported parquet encoding: ' + opts.encoding); | ||
throw new Error(`unsupported parquet encoding: ${opts.encoding}`); | ||
} | ||
@@ -115,3 +115,3 @@ if (!opts.compression) { | ||
if (!(opts.compression in compression_1.PARQUET_COMPRESSION_METHODS)) { | ||
throw new Error('unsupported compression method: ' + opts.compression); | ||
throw new Error(`unsupported compression method: ${opts.compression}`); | ||
} | ||
@@ -118,0 +118,0 @@ /* add to schema */ |
@@ -78,6 +78,6 @@ "use strict"; | ||
if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') { | ||
throw new Error('missing required field: ' + field.name); | ||
throw new Error(`missing required field: ${field.name}`); | ||
} | ||
if (values.length > 1 && field.repetitionType !== 'REPEATED') { | ||
throw new Error('too many values for field: ' + field.name); | ||
throw new Error(`too many values for field: ${field.name}`); | ||
} | ||
@@ -84,0 +84,0 @@ // push null |
@@ -247,3 +247,3 @@ "use strict"; | ||
if (!(encoding in codec_1.PARQUET_CODEC)) { | ||
throw new Error('invalid encoding: ' + encoding); | ||
throw new Error(`invalid encoding: ${encoding}`); | ||
} | ||
@@ -256,7 +256,2 @@ return codec_1.PARQUET_CODEC[encoding].encodeValues(type, values, opts); | ||
function encodeDataPage(column, valueCount, rowCount, values, rlevels, dlevels, compression) { | ||
/* encode values */ | ||
const valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { typeLength: column.typeLength, bitWidth: column.typeLength }); | ||
// tslint:disable-next-line:no-parameter-reassignment | ||
compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression; | ||
const compressedBuf = Compression.deflate(compression, valuesBuf); | ||
/* encode repetition and definition levels */ | ||
@@ -277,2 +272,12 @@ let rLevelsBuf = Buffer.alloc(0); | ||
} | ||
/* encode values */ | ||
const valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { typeLength: column.typeLength, bitWidth: column.typeLength }); | ||
const dataBuf = Buffer.concat([ | ||
rLevelsBuf, | ||
dLevelsBuf, | ||
valuesBuf | ||
]); | ||
// tslint:disable-next-line:no-parameter-reassignment | ||
compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression; | ||
const compressedBuf = Compression.deflate(compression, dataBuf); | ||
/* build page header */ | ||
@@ -287,4 +292,4 @@ const header = new parquet_types_1.PageHeader({ | ||
}), | ||
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length, | ||
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length | ||
uncompressed_page_size: dataBuf.length, | ||
compressed_page_size: compressedBuf.length | ||
}); | ||
@@ -295,4 +300,2 @@ /* concat page header, repetition and definition levels and values */ | ||
headerBuf, | ||
rLevelsBuf, | ||
dLevelsBuf, | ||
compressedBuf | ||
@@ -299,0 +302,0 @@ ]); |
{ | ||
"name": "parquets", | ||
"description": "TypeScript implementation of the Parquet file format, based on parquet.js", | ||
"version": "0.8.16", | ||
"version": "0.10.1", | ||
"homepage": "https://github.com/kbajalc/parquets", | ||
@@ -22,7 +22,9 @@ "author": "kbajalc@gmail.com", | ||
"build": "rm -rf ./lib ; rm -rf ./build ; tsc -p src ; tsc -p .", | ||
"watch": "rm -rf ./lib ; tsc -p src --watch", | ||
"test": "tsc -p src && jest --verbose test/*.ts", | ||
"watch": "rm -rf ./lib ; tsc -p . --watch", | ||
"test": "npm run build && jest --verbose test/*.ts", | ||
"tsgen": "thrift-typescript --target apache --rootDir . --sourceDir . --outDir codegen parquet.thrift", | ||
"thrift": "thrift --gen js:node parquet.thrift && thrift --gen js:ts parquet.thrift", | ||
"peer": "npm i brotli lzo lz4js --no-save" | ||
"peer": "npm i brotli lzo lz4js --no-save", | ||
"drill": "../../tools/apache-drill-1.15.0/bin/drill-embedded", | ||
"sql": "echo SELECT CONVERT_FROM(timestamp_field, 'TIMESTAMP_IMPALA') as timestamp_field FROM `dfs.file_with_timestamp.parquet`" | ||
}, | ||
@@ -33,8 +35,8 @@ "engines": { | ||
"dependencies": { | ||
"bson": "^1.0.4", | ||
"debug": "^3.1.0", | ||
"bson": "^1.1.0", | ||
"debug": "^4.1.1", | ||
"int53": "^0.2.4", | ||
"object-stream": "0.0.1", | ||
"snappyjs": "^0.6.0", | ||
"thrift": "^0.10.0", | ||
"thrift": "^0.11.0", | ||
"varint": "^5.0.0" | ||
@@ -48,18 +50,18 @@ }, | ||
"devDependencies": { | ||
"@creditkarma/thrift-typescript": "^2.0.0", | ||
"@types/bson": "^1.0.9", | ||
"@creditkarma/thrift-typescript": "^2.0.14", | ||
"@types/bson": "^1.0.11", | ||
"@types/debug": "0.0.30", | ||
"@types/jest": "^22.2.3", | ||
"@types/node": "^10.5.1", | ||
"@types/node": "^10.12.29", | ||
"brotli": "^1.3.0", | ||
"chai": "^4.1.2", | ||
"jest": "^22.4.4", | ||
"jest-environment-node": "^23.2.0", | ||
"chai": "^4.2.0", | ||
"jest": "^24.0.0", | ||
"jest-environment-node": "^24.0.0", | ||
"lz4js": "^0.2.0", | ||
"lzo": "^0.4.3", | ||
"ts-jest": "^22.4.6", | ||
"ts-node": "^5.0.1", | ||
"tslint": "^5.10.0", | ||
"tslint-config-airbnb": "^5.9.2", | ||
"typescript": "^2.9.2" | ||
"lzo": "^0.4.10", | ||
"ts-jest": "^24.0.0", | ||
"ts-node": "^8.0.2", | ||
"tslint": "^5.13.1", | ||
"tslint-config-airbnb": "^5.11.1", | ||
"typescript": "^3.3.3333" | ||
}, | ||
@@ -74,3 +76,3 @@ "jest": { | ||
"testPathIgnorePatterns": [ | ||
"<rootDir>/build" | ||
"<rootDir>/build_" | ||
], | ||
@@ -77,0 +79,0 @@ "moduleFileExtensions": [ |
@@ -15,3 +15,7 @@ # parquets | ||
**WARNING**: *There are compatibility issues with the reference implementation when using 'optional' columns! Only GZIP and SNAPPY compressions are working properly. Testing done with [Appache Drill](https://drill.apache.org)*. | ||
**WARNING**: *There are compatibility issues with the reference implementation [Appache Drill](https://drill.apache.org)*: | ||
- only GZIP and SNAPPY compressions are compatible | ||
- resolved problem with columns 'optional': true and with 'compression' enabled | ||
- files with 'repeated' columns can not be read with Drill | ||
- columns with nested 'fields' are not compatible | ||
@@ -18,0 +22,0 @@ **What is Parquet?**: Parquet is a column-oriented file format; it allows you to |
import { ParquetType, TODO } from '../declare'; | ||
export interface ParquetCodecImpl { | ||
encodeValues(type: ParquetType, values, opts?: TODO): Buffer; | ||
encodeValues(type: ParquetType, values: TODO, opts?: TODO): Buffer; | ||
decodeValues(type: ParquetType, cursor: TODO, count: number, opts: TODO): any[]; | ||
@@ -6,0 +6,0 @@ } |
@@ -187,3 +187,3 @@ import { FieldDefinition, ParquetType, TODO } from '../declare'; | ||
if (values[i].length !== opts.typeLength) { | ||
throw new Error('invalid value for FIXED_LEN_BYTE_ARRAY: ' + values[i]); | ||
throw new Error(`invalid value for FIXED_LEN_BYTE_ARRAY: ${values[i]}`); | ||
} | ||
@@ -229,3 +229,3 @@ } | ||
default: | ||
throw new Error('unsupported type: ' + type); | ||
throw new Error(`unsupported type: ${type}`); | ||
} | ||
@@ -253,4 +253,4 @@ } | ||
default: | ||
throw new Error('unsupported type: ' + type); | ||
throw new Error(`unsupported type: ${type}`); | ||
} | ||
} |
@@ -5,4 +5,4 @@ import varint = require('varint'); | ||
function encodeRunBitpacked(values: number[], opts: TODO): Buffer { | ||
if (values.length % 8 !== 0) { | ||
throw new Error('must be a multiple of 8'); | ||
for (let i = 0; i < values.length % 8; i++) { | ||
values.push(0); | ||
} | ||
@@ -52,41 +52,37 @@ | ||
default: | ||
throw new Error('unsupported type: ' + type); | ||
throw new Error(`unsupported type: ${type}`); | ||
} | ||
let buf = Buffer.alloc(0); | ||
const runs = []; | ||
for (let cur = 0; cur < values.length; cur += 8) { | ||
let repeating = true; | ||
for (let i = 1; i < 8; ++i) { | ||
if (values[cur + i] !== values[cur]) { | ||
repeating = false; | ||
let run = []; | ||
let repeats = 0; | ||
for (let i = 0; i < values.length; i++) { | ||
// If we are at the beginning of a run and the next value is same we start | ||
// collecting repeated values | ||
if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) { | ||
// If we have any data in runs we need to encode them | ||
if (run.length) { | ||
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]); | ||
run = []; | ||
} | ||
repeats = 1; | ||
} else if (repeats > 0 && values[i] === values[i - 1]) { | ||
repeats += 1; | ||
} else { | ||
// If values changes we need to post any previous repeated values | ||
if (repeats) { | ||
buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]); | ||
repeats = 0; | ||
} | ||
run.push(values[i]); | ||
} | ||
const append = | ||
runs.length > 0 && | ||
(runs[runs.length - 1][1] !== null) === repeating && | ||
(!repeating || runs[runs.length - 1][1] === values[cur]); | ||
if (!append) { | ||
runs.push([cur, repeating ? values[cur] : null]); | ||
} | ||
} | ||
for (let i = values.length - (values.length % 8); i < values.length; ++i) { | ||
runs.push([i, values[i]]); | ||
if (repeats) { | ||
buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]); | ||
} else if (run.length) { | ||
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]); | ||
} | ||
for (let i = 0; i < runs.length; ++i) { | ||
const begin = runs[i][0]; | ||
const end = i < runs.length - 1 ? runs[i + 1][0] : values.length; | ||
const rep = runs[i][1]; | ||
if (rep === null) { | ||
buf = Buffer.concat([buf, encodeRunBitpacked(values.slice(begin, end), opts)]); | ||
} else { | ||
buf = Buffer.concat([buf, encodeRunRepeated(rep, end - begin, opts)]); | ||
} | ||
} | ||
if (opts.disableEnvelope) { | ||
@@ -141,3 +137,3 @@ return buf; | ||
const values = []; | ||
let values = []; | ||
while (values.length < count) { | ||
@@ -154,2 +150,3 @@ const header = varint.decode(cursor.buffer, cursor.offset); | ||
} | ||
values = values.slice(0, count); | ||
@@ -156,0 +153,0 @@ if (values.length !== count) { |
@@ -76,2 +76,8 @@ import { RowGroup } from './gen/parquet_types'; | ||
columnData?: Record<string, ColumnData>; | ||
[path: string]: { | ||
dlevels: any[], | ||
rlevels: any[], | ||
values: any[], | ||
count: number | ||
} | any; | ||
} | ||
@@ -78,0 +84,0 @@ |
@@ -9,2 +9,3 @@ import { PARQUET_CODEC } from './codec'; | ||
import * as Util from './util'; | ||
// import Fs = require('fs'); | ||
@@ -306,3 +307,3 @@ /** | ||
if (!(encoding in PARQUET_CODEC)) { | ||
throw new Error('invalid encoding: ' + encoding); | ||
throw new Error(`invalid encoding: ${encoding}`); | ||
} | ||
@@ -347,3 +348,3 @@ | ||
default: | ||
throw new Error('invalid page type: ' + pageType); | ||
throw new Error(`invalid page type: ${pageType}`); | ||
} | ||
@@ -363,7 +364,35 @@ | ||
const valueCount = header.data_page_header.num_values; | ||
const valueEncoding = Util.getThriftEnum( | ||
Encoding, | ||
header.data_page_header.encoding | ||
) as ParquetCodec; | ||
// const info = { | ||
// path: opts.column.path.join('.'), | ||
// valueEncoding, | ||
// dLevelEncoding, | ||
// rLevelEncoding, | ||
// cursorOffset: cursor.offset, | ||
// cursorEnd, | ||
// cusrorSize: cursor.size, | ||
// header, | ||
// opts, | ||
// buffer: cursor.buffer.toJSON(), | ||
// values: null as any[], | ||
// valBuf: null as any | ||
// }; | ||
// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2)); | ||
/* uncompress page */ | ||
let uncursor = cursor; | ||
if (opts.compression !== 'UNCOMPRESSED') { | ||
const valuesBuf = Compression.inflate( | ||
opts.compression, | ||
cursor.buffer.slice(cursor.offset, cursorEnd), | ||
header.uncompressed_page_size | ||
); | ||
uncursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
}; | ||
cursor.offset = cursorEnd; | ||
} | ||
/* read repetition levels */ | ||
@@ -374,3 +403,2 @@ const rLevelEncoding = Util.getThriftEnum( | ||
) as ParquetCodec; | ||
// tslint:disable-next-line:prefer-array-literal | ||
@@ -382,7 +410,8 @@ let rLevels = new Array(valueCount); | ||
rLevelEncoding, | ||
cursor, | ||
uncursor, | ||
valueCount, | ||
{ | ||
bitWidth: Util.getBitWidth(opts.rLevelMax), | ||
disableEnvelope: false | ||
disableEnvelope: false, | ||
column: opts.column | ||
} | ||
@@ -399,3 +428,2 @@ ); | ||
) as ParquetCodec; | ||
// tslint:disable-next-line:prefer-array-literal | ||
@@ -407,7 +435,8 @@ let dLevels = new Array(valueCount); | ||
dLevelEncoding, | ||
cursor, | ||
uncursor, | ||
valueCount, | ||
{ | ||
bitWidth: Util.getBitWidth(opts.dLevelMax), | ||
disableEnvelope: false | ||
disableEnvelope: false, | ||
column: opts.column | ||
} | ||
@@ -418,4 +447,2 @@ ); | ||
} | ||
/* read values */ | ||
let valueCountNonNull = 0; | ||
@@ -427,25 +454,12 @@ for (const dlvl of dLevels) { | ||
} | ||
/* read values */ | ||
let valuesBufCursor = cursor; | ||
if (opts.compression !== 'UNCOMPRESSED') { | ||
const valuesBuf = Compression.inflate( | ||
opts.compression, | ||
cursor.buffer.slice(cursor.offset, cursorEnd), | ||
header.uncompressed_page_size | ||
); | ||
valuesBufCursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
}; | ||
cursor.offset = cursorEnd; | ||
} | ||
const valueEncoding = Util.getThriftEnum( | ||
Encoding, | ||
header.data_page_header.encoding | ||
) as ParquetCodec; | ||
const values = decodeValues( | ||
opts.type, | ||
valueEncoding, | ||
valuesBufCursor, | ||
uncursor, | ||
valueCountNonNull, | ||
@@ -455,4 +469,9 @@ { | ||
bitWidth: opts.column.typeLength | ||
}); | ||
} | ||
); | ||
// info.valBuf = uncursor.buffer.toJSON(); | ||
// info.values = values; | ||
// Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2)); | ||
return { | ||
@@ -537,3 +556,4 @@ dlevels: dLevels, | ||
bitWidth: opts.column.typeLength | ||
}); | ||
} | ||
); | ||
@@ -540,0 +560,0 @@ return { |
@@ -139,3 +139,3 @@ import { PARQUET_CODEC } from './codec'; | ||
if (!typeDef) { | ||
throw new Error('invalid parquet type: ' + opts.type); | ||
throw new Error(`invalid parquet type: ${opts.type}`); | ||
} | ||
@@ -149,3 +149,3 @@ | ||
if (!(opts.encoding in PARQUET_CODEC)) { | ||
throw new Error('unsupported parquet encoding: ' + opts.encoding); | ||
throw new Error(`unsupported parquet encoding: ${opts.encoding}`); | ||
} | ||
@@ -158,3 +158,3 @@ | ||
if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) { | ||
throw new Error('unsupported compression method: ' + opts.compression); | ||
throw new Error(`unsupported compression method: ${opts.compression}`); | ||
} | ||
@@ -161,0 +161,0 @@ |
@@ -93,7 +93,7 @@ import { ColumnData, FieldDefinition, ParquetRow, RecordBuffer, TODO } from './declare'; | ||
if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') { | ||
throw new Error('missing required field: ' + field.name); | ||
throw new Error(`missing required field: ${field.name}`); | ||
} | ||
if (values.length > 1 && field.repetitionType !== 'REPEATED') { | ||
throw new Error('too many values for field: ' + field.name); | ||
throw new Error(`too many values for field: ${field.name}`); | ||
} | ||
@@ -100,0 +100,0 @@ |
@@ -5,3 +5,6 @@ { | ||
"outDir": "../lib" | ||
} | ||
}, | ||
"include": [ | ||
"./**/*" | ||
] | ||
} |
@@ -334,3 +334,3 @@ import { WriteStream } from 'fs'; | ||
if (!(encoding in PARQUET_CODEC)) { | ||
throw new Error('invalid encoding: ' + encoding); | ||
throw new Error(`invalid encoding: ${encoding}`); | ||
} | ||
@@ -353,14 +353,2 @@ | ||
): { header: PageHeader, headerSize, page: Buffer } { | ||
/* encode values */ | ||
const valuesBuf = encodeValues( | ||
column.primitiveType, | ||
column.encoding, | ||
values, | ||
{ typeLength: column.typeLength, bitWidth: column.typeLength } | ||
); | ||
// tslint:disable-next-line:no-parameter-reassignment | ||
compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression; | ||
const compressedBuf = Compression.deflate(compression, valuesBuf); | ||
/* encode repetition and definition levels */ | ||
@@ -393,2 +381,20 @@ let rLevelsBuf = Buffer.alloc(0); | ||
/* encode values */ | ||
const valuesBuf = encodeValues( | ||
column.primitiveType, | ||
column.encoding, | ||
values, | ||
{ typeLength: column.typeLength, bitWidth: column.typeLength } | ||
); | ||
const dataBuf = Buffer.concat([ | ||
rLevelsBuf, | ||
dLevelsBuf, | ||
valuesBuf | ||
]); | ||
// tslint:disable-next-line:no-parameter-reassignment | ||
compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression; | ||
const compressedBuf = Compression.deflate(compression, dataBuf); | ||
/* build page header */ | ||
@@ -405,4 +411,4 @@ const header = new PageHeader({ | ||
}), | ||
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length, | ||
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length | ||
uncompressed_page_size: dataBuf.length, | ||
compressed_page_size: compressedBuf.length | ||
}); | ||
@@ -414,4 +420,2 @@ | ||
headerBuf, | ||
rLevelsBuf, | ||
dLevelsBuf, | ||
compressedBuf | ||
@@ -418,0 +422,0 @@ ]); |
@@ -36,2 +36,32 @@ import chai = require('chai'); | ||
describe('number of values not a multiple of 8', function () { | ||
it('should encode bitpacked values', function () { | ||
const buf = parquet_codec_rle.encodeValues( | ||
'INT32', | ||
[0, 1, 2, 3, 4, 5, 6, 7, 6, 5], | ||
{ | ||
disableEnvelope: true, | ||
bitWidth: 3 | ||
}); | ||
assert.deepEqual(buf, new Buffer([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00])); | ||
}); | ||
it('should decode bitpacked values', function () { | ||
const vals = parquet_codec_rle.decodeValues( | ||
'INT32', | ||
{ | ||
buffer: new Buffer([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]), | ||
offset: 0, | ||
}, | ||
10, | ||
{ | ||
disableEnvelope: true, | ||
bitWidth: 3 | ||
}); | ||
assert.deepEqual(vals, [0, 1, 2, 3, 4, 5, 6, 7, 6, 5]); | ||
}); | ||
}); | ||
it('should encode repeated values', function () { | ||
@@ -38,0 +68,0 @@ const buf = parquet_codec_rle.encodeValues( |
@@ -7,3 +7,3 @@ import chai = require('chai'); | ||
const TEST_NUM_ROWS = 10000; | ||
const TEST_NUM_ROWS = 1000; | ||
const TEST_VTIME = Date.now(); | ||
@@ -10,0 +10,0 @@ |
@@ -61,2 +61,3 @@ { | ||
// "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ | ||
// "allowSyntheticDefaultImports": true, | ||
// "esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ | ||
@@ -82,2 +83,7 @@ // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ | ||
}, | ||
"include": [ | ||
"src/**/*", | ||
"dev/**/*", | ||
"test/**/*" | ||
], | ||
"exclude": [ | ||
@@ -84,0 +90,0 @@ "node_modules", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
780337
15552
331
72
+ Addeddebug@4.3.7(transitive)
+ Addednode-int64@0.4.0(transitive)
+ Addedq@1.5.1(transitive)
+ Addedthrift@0.11.0(transitive)
+ Addedws@8.18.0(transitive)
- Removedcommander@2.1.0(transitive)
- Removeddebug@3.2.7(transitive)
- Removednan@1.0.0(transitive)
- Removednode-int64@0.3.3(transitive)
- Removedoptions@0.0.6(transitive)
- Removedq@1.0.1(transitive)
- Removedthrift@0.10.0(transitive)
- Removedtinycolor@0.0.1(transitive)
- Removedws@0.4.32(transitive)
Updatedbson@^1.1.0
Updateddebug@^4.1.1
Updatedthrift@^0.11.0