parquets

parquets - npm Package Compare versions

Comparing version 0.10.6 to 0.10.7

lib/snappy/compressor.d.ts

lib/codec/plain.js

@@ -53,3 +53,3 @@ "use strict";

buf.fill(0);
for (let i = 0; i < values.length; ++i) {
for (let i = 0; i < values.length; i++) {
if (values[i]) {

@@ -63,3 +63,3 @@ buf[Math.floor(i / 8)] |= (1 << (i % 8));

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const b = cursor.buffer[cursor.offset + Math.floor(i / 8)];

@@ -80,3 +80,3 @@ values.push((b & (1 << (i % 8))) > 0);

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readInt32LE(cursor.offset));

@@ -96,3 +96,3 @@ cursor.offset += 4;

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(INT53.readInt64LE(cursor.buffer, cursor.offset));

@@ -119,3 +119,3 @@ cursor.offset += 8;

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const low = INT53.readInt64LE(cursor.buffer, cursor.offset);

@@ -142,3 +142,3 @@ const high = cursor.buffer.readUInt32LE(cursor.offset + 8);

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readFloatLE(cursor.offset));

@@ -158,3 +158,3 @@ cursor.offset += 4;

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readDoubleLE(cursor.offset));

@@ -161,0 +161,0 @@ cursor.offset += 8;

@@ -9,3 +9,3 @@ "use strict";

const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8)));
for (let b = 0; b < opts.bitWidth * values.length; ++b) {
for (let b = 0; b < opts.bitWidth * values.length; b++) {
if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) {

@@ -22,3 +22,3 @@ buf[Math.floor(b / 8)] |= (1 << (b % 8));

const buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8));
for (let i = 0; i < buf.length; ++i) {
for (let i = 0; i < buf.length; i++) {
buf.writeUInt8(value & 0xff, i);

@@ -93,3 +93,3 @@ value >> 8;

const values = new Array(count).fill(0);
for (let b = 0; b < opts.bitWidth * count; ++b) {
for (let b = 0; b < opts.bitWidth * count; b++) {
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) {

@@ -104,3 +104,3 @@ values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth);

let value = 0;
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); ++i) {
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); i++) {
value << 8;

@@ -107,0 +107,0 @@ value += cursor.buffer[cursor.offset];
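
The bit-packing loops above pack each value LSB-first, crossing byte boundaries as needed. A minimal stand-alone sketch of the same layout; the function name and sample values are illustrative, not part of the package:

// Illustrative sketch of the bit layout produced by the packing loop above.
function packBits(values: number[], bitWidth: number): Buffer {
  const buf = Buffer.alloc(Math.ceil((bitWidth * values.length) / 8));
  for (let b = 0; b < bitWidth * values.length; b++) {
    // bit (b % bitWidth) of values[floor(b / bitWidth)] -> bit (b % 8) of byte floor(b / 8)
    if ((values[Math.floor(b / bitWidth)] & (1 << (b % bitWidth))) > 0) {
      buf[Math.floor(b / 8)] |= 1 << (b % 8);
    }
  }
  return buf;
}

// Example: bitWidth 2, values [1, 2, 3] occupy six bits of a single byte.
// Bits 0 (from 1), 3 (from 2), 4 and 5 (from 3) are set: 0b00111001 = 0x39.
packBits([1, 2, 3], 2); // <Buffer 39>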

@@ -5,3 +5,3 @@ "use strict";

const zlib = require("zlib");
const snappyjs = require("snappyjs");
const snappyjs = require("./snappy");
let brotli;

@@ -8,0 +8,0 @@ let lzo;

@@ -8,5 +8,5 @@ export declare type ParquetCodec = 'PLAIN' | 'RLE';

export interface SchemaDefinition {
[string: string]: ElementDefinition;
[string: string]: FieldDefinition;
}
export interface ElementDefinition {
export interface FieldDefinition {
type?: ParquetType;

@@ -20,5 +20,6 @@ typeLength?: number;

}
export interface FieldDefinition {
export interface ParquetField {
name: string;
path: string[];
key: string;
primitiveType?: PrimitiveType;

@@ -34,3 +35,3 @@ originalType?: OriginalType;

fieldCount?: number;
fields?: Record<string, FieldDefinition>;
fields?: Record<string, ParquetField>;
}

@@ -37,0 +38,0 @@ export interface ParquetBuffer {
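
The renames above separate what a user writes from what the library compiles: a SchemaDefinition still maps field names to definitions, but those entries are now called FieldDefinition, while the compiled per-field metadata (with name, path and the new key property) becomes ParquetField. A hedged sketch of how the two relate; the sample schema is illustrative:

import { ParquetSchema } from 'parquets';

// SchemaDefinition: written by the user; every entry is a FieldDefinition.
const schema = new ParquetSchema({
  name: { type: 'UTF8' },
  stock: {
    repeated: true,
    fields: { quantity: { type: 'INT64' } } // nested FieldDefinition
  }
});

// schema.fields and schema.fieldList hold ParquetField objects. Each one carries
// key = path.join() (e.g. 'name' or 'stock,quantity'), which the shredder now uses
// to index columnData instead of re-joining the path on every access.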

/// <reference types="node" />
import { ParquetBuffer, ParquetData, ParquetRecord } from './declare';
import { ColumnChunk, FileMetaData, RowGroup } from './gen';
import { ParquetSchema } from './schema';
import { ColumnChunk, FileMetaData, RowGroup } from './thrift';
/**

@@ -6,0 +6,0 @@ * A parquet cursor is used to retrieve rows from a parquet file in order

@@ -5,6 +5,6 @@ "use strict";

const Compression = require("./compression");
// tslint:disable-next-line:max-line-length
const gen_1 = require("./gen");
const schema_1 = require("./schema");
const Shred = require("./shred");
// tslint:disable-next-line:max-line-length
const thrift_1 = require("./thrift");
const Util = require("./util");

@@ -195,6 +195,6 @@ // import Fs = require('fs');

const field = schema.findField(colChunk.meta_data.path_in_schema);
const type = Util.getThriftEnum(gen_1.Type, colChunk.meta_data.type);
const type = Util.getThriftEnum(thrift_1.Type, colChunk.meta_data.type);
if (type !== field.primitiveType)
throw new Error('chunk type not matching schema: ' + type);
const compression = Util.getThriftEnum(gen_1.CompressionCodec, colChunk.meta_data.codec);
const compression = Util.getThriftEnum(thrift_1.CompressionCodec, colChunk.meta_data.codec);
const pagesOffset = +colChunk.meta_data.data_page_offset;

@@ -250,3 +250,3 @@ const pagesSize = +colChunk.meta_data.total_compressed_size;

cursor.offset += length;
const pageType = Util.getThriftEnum(gen_1.PageType, pageHeader.type);
const pageType = Util.getThriftEnum(thrift_1.PageType, pageHeader.type);
let pageData = null;

@@ -300,3 +300,3 @@ switch (pageType) {

/* read repetition levels */
const rLevelEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header.repetition_level_encoding);
const rLevelEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header.repetition_level_encoding);
// tslint:disable-next-line:prefer-array-literal

@@ -315,3 +315,3 @@ let rLevels = new Array(valueCount);

/* read definition levels */
const dLevelEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header.definition_level_encoding);
const dLevelEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header.definition_level_encoding);
// tslint:disable-next-line:prefer-array-literal

@@ -332,7 +332,7 @@ let dLevels = new Array(valueCount);

if (dlvl === column.dLevelMax) {
++valueCountNonNull;
valueCountNonNull++;
}
}
/* read values */
const valueEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header.encoding);
const valueEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header.encoding);
const values = decodeValues(column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, {

@@ -356,3 +356,3 @@ typeLength: column.typeLength,

const valueCountNonNull = valueCount - header.data_page_header_v2.num_nulls;
const valueEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header_v2.encoding);
const valueEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header_v2.encoding);
/* read repetition levels */

@@ -409,3 +409,3 @@ // tslint:disable-next-line:prefer-array-literal

const schemaElement = schemaElements[next];
const repetitionType = next > 0 ? Util.getThriftEnum(gen_1.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
const repetitionType = next > 0 ? Util.getThriftEnum(thrift_1.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
let optional = false;

@@ -434,5 +434,5 @@ let repeated = false;

else {
let logicalType = Util.getThriftEnum(gen_1.Type, schemaElement.type);
let logicalType = Util.getThriftEnum(thrift_1.Type, schemaElement.type);
if (schemaElement.converted_type != null) {
logicalType = Util.getThriftEnum(gen_1.ConvertedType, schemaElement.converted_type);
logicalType = Util.getThriftEnum(thrift_1.ConvertedType, schemaElement.converted_type);
}

@@ -439,0 +439,0 @@ schema[schemaElement.name] = {
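
Both data-page decoders above size their value reads from the definition levels: only entries whose definition level equals the column's dLevelMax have a physical value in the page, which is what the valueCountNonNull increment in the hunks computes. A minimal sketch of that step; the helper name is illustrative:

// Entries defined all the way down to the leaf (dlvl === dLevelMax) carry a stored
// value; nulls are represented by the definition levels alone.
function countNonNull(dLevels: number[], dLevelMax: number): number {
  let valueCountNonNull = 0;
  for (const dlvl of dLevels) {
    if (dlvl === dLevelMax) valueCountNonNull++;
  }
  return valueCountNonNull;
}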

@@ -1,2 +0,2 @@

import { ElementDefinition, FieldDefinition, ParquetBuffer, ParquetCompression, ParquetRecord, SchemaDefinition } from './declare';
import { FieldDefinition, ParquetBuffer, ParquetCompression, ParquetField, ParquetRecord, SchemaDefinition } from './declare';
/**

@@ -6,5 +6,5 @@ * A parquet file schema

export declare class ParquetSchema {
schema: Record<string, ElementDefinition>;
fields: Record<string, FieldDefinition>;
fieldList: FieldDefinition[];
schema: Record<string, FieldDefinition>;
fields: Record<string, ParquetField>;
fieldList: ParquetField[];
/**

@@ -17,12 +17,13 @@ * Create a new schema from a JSON schema definition

*/
findField(path: string): FieldDefinition;
findField(path: string[]): FieldDefinition;
findField(path: string): ParquetField;
findField(path: string[]): ParquetField;
/**
* Retrieve a field definition and all the field's ancestors
*/
findFieldBranch(path: string): FieldDefinition[];
findFieldBranch(path: string[]): FieldDefinition[];
findFieldBranch(path: string): ParquetField[];
findFieldBranch(path: string[]): ParquetField[];
shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void;
materializeRecords(buffer: ParquetBuffer): ParquetRecord[];
compress(type: ParquetCompression): this;
buffer(): ParquetBuffer;
}

@@ -60,2 +60,5 @@ "use strict";

}
buffer() {
return shred_1.shredBuffer(this);
}
}

@@ -96,5 +99,7 @@ exports.ParquetSchema = ParquetSchema;

if (opts.fields) {
const cpath = path.concat([name]);
fieldList[name] = {
name,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -105,3 +110,3 @@ rLevelMax,

fieldCount: Object.keys(opts.fields).length,
fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat([name]))
fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)
};

@@ -123,2 +128,3 @@ continue;

/* add to schema */
const cpath = path.concat([name]);
fieldList[name] = {

@@ -128,3 +134,4 @@ name,

originalType: typeDef.originalType,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -131,0 +138,0 @@ encoding: opts.encoding,
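
The buffer() method added above delegates to shredBuffer and returns an empty ParquetBuffer whose columnData is pre-keyed by each field's new key. A hedged sketch of the result for a one-column schema; the shape shown is what the code above would produce, not captured output:

import { ParquetSchema } from 'parquets';

const schema = new ParquetSchema({ name: { type: 'UTF8' } });
const buf = schema.buffer();
// buf is equivalent to:
// {
//   rowCount: 0,
//   columnData: {
//     name: { dlevels: [], rlevels: [], values: [], count: 0 }
//   }
// }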

import { ParquetBuffer, ParquetRecord } from './declare';
import { ParquetSchema } from './schema';
export declare function shredBuffer(schema: ParquetSchema): ParquetBuffer;
/**

@@ -24,3 +25,2 @@ * 'Shred' a record into a list of <value, repetition_level, definition_level>

* }
*
*/

@@ -46,4 +46,3 @@ export declare function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void;

* }
*
*/
export declare function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[];
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const Types = require("./types");
function shredBuffer(schema) {
const columnData = {};
for (const field of schema.fieldList) {
columnData[field.key] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
return { rowCount: 0, columnData };
}
exports.shredBuffer = shredBuffer;
/**

@@ -25,51 +38,33 @@ * 'Shred' a record into a list of <value, repetition_level, definition_level>

* }
*
*/
function shredRecord(schema, record, buffer) {
/* shred the record, this may raise an exception */
const recordShredded = {};
for (const field of schema.fieldList) {
recordShredded[field.path.join()] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
shredRecordInternal(schema.fields, record, recordShredded, 0, 0);
const data = shredBuffer(schema).columnData;
shredRecordFields(schema.fields, record, data, 0, 0);
/* if no error during shredding, add the shredded record to the buffer */
if (!('columnData' in buffer) || !('rowCount' in buffer)) {
buffer.rowCount = 0;
buffer.columnData = {};
for (const field of schema.fieldList) {
const cd = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
buffer.columnData[field.path.join()] = cd;
}
buffer.rowCount = 1;
buffer.columnData = data;
return;
}
buffer.rowCount += 1;
for (const field of schema.fieldList) {
Array.prototype.push.apply(buffer.columnData[field.path.join()].rlevels, recordShredded[field.path.join()].rlevels);
Array.prototype.push.apply(buffer.columnData[field.path.join()].dlevels, recordShredded[field.path.join()].dlevels);
Array.prototype.push.apply(buffer.columnData[field.path.join()].values, recordShredded[field.path.join()].values);
buffer.columnData[field.path.join()].count += recordShredded[field.path.join()].count;
Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
buffer.columnData[field.key].count += data[field.key].count;
}
}
exports.shredRecord = shredRecord;
function shredRecordInternal(fields, record, data, rlvl, dlvl) {
for (const fieldName in fields) {
const field = fields[fieldName];
const fieldType = field.originalType || field.primitiveType;
function shredRecordFields(fields, record, data, rLevel, dLevel) {
for (const name in fields) {
const field = fields[name];
// fetch values
let values = [];
if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) {
if (record[fieldName].constructor === Array) {
values = record[fieldName];
if (record && (field.name in record) && record[field.name] !== undefined && record[field.name] !== null) {
if (record[field.name].constructor === Array) {
values = record[field.name];
}
else {
values.push(record[fieldName]);
values.push(record[field.name]);
}

@@ -87,8 +82,8 @@ }

if (field.isNested) {
shredRecordInternal(field.fields, null, data, rlvl, dlvl);
shredRecordFields(field.fields, null, data, rLevel, dLevel);
}
else {
data[field.path.join()].rlevels.push(rlvl);
data[field.path.join()].dlevels.push(dlvl);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rLevel);
data[field.key].dlevels.push(dLevel);
}

@@ -98,13 +93,12 @@ continue;

// push values
for (let i = 0; i < values.length; ++i) {
// tslint:disable-next-line:variable-name
const rlvl_i = i === 0 ? rlvl : field.rLevelMax;
for (let i = 0; i < values.length; i++) {
const rlvl = i === 0 ? rLevel : field.rLevelMax;
if (field.isNested) {
shredRecordInternal(field.fields, values[i], data, rlvl_i, field.dLevelMax);
shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
}
else {
data[field.path.join()].values.push(Types.toPrimitive(fieldType, values[i]));
data[field.path.join()].rlevels.push(rlvl_i);
data[field.path.join()].dlevels.push(field.dLevelMax);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rlvl);
data[field.key].dlevels.push(field.dLevelMax);
data[field.key].values.push(Types.toPrimitive(field.originalType || field.primitiveType, values[i]));
}

@@ -132,66 +126,66 @@ }

* }
*
*/
function materializeRecords(schema, buffer) {
const records = [];
for (let i = 0; i < buffer.rowCount; ++i) {
for (let i = 0; i < buffer.rowCount; i++)
records.push({});
for (const key in buffer.columnData) {
materializeColumn(schema, buffer, key, records);
}
for (const k in buffer.columnData) {
const field = schema.findField(k);
const fieldBranch = schema.findFieldBranch(k);
const values = buffer.columnData[k].values[Symbol.iterator]();
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1);
rLevels.fill(0);
for (let i = 0; i < buffer.columnData[k].count; ++i) {
const dLevel = buffer.columnData[k].dlevels[i];
const rLevel = buffer.columnData[k].rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let value = null;
if (dLevel === field.dLevelMax) {
value = Types.fromPrimitive(field.originalType || field.primitiveType, values.next().value);
}
materializeRecordField(records[rLevels[0] - 1], fieldBranch, rLevels.slice(1), dLevel, value);
}
}
return records;
}
exports.materializeRecords = materializeRecords;
function materializeRecordField(record, branch, rLevels, dLevel, value) {
const node = branch[0];
if (dLevel < node.dLevelMax) {
function materializeColumn(schema, buffer, key, records) {
const data = buffer.columnData[key];
if (!data.count)
return;
}
if (branch.length > 1) {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
const field = schema.findField(key);
const branch = schema.findFieldBranch(key);
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1).fill(0);
let vIndex = 0;
for (let i = 0; i < data.count; i++) {
const dLevel = data.dlevels[i];
const rLevel = data.rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let rIndex = 0;
let record = records[rLevels[rIndex++] - 1];
// Internal nodes
for (const step of branch) {
if (step === field)
break;
if (dLevel < step.dLevelMax)
break;
if (step.repetitionType === 'REPEATED') {
if (!(step.name in record))
record[step.name] = [];
const ix = rLevels[rIndex++];
while (record[step.name].length <= ix)
record[step.name].push({});
record = record[step.name][ix];
}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push({});
else {
record[step.name] = record[step.name] || {};
record = record[step.name];
}
materializeRecordField(record[node.name][rLevels[0]], branch.slice(1), rLevels.slice(1), dLevel, value);
}
else {
record[node.name] = record[node.name] || {};
materializeRecordField(record[node.name], branch.slice(1), rLevels, dLevel, value);
}
}
else {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
// Leaf node
if (dLevel === field.dLevelMax) {
const value = Types.fromPrimitive(field.originalType || field.primitiveType, data.values[vIndex]);
vIndex++;
if (field.repetitionType === 'REPEATED') {
if (!(field.name in record))
record[field.name] = [];
const ix = rLevels[rIndex];
while (record[field.name].length <= ix)
record[field.name].push(null);
record[field.name][ix] = value;
}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push(null);
else {
record[field.name] = value;
}
record[node.name][rLevels[0]] = value;
}
else {
record[node.name] = value;
}
}
}
//# sourceMappingURL=shred.js.map
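
To make the rewritten shredding and materialization paths concrete, here is a hedged end-to-end sketch with one required and one repeated column; the levels shown follow from the code above (Dremel-style repetition and definition levels), they are not captured output, and UTF8 values are stored internally as Buffers:

import { ParquetSchema } from 'parquets';

const schema = new ParquetSchema({
  name: { type: 'UTF8' },
  colors: { type: 'UTF8', repeated: true }
});

const buffer = schema.buffer(); // empty ParquetBuffer, new in 0.10.7
schema.shredRecord({ name: 'apple', colors: ['red', 'green'] }, buffer);

// buffer.rowCount === 1
// buffer.columnData['name']:   rlevels [0],    dlevels [0],    count 1
// buffer.columnData['colors']: rlevels [0, 1], dlevels [1, 1], count 2
//   (rlevel 1 marks a repeated value within the same record; dlevel 1 equals
//    colors.dLevelMax, so both entries carry a stored value)

const rows = schema.materializeRecords(buffer);
// rows -> [{ name: 'apple', colors: ['red', 'green'] }]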
/// <reference types="node" />
import fs = require('fs');
import { FileMetaData, PageHeader } from './gen';
import { FileMetaData, PageHeader } from './thrift';
export interface WriteStreamOptions {

@@ -5,0 +5,0 @@ flags?: string;

@@ -5,3 +5,3 @@ "use strict";

const thrift_1 = require("thrift");
const gen_1 = require("./gen");
const thrift_2 = require("./thrift");
class UFramedTransport extends thrift_1.TFramedTransport {

@@ -43,3 +43,3 @@ }

const protocol = new thrift_1.TCompactProtocol(transport);
const metadata = gen_1.FileMetaData.read(protocol);
const metadata = thrift_2.FileMetaData.read(protocol);
return { length: transport.readPos - offset, metadata };

@@ -56,3 +56,3 @@ }

const protocol = new thrift_1.TCompactProtocol(transport);
const pageHeader = gen_1.PageHeader.read(protocol);
const pageHeader = thrift_2.PageHeader.read(protocol);
return { length: transport.readPos - offset, pageHeader };

@@ -177,7 +177,7 @@ }

function fieldIndexOf(arr, elem) {
for (let j = 0; j < arr.length; ++j) {
for (let j = 0; j < arr.length; j++) {
if (arr[j].length > elem.length)
continue;
let m = true;
for (let i = 0; i < elem.length; ++i) {
for (let i = 0; i < elem.length; i++) {
if (arr[j][i] === elem[i] || arr[j][i] === '+' || arr[j][i] === '#')

@@ -184,0 +184,0 @@ continue;

@@ -5,4 +5,4 @@ /// <reference types="node" />

import { ParquetBuffer } from './declare';
import { RowGroup } from './gen';
import { ParquetSchema } from './schema';
import { RowGroup } from './thrift';
export interface ParquetWriterOptions {

@@ -9,0 +9,0 @@ baseOffset?: number;

@@ -6,5 +6,5 @@ "use strict";

const Compression = require("./compression");
const Shred = require("./shred");
// tslint:disable-next-line:max-line-length
const gen_1 = require("./gen");
const Shred = require("./shred");
const thrift_1 = require("./thrift");
const Util = require("./util");

@@ -275,9 +275,9 @@ const Int64 = require("node-int64");

/* build page header */
const header = new gen_1.PageHeader({
type: gen_1.PageType.DATA_PAGE,
data_page_header: new gen_1.DataPageHeader({
const header = new thrift_1.PageHeader({
type: thrift_1.PageType.DATA_PAGE,
data_page_header: new thrift_1.DataPageHeader({
num_values: data.count,
encoding: gen_1.Encoding[column.encoding],
definition_level_encoding: gen_1.Encoding[PARQUET_RDLVL_ENCODING],
repetition_level_encoding: gen_1.Encoding[PARQUET_RDLVL_ENCODING],
encoding: thrift_1.Encoding[column.encoding],
definition_level_encoding: thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
repetition_level_encoding: thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
}),

@@ -322,9 +322,9 @@ uncompressed_page_size: dataBuf.length,

/* build page header */
const header = new gen_1.PageHeader({
type: gen_1.PageType.DATA_PAGE_V2,
data_page_header_v2: new gen_1.DataPageHeaderV2({
const header = new thrift_1.PageHeader({
type: thrift_1.PageType.DATA_PAGE_V2,
data_page_header_v2: new thrift_1.DataPageHeaderV2({
num_values: data.count,
num_nulls: data.count - data.values.length,
num_rows: rowCount,
encoding: gen_1.Encoding[column.encoding],
encoding: thrift_1.Encoding[column.encoding],
definition_levels_byte_length: dLevelsBuf.length,

@@ -376,3 +376,3 @@ repetition_levels_byte_length: rLevelsBuf.length,

/* prepare metadata header */
const metadata = new gen_1.ColumnMetaData({
const metadata = new thrift_1.ColumnMetaData({
path_in_schema: column.path,

@@ -384,4 +384,4 @@ num_values: data.count,

total_compressed_size,
type: gen_1.Type[column.primitiveType],
codec: gen_1.CompressionCodec[column.compression]
type: thrift_1.Type[column.primitiveType],
codec: thrift_1.CompressionCodec[column.compression]
});

@@ -393,3 +393,3 @@ /* list encodings */

for (const k in encodingsSet) {
metadata.encodings.push(gen_1.Encoding[k]);
metadata.encodings.push(thrift_1.Encoding[k]);
}

@@ -405,3 +405,3 @@ /* concat metadata header and data pages */

function encodeRowGroup(schema, data, opts) {
const metadata = new gen_1.RowGroup({
const metadata = new thrift_1.RowGroup({
num_rows: data.rowCount,

@@ -417,3 +417,3 @@ columns: [],

const cchunkData = encodeColumnChunk(field, data, body.length, opts);
const cchunk = new gen_1.ColumnChunk({
const cchunk = new thrift_1.ColumnChunk({
file_offset: cchunkData.metadataOffset,

@@ -432,3 +432,3 @@ meta_data: cchunkData.metadata

function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
const metadata = new gen_1.FileMetaData({
const metadata = new thrift_1.FileMetaData({
version: PARQUET_VERSION,

@@ -442,3 +442,3 @@ created_by: 'parquets',

for (const key in userMetadata) {
const kv = new gen_1.KeyValue({
const kv = new thrift_1.KeyValue({
key,

@@ -450,3 +450,3 @@ value: userMetadata[key]

{
const schemaRoot = new gen_1.SchemaElement({
const schemaRoot = new thrift_1.SchemaElement({
name: 'root',

@@ -458,4 +458,4 @@ num_children: Object.keys(schema.fields).length

for (const field of schema.fieldList) {
const relt = gen_1.FieldRepetitionType[field.repetitionType];
const schemaElem = new gen_1.SchemaElement({
const relt = thrift_1.FieldRepetitionType[field.repetitionType];
const schemaElem = new thrift_1.SchemaElement({
name: field.name,

@@ -468,6 +468,6 @@ repetition_type: relt

else {
schemaElem.type = gen_1.Type[field.primitiveType];
schemaElem.type = thrift_1.Type[field.primitiveType];
}
if (field.originalType) {
schemaElem.converted_type = gen_1.ConvertedType[field.originalType];
schemaElem.converted_type = thrift_1.ConvertedType[field.originalType];
}

@@ -474,0 +474,0 @@ schemaElem.type_length = field.typeLength;

{
"name": "parquets",
"description": "TypeScript implementation of the Parquet file format, based on parquet.js",
"version": "0.10.6",
"version": "0.10.7",
"upstream": "0.10.1",

@@ -42,6 +42,4 @@ "homepage": "https://github.com/kbajalc/parquets",

"bson": "^4.0.2",
"debug": "^4.1.1",
"int53": "^1.0.0",
"node-int64": "^0.4.0",
"snappyjs": "^0.6.0",
"thrift": "^0.12.0",

@@ -60,6 +58,7 @@ "varint": "^5.0.0"

"@types/debug": "^4.1.4",
"@types/jest": "^24.0.16",
"@types/jest": "^24.0.17",
"@types/mocha": "^5.2.7",
"@types/node": "^10.14.14",
"@types/node": "^10.14.15",
"@types/node-int64": "^0.4.29",
"@types/snappy": "^6.0.0",
"@types/thrift": "^0.10.8",

@@ -70,2 +69,3 @@ "@types/varint": "^5.0.0",

"chai": "^4.2.0",
"debug": "^4.1.1",
"jest": "^24.8.0",

@@ -75,7 +75,6 @@ "jest-environment-node": "^24.8.0",

"lzo": "^0.4.11",
"mocha": "^6.2.0",
"node-snappy": "^0.1.4",
"object-stream": "0.0.1",
"snappy": "^6.2.3",
"ts-jest": "^24.0.2",
"ts-mocha": "^6.0.0",
"ts-node": "^8.3.0",

@@ -82,0 +81,0 @@ "tslint": "^5.18.0",

@@ -13,9 +13,8 @@ # parquets

with the [Parquet specification](https://github.com/apache/parquet-format) and is being tested
for compatibility with Apache's Java [reference implementation](https://github.com/apache/parquet-mr).
for compatibility with Apache's [reference implementation](https://github.com/apache/parquet-mr).
**WARNING**: *There are compatibility issues with the reference implementation [Apache Drill](https://drill.apache.org)*:
**WARNING**: *There are compatibility issues with the reference implementation*:
- only GZIP and SNAPPY compressions are compatible
- resolved a problem with 'optional': true columns that also have 'compression' enabled
- files with 'repeated' columns cannot be read by Drill
- columns with nested 'fields' are not compatible
- [Parquet Tools](https://github.com/apache/parquet-mr/tree/master/parquet-tools) are command-line tools that aid in the inspection of Parquet files.
- always verify that your table structure, loaded with a realistic data sample, can be read by Parquet Tools! (A minimal writer sketch follows after this section.)

@@ -22,0 +21,0 @@ **What is Parquet?**: Parquet is a column-oriented file format; it allows you to
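
Following the README's advice to check files against Parquet Tools, a minimal writer sketch; the API names come from the project's README, while the file name and row are illustrative:

import { ParquetSchema, ParquetWriter } from 'parquets';

async function writeSample() {
  const schema = new ParquetSchema({
    name:  { type: 'UTF8' },
    price: { type: 'DOUBLE', optional: true, compression: 'SNAPPY' }
  });
  const writer = await ParquetWriter.openFile(schema, 'sample.parquet');
  await writer.appendRow({ name: 'apple', price: 1.25 });
  await writer.close();
  // Then inspect the result, e.g. `parquet-tools schema sample.parquet` and
  // `parquet-tools cat sample.parquet`, before relying on cross-reader compatibility.
}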

@@ -54,3 +54,3 @@ import { PrimitiveType } from '../declare';

buf.fill(0);
for (let i = 0; i < values.length; ++i) {
for (let i = 0; i < values.length; i++) {
if (values[i]) {

@@ -65,3 +65,3 @@ buf[Math.floor(i / 8)] |= (1 << (i % 8));

const values: boolean[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const b = cursor.buffer[cursor.offset + Math.floor(i / 8)];

@@ -84,3 +84,3 @@ values.push((b & (1 << (i % 8))) > 0);

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readInt32LE(cursor.offset));

@@ -102,3 +102,3 @@ cursor.offset += 4;

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(INT53.readInt64LE(cursor.buffer, cursor.offset));

@@ -126,3 +126,3 @@ cursor.offset += 8;

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const low = INT53.readInt64LE(cursor.buffer, cursor.offset);

@@ -150,3 +150,3 @@ const high = cursor.buffer.readUInt32LE(cursor.offset + 8);

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readFloatLE(cursor.offset));

@@ -168,3 +168,3 @@ cursor.offset += 4;

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readDoubleLE(cursor.offset));

@@ -171,0 +171,0 @@ cursor.offset += 8;

@@ -11,3 +11,3 @@ import varint = require('varint');

const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8)));
for (let b = 0; b < opts.bitWidth * values.length; ++b) {
for (let b = 0; b < opts.bitWidth * values.length; b++) {
if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) {

@@ -27,3 +27,3 @@ buf[Math.floor(b / 8)] |= (1 << (b % 8));

for (let i = 0; i < buf.length; ++i) {
for (let i = 0; i < buf.length; i++) {
buf.writeUInt8(value & 0xff, i);

@@ -107,3 +107,3 @@ value >> 8;

const values = new Array(count).fill(0);
for (let b = 0; b < opts.bitWidth * count; ++b) {
for (let b = 0; b < opts.bitWidth * count; b++) {
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) {

@@ -120,3 +120,3 @@ values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth);

let value = 0;
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); ++i) {
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); i++) {
value << 8;

@@ -123,0 +123,0 @@ value += cursor.buffer[cursor.offset];

import { ParquetCompression } from './declare';
import * as Util from './util';
import zlib = require('zlib');
import snappyjs = require('snappyjs');
import snappyjs = require('./snappy');

@@ -6,0 +6,0 @@ let brotli: any;

@@ -0,1 +1,2 @@

export type ParquetCodec = 'PLAIN' | 'RLE';

@@ -43,6 +44,6 @@ export type ParquetCompression = 'UNCOMPRESSED' | 'GZIP' | 'SNAPPY' | 'LZO' | 'BROTLI' | 'LZ4';

export interface SchemaDefinition {
[string: string]: ElementDefinition;
[string: string]: FieldDefinition;
}
export interface ElementDefinition {
export interface FieldDefinition {
type?: ParquetType;

@@ -57,5 +58,6 @@ typeLength?: number;

export interface FieldDefinition {
export interface ParquetField {
name: string;
path: string[];
key: string;
primitiveType?: PrimitiveType;

@@ -71,3 +73,3 @@ originalType?: OriginalType;

fieldCount?: number;
fields?: Record<string, FieldDefinition>;
fields?: Record<string, ParquetField>;
}

@@ -74,0 +76,0 @@

@@ -12,9 +12,9 @@ declare module 'int53' {

declare module 'snappyjs' {
declare function compress(uncompressed: Buffer): Buffer;
declare function compress(uncompressed: ArrayBuffer): ArrayBuffer;
declare function compress(uncompressed: Uint8Array): Uint8Array;
declare function uncompress(compressed: Buffer): Buffer;
declare function uncompress(compressed: ArrayBuffer): ArrayBuffer;
declare function uncompress(compressed: Uint8Array): Uint8Array;
}
// declare module 'snappyjs' {
// declare function compress(uncompressed: Buffer): Buffer;
// declare function compress(uncompressed: ArrayBuffer): ArrayBuffer;
// declare function compress(uncompressed: Uint8Array): Uint8Array;
// declare function uncompress(compressed: Buffer): Buffer;
// declare function uncompress(compressed: ArrayBuffer): ArrayBuffer;
// declare function uncompress(compressed: Uint8Array): Uint8Array;
// }
import { CursorBuffer, ParquetCodecOptions, PARQUET_CODEC } from './codec';
import * as Compression from './compression';
import { FieldDefinition, ParquetBuffer, ParquetCodec, ParquetCompression, ParquetData, ParquetRecord, ParquetType, PrimitiveType, SchemaDefinition } from './declare';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, FileMetaData, PageHeader, PageType, RowGroup, SchemaElement, Type } from './gen';
import { ParquetBuffer, ParquetCodec, ParquetCompression, ParquetData, ParquetField, ParquetRecord, ParquetType, PrimitiveType, SchemaDefinition } from './declare';
import { ParquetSchema } from './schema';
import * as Shred from './shred';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, FileMetaData, PageHeader, PageType, RowGroup, SchemaElement, Type } from './thrift';
import * as Util from './util';

@@ -301,3 +301,3 @@ // import Fs = require('fs');

function decodeDataPages(buffer: Buffer, column: FieldDefinition, compression: ParquetCompression): ParquetData {
function decodeDataPages(buffer: Buffer, column: ParquetField, compression: ParquetCompression): ParquetData {
const cursor: CursorBuffer = {

@@ -348,3 +348,3 @@ buffer,

function decodeDataPage(cursor: CursorBuffer, header: PageHeader, column: FieldDefinition, compression: ParquetCompression): ParquetData {
function decodeDataPage(cursor: CursorBuffer, header: PageHeader, column: ParquetField, compression: ParquetCompression): ParquetData {
const cursorEnd = cursor.offset + header.compressed_page_size;

@@ -433,3 +433,3 @@ const valueCount = header.data_page_header.num_values;

if (dlvl === column.dLevelMax) {
++valueCountNonNull;
valueCountNonNull++;
}

@@ -466,3 +466,3 @@ }

function decodeDataPageV2(cursor: CursorBuffer, header: PageHeader, column: FieldDefinition, compression: ParquetCompression): ParquetData {
function decodeDataPageV2(cursor: CursorBuffer, header: PageHeader, column: ParquetField, compression: ParquetCompression): ParquetData {
const cursorEnd = cursor.offset + header.compressed_page_size;

@@ -469,0 +469,0 @@

import { PARQUET_CODEC } from './codec';
import { PARQUET_COMPRESSION_METHODS } from './compression';
import { ElementDefinition, FieldDefinition, ParquetBuffer, ParquetCompression, ParquetRecord, RepetitionType, SchemaDefinition } from './declare';
import { materializeRecords, shredRecord } from './shred';
import { FieldDefinition, ParquetBuffer, ParquetCompression, ParquetField, ParquetRecord, RepetitionType, SchemaDefinition } from './declare';
import { materializeRecords, shredBuffer, shredRecord } from './shred';
import { PARQUET_LOGICAL_TYPES } from './types';

@@ -11,5 +11,5 @@

export class ParquetSchema {
public schema: Record<string, ElementDefinition>;
public fields: Record<string, FieldDefinition>;
public fieldList: FieldDefinition[];
public schema: Record<string, FieldDefinition>;
public fields: Record<string, ParquetField>;
public fieldList: ParquetField[];

@@ -28,5 +28,5 @@ /**

*/
findField(path: string): FieldDefinition;
findField(path: string[]): FieldDefinition;
findField(path: any): FieldDefinition {
findField(path: string): ParquetField;
findField(path: string[]): ParquetField;
findField(path: any): ParquetField {
if (path.constructor !== Array) {

@@ -51,4 +51,4 @@ // tslint:disable-next-line:no-parameter-reassignment

*/
findFieldBranch(path: string): FieldDefinition[];
findFieldBranch(path: string[]): FieldDefinition[];
findFieldBranch(path: string): ParquetField[];
findFieldBranch(path: string[]): ParquetField[];
findFieldBranch(path: any): any[] {

@@ -83,2 +83,6 @@ if (path.constructor !== Array) {

}
buffer(): ParquetBuffer {
return shredBuffer(this);
}
}

@@ -102,4 +106,4 @@

path: string[]
): Record<string, FieldDefinition> {
const fieldList: Record<string, FieldDefinition> = {};
): Record<string, ParquetField> {
const fieldList: Record<string, ParquetField> = {};

@@ -128,5 +132,7 @@ for (const name in schema) {

if (opts.fields) {
const cpath = path.concat([name]);
fieldList[name] = {
name,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -141,3 +147,4 @@ rLevelMax,

dLevelMax,
path.concat([name]))
cpath
)
};

@@ -163,2 +170,3 @@ continue;

/* add to schema */
const cpath = path.concat([name]);
fieldList[name] = {

@@ -168,3 +176,4 @@ name,

originalType: typeDef.originalType,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -181,4 +190,4 @@ encoding: opts.encoding,

function listFields(fields: Record<string, FieldDefinition>): FieldDefinition[] {
let list: FieldDefinition[] = [];
function listFields(fields: Record<string, ParquetField>): ParquetField[] {
let list: ParquetField[] = [];
for (const k in fields) {

@@ -185,0 +194,0 @@ list.push(fields[k]);

@@ -1,5 +0,18 @@

import { FieldDefinition, ParquetBuffer, ParquetData, ParquetRecord } from './declare';
import { ParquetBuffer, ParquetData, ParquetField, ParquetRecord } from './declare';
import { ParquetSchema } from './schema';
import * as Types from './types';
export function shredBuffer(schema: ParquetSchema): ParquetBuffer {
const columnData: Record<string, ParquetData> = {};
for (const field of schema.fieldList) {
columnData[field.key] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
return { rowCount: 0, columnData };
}
/**

@@ -26,73 +39,43 @@ * 'Shred' a record into a list of <value, repetition_level, definition_level>

* }
*
*/
export function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {
/* shred the record, this may raise an exception */
const recordShredded: Record<string, ParquetData> = {};
for (const field of schema.fieldList) {
recordShredded[field.path.join()] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
const data = shredBuffer(schema).columnData;
shredRecordInternal(schema.fields, record, recordShredded, 0, 0);
shredRecordFields(schema.fields, record, data, 0, 0);
/* if no error during shredding, add the shredded record to the buffer */
if (!('columnData' in buffer) || !('rowCount' in buffer)) {
buffer.rowCount = 0;
buffer.columnData = {};
for (const field of schema.fieldList) {
const cd: ParquetData = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
buffer.columnData[field.path.join()] = cd;
}
buffer.rowCount = 1;
buffer.columnData = data;
return;
}
buffer.rowCount += 1;
for (const field of schema.fieldList) {
Array.prototype.push.apply(
buffer.columnData[field.path.join()].rlevels,
recordShredded[field.path.join()].rlevels);
Array.prototype.push.apply(
buffer.columnData[field.path.join()].dlevels,
recordShredded[field.path.join()].dlevels);
Array.prototype.push.apply(
buffer.columnData[field.path.join()].values,
recordShredded[field.path.join()].values);
buffer.columnData[field.path.join()].count += recordShredded[field.path.join()].count;
Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
buffer.columnData[field.key].count += data[field.key].count;
}
}
function shredRecordInternal(
fields: Record<string, FieldDefinition>,
function shredRecordFields(
fields: Record<string, ParquetField>,
record: any,
data: Record<string, ParquetData>,
rlvl: number,
dlvl: number
rLevel: number,
dLevel: number
) {
for (const fieldName in fields) {
const field = fields[fieldName];
const fieldType = field.originalType || field.primitiveType;
for (const name in fields) {
const field = fields[name];
// fetch values
let values = [];
if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) {
if (record[fieldName].constructor === Array) {
values = record[fieldName];
if (record && (field.name in record) && record[field.name] !== undefined && record[field.name] !== null) {
if (record[field.name].constructor === Array) {
values = record[field.name];
} else {
values.push(record[fieldName]);
values.push(record[field.name]);
}
}
// check values

@@ -102,3 +85,2 @@ if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {

}
if (values.length > 1 && field.repetitionType !== 'REPEATED') {

@@ -111,12 +93,12 @@ throw new Error(`too many values for field: ${field.name}`);

if (field.isNested) {
shredRecordInternal(
shredRecordFields(
field.fields,
null,
data,
rlvl,
dlvl);
rLevel,
dLevel);
} else {
data[field.path.join()].rlevels.push(rlvl);
data[field.path.join()].dlevels.push(dlvl);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rLevel);
data[field.key].dlevels.push(dLevel);
}

@@ -127,18 +109,19 @@ continue;

// push values
for (let i = 0; i < values.length; ++i) {
// tslint:disable-next-line:variable-name
const rlvl_i = i === 0 ? rlvl : field.rLevelMax;
for (let i = 0; i < values.length; i++) {
const rlvl = i === 0 ? rLevel : field.rLevelMax;
if (field.isNested) {
shredRecordInternal(
shredRecordFields(
field.fields,
values[i],
data,
rlvl_i,
rlvl,
field.dLevelMax);
} else {
data[field.path.join()].values.push(Types.toPrimitive(fieldType, values[i]));
data[field.path.join()].rlevels.push(rlvl_i);
data[field.path.join()].dlevels.push(field.dLevelMax);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rlvl);
data[field.key].dlevels.push(field.dLevelMax);
data[field.key].values.push(Types.toPrimitive(
field.originalType || field.primitiveType,
values[i]
));
}

@@ -167,93 +150,63 @@ }

* }
*
*/
export function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {
const records: ParquetRecord[] = [];
for (let i = 0; i < buffer.rowCount; ++i) {
records.push({});
for (let i = 0; i < buffer.rowCount; i++) records.push({});
for (const key in buffer.columnData) {
materializeColumn(schema, buffer, key, records);
}
for (const k in buffer.columnData) {
const field = schema.findField(k);
const fieldBranch = schema.findFieldBranch(k);
const values = buffer.columnData[k].values[Symbol.iterator]();
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1);
rLevels.fill(0);
for (let i = 0; i < buffer.columnData[k].count; ++i) {
const dLevel = buffer.columnData[k].dlevels[i];
const rLevel = buffer.columnData[k].rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let value = null;
if (dLevel === field.dLevelMax) {
value = Types.fromPrimitive(
field.originalType || field.primitiveType,
values.next().value);
}
materializeRecordField(
records[rLevels[0] - 1],
fieldBranch,
rLevels.slice(1),
dLevel,
value);
}
}
return records;
}
function materializeRecordField(record: any, branch: FieldDefinition[], rLevels: number[], dLevel: number, value: any): void {
const node = branch[0];
function materializeColumn(schema: ParquetSchema, buffer: ParquetBuffer, key: string, records: ParquetRecord[]) {
const data = buffer.columnData[key];
if (!data.count) return;
if (dLevel < node.dLevelMax) {
return;
}
const field = schema.findField(key);
const branch = schema.findFieldBranch(key);
if (branch.length > 1) {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
}
// tslint:disable-next-line:prefer-array-literal
const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
let vIndex = 0;
for (let i = 0; i < data.count; i++) {
const dLevel = data.dlevels[i];
const rLevel = data.rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push({});
let rIndex = 0;
let record = records[rLevels[rIndex++] - 1];
// Internal nodes
for (const step of branch) {
if (step === field) break;
if (dLevel < step.dLevelMax) break;
if (step.repetitionType === 'REPEATED') {
if (!(step.name in record)) record[step.name] = [];
const ix = rLevels[rIndex++];
while (record[step.name].length <= ix) record[step.name].push({});
record = record[step.name][ix];
} else {
record[step.name] = record[step.name] || {};
record = record[step.name];
}
materializeRecordField(
record[node.name][rLevels[0]],
branch.slice(1),
rLevels.slice(1),
dLevel,
value);
} else {
record[node.name] = record[node.name] || {};
materializeRecordField(
record[node.name],
branch.slice(1),
rLevels,
dLevel,
value);
}
} else {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push(null);
// Leaf node
if (dLevel === field.dLevelMax) {
const value = Types.fromPrimitive(
field.originalType || field.primitiveType,
data.values[vIndex]
);
vIndex++;
if (field.repetitionType === 'REPEATED') {
if (!(field.name in record)) record[field.name] = [];
const ix = rLevels[rIndex];
while (record[field.name].length <= ix) record[field.name].push(null);
record[field.name][ix] = value;
} else {
record[field.name] = value;
}
record[node.name][rLevels[0]] = value;
} else {
record[node.name] = value;
}
}
}
import fs = require('fs');
import { TBufferedTransport, TCompactProtocol, TFramedTransport } from 'thrift';
import { FileMetaData, PageHeader } from './gen';
import { FileMetaData, PageHeader } from './thrift';

@@ -183,6 +183,6 @@ export interface WriteStreamOptions {

export function fieldIndexOf(arr: string[][], elem: string[]): number {
for (let j = 0; j < arr.length; ++j) {
for (let j = 0; j < arr.length; j++) {
if (arr[j].length > elem.length) continue;
let m = true;
for (let i = 0; i < elem.length; ++i) {
for (let i = 0; i < elem.length; i++) {
if (arr[j][i] === elem[i] || arr[j][i] === '+' || arr[j][i] === '#') continue;

@@ -189,0 +189,0 @@ if (i >= arr[j].length && arr[j][arr[j].length - 1] === '#') continue;

@@ -5,7 +5,7 @@ import { WriteStream } from 'fs';

import * as Compression from './compression';
import { FieldDefinition, ParquetBuffer, ParquetCodec, ParquetData, PrimitiveType } from './declare';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageHeader, DataPageHeaderV2, Encoding, FieldRepetitionType, FileMetaData, KeyValue, PageHeader, PageType, RowGroup, SchemaElement, Type } from './gen';
import { ParquetBuffer, ParquetCodec, ParquetData, ParquetField, PrimitiveType } from './declare';
import { ParquetSchema } from './schema';
import * as Shred from './shred';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageHeader, DataPageHeaderV2, Encoding, FieldRepetitionType, FileMetaData, KeyValue, PageHeader, PageType, RowGroup, SchemaElement, Type } from './thrift';
import * as Util from './util';

@@ -332,3 +332,3 @@ import Int64 = require('node-int64');

function encodeDataPage(
column: FieldDefinition,
column: ParquetField,
data: ParquetData

@@ -413,3 +413,3 @@ ): {

function encodeDataPageV2(
column: FieldDefinition,
column: ParquetField,
data: ParquetData,

@@ -491,3 +491,3 @@ rowCount: number

function encodeColumnChunk(
column: FieldDefinition,
column: ParquetField,
buffer: ParquetBuffer,

@@ -494,0 +494,0 @@ offset: number,

Sorry, the diffs of the remaining 7 files are not supported yet.
