@dsnp/parquetjs
Comparing version 0.0.0-5db9db to 0.0.0-6396d2
@@ -1,1 +0,1 @@ | ||
export * from "../parquet"; | ||
export * from '../parquet'; |
@@ -1,1 +0,1 @@ | ||
export * from "../parquet"; | ||
export * from '../parquet'; |
// | ||
// Autogenerated by Thrift Compiler (0.16.0) | ||
// Autogenerated by Thrift Compiler (0.18.1) | ||
// | ||
@@ -4,0 +4,0 @@ // DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING |
@@ -1,4 +0,3 @@ | ||
/// <reference types="node" /> | ||
import Long from 'long'; | ||
import { Block } from "../declare"; | ||
import { Block } from '../declare'; | ||
/** | ||
@@ -52,3 +51,3 @@ * @class SplitBlockBloomFilter | ||
*/ | ||
static from(buffer: Buffer, rowCount?: number): SplitBlockBloomFilter; | ||
static from(buffer: Buffer, _rowCount?: number): SplitBlockBloomFilter; | ||
/** | ||
@@ -119,3 +118,3 @@ * @function getBlockIndex: get a block index to insert a hash value for | ||
getNumFilterBlocks(): number; | ||
getFilter(): Array<Block>; | ||
getFilter(): Block[]; | ||
/** | ||
@@ -122,0 +121,0 @@ * @function optNumFilterBytes |
@@ -38,10 +38,3 @@ "use strict"; | ||
static salt = [ | ||
0x47b6137b, | ||
0x44974d91, | ||
0x8824ad5b, | ||
0xa2b7289d, | ||
0x705495c7, | ||
0x2df1424b, | ||
0x9efc4947, | ||
0x5c6bfb31 | ||
0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d, 0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31, | ||
]; | ||
@@ -58,3 +51,3 @@ // How many bits are in a single block: | ||
// Currently this is 1024 | ||
static LOWER_BOUND_BYTES = SplitBlockBloomFilter.NUMBER_OF_BLOCKS * SplitBlockBloomFilter.BITS_PER_BLOCK / 8; | ||
static LOWER_BOUND_BYTES = (SplitBlockBloomFilter.NUMBER_OF_BLOCKS * SplitBlockBloomFilter.BITS_PER_BLOCK) / 8; | ||
// The upper bound of SBBF size, set to default row group size in bytes. | ||
@@ -79,9 +72,9 @@ // Note that the subsequent requirements for an effective bloom filter on a row group this size would mean this | ||
*/ | ||
static from(buffer, rowCount) { | ||
static from(buffer, _rowCount) { | ||
if (buffer.length === 0) { | ||
throw new Error("buffer is empty"); | ||
throw new Error('buffer is empty'); | ||
} | ||
const chunkSize = SplitBlockBloomFilter.WORDS_PER_BLOCK; | ||
const uint32sFromBuf = new Uint32Array(buffer.buffer); | ||
let result = []; | ||
const result = []; | ||
const length = uint32sFromBuf.length; | ||
@@ -91,3 +84,3 @@ for (let index = 0; index < length; index += chunkSize) { | ||
} | ||
let sb = new SplitBlockBloomFilter(); | ||
const sb = new SplitBlockBloomFilter(); | ||
sb.splitBlockFilter = result; | ||
@@ -100,3 +93,2 @@ sb.numBlocks = result.length; | ||
} | ||
; | ||
/** | ||
@@ -128,4 +120,4 @@ * @function getBlockIndex: get a block index to insert a hash value for | ||
static optimalNumOfBlocks(numDistinct, falsePositiveRate) { | ||
let m = -8 * numDistinct / Math.log(1 - Math.pow(falsePositiveRate, 1.0 / 8)); | ||
m = (m + SplitBlockBloomFilter.NUMBER_OF_BLOCKS - 1) & (~SplitBlockBloomFilter.NUMBER_OF_BLOCKS); | ||
let m = (-8 * numDistinct) / Math.log(1 - Math.pow(falsePositiveRate, 1.0 / 8)); | ||
m = (m + SplitBlockBloomFilter.NUMBER_OF_BLOCKS - 1) & ~SplitBlockBloomFilter.NUMBER_OF_BLOCKS; | ||
// Handle overflow: | ||
@@ -153,3 +145,3 @@ const upperShiftL3 = SplitBlockBloomFilter.UPPER_BOUND_BYTES << 3; | ||
static mask(hashValue) { | ||
let result = SplitBlockBloomFilter.initBlock(); | ||
const result = SplitBlockBloomFilter.initBlock(); | ||
for (let i = 0; i < result.length; i++) { | ||
@@ -215,7 +207,17 @@ const y = hashValue.getLowBitsUnsigned() * SplitBlockBloomFilter.salt[i]; | ||
hasher = new xxhasher_1.default(); | ||
isInitialized() { return this.splitBlockFilter.length > 0; } | ||
getFalsePositiveRate() { return this.desiredFalsePositiveRate; } | ||
getNumDistinct() { return this.numDistinctValues; } | ||
getNumFilterBlocks() { return this.splitBlockFilter.length; } | ||
getFilter() { return this.splitBlockFilter; } | ||
isInitialized() { | ||
return this.splitBlockFilter.length > 0; | ||
} | ||
getFalsePositiveRate() { | ||
return this.desiredFalsePositiveRate; | ||
} | ||
getNumDistinct() { | ||
return this.numDistinctValues; | ||
} | ||
getNumFilterBlocks() { | ||
return this.splitBlockFilter.length; | ||
} | ||
getFilter() { | ||
return this.splitBlockFilter; | ||
} | ||
/** | ||
@@ -228,3 +230,3 @@ * @function optNumFilterBytes | ||
getNumFilterBytes() { | ||
return this.numBlocks * SplitBlockBloomFilter.BITS_PER_BLOCK >>> 3; | ||
return (this.numBlocks * SplitBlockBloomFilter.BITS_PER_BLOCK) >>> 3; | ||
} | ||
@@ -240,7 +242,7 @@ /** | ||
if (this.isInitialized()) { | ||
console.error("filter already initialized. options may no longer be changed."); | ||
console.error('filter already initialized. options may no longer be changed.'); | ||
return this; | ||
} | ||
if (proportion <= 0.0 || proportion >= 1.0) { | ||
console.error("falsePositiveProbability. Must be < 1.0 and > 0.0"); | ||
console.error('falsePositiveProbability. Must be < 1.0 and > 0.0'); | ||
return this; | ||
@@ -261,3 +263,3 @@ } | ||
if (this.isInitialized()) { | ||
console.error("filter already initialized. options may no longer be changed."); | ||
console.error('filter already initialized. options may no longer be changed.'); | ||
return this; | ||
@@ -298,3 +300,3 @@ } | ||
if (this.isInitialized()) { | ||
console.error("filter already initialized. options may no longer be changed."); | ||
console.error('filter already initialized. options may no longer be changed.'); | ||
return this; | ||
@@ -307,3 +309,3 @@ } | ||
// numBlocks = Bytes * 8b/Byte * 1Block/256b | ||
this.numBlocks = SplitBlockBloomFilter.nextPwr2(numBytes) * 8 / SplitBlockBloomFilter.BITS_PER_BLOCK; | ||
this.numBlocks = (SplitBlockBloomFilter.nextPwr2(numBytes) * 8) / SplitBlockBloomFilter.BITS_PER_BLOCK; | ||
return this; | ||
@@ -324,10 +326,11 @@ } | ||
if (this.isInitialized()) { | ||
console.error("filter already initialized."); | ||
console.error('filter already initialized.'); | ||
return this; | ||
} | ||
if (!this.hashStrategy.hasOwnProperty("XXHASH")) { | ||
throw new Error("unsupported hash strategy"); | ||
if (!Object.prototype.hasOwnProperty.call(this.hashStrategy, 'XXHASH')) { | ||
throw new Error('unsupported hash strategy'); | ||
} | ||
if (this.numBlocks === 0) { | ||
this.numBlocks = SplitBlockBloomFilter.optimalNumOfBlocks(this.numDistinctValues, this.desiredFalsePositiveRate) >>> 3; | ||
this.numBlocks = | ||
SplitBlockBloomFilter.optimalNumOfBlocks(this.numDistinctValues, this.desiredFalsePositiveRate) >>> 3; | ||
} | ||
@@ -338,4 +341,4 @@ this.splitBlockFilter = Array(this.numBlocks).fill(SplitBlockBloomFilter.initBlock()); | ||
async hash(value) { | ||
if (!this.hashStrategy.hasOwnProperty("XXHASH")) { | ||
throw new Error("unsupported hash strategy"); | ||
if (!Object.prototype.hasOwnProperty.call(this.hashStrategy, 'XXHASH')) { | ||
throw new Error('unsupported hash strategy'); | ||
} | ||
@@ -347,5 +350,5 @@ const hashed = await this.hasher.hash64(value); | ||
if (!hashValue.unsigned) | ||
throw new Error("hashValue must be an unsigned Long"); | ||
throw new Error('hashValue must be an unsigned Long'); | ||
if (!this.isInitialized()) | ||
throw new Error("filter has not been initialized. call init() first"); | ||
throw new Error('filter has not been initialized. call init() first'); | ||
const i = SplitBlockBloomFilter.getBlockIndex(hashValue, this.splitBlockFilter.length); | ||
@@ -362,3 +365,3 @@ SplitBlockBloomFilter.blockInsert(this.splitBlockFilter[i], hashValue); | ||
if (!this.isInitialized()) | ||
throw new Error("filter has not been initialized. call init() first"); | ||
throw new Error('filter has not been initialized. call init() first'); | ||
this.insertHash(await this.hash(value)); | ||
@@ -368,5 +371,5 @@ } | ||
if (!hashValue.unsigned) | ||
throw new Error("hashValue must be an unsigned Long"); | ||
throw new Error('hashValue must be an unsigned Long'); | ||
if (!this.isInitialized()) | ||
throw new Error("filter has not been initialized"); | ||
throw new Error('filter has not been initialized'); | ||
const i = SplitBlockBloomFilter.getBlockIndex(hashValue, this.splitBlockFilter.length); | ||
@@ -384,3 +387,3 @@ return SplitBlockBloomFilter.blockCheck(this.splitBlockFilter[i], hashValue); | ||
if (!this.isInitialized()) | ||
throw new Error("filter has not been initialized"); | ||
throw new Error('filter has not been initialized'); | ||
return this.checkHash(await this.hash(value)); | ||
@@ -387,0 +390,0 @@ } |
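For orientation, a minimal usage sketch of the SplitBlockBloomFilter class being reformatted above. Only init(), hash(), insertHash(), and checkHash() appear verbatim in these hunks; the fluent setters and insert()/check() wrappers, as well as the import path, are assumed from the upstream class.

```ts
// Minimal sketch, not part of the diff. Names marked "assumed" come from the
// upstream SplitBlockBloomFilter class rather than the hunks above.
import SplitBlockBloomFilter from '@dsnp/parquetjs/dist/lib/bloom/sbbf'; // path assumed

async function bloomDemo(): Promise<void> {
  const filter = new SplitBlockBloomFilter()
    .setOptionFalsePositiveRate(0.001) // assumed setter; must be > 0.0 and < 1.0 per the guard above
    .setOptionNumDistinct(10_000);     // assumed setter; sizes the filter via optimalNumOfBlocks()
  filter.init(); // allocates the 256-bit blocks; options can no longer be changed afterwards

  await filter.insert('alice');             // assumed wrapper around hash() + insertHash()
  console.log(await filter.check('alice')); // true
  console.log(await filter.check('bob'));   // false, up to the configured false-positive rate
}

bloomDemo().catch(console.error);
```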
@@ -19,3 +19,3 @@ "use strict"; | ||
class XxHasher { | ||
static h64 = (0, xxhash_wasm_1.default)().then(x => x.h64ToString); | ||
static h64 = (0, xxhash_wasm_1.default)().then((x) => x.h64ToString); | ||
async hashIt(value) { | ||
@@ -42,5 +42,5 @@ return (await XxHasher.h64)(value); | ||
} | ||
throw new Error("unsupported type: " + value); | ||
throw new Error('unsupported type: ' + value); | ||
} | ||
} | ||
exports.default = XxHasher; |
@@ -1,12 +0,12 @@ | ||
import sbbf from "../bloom/sbbf"; | ||
import { ParquetEnvelopeReader } from "../reader"; | ||
import { ColumnChunkData } from "../declare"; | ||
type bloomFilterOffsetData = { | ||
import sbbf from '../bloom/sbbf'; | ||
import { ParquetEnvelopeReader } from '../reader'; | ||
import { ColumnChunkData } from '../declare'; | ||
interface bloomFilterOffsetData { | ||
columnName: string; | ||
offsetBytes: number; | ||
rowGroupIndex: number; | ||
}; | ||
export declare const parseBloomFilterOffsets: (ColumnChunkDataCollection: Array<ColumnChunkData>) => Array<bloomFilterOffsetData>; | ||
export declare const siftAllByteOffsets: (columnChunkDataCollection: Array<ColumnChunkData>) => Array<bloomFilterOffsetData>; | ||
export declare const getBloomFiltersFor: (paths: Array<string>, envelopeReader: InstanceType<typeof ParquetEnvelopeReader>) => Promise<{ | ||
} | ||
export declare const parseBloomFilterOffsets: (ColumnChunkDataCollection: ColumnChunkData[]) => bloomFilterOffsetData[]; | ||
export declare const siftAllByteOffsets: (columnChunkDataCollection: ColumnChunkData[]) => bloomFilterOffsetData[]; | ||
export declare const getBloomFiltersFor: (paths: string[], envelopeReader: InstanceType<typeof ParquetEnvelopeReader>) => Promise<{ | ||
sbbf: sbbf; | ||
@@ -13,0 +13,0 @@ columnName: string; |
@@ -39,5 +39,5 @@ "use strict"; | ||
const toInteger = (buffer) => { | ||
const integer = parseInt(buffer.toString("hex"), 16); | ||
const integer = parseInt(buffer.toString('hex'), 16); | ||
if (integer >= Number.MAX_VALUE) { | ||
throw Error("Number exceeds Number.MAX_VALUE: Godspeed"); | ||
throw Error('Number exceeds Number.MAX_VALUE: Godspeed'); | ||
} | ||
@@ -48,6 +48,6 @@ return integer; | ||
return ColumnChunkDataCollection.map(({ rowGroupIndex, column }) => { | ||
const { bloom_filter_offset: bloomOffset, path_in_schema: pathInSchema, } = column.meta_data || {}; | ||
const { bloom_filter_offset: bloomOffset, path_in_schema: pathInSchema } = column.meta_data || {}; | ||
return { | ||
offsetBytes: toInteger(bloomOffset.buffer), | ||
columnName: pathInSchema.join(","), | ||
columnName: pathInSchema.join(','), | ||
rowGroupIndex, | ||
@@ -78,3 +78,3 @@ }; | ||
const readFilterData = async (offsetBytes, envelopeReader) => { | ||
const { bloomFilterHeader, sizeOfBloomFilterHeader, } = await getBloomFilterHeader(offsetBytes, envelopeReader); | ||
const { bloomFilterHeader, sizeOfBloomFilterHeader } = await getBloomFilterHeader(offsetBytes, envelopeReader); | ||
const { numBytes: filterByteSize } = bloomFilterHeader; | ||
@@ -81,0 +81,0 @@ try { |
@@ -1,7 +0,6 @@ | ||
/// <reference types="node" /> | ||
import parquet_thrift from "../../gen-nodejs/parquet_types"; | ||
import SplitBlockBloomFilter from "../bloom/sbbf"; | ||
import { Block } from "../declare"; | ||
import parquet_thrift from '../../gen-nodejs/parquet_types'; | ||
import SplitBlockBloomFilter from '../bloom/sbbf'; | ||
import { Block } from '../declare'; | ||
import Int64 from 'node-int64'; | ||
export type createSBBFParams = { | ||
export interface createSBBFParams { | ||
numFilterBytes?: number; | ||
@@ -11,9 +10,9 @@ falsePositiveRate?: number; | ||
column?: any; | ||
}; | ||
} | ||
export declare const createSBBF: (params: createSBBFParams) => SplitBlockBloomFilter; | ||
export declare const serializeFilterHeaders: (numberOfBytes: number) => Buffer; | ||
type serializeFilterDataParams = { | ||
filterBlocks: Array<Block>; | ||
interface serializeFilterDataParams { | ||
filterBlocks: Block[]; | ||
filterByteSize: number; | ||
}; | ||
} | ||
export declare const serializeFilterData: (params: serializeFilterDataParams) => Buffer; | ||
@@ -20,0 +19,0 @@ export declare const setFilterOffset: (column: parquet_thrift.ColumnChunk, offset: Int64) => void; |
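The writer-side declarations above pair with the SplitBlockBloomFilter accessors from the earlier hunks. A hedged sketch; the import path is illustrative and the final byte layout is an assumption:

```ts
// Sketch only: createSBBF/serializeFilterHeaders/serializeFilterData are declared above.
import {
  createSBBF,
  serializeFilterHeaders,
  serializeFilterData,
} from '@dsnp/parquetjs/dist/lib/bloomFilterIO/bloomFilterWriter'; // path assumed

const sbbf = createSBBF({ falsePositiveRate: 0.001 }); // returns a SplitBlockBloomFilter (assumed initialized)

const body = serializeFilterData({
  filterBlocks: sbbf.getFilter(),           // Block[] (the Uint32Array blocks)
  filterByteSize: sbbf.getNumFilterBytes(),
});
const header = serializeFilterHeaders(sbbf.getNumFilterBytes());
const filterBytes = Buffer.concat([header, body]); // roughly what gets written at bloom_filter_offset
```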
@@ -5,13 +5,13 @@ 'use strict'; | ||
const PARQUET_COMPRESSION_METHODS = { | ||
'UNCOMPRESSED': { | ||
UNCOMPRESSED: { | ||
deflate: deflate_identity, | ||
inflate: inflate_identity | ||
inflate: inflate_identity, | ||
}, | ||
'GZIP': { | ||
GZIP: { | ||
deflate: deflate_gzip, | ||
inflate: inflate_gzip | ||
inflate: inflate_gzip, | ||
}, | ||
'SNAPPY': { | ||
SNAPPY: { | ||
deflate: deflate_snappy, | ||
inflate: inflate_snappy | ||
inflate: inflate_snappy, | ||
}, | ||
@@ -18,0 +18,0 @@ }; |
@@ -1,5 +0,4 @@ | ||
/// <reference types="node" /> | ||
import { Statistics } from "../gen-nodejs/parquet_types"; | ||
import { ParquetEnvelopeReader } from "./reader"; | ||
import { FileMetaDataExt } from "./declare"; | ||
import { Statistics } from '../gen-nodejs/parquet_types'; | ||
import { ParquetEnvelopeReader } from './reader'; | ||
import { FileMetaDataExt } from './declare'; | ||
export interface BufferReaderOptions { | ||
@@ -24,3 +23,3 @@ maxSpan?: number; | ||
scheduled?: boolean; | ||
queue: Array<BufferReaderQueueRow>; | ||
queue: BufferReaderQueueRow[]; | ||
envelopeReader: ParquetEnvelopeReader; | ||
@@ -27,0 +26,0 @@ constructor(envelopeReader: ParquetEnvelopeReader, options: BufferReaderOptions); |
@@ -37,3 +37,3 @@ "use strict"; | ||
queue.sort((a, b) => a.offset - b.offset); | ||
var subqueue = []; | ||
let subqueue = []; | ||
const readSubqueue = async () => { | ||
@@ -55,5 +55,5 @@ if (!subqueue.length) { | ||
const prev = queue[i - 1]; | ||
if (!prev || (d.offset - (prev.offset + prev.length)) < this.maxSpan) { | ||
if (!prev || d.offset - (prev.offset + prev.length) < this.maxSpan) { | ||
subqueue.push(d); | ||
if ((d.offset + d.length) - subqueue[0].offset > this.maxLength) { | ||
if (d.offset + d.length - subqueue[0].offset > this.maxLength) { | ||
readSubqueue(); | ||
@@ -71,2 +71,1 @@ } | ||
exports.default = BufferReader; | ||
; |
@@ -1,6 +0,5 @@ | ||
/// <reference types="node" /> | ||
import { Cursor, Options } from "./types"; | ||
type ValidValueTypes = "BOOLEAN" | "INT32" | "INT64" | "INT96" | "FLOAT" | "DOUBLE" | "BYTE_ARRAY" | "FIXED_LEN_BYTE_ARRAY"; | ||
export declare const encodeValues: (type: ValidValueTypes | string, values: Array<unknown>, opts: Options) => Buffer; | ||
import { Cursor, Options } from './types'; | ||
type ValidValueTypes = 'BOOLEAN' | 'INT32' | 'INT64' | 'INT96' | 'FLOAT' | 'DOUBLE' | 'BYTE_ARRAY' | 'FIXED_LEN_BYTE_ARRAY'; | ||
export declare const encodeValues: (type: ValidValueTypes | string, values: unknown[], opts: Options) => Buffer; | ||
export declare const decodeValues: (type: ValidValueTypes | string, cursor: Cursor, count: number, opts: Options) => number[] | boolean[] | bigint[] | Buffer[]; | ||
export {}; |
@@ -9,3 +9,3 @@ "use strict"; | ||
function encodeValues_BOOLEAN(values) { | ||
let buf = Buffer.alloc(Math.ceil(values.length / 8)); | ||
const buf = Buffer.alloc(Math.ceil(values.length / 8)); | ||
buf.fill(0); | ||
@@ -20,5 +20,5 @@ for (let i = 0; i < values.length; ++i) { | ||
function decodeValues_BOOLEAN(cursor, count) { | ||
let values = []; | ||
const values = []; | ||
for (let i = 0; i < count; ++i) { | ||
let b = cursor.buffer[cursor.offset + Math.floor(i / 8)]; | ||
const b = cursor.buffer[cursor.offset + Math.floor(i / 8)]; | ||
values.push((b & (1 << i % 8)) > 0); | ||
@@ -32,3 +32,3 @@ } | ||
const scale = opts?.scale || 0; | ||
let buf = Buffer.alloc(4 * values.length); | ||
const buf = Buffer.alloc(4 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
@@ -67,3 +67,3 @@ if (isDecimal) { | ||
const scale = opts?.scale || 0; | ||
let buf = Buffer.alloc(8 * values.length); | ||
const buf = Buffer.alloc(8 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
@@ -84,3 +84,3 @@ if (isDecimal) { | ||
if (opts.originalType === 'DECIMAL' || opts.column?.originalType === 'DECIMAL') { | ||
let columnOptions = opts.column?.originalType ? opts.column : opts; | ||
const columnOptions = opts.column?.originalType ? opts.column : opts; | ||
values = decodeValues_DECIMAL(cursor, count, columnOptions); | ||
@@ -109,3 +109,3 @@ } | ||
} | ||
let values = []; | ||
const values = []; | ||
// by default we prepare the offset and bufferFunction to work with 32bit integers | ||
@@ -131,3 +131,3 @@ let offset = 4; | ||
function encodeValues_INT96(values) { | ||
let buf = Buffer.alloc(12 * values.length); | ||
const buf = Buffer.alloc(12 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
@@ -146,3 +146,3 @@ if (values[i] >= 0) { | ||
function decodeValues_INT96(cursor, count) { | ||
let values = []; | ||
const values = []; | ||
for (let i = 0; i < count; ++i) { | ||
@@ -162,3 +162,3 @@ const low = int53_1.default.readInt64LE(cursor.buffer, cursor.offset); | ||
function encodeValues_FLOAT(values) { | ||
let buf = Buffer.alloc(4 * values.length); | ||
const buf = Buffer.alloc(4 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
@@ -170,3 +170,3 @@ buf.writeFloatLE(values[i], i * 4); | ||
function decodeValues_FLOAT(cursor, count) { | ||
let values = []; | ||
const values = []; | ||
for (let i = 0; i < count; ++i) { | ||
@@ -179,3 +179,3 @@ values.push(cursor.buffer.readFloatLE(cursor.offset)); | ||
function encodeValues_DOUBLE(values) { | ||
let buf = Buffer.alloc(8 * values.length); | ||
const buf = Buffer.alloc(8 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
@@ -187,3 +187,3 @@ buf.writeDoubleLE(values[i], i * 8); | ||
function decodeValues_DOUBLE(cursor, count) { | ||
let values = []; | ||
const values = []; | ||
for (let i = 0; i < count; ++i) { | ||
@@ -202,3 +202,3 @@ values.push(cursor.buffer.readDoubleLE(cursor.offset)); | ||
} | ||
let buf = Buffer.alloc(buf_len); | ||
const buf = Buffer.alloc(buf_len); | ||
let buf_pos = 0; | ||
@@ -213,5 +213,5 @@ for (let i = 0; i < returnedValues.length; i++) { | ||
function decodeValues_BYTE_ARRAY(cursor, count) { | ||
let values = []; | ||
const values = []; | ||
for (let i = 0; i < count; ++i) { | ||
let len = cursor.buffer.readUInt32LE(cursor.offset); | ||
const len = cursor.buffer.readUInt32LE(cursor.offset); | ||
cursor.offset += 4; | ||
@@ -225,3 +225,3 @@ values.push(cursor.buffer.subarray(cursor.offset, cursor.offset + len)); | ||
if (!opts.typeLength) { | ||
throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; | ||
throw 'missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'; | ||
} | ||
@@ -232,3 +232,3 @@ const returnedValues = []; | ||
if (returnedValues[i].length !== opts.typeLength) { | ||
throw "invalid value for FIXED_LEN_BYTE_ARRAY: " + returnedValues[i]; | ||
throw 'invalid value for FIXED_LEN_BYTE_ARRAY: ' + returnedValues[i]; | ||
} | ||
@@ -239,6 +239,6 @@ } | ||
function decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts) { | ||
let values = []; | ||
const values = []; | ||
const typeLength = opts.typeLength ?? (opts.column ? opts.column.typeLength : undefined); | ||
if (!typeLength) { | ||
throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; | ||
throw 'missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'; | ||
} | ||
@@ -253,20 +253,20 @@ for (let i = 0; i < count; ++i) { | ||
switch (type) { | ||
case "BOOLEAN": | ||
case 'BOOLEAN': | ||
return encodeValues_BOOLEAN(values); | ||
case "INT32": | ||
case 'INT32': | ||
return encodeValues_INT32(values, opts); | ||
case "INT64": | ||
case 'INT64': | ||
return encodeValues_INT64(values, opts); | ||
case "INT96": | ||
case 'INT96': | ||
return encodeValues_INT96(values); | ||
case "FLOAT": | ||
case 'FLOAT': | ||
return encodeValues_FLOAT(values); | ||
case "DOUBLE": | ||
case 'DOUBLE': | ||
return encodeValues_DOUBLE(values); | ||
case "BYTE_ARRAY": | ||
case 'BYTE_ARRAY': | ||
return encodeValues_BYTE_ARRAY(values); | ||
case "FIXED_LEN_BYTE_ARRAY": | ||
case 'FIXED_LEN_BYTE_ARRAY': | ||
return encodeValues_FIXED_LEN_BYTE_ARRAY(values, opts); | ||
default: | ||
throw "unsupported type: " + type; | ||
throw 'unsupported type: ' + type; | ||
} | ||
@@ -277,22 +277,22 @@ }; | ||
switch (type) { | ||
case "BOOLEAN": | ||
case 'BOOLEAN': | ||
return decodeValues_BOOLEAN(cursor, count); | ||
case "INT32": | ||
case 'INT32': | ||
return decodeValues_INT32(cursor, count, opts); | ||
case "INT64": | ||
case 'INT64': | ||
return decodeValues_INT64(cursor, count, opts); | ||
case "INT96": | ||
case 'INT96': | ||
return decodeValues_INT96(cursor, count); | ||
case "FLOAT": | ||
case 'FLOAT': | ||
return decodeValues_FLOAT(cursor, count); | ||
case "DOUBLE": | ||
case 'DOUBLE': | ||
return decodeValues_DOUBLE(cursor, count); | ||
case "BYTE_ARRAY": | ||
case 'BYTE_ARRAY': | ||
return decodeValues_BYTE_ARRAY(cursor, count); | ||
case "FIXED_LEN_BYTE_ARRAY": | ||
case 'FIXED_LEN_BYTE_ARRAY': | ||
return decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts); | ||
default: | ||
throw "unsupported type: " + type; | ||
throw 'unsupported type: ' + type; | ||
} | ||
}; | ||
exports.decodeValues = decodeValues; |
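As context for the PLAIN codec hunks above, a small round trip. The cursor shape matches how reader.js constructs cursors later in this diff; the import path is illustrative.

```ts
// Sketch of encodeValues/decodeValues from the PLAIN codec; opts fields such as
// typeLength/scale are only needed for DECIMAL and FIXED_LEN_BYTE_ARRAY types.
import { encodeValues, decodeValues } from '@dsnp/parquetjs/dist/lib/codec/plain'; // path assumed

const buf = encodeValues('INT32', [1, 2, 3], {} as any); // 4 little-endian bytes per value
const cursor = { buffer: buf, offset: 0, size: buf.length };
const values = decodeValues('INT32', cursor, 3, {} as any); // [1, 2, 3]; cursor.offset advances to 12
console.log(values);
```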
@@ -1,4 +0,3 @@ | ||
/// <reference types="node" /> | ||
import { Cursor } from './types'; | ||
export declare const encodeValues: (type: string, values: Array<number>, opts: { | ||
export declare const encodeValues: (type: string, values: number[], opts: { | ||
bitWidth: number; | ||
@@ -5,0 +4,0 @@ disableEnvelope?: boolean; |
@@ -15,15 +15,12 @@ "use strict"; | ||
} | ||
let buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8))); | ||
const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8))); | ||
for (let b = 0; b < opts.bitWidth * values.length; ++b) { | ||
if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) { | ||
buf[Math.floor(b / 8)] |= (1 << (b % 8)); | ||
buf[Math.floor(b / 8)] |= 1 << b % 8; | ||
} | ||
} | ||
return Buffer.concat([ | ||
Buffer.from(varint_1.default.encode(((values.length / 8) << 1) | 1)), | ||
buf | ||
]); | ||
return Buffer.concat([Buffer.from(varint_1.default.encode(((values.length / 8) << 1) | 1)), buf]); | ||
} | ||
function encodeRunRepeated(value, count, opts) { | ||
let buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8)); | ||
const buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8)); | ||
let remainingValue = value; | ||
@@ -36,6 +33,3 @@ // This is encoded LSB to MSB, so we pick off the least | ||
} | ||
return Buffer.concat([ | ||
Buffer.from(varint_1.default.encode(count << 1)), | ||
buf | ||
]); | ||
return Buffer.concat([Buffer.from(varint_1.default.encode(count << 1)), buf]); | ||
} | ||
@@ -98,3 +92,3 @@ function unknownToParsedInt(value) { | ||
} | ||
let envelope = Buffer.alloc(buf.length + 4); | ||
const envelope = Buffer.alloc(buf.length + 4); | ||
envelope.writeUInt32LE(buf.length); | ||
@@ -109,6 +103,6 @@ buf.copy(envelope, 4); | ||
} | ||
let values = new Array(count).fill(0); | ||
const values = new Array(count).fill(0); | ||
for (let b = 0; b < opts.bitWidth * count; ++b) { | ||
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) { | ||
values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth); | ||
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) { | ||
values[Math.floor(b / opts.bitWidth)] |= 1 << b % opts.bitWidth; | ||
} | ||
@@ -120,3 +114,3 @@ } | ||
function decodeRunRepeated(cursor, count, opts) { | ||
var bytesNeededForFixedBitWidth = Math.ceil(opts.bitWidth / 8); | ||
const bytesNeededForFixedBitWidth = Math.ceil(opts.bitWidth / 8); | ||
let value = 0; | ||
@@ -156,3 +150,3 @@ for (let i = 0; i < bytesNeededForFixedBitWidth; ++i) { | ||
if (values.length !== count) { | ||
throw "invalid RLE encoding"; | ||
throw 'invalid RLE encoding'; | ||
} | ||
@@ -159,0 +153,0 @@ return values; |
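To make the bit-packing arithmetic above concrete: a single group of eight 3-bit values produces a 1-byte varint header of ((8 / 8) << 1) | 1 = 3, followed by ceil(3 × 8 / 8) = 3 packed bytes. A hedged sketch with an illustrative import path:

```ts
// Sketch of the RLE codec's bit-packed path; disableEnvelope skips the 4-byte
// length prefix that encodeValues otherwise writes (see the envelope hunk above).
import { encodeValues } from '@dsnp/parquetjs/dist/lib/codec/rle'; // path assumed

const buf = encodeValues('INT32', [0, 1, 2, 3, 4, 5, 6, 7], {
  bitWidth: 3,
  disableEnvelope: true,
});
console.log(buf.length); // expected: 4 (1 varint header byte + 3 bytes of packed values, LSB first)
```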
@@ -1,5 +0,4 @@ | ||
/// <reference types="node" /> | ||
import { PrimitiveType } from "../declare"; | ||
import { ParquetCodec, OriginalType, ParquetField } from "../declare"; | ||
import { Statistics } from "../../gen-nodejs/parquet_types"; | ||
import { PrimitiveType } from '../declare'; | ||
import { ParquetCodec, OriginalType, ParquetField } from '../declare'; | ||
import { Statistics } from '../../gen-nodejs/parquet_types'; | ||
export interface Options { | ||
@@ -16,3 +15,3 @@ typeLength: number; | ||
cache?: unknown; | ||
dictionary?: Array<number>; | ||
dictionary?: number[]; | ||
num_values?: number; | ||
@@ -19,0 +18,0 @@ rLevelMax?: number; |
@@ -1,8 +0,5 @@ | ||
/// <reference types="node" /> | ||
interface PARQUET_COMPRESSION_METHODS { | ||
[key: string]: { | ||
deflate: (value: any) => Buffer | Promise<Buffer>; | ||
inflate: (value: any) => Buffer | Promise<Buffer>; | ||
}; | ||
} | ||
type PARQUET_COMPRESSION_METHODS = Record<string, { | ||
deflate: (value: any) => Buffer | Promise<Buffer>; | ||
inflate: (value: any) => Buffer | Promise<Buffer>; | ||
}>; | ||
export declare const PARQUET_COMPRESSION_METHODS: PARQUET_COMPRESSION_METHODS; | ||
@@ -9,0 +6,0 @@ /** |
@@ -6,3 +6,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.inflate = exports.deflate = exports.PARQUET_COMPRESSION_METHODS = void 0; | ||
exports.PARQUET_COMPRESSION_METHODS = void 0; | ||
exports.deflate = deflate; | ||
exports.inflate = inflate; | ||
const zlib_1 = __importDefault(require("zlib")); | ||
@@ -13,18 +15,18 @@ const snappyjs_1 = __importDefault(require("snappyjs")); | ||
exports.PARQUET_COMPRESSION_METHODS = { | ||
'UNCOMPRESSED': { | ||
UNCOMPRESSED: { | ||
deflate: deflate_identity, | ||
inflate: inflate_identity | ||
inflate: inflate_identity, | ||
}, | ||
'GZIP': { | ||
GZIP: { | ||
deflate: deflate_gzip, | ||
inflate: inflate_gzip | ||
inflate: inflate_gzip, | ||
}, | ||
'SNAPPY': { | ||
SNAPPY: { | ||
deflate: deflate_snappy, | ||
inflate: inflate_snappy | ||
inflate: inflate_snappy, | ||
}, | ||
'BROTLI': { | ||
BROTLI: { | ||
deflate: deflate_brotli, | ||
inflate: inflate_brotli | ||
} | ||
inflate: inflate_brotli, | ||
}, | ||
}; | ||
@@ -40,3 +42,2 @@ /** | ||
} | ||
exports.deflate = deflate; | ||
function deflate_identity(value) { | ||
@@ -54,7 +55,7 @@ return buffer_from_result(value); | ||
const compressedContent = await (0, brotli_wasm_1.compress)(value /*, { | ||
mode: 0, | ||
quality: 8, | ||
lgwin: 22 | ||
} | ||
*/); | ||
mode: 0, | ||
quality: 8, | ||
lgwin: 22 | ||
} | ||
*/); | ||
return Buffer.from(compressedContent); | ||
@@ -71,3 +72,2 @@ } | ||
} | ||
exports.inflate = inflate; | ||
async function inflate_identity(value) { | ||
@@ -74,0 +74,0 @@ return buffer_from_result(value); |
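The reader hunks later in this diff call parquet_compression.inflate(opts.compression, buffer); deflate is assumed to take the same (method, value) shape. A sketch:

```ts
// Sketch of the compression helpers; the import path is illustrative and the
// deflate(method, value) signature is assumed to mirror inflate as used by the reader.
import { deflate, inflate, PARQUET_COMPRESSION_METHODS } from '@dsnp/parquetjs/dist/lib/compression';

async function compressionDemo(): Promise<void> {
  console.log(Object.keys(PARQUET_COMPRESSION_METHODS)); // ['UNCOMPRESSED', 'GZIP', 'SNAPPY', 'BROTLI']
  const original = Buffer.from('hello parquet');
  const compressed = await deflate('GZIP', original);
  const restored = await inflate('GZIP', compressed);
  console.log(restored.equals(original)); // true
}

compressionDemo().catch(console.error);
```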
@@ -1,6 +0,5 @@ | ||
/// <reference types="node" /> | ||
import parquet_thrift from "../gen-nodejs/parquet_types"; | ||
import { Statistics, OffsetIndex, ColumnIndex, PageType, DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, IndexPageHeader } from "../gen-nodejs/parquet_types"; | ||
import SplitBlockBloomFilter from "./bloom/sbbf"; | ||
import { createSBBFParams } from "./bloomFilterIO/bloomFilterWriter"; | ||
import parquet_thrift from '../gen-nodejs/parquet_types'; | ||
import { Statistics, OffsetIndex, ColumnIndex, PageType, DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, IndexPageHeader } from '../gen-nodejs/parquet_types'; | ||
import SplitBlockBloomFilter from './bloom/sbbf'; | ||
import { createSBBFParams } from './bloomFilterIO/bloomFilterWriter'; | ||
import Int64 from 'node-int64'; | ||
@@ -13,5 +12,3 @@ export type ParquetCodec = 'PLAIN' | 'RLE'; | ||
export type OriginalType = 'UTF8' | 'MAP' | 'LIST' | 'ENUM' | 'DECIMAL' | 'DATE' | 'TIME_MILLIS' | 'TIME_MICROS' | 'TIMESTAMP_MILLIS' | 'TIMESTAMP_MICROS' | 'UINT_8' | 'UINT_16' | 'UINT_32' | 'UINT_64' | 'INT_8' | 'INT_16' | 'INT_32' | 'INT_64' | 'JSON' | 'BSON' | 'INTERVAL'; | ||
export interface SchemaDefinition { | ||
[string: string]: FieldDefinition; | ||
} | ||
export type SchemaDefinition = Record<string, FieldDefinition>; | ||
export interface FieldDefinition { | ||
@@ -62,5 +59,3 @@ type?: ParquetType; | ||
} | ||
export interface ParquetRecord { | ||
[key: string]: any; | ||
} | ||
export type ParquetRecord = Record<string, any>; | ||
export interface ColumnChunkData { | ||
@@ -104,3 +99,3 @@ rowGroupIndex: number; | ||
count?: number; | ||
dictionary?: Array<unknown>; | ||
dictionary?: unknown[]; | ||
column?: parquet_thrift.ColumnChunk; | ||
@@ -151,5 +146,4 @@ useDictionary?: boolean; | ||
headerSize?: number; | ||
constructor(); | ||
} | ||
export type WriterOptions = { | ||
export interface WriterOptions { | ||
pageIndex?: boolean; | ||
@@ -169,4 +163,4 @@ pageSize?: number; | ||
highWaterMark?: number; | ||
}; | ||
export type Page = { | ||
} | ||
export interface Page { | ||
page: Buffer; | ||
@@ -178,3 +172,3 @@ statistics: parquet_thrift.Statistics; | ||
count?: number; | ||
}; | ||
} | ||
export {}; |
@@ -9,10 +9,6 @@ "use strict"; | ||
const parquet_types_1 = __importDefault(require("../gen-nodejs/parquet_types")); | ||
; | ||
class NewPageHeader extends parquet_types_1.default.PageHeader { | ||
offset; | ||
headerSize; | ||
constructor() { | ||
super(); | ||
} | ||
} | ||
exports.NewPageHeader = NewPageHeader; |
@@ -1,2 +0,2 @@ | ||
import { FieldDefinition, ParquetType, SchemaDefinition } from "./declare"; | ||
import { FieldDefinition, ParquetType, SchemaDefinition } from './declare'; | ||
export declare function createStringField(optional?: boolean, fieldOptions?: FieldDefinition): FieldDefinition; | ||
@@ -3,0 +3,0 @@ export declare function createBooleanField(optional?: boolean, fieldOptions?: FieldDefinition): FieldDefinition; |
"use strict"; | ||
// Helper functions for creating fields | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.createListField = exports.createStructListField = exports.createStructField = exports.createTimestampField = exports.createDecimalField = exports.createDoubleField = exports.createFloatField = exports.createIntField = exports.createBooleanField = exports.createStringField = void 0; | ||
exports.createStringField = createStringField; | ||
exports.createBooleanField = createBooleanField; | ||
exports.createIntField = createIntField; | ||
exports.createFloatField = createFloatField; | ||
exports.createDoubleField = createDoubleField; | ||
exports.createDecimalField = createDecimalField; | ||
exports.createTimestampField = createTimestampField; | ||
exports.createStructField = createStructField; | ||
exports.createStructListField = createStructListField; | ||
exports.createListField = createListField; | ||
function createStringField(optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, optional, type: 'UTF8' }; | ||
} | ||
exports.createStringField = createStringField; | ||
function createBooleanField(optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, optional, type: 'BOOLEAN' }; | ||
} | ||
exports.createBooleanField = createBooleanField; | ||
function createIntField(size, optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, optional, type: `INT${size}` }; | ||
} | ||
exports.createIntField = createIntField; | ||
function createFloatField(optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, optional, type: 'FLOAT' }; | ||
} | ||
exports.createFloatField = createFloatField; | ||
function createDoubleField(optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, optional, type: 'DOUBLE' }; | ||
} | ||
exports.createDoubleField = createDoubleField; | ||
function createDecimalField(precision, optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, precision, optional, type: 'FLOAT' }; | ||
} | ||
exports.createDecimalField = createDecimalField; | ||
function createTimestampField(optional = true, fieldOptions = {}) { | ||
return { ...fieldOptions, optional, type: 'TIMESTAMP_MILLIS' }; | ||
} | ||
exports.createTimestampField = createTimestampField; | ||
function createStructField(fields, optional = true) { | ||
@@ -39,3 +41,2 @@ return { | ||
} | ||
exports.createStructField = createStructField; | ||
function createStructListField(fields, optional = true) { | ||
@@ -52,3 +53,3 @@ return { | ||
}, | ||
} | ||
}, | ||
}, | ||
@@ -58,3 +59,2 @@ }, | ||
} | ||
exports.createStructListField = createStructListField; | ||
function createListField(type, optional = true, elementOptions = { optional: true }) { | ||
@@ -78,2 +78,1 @@ return { | ||
} | ||
exports.createListField = createListField; |
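The field helpers above (now exported directly rather than re-assigned through exports.*) compose into the SchemaDefinition record from declare.d.ts. A hedged sketch; the import paths are illustrative:

```ts
// Sketch combining the helper functions above into a schema; paths assumed.
import { ParquetSchema } from '@dsnp/parquetjs';
import * as fields from '@dsnp/parquetjs/dist/lib/fields';

const schema = new ParquetSchema({
  name: fields.createStringField(),            // optional UTF8 by default
  active: fields.createBooleanField(false),    // pass false to make the field required
  score: fields.createDoubleField(),
  createdAt: fields.createTimestampField(),    // TIMESTAMP_MILLIS
  tags: fields.createListField('UTF8'),
  address: fields.createStructField({
    street: fields.createStringField(),
    zip: fields.createIntField(32),            // INT32 via the `INT${size}` template above
  }),
});
```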
@@ -34,11 +34,11 @@ "use strict"; | ||
const unsupportedFields = [ | ||
"$ref", | ||
"multipleOf", | ||
"allOf", | ||
"anyOf", | ||
"oneOf", | ||
"not", | ||
"additionalItems", | ||
"enum", | ||
"extends", | ||
'$ref', | ||
'multipleOf', | ||
'allOf', | ||
'anyOf', | ||
'oneOf', | ||
'not', | ||
'additionalItems', | ||
'enum', | ||
'extends', | ||
]; | ||
@@ -68,3 +68,4 @@ for (const field in unsupportedFields) { | ||
switch (jsonSchema.required) { | ||
case true: return true; | ||
case true: | ||
return true; | ||
case undefined: | ||
@@ -81,3 +82,3 @@ case false: | ||
if (!fieldValue.items || !fieldValue.items.type) { | ||
throw new UnsupportedJsonSchemaError("Array field with no values found."); | ||
throw new UnsupportedJsonSchemaError('Array field with no values found.'); | ||
} | ||
@@ -135,3 +136,3 @@ switch (fieldValue.items.type) { | ||
if (!isJsonSchemaSupported(jsonSchema)) { | ||
throw new UnsupportedJsonSchemaError("Unsupported fields found"); | ||
throw new UnsupportedJsonSchemaError('Unsupported fields found'); | ||
} | ||
@@ -138,0 +139,0 @@ const schema = {}; |
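For context, the module above backs JSON-schema conversion; ParquetSchema.fromJsonSchema is assumed to be the public wrapper, and it rejects the unsupported keywords listed at the top of the hunk ($ref, oneOf, enum, and so on). A sketch:

```ts
// Sketch only: fromJsonSchema is assumed to be the entry point for this module.
import { ParquetSchema } from '@dsnp/parquetjs';

const schema = ParquetSchema.fromJsonSchema({
  type: 'object',
  properties: {
    name: { type: 'string' },
    tags: { type: 'array', items: { type: 'string' } }, // arrays must carry items.type, per the guard above
  },
});
```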
@@ -1,2 +0,1 @@ | ||
/// <reference types="node" /> | ||
import Int64 from 'node-int64'; | ||
@@ -9,3 +8,3 @@ import parquet_thrift from '../gen-nodejs/parquet_types'; | ||
import { Options } from './codec/types'; | ||
import { S3Client } from "@aws-sdk/client-s3"; | ||
import { S3Client } from '@aws-sdk/client-s3'; | ||
/** | ||
@@ -18,4 +17,4 @@ * A parquet cursor is used to retrieve rows from a parquet file in order | ||
schema: parquet_schema.ParquetSchema; | ||
columnList: Array<Array<unknown>>; | ||
rowGroup: Array<unknown>; | ||
columnList: unknown[][]; | ||
rowGroup: unknown[]; | ||
rowGroupIndex: number; | ||
@@ -29,3 +28,3 @@ cursorIndex: number; | ||
*/ | ||
constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, schema: parquet_schema.ParquetSchema, columnList: Array<Array<unknown>>); | ||
constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, schema: parquet_schema.ParquetSchema, columnList: unknown[][]); | ||
/** | ||
@@ -102,3 +101,3 @@ * Retrieve the next row from the cursor. Returns a row or NULL if the end | ||
*/ | ||
getCursor(columnList?: Array<Array<unknown>>): ParquetCursor; | ||
getCursor(columnList?: unknown[][]): ParquetCursor; | ||
getBloomFiltersFor(columnNames: string[]): Promise<Record<string, { | ||
@@ -134,3 +133,3 @@ sbbf: import("./bloom/sbbf").default; | ||
id: number; | ||
fileSize: Function | number; | ||
fileSize: number | (() => Promise<number>); | ||
default_dictionary_size: number; | ||
@@ -145,7 +144,7 @@ metadata?: FileMetaDataExt; | ||
static openUrl(url: Parameter | URL | string, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>; | ||
constructor(readFn: (offset: number, length: number, file?: string) => Promise<Buffer>, closeFn: () => unknown, fileSize: Function | number, options?: BufferReaderOptions, metadata?: FileMetaDataExt); | ||
constructor(readFn: (offset: number, length: number, file?: string) => Promise<Buffer>, closeFn: () => unknown, fileSize: number | (() => Promise<number>), options?: BufferReaderOptions, metadata?: FileMetaDataExt); | ||
read(offset: number, length: number, file?: string): Promise<Buffer>; | ||
readHeader(): Promise<void>; | ||
getColumn(path: string | parquet_thrift.ColumnChunk, row_group: RowGroupExt | number | string | null): ColumnChunkExt; | ||
getAllColumnChunkDataFor(paths: Array<string>, row_groups?: Array<RowGroupExt>): { | ||
getAllColumnChunkDataFor(paths: string[], row_groups?: RowGroupExt[]): { | ||
rowGroupIndex: number; | ||
@@ -156,4 +155,4 @@ column: ColumnChunkExt; | ||
readColumnIndex(path: string | ColumnChunkExt, row_group: RowGroupExt | number, opts: Options): Promise<parquet_thrift.ColumnIndex>; | ||
readPage(column: ColumnChunkExt, page: parquet_thrift.PageLocation | number, records: Array<Record<string, unknown>>, opts: Options): Promise<Record<string, unknown>[]>; | ||
readRowGroup(schema: parquet_schema.ParquetSchema, rowGroup: RowGroupExt, columnList: Array<Array<unknown>>): Promise<parquet_shredder.RecordBuffer>; | ||
readPage(column: ColumnChunkExt, page: parquet_thrift.PageLocation | number, records: Record<string, unknown>[], opts: Options): Promise<Record<string, unknown>[]>; | ||
readRowGroup(schema: parquet_schema.ParquetSchema, rowGroup: RowGroupExt, columnList: unknown[][]): Promise<parquet_shredder.RecordBuffer>; | ||
readColumnChunk(schema: parquet_schema.ParquetSchema, colChunk: ColumnChunkExt, opts?: Options): Promise<PageData>; | ||
@@ -160,0 +159,0 @@ readFooter(): Promise<parquet_thrift.FileMetaData>; |
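A short usage sketch of the reader surface declared above. Only openFile/openBuffer/openS3/openUrl, getCursor, getBloomFiltersFor, and the cursor's next() appear in this diff; close() is assumed from the upstream class.

```ts
// Sketch of reading rows with ParquetReader; not part of the diff.
import { ParquetReader } from '@dsnp/parquetjs';

async function readAll(path: string): Promise<void> {
  const reader = await ParquetReader.openFile(path); // openBuffer/openS3/openUrl follow the same pattern
  const cursor = reader.getCursor();                 // or getCursor([['name'], ['address', 'zip']]) to project columns
  let record: unknown;
  while ((record = await cursor.next())) {           // next() resolves to null once every row group is exhausted
    console.log(record);
  }
  await reader.close();                              // assumed; not shown in the hunks above
}

readAll('example.parquet').catch(console.error);
```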
@@ -43,3 +43,3 @@ "use strict"; | ||
const client_s3_1 = require("@aws-sdk/client-s3"); | ||
const { getBloomFiltersFor, } = bloomFilterReader; | ||
const { getBloomFiltersFor } = bloomFilterReader; | ||
/** | ||
@@ -93,3 +93,3 @@ * Parquet File Magic String | ||
} | ||
let rowBuffer = await this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList); | ||
const rowBuffer = await this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList); | ||
this.rowGroup = parquet_shredder.materializeRecords(this.schema, rowBuffer); | ||
@@ -110,3 +110,2 @@ this.rowGroupIndex++; | ||
} | ||
; | ||
/** | ||
@@ -128,7 +127,7 @@ * A parquet reader allows retrieving the rows from a parquet file in order. | ||
static async openFile(filePath, options) { | ||
let envelopeReader = await ParquetEnvelopeReader.openFile(filePath, options); | ||
const envelopeReader = await ParquetEnvelopeReader.openFile(filePath, options); | ||
return this.openEnvelopeReader(envelopeReader, options); | ||
} | ||
static async openBuffer(buffer, options) { | ||
let envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer, options); | ||
const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer, options); | ||
return this.openEnvelopeReader(envelopeReader, options); | ||
@@ -146,5 +145,5 @@ } | ||
try { | ||
let envelopeReader = 'function' === typeof client['headObject'] ? | ||
await ParquetEnvelopeReader.openS3(client, params, options) : // S3 client v2 | ||
await ParquetEnvelopeReader.openS3v3(client, params, options); // S3 client v3 | ||
const envelopeReader = 'function' === typeof client['headObject'] | ||
? await ParquetEnvelopeReader.openS3(client, params, options) // S3 client v2 | ||
: await ParquetEnvelopeReader.openS3v3(client, params, options); // S3 client v3 | ||
return this.openEnvelopeReader(envelopeReader, options); | ||
@@ -163,3 +162,3 @@ } | ||
static async openUrl(params, options) { | ||
let envelopeReader = await ParquetEnvelopeReader.openUrl(params, options); | ||
const envelopeReader = await ParquetEnvelopeReader.openUrl(params, options); | ||
return this.openEnvelopeReader(envelopeReader, options); | ||
@@ -173,3 +172,3 @@ } | ||
await envelopeReader.readHeader(); | ||
let metadata = await envelopeReader.readFooter(); | ||
const metadata = await envelopeReader.readFooter(); | ||
return new ParquetReader(metadata, envelopeReader, opts); | ||
@@ -197,3 +196,3 @@ } | ||
if (o && typeof o === 'object') { | ||
Object.keys(o).forEach(key => o[key] = convert(o[key])); | ||
Object.keys(o).forEach((key) => (o[key] = convert(o[key]))); | ||
if (o.parquetType === 'CTIME') { | ||
@@ -209,10 +208,10 @@ return new Date(o.value); | ||
// Go through all PageLocation objects and set the proper prototype | ||
metadata.row_groups.forEach(rowGroup => { | ||
rowGroup.columns.forEach(column => { | ||
metadata.row_groups.forEach((rowGroup) => { | ||
rowGroup.columns.forEach((column) => { | ||
if (column.offsetIndex) { | ||
Promise.resolve(column.offsetIndex).then(offset => (offset.page_locations.forEach(d => { | ||
Promise.resolve(column.offsetIndex).then((offset) => offset.page_locations.forEach((d) => { | ||
if (Array.isArray(d)) { | ||
Object.setPrototypeOf(d, parquet_types_1.default.PageLocation.prototype); | ||
} | ||
}))); | ||
})); | ||
} | ||
@@ -228,3 +227,3 @@ }); | ||
if (this.metadata.row_groups && !this.metadata.json && !opts.rawStatistics) { | ||
this.metadata.row_groups.forEach(row => row.columns.forEach(col => { | ||
this.metadata.row_groups.forEach((row) => row.columns.forEach((col) => { | ||
const stats = col.meta_data.statistics; | ||
@@ -254,3 +253,3 @@ if (stats) { | ||
let record = null; | ||
while (record = await cursor.next()) { | ||
while ((record = await cursor.next())) { | ||
yield record; | ||
@@ -272,3 +271,3 @@ } | ||
} | ||
columnList = columnList.map((x) => x.constructor === Array ? x : [x]); | ||
columnList = columnList.map((x) => (x.constructor === Array ? x : [x])); | ||
return new ParquetCursor(this.metadata, this.envelopeReader, this.schema, columnList); | ||
@@ -303,4 +302,4 @@ } | ||
getMetadata() { | ||
let md = {}; | ||
for (let kv of this.metadata.key_value_metadata) { | ||
const md = {}; | ||
for (const kv of this.metadata.key_value_metadata) { | ||
md[kv.key] = kv.value; | ||
@@ -316,7 +315,7 @@ } | ||
if (typeof value === 'object') { | ||
for (let k in value) { | ||
for (const k in value) { | ||
if (value[k] instanceof Date) { | ||
value[k].toJSON = () => ({ | ||
value[k].toJSON = () => JSON.stringify({ | ||
parquetType: 'CTIME', | ||
value: value[k].valueOf() | ||
value: value[k].valueOf(), | ||
}); | ||
@@ -336,3 +335,3 @@ } | ||
parquetType: 'INT64', | ||
value: [...value.buffer] | ||
value: [...value.buffer], | ||
}; | ||
@@ -390,5 +389,5 @@ } | ||
static async openFile(filePath, options) { | ||
let fileStat = await parquet_util.fstat(filePath); | ||
let fileDescriptor = await parquet_util.fopen(filePath); | ||
let readFn = (offset, length, file) => { | ||
const fileStat = await parquet_util.fstat(filePath); | ||
const fileDescriptor = await parquet_util.fopen(filePath); | ||
const readFn = (offset, length, file) => { | ||
if (file) { | ||
@@ -399,7 +398,7 @@ return Promise.reject('external references are not supported'); | ||
}; | ||
let closeFn = parquet_util.fclose.bind(undefined, fileDescriptor); | ||
const closeFn = parquet_util.fclose.bind(undefined, fileDescriptor); | ||
return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size, options); | ||
} | ||
static async openBuffer(buffer, options) { | ||
let readFn = (offset, length, file) => { | ||
const readFn = (offset, length, file) => { | ||
if (file) { | ||
@@ -410,16 +409,19 @@ return Promise.reject('external references are not supported'); | ||
}; | ||
let closeFn = () => ({}); | ||
const closeFn = () => ({}); | ||
return new ParquetEnvelopeReader(readFn, closeFn, buffer.length, options); | ||
} | ||
static async openS3(client, params, options) { | ||
let fileStat = async () => client.headObject(params).promise().then((d) => d.ContentLength); | ||
let readFn = async (offset, length, file) => { | ||
const fileStat = async () => client | ||
.headObject(params) | ||
.promise() | ||
.then((d) => d.ContentLength); | ||
const readFn = async (offset, length, file) => { | ||
if (file) { | ||
return Promise.reject('external references are not supported'); | ||
} | ||
let Range = `bytes=${offset}-${offset + length - 1}`; | ||
let res = await client.getObject(Object.assign({ Range }, params)).promise(); | ||
const Range = `bytes=${offset}-${offset + length - 1}`; | ||
const res = await client.getObject(Object.assign({ Range }, params)).promise(); | ||
return Promise.resolve(res.Body); | ||
}; | ||
let closeFn = () => ({}); | ||
const closeFn = () => ({}); | ||
return new ParquetEnvelopeReader(readFn, closeFn, fileStat, options); | ||
@@ -430,3 +432,6 @@ } | ||
try { | ||
let headObjectCommand = await client.send(new client_s3_1.HeadObjectCommand(params)); | ||
const headObjectCommand = await client.send(new client_s3_1.HeadObjectCommand(params)); | ||
if (headObjectCommand.ContentLength === undefined) { | ||
throw new Error('Content Length is undefined!'); | ||
} | ||
return Promise.resolve(headObjectCommand.ContentLength); | ||
@@ -436,3 +441,3 @@ } | ||
// having params match command names makes e.message clear to user | ||
return Promise.reject("rejected headObjectCommand: " + e.message); | ||
return Promise.reject('rejected headObjectCommand: ' + e.message); | ||
} | ||
@@ -442,3 +447,3 @@ }; | ||
if (file) { | ||
return Promise.reject("external references are not supported"); | ||
return Promise.reject('external references are not supported'); | ||
} | ||
@@ -454,3 +459,3 @@ const Range = `bytes=${offset}-${offset + length - 1}`; | ||
}; | ||
let closeFn = () => ({}); | ||
const closeFn = () => ({}); | ||
return new ParquetEnvelopeReader(readFn, closeFn, fileStat, options); | ||
@@ -469,5 +474,5 @@ } | ||
const chunks = []; | ||
readable.on("data", (chunk) => chunks.push(chunk)); | ||
readable.on("error", reject); | ||
readable.on("end", () => resolve(Buffer.concat(chunks))); | ||
readable.on('data', (chunk) => chunks.push(chunk)); | ||
readable.on('error', reject); | ||
readable.on('end', () => resolve(Buffer.concat(chunks))); | ||
}); | ||
@@ -487,11 +492,11 @@ } | ||
const base = baseArr.slice(0, baseArr.length - 1).join('/') + '/'; | ||
let defaultHeaders = params.headers || {}; | ||
let filesize = async () => { | ||
const defaultHeaders = params.headers || {}; | ||
const filesize = async () => { | ||
const { headers } = await (0, cross_fetch_1.default)(params.url); | ||
return headers.get('Content-Length'); | ||
return Number(headers.get('Content-Length')) || 0; | ||
}; | ||
let readFn = async (offset, length, file) => { | ||
let url = file ? base + file : params.url; | ||
let range = `bytes=${offset}-${offset + length - 1}`; | ||
let headers = Object.assign({}, defaultHeaders, { range }); | ||
const readFn = async (offset, length, file) => { | ||
const url = file ? base + file : params.url; | ||
const range = `bytes=${offset}-${offset + length - 1}`; | ||
const headers = Object.assign({}, defaultHeaders, { range }); | ||
const response = await (0, cross_fetch_1.default)(url, { headers }); | ||
@@ -502,3 +507,3 @@ const arrayBuffer = await response.arrayBuffer(); | ||
}; | ||
let closeFn = () => ({}); | ||
const closeFn = () => ({}); | ||
return new ParquetEnvelopeReader(readFn, closeFn, filesize, options); | ||
@@ -543,3 +548,3 @@ } | ||
} | ||
column = parsedRowGroup.columns.find(d => d.meta_data.path_in_schema.join(',') === path); | ||
column = parsedRowGroup.columns.find((d) => d.meta_data.path_in_schema.join(',') === path); | ||
if (!column) { | ||
@@ -558,9 +563,9 @@ throw `Column ${path} Not Found`; | ||
} | ||
return row_groups.flatMap((rowGroup, index) => paths.map(columnName => ({ | ||
return row_groups.flatMap((rowGroup, index) => paths.map((columnName) => ({ | ||
rowGroupIndex: index, | ||
column: this.getColumn(columnName, rowGroup) | ||
column: this.getColumn(columnName, rowGroup), | ||
}))); | ||
} | ||
readOffsetIndex(path, row_group, opts) { | ||
let column = this.getColumn(path, row_group); | ||
const column = this.getColumn(path, row_group); | ||
if (column.offsetIndex) { | ||
@@ -573,3 +578,3 @@ return Promise.resolve(column.offsetIndex); | ||
const data = this.read(+column.offset_index_offset, column.offset_index_length).then((data) => { | ||
let offset_index = new parquet_types_1.default.OffsetIndex(); | ||
const offset_index = new parquet_types_1.default.OffsetIndex(); | ||
parquet_util.decodeThrift(offset_index, data); | ||
@@ -585,3 +590,3 @@ Object.defineProperty(offset_index, 'column', { value: column, enumerable: false }); | ||
readColumnIndex(path, row_group, opts) { | ||
let column = this.getColumn(path, row_group); | ||
const column = this.getColumn(path, row_group); | ||
if (column.columnIndex) { | ||
@@ -594,3 +599,3 @@ return Promise.resolve(column.columnIndex); | ||
const data = this.read(+column.column_index_offset, column.column_index_length).then((buf) => { | ||
let column_index = new parquet_types_1.default.ColumnIndex(); | ||
const column_index = new parquet_types_1.default.ColumnIndex(); | ||
parquet_util.decodeThrift(column_index, buf); | ||
@@ -601,6 +606,6 @@ Object.defineProperty(column_index, 'column', { value: column }); | ||
if (column_index.max_values) { | ||
column_index.max_values = column_index.max_values.map(max_value => decodeStatisticsValue(max_value, field)); | ||
column_index.max_values = column_index.max_values.map((max_value) => decodeStatisticsValue(max_value, field)); | ||
} | ||
if (column_index.min_values) { | ||
column_index.min_values = column_index.min_values.map(min_value => decodeStatisticsValue(min_value, field)); | ||
column_index.min_values = column_index.min_values.map((min_value) => decodeStatisticsValue(min_value, field)); | ||
} | ||
@@ -631,4 +636,4 @@ return column_index; | ||
Object.defineProperty(chunk, 'column', { value: column }); | ||
let data = { | ||
columnData: { [chunk.column.meta_data.path_in_schema.join(',')]: chunk } | ||
const data = { | ||
columnData: { [chunk.column.meta_data.path_in_schema.join(',')]: chunk }, | ||
}; | ||
@@ -638,9 +643,9 @@ return parquet_shredder.materializeRecords(this.schema, data, records); | ||
async readRowGroup(schema, rowGroup, columnList) { | ||
var buffer = { | ||
const buffer = { | ||
rowCount: +rowGroup.num_rows, | ||
columnData: {}, | ||
pageRowCount: 0, | ||
pages: {} | ||
pages: {}, | ||
}; | ||
for (let colChunk of rowGroup.columns) { | ||
for (const colChunk of rowGroup.columns) { | ||
const colMetadata = colChunk.meta_data; | ||
@@ -656,7 +661,7 @@ const colKey = colMetadata.path_in_schema; | ||
async readColumnChunk(schema, colChunk, opts) { | ||
let metadata = colChunk.meta_data; | ||
let field = schema.findField(metadata.path_in_schema); | ||
let type = parquet_util.getThriftEnum(parquet_types_1.default.Type, metadata.type); | ||
let compression = parquet_util.getThriftEnum(parquet_types_1.default.CompressionCodec, metadata.codec); | ||
let pagesOffset = +metadata.data_page_offset; | ||
const metadata = colChunk.meta_data; | ||
const field = schema.findField(metadata.path_in_schema); | ||
const type = parquet_util.getThriftEnum(parquet_types_1.default.Type, metadata.type); | ||
const compression = parquet_util.getThriftEnum(parquet_types_1.default.CompressionCodec, metadata.codec); | ||
const pagesOffset = +metadata.data_page_offset; | ||
let pagesSize = +metadata.total_compressed_size; | ||
@@ -672,3 +677,3 @@ if (!colChunk.file_path) { | ||
column: field, | ||
num_values: metadata.num_values | ||
num_values: metadata.num_values, | ||
}); | ||
@@ -680,3 +685,3 @@ // If this exists and is greater than zero then we need to have an offset | ||
await this.read(offset, size, colChunk.file_path).then(async (buffer) => { | ||
await decodePage({ offset: 0, buffer, size: buffer.length }, opts).then(dict => { | ||
await decodePage({ offset: 0, buffer, size: buffer.length }, opts).then((dict) => { | ||
opts.dictionary = opts.dictionary || dict.dictionary; | ||
@@ -692,15 +697,15 @@ }); | ||
} | ||
let trailerLen = PARQUET_MAGIC.length + 4; | ||
let offset = this.fileSize - trailerLen; | ||
let trailerBuf = await this.read(offset, trailerLen); | ||
const trailerLen = PARQUET_MAGIC.length + 4; | ||
const offset = this.fileSize - trailerLen; | ||
const trailerBuf = await this.read(offset, trailerLen); | ||
if (trailerBuf.subarray(4).toString() != PARQUET_MAGIC) { | ||
throw 'not a valid parquet file'; | ||
} | ||
let metadataSize = trailerBuf.readUInt32LE(0); | ||
let metadataOffset = this.fileSize - metadataSize - trailerLen; | ||
const metadataSize = trailerBuf.readUInt32LE(0); | ||
const metadataOffset = this.fileSize - metadataSize - trailerLen; | ||
if (metadataOffset < PARQUET_MAGIC.length) { | ||
throw 'invalid metadata size'; | ||
} | ||
let metadataBuf = await this.read(metadataOffset, metadataSize); | ||
let metadata = new parquet_types_1.default.FileMetaData(); | ||
const metadataBuf = await this.read(metadataOffset, metadataSize); | ||
const metadata = new parquet_types_1.default.FileMetaData(); | ||
parquet_util.decodeThrift(metadata, metadataBuf); | ||
@@ -770,5 +775,4 @@ return metadata; | ||
case 'DICTIONARY_PAGE': | ||
const dict = await decodeDictionaryPage(cursor, pageHeader, opts); | ||
page = { | ||
dictionary: dict | ||
dictionary: await decodeDictionaryPage(cursor, pageHeader, opts), | ||
}; | ||
@@ -786,8 +790,8 @@ break; | ||
opts = opts || {}; | ||
let cursor = { | ||
const cursor = { | ||
buffer: buffer, | ||
offset: 0, | ||
size: buffer.length | ||
size: buffer.length, | ||
}; | ||
let data = { | ||
const data = { | ||
rlevels: [], | ||
@@ -797,3 +801,3 @@ dlevels: [], | ||
pageHeaders: [], | ||
count: 0 | ||
count: 0, | ||
}; | ||
@@ -810,9 +814,9 @@ while (cursor.offset < cursor.size && (!opts.num_values || data.dlevels.length < opts.num_values)) { | ||
if (opts.dictionary && pageData.useDictionary) { | ||
pageData.values = pageData.values.map(d => opts.dictionary[d]); | ||
pageData.values = pageData.values.map((d) => opts.dictionary[d]); | ||
} | ||
let length = pageData.rlevels != undefined ? pageData.rlevels.length : 0; | ||
const length = pageData.rlevels != undefined ? pageData.rlevels.length : 0; | ||
for (let i = 0; i < length; i++) { | ||
data.rlevels.push(pageData.rlevels[i]); | ||
data.dlevels.push(pageData.dlevels[i]); | ||
let value = pageData.values[i]; | ||
const value = pageData.values[i]; | ||
if (value !== undefined) { | ||
@@ -832,15 +836,14 @@ data.values.push(value); | ||
buffer: cursor.buffer.subarray(cursor.offset, cursorEnd), | ||
size: cursorEnd - cursor.offset | ||
size: cursorEnd - cursor.offset, | ||
}; | ||
cursor.offset = cursorEnd; | ||
if (opts.compression && opts.compression !== 'UNCOMPRESSED') { | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, dictCursor.buffer.subarray(dictCursor.offset, cursorEnd)); | ||
const valuesBuf = await parquet_compression.inflate(opts.compression, dictCursor.buffer.subarray(dictCursor.offset, cursorEnd)); | ||
dictCursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
size: valuesBuf.length, | ||
}; | ||
} | ||
return decodeValues(opts.column.primitiveType, opts.column.encoding, dictCursor, (header.dictionary_page_header).num_values, opts) | ||
.map((d) => d.toString()); | ||
return decodeValues(opts.column.primitiveType, opts.column.encoding, dictCursor, header.dictionary_page_header.num_values, opts).map((d) => d.toString()); | ||
} | ||
@@ -850,18 +853,20 @@ async function decodeDataPage(cursor, header, opts) { | ||
const dataPageHeader = header.data_page_header; | ||
let valueCount = dataPageHeader.num_values; | ||
let valueEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.encoding); | ||
const valueCount = dataPageHeader.num_values; | ||
const valueEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.encoding); | ||
let valuesBufCursor = cursor; | ||
if (opts.compression && opts.compression !== 'UNCOMPRESSED') { | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd)); | ||
const valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd)); | ||
valuesBufCursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
size: valuesBuf.length, | ||
}; | ||
} | ||
/* read repetition levels */ | ||
let rLevelEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.repetition_level_encoding); | ||
const rLevelEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.repetition_level_encoding); | ||
let rLevels = new Array(valueCount); | ||
if (opts.rLevelMax > 0) { | ||
rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, valuesBufCursor, valueCount, { bitWidth: parquet_util.getBitWidth(opts.rLevelMax) }); | ||
rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, valuesBufCursor, valueCount, { | ||
bitWidth: parquet_util.getBitWidth(opts.rLevelMax), | ||
}); | ||
} | ||
@@ -872,6 +877,8 @@ else { | ||
/* read definition levels */ | ||
let dLevelEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.definition_level_encoding); | ||
const dLevelEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.definition_level_encoding); | ||
let dLevels = new Array(valueCount); | ||
if (opts.dLevelMax > 0) { | ||
dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, valuesBufCursor, valueCount, { bitWidth: parquet_util.getBitWidth(opts.dLevelMax) }); | ||
dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, valuesBufCursor, valueCount, { | ||
bitWidth: parquet_util.getBitWidth(opts.dLevelMax), | ||
}); | ||
} | ||
@@ -883,3 +890,3 @@ else { | ||
let valueCountNonNull = 0; | ||
for (let dlvl of dLevels) { | ||
for (const dlvl of dLevels) { | ||
if (dlvl === opts.dLevelMax) { | ||
@@ -889,3 +896,3 @@ ++valueCountNonNull; | ||
} | ||
let values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, { | ||
const values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, { | ||
typeLength: opts.column.typeLength, | ||
@@ -897,3 +904,3 @@ bitWidth: opts.column.typeLength, | ||
scale: opts.column.scale, | ||
name: opts.column.name | ||
name: opts.column.name, | ||
}); | ||
@@ -906,3 +913,3 @@ cursor.offset = cursorEnd; | ||
count: valueCount, | ||
useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY' | ||
useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY', | ||
}; | ||
@@ -921,3 +928,3 @@ } | ||
bitWidth: parquet_util.getBitWidth(opts.rLevelMax), | ||
disableEnvelope: true | ||
disableEnvelope: true, | ||
}); | ||
@@ -933,3 +940,3 @@ } | ||
bitWidth: parquet_util.getBitWidth(opts.dLevelMax), | ||
disableEnvelope: true | ||
disableEnvelope: true, | ||
}); | ||
@@ -943,13 +950,13 @@ } | ||
if (dataPageHeaderV2.is_compressed) { | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd)); | ||
const valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd)); | ||
valuesBufCursor = { | ||
buffer: valuesBuf, | ||
offset: 0, | ||
size: valuesBuf.length | ||
size: valuesBuf.length, | ||
}; | ||
cursor.offset = cursorEnd; | ||
} | ||
let values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, { | ||
const values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, { | ||
bitWidth: opts.column.typeLength, | ||
...opts.column | ||
...opts.column, | ||
}); | ||
@@ -961,3 +968,3 @@ return { | ||
count: valueCount, | ||
useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY' | ||
useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY', | ||
}; | ||
@@ -967,4 +974,4 @@ } | ||
let schema = {}; | ||
schemaElements.forEach(schemaElement => { | ||
let repetitionType = parquet_util.getThriftEnum(parquet_types_1.default.FieldRepetitionType, schemaElement.repetition_type); | ||
schemaElements.forEach((schemaElement) => { | ||
const repetitionType = parquet_util.getThriftEnum(parquet_types_1.default.FieldRepetitionType, schemaElement.repetition_type); | ||
let optional = false; | ||
@@ -982,3 +989,2 @@ let repeated = false; | ||
} | ||
; | ||
if (schemaElement.num_children != undefined && schemaElement.num_children > 0) { | ||
@@ -992,9 +998,9 @@ schema[schemaElement.name] = { | ||
value: schema, | ||
enumerable: false | ||
enumerable: false, | ||
}, | ||
num_children: { | ||
value: schemaElement.num_children, | ||
enumerable: false | ||
} | ||
}) | ||
enumerable: false, | ||
}, | ||
}), | ||
}; | ||
@@ -1015,3 +1021,3 @@ /* move the schema pointer to the children */ | ||
scale: schemaElement.scale, | ||
precision: schemaElement.precision | ||
precision: schemaElement.precision, | ||
}; | ||
@@ -1018,0 +1024,0 @@ } |
@@ -9,3 +9,3 @@ import { SchemaDefinition, ParquetField } from './declare'; | ||
fields: Record<string, ParquetField>; | ||
fieldList: Array<ParquetField>; | ||
fieldList: ParquetField[]; | ||
/** | ||
@@ -22,7 +22,7 @@ * Create a new schema from JSON Schema (json-schema.org) | ||
*/ | ||
findField(path: string | Array<string>): ParquetField; | ||
findField(path: string | string[]): ParquetField; | ||
/** | ||
* Retrieve a field definition and all the field's ancestors | ||
*/ | ||
findFieldBranch(path: string | Array<string>): ParquetField[]; | ||
findFieldBranch(path: string | string[]): ParquetField[]; | ||
} |
@@ -31,3 +31,2 @@ "use strict"; | ||
const jsonSchema_1 = require("./jsonSchema"); | ||
const PARQUET_COLUMN_KEY_SEPARATOR = '.'; | ||
/** | ||
@@ -60,3 +59,3 @@ * A parquet file schema | ||
if (typeof path === 'string') { | ||
path = path.split(","); | ||
path = path.split(','); | ||
} | ||
@@ -68,3 +67,3 @@ else { | ||
for (; path.length > 1; path.shift()) { | ||
let fields = n[path[0]]?.fields; | ||
const fields = n[path[0]]?.fields; | ||
if (isDefined(fields)) { | ||
@@ -81,9 +80,9 @@ n = fields; | ||
if (typeof path === 'string') { | ||
path = path.split(","); | ||
path = path.split(','); | ||
} | ||
let branch = []; | ||
const branch = []; | ||
let n = this.fields; | ||
for (; path.length > 0; path.shift()) { | ||
branch.push(n[path[0]]); | ||
let fields = n[path[0]].fields; | ||
const fields = n[path[0]].fields; | ||
if (path.length > 1 && isDefined(fields)) { | ||
@@ -97,3 +96,2 @@ n = fields; | ||
exports.ParquetSchema = ParquetSchema; | ||
; | ||
function buildFields(schema, rLevelParentMax, dLevelParentMax, path) { | ||
@@ -109,5 +107,5 @@ if (!rLevelParentMax) { | ||
} | ||
let fieldList = {}; | ||
const fieldList = {}; | ||
let fieldErrors = []; | ||
for (let name in schema) { | ||
for (const name in schema) { | ||
const opts = schema[name]; | ||
@@ -142,3 +140,3 @@ /* field repetition type */ | ||
fieldCount: Object.keys(opts.fields).length, | ||
fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat(name)) | ||
fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat(name)), | ||
}; | ||
@@ -149,3 +147,3 @@ if (opts.type == 'LIST' || opts.type == 'MAP') | ||
} | ||
let nameWithPath = (`${name}` || 'missing name'); | ||
let nameWithPath = `${name}` || 'missing name'; | ||
if (path && path.length > 0) { | ||
@@ -156,3 +154,3 @@ nameWithPath = `${path}.${nameWithPath}`; | ||
if (!typeDef) { | ||
fieldErrors.push(`Invalid parquet type: ${(opts.type || "missing type")}, for Column: ${nameWithPath}`); | ||
fieldErrors.push(`Invalid parquet type: ${opts.type || 'missing type'}, for Column: ${nameWithPath}`); | ||
continue; | ||
@@ -175,3 +173,3 @@ } | ||
// Default scale to 0 per https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal | ||
if (typeof opts.scale === "undefined") | ||
if (typeof opts.scale === 'undefined') | ||
opts.scale = 0; | ||
@@ -194,3 +192,3 @@ fieldErrors = fieldErrors.concat(errorsForDecimalOpts(typeDef.originalType, typeDef.primitiveType, opts, nameWithPath)); | ||
rLevelMax: rLevelMax, | ||
dLevelMax: dLevelMax | ||
dLevelMax: dLevelMax, | ||
}; | ||
@@ -205,3 +203,3 @@ } | ||
let list = []; | ||
for (let k in fields) { | ||
for (const k in fields) { | ||
list.push(fields[k]); | ||
@@ -226,6 +224,6 @@ const nestedFields = fields[k].fields; | ||
} | ||
else if (primitiveType === "INT64" && opts.precision > 18) { | ||
else if (primitiveType === 'INT64' && opts.precision > 18) { | ||
fieldErrors.push(`invalid schema for type: ${type} and primitive type: ${primitiveType} for Column: ${columnName}, can not handle precision over 18`); | ||
} | ||
if (typeof opts.scale === "undefined" || opts.scale < 0) { | ||
if (typeof opts.scale === 'undefined' || opts.scale < 0) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, scale is required to be 0 or greater`); | ||
@@ -232,0 +230,0 @@ } |
@@ -53,2 +53,2 @@ import { ParquetSchema } from './schema'; | ||
*/ | ||
export declare const materializeRecords: (schema: ParquetSchema, buffer: RecordBuffer, records?: Array<Record<string, unknown>>) => Record<string, unknown>[]; | ||
export declare const materializeRecords: (schema: ParquetSchema, buffer: RecordBuffer, records?: Record<string, unknown>[]) => Record<string, unknown>[]; |
@@ -30,4 +30,4 @@ "use strict"; | ||
/* shred the record, this may raise an exception */ | ||
var recordShredded = {}; | ||
for (let field of schema.fieldList) { | ||
const recordShredded = {}; | ||
for (const field of schema.fieldList) { | ||
recordShredded[field.path.join(',')] = { | ||
@@ -38,3 +38,3 @@ dlevels: [], | ||
distinct_values: new Set(), | ||
count: 0 | ||
count: 0, | ||
}; | ||
@@ -49,4 +49,4 @@ } | ||
buffer.pages = {}; | ||
for (let field of schema.fieldList) { | ||
let path = field.path.join(','); | ||
for (const field of schema.fieldList) { | ||
const path = field.path.join(','); | ||
buffer.columnData[path] = { | ||
@@ -57,3 +57,3 @@ dlevels: [], | ||
distinct_values: new Set(), | ||
count: 0 | ||
count: 0, | ||
}; | ||
@@ -65,6 +65,6 @@ buffer.pages[path] = []; | ||
buffer.pageRowCount += 1; | ||
for (let field of schema.fieldList) { | ||
let path = field.path.join(','); | ||
let record = recordShredded[path]; | ||
let column = buffer.columnData[path]; | ||
for (const field of schema.fieldList) { | ||
const path = field.path.join(','); | ||
const record = recordShredded[path]; | ||
const column = buffer.columnData[path]; | ||
for (let i = 0; i < record.rlevels.length; i++) { | ||
@@ -77,3 +77,3 @@ column.rlevels.push(record.rlevels[i]); | ||
} | ||
[...recordShredded[path].distinct_values].forEach(value => buffer.columnData[path].distinct_values.add(value)); | ||
[...recordShredded[path].distinct_values].forEach((value) => buffer.columnData[path].distinct_values.add(value)); | ||
buffer.columnData[path].count += recordShredded[path].count; | ||
@@ -84,3 +84,3 @@ } | ||
function shredRecordInternal(fields, record, data, rlvl, dlvl) { | ||
for (let fieldName in fields) { | ||
for (const fieldName in fields) { | ||
const field = fields[fieldName]; | ||
@@ -91,7 +91,8 @@ const fieldType = field.originalType || field.primitiveType; | ||
let values = []; | ||
if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) { | ||
if (record && fieldName in record && record[fieldName] !== undefined && record[fieldName] !== null) { | ||
if (Array.isArray(record[fieldName])) { | ||
values = record[fieldName]; | ||
} | ||
else if (ArrayBuffer.isView(record[fieldName])) { // checks if any typed array | ||
else if (ArrayBuffer.isView(record[fieldName])) { | ||
// checks if any typed array | ||
if (record[fieldName] instanceof Uint8Array) { | ||
@@ -168,7 +169,7 @@ // wrap in a buffer, since not supported by parquet_thrift | ||
} | ||
for (let k in buffer.columnData) { | ||
for (const k in buffer.columnData) { | ||
const field = schema.findField(k); | ||
const fieldBranch = schema.findFieldBranch(k); | ||
let values = buffer.columnData[k].values[Symbol.iterator](); | ||
let rLevels = new Array(field.rLevelMax + 1); | ||
const values = buffer.columnData[k].values[Symbol.iterator](); | ||
const rLevels = new Array(field.rLevelMax + 1); | ||
rLevels.fill(0); | ||
@@ -199,3 +200,3 @@ for (let i = 0; i < buffer.columnData[k].count; ++i) { | ||
if (branch.length > 1) { | ||
if (node.repetitionType === "REPEATED") { | ||
if (node.repetitionType === 'REPEATED') { | ||
if (!(node.name in record)) { | ||
@@ -217,3 +218,3 @@ record[node.name] = []; | ||
else { | ||
if (node.repetitionType === "REPEATED") { | ||
if (node.repetitionType === 'REPEATED') { | ||
if (!(node.name in record)) { | ||
@@ -220,0 +221,0 @@ record[node.name] = []; |
@@ -1,10 +0,10 @@ | ||
import { PrimitiveType, OriginalType, ParquetType, FieldDefinition, ParquetField } from "./declare"; | ||
import { Options } from "./codec/types"; | ||
type ParquetTypeDataObject = { | ||
import { PrimitiveType, OriginalType, ParquetType, FieldDefinition, ParquetField } from './declare'; | ||
import { Options } from './codec/types'; | ||
interface ParquetTypeDataObject { | ||
primitiveType?: PrimitiveType; | ||
toPrimitive: Function; | ||
fromPrimitive?: Function; | ||
toPrimitive: (x: any) => any; | ||
fromPrimitive?: (x: any) => any; | ||
originalType?: OriginalType; | ||
typeLength?: number; | ||
}; | ||
} | ||
export declare function getParquetTypeDataObject(type: ParquetType, field?: ParquetField | Options | FieldDefinition): ParquetTypeDataObject; | ||
@@ -11,0 +11,0 @@ /** |
'use strict'; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
var desc = Object.getOwnPropertyDescriptor(m, k); | ||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { | ||
desc = { enumerable: true, get: function() { return m[k]; } }; | ||
} | ||
Object.defineProperty(o, k2, desc); | ||
}) : (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
o[k2] = m[k]; | ||
})); | ||
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { | ||
Object.defineProperty(o, "default", { enumerable: true, value: v }); | ||
}) : function(o, v) { | ||
o["default"] = v; | ||
}); | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); | ||
__setModuleDefault(result, mod); | ||
return result; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.fromPrimitive = exports.toPrimitive = exports.getParquetTypeDataObject = void 0; | ||
// Thanks to https://github.com/kbajalc/parquets for some of the code. | ||
const BSON = __importStar(require("bson")); | ||
exports.getParquetTypeDataObject = getParquetTypeDataObject; | ||
exports.toPrimitive = toPrimitive; | ||
exports.fromPrimitive = fromPrimitive; | ||
// BSON uses top level awaits, so use require for now | ||
const bsonSerialize = require('bson').serialize; | ||
const bsonDeserialize = require('bson').deserialize; | ||
function getParquetTypeDataObject(type, field) { | ||
@@ -36,3 +16,3 @@ if (type === 'DECIMAL') { | ||
typeLength: field.typeLength, | ||
toPrimitive: toPrimitive_FIXED_LEN_BYTE_ARRAY_DECIMAL | ||
toPrimitive: toPrimitive_FIXED_LEN_BYTE_ARRAY_DECIMAL, | ||
}; | ||
@@ -45,3 +25,3 @@ } | ||
typeLength: field.typeLength, | ||
toPrimitive: toPrimitive_BYTE_ARRAY_DECIMAL | ||
toPrimitive: toPrimitive_BYTE_ARRAY_DECIMAL, | ||
}; | ||
@@ -53,3 +33,3 @@ } | ||
originalType: 'DECIMAL', | ||
toPrimitive: toPrimitive_INT64 | ||
toPrimitive: toPrimitive_INT64, | ||
}; | ||
@@ -62,3 +42,2 @@ } | ||
} | ||
exports.getParquetTypeDataObject = getParquetTypeDataObject; | ||
const PARQUET_LOGICAL_TYPES = new Set([ | ||
@@ -93,131 +72,131 @@ 'BOOLEAN', | ||
'MAP', | ||
'LIST' | ||
'LIST', | ||
]); | ||
const PARQUET_LOGICAL_TYPE_DATA = { | ||
'BOOLEAN': { | ||
BOOLEAN: { | ||
primitiveType: 'BOOLEAN', | ||
toPrimitive: toPrimitive_BOOLEAN, | ||
fromPrimitive: fromPrimitive_BOOLEAN | ||
fromPrimitive: fromPrimitive_BOOLEAN, | ||
}, | ||
'INT32': { | ||
INT32: { | ||
primitiveType: 'INT32', | ||
toPrimitive: toPrimitive_INT32 | ||
toPrimitive: toPrimitive_INT32, | ||
}, | ||
'INT64': { | ||
INT64: { | ||
primitiveType: 'INT64', | ||
toPrimitive: toPrimitive_INT64 | ||
toPrimitive: toPrimitive_INT64, | ||
}, | ||
'INT96': { | ||
INT96: { | ||
primitiveType: 'INT96', | ||
toPrimitive: toPrimitive_INT96 | ||
toPrimitive: toPrimitive_INT96, | ||
}, | ||
'FLOAT': { | ||
FLOAT: { | ||
primitiveType: 'FLOAT', | ||
toPrimitive: toPrimitive_FLOAT | ||
toPrimitive: toPrimitive_FLOAT, | ||
}, | ||
'DOUBLE': { | ||
DOUBLE: { | ||
primitiveType: 'DOUBLE', | ||
toPrimitive: toPrimitive_DOUBLE | ||
toPrimitive: toPrimitive_DOUBLE, | ||
}, | ||
'BYTE_ARRAY': { | ||
BYTE_ARRAY: { | ||
primitiveType: 'BYTE_ARRAY', | ||
toPrimitive: toPrimitive_BYTE_ARRAY | ||
toPrimitive: toPrimitive_BYTE_ARRAY, | ||
}, | ||
'FIXED_LEN_BYTE_ARRAY': { | ||
FIXED_LEN_BYTE_ARRAY: { | ||
primitiveType: 'FIXED_LEN_BYTE_ARRAY', | ||
toPrimitive: toPrimitive_BYTE_ARRAY | ||
toPrimitive: toPrimitive_BYTE_ARRAY, | ||
}, | ||
'UTF8': { | ||
UTF8: { | ||
primitiveType: 'BYTE_ARRAY', | ||
originalType: 'UTF8', | ||
toPrimitive: toPrimitive_UTF8, | ||
fromPrimitive: fromPrimitive_UTF8 | ||
fromPrimitive: fromPrimitive_UTF8, | ||
}, | ||
'ENUM': { | ||
ENUM: { | ||
primitiveType: 'BYTE_ARRAY', | ||
originalType: 'UTF8', | ||
toPrimitive: toPrimitive_UTF8, | ||
fromPrimitive: fromPrimitive_UTF8 | ||
fromPrimitive: fromPrimitive_UTF8, | ||
}, | ||
'TIME_MILLIS': { | ||
TIME_MILLIS: { | ||
primitiveType: 'INT32', | ||
originalType: 'TIME_MILLIS', | ||
toPrimitive: toPrimitive_TIME_MILLIS | ||
toPrimitive: toPrimitive_TIME_MILLIS, | ||
}, | ||
'TIME_MICROS': { | ||
TIME_MICROS: { | ||
primitiveType: 'INT64', | ||
originalType: 'TIME_MICROS', | ||
toPrimitive: toPrimitive_TIME_MICROS | ||
toPrimitive: toPrimitive_TIME_MICROS, | ||
}, | ||
'DATE': { | ||
DATE: { | ||
primitiveType: 'INT32', | ||
originalType: 'DATE', | ||
toPrimitive: toPrimitive_DATE, | ||
fromPrimitive: fromPrimitive_DATE | ||
fromPrimitive: fromPrimitive_DATE, | ||
}, | ||
'TIMESTAMP_MILLIS': { | ||
TIMESTAMP_MILLIS: { | ||
primitiveType: 'INT64', | ||
originalType: 'TIMESTAMP_MILLIS', | ||
toPrimitive: toPrimitive_TIMESTAMP_MILLIS, | ||
fromPrimitive: fromPrimitive_TIMESTAMP_MILLIS | ||
fromPrimitive: fromPrimitive_TIMESTAMP_MILLIS, | ||
}, | ||
'TIMESTAMP_MICROS': { | ||
TIMESTAMP_MICROS: { | ||
primitiveType: 'INT64', | ||
originalType: 'TIMESTAMP_MICROS', | ||
toPrimitive: toPrimitive_TIMESTAMP_MICROS, | ||
fromPrimitive: fromPrimitive_TIMESTAMP_MICROS | ||
fromPrimitive: fromPrimitive_TIMESTAMP_MICROS, | ||
}, | ||
'UINT_8': { | ||
UINT_8: { | ||
primitiveType: 'INT32', | ||
originalType: 'UINT_8', | ||
toPrimitive: toPrimitive_UINT8 | ||
toPrimitive: toPrimitive_UINT8, | ||
}, | ||
'UINT_16': { | ||
UINT_16: { | ||
primitiveType: 'INT32', | ||
originalType: 'UINT_16', | ||
toPrimitive: toPrimitive_UINT16 | ||
toPrimitive: toPrimitive_UINT16, | ||
}, | ||
'UINT_32': { | ||
UINT_32: { | ||
primitiveType: 'INT32', | ||
originalType: 'UINT_32', | ||
toPrimitive: toPrimitive_UINT32 | ||
toPrimitive: toPrimitive_UINT32, | ||
}, | ||
'UINT_64': { | ||
UINT_64: { | ||
primitiveType: 'INT64', | ||
originalType: 'UINT_64', | ||
toPrimitive: toPrimitive_UINT64 | ||
toPrimitive: toPrimitive_UINT64, | ||
}, | ||
'INT_8': { | ||
INT_8: { | ||
primitiveType: 'INT32', | ||
originalType: 'INT_8', | ||
toPrimitive: toPrimitive_INT8 | ||
toPrimitive: toPrimitive_INT8, | ||
}, | ||
'INT_16': { | ||
INT_16: { | ||
primitiveType: 'INT32', | ||
originalType: 'INT_16', | ||
toPrimitive: toPrimitive_INT16 | ||
toPrimitive: toPrimitive_INT16, | ||
}, | ||
'INT_32': { | ||
INT_32: { | ||
primitiveType: 'INT32', | ||
originalType: 'INT_32', | ||
toPrimitive: toPrimitive_INT32 | ||
toPrimitive: toPrimitive_INT32, | ||
}, | ||
'INT_64': { | ||
INT_64: { | ||
primitiveType: 'INT64', | ||
originalType: 'INT_64', | ||
toPrimitive: toPrimitive_INT64 | ||
toPrimitive: toPrimitive_INT64, | ||
}, | ||
'JSON': { | ||
JSON: { | ||
primitiveType: 'BYTE_ARRAY', | ||
originalType: 'JSON', | ||
toPrimitive: toPrimitive_JSON, | ||
fromPrimitive: fromPrimitive_JSON | ||
fromPrimitive: fromPrimitive_JSON, | ||
}, | ||
'BSON': { | ||
BSON: { | ||
primitiveType: 'BYTE_ARRAY', | ||
originalType: 'BSON', | ||
toPrimitive: toPrimitive_BSON, | ||
fromPrimitive: fromPrimitive_BSON | ||
fromPrimitive: fromPrimitive_BSON, | ||
}, | ||
'INTERVAL': { | ||
INTERVAL: { | ||
primitiveType: 'FIXED_LEN_BYTE_ARRAY', | ||
@@ -227,3 +206,3 @@ originalType: 'INTERVAL', | ||
toPrimitive: toPrimitive_INTERVAL, | ||
fromPrimitive: fromPrimitive_INTERVAL | ||
fromPrimitive: fromPrimitive_INTERVAL, | ||
}, | ||
@@ -237,3 +216,3 @@ MAP: { | ||
toPrimitive: toPrimitive_LIST, | ||
} | ||
}, | ||
}; | ||
@@ -254,7 +233,6 @@ /** | ||
if (!isParquetType(type)) { | ||
throw 'invalid type: ' + type || "undefined"; | ||
throw 'invalid type: ' + type || 'undefined'; | ||
} | ||
return getParquetTypeDataObject(type, field).toPrimitive(value); | ||
} | ||
exports.toPrimitive = toPrimitive; | ||
/** | ||
@@ -266,3 +244,3 @@ * Convert a value from it's internal/underlying primitive representation to | ||
if (!isParquetType(type)) { | ||
throw 'invalid type: ' + type || "undefined"; | ||
throw 'invalid type: ' + type || 'undefined'; | ||
} | ||
@@ -277,3 +255,2 @@ const typeFromPrimitive = getParquetTypeDataObject(type, field).fromPrimitive; | ||
} | ||
exports.fromPrimitive = fromPrimitive; | ||
function toPrimitive_BOOLEAN(value) { | ||
@@ -437,3 +414,3 @@ return !!value; | ||
function fromPrimitive_UTF8(value) { | ||
return (value !== undefined && value !== null) ? value.toString() : value; | ||
return value !== undefined && value !== null ? value.toString() : value; | ||
} | ||
@@ -447,6 +424,6 @@ function toPrimitive_JSON(value) { | ||
function toPrimitive_BSON(value) { | ||
return Buffer.from(BSON.serialize(value)); | ||
return Buffer.from(bsonSerialize(value)); | ||
} | ||
function fromPrimitive_BSON(value) { | ||
return BSON.deserialize(value); | ||
return bsonDeserialize(value); | ||
} | ||
@@ -456,6 +433,6 @@ function toNumberInternal(typeName, value) { | ||
switch (typeof value) { | ||
case "string": | ||
case 'string': | ||
numberValue = parseInt(value, 10); | ||
break; | ||
case "number": | ||
case 'number': | ||
numberValue = value; | ||
@@ -473,3 +450,3 @@ break; | ||
function toPrimitive_TIME_MILLIS(value) { | ||
return toNumberInternal("TIME_MILLIS", value); | ||
return toNumberInternal('TIME_MILLIS', value); | ||
} | ||
@@ -489,3 +466,3 @@ function toPrimitive_TIME_MICROS(value) { | ||
} | ||
return toNumberInternal("DATE", value); | ||
return toNumberInternal('DATE', value); | ||
} | ||
@@ -500,3 +477,3 @@ function fromPrimitive_DATE(value) { | ||
} | ||
return toNumberInternal("TIMESTAMP_MILLIS", value); | ||
return toNumberInternal('TIMESTAMP_MILLIS', value); | ||
} | ||
@@ -531,5 +508,5 @@ function fromPrimitive_TIMESTAMP_MILLIS(value) { | ||
if (!value.months || !value.days || !value.milliseconds) { | ||
throw "value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... }"; | ||
throw 'value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... }'; | ||
} | ||
let buf = Buffer.alloc(12); | ||
const buf = Buffer.alloc(12); | ||
buf.writeUInt32LE(value.months, 0); | ||
@@ -549,4 +526,4 @@ buf.writeUInt32LE(value.days, 4); | ||
if (v < lowerRange || v > upperRange) { | ||
throw "invalid value"; | ||
throw 'invalid value'; | ||
} | ||
} |
@@ -1,10 +0,7 @@ | ||
/// <reference types="node" /> | ||
/// <reference types="node" /> | ||
/// <reference types="node-int64" /> | ||
import thrift from "thrift"; | ||
import thrift from 'thrift'; | ||
import fs, { WriteStream } from 'fs'; | ||
import * as parquet_thrift from '../gen-nodejs/parquet_types'; | ||
import { FileMetaDataExt, WriterOptions } from './declare'; | ||
import { Int64 } from "thrift"; | ||
export type WriteStreamMinimal = Pick<WriteStream, "write" | "end">; | ||
import { Int64 } from 'thrift'; | ||
export type WriteStreamMinimal = Pick<WriteStream, 'write' | 'end'>; | ||
type Enums = typeof parquet_thrift.Encoding | typeof parquet_thrift.FieldRepetitionType | typeof parquet_thrift.Type | typeof parquet_thrift.CompressionCodec | typeof parquet_thrift.PageType | typeof parquet_thrift.ConvertedType; | ||
@@ -32,4 +29,4 @@ type ThriftObject = FileMetaDataExt | parquet_thrift.PageHeader | parquet_thrift.ColumnMetaData | parquet_thrift.BloomFilterHeader | parquet_thrift.OffsetIndex | parquet_thrift.ColumnIndex | FileMetaDataExt; | ||
export declare const osopen: (path: string | Buffer | URL, opts?: WriterOptions) => Promise<WriteStream>; | ||
export declare const fieldIndexOf: (arr: Array<Array<unknown>>, elem: Array<unknown>) => number; | ||
export declare const fieldIndexOf: (arr: unknown[][], elem: unknown[]) => number; | ||
export declare const cloneInteger: (int: Int64) => thrift.Int64; | ||
export {}; |
@@ -49,9 +49,13 @@ "use strict"; | ||
/** Patch PageLocation to be three element array that has getters/setters | ||
* for each of the properties (offset, compressed_page_size, first_row_index) | ||
* This saves space considerably as we do not need to store the full variable | ||
* names for every PageLocation | ||
*/ | ||
* for each of the properties (offset, compressed_page_size, first_row_index) | ||
* This saves space considerably as we do not need to store the full variable | ||
* names for every PageLocation | ||
*/ | ||
const getterSetter = (index) => ({ | ||
get: function () { return this[index]; }, | ||
set: function (value) { return this[index] = value; } | ||
get: function () { | ||
return this[index]; | ||
}, | ||
set: function (value) { | ||
return (this[index] = value); | ||
}, | ||
}); | ||
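A minimal standalone sketch of this pattern (illustrative only; `PageLocationLike` below is a stand-in, not the library's generated `PageLocation` class): the named properties are backed by fixed array slots, so each page location is stored as three numbers rather than an object carrying three property names.

```js
// Illustrative sketch: back named properties by array indices to save space.
const getterSetter = (index) => ({
  get() {
    return this[index];
  },
  set(value) {
    return (this[index] = value);
  },
});

class PageLocationLike extends Array {}
Object.defineProperty(PageLocationLike.prototype, 'offset', getterSetter(0));
Object.defineProperty(PageLocationLike.prototype, 'compressed_page_size', getterSetter(1));
Object.defineProperty(PageLocationLike.prototype, 'first_row_index', getterSetter(2));

const loc = new PageLocationLike();
loc.offset = 4096; // stored at loc[0]
loc.compressed_page_size = 512; // stored at loc[1]
loc.first_row_index = 0; // stored at loc[2]
console.log(Array.from(loc)); // [ 4096, 512, 0 ]
```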
@@ -65,9 +69,9 @@ Object.defineProperty(parquet_thrift.PageLocation.prototype, 'offset', getterSetter(0)); | ||
const serializeThrift = function (obj) { | ||
let output = []; | ||
const output = []; | ||
const callBack = function (buf) { | ||
output.push(buf); | ||
}; | ||
let transport = new thrift_1.default.TBufferedTransport(undefined, callBack); | ||
let protocol = new thrift_1.default.TCompactProtocol(transport); | ||
//@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872 | ||
const transport = new thrift_1.default.TBufferedTransport(undefined, callBack); | ||
const protocol = new thrift_1.default.TCompactProtocol(transport); | ||
//@ts-expect-error, https://issues.apache.org/jira/browse/THRIFT-3872 | ||
obj.write(protocol); | ||
@@ -82,6 +86,6 @@ transport.flush(); | ||
} | ||
var transport = new fixedTFramedTransport(buf); | ||
const transport = new fixedTFramedTransport(buf); | ||
transport.readPos = offset; | ||
var protocol = new thrift_1.default.TCompactProtocol(transport); | ||
//@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872 | ||
const protocol = new thrift_1.default.TCompactProtocol(transport); | ||
//@ts-expect-error, https://issues.apache.org/jira/browse/THRIFT-3872 | ||
obj.read(protocol); | ||
@@ -107,3 +111,3 @@ return transport.readPos - offset; | ||
const getThriftEnum = function (klass, value) { | ||
for (let k in klass) { | ||
for (const k in klass) { | ||
if (klass[k] === value) { | ||
@@ -143,3 +147,3 @@ return k; | ||
const fread = function (fd, position, length) { | ||
let buffer = Buffer.alloc(length); | ||
const buffer = Buffer.alloc(length); | ||
return new Promise((resolve, reject) => { | ||
@@ -198,4 +202,4 @@ fs_1.default.read(fd, buffer, 0, length, position, (err, bytesRead, buf) => { | ||
return new Promise((resolve, reject) => { | ||
let outputStream = fs_1.default.createWriteStream(path, opts); | ||
outputStream.on('open', function (fd) { | ||
const outputStream = fs_1.default.createWriteStream(path, opts); | ||
outputStream.on('open', function (_fd) { | ||
resolve(outputStream); | ||
@@ -202,0 +206,0 @@ }); |
@@ -1,3 +0,1 @@ | ||
/// <reference types="node" /> | ||
/// <reference types="node" /> | ||
import stream from 'stream'; | ||
@@ -47,3 +45,3 @@ import * as parquet_shredder from './shred'; | ||
*/ | ||
close(callback?: Function): Promise<void>; | ||
close(callback?: () => void): Promise<void>; | ||
/** | ||
@@ -74,4 +72,4 @@ * Add key<>value metadata to the file | ||
schema: ParquetSchema; | ||
write: Function; | ||
close: Function; | ||
write: (buf: Buffer) => void; | ||
close: () => void; | ||
offset: Int64; | ||
@@ -88,8 +86,8 @@ rowCount: Int64; | ||
static openStream(schema: ParquetSchema, outputStream: parquet_util.WriteStreamMinimal, opts: WriterOptions): Promise<ParquetEnvelopeWriter>; | ||
constructor(schema: ParquetSchema, writeFn: Function, closeFn: Function, fileOffset: Int64, opts: WriterOptions); | ||
writeSection(buf: Buffer): any; | ||
constructor(schema: ParquetSchema, writeFn: (buf: Buffer) => void, closeFn: () => void, fileOffset: Int64, opts: WriterOptions); | ||
writeSection(buf: Buffer): void; | ||
/** | ||
* Encode the parquet file header | ||
*/ | ||
writeHeader(): any; | ||
writeHeader(): void; | ||
/** | ||
@@ -99,3 +97,3 @@ * Encode a parquet row group. The records object should be created using the | ||
*/ | ||
writeRowGroup(records: parquet_shredder.RecordBuffer): Promise<any>; | ||
writeRowGroup(records: parquet_shredder.RecordBuffer): Promise<void>; | ||
writeBloomFilters(): void; | ||
@@ -109,3 +107,3 @@ /** | ||
*/ | ||
writeFooter(userMetadata: Record<string, string>): any; | ||
writeFooter(userMetadata: Record<string, string>): void; | ||
/** | ||
@@ -123,4 +121,4 @@ * Set the parquet data page size. The data page size controls the maximum | ||
constructor(schema: ParquetSchema, opts?: {}); | ||
_transform(row: Record<string, unknown>, _encoding: string, callback: Function): void; | ||
_transform(row: Record<string, unknown>, _encoding: string, callback: (err?: Error | null, data?: any) => void): void; | ||
_flush(callback: (foo: any, bar?: any) => any): void; | ||
} |
@@ -74,3 +74,3 @@ "use strict"; | ||
static async openFile(schema, path, opts) { | ||
let outputStream = await parquet_util.osopen(path, opts); | ||
const outputStream = await parquet_util.osopen(path, opts); | ||
return ParquetWriter.openStream(schema, outputStream, opts); | ||
@@ -86,3 +86,3 @@ } | ||
} | ||
let envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts); | ||
const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts); | ||
return new ParquetWriter(schema, envelopeWriter, opts); | ||
@@ -119,3 +119,3 @@ } | ||
useDataPageV2: this.envelopeWriter.useDataPageV2, | ||
bloomFilters: this.envelopeWriter.bloomFilters | ||
bloomFilters: this.envelopeWriter.bloomFilters, | ||
}; | ||
@@ -144,3 +144,6 @@ if (this.rowBuffer.pageRowCount >= this.envelopeWriter.pageSize) { | ||
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) { | ||
await encodePages(this.schema, this.rowBuffer, { useDataPageV2: this.envelopeWriter.useDataPageV2, bloomFilters: this.envelopeWriter.bloomFilters }); | ||
await encodePages(this.schema, this.rowBuffer, { | ||
useDataPageV2: this.envelopeWriter.useDataPageV2, | ||
bloomFilters: this.envelopeWriter.bloomFilters, | ||
}); | ||
await this.envelopeWriter.writeRowGroup(this.rowBuffer); | ||
@@ -204,4 +207,4 @@ this.rowBuffer = {}; | ||
static async openStream(schema, outputStream, opts) { | ||
let writeFn = parquet_util.oswrite.bind(undefined, outputStream); | ||
let closeFn = parquet_util.osend.bind(undefined, outputStream); | ||
const writeFn = parquet_util.oswrite.bind(undefined, outputStream); | ||
const closeFn = parquet_util.osend.bind(undefined, outputStream); | ||
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, new node_int64_1.default(0), opts); | ||
@@ -217,6 +220,6 @@ } | ||
this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE; | ||
this.useDataPageV2 = ("useDataPageV2" in opts) ? opts.useDataPageV2 : true; | ||
this.useDataPageV2 = 'useDataPageV2' in opts ? opts.useDataPageV2 : true; | ||
this.pageIndex = opts.pageIndex; | ||
this.bloomFilters = {}; | ||
(opts.bloomFilters || []).forEach(bloomOption => { | ||
(opts.bloomFilters || []).forEach((bloomOption) => { | ||
this.bloomFilters[bloomOption.column] = bloomFilterWriter.createSBBF(bloomOption); | ||
@@ -240,7 +243,7 @@ }); | ||
async writeRowGroup(records) { | ||
let rgroup = await encodeRowGroup(this.schema, records, { | ||
const rgroup = await encodeRowGroup(this.schema, records, { | ||
baseOffset: this.offset, | ||
pageSize: this.pageSize, | ||
useDataPageV2: this.useDataPageV2, | ||
pageIndex: this.pageIndex | ||
pageIndex: this.pageIndex, | ||
}); | ||
@@ -252,4 +255,4 @@ this.rowCount.setValue(this.rowCount.valueOf() + records.rowCount); | ||
writeBloomFilters() { | ||
this.rowGroups.forEach(group => { | ||
group.columns.forEach(column => { | ||
this.rowGroups.forEach((group) => { | ||
group.columns.forEach((column) => { | ||
if (!column.meta_data?.path_in_schema.length) { | ||
@@ -273,8 +276,8 @@ return; | ||
this.schema.fieldList.forEach((c, i) => { | ||
this.rowGroups.forEach(group => { | ||
let column = group.columns[i]; | ||
this.rowGroups.forEach((group) => { | ||
const column = group.columns[i]; | ||
if (!column) | ||
return; | ||
if (column.meta_data?.columnIndex) { | ||
let columnBody = parquet_util.serializeThrift(column.meta_data.columnIndex); | ||
const columnBody = parquet_util.serializeThrift(column.meta_data.columnIndex); | ||
delete column.meta_data.columnIndex; | ||
@@ -286,3 +289,3 @@ column.column_index_offset = parquet_util.cloneInteger(this.offset); | ||
if (column.meta_data?.offsetIndex) { | ||
let offsetBody = parquet_util.serializeThrift(column.meta_data.offsetIndex); | ||
const offsetBody = parquet_util.serializeThrift(column.meta_data.offsetIndex); | ||
delete column.meta_data.offsetIndex; | ||
@@ -308,3 +311,2 @@ column.offset_index_offset = parquet_util.cloneInteger(this.offset); | ||
} | ||
; | ||
/** | ||
@@ -326,3 +328,3 @@ * Set the parquet data page size. The data page size controls the maximum | ||
super({ objectMode: true }); | ||
let writeProxy = (function (t) { | ||
const writeProxy = (function (t) { | ||
return function (b) { | ||
@@ -332,3 +334,4 @@ t.push(b); | ||
})(this); | ||
this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, function () { | ||
this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, () => { | ||
/* void */ | ||
}, new node_int64_1.default(0), opts), opts); | ||
@@ -338,3 +341,3 @@ } | ||
if (row) { | ||
this.writer.appendRow(row).then(data => callback(null, data), err => { | ||
this.writer.appendRow(row).then((data) => callback(null, data), (err) => { | ||
const fullErr = new Error(`Error transforming to parquet: ${err.toString()} row:${row}`); | ||
@@ -350,4 +353,3 @@ fullErr.message = err; | ||
_flush(callback) { | ||
this.writer.close() | ||
.then(d => callback(null, d), callback); | ||
this.writer.close().then((d) => callback(null, d), callback); | ||
} | ||
@@ -379,4 +381,6 @@ } | ||
statistics = Object.assign({}, statistics); | ||
statistics.min_value = statistics.min_value === undefined ? null : encodeStatisticsValue(statistics.min_value, column); | ||
statistics.max_value = statistics.max_value === undefined ? null : encodeStatisticsValue(statistics.max_value, column); | ||
statistics.min_value = | ||
statistics.min_value === undefined ? null : encodeStatisticsValue(statistics.min_value, column); | ||
statistics.max_value = | ||
statistics.max_value === undefined ? null : encodeStatisticsValue(statistics.max_value, column); | ||
statistics.max = statistics.max_value; | ||
@@ -387,6 +391,7 @@ statistics.min = statistics.min_value; | ||
async function encodePages(schema, rowBuffer, opts) { | ||
// generic | ||
if (!rowBuffer.pageRowCount) { | ||
return; | ||
} | ||
for (let field of schema.fieldList) { | ||
for (const field of schema.fieldList) { | ||
if (field.isNested) { | ||
@@ -398,5 +403,5 @@ continue; | ||
const values = rowBuffer.columnData[columnPath]; | ||
if (opts.bloomFilters && (columnPath in opts.bloomFilters)) { | ||
if (opts.bloomFilters && columnPath in opts.bloomFilters) { | ||
const splitBlockBloomFilter = opts.bloomFilters[columnPath]; | ||
values.values.forEach(v => splitBlockBloomFilter.insert(v)); | ||
values.values.forEach((v) => splitBlockBloomFilter.insert(v)); | ||
} | ||
@@ -423,5 +428,5 @@ let statistics = {}; | ||
} | ||
let pages = rowBuffer.pages[field.path.join(',')]; | ||
let lastPage = pages[pages.length - 1]; | ||
let first_row_index = lastPage ? lastPage.first_row_index + lastPage.count : 0; | ||
const pages = rowBuffer.pages[field.path.join(',')]; | ||
const lastPage = pages[pages.length - 1]; | ||
const first_row_index = lastPage ? lastPage.first_row_index + lastPage.count : 0; | ||
pages.push({ | ||
@@ -432,3 +437,3 @@ page, | ||
distinct_values: values.distinct_values, | ||
num_values: values.dlevels.length | ||
num_values: values.dlevels.length, | ||
}); | ||
@@ -448,5 +453,5 @@ values.distinct_values = new Set(); | ||
/* encode values */ | ||
let valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { | ||
const valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { | ||
bitWidth: column.typeLength, | ||
...column | ||
...column, | ||
}); | ||
@@ -456,7 +461,11 @@ /* encode repetition and definition levels */ | ||
if (column.rLevelMax > 0) { | ||
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, rlevels, { bitWidth: parquet_util.getBitWidth(column.rLevelMax) }); | ||
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, rlevels, { | ||
bitWidth: parquet_util.getBitWidth(column.rLevelMax), | ||
}); | ||
} | ||
let dLevelsBuf = Buffer.alloc(0); | ||
if (column.dLevelMax > 0) { | ||
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, dlevels, { bitWidth: parquet_util.getBitWidth(column.dLevelMax) }); | ||
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, dlevels, { | ||
bitWidth: parquet_util.getBitWidth(column.dLevelMax), | ||
}); | ||
} | ||
@@ -466,3 +475,3 @@ /* build page header */ | ||
pageBody = await parquet_compression.deflate(column.compression, pageBody); | ||
let pageHeader = new parquet_types_1.default.PageHeader(); | ||
const pageHeader = new parquet_types_1.default.PageHeader(); | ||
pageHeader.type = parquet_types_1.default.PageType['DATA_PAGE']; | ||
@@ -477,6 +486,4 @@ pageHeader.uncompressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; | ||
pageHeader.data_page_header.encoding = parquet_types_1.default.Encoding[column.encoding]; | ||
pageHeader.data_page_header.definition_level_encoding = | ||
parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING]; | ||
pageHeader.data_page_header.repetition_level_encoding = | ||
parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING]; | ||
pageHeader.data_page_header.definition_level_encoding = parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING]; | ||
pageHeader.data_page_header.repetition_level_encoding = parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING]; | ||
/* concat page header, repetition and definition levels and values */ | ||
@@ -490,7 +497,7 @@ return Buffer.concat([parquet_util.serializeThrift(pageHeader), pageBody]); | ||
/* encode values */ | ||
let valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { | ||
const valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { | ||
bitWidth: column.typeLength, | ||
...column, | ||
}); | ||
let valuesBufCompressed = await parquet_compression.deflate(column.compression, valuesBuf); | ||
const valuesBufCompressed = await parquet_compression.deflate(column.compression, valuesBuf); | ||
/* encode repetition and definition levels */ | ||
@@ -501,3 +508,3 @@ let rLevelsBuf = Buffer.alloc(0); | ||
bitWidth: parquet_util.getBitWidth(column.rLevelMax), | ||
disableEnvelope: true | ||
disableEnvelope: true, | ||
}); | ||
@@ -509,7 +516,7 @@ } | ||
bitWidth: parquet_util.getBitWidth(column.dLevelMax), | ||
disableEnvelope: true | ||
disableEnvelope: true, | ||
}); | ||
} | ||
/* build page header */ | ||
let pageHeader = new parquet_types_1.default.PageHeader(); | ||
const pageHeader = new parquet_types_1.default.PageHeader(); | ||
pageHeader.type = parquet_types_1.default.PageType['DATA_PAGE_V2']; | ||
@@ -523,18 +530,10 @@ pageHeader.data_page_header_v2 = new parquet_types_1.default.DataPageHeaderV2(); | ||
} | ||
pageHeader.uncompressed_page_size = | ||
rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; | ||
pageHeader.compressed_page_size = | ||
rLevelsBuf.length + dLevelsBuf.length + valuesBufCompressed.length; | ||
pageHeader.uncompressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; | ||
pageHeader.compressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBufCompressed.length; | ||
pageHeader.data_page_header_v2.encoding = parquet_types_1.default.Encoding[column.encoding]; | ||
pageHeader.data_page_header_v2.definition_levels_byte_length = dLevelsBuf.length; | ||
pageHeader.data_page_header_v2.repetition_levels_byte_length = rLevelsBuf.length; | ||
pageHeader.data_page_header_v2.is_compressed = | ||
column.compression !== 'UNCOMPRESSED'; | ||
pageHeader.data_page_header_v2.is_compressed = column.compression !== 'UNCOMPRESSED'; | ||
/* concat page header, repetition and definition levels and values */ | ||
return Buffer.concat([ | ||
parquet_util.serializeThrift(pageHeader), | ||
rLevelsBuf, | ||
dLevelsBuf, | ||
valuesBufCompressed | ||
]); | ||
return Buffer.concat([parquet_util.serializeThrift(pageHeader), rLevelsBuf, dLevelsBuf, valuesBufCompressed]); | ||
} | ||
@@ -545,7 +544,7 @@ /** | ||
async function encodeColumnChunk(pages, opts) { | ||
let pagesBuf = Buffer.concat(pages.map(d => d.page)); | ||
let num_values = pages.reduce((p, d) => p + d.num_values, 0); | ||
const pagesBuf = Buffer.concat(pages.map((d) => d.page)); | ||
const num_values = pages.reduce((p, d) => p + d.num_values, 0); | ||
let offset = opts.baseOffset; | ||
/* prepare metadata header */ | ||
let metadata = new parquet_types_1.default.ColumnMetaData(); | ||
const metadata = new parquet_types_1.default.ColumnMetaData(); | ||
metadata.path_in_schema = opts.column.path; | ||
@@ -560,3 +559,3 @@ metadata.num_values = new node_int64_1.default(num_values); | ||
/* compile statistics ColumnIndex and OffsetIndex*/ | ||
let columnIndex = new parquet_types_1.default.ColumnIndex(); | ||
const columnIndex = new parquet_types_1.default.ColumnIndex(); | ||
columnIndex.null_pages = []; | ||
@@ -567,7 +566,7 @@ columnIndex.max_values = []; | ||
columnIndex.boundary_order = 0; | ||
let offsetIndex = new parquet_types_1.default.OffsetIndex(); | ||
const offsetIndex = new parquet_types_1.default.OffsetIndex(); | ||
offsetIndex.page_locations = []; | ||
/* prepare statistics */ | ||
let statistics = {}; | ||
let distinct_values = new Set(); | ||
const statistics = {}; | ||
const distinct_values = new Set(); | ||
statistics.null_count = new node_int64_1.default(0); | ||
@@ -592,3 +591,3 @@ statistics.distinct_count = new node_int64_1.default(0); | ||
} | ||
let pageLocation = new parquet_types_1.default.PageLocation(); | ||
const pageLocation = new parquet_types_1.default.PageLocation(); | ||
pageLocation.offset = new node_int64_1.default(offset); | ||
@@ -614,4 +613,4 @@ offset += page.page.length; | ||
/* concat metadata header and data pages */ | ||
let metadataOffset = opts.baseOffset + pagesBuf.length; | ||
let body = Buffer.concat([pagesBuf, parquet_util.serializeThrift(metadata)]); | ||
const metadataOffset = opts.baseOffset + pagesBuf.length; | ||
const body = Buffer.concat([pagesBuf, parquet_util.serializeThrift(metadata)]); | ||
return { body, metadata, metadataOffset }; | ||
@@ -623,3 +622,3 @@ } | ||
async function encodeRowGroup(schema, data, opts) { | ||
let metadata = new parquet_types_1.default.RowGroup(); | ||
const metadata = new parquet_types_1.default.RowGroup(); | ||
metadata.num_rows = new node_int64_1.default(data.rowCount); | ||
@@ -629,7 +628,7 @@ metadata.columns = []; | ||
let body = Buffer.alloc(0); | ||
for (let field of schema.fieldList) { | ||
for (const field of schema.fieldList) { | ||
if (field.isNested) { | ||
continue; | ||
} | ||
let cchunkData = await encodeColumnChunk(data.pages[field.path.join(',')], { | ||
const cchunkData = await encodeColumnChunk(data.pages[field.path.join(',')], { | ||
column: field, | ||
@@ -640,9 +639,9 @@ baseOffset: opts.baseOffset.valueOf() + body.length, | ||
useDataPageV2: opts.useDataPageV2 ?? true, | ||
pageIndex: opts.pageIndex ?? true | ||
pageIndex: opts.pageIndex ?? true, | ||
}); | ||
let cchunk = new parquet_types_1.default.ColumnChunk(); | ||
const cchunk = new parquet_types_1.default.ColumnChunk(); | ||
cchunk.file_offset = new node_int64_1.default(cchunkData.metadataOffset); | ||
cchunk.meta_data = cchunkData.metadata; | ||
metadata.columns.push(cchunk); | ||
metadata.total_byte_size = new node_int64_1.default(metadata.total_byte_size.valueOf() + (cchunkData.body.length)); | ||
metadata.total_byte_size = new node_int64_1.default(metadata.total_byte_size.valueOf() + cchunkData.body.length); | ||
body = Buffer.concat([body, cchunkData.body]); | ||
@@ -656,3 +655,3 @@ } | ||
function encodeFooter(schema, rowCount, rowGroups, userMetadata) { | ||
let metadata = new parquet_types_1.default.FileMetaData(); | ||
const metadata = new parquet_types_1.default.FileMetaData(); | ||
metadata.version = PARQUET_VERSION; | ||
@@ -664,4 +663,4 @@ metadata.created_by = '@dsnp/parquetjs'; | ||
metadata.key_value_metadata = []; | ||
for (let k in userMetadata) { | ||
let kv = new parquet_types_1.default.KeyValue(); | ||
for (const k in userMetadata) { | ||
const kv = new parquet_types_1.default.KeyValue(); | ||
kv.key = k; | ||
@@ -672,3 +671,3 @@ kv.value = userMetadata[k]; | ||
{ | ||
let schemaRoot = new parquet_types_1.default.SchemaElement(); | ||
const schemaRoot = new parquet_types_1.default.SchemaElement(); | ||
schemaRoot.name = 'root'; | ||
@@ -678,4 +677,4 @@ schemaRoot.num_children = Object.keys(schema.fields).length; | ||
} | ||
for (let field of schema.fieldList) { | ||
let schemaElem = new parquet_types_1.default.SchemaElement(); | ||
for (const field of schema.fieldList) { | ||
const schemaElem = new parquet_types_1.default.SchemaElement(); | ||
schemaElem.name = field.name; | ||
@@ -694,3 +693,3 @@ schemaElem.repetition_type = parquet_types_1.default.FieldRepetitionType[field.repetitionType]; | ||
switch (schemaElem.converted_type) { | ||
case (parquet_types_1.ConvertedType.DECIMAL): | ||
case parquet_types_1.ConvertedType.DECIMAL: | ||
schemaElem.precision = field.precision; | ||
@@ -703,4 +702,4 @@ schemaElem.scale = field.scale || 0; | ||
} | ||
let metadataEncoded = parquet_util.serializeThrift(metadata); | ||
let footerEncoded = Buffer.alloc(metadataEncoded.length + 8); | ||
const metadataEncoded = parquet_util.serializeThrift(metadata); | ||
const footerEncoded = Buffer.alloc(metadataEncoded.length + 8); | ||
metadataEncoded.copy(footerEncoded); | ||
@@ -707,0 +706,0 @@ footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length); |
@@ -6,3 +6,3 @@ { | ||
"types": "dist/parquet.d.ts", | ||
"version": "0.0.0-5db9db", | ||
"version": "0.0.0-6396d2", | ||
"homepage": "https://github.com/LibertyDSNP/parquetjs", | ||
@@ -19,14 +19,14 @@ "license": "MIT", | ||
"dependencies": { | ||
"@aws-sdk/client-s3": "^3.489.0", | ||
"@aws-sdk/client-s3": "^3.575.0", | ||
"@types/long": "^4.0.2", | ||
"@types/node-int64": "^0.4.29", | ||
"@types/thrift": "^0.10.11", | ||
"brotli-wasm": "^2.0.1", | ||
"@types/node-int64": "^0.4.32", | ||
"@types/thrift": "^0.10.17", | ||
"brotli-wasm": "^3.0.0", | ||
"browserify-zlib": "^0.2.0", | ||
"bson": "4.6.3", | ||
"cross-fetch": "^3.1.4", | ||
"int53": "^0.2.4", | ||
"long": "^4.0.0", | ||
"snappyjs": "^0.6.1", | ||
"thrift": "0.16.0", | ||
"bson": "6.7.0", | ||
"cross-fetch": "^4.0.0", | ||
"int53": "^1.0.0", | ||
"long": "^5.2.3", | ||
"snappyjs": "^0.7.0", | ||
"thrift": "0.20.0", | ||
"varint": "^6.0.0", | ||
@@ -36,27 +36,34 @@ "xxhash-wasm": "^1.0.2" | ||
"devDependencies": { | ||
"@smithy/util-stream": "^2.0.24", | ||
"@types/chai": "^4.3.5", | ||
"@types/json-schema": "^7.0.11", | ||
"@types/mocha": "^10.0.1", | ||
"@types/node": "^18.18.2", | ||
"@types/sinon": "^10.0.15", | ||
"@types/varint": "^6.0.1", | ||
"assert": "^2.0.0", | ||
"aws-sdk-client-mock": "^3.0.1", | ||
"@eslint/js": "^9.6.0", | ||
"@smithy/util-stream": "^3.0.0", | ||
"@types/chai": "^4.3.16", | ||
"@types/eslint__js": "^8.42.3", | ||
"@types/json-schema": "^7.0.15", | ||
"@types/mocha": "^10.0.6", | ||
"@types/node": "^20.12.12", | ||
"@types/sinon": "^17.0.3", | ||
"@types/varint": "^6.0.3", | ||
"assert": "^2.1.0", | ||
"aws-sdk-client-mock": "^4.0.0", | ||
"browserfs": "^1.4.3", | ||
"buffer": "^6.0.3", | ||
"chai": "4.3.6", | ||
"core-js": "^3.22.5", | ||
"esbuild": "^0.19.2", | ||
"mocha": "^10.2.0", | ||
"msw": "^1.2.1", | ||
"chai": "4.4.1", | ||
"core-js": "^3.37.1", | ||
"esbuild": "^0.21.2", | ||
"eslint": "^8.57.0", | ||
"eslint-plugin-mocha": "^10.4.3", | ||
"events": "^3.3.0", | ||
"mocha": "^10.4.0", | ||
"msw": "^2.3.0", | ||
"object-stream": "^0.0.1", | ||
"prettier": "3.3.2", | ||
"process": "^0.11.10", | ||
"regenerator-runtime": "^0.13.11", | ||
"sinon": "^15.1.0", | ||
"regenerator-runtime": "^0.14.1", | ||
"sinon": "^17.0.2", | ||
"sinon-chai": "^3.7.0", | ||
"sinon-chai-in-order": "^0.1.0", | ||
"stream-browserify": "^3.0.0", | ||
"ts-node": "^10.9.1", | ||
"typescript": "^5.0.4" | ||
"tsx": "^4.10.2", | ||
"typescript": "^5.5.2", | ||
"typescript-eslint": "^7.14.1" | ||
}, | ||
@@ -69,5 +76,7 @@ "scripts": { | ||
"type": "tsc --noEmit", | ||
"lint": "echo 'Linting, it is on the TODO list...'", | ||
"test": "mocha -r ts-node/register 'test/{,!(browser)/**}/*.{js,ts}'", | ||
"test:only": "mocha -r ts-node/register", | ||
"lint": "eslint . && npx prettier . --check", | ||
"lint:fix": "eslint --fix .", | ||
"format": "npx prettier . --write", | ||
"test": "mocha 'test/{,!(browser)/**}/*.{js,ts}'", | ||
"test:only": "mocha", | ||
"clean": "rm -Rf ./dist", | ||
@@ -74,0 +83,0 @@ "prepublishOnly": "npm run clean && npm run build:node && npm run build:types && npm run build:browser", |
189
README.md
@@ -20,2 +20,3 @@ # parquet.js | ||
This is a forked repository with code from various sources: | ||
- Primary source [ironSource](https://github.com/ironSource/parquetjs) [npm: parquetjs](https://www.npmjs.com/package/parquetjs) | ||
@@ -25,2 +26,3 @@ - Secondary source [ZJONSSON](https://github.com/ZJONSSON/parquetjs) [npm: parquetjs-lite](https://www.npmjs.com/package/parquetjs-lite) | ||
## Installation | ||
_parquet.js requires node.js >= 18.18.2_ | ||
@@ -33,24 +35,31 @@ | ||
### NodeJS | ||
To use with nodejs: | ||
```javascript | ||
import parquetjs from "@dsnp/parquetjs" | ||
import parquetjs from '@dsnp/parquetjs'; | ||
``` | ||
### Browser with Bundler | ||
To use in a browser with a bundler, depending on your needs, write the appropriate plugin or resolver to point to either the Common JS or ES Module version: | ||
```javascript | ||
// Common JS | ||
"node_modules/@dsnp/parquetjs/dist/browser/parquetjs.cjs" | ||
'node_modules/@dsnp/parquetjs/dist/browser/parquetjs.cjs'; | ||
// ES Modules | ||
"node_modules/@dsnp/parquetjs/dist/browser/parquetjs.esm" | ||
'node_modules/@dsnp/parquetjs/dist/browser/parquetjs.esm'; | ||
``` | ||
or: | ||
```javascript | ||
// Common JS | ||
import parquetjs from "@dsnp/parquetjs/dist/browser/parquetjs.cjs" | ||
import parquetjs from '@dsnp/parquetjs/dist/browser/parquetjs.cjs'; | ||
// ES Modules | ||
import parquetjs from "@dsnp/parquetjs/dist/browser/parquetjs.esm" | ||
import parquetjs from '@dsnp/parquetjs/dist/browser/parquetjs.esm'; | ||
``` | ||
### Browser Direct: ES Modules | ||
To use directly in the browser without a bundler using ES Modules: | ||
@@ -61,10 +70,11 @@ | ||
3. Use it in your html or other ES Modules: | ||
```html | ||
<script type="module"> | ||
import parquetjs from '../parquet.esm.js'; | ||
// Use parquetjs | ||
</script> | ||
``` | ||
```html | ||
<script type="module"> | ||
import parquetjs from '../parquet.esm.js'; | ||
// Use parquetjs | ||
</script> | ||
``` | ||
### Browser Direct: Plain Ol' JavaScript | ||
To use directly in the browser without a bundler or ES Modules: | ||
@@ -74,8 +84,8 @@ | ||
2. Copy `dist/browser/parquetjs.js` to the server | ||
2. Use the global `parquetjs` variable to access parquetjs functions | ||
3. Use the global `parquetjs` variable to access parquetjs functions | ||
```html | ||
<script> | ||
// console.log(parquetjs) | ||
</script> | ||
``` | ||
// console.log(parquetjs) | ||
</script> | ||
``` | ||
@@ -87,3 +97,3 @@ ## Usage: Writing files | ||
``` js | ||
```js | ||
var parquet = require('@dsnp/parquetjs'); | ||
@@ -98,3 +108,3 @@ ``` | ||
``` js | ||
```js | ||
// declare a schema for the `fruits` table | ||
@@ -106,3 +116,3 @@ var schema = new parquet.ParquetSchema({ | ||
date: { type: 'TIMESTAMP_MILLIS' }, | ||
in_stock: { type: 'BOOLEAN' } | ||
in_stock: { type: 'BOOLEAN' }, | ||
}); | ||
@@ -119,3 +129,3 @@ ``` | ||
date: parquet.ParquetFieldBuilder.createTimestampField(), | ||
in_stock: parquet.ParquetFieldBuilder.createBooleanField() | ||
in_stock: parquet.ParquetFieldBuilder.createBooleanField(), | ||
}); | ||
@@ -126,29 +136,28 @@ ``` | ||
``` js | ||
```js | ||
// declare a schema for the `fruits` JSON Schema | ||
var schema = new parquet.ParquetSchema.fromJsonSchema({ | ||
"type": "object", | ||
"properties": { | ||
"name": { | ||
"type": "string" | ||
type: 'object', | ||
properties: { | ||
name: { | ||
type: 'string', | ||
}, | ||
"quantity": { | ||
"type": "integer" | ||
quantity: { | ||
type: 'integer', | ||
}, | ||
"price": { | ||
"type": "number" | ||
price: { | ||
type: 'number', | ||
}, | ||
"date": { | ||
"type": "string", | ||
"format": "date-time" | ||
date: { | ||
type: 'string', | ||
format: 'date-time', | ||
}, | ||
"in_stock": { | ||
"type": "boolean" | ||
} | ||
in_stock: { | ||
type: 'boolean', | ||
}, | ||
}, | ||
"required": ["name", "quantity", "price", "date", "in_stock"] | ||
required: ['name', 'quantity', 'price', 'date', 'in_stock'], | ||
}); | ||
``` | ||
Note that the Parquet schema supports nesting, so you can store complex, arbitrarily | ||
@@ -162,3 +171,3 @@ nested records into a single row (more on that later) while still maintaining good | ||
``` js | ||
```js | ||
// create new ParquetWriter that writes to 'fruits.parquet` | ||
@@ -168,4 +177,4 @@ var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet'); | ||
// append a few rows to the file | ||
await writer.appendRow({name: 'apples', quantity: 10, price: 2.5, date: new Date(), in_stock: true}); | ||
await writer.appendRow({name: 'oranges', quantity: 10, price: 2.5, date: new Date(), in_stock: true}); | ||
await writer.appendRow({ name: 'apples', quantity: 10, price: 2.5, date: new Date(), in_stock: true }); | ||
await writer.appendRow({ name: 'oranges', quantity: 10, price: 2.5, date: new Date(), in_stock: true }); | ||
``` | ||
@@ -180,15 +189,15 @@ | ||
``` js | ||
const options = { | ||
bloomFilters: [ | ||
{ | ||
column: "name", | ||
numFilterBytes: 1024, | ||
}, | ||
{ | ||
column: "quantity", | ||
numFilterBytes: 1024, | ||
}, | ||
] | ||
}; | ||
```js | ||
const options = { | ||
bloomFilters: [ | ||
{ | ||
column: 'name', | ||
numFilterBytes: 1024, | ||
}, | ||
{ | ||
column: 'quantity', | ||
numFilterBytes: 1024, | ||
}, | ||
], | ||
}; | ||
@@ -198,11 +207,11 @@ var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet', options); | ||
By default, when no additional options are passed, the optimal number of blocks is calculated from the default number of distinct values (128*1024) and the default false positive probability (0.001), which gives a filter byte size of 29,920. | ||
By default, when no additional options are passed, the optimal number of blocks is calculated from the default number of distinct values (128\*1024) and the default false positive probability (0.001), which gives a filter byte size of 29,920. | ||
The following options are provided to adjust the split-block bloom filter settings. | ||
`numFilterBytes` - sets the desired size of the bloom filter in bytes. Defaults to 128 * 1024 * 1024 bits. | ||
`numFilterBytes` - sets the desired size of the bloom filter in bytes. Defaults to 128 _ 1024 _ 1024 bits. | ||
`falsePositiveRate` - sets the desired false positive probability for the bloom filter. Defaults to 0.001. | ||
`numDistinct` - sets the expected number of distinct values. Defaults to 128 * 1024. | ||
`numDistinct` - sets the expected number of distinct values. Defaults to 128 \* 1024. | ||
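As an alternative to `numFilterBytes`, a minimal sketch (assuming the same `schema` and the `parquet.ParquetWriter.openFile` call shown above) of sizing the filter from an expected number of distinct values and a false positive probability instead of a byte budget:

```js
// Sketch: size the split-block bloom filter via numDistinct + falsePositiveRate.
// These are ignored if numFilterBytes is also provided for the same column.
const options = {
  bloomFilters: [
    {
      column: 'name',
      numDistinct: 100000, // expected distinct values for this column (example value)
      falsePositiveRate: 0.01, // acceptable false positive probability (example value)
    },
  ],
};
const writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet', options);
```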
@@ -221,3 +230,3 @@ Note that if numFilterBytes is provided then falsePositiveRate and numDistinct options are ignored. | ||
``` js | ||
```js | ||
// create new ParquetReader that reads from 'fruits.parquet` | ||
@@ -231,3 +240,3 @@ let reader = await parquet.ParquetReader.openFile('fruits.parquet'); | ||
let record = null; | ||
while (record = await cursor.next()) { | ||
while ((record = await cursor.next())) { | ||
console.log(record); | ||
@@ -240,3 +249,3 @@ } | ||
``` js | ||
```js | ||
// create a new cursor that will only return the `name` and `price` columns | ||
@@ -249,3 +258,3 @@ let cursor = reader.getCursor(['name', 'price']); | ||
``` js | ||
```js | ||
await reader.close(); | ||
@@ -258,3 +267,4 @@ ``` | ||
and calling `getBloomFiltersFor`. | ||
``` js | ||
```js | ||
// create new ParquetReader that reads from 'fruits.parquet' | ||
@@ -277,2 +287,3 @@ let reader = await parquet.ParquetReader.openFile('fruits.parquet'); | ||
``` | ||
Calling `getBloomFiltersFor` on the reader returns an object with the keys being a column name and value being an array of length equal to the number of row groups that the column spans. | ||
@@ -282,3 +293,3 @@ | ||
``` js | ||
```js | ||
const sbbf = bloomFilters.name[0].sbbf; | ||
@@ -289,3 +300,2 @@ | ||
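As a usage sketch (assuming the extracted filter instance exposes an async `check` method; that name is an assumption, so verify it against your version), the filter can answer membership queries for a column value:

```js
// Sketch, continuing from the sbbf extracted above; check() is assumed here.
// check() resolves to false when the value is definitely absent from the row
// group, and to true when it may be present (bloom filters allow false positives).
const maybePresent = await sbbf.check('apples');
if (!maybePresent) {
  console.log("'apples' is definitely not in this row group");
}
```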
### Reading data from a url | ||
@@ -297,5 +307,5 @@ | ||
``` js | ||
```js | ||
const request = require('request'); | ||
let reader = await parquet.ParquetReader.openUrl(request,'https://domain/fruits.parquet'); | ||
let reader = await parquet.ParquetReader.openUrl(request, 'https://domain/fruits.parquet'); | ||
``` | ||
@@ -309,7 +319,7 @@ | ||
``` js | ||
```js | ||
const AWS = require('aws-sdk'); | ||
const client = new AWS.S3({ | ||
accessKeyId: 'xxxxxxxxxxx', | ||
secretAccessKey: 'xxxxxxxxxxx' | ||
secretAccessKey: 'xxxxxxxxxxx', | ||
}); | ||
@@ -319,6 +329,6 @@ | ||
Bucket: 'xxxxxxxxxxx', | ||
Key: 'xxxxxxxxxxx' | ||
Key: 'xxxxxxxxxxx', | ||
}; | ||
let reader = await parquet.ParquetReader.openS3(client,params); | ||
let reader = await parquet.ParquetReader.openS3(client, params); | ||
``` | ||
@@ -330,3 +340,3 @@ | ||
``` js | ||
```js | ||
const file = fs.readFileSync('fruits.parquet'); | ||
@@ -347,3 +357,3 @@ let reader = await parquet.ParquetReader.openBuffer(file); | ||
``` js | ||
```js | ||
var schema = new parquet.ParquetSchema({ | ||
@@ -362,3 +372,3 @@ name: { type: 'UTF8', encoding: 'PLAIN' }, | ||
``` js | ||
```js | ||
var schema = new parquet.ParquetSchema({ | ||
@@ -369,3 +379,2 @@ age: { type: 'UINT_32', encoding: 'RLE', typeLength: 7 }, | ||
### Optional Fields | ||
@@ -376,3 +385,3 @@ | ||
``` js | ||
```js | ||
var schema = new parquet.ParquetSchema({ | ||
@@ -384,7 +393,6 @@ name: { type: 'UTF8' }, | ||
var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet'); | ||
await writer.appendRow({name: 'apples', quantity: 10 }); | ||
await writer.appendRow({name: 'banana' }); // not in stock | ||
await writer.appendRow({ name: 'apples', quantity: 10 }); | ||
await writer.appendRow({ name: 'banana' }); // not in stock | ||
``` | ||
### Nested Rows & Arrays | ||
@@ -400,3 +408,3 @@ | ||
``` js | ||
```js | ||
// advanced fruits table | ||
@@ -411,4 +419,4 @@ var schema = new parquet.ParquetSchema({ | ||
quantity: { type: 'INT64' }, | ||
} | ||
} | ||
}, | ||
}, | ||
}); | ||
@@ -424,4 +432,4 @@ | ||
{ price: 2.45, quantity: 16 }, | ||
{ price: 2.60, quantity: 420 } | ||
] | ||
{ price: 2.6, quantity: 420 }, | ||
], | ||
}); | ||
@@ -433,5 +441,5 @@ | ||
stock: [ | ||
{ price: 1.20, quantity: 42 }, | ||
{ price: 1.30, quantity: 230 } | ||
] | ||
{ price: 1.2, quantity: 42 }, | ||
{ price: 1.3, quantity: 230 }, | ||
], | ||
}); | ||
@@ -446,3 +454,3 @@ | ||
let record = null; | ||
while (record = await cursor.next()) { | ||
while ((record = await cursor.next())) { | ||
console.log(record); | ||
@@ -455,3 +463,3 @@ } | ||
It might not be obvious why one would want to implement or use such a feature when | ||
the same can - in principle - be achieved by serializing the record using JSON | ||
the same can - in principle - be achieved by serializing the record using JSON | ||
(or a similar scheme) and then storing it into a UTF8 field: | ||
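For contrast, here is a minimal sketch (not taken from the README; the file name `fruits_json.parquet` is made up) of that JSON-in-a-UTF8-field approach, using only the writer API shown earlier:

```js
// Sketch: serialize the nested stock entries as a JSON string and store them
// in a plain UTF8 column. This works, but the entries become opaque blobs:
// they cannot be projected or filtered as columns, unlike the nested schema.
var schema = new parquet.ParquetSchema({
  name: { type: 'UTF8' },
  stock: { type: 'UTF8' }, // JSON blob instead of a nested group
});

var writer = await parquet.ParquetWriter.openFile(schema, 'fruits_json.parquet');
await writer.appendRow({
  name: 'apples',
  stock: JSON.stringify([
    { price: 2.45, quantity: 16 },
    { price: 2.6, quantity: 420 },
  ]),
});
await writer.close();
```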
@@ -465,8 +473,6 @@ | ||
### Nested Lists for Hive / Athena | ||
Lists have to be annotated to be queryable with AWS Athena. See [parquet-format](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists) for more detail and a full working example with comments in the test directory ([`test/list.js`](test/list.js)). | ||
Lists have to be annotated to be queryable with AWS Athena. See [parquet-format](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists) for more detail and a full working example with comments in the test directory ([`test/list.js`](test/list.js)). | ||
### List of Supported Types & Encodings | ||
@@ -504,3 +510,2 @@ | ||
## Buffering & Row Group Size | ||
@@ -516,3 +521,3 @@ | ||
``` js | ||
```js | ||
var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet'); | ||
@@ -522,3 +527,2 @@ writer.setRowGroupSize(8192); | ||
## Dependencies | ||
@@ -529,3 +533,2 @@ | ||
## Notes | ||
@@ -532,0 +535,0 @@ |
Sorry, the diff of this file is too big to display
Uses eval
Supply chain risk: Package uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
+ Added brotli-wasm@3.0.1 (transitive)
+ Added bson@6.7.0 (transitive)
+ Added cross-fetch@4.1.0 (transitive)
+ Added int53@1.0.0 (transitive)
+ Added long@5.3.0 (transitive)
+ Added snappyjs@0.7.0 (transitive)
+ Added thrift@0.20.0 (transitive)
- Removed base64-js@1.5.1 (transitive)
- Removed brotli-wasm@2.0.1 (transitive)
- Removed bson@4.6.3 (transitive)
- Removed buffer@5.7.1 (transitive)
- Removed cross-fetch@3.2.0 (transitive)
- Removed ieee754@1.2.1 (transitive)
- Removed int53@0.2.4 (transitive)
- Removed long@4.0.0 (transitive)
- Removed snappyjs@0.6.1 (transitive)
- Removed thrift@0.16.0 (transitive)
Updated @aws-sdk/client-s3@^3.575.0
Updated @types/node-int64@^0.4.32
Updated @types/thrift@^0.10.17
Updated brotli-wasm@^3.0.0
Updated bson@6.7.0
Updated cross-fetch@^4.0.0
Updated int53@^1.0.0
Updated long@^5.2.3
Updated snappyjs@^0.7.0
Updated thrift@0.20.0