Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

@dsnp/parquetjs

Package Overview
Dependencies
Maintainers
2
Versions
93
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@dsnp/parquetjs - npm Package Compare versions

Comparing version 0.0.0-b4ca34 to 0.0.0-b8bfc0

dist/gen-nodejs/parquet_types.d.ts

1

dist/lib/bloom/sbbf.js

@@ -371,2 +371,1 @@ "use strict";

exports.default = SplitBlockBloomFilter;
//# sourceMappingURL=sbbf.js.map

@@ -44,2 +44,1 @@ "use strict";

exports.default = XxHasher;
//# sourceMappingURL=xxhasher.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -31,4 +35,3 @@ if (k2 === undefined) k2 = k;

return columnChunkDataCollection.filter((columnChunk) => {
const { column: { meta_data: { bloom_filter_offset: { buffer: bloomFilterOffsetBuffer }, }, }, } = columnChunk;
return bloomFilterOffsetBuffer;
return columnChunk.column.meta_data?.bloom_filter_offset;
});

@@ -44,6 +47,6 @@ };

const parseBloomFilterOffsets = (ColumnChunkDataCollection) => {
return ColumnChunkDataCollection.map((columnChunkData) => {
const { column: { meta_data: { bloom_filter_offset: { buffer: bloomFilterOffsetBuffer }, path_in_schema: pathInSchema, }, }, rowGroupIndex, } = columnChunkData;
return ColumnChunkDataCollection.map(({ rowGroupIndex, column }) => {
const { bloom_filter_offset: bloomOffset, path_in_schema: pathInSchema, } = column.meta_data || {};
return {
offsetBytes: toInteger(bloomFilterOffsetBuffer),
offsetBytes: toInteger(bloomOffset.buffer),
columnName: pathInSchema.join(","),

@@ -111,2 +114,1 @@ rowGroupIndex,

exports.getBloomFiltersFor = getBloomFiltersFor;
//# sourceMappingURL=bloomFilterReader.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -65,3 +69,3 @@ if (k2 === undefined) k2 = k;

const setFilterOffset = (column, offset) => {
column.meta_data.bloom_filter_offset = offset;
column.meta_data.bloom_filter_offset = parquet_util.cloneInteger(offset);
};

@@ -75,2 +79,1 @@ exports.setFilterOffset = setFilterOffset;

exports.getSerializedBloomFilterData = getSerializedBloomFilterData;
//# sourceMappingURL=bloomFilterWriter.js.map

@@ -57,2 +57,1 @@ 'use strict';

exports.inflate = inflate;
//# sourceMappingURL=compression.js.map

@@ -19,3 +19,3 @@ "use strict";

}
async read(offset, length) {
read(offset, length) {
if (!this.scheduled) {

@@ -71,2 +71,1 @@ this.scheduled = true;

;
//# sourceMappingURL=bufferReader.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -26,2 +30,1 @@ if (k2 === undefined) k2 = k;

exports.PLAIN_DICTIONARY = __importStar(require("./plain_dictionary"));
//# sourceMappingURL=index.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -30,2 +34,1 @@ if (k2 === undefined) k2 = k;

exports.decodeValues = decodeValues;
//# sourceMappingURL=plain_dictionary.js.map

@@ -213,2 +213,1 @@ "use strict";

exports.decodeValues = decodeValues;
//# sourceMappingURL=plain.js.map
"use strict";
// For questions about RLE encoding, see the spec:
//
// https://github.com/apache/parquet-format/blob/master/Encodings.md
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -25,5 +28,8 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

let buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8));
let remainingValue = value;
// This is encoded LSB to MSB, so we pick off the least
// significant byte and shift to get the next one.
for (let i = 0; i < buf.length; ++i) {
buf.writeUInt8(value & 0xff, i);
value >> 8;
buf.writeUInt8(remainingValue & 0xff, i);
remainingValue = remainingValue >> 8;
}

@@ -111,6 +117,9 @@ return Buffer.concat([

function decodeRunRepeated(cursor, count, opts) {
var bytesNeededForFixedBitWidth = Math.ceil(opts.bitWidth / 8);
let value = 0;
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); ++i) {
value << 8;
value += cursor.buffer[cursor.offset];
for (let i = 0; i < bytesNeededForFixedBitWidth; ++i) {
const byte = cursor.buffer[cursor.offset];
// Bytes are stored LSB to MSB, so we need to shift
// each new byte appropriately.
value += byte << (i * 8);
cursor.offset += 1;

@@ -149,2 +158,1 @@ }

exports.decodeValues = decodeValues;
//# sourceMappingURL=rle.js.map
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
//# sourceMappingURL=types.js.map

@@ -80,2 +80,1 @@ "use strict";

}
//# sourceMappingURL=compression.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -25,3 +29,3 @@ if (k2 === undefined) k2 = k;

Object.defineProperty(exports, "__esModule", { value: true });
exports.ParquetEnvelopeReader = void 0;
exports.ParquetEnvelopeReader = exports.ParquetReader = void 0;
const node_int64_1 = __importDefault(require("node-int64"));

@@ -63,2 +67,3 @@ const parquet_types_1 = __importDefault(require("../gen-nodejs/parquet_types"));

rowGroupIndex;
cursorIndex;
/**

@@ -77,2 +82,3 @@ * Create a new parquet reader from the file metadata and an envelope reader.

this.rowGroupIndex = 0;
this.cursorIndex = 0;
}

@@ -84,3 +90,3 @@ /**

async next() {
if (this.rowGroup.length === 0) {
if (this.cursorIndex >= this.rowGroup.length) {
if (this.rowGroupIndex >= this.metadata.row_groups.length) {

@@ -92,4 +98,5 @@ return null;

this.rowGroupIndex++;
this.cursorIndex = 0;
}
return this.rowGroup.shift();
return this.rowGroup[this.cursorIndex++];
}

@@ -102,2 +109,3 @@ /**

this.rowGroupIndex = 0;
this.cursorIndex = 0;
}

@@ -191,7 +199,7 @@ }

if (column.offsetIndex) {
column.offsetIndex.page_locations.forEach(d => {
Promise.resolve(column.offsetIndex).then(offset => (offset.page_locations.forEach(d => {
if (Array.isArray(d)) {
Object.setPrototypeOf(d, parquet_types_1.default.PageLocation.prototype);
}
});
})));
}

@@ -285,3 +293,3 @@ });

}
exportMetadata(indent) {
async exportMetadata(indent) {
function replacer(_key, value) {

@@ -320,2 +328,14 @@ if (value instanceof parquet_types_1.default.PageLocation) {

const metadata = Object.assign({}, this.metadata, { json: true });
for (let i = 0; i < metadata.row_groups.length; i++) {
const rowGroup = metadata.row_groups[i];
for (let j = 0; j < rowGroup.columns.length; j++) {
const column = rowGroup.columns[j];
if (column.offsetIndex instanceof Promise) {
column.offsetIndex = await column.offsetIndex;
}
if (column.columnIndex instanceof Promise) {
column.columnIndex = await column.columnIndex;
}
}
}
return JSON.stringify(metadata, replacer, indent);

@@ -336,2 +356,3 @@ }

}
exports.ParquetReader = ParquetReader;
/**

@@ -437,10 +458,14 @@ * The parquet envelope reader allows direct, unbuffered access to the individual

let column;
if (!isNaN(row_group)) {
row_group = this.metadata.row_groups[row_group];
let parsedRowGroup;
if (!isNaN(Number(row_group))) {
parsedRowGroup = this.metadata?.row_groups[Number(row_group)];
}
else if (row_group instanceof parquet_types_1.default.RowGroup) {
parsedRowGroup = row_group;
}
if (typeof path === 'string') {
if (!row_group) {
if (!parsedRowGroup) {
throw `Missing RowGroup ${row_group}`;
}
column = row_group.columns.find(d => d.meta_data.path_in_schema.join(',') === path);
column = parsedRowGroup.columns.find(d => d.meta_data.path_in_schema.join(',') === path);
if (!column) {

@@ -476,9 +501,5 @@ throw `Column ${path} Not Found`;

Object.defineProperty(offset_index, 'column', { value: column, enumerable: false });
if (opts && opts.cache) {
column.offsetIndex = offset_index;
}
return offset_index;
});
if (opts && opts.cache) {
//@ts-ignore
if (opts?.cache) {
column.offsetIndex = data;

@@ -496,5 +517,5 @@ }

}
const data = this.read(+column.column_index_offset, column.column_index_length).then((data) => {
const data = this.read(+column.column_index_offset, column.column_index_length).then((buf) => {
let column_index = new parquet_types_1.default.ColumnIndex();
parquet_util.decodeThrift(column_index, data);
parquet_util.decodeThrift(column_index, buf);
Object.defineProperty(column_index, 'column', { value: column });

@@ -509,9 +530,5 @@ // decode the statistics values

}
if (opts && opts.cache) {
column.columnIndex = column_index;
}
return column_index;
});
if (opts && opts.cache) {
//@ts-ignore
if (opts?.cache) {
column.columnIndex = data;

@@ -528,9 +545,9 @@ }

}
column.meta_data.data_page_offset = page.offset;
column.meta_data.total_compressed_size = page.compressed_page_size;
column.meta_data.data_page_offset = parquet_util.cloneInteger(page.offset);
column.meta_data.total_compressed_size = new node_int64_1.default(page.compressed_page_size);
}
else {
const offsetIndex = column.offsetIndex || await this.readOffsetIndex(column, null, opts);
column.meta_data.data_page_offset = offsetIndex.page_locations[page].offset;
column.meta_data.total_compressed_size = offsetIndex.page_locations[page].compressed_page_size;
const offsetIndex = await this.readOffsetIndex(column, null, opts);
column.meta_data.data_page_offset = parquet_util.cloneInteger(offsetIndex.page_locations[page].offset);
column.meta_data.total_compressed_size = new node_int64_1.default(offsetIndex.page_locations[page].compressed_page_size);
}

@@ -605,3 +622,3 @@ const chunk = await this.readColumnChunk(this.schema, column);

let metadataBuf = await this.read(metadataOffset, metadataSize);
let metadata = new types_1.NewFileMetaData();
let metadata = new parquet_types_1.default.FileMetaData();
parquet_util.decodeThrift(metadata, metadataBuf);

@@ -704,3 +721,6 @@ return metadata;

}
if (opts.dictionary) {
// It's possible to have a column chunk where some pages should use
// the dictionary (PLAIN_DICTIONARY for example) and others should
// not (PLAIN for example).
if (opts.dictionary && pageData.useDictionary) {
pageData.values = pageData.values.map(d => opts.dictionary[d]);

@@ -790,3 +810,4 @@ }

values: values,
count: valueCount
count: valueCount,
useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY'
};

@@ -841,3 +862,4 @@ }

values: values,
count: valueCount
count: valueCount,
useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY'
};

@@ -904,2 +926,1 @@ }

};
//# sourceMappingURL=reader.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -177,2 +181,1 @@ if (k2 === undefined) k2 = k;

}
//# sourceMappingURL=schema.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -219,2 +223,1 @@ if (k2 === undefined) k2 = k;

}
//# sourceMappingURL=shred.js.map
'use strict';
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -434,2 +438,1 @@ if (k2 === undefined) k2 = k;

}
//# sourceMappingURL=types.js.map

@@ -363,2 +363,1 @@ "use strict";

}
//# sourceMappingURL=parquet_primitives.js.map

@@ -7,14 +7,5 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.NewPageHeader = exports.NewFileMetaData = void 0;
exports.NewPageHeader = void 0;
const parquet_types_1 = __importDefault(require("../../gen-nodejs/parquet_types"));
;
class NewFileMetaData extends parquet_types_1.default.FileMetaData {
json;
//@ts-ignore
row_groups;
constructor() {
super();
}
}
exports.NewFileMetaData = NewFileMetaData;
class NewPageHeader extends parquet_types_1.default.PageHeader {

@@ -28,2 +19,1 @@ offset;

exports.NewPageHeader = NewPageHeader;
//# sourceMappingURL=types.js.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -25,6 +29,7 @@ if (k2 === undefined) k2 = k;

Object.defineProperty(exports, "__esModule", { value: true });
exports.fieldIndexOf = exports.osopen = exports.osend = exports.oswrite = exports.fclose = exports.fread = exports.fstat = exports.fopen = exports.getThriftEnum = exports.getBitWidth = exports.decodeThrift = exports.serializeThrift = void 0;
exports.cloneInteger = exports.fieldIndexOf = exports.osopen = exports.osend = exports.oswrite = exports.fclose = exports.fread = exports.fstat = exports.fopen = exports.getThriftEnum = exports.getBitWidth = exports.decodeThrift = exports.serializeThrift = void 0;
const thrift_1 = __importDefault(require("thrift"));
const fs_1 = __importDefault(require("fs"));
const parquet_thrift = __importStar(require("../gen-nodejs/parquet_types"));
const thrift_2 = require("thrift");
/**

@@ -44,2 +49,7 @@ * We need to patch Thrift's TFramedTransport class bc the TS type definitions

}
/** Patch PageLocation to be three element array that has getters/setters
* for each of the properties (offset, compressed_page_size, first_row_index)
* This saves space considerably as we do not need to store the full variable
* names for every PageLocation
*/
const getterSetter = (index) => ({

@@ -214,2 +224,5 @@ get: function () { return this[index]; },

exports.fieldIndexOf = fieldIndexOf;
//# sourceMappingURL=util.js.map
const cloneInteger = (int) => {
return new thrift_2.Int64(int.valueOf());
};
exports.cloneInteger = cloneInteger;

@@ -1,11 +0,39 @@

'use strict';
const stream = require('stream');
const parquet_thrift = require('../gen-nodejs/parquet_types');
const parquet_shredder = require('./shred');
const parquet_util = require('./util');
const parquet_codec = require('./codec');
const parquet_compression = require('./compression');
const parquet_types = require('./types');
const bloomFilterWriter = require("./bloomFilterIO/bloomFilterWriter");
const Long = require('long');
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
const stream_1 = __importDefault(require("stream"));
const parquet_types_1 = __importDefault(require("../gen-nodejs/parquet_types"));
const parquet_shredder = __importStar(require("./shred"));
const parquet_util = __importStar(require("./util"));
const parquet_codec = __importStar(require("./codec"));
const parquet_compression = __importStar(require("./compression"));
const parquet_types = __importStar(require("./types"));
const bloomFilterWriter = __importStar(require("./bloomFilterIO/bloomFilterWriter"));
const node_int64_1 = __importDefault(require("node-int64"));
/**

@@ -35,2 +63,8 @@ * Parquet File Magic String

class ParquetWriter {
schema;
envelopeWriter;
rowBuffer;
rowGroupSize;
closed;
userMetadata;
/**

@@ -78,3 +112,3 @@ * Convenience method to create a new buffered parquet writer that writes to

async appendRow(row) {
if (this.closed) {
if (this.closed || this.envelopeWriter === null) {
throw 'writer was closed';

@@ -107,12 +141,14 @@ }

this.closed = true;
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
await encodePages(this.schema, this.rowBuffer, { useDataPageV2: this.envelopeWriter.useDataPageV2, bloomFilters: this.envelopeWriter.bloomFilters });
await this.envelopeWriter.writeRowGroup(this.rowBuffer);
this.rowBuffer = {};
if (this.envelopeWriter) {
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
await encodePages(this.schema, this.rowBuffer, { useDataPageV2: this.envelopeWriter.useDataPageV2, bloomFilters: this.envelopeWriter.bloomFilters });
await this.envelopeWriter.writeRowGroup(this.rowBuffer);
this.rowBuffer = {};
}
await this.envelopeWriter.writeBloomFilters();
await this.envelopeWriter.writeIndex();
await this.envelopeWriter.writeFooter(this.userMetadata);
await this.envelopeWriter.close();
this.envelopeWriter = null;
}
await this.envelopeWriter.writeBloomFilters();
await this.envelopeWriter.writeIndex();
await this.envelopeWriter.writeFooter(this.userMetadata);
await this.envelopeWriter.close();
this.envelopeWriter = null;
if (callback) {

@@ -142,12 +178,23 @@ callback();

setPageSize(cnt) {
this.writer.setPageSize(cnt);
this.envelopeWriter.setPageSize(cnt);
}
}
exports.ParquetWriter = ParquetWriter;
/**
* Create a parquet file from a schema and a number of row groups. This class
* performs direct, unbuffered writes to the underlying output stream and is
* intendend for advanced and internal users; the writeXXX methods must be
* intended for advanced and internal users; the writeXXX methods must be
* called in the correct order to produce a valid file.
*/
class ParquetEnvelopeWriter {
schema;
write;
close;
offset;
rowCount;
rowGroups;
pageSize;
useDataPageV2;
pageIndex;
bloomFilters; // TODO: OR filterCollection
/**

@@ -159,3 +206,3 @@ * Create a new parquet envelope writer that writes to the specified stream

let closeFn = parquet_util.osend.bind(undefined, outputStream);
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, new node_int64_1.default(0), opts);
}

@@ -167,3 +214,3 @@ constructor(schema, writeFn, closeFn, fileOffset, opts) {

this.offset = fileOffset;
this.rowCount = 0;
this.rowCount = new node_int64_1.default(0);
this.rowGroups = [];

@@ -179,3 +226,3 @@ this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;

writeSection(buf) {
this.offset += buf.length;
this.offset.setValue(this.offset.valueOf() + buf.length);
return this.write(buf);

@@ -200,12 +247,11 @@ }

});
this.rowCount += records.rowCount;
this.rowCount.setValue(this.rowCount.valueOf() + records.rowCount);
this.rowGroups.push(rgroup.metadata);
return this.writeSection(rgroup.body);
}
writeBloomFilters(_rowGroups) {
let rowGroups = _rowGroups || this.rowGroups;
rowGroups.forEach(group => {
writeBloomFilters() {
this.rowGroups.forEach(group => {
group.columns.forEach(column => {
const columnName = column.meta_data.path_in_schema[0];
if (columnName in this.bloomFilters === false)
const columnName = column.meta_data?.path_in_schema[0];
if (!columnName || columnName in this.bloomFilters === false)
return;

@@ -221,20 +267,19 @@ const serializedBloomFilterData = bloomFilterWriter.getSerializedBloomFilterData(this.bloomFilters[columnName]);

*/
writeIndex(_rowGroups) {
let rowGroups = _rowGroups || this.rowGroups;
writeIndex() {
this.schema.fieldList.forEach((c, i) => {
rowGroups.forEach(group => {
this.rowGroups.forEach(group => {
let column = group.columns[i];
if (!column)
return;
if (column.meta_data.columnIndex) {
if (column.meta_data?.columnIndex) {
let columnBody = parquet_util.serializeThrift(column.meta_data.columnIndex);
delete column.meta_data.columnIndex;
column.column_index_offset = this.offset;
column.column_index_offset = parquet_util.cloneInteger(this.offset);
column.column_index_length = columnBody.length;
this.writeSection(columnBody);
}
if (column.meta_data.offsetIndex) {
if (column.meta_data?.offsetIndex) {
let offsetBody = parquet_util.serializeThrift(column.meta_data.offsetIndex);
delete column.meta_data.offsetIndex;
column.offset_index_offset = this.offset;
column.offset_index_offset = parquet_util.cloneInteger(this.offset);
column.offset_index_length = offsetBody.length;

@@ -249,3 +294,3 @@ this.writeSection(offsetBody);

*/
writeFooter(userMetadata, schema, rowCount, rowGroups) {
writeFooter(userMetadata) {
if (!userMetadata) {

@@ -268,6 +313,8 @@ userMetadata = {};

}
exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
/**
* Create a parquet transform stream
*/
class ParquetTransformer extends stream.Transform {
class ParquetTransformer extends stream_1.default.Transform {
writer;
constructor(schema, opts = {}) {

@@ -280,9 +327,9 @@ super({ objectMode: true });

})(this);
this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, function () { }, 0, opts), opts);
this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, function () { }, new node_int64_1.default(0), opts), opts);
}
_transform(row, encoding, callback) {
_transform(row, _encoding, callback) {
if (row) {
this.writer.appendRow(row).then(data => callback(null, data), err => {
const fullErr = new Error(`Error transforming to parquet: ${err.toString()} row:${row}`);
fullErr.origErr = err;
fullErr.message = err;
callback(fullErr);

@@ -300,2 +347,3 @@ });

}
exports.ParquetTransformer = ParquetTransformer;
/**

@@ -328,3 +376,3 @@ * Encode a consecutive array of data using one of the parquet encodings

statistics.min = statistics.min_value;
return new parquet_thrift.Statistics(statistics);
return new parquet_types_1.default.Statistics(statistics);
}

@@ -340,3 +388,3 @@ async function encodePages(schema, rowBuffer, opts) {

let page;
const values = rowBuffer.columnData[field.path];
const values = rowBuffer.columnData[field.path.join(',')];
if (opts.bloomFilters && (field.name in opts.bloomFilters)) {

@@ -346,3 +394,3 @@ const splitBlockBloomFilter = opts.bloomFilters[field.name];

}
let statistics;
let statistics = {};
if (field.statistics !== false) {

@@ -358,4 +406,4 @@ statistics = {};

});
statistics.null_count = values.dlevels.length - values.values.length;
statistics.distinct_count = values.distinct_values.size;
statistics.null_count = new node_int64_1.default(values.dlevels.length - values.values.length);
statistics.distinct_count = new node_int64_1.default(values.distinct_values.size);
}

@@ -366,5 +414,5 @@ if (opts.useDataPageV2) {

else {
page = await encodeDataPage(field, values.values, values.rlevels, values.dlevels, statistics);
page = await encodeDataPage(field, values.values || [], values.rlevels || [], values.dlevels || [], statistics);
}
let pages = rowBuffer.pages[field.path];
let pages = rowBuffer.pages[field.path.join(',')];
let lastPage = pages[pages.length - 1];

@@ -408,7 +456,7 @@ let first_row_index = lastPage ? lastPage.first_row_index + lastPage.count : 0;

pageBody = await parquet_compression.deflate(column.compression, pageBody);
let pageHeader = new parquet_thrift.PageHeader();
pageHeader.type = parquet_thrift.PageType['DATA_PAGE'];
let pageHeader = new parquet_types_1.default.PageHeader();
pageHeader.type = parquet_types_1.default.PageType['DATA_PAGE'];
pageHeader.uncompressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length;
pageHeader.compressed_page_size = pageBody.length;
pageHeader.data_page_header = new parquet_thrift.DataPageHeader();
pageHeader.data_page_header = new parquet_types_1.default.DataPageHeader();
pageHeader.data_page_header.num_values = dlevels.length;

@@ -418,7 +466,7 @@ if (column.statistics !== false) {

}
pageHeader.data_page_header.encoding = parquet_thrift.Encoding[column.encoding];
pageHeader.data_page_header.encoding = parquet_types_1.default.Encoding[column.encoding];
pageHeader.data_page_header.definition_level_encoding =
parquet_thrift.Encoding[PARQUET_RDLVL_ENCODING];
parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING];
pageHeader.data_page_header.repetition_level_encoding =
parquet_thrift.Encoding[PARQUET_RDLVL_ENCODING];
parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING];
/* concat page header, repetition and definition levels and values */

@@ -453,5 +501,5 @@ return Buffer.concat([parquet_util.serializeThrift(pageHeader), pageBody]);

/* build page header */
let pageHeader = new parquet_thrift.PageHeader();
pageHeader.type = parquet_thrift.PageType['DATA_PAGE_V2'];
pageHeader.data_page_header_v2 = new parquet_thrift.DataPageHeaderV2();
let pageHeader = new parquet_types_1.default.PageHeader();
pageHeader.type = parquet_types_1.default.PageType['DATA_PAGE_V2'];
pageHeader.data_page_header_v2 = new parquet_types_1.default.DataPageHeaderV2();
pageHeader.data_page_header_v2.num_values = dlevels.length;

@@ -467,3 +515,3 @@ pageHeader.data_page_header_v2.num_nulls = dlevels.length - values.length;

rLevelsBuf.length + dLevelsBuf.length + valuesBufCompressed.length;
pageHeader.data_page_header_v2.encoding = parquet_thrift.Encoding[column.encoding];
pageHeader.data_page_header_v2.encoding = parquet_types_1.default.Encoding[column.encoding];
pageHeader.data_page_header_v2.definition_levels_byte_length = dLevelsBuf.length;

@@ -489,16 +537,16 @@ pageHeader.data_page_header_v2.repetition_levels_byte_length = rLevelsBuf.length;

/* prepare metadata header */
let metadata = new parquet_thrift.ColumnMetaData();
let metadata = new parquet_types_1.default.ColumnMetaData();
metadata.path_in_schema = opts.column.path;
metadata.num_values = num_values;
metadata.data_page_offset = opts.baseOffset;
metadata.num_values = new node_int64_1.default(num_values);
metadata.data_page_offset = new node_int64_1.default(opts.baseOffset);
metadata.encodings = [];
metadata.total_uncompressed_size = pagesBuf.length;
metadata.total_compressed_size = pagesBuf.length;
metadata.type = parquet_thrift.Type[opts.column.primitiveType];
metadata.codec = await parquet_thrift.CompressionCodec[opts.column.compression];
metadata.total_uncompressed_size = new node_int64_1.default(pagesBuf.length);
metadata.total_compressed_size = new node_int64_1.default(pagesBuf.length);
metadata.type = parquet_types_1.default.Type[opts.column.primitiveType];
metadata.codec = await parquet_types_1.default.CompressionCodec[opts.column.compression];
/* compile statistics ColumnIndex and OffsetIndex*/
let columnIndex = new parquet_thrift.ColumnIndex();
let columnIndex = new parquet_types_1.default.ColumnIndex();
columnIndex.max_values = [];
columnIndex.min_values = [];
let offsetIndex = new parquet_thrift.OffsetIndex();
let offsetIndex = new parquet_types_1.default.OffsetIndex();
offsetIndex.page_locations = [];

@@ -508,7 +556,7 @@ /* prepare statistics */

let distinct_values = new Set();
statistics.null_count = 0;
statistics.distinct_count = 0;
statistics.null_count = new node_int64_1.default(0);
statistics.distinct_count = new node_int64_1.default(0);
/* loop through pages and update indices and statistics */
for (let i = 0; i < pages.length; i++) {
let page = pages[i];
const page = pages[i];
if (opts.column.statistics !== false) {

@@ -521,12 +569,12 @@ if (page.statistics.max_value > statistics.max_value || i == 0) {

}
statistics.null_count += page.statistics.null_count;
page.distinct_values.forEach(value => distinct_values.add(value));
statistics.null_count.setValue(statistics.null_count.valueOf() + (page.statistics.null_count?.valueOf() || 0));
page.distinct_values.forEach((value) => distinct_values.add(value));
columnIndex.max_values.push(encodeStatisticsValue(page.statistics.max_value, opts.column));
columnIndex.min_values.push(encodeStatisticsValue(page.statistics.min_value, opts.column));
}
let pageLocation = new parquet_thrift.PageLocation();
pageLocation.offset = offset;
let pageLocation = new parquet_types_1.default.PageLocation();
pageLocation.offset = new node_int64_1.default(offset);
offset += page.page.length;
pageLocation.compressed_page_size = page.page.length;
pageLocation.first_row_index = page.first_row_index;
pageLocation.first_row_index = new node_int64_1.default(page.first_row_index);
offsetIndex.page_locations.push(pageLocation);

@@ -538,3 +586,3 @@ }

if (opts.column.statistics !== false) {
statistics.distinct_count = distinct_values.size;
statistics.distinct_count = new node_int64_1.default(distinct_values.size);
metadata.statistics = encodeStatistics(statistics, opts.column);

@@ -546,8 +594,4 @@ if (opts.pageIndex !== false) {

/* list encodings */
let encodingsSet = {};
encodingsSet[PARQUET_RDLVL_ENCODING] = true;
encodingsSet[opts.column.encoding] = true;
for (let k in encodingsSet) {
metadata.encodings.push(parquet_thrift.Encoding[k]);
}
metadata.encodings.push(parquet_types_1.default.Encoding[PARQUET_RDLVL_ENCODING]);
metadata.encodings.push(parquet_types_1.default.Encoding[opts.column.encoding]);
/* concat metadata header and data pages */

@@ -562,6 +606,6 @@ let metadataOffset = opts.baseOffset + pagesBuf.length;

async function encodeRowGroup(schema, data, opts) {
let metadata = new parquet_thrift.RowGroup();
metadata.num_rows = data.rowCount;
let metadata = new parquet_types_1.default.RowGroup();
metadata.num_rows = new node_int64_1.default(data.rowCount);
metadata.columns = [];
metadata.total_byte_size = 0;
metadata.total_byte_size = new node_int64_1.default(0);
let body = Buffer.alloc(0);

@@ -572,16 +616,15 @@ for (let field of schema.fieldList) {

}
let cchunkData = await encodeColumnChunk(data.pages[field.path], {
let cchunkData = await encodeColumnChunk(data.pages[field.path.join(',')], {
column: field,
baseOffset: opts.baseOffset + body.length,
pageSize: opts.pageSize,
encoding: field.encoding,
rowCount: data.rowCount,
useDataPageV2: opts.useDataPageV2,
pageIndex: opts.pageIndex
baseOffset: opts.baseOffset.valueOf() + body.length,
pageSize: opts.pageSize || 0,
rowCount: data.rowCount || 0,
useDataPageV2: opts.useDataPageV2 ?? true,
pageIndex: opts.pageIndex ?? true
});
let cchunk = new parquet_thrift.ColumnChunk();
cchunk.file_offset = cchunkData.metadataOffset;
let cchunk = new parquet_types_1.default.ColumnChunk();
cchunk.file_offset = new node_int64_1.default(cchunkData.metadataOffset);
cchunk.meta_data = cchunkData.metadata;
metadata.columns.push(cchunk);
metadata.total_byte_size += cchunkData.body.length;
metadata.total_byte_size = new node_int64_1.default(metadata.total_byte_size.valueOf() + (cchunkData.body.length));
body = Buffer.concat([body, cchunkData.body]);

@@ -595,5 +638,5 @@ }

function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
let metadata = new parquet_thrift.FileMetaData();
let metadata = new parquet_types_1.default.FileMetaData();
metadata.version = PARQUET_VERSION;
metadata.created_by = 'parquet.js';
metadata.created_by = '@dsnp/parquetjs';
metadata.num_rows = rowCount;

@@ -604,3 +647,3 @@ metadata.row_groups = rowGroups;

for (let k in userMetadata) {
let kv = new parquet_thrift.KeyValue();
let kv = new parquet_types_1.default.KeyValue();
kv.key = k;

@@ -611,3 +654,3 @@ kv.value = userMetadata[k];

{
let schemaRoot = new parquet_thrift.SchemaElement();
let schemaRoot = new parquet_types_1.default.SchemaElement();
schemaRoot.name = 'root';

@@ -618,5 +661,5 @@ schemaRoot.num_children = Object.keys(schema.fields).length;

for (let field of schema.fieldList) {
let schemaElem = new parquet_thrift.SchemaElement();
let schemaElem = new parquet_types_1.default.SchemaElement();
schemaElem.name = field.name;
schemaElem.repetition_type = parquet_thrift.FieldRepetitionType[field.repetitionType];
schemaElem.repetition_type = parquet_types_1.default.FieldRepetitionType[field.repetitionType];
if (field.isNested) {

@@ -626,6 +669,6 @@ schemaElem.num_children = field.fieldCount;

else {
schemaElem.type = parquet_thrift.Type[field.primitiveType];
schemaElem.type = parquet_types_1.default.Type[field.primitiveType];
}
if (field.originalType) {
schemaElem.converted_type = parquet_thrift.ConvertedType[field.originalType];
schemaElem.converted_type = parquet_types_1.default.ConvertedType[field.originalType];
}

@@ -647,2 +690,1 @@ schemaElem.type_length = field.typeLength;

};
//# sourceMappingURL=writer.js.map
"use strict";
const reader = require('./lib/reader');
const writer = require('./lib/writer');
const schema = require('./lib/schema');
const shredder = require('./lib/shred');
const util = require('./lib/util');
module.exports = {
ParquetEnvelopeReader: reader.ParquetEnvelopeReader,
ParquetReader: reader.ParquetReader,
ParquetEnvelopeWriter: writer.ParquetEnvelopeWriter,
ParquetWriter: writer.ParquetWriter,
ParquetTransformer: writer.ParquetTransformer,
ParquetSchema: schema.ParquetSchema,
ParquetShredder: shredder,
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
//# sourceMappingURL=parquet.js.map
Object.defineProperty(exports, "__esModule", { value: true });
exports.ParquetShredder = exports.ParquetSchema = exports.ParquetTransformer = exports.ParquetWriter = exports.ParquetEnvelopeWriter = exports.ParquetReader = exports.ParquetEnvelopeReader = void 0;
const reader = __importStar(require("./lib/reader"));
const writer = __importStar(require("./lib/writer"));
const schema = __importStar(require("./lib/schema"));
const shredder = __importStar(require("./lib/shred"));
exports.ParquetEnvelopeReader = reader.ParquetEnvelopeReader;
exports.ParquetReader = reader.ParquetReader;
exports.ParquetEnvelopeWriter = writer.ParquetEnvelopeWriter;
exports.ParquetWriter = writer.ParquetWriter;
exports.ParquetTransformer = writer.ParquetTransformer;
exports.ParquetSchema = schema.ParquetSchema;
exports.ParquetShredder = shredder;

@@ -5,3 +5,4 @@ {

"main": "dist/parquet.js",
"version": "0.0.0-b4ca34",
"types": "dist/parquet.d.ts",
"version": "0.0.0-b8bfc0",
"homepage": "https://github.com/LibertyDSNP/parquetjs",

@@ -18,11 +19,12 @@ "license": "MIT",

"dependencies": {
"@types/varint": "^6.0.0",
"@types/long": "^4.0.2",
"@types/node-int64": "^0.4.29",
"browserify-zlib": "^0.2.0",
"bson": "4.4.0",
"bson": "4.6.3",
"cross-fetch": "^3.1.4",
"int53": "^0.2.4",
"long": "^4.0.0",
"snappyjs": "^0.6.0",
"thrift": "0.14.1",
"varint": "^5.0.0",
"snappyjs": "^0.6.1",
"thrift": "0.16.0",
"varint": "^6.0.0",
"wasm-brotli": "^2.0.2",

@@ -32,36 +34,30 @@ "xxhash-wasm": "^0.4.1"

"devDependencies": {
"@babel/core": "^7.14.6",
"@babel/preset-env": "^7.14.7",
"@babel/preset-typescript": "^7.14.5",
"@types/bson": "^4.0.3",
"@types/chai": "^4.2.16",
"@types/long": "^4.0.1",
"@types/mocha": "^8.2.2",
"@types/bson": "^4.0.5",
"@types/chai": "^4.3.1",
"@types/mocha": "^9.1.1",
"@types/node": "^14.14.35",
"@types/sinon": "^10.0.0",
"@types/thrift": "^0.10.10",
"@types/sinon": "^10.0.11",
"@types/thrift": "^0.10.11",
"@types/varint": "^6.0.0",
"assert": "^2.0.0",
"babel-loader": "^8.2.2",
"babel-plugin-add-module-exports": "^1.0.4",
"browserfs": "^1.4.3",
"buffer": "^6.0.3",
"chai": "4.3.4",
"core-js": "^3.15.1",
"esbuild": "^0.14.1",
"mocha": "8.3.2",
"msw": "^0.29.0",
"chai": "4.3.6",
"core-js": "^3.22.5",
"esbuild": "^0.14.38",
"mocha": "10.0.0",
"msw": "^0.39.2",
"object-stream": "0.0.1",
"process": "^0.11.10",
"regenerator-runtime": "^0.13.7",
"regenerator-runtime": "^0.13.9",
"sinon": "^10.0.0",
"sinon-chai": "^3.7.0",
"sinon-chai-in-order": "^0.1.0",
"source-map-loader": "^3.0.0",
"stream-browserify": "^3.0.0",
"ts-loader": "^9.2.3",
"ts-node": "^9.1.1",
"typescript": "^4.5.2"
"ts-node": "^10.7.0",
"typescript": "^4.6.4"
},
"scripts": {
"build": "npm run build:node",
"build": "npm run build:node && npm run build:types",
"build:types": "tsc -p tsconfig.types.json && cp gen-nodejs/parquet_types.d.ts dist/gen-nodejs/parquet_types.d.ts",
"build:node": "tsc -b",

@@ -73,3 +69,3 @@ "build:browser": "node esbuild.js",

"clean": "rm -Rf ./dist",
"prepublishOnly": "npm run clean && npm run build:node && npm run build:browser",
"prepublishOnly": "npm run clean && npm run build:node && npm run build:types && npm run build:browser",
"thrift": "thrift -out gen-nodejs --gen js:ts parquet.thrift && thrift -out gen-nodejs --gen js:node parquet.thrift",

@@ -76,0 +72,0 @@ "serve": "node esbuild-serve.js"

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc