@dsnp/parquetjs - npm package version comparison

Comparing version 0.0.0-81e38e to 0.0.0-8f3038
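Most of the churn in this diff is mechanical: the package is now emitted by the TypeScript compiler rather than shipped as hand-written CommonJS. Bare require() calls become __importDefault/__importStar interop helpers, and calls through exports are rewritten as (0, exports.fn)(...) so the function is invoked without exports bound as `this`. A minimal sketch of the emit pattern (illustrative function names, not the package's API):

// Hand-written CommonJS (old build)
exports.double = function (x) { return x * 2; };
exports.quadruple = function (x) {
  return exports.double(exports.double(x));
};

// TypeScript-compiled output (new build)
const double = (x) => x * 2;
exports.double = double;
const quadruple = (x) => (0, exports.double)((0, exports.double)(x));
exports.quadruple = quadruple;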

dist/lib/codec/types.js


dist/lib/bloom/xxhasher.js

@@ -19,3 +19,3 @@ "use strict";

class XxHasher {
static h64 = xxhash_wasm_1.default().then(x => x.h64);
static h64 = (0, xxhash_wasm_1.default)().then(x => x.h64);
async hashit(value) {

@@ -22,0 +22,0 @@ return (await XxHasher.h64)(value);

"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -7,3 +26,3 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

exports.getBloomFiltersFor = exports.siftAllByteOffsets = exports.parseBloomFilterOffsets = void 0;
const util_1 = __importDefault(require("../util"));
const parquet_util = __importStar(require("../util"));
const parquet_types_1 = __importDefault(require("../../gen-nodejs/parquet_types"));

@@ -42,6 +61,9 @@ const sbbf_1 = __importDefault(require("../bloom/sbbf"));

catch (e) {
throw new Error(e);
if (typeof e === 'string')
throw new Error(e);
else
throw e;
}
const bloomFilterHeader = new parquet_types_1.default.BloomFilterHeader();
const sizeOfBloomFilterHeader = util_1.default.decodeThrift(bloomFilterHeader, bloomFilterHeaderData);
const sizeOfBloomFilterHeader = parquet_util.decodeThrift(bloomFilterHeader, bloomFilterHeaderData);
return {

@@ -61,3 +83,6 @@ bloomFilterHeader,

catch (e) {
throw new Error(e);
if (typeof e === 'string')
throw new Error(e);
else
throw e;
}

@@ -69,3 +94,3 @@ };

const siftAllByteOffsets = (columnChunkDataCollection) => {
return exports.parseBloomFilterOffsets(filterColumnChunksWithBloomFilters(columnChunkDataCollection));
return (0, exports.parseBloomFilterOffsets)(filterColumnChunksWithBloomFilters(columnChunkDataCollection));
};

@@ -75,3 +100,3 @@ exports.siftAllByteOffsets = siftAllByteOffsets;

const columnChunkDataCollection = envelopeReader.getAllColumnChunkDataFor(columnNames);
const bloomFilterOffsetData = exports.siftAllByteOffsets(columnChunkDataCollection);
const bloomFilterOffsetData = (0, exports.siftAllByteOffsets)(columnChunkDataCollection);
const offsetByteValues = bloomFilterOffsetData.map(({ offsetBytes }) => offsetBytes);

@@ -78,0 +103,0 @@ const filterBlocksBuffers = await readFilterDataFrom(offsetByteValues, envelopeReader);

"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -7,3 +26,3 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

exports.getSerializedBloomFilterData = exports.setFilterOffset = exports.serializeFilterData = exports.serializeFilterHeaders = exports.createSBBF = void 0;
const util_1 = __importDefault(require("../util"));
const parquet_util = __importStar(require("../util"));
const parquet_types_1 = __importDefault(require("../../gen-nodejs/parquet_types"));

@@ -37,3 +56,3 @@ const sbbf_1 = __importDefault(require("../bloom/sbbf"));

const bloomFilterHeader = buildFilterHeader(numberOfBytes);
return util_1.default.serializeThrift(bloomFilterHeader);
return parquet_util.serializeThrift(bloomFilterHeader);
};

@@ -43,3 +62,3 @@ exports.serializeFilterHeaders = serializeFilterHeaders;

const serializedFilterBlocks = serializeFilterBlocks(params.filterBlocks);
const serializedFilterHeaders = exports.serializeFilterHeaders(params.filterByteSize);
const serializedFilterHeaders = (0, exports.serializeFilterHeaders)(params.filterByteSize);
return Buffer.concat([serializedFilterHeaders, serializedFilterBlocks]);

@@ -55,5 +74,5 @@ };

const filterByteSize = splitBlockBloomFilter.getNumFilterBytes();
return exports.serializeFilterData({ filterBlocks, filterByteSize });
return (0, exports.serializeFilterData)({ filterBlocks, filterByteSize });
};
exports.getSerializedBloomFilterData = getSerializedBloomFilterData;
//# sourceMappingURL=bloomFilterWriter.js.map
"use strict";
module.exports = class BufferReader {
Object.defineProperty(exports, "__esModule", { value: true });
class BufferReader {
maxSpan;
maxLength;
queueWait;
scheduled;
queue;
envelopeReader;
constructor(envelopeReader, options) {

@@ -58,5 +65,7 @@ options = options || {};

});
readSubqueue(subqueue);
readSubqueue();
}
};
}
exports.default = BufferReader;
;
//# sourceMappingURL=bufferReader.js.map
"use strict";
module.exports.PLAIN = require('./plain');
module.exports.RLE = require('./rle');
module.exports.PLAIN_DICTIONARY = require('./plain_dictionary');
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.PLAIN_DICTIONARY = exports.RLE = exports.PLAIN = void 0;
exports.PLAIN = __importStar(require("./plain"));
exports.RLE = __importStar(require("./rle"));
exports.PLAIN_DICTIONARY = __importStar(require("./plain_dictionary"));
//# sourceMappingURL=index.js.map
"use strict";
const rle = require('./rle');
exports.decodeValues = function (type, cursor, count, opts) {
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.decodeValues = void 0;
const rle = __importStar(require("./rle"));
const decodeValues = function (type, cursor, count, opts) {
opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);

@@ -8,2 +29,3 @@ cursor.offset += 1;

};
exports.decodeValues = decodeValues;
//# sourceMappingURL=plain_dictionary.js.map

@@ -1,3 +0,8 @@

'use strict';
const INT53 = require('int53');
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.decodeValues = exports.encodeValues = void 0;
const int53_1 = __importDefault(require("int53"));
function encodeValues_BOOLEAN(values) {

@@ -8,3 +13,3 @@ let buf = Buffer.alloc(Math.ceil(values.length / 8));

if (values[i]) {
buf[Math.floor(i / 8)] |= (1 << (i % 8));
buf[Math.floor(i / 8)] |= 1 << i % 8;
}

@@ -18,3 +23,3 @@ }

let b = cursor.buffer[cursor.offset + Math.floor(i / 8)];
values.push((b & (1 << (i % 8))) > 0);
values.push((b & (1 << i % 8)) > 0);
}

@@ -58,7 +63,7 @@ cursor.offset += Math.ceil(count / 8);

if (values[i] >= 0) {
INT53.writeInt64LE(values[i], buf, i * 12);
int53_1.default.writeInt64LE(values[i], buf, i * 12);
buf.writeUInt32LE(0, i * 12 + 8); // truncate to 64 actual precision
}
else {
INT53.writeInt64LE((~-values[i]) + 1, buf, i * 12);
int53_1.default.writeInt64LE(~-values[i] + 1, buf, i * 12);
buf.writeUInt32LE(0xffffffff, i * 12 + 8); // truncate to 64 actual precision

@@ -72,6 +77,6 @@ }

for (let i = 0; i < count; ++i) {
const low = INT53.readInt64LE(cursor.buffer, cursor.offset);
const low = int53_1.default.readInt64LE(cursor.buffer, cursor.offset);
const high = cursor.buffer.readUInt32LE(cursor.offset + 8);
if (high === 0xffffffff) {
values.push((~-low) + 1); // truncate to 64 actual precision
values.push(~-low + 1); // truncate to 64 actual precision
}

@@ -115,14 +120,16 @@ else {

}
// Waylands reminder to check again
function encodeValues_BYTE_ARRAY(values) {
let buf_len = 0;
const returnedValues = [];
for (let i = 0; i < values.length; i++) {
values[i] = Buffer.from(values[i]);
buf_len += 4 + values[i].length;
returnedValues[i] = Buffer.from(values[i]);
buf_len += 4 + returnedValues[i].length;
}
let buf = Buffer.alloc(buf_len);
let buf_pos = 0;
for (let i = 0; i < values.length; i++) {
buf.writeUInt32LE(values[i].length, buf_pos);
values[i].copy(buf, buf_pos + 4);
buf_pos += 4 + values[i].length;
for (let i = 0; i < returnedValues.length; i++) {
buf.writeUInt32LE(returnedValues[i].length, buf_pos);
returnedValues[i].copy(buf, buf_pos + 4);
buf_pos += 4 + returnedValues[i].length;
}

@@ -145,10 +152,10 @@ return buf;

}
let buf_len = 0;
const returnedValues = [];
for (let i = 0; i < values.length; i++) {
values[i] = Buffer.from(values[i]);
if (values[i].length !== opts.typeLength) {
throw "invalid value for FIXED_LEN_BYTE_ARRAY: " + values[i];
returnedValues[i] = Buffer.from(values[i]);
if (returnedValues[i].length !== opts.typeLength) {
throw "invalid value for FIXED_LEN_BYTE_ARRAY: " + returnedValues[i];
}
}
return Buffer.concat(values);
return Buffer.concat(returnedValues);
}

@@ -166,46 +173,48 @@ function decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts) {

}
exports.encodeValues = function (type, values, opts) {
const encodeValues = function (type, values, opts) {
switch (type) {
case 'BOOLEAN':
case "BOOLEAN":
return encodeValues_BOOLEAN(values);
case 'INT32':
case "INT32":
return encodeValues_INT32(values);
case 'INT64':
case "INT64":
return encodeValues_INT64(values);
case 'INT96':
case "INT96":
return encodeValues_INT96(values);
case 'FLOAT':
case "FLOAT":
return encodeValues_FLOAT(values);
case 'DOUBLE':
case "DOUBLE":
return encodeValues_DOUBLE(values);
case 'BYTE_ARRAY':
case "BYTE_ARRAY":
return encodeValues_BYTE_ARRAY(values);
case 'FIXED_LEN_BYTE_ARRAY':
case "FIXED_LEN_BYTE_ARRAY":
return encodeValues_FIXED_LEN_BYTE_ARRAY(values, opts);
default:
throw 'unsupported type: ' + type;
throw "unsupported type: " + type;
}
};
exports.decodeValues = function (type, cursor, count, opts) {
exports.encodeValues = encodeValues;
const decodeValues = function (type, cursor, count, opts) {
switch (type) {
case 'BOOLEAN':
case "BOOLEAN":
return decodeValues_BOOLEAN(cursor, count);
case 'INT32':
case "INT32":
return decodeValues_INT32(cursor, count);
case 'INT64':
case "INT64":
return decodeValues_INT64(cursor, count);
case 'INT96':
case "INT96":
return decodeValues_INT96(cursor, count);
case 'FLOAT':
case "FLOAT":
return decodeValues_FLOAT(cursor, count);
case 'DOUBLE':
case "DOUBLE":
return decodeValues_DOUBLE(cursor, count);
case 'BYTE_ARRAY':
case "BYTE_ARRAY":
return decodeValues_BYTE_ARRAY(cursor, count);
case 'FIXED_LEN_BYTE_ARRAY':
case "FIXED_LEN_BYTE_ARRAY":
return decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts);
default:
throw 'unsupported type: ' + type;
throw "unsupported type: " + type;
}
};
exports.decodeValues = decodeValues;
//# sourceMappingURL=plain.js.map
"use strict";
const varint = require('varint');
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.decodeValues = exports.encodeValues = void 0;
const varint_1 = __importDefault(require("varint"));
function encodeRunBitpacked(values, opts) {

@@ -14,3 +19,3 @@ for (let i = 0; i < values.length % 8; i++) {

return Buffer.concat([
Buffer.from(varint.encode(((values.length / 8) << 1) | 1)),
Buffer.from(varint_1.default.encode(((values.length / 8) << 1) | 1)),
buf

@@ -26,7 +31,15 @@ ]);

return Buffer.concat([
Buffer.from(varint.encode(count << 1)),
Buffer.from(varint_1.default.encode(count << 1)),
buf
]);
}
exports.encodeValues = function (type, values, opts) {
function unknownToParsedInt(value) {
if (typeof value === 'string') {
return parseInt(value, 10);
}
else {
return value;
}
}
const encodeValues = function (type, values, opts) {
if (!('bitWidth' in opts)) {

@@ -39,3 +52,3 @@ throw 'bitWidth is required';

case 'INT64':
values = values.map((x) => parseInt(x, 10));
values = values.map((x) => unknownToParsedInt(x));
break;

@@ -85,2 +98,3 @@ default:

};
exports.encodeValues = encodeValues;
function decodeRunBitpacked(cursor, count, opts) {

@@ -108,3 +122,3 @@ if (count % 8 !== 0) {

}
exports.decodeValues = function (type, cursor, count, opts) {
const decodeValues = function (_, cursor, count, opts) {
if (!('bitWidth' in opts)) {

@@ -119,4 +133,4 @@ throw 'bitWidth is required';

while (values.length < count) {
const header = varint.decode(cursor.buffer, cursor.offset);
cursor.offset += varint.encodingLength(header);
const header = varint_1.default.decode(cursor.buffer, cursor.offset);
cursor.offset += varint_1.default.encodingLength(header);
if (header & 1) {

@@ -138,2 +152,3 @@ res = decodeRunBitpacked(cursor, (header >> 1) * 8, opts);

};
exports.decodeValues = decodeValues;
//# sourceMappingURL=rle.js.map

@@ -1,8 +0,12 @@

'use strict';
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const zlib = require('zlib');
const snappy = require('snappyjs');
exports.inflate = exports.deflate = exports.PARQUET_COMPRESSION_METHODS = void 0;
const zlib_1 = __importDefault(require("zlib"));
const snappyjs_1 = __importDefault(require("snappyjs"));
const wasm_brotli_1 = require("wasm-brotli");
// LZO compression is disabled. See: https://github.com/LibertyDSNP/parquetjs/issues/18
const PARQUET_COMPRESSION_METHODS = {
exports.PARQUET_COMPRESSION_METHODS = {
'UNCOMPRESSED': {

@@ -29,7 +33,8 @@ deflate: deflate_identity,

async function deflate(method, value) {
if (!(method in PARQUET_COMPRESSION_METHODS)) {
if (!(method in exports.PARQUET_COMPRESSION_METHODS)) {
throw 'invalid compression method: ' + method;
}
return PARQUET_COMPRESSION_METHODS[method].deflate(value);
return exports.PARQUET_COMPRESSION_METHODS[method].deflate(value);
}
exports.deflate = deflate;
function deflate_identity(value) {

@@ -39,13 +44,14 @@ return value;

function deflate_gzip(value) {
return zlib.gzipSync(value);
return zlib_1.default.gzipSync(value);
}
function deflate_snappy(value) {
return snappy.compress(value);
return snappyjs_1.default.compress(value);
}
async function deflate_brotli(value) {
const compressedContent = await wasm_brotli_1.compress(value, {
mode: 0,
quality: 8,
lgwin: 22
});
const compressedContent = await (0, wasm_brotli_1.compress)(value /*, {
mode: 0,
quality: 8,
lgwin: 22
}
*/);
return Buffer.from(compressedContent);

@@ -57,7 +63,8 @@ }

async function inflate(method, value) {
if (!(method in PARQUET_COMPRESSION_METHODS)) {
if (!(method in exports.PARQUET_COMPRESSION_METHODS)) {
throw 'invalid compression method: ' + method;
}
return await PARQUET_COMPRESSION_METHODS[method].inflate(value);
return await exports.PARQUET_COMPRESSION_METHODS[method].inflate(value);
}
exports.inflate = inflate;
function inflate_identity(value) {

@@ -67,12 +74,11 @@ return value;

function inflate_gzip(value) {
return zlib.gunzipSync(value);
return zlib_1.default.gunzipSync(value);
}
function inflate_snappy(value) {
return snappy.uncompress(value);
return snappyjs_1.default.uncompress(value);
}
async function inflate_brotli(value) {
const uncompressedContent = await wasm_brotli_1.decompress(value);
const uncompressedContent = await (0, wasm_brotli_1.decompress)(value);
return Buffer.from(uncompressedContent);
}
module.exports = { PARQUET_COMPRESSION_METHODS, deflate, inflate };
//# sourceMappingURL=compression.js.map

@@ -1,16 +0,38 @@

'use strict';
const fs = require('fs');
const thrift = require('thrift');
const Int64 = require('node-int64');
const parquet_thrift = require('../gen-nodejs/parquet_types');
const parquet_shredder = require('./shred');
const parquet_util = require('./util');
const parquet_schema = require('./schema');
const parquet_codec = require('./codec');
const parquet_compression = require('./compression');
const parquet_types = require('./types');
const BufferReader = require('./bufferReader');
const bloomFilterReader = require('./bloomFilterIO/bloomFilterReader');
const groupBy = require("lodash/groupBy");
const fetch = require('cross-fetch');
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ParquetEnvelopeReader = void 0;
const node_int64_1 = __importDefault(require("node-int64"));
const parquet_types_1 = __importDefault(require("../gen-nodejs/parquet_types"));
const parquet_shredder = __importStar(require("./shred"));
const parquet_util = __importStar(require("./util"));
const parquet_schema = __importStar(require("./schema"));
const parquet_codec = __importStar(require("./codec"));
const parquet_compression = __importStar(require("./compression"));
const parquet_types = __importStar(require("./types"));
const bufferReader_1 = __importDefault(require("./bufferReader"));
const bloomFilterReader = __importStar(require("./bloomFilterIO/bloomFilterReader"));
const cross_fetch_1 = __importDefault(require("cross-fetch"));
const types_1 = require("./types/types");
const { getBloomFiltersFor, } = bloomFilterReader;

@@ -34,2 +56,8 @@ /**

class ParquetCursor {
metadata;
envelopeReader;
schema;
columnList;
rowGroup;
rowGroupIndex;
/**

@@ -81,2 +109,5 @@ * Create a new parquet reader from the file metadata and an envelope reader.

class ParquetReader {
envelopeReader;
metadata;
schema;
/**

@@ -147,3 +178,3 @@ * Open the parquet file pointed to by the specified path and return a new

else if (o.parquetType === 'INT64') {
return new Int64(Buffer.from(o.value));
return new node_int64_1.default(Buffer.from(o.value));
}

@@ -159,3 +190,3 @@ }

if (Array.isArray(d)) {
Object.setPrototypeOf(d, parquet_thrift.PageLocation.prototype);
Object.setPrototypeOf(d, parquet_types_1.default.PageLocation.prototype);
}

@@ -203,3 +234,9 @@ });

const bloomFilterData = await getBloomFiltersFor(columnNames, this.envelopeReader);
return groupBy(bloomFilterData, 'columnName');
return bloomFilterData.reduce((acc, value) => {
if (acc[value.columnName])
acc[value.columnName].push(value);
else
acc[value.columnName] = [value];
return acc;
}, {});
}
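This release also drops the lodash dependency (see the package.json diff near the end), so the groupBy call above is replaced with an equivalent plain reduce. A small sketch of the equivalence (hypothetical rows, not data from the package):

const rows = [
  { columnName: 'a', offsetBytes: 10 },
  { columnName: 'b', offsetBytes: 20 },
  { columnName: 'a', offsetBytes: 30 },
];
// lodash: groupBy(rows, 'columnName')
const grouped = rows.reduce((acc, value) => {
  if (acc[value.columnName]) acc[value.columnName].push(value);
  else acc[value.columnName] = [value];
  return acc;
}, {});
// grouped => { a: [{offsetBytes: 10}, {offsetBytes: 30}], b: [{offsetBytes: 20}] }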

@@ -230,5 +267,5 @@ /**

exportMetadata(indent) {
function replacer(key, value) {
if (value instanceof parquet_thrift.PageLocation) {
return [value[0], value[1], value[2]];
function replacer(_key, value) {
if (value instanceof parquet_types_1.default.PageLocation) {
return [value.offset, value.compressed_page_size, value.first_row_index];
}

@@ -248,4 +285,4 @@ if (typeof value === 'object') {

}
if (value instanceof Int64) {
if (isFinite(value)) {
if (value instanceof node_int64_1.default) {
if (isFinite(Number(value))) {
return Number(value);

@@ -288,2 +325,9 @@ }

class ParquetEnvelopeReader {
readFn;
close;
id;
fileSize;
default_dictionary_size;
metadata;
schema;
static async openFile(filePath, options) {

@@ -312,3 +356,3 @@ let fileStat = await parquet_util.fstat(filePath);

static async openS3(client, params, options) {
let fileStat = async () => client.headObject(params).promise().then(d => d.ContentLength);
let fileStat = async () => client.headObject(params).promise().then((d) => d.ContentLength);
let readFn = async (offset, length, file) => {

@@ -330,7 +374,7 @@ if (file) {

throw new Error('URL missing');
let base = params.url.split('/');
base = base.slice(0, base.length - 1).join('/') + '/';
const baseArr = params.url.split('/');
const base = baseArr.slice(0, baseArr.length - 1).join('/') + '/';
let defaultHeaders = params.headers || {};
let filesize = async () => {
const { headers } = await fetch(params.url);
const { headers } = await (0, cross_fetch_1.default)(params.url);
return headers.get('Content-Length');

@@ -342,3 +386,3 @@ };

let headers = Object.assign({}, defaultHeaders, { range });
const response = await fetch(url, { headers });
const response = await (0, cross_fetch_1.default)(url, { headers });
const arrayBuffer = await response.arrayBuffer();

@@ -351,3 +395,3 @@ const buffer = Buffer.from(arrayBuffer);

}
constructor(readFn, closeFn, fileSize, options) {
constructor(readFn, closeFn, fileSize, options, metadata) {
options = options || {};

@@ -359,4 +403,5 @@ this.readFn = readFn;

this.default_dictionary_size = options.default_dictionary_size || 10000000;
this.metadata = metadata;
if (options.maxLength || options.maxSpan || options.queueWait) {
const bufferReader = new BufferReader(this, options);
const bufferReader = new bufferReader_1.default(this, options);
this.read = (offset, length) => bufferReader.read(offset, length);

@@ -369,3 +414,3 @@ }

readHeader() {
return this.read(0, PARQUET_MAGIC.length).then(buf => {
return this.read(0, PARQUET_MAGIC.length).then((buf) => {
if (buf.toString() != PARQUET_MAGIC) {

@@ -413,4 +458,4 @@ throw 'not valid parquet file';

}
const data = this.read(+column.offset_index_offset, column.offset_index_length).then(data => {
let offset_index = new parquet_thrift.OffsetIndex();
const data = this.read(+column.offset_index_offset, column.offset_index_length).then((data) => {
let offset_index = new parquet_types_1.default.OffsetIndex();
parquet_util.decodeThrift(offset_index, data);

@@ -424,2 +469,3 @@ Object.defineProperty(offset_index, 'column', { value: column, enumerable: false });

if (opts && opts.cache) {
//@ts-ignore
column.offsetIndex = data;

@@ -437,4 +483,4 @@ }

}
const data = this.read(+column.column_index_offset, column.column_index_length).then(data => {
let column_index = new parquet_thrift.ColumnIndex();
const data = this.read(+column.column_index_offset, column.column_index_length).then((data) => {
let column_index = new parquet_types_1.default.ColumnIndex();
parquet_util.decodeThrift(column_index, data);

@@ -456,2 +502,3 @@ Object.defineProperty(column_index, 'column', { value: column });

if (opts && opts.cache) {
//@ts-ignore
column.columnIndex = data;

@@ -464,4 +511,4 @@ }

column.meta_data = Object.assign({}, column.meta_data);
if (page.offset !== undefined) {
if (isNaN(page.offset) || isNaN(page.compressed_page_size)) {
if (page instanceof parquet_types_1.default.PageLocation && page.offset !== undefined) {
if (isNaN(Number(page.offset)) || isNaN(page.compressed_page_size)) {
throw Error('page offset and/or size missing');

@@ -487,3 +534,5 @@ }

rowCount: +rowGroup.num_rows,
columnData: {}
columnData: {},
pageRowCount: 0,
pages: {}
};

@@ -496,3 +545,3 @@ for (let colChunk of rowGroup.columns) {

}
buffer.columnData[colKey] = await this.readColumnChunk(schema, colChunk);
buffer.columnData[colKey.join(',')] = await this.readColumnChunk(schema, colChunk);
}

@@ -502,9 +551,10 @@ return buffer;

async readColumnChunk(schema, colChunk, opts) {
let field = schema.findField(colChunk.meta_data.path_in_schema);
let type = parquet_util.getThriftEnum(parquet_thrift.Type, colChunk.meta_data.type);
let compression = parquet_util.getThriftEnum(parquet_thrift.CompressionCodec, colChunk.meta_data.codec);
let pagesOffset = +colChunk.meta_data.data_page_offset;
let pagesSize = +colChunk.meta_data.total_compressed_size;
let metadata = colChunk.meta_data;
let field = schema.findField(metadata.path_in_schema);
let type = parquet_util.getThriftEnum(parquet_types_1.default.Type, metadata.type);
let compression = parquet_util.getThriftEnum(parquet_types_1.default.CompressionCodec, metadata.codec);
let pagesOffset = +metadata.data_page_offset;
let pagesSize = +metadata.total_compressed_size;
if (!colChunk.file_path) {
pagesSize = Math.min(this.fileSize - pagesOffset, +colChunk.meta_data.total_compressed_size);
pagesSize = Math.min(this.fileSize - pagesOffset, +metadata.total_compressed_size);
}

@@ -517,6 +567,6 @@ opts = Object.assign({}, opts, {

column: field,
num_values: colChunk.meta_data.num_values
num_values: metadata.num_values
});
if (colChunk.meta_data.dictionary_page_offset) {
const offset = +colChunk.meta_data.dictionary_page_offset;
if (metadata.dictionary_page_offset) {
const offset = +metadata.dictionary_page_offset;
const size = Math.min(+this.fileSize - offset, this.default_dictionary_size);

@@ -529,3 +579,3 @@ await this.read(offset, size, colChunk.file_path).then(async (buffer) => {

}
return this.read(pagesOffset, pagesSize, colChunk.file_path).then(pagesBuf => decodePages(pagesBuf, opts));
return this.read(pagesOffset, pagesSize, colChunk.file_path).then((pagesBuf) => decodePages(pagesBuf, opts));
}

@@ -547,3 +597,3 @@ async readFooter() {

let metadataBuf = await this.read(metadataOffset, metadataSize);
let metadata = new parquet_thrift.FileMetaData();
let metadata = new types_1.NewFileMetaData();
parquet_util.decodeThrift(metadata, metadataBuf);

@@ -553,2 +603,3 @@ return metadata;

}
exports.ParquetEnvelopeReader = ParquetEnvelopeReader;
/**

@@ -594,7 +645,7 @@ * Decode a consecutive array of data using one of the parquet encodings

let page;
const pageHeader = new parquet_thrift.PageHeader();
const pageHeader = new types_1.NewPageHeader();
const headerOffset = cursor.offset;
const headerSize = parquet_util.decodeThrift(pageHeader, cursor.buffer.slice(cursor.offset));
cursor.offset += headerSize;
const pageType = parquet_util.getThriftEnum(parquet_thrift.PageType, pageHeader.type);
const pageType = parquet_util.getThriftEnum(parquet_types_1.default.PageType, pageHeader.type);
switch (pageType) {

@@ -650,3 +701,4 @@ case 'DATA_PAGE':

}
for (let i = 0; i < pageData.rlevels.length; i++) {
let length = pageData.rlevels != undefined ? pageData.rlevels.length : 0;
for (let i = 0; i < length; i++) {
data.rlevels.push(pageData.rlevels[i]);

@@ -680,9 +732,10 @@ data.dlevels.push(pageData.dlevels[i]);

}
return decodeValues(opts.column.primitiveType, opts.column.encoding, dictCursor, header.dictionary_page_header.num_values, opts)
.map(d => d.toString());
return decodeValues(opts.column.primitiveType, opts.column.encoding, dictCursor, (header.dictionary_page_header).num_values, opts)
.map((d) => d.toString());
}
async function decodeDataPage(cursor, header, opts) {
const cursorEnd = cursor.offset + header.compressed_page_size;
let valueCount = header.data_page_header.num_values;
let valueEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, header.data_page_header.encoding);
const dataPageHeader = header.data_page_header;
let valueCount = dataPageHeader.num_values;
let valueEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.encoding);
let valuesBufCursor = cursor;

@@ -698,3 +751,3 @@ if (opts.compression && opts.compression !== 'UNCOMPRESSED') {

/* read repetition levels */
let rLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, header.data_page_header.repetition_level_encoding);
let rLevelEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.repetition_level_encoding);
let rLevels = new Array(valueCount);

@@ -708,3 +761,3 @@ if (opts.rLevelMax > 0) {

/* read definition levels */
let dLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, header.data_page_header.definition_level_encoding);
let dLevelEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeader.definition_level_encoding);
let dLevels = new Array(valueCount);

@@ -739,5 +792,6 @@ if (opts.dLevelMax > 0) {

const cursorEnd = cursor.offset + header.compressed_page_size;
const valueCount = header.data_page_header_v2.num_values;
const valueCountNonNull = valueCount - header.data_page_header_v2.num_nulls;
const valueEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, header.data_page_header_v2.encoding);
const dataPageHeaderV2 = header.data_page_header_v2;
const valueCount = dataPageHeaderV2.num_values;
const valueCountNonNull = valueCount - dataPageHeaderV2.num_nulls;
const valueEncoding = parquet_util.getThriftEnum(parquet_types_1.default.Encoding, dataPageHeaderV2.encoding);
/* read repetition levels */

@@ -767,3 +821,3 @@ let rLevels = new Array(valueCount);

let valuesBufCursor = cursor;
if (header.data_page_header_v2.is_compressed) {
if (dataPageHeaderV2.is_compressed) {
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd));

@@ -791,3 +845,3 @@ valuesBufCursor = {

schemaElements.forEach(schemaElement => {
let repetitionType = parquet_util.getThriftEnum(parquet_thrift.FieldRepetitionType, schemaElement.repetition_type);
let repetitionType = parquet_util.getThriftEnum(parquet_types_1.default.FieldRepetitionType, schemaElement.repetition_type);
let optional = false;

@@ -806,3 +860,3 @@ let repeated = false;

;
if (schemaElement.num_children > 0) {
if (schemaElement.num_children != undefined && schemaElement.num_children > 0) {
schema[schemaElement.name] = {

@@ -827,5 +881,5 @@ optional: optional,

else {
let logicalType = parquet_util.getThriftEnum(parquet_thrift.Type, schemaElement.type);
let logicalType = parquet_util.getThriftEnum(parquet_types_1.default.Type, schemaElement.type);
if (schemaElement.converted_type != null) {
logicalType = parquet_util.getThriftEnum(parquet_thrift.ConvertedType, schemaElement.converted_type);
logicalType = parquet_util.getThriftEnum(parquet_types_1.default.ConvertedType, schemaElement.converted_type);
}

@@ -832,0 +886,0 @@ schema[schemaElement.name] = {

@@ -1,6 +0,26 @@

'use strict';
const parquet_codec = require('./codec');
const parquet_compression = require('./compression');
const parquet_types = require('./types');
const parquet_util = require('./util');
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ParquetSchema = void 0;
const parquet_codec = __importStar(require("./codec"));
const parquet_compression = __importStar(require("./compression"));
const parquet_types = __importStar(require("./types"));
const PARQUET_COLUMN_KEY_SEPARATOR = '.';

@@ -11,2 +31,5 @@ /**

class ParquetSchema {
schema;
fields;
fieldList;
/**

@@ -24,3 +47,3 @@ * Create a new schema from a JSON schema definition

findField(path) {
if (path.constructor !== Array) {
if (typeof path === 'string') {
path = path.split(",");

@@ -33,3 +56,6 @@ }

for (; path.length > 1; path.shift()) {
n = n[path[0]].fields;
let fields = n[path[0]]?.fields;
if (isDefined(fields)) {
n = fields;
}
}

@@ -42,3 +68,3 @@ return n[path[0]];

findFieldBranch(path) {
if (path.constructor !== Array) {
if (typeof path === 'string') {
path = path.split(",");

@@ -50,4 +76,5 @@ }

branch.push(n[path[0]]);
if (path.length > 1) {
n = n[path[0]].fields;
let fields = n[path[0]].fields;
if (path.length > 1 && isDefined(fields)) {
n = fields;
}

@@ -58,2 +85,3 @@ }

}
exports.ParquetSchema = ParquetSchema;
;

@@ -94,3 +122,3 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {

name: name,
path: path.concat([name]),
path: path.concat(name),
repetitionType: repetitionType,

@@ -102,3 +130,3 @@ rLevelMax: rLevelMax,

fieldCount: Object.keys(opts.fields).length,
fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat([name]))
fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat(name))
};

@@ -109,6 +137,5 @@ if (opts.type == 'LIST' || opts.type == 'MAP')

}
/* field type */
const typeDef = parquet_types.PARQUET_LOGICAL_TYPES[opts.type];
const typeDef = opts.type ? parquet_types.PARQUET_LOGICAL_TYPES[opts.type] : undefined;
if (!typeDef) {
throw 'invalid parquet type: ' + opts.type;
throw 'invalid parquet type: ' + (opts.type || "missing type");
}

@@ -120,3 +147,3 @@ /* field encoding */

if (!(opts.encoding in parquet_codec)) {
throw 'unsupported parquet encoding: ' + opts.encodig;
throw 'unsupported parquet encoding: ' + opts.encoding;
}

@@ -150,4 +177,5 @@ if (!opts.compression) {

list.push(fields[k]);
if (fields[k].isNested) {
list = list.concat(listFields(fields[k].fields));
const nestedFields = fields[k].fields;
if (fields[k].isNested && isDefined(nestedFields)) {
list = list.concat(listFields(nestedFields));
}

@@ -157,3 +185,5 @@ }

}
module.exports = { ParquetSchema };
function isDefined(val) {
return val !== undefined;
}
//# sourceMappingURL=schema.js.map

@@ -1,32 +0,29 @@

'use strict';
const parquet_types = require('./types');
const parquet_schema = require('./schema');
/**
* 'Shred' a record into a list of <value, repetition_level, definition_level>
* tuples per column using the Google Dremel Algorithm..
*
* The buffer argument must point to an object into which the shredded record
* will be returned. You may re-use the buffer for repeated calls to this function
* to append to an existing buffer, as long as the schema is unchanged.
*
* The format in which the shredded records will be stored in the buffer is as
* follows:
*
* buffer = {
* columnData: [
* 'my_col': {
* dlevels: [d1, d2, .. dN],
* rlevels: [r1, r2, .. rN],
* values: [v1, v2, .. vN],
* }, ...
* ],
* rowCount: X,
* }
*
*/
exports.shredRecord = function (schema, record, buffer) {
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.materializeRecords = exports.shredRecord = void 0;
const parquet_types = __importStar(require("./types"));
const shredRecord = function (schema, record, buffer) {
/* shred the record, this may raise an exception */
var recordShredded = {};
for (let field of schema.fieldList) {
recordShredded[field.path] = {
recordShredded[field.path.join(',')] = {
dlevels: [],

@@ -47,3 +44,4 @@ rlevels: [],

for (let field of schema.fieldList) {
buffer.columnData[field.path] = {
let path = field.path.join(',');
buffer.columnData[path] = {
dlevels: [],

@@ -55,3 +53,3 @@ rlevels: [],

};
buffer.pages[field.path] = [];
buffer.pages[path] = [];
}

@@ -62,4 +60,5 @@ }

for (let field of schema.fieldList) {
let record = recordShredded[field.path];
let column = buffer.columnData[field.path];
let path = field.path.join(',');
let record = recordShredded[path];
let column = buffer.columnData[path];
for (let i = 0; i < record.rlevels.length; i++) {

@@ -72,6 +71,7 @@ column.rlevels.push(record.rlevels[i]);

}
[...recordShredded[field.path].distinct_values].forEach(value => buffer.columnData[field.path].distinct_values.add(value));
buffer.columnData[field.path].count += recordShredded[field.path].count;
[...recordShredded[path].distinct_values].forEach(value => buffer.columnData[path].distinct_values.add(value));
buffer.columnData[path].count += recordShredded[path].count;
}
};
exports.shredRecord = shredRecord;
function shredRecordInternal(fields, record, data, rlvl, dlvl) {

@@ -81,8 +81,18 @@ for (let fieldName in fields) {

const fieldType = field.originalType || field.primitiveType;
const path = field.path.join(',');
// fetch values
let values = [];
if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) {
if (record[fieldName].constructor === Array) {
if (Array.isArray(record[fieldName])) {
values = record[fieldName];
}
else if (ArrayBuffer.isView(record[fieldName])) { // checks if any typed array
if (record[fieldName] instanceof Uint8Array) {
// wrap in a buffer, since not supported by parquet_thrift
values.push(Buffer.from(record[fieldName]));
}
else {
throw Object.prototype.toString.call(record[fieldName]) + ' is not supported';
}
}
else {

@@ -101,9 +111,9 @@ values.push(record[fieldName]);

if (values.length == 0) {
if (field.isNested) {
if (field.isNested && isDefined(field.fields)) {
shredRecordInternal(field.fields, null, data, rlvl, dlvl);
}
else {
data[field.path].rlevels.push(rlvl);
data[field.path].dlevels.push(dlvl);
data[field.path].count += 1;
data[path].rlevels.push(rlvl);
data[path].dlevels.push(dlvl);
data[path].count += 1;
}

@@ -115,11 +125,11 @@ continue;

const rlvl_i = i === 0 ? rlvl : field.rLevelMax;
if (field.isNested) {
if (field.isNested && isDefined(field.fields)) {
shredRecordInternal(field.fields, values[i], data, rlvl_i, field.dLevelMax);
}
else {
data[field.path].distinct_values.add(values[i]);
data[field.path].values.push(parquet_types.toPrimitive(fieldType, values[i]));
data[field.path].rlevels.push(rlvl_i);
data[field.path].dlevels.push(field.dLevelMax);
data[field.path].count += 1;
data[path].distinct_values.add(values[i]);
data[path].values.push(parquet_types.toPrimitive(fieldType, values[i]));
data[path].rlevels.push(rlvl_i);
data[path].dlevels.push(field.dLevelMax);
data[path].count += 1;
}

@@ -149,3 +159,3 @@ }

*/
exports.materializeRecords = function (schema, buffer, records) {
const materializeRecords = function (schema, buffer, records) {
if (!records) {

@@ -175,2 +185,3 @@ records = [];

};
exports.materializeRecords = materializeRecords;
function materializeRecordField(record, branch, rLevels, dLevel, value) {

@@ -186,10 +197,12 @@ const node = branch[0];

}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push({});
const recordValue = record[node.name];
while (recordValue.length < rLevels[0] + 1) {
recordValue.push({});
}
materializeRecordField(record[node.name][rLevels[0]], branch.slice(1), rLevels.slice(1), dLevel, value);
materializeRecordField(recordValue[rLevels[0]], branch.slice(1), rLevels.slice(1), dLevel, value);
}
else {
record[node.name] = record[node.name] || {};
materializeRecordField(record[node.name], branch.slice(1), rLevels, dLevel, value);
const recordValue = record[node.name];
materializeRecordField(recordValue, branch.slice(1), rLevels, dLevel, value);
}

@@ -202,6 +215,7 @@ }

}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push(null);
const recordValue = record[node.name];
while (recordValue.length < rLevels[0] + 1) {
recordValue.push(null);
}
record[node.name][rLevels[0]] = value;
recordValue[rLevels[0]] = value;
}

@@ -213,2 +227,5 @@ else {

}
function isDefined(val) {
return val !== undefined;
}
//# sourceMappingURL=shred.js.map
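For orientation, the (removed) docstring near the top of this file documents the shredded-buffer layout that these changes re-key by the joined field path. A hand-worked picture of that shape for one record against a single optional UTF8 column (a sketch from the documented format, simplified: the new build also tracks distinct_values and pages):

// schema field: name (optional UTF8, dLevelMax = 1, rLevelMax = 0)
// shredRecord(schema, { name: 'apple' }, buffer) leaves roughly:
// buffer = {
//   rowCount: 1,
//   columnData: {
//     'name': { dlevels: [1], rlevels: [0], values: [Buffer('apple')], count: 1 }
//   }
// }
// an absent value would instead push dlevel 0 (and nothing into values)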
'use strict';
const BSON = require('bson');
const PARQUET_LOGICAL_TYPES = {
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.fromPrimitive = exports.toPrimitive = exports.PARQUET_LOGICAL_TYPES = void 0;
const BSON = __importStar(require("bson"));
exports.PARQUET_LOGICAL_TYPES = {
'BOOLEAN': {

@@ -142,7 +163,8 @@ primitiveType: 'BOOLEAN',

function toPrimitive(type, value) {
if (!(type in PARQUET_LOGICAL_TYPES)) {
throw 'invalid type: ' + type;
if (type === undefined || !(type in exports.PARQUET_LOGICAL_TYPES)) {
throw 'invalid type: ' + type || "undefined";
}
return PARQUET_LOGICAL_TYPES[type].toPrimitive(value);
return exports.PARQUET_LOGICAL_TYPES[type].toPrimitive(value);
}
exports.toPrimitive = toPrimitive;
/**

@@ -153,7 +175,8 @@ * Convert a value from it's internal/underlying primitive representation to

function fromPrimitive(type, value) {
if (!(type in PARQUET_LOGICAL_TYPES)) {
throw 'invalid type: ' + type;
if (type === undefined || !(type in exports.PARQUET_LOGICAL_TYPES)) {
throw 'invalid type: ' + type || "undefined";
}
if ("fromPrimitive" in PARQUET_LOGICAL_TYPES[type]) {
return PARQUET_LOGICAL_TYPES[type].fromPrimitive(value);
const typeFromPrimitive = exports.PARQUET_LOGICAL_TYPES[type].fromPrimitive;
if (typeFromPrimitive !== undefined) {
return typeFromPrimitive(value);
}

@@ -164,2 +187,3 @@ else {

}
exports.fromPrimitive = fromPrimitive;
function toPrimitive_BOOLEAN(value) {

@@ -172,77 +196,128 @@ return !!value;

function toPrimitive_FLOAT(value) {
const v = parseFloat(value);
if (isNaN(v)) {
throw 'invalid value for FLOAT: ' + value;
if (typeof value === 'string') {
const v = parseFloat(value);
return v;
}
return v;
else if (typeof value === 'number') {
return value;
}
throw 'invalid value for FLOAT: ' + value;
}
function toPrimitive_DOUBLE(value) {
const v = parseFloat(value);
if (isNaN(v)) {
throw 'invalid value for DOUBLE: ' + value;
if (typeof value === 'string') {
const v = parseFloat(value);
return v;
}
return v;
else if (typeof value === 'number') {
return value;
}
throw 'invalid value for DOUBLE: ' + value;
}
function toPrimitive_INT8(value) {
const v = parseInt(value, 10);
if (v < -0x80 || v > 0x7f || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(-0x80, 0x7f, v);
return v;
}
catch {
throw 'invalid value for INT8: ' + value;
}
return v;
}
function toPrimitive_UINT8(value) {
const v = parseInt(value, 10);
if (v < 0 || v > 0xff || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(0, 0xff, v);
return v;
}
catch {
throw 'invalid value for UINT8: ' + value;
}
return v;
}
function toPrimitive_INT16(value) {
const v = parseInt(value, 10);
if (v < -0x8000 || v > 0x7fff || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(-0x8000, 0x7fff, v);
return v;
}
catch {
throw 'invalid value for INT16: ' + value;
}
return v;
}
function toPrimitive_UINT16(value) {
const v = parseInt(value, 10);
if (v < 0 || v > 0xffff || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(0, 0xffff, v);
return v;
}
catch {
throw 'invalid value for UINT16: ' + value;
}
return v;
}
function toPrimitive_INT32(value) {
const v = parseInt(value, 10);
if (v < -0x80000000 || v > 0x7fffffff || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(-0x80000000, 0x7fffffff, v);
return v;
}
catch {
throw 'invalid value for INT32: ' + value;
}
return v;
}
function toPrimitive_UINT32(value) {
const v = parseInt(value, 10);
if (v < 0 || v > 0xffffffffffff || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(0, 0xffffffffffff, v);
return v;
}
catch {
throw 'invalid value for UINT32: ' + value;
}
return v;
}
function toPrimitive_INT64(value) {
const v = parseInt(value, 10);
if (isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(-0x8000000000000000, 0x7fffffffffffffff, v);
return v;
}
catch {
throw 'invalid value for INT64: ' + value;
}
return v;
}
function toPrimitive_UINT64(value) {
const v = parseInt(value, 10);
if (v < 0 || isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(0, 0xffffffffffffffff, v);
return v;
}
catch {
throw 'invalid value for UINT64: ' + value;
}
return v;
}
function toPrimitive_INT96(value) {
const v = parseInt(value, 10);
if (isNaN(v)) {
try {
let v = value;
if (typeof v === 'string')
v = BigInt(value);
checkValidValue(-0x800000000000000000000000, 0x7fffffffffffffffffffffff, v);
return v;
}
catch {
throw 'invalid value for INT96: ' + value;
}
return v;
}

@@ -271,4 +346,7 @@ function toPrimitive_BYTE_ARRAY(value) {

function toPrimitive_TIME_MILLIS(value) {
const v = parseInt(value, 10);
if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
let v = value;
if (typeof value === `string`) {
v = parseInt(value, 10);
}
if (v < 0 || v > 0xffffffffffffffff || typeof v !== 'number') {
throw 'invalid value for TIME_MILLIS: ' + value;

@@ -280,3 +358,3 @@ }

const v = BigInt(value);
if (v < 0n || isNaN(v)) {
if (v < 0n) {
throw 'invalid value for TIME_MICROS: ' + value;

@@ -293,9 +371,10 @@ }

/* convert from integer */
{
const v = parseInt(value, 10);
if (v < 0 || isNaN(v)) {
throw 'invalid value for DATE: ' + value;
}
return v;
let v = value;
if (typeof value === 'string') {
v = parseInt(value, 10);
}
if (v < 0 || typeof v !== 'number') {
throw 'invalid value for DATE: ' + value;
}
return v;
}

@@ -311,9 +390,10 @@ function fromPrimitive_DATE(value) {

/* convert from integer */
{
const v = parseInt(value, 10);
if (v < 0 || isNaN(v)) {
throw 'invalid value for TIMESTAMP_MILLIS: ' + value;
}
return v;
let v = value;
if (typeof value === 'string') {
v = parseInt(value, 10);
}
if (v < 0 || typeof v !== 'number') {
throw 'invalid value for TIMESTAMP_MILLIS: ' + value;
}
return v;
}

@@ -338,3 +418,3 @@ function fromPrimitive_TIMESTAMP_MILLIS(value) {

function fromPrimitive_TIMESTAMP_MICROS(value) {
return new Date(parseInt(value / 1000n));
return typeof value === 'bigint' ? new Date(Number(value / 1000n)) : new Date(value / 1000);
}

@@ -358,3 +438,7 @@ function toPrimitive_INTERVAL(value) {

}
module.exports = { PARQUET_LOGICAL_TYPES, toPrimitive, fromPrimitive };
function checkValidValue(lowerRange, upperRange, v) {
if (v < lowerRange || v > upperRange) {
throw "invalid value";
}
}
//# sourceMappingURL=types.js.map
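The rewritten converters above accept either numbers or numeric strings, route strings through BigInt, and funnel range checks into checkValidValue, so out-of-range and unparseable inputs both surface as the type-specific throw. Illustrative behavior for INT8 (worked by hand from the code above):

// toPrimitive_INT8('100')  -> 100n  (string goes through BigInt; within [-0x80, 0x7f])
// toPrimitive_INT8(-200)   -> throws 'invalid value for INT8: -200' (fails checkValidValue)
// toPrimitive_INT8('oops') -> throws 'invalid value for INT8: oops' (BigInt() raises, caught, rethrown)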

@@ -132,2 +132,10 @@ "use strict";

fromPrimitive: fromPrimitive_INTERVAL
},
MAP: {
originalType: 'MAP',
toPrimitive: toPrimitive_MAP,
},
LIST: {
originalType: 'LIST',
toPrimitive: toPrimitive_LIST,
}

@@ -246,2 +254,8 @@ };

}
function toPrimitive_MAP(value) {
return value;
}
function toPrimitive_LIST(value) {
return value;
}
function toPrimitive_BYTE_ARRAY(value) {

@@ -248,0 +262,0 @@ return Buffer.from(value);

"use strict";
// Lifted from https://github.com/kbajalc/parquets
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.NewPageHeader = exports.NewFileMetaData = void 0;
const parquet_types_1 = __importDefault(require("../../gen-nodejs/parquet_types"));
;
class NewFileMetaData extends parquet_types_1.default.FileMetaData {
json;
//@ts-ignore
row_groups;
constructor() {
super();
}
}
exports.NewFileMetaData = NewFileMetaData;
class NewPageHeader extends parquet_types_1.default.PageHeader {
offset;
headerSize;
constructor() {
super();
}
}
exports.NewPageHeader = NewPageHeader;
//# sourceMappingURL=types.js.map

@@ -1,61 +0,61 @@

'use strict';
const fs = require('fs');
const thrift = require('thrift');
const parquet_thrift = require('../gen-nodejs/parquet_types');
/** We need to use a patched version of TFramedTransport where
* readString returns the original buffer instead of a string if the
* buffer can not be safely encoded as utf8 (see http://bit.ly/2GXeZEF)
*/
class fixedTFramedTransport extends thrift.TFramedTransport {
readString(len) {
this.ensureAvailable(len);
var buffer = this.inBuf.slice(this.readPos, this.readPos + len);
var str = this.inBuf.toString('utf8', this.readPos, this.readPos + len);
this.readPos += len;
return (Buffer.from(str).equals(buffer)) ? str : buffer;
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.fieldIndexOf = exports.osopen = exports.osend = exports.oswrite = exports.fclose = exports.fread = exports.fstat = exports.fopen = exports.getThriftEnum = exports.getBitWidth = exports.decodeThrift = exports.serializeThrift = void 0;
const thrift_1 = __importDefault(require("thrift"));
const fs_1 = __importDefault(require("fs"));
const parquet_thrift = __importStar(require("../gen-nodejs/parquet_types"));
/**
* We need to patch Thrift's TFramedTransport class bc the TS type definitions
* do not define a `readPos` field, even though the class implementation has
* one.
*/
class fixedTFramedTransport extends thrift_1.default.TFramedTransport {
inBuf;
readPos;
constructor(inBuf) {
super(inBuf);
this.inBuf = inBuf;
this.readPos = 0;
}
}
/** Patch PageLocation to be three element array that has getters/setters
* for each of the properties (offset, compressed_page_size, first_row_index)
* This saves space considerably as we do not need to store the full variable
* names for every PageLocation
*/
const previousPageLocation = parquet_thrift.PageLocation.prototype;
const PageLocation = parquet_thrift.PageLocation.prototype = [];
PageLocation.write = previousPageLocation.write;
PageLocation.read = previousPageLocation.read;
const getterSetter = index => ({
const getterSetter = (index) => ({
get: function () { return this[index]; },
set: function (value) { return this[index] = value; }
});
Object.defineProperty(PageLocation, 'offset', getterSetter(0));
Object.defineProperty(PageLocation, 'compressed_page_size', getterSetter(1));
Object.defineProperty(PageLocation, 'first_row_index', getterSetter(2));
exports.force32 = function () {
const protocol = thrift.TCompactProtocol.prototype;
protocol.zigzagToI64 = protocol.zigzagToI32;
protocol.readVarint64 = protocol.readVarint32 = function () {
let lo = 0;
let shift = 0;
let b;
while (true) {
b = this.trans.readByte();
lo = lo | ((b & 0x7f) << shift);
shift += 7;
if (!(b & 0x80)) {
break;
}
}
return lo;
};
};
Object.defineProperty(parquet_thrift.PageLocation.prototype, 'offset', getterSetter(0));
Object.defineProperty(parquet_thrift.PageLocation.prototype, 'compressed_page_size', getterSetter(1));
Object.defineProperty(parquet_thrift.PageLocation.prototype, 'first_row_index', getterSetter(2));
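The PageLocation patch above swaps the prototype for an array so each location is stored positionally; index-based getters/setters keep the named-property API working while avoiding a full property name per field on every instance. The same trick in isolation (generic names, not the package's API):

const proto = [];
const getterSetter = (index) => ({
  get() { return this[index]; },
  set(value) { this[index] = value; },
});
Object.defineProperty(proto, 'offset', getterSetter(0));
Object.defineProperty(proto, 'compressed_page_size', getterSetter(1));
const loc = Object.create(proto);
loc.offset = 4096;               // stored at loc[0]
loc.compressed_page_size = 512;  // stored at loc[1]
console.log(loc[0], loc.offset); // 4096 4096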
/**
* Helper function that serializes a thrift object into a buffer
*/
exports.serializeThrift = function (obj) {
const serializeThrift = function (obj) {
let output = [];
let transport = new thrift.TBufferedTransport(null, function (buf) {
const callBack = function (buf) {
output.push(buf);
});
let protocol = new thrift.TCompactProtocol(transport);
};
let transport = new thrift_1.default.TBufferedTransport(undefined, callBack);
let protocol = new thrift_1.default.TCompactProtocol(transport);
//@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872
obj.write(protocol);

@@ -65,3 +65,4 @@ transport.flush();

};
exports.decodeThrift = function (obj, buf, offset) {
exports.serializeThrift = serializeThrift;
const decodeThrift = function (obj, buf, offset) {
if (!offset) {

@@ -72,10 +73,12 @@ offset = 0;

transport.readPos = offset;
var protocol = new thrift.TCompactProtocol(transport);
var protocol = new thrift_1.default.TCompactProtocol(transport);
//@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872
obj.read(protocol);
return transport.readPos - offset;
};
exports.decodeThrift = decodeThrift;
/**
* Get the number of bits required to store a given value
*/
exports.getBitWidth = function (val) {
const getBitWidth = function (val) {
if (val === 0) {

@@ -88,6 +91,7 @@ return 0;

};
exports.getBitWidth = getBitWidth;
/**
* FIXME not ideal that this is linear
*/
exports.getThriftEnum = function (klass, value) {
const getThriftEnum = function (klass, value) {
for (let k in klass) {

@@ -100,5 +104,6 @@ if (klass[k] === value) {

};
exports.fopen = function (filePath) {
exports.getThriftEnum = getThriftEnum;
const fopen = function (filePath) {
return new Promise((resolve, reject) => {
fs.open(filePath, 'r', (err, fd) => {
fs_1.default.open(filePath, 'r', (err, fd) => {
if (err) {

@@ -113,5 +118,6 @@ reject(err);

};
exports.fstat = function (filePath) {
exports.fopen = fopen;
const fstat = function (filePath) {
return new Promise((resolve, reject) => {
fs.stat(filePath, (err, stat) => {
fs_1.default.stat(filePath, (err, stat) => {
if (err) {

@@ -126,6 +132,7 @@ reject(err);

};
exports.fread = function (fd, position, length) {
exports.fstat = fstat;
const fread = function (fd, position, length) {
let buffer = Buffer.alloc(length);
return new Promise((resolve, reject) => {
fs.read(fd, buffer, 0, length, position, (err, bytesRead, buf) => {
fs_1.default.read(fd, buffer, 0, length, position, (err, bytesRead, buf) => {
if (err || bytesRead != length) {

@@ -140,5 +147,6 @@ reject(err || Error('read failed'));

};
exports.fclose = function (fd) {
exports.fread = fread;
const fclose = function (fd) {
return new Promise((resolve, reject) => {
fs.close(fd, (err) => {
fs_1.default.close(fd, (err) => {
if (err) {

@@ -153,3 +161,4 @@ reject(err);

};
exports.oswrite = function (os, buf) {
exports.fclose = fclose;
const oswrite = function (os, buf) {
return new Promise((resolve, reject) => {

@@ -161,3 +170,3 @@ os.write(buf, (err) => {

else {
resolve();
resolve(err);
}

@@ -167,3 +176,4 @@ });

};
exports.osend = function (os) {
exports.oswrite = oswrite;
const osend = function (os) {
return new Promise((resolve, reject) => {

@@ -175,3 +185,3 @@ os.end((err) => {

else {
resolve();
resolve(err);
}

@@ -181,5 +191,6 @@ });

};
exports.osopen = function (path, opts) {
exports.osend = osend;
const osopen = function (path, opts) {
return new Promise((resolve, reject) => {
let outputStream = fs.createWriteStream(path, opts);
let outputStream = fs_1.default.createWriteStream(path, opts);
outputStream.on('open', function (fd) {

@@ -193,3 +204,4 @@ resolve(outputStream);

};
exports.fieldIndexOf = function (arr, elem) {
exports.osopen = osopen;
const fieldIndexOf = function (arr, elem) {
for (let j = 0; j < arr.length; ++j) {

@@ -212,2 +224,3 @@ if (arr[j].length !== elem.length) {

};
exports.fieldIndexOf = fieldIndexOf;
//# sourceMappingURL=util.js.map

@@ -284,3 +284,3 @@ 'use strict';

_flush(callback) {
this.writer.close(callback)
this.writer.close()
.then(d => callback(null, d), callback);

@@ -287,0 +287,0 @@ }

@@ -15,4 +15,3 @@ "use strict";

ParquetShredder: shredder,
force32: util.force32
};
//# sourceMappingURL=parquet.js.map

@@ -5,3 +5,3 @@ {

"main": "dist/parquet.js",
"version": "0.0.0-81e38e",
"version": "0.0.0-8f3038",
"homepage": "https://github.com/LibertyDSNP/parquetjs",

@@ -18,21 +18,10 @@ "license": "MIT",

"dependencies": {
"@types/bson": "^4.0.3",
"@types/long": "^4.0.1",
"@types/node": "^14.14.35",
"@types/thrift": "^0.10.10",
"assert": "^2.0.0",
"@types/varint": "^6.0.0",
"browserify-zlib": "^0.2.0",
"bson": "4.4.0",
"cross-fetch": "^3.1.4",
"esbuild": "^0.12.11",
"events": "^3.3.0",
"int53": "^0.2.4",
"lodash": "^4.17.21",
"long": "^4.0.0",
"object-stream": "0.0.1",
"path-browserify": "^1.0.1",
"readable-stream": "^3.6.0",
"snappyjs": "^0.6.0",
"thrift": "0.14.1",
"util": "^0.12.4",
"varint": "^5.0.0",

@@ -46,5 +35,10 @@ "wasm-brotli": "^2.0.2",

"@babel/preset-typescript": "^7.14.5",
"@types/bson": "^4.0.3",
"@types/chai": "^4.2.16",
"@types/long": "^4.0.1",
"@types/mocha": "^8.2.2",
"@types/node": "^14.14.35",
"@types/sinon": "^10.0.0",
"@types/thrift": "^0.10.10",
"assert": "^2.0.0",
"babel-loader": "^8.2.2",

@@ -56,7 +50,11 @@ "babel-plugin-add-module-exports": "^1.0.4",

"core-js": "^3.15.1",
"esbuild": "^0.14.1",
"mocha": "8.3.2",
"msw": "^0.29.0",
"object-stream": "0.0.1",
"process": "^0.11.10",
"regenerator-runtime": "^0.13.7",
"sinon": "^10.0.0",
"sinon-chai": "^3.7.0",
"sinon-chai-in-order": "^0.1.0",
"source-map-loader": "^3.0.0",

@@ -66,3 +64,3 @@ "stream-browserify": "^3.0.0",

"ts-node": "^9.1.1",
"typescript": "^4.3.4"
"typescript": "^4.5.2"
},

@@ -69,0 +67,0 @@ "scripts": {

(Diffs of 4 further files are too big to display, and 18 more are not supported yet.)
