parquets

parquets - npm Package Compare versions

Comparing version 0.10.6 to 0.10.7

lib/snappy/compressor.d.ts

lib/codec/plain.js

@@ -53,3 +53,3 @@ "use strict";

buf.fill(0);
for (let i = 0; i < values.length; ++i) {
for (let i = 0; i < values.length; i++) {
if (values[i]) {

@@ -63,3 +63,3 @@ buf[Math.floor(i / 8)] |= (1 << (i % 8));

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const b = cursor.buffer[cursor.offset + Math.floor(i / 8)];

@@ -80,3 +80,3 @@ values.push((b & (1 << (i % 8))) > 0);

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readInt32LE(cursor.offset));

@@ -96,3 +96,3 @@ cursor.offset += 4;

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(INT53.readInt64LE(cursor.buffer, cursor.offset));

@@ -119,3 +119,3 @@ cursor.offset += 8;

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const low = INT53.readInt64LE(cursor.buffer, cursor.offset);

@@ -142,3 +142,3 @@ const high = cursor.buffer.readUInt32LE(cursor.offset + 8);

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readFloatLE(cursor.offset));

@@ -158,3 +158,3 @@ cursor.offset += 4;

const values = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readDoubleLE(cursor.offset));

@@ -161,0 +161,0 @@ cursor.offset += 8;

@@ -9,3 +9,3 @@ "use strict";

const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8)));
for (let b = 0; b < opts.bitWidth * values.length; ++b) {
for (let b = 0; b < opts.bitWidth * values.length; b++) {
if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) {

@@ -22,3 +22,3 @@ buf[Math.floor(b / 8)] |= (1 << (b % 8));

const buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8));
for (let i = 0; i < buf.length; ++i) {
for (let i = 0; i < buf.length; i++) {
buf.writeUInt8(value & 0xff, i);

@@ -93,3 +93,3 @@ value >> 8;

const values = new Array(count).fill(0);
for (let b = 0; b < opts.bitWidth * count; ++b) {
for (let b = 0; b < opts.bitWidth * count; b++) {
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) {

@@ -104,3 +104,3 @@ values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth);

let value = 0;
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); ++i) {
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); i++) {
value << 8;

@@ -107,0 +107,0 @@ value += cursor.buffer[cursor.offset];
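
The bit-packing loops above pack each value LSB-first, crossing byte boundaries as needed. A minimal stand-alone sketch of the same layout; the function name and sample values are illustrative, not part of the package:

// Illustrative sketch of the bit layout produced by the packing loop above.
function packBits(values: number[], bitWidth: number): Buffer {
  const buf = Buffer.alloc(Math.ceil((bitWidth * values.length) / 8));
  for (let b = 0; b < bitWidth * values.length; b++) {
    // bit (b % bitWidth) of values[floor(b / bitWidth)] -> bit (b % 8) of byte floor(b / 8)
    if ((values[Math.floor(b / bitWidth)] & (1 << (b % bitWidth))) > 0) {
      buf[Math.floor(b / 8)] |= 1 << (b % 8);
    }
  }
  return buf;
}

// Example: bitWidth 2, values [1, 2, 3] occupy six bits of a single byte.
// Bits 0 (from 1), 3 (from 2), 4 and 5 (from 3) are set: 0b00111001 = 0x39.
packBits([1, 2, 3], 2); // <Buffer 39>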

@@ -5,3 +5,3 @@ "use strict";

const zlib = require("zlib");
const snappyjs = require("snappyjs");
const snappyjs = require("./snappy");
let brotli;

@@ -8,0 +8,0 @@ let lzo;

@@ -8,5 +8,5 @@ export declare type ParquetCodec = 'PLAIN' | 'RLE';

export interface SchemaDefinition {
[string: string]: ElementDefinition;
[string: string]: FieldDefinition;
}
export interface ElementDefinition {
export interface FieldDefinition {
type?: ParquetType;

@@ -20,5 +20,6 @@ typeLength?: number;

}
export interface FieldDefinition {
export interface ParquetField {
name: string;
path: string[];
key: string;
primitiveType?: PrimitiveType;

@@ -34,3 +35,3 @@ originalType?: OriginalType;

fieldCount?: number;
fields?: Record<string, FieldDefinition>;
fields?: Record<string, ParquetField>;
}

@@ -37,0 +38,0 @@ export interface ParquetBuffer {
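
The renames above separate what a user writes from what the library compiles: a SchemaDefinition still maps field names to definitions, but those entries are now called FieldDefinition, while the compiled per-field metadata (with name, path and the new key property) becomes ParquetField. A hedged sketch of how the two relate; the sample schema is illustrative:

import { ParquetSchema } from 'parquets';

// SchemaDefinition: written by the user; every entry is a FieldDefinition.
const schema = new ParquetSchema({
  name: { type: 'UTF8' },
  stock: {
    repeated: true,
    fields: { quantity: { type: 'INT64' } } // nested FieldDefinition
  }
});

// schema.fields and schema.fieldList hold ParquetField objects. Each one carries
// key = path.join() (e.g. 'name' or 'stock,quantity'), which the shredder now uses
// to index columnData instead of re-joining the path on every access.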

/// <reference types="node" />
import { ParquetBuffer, ParquetData, ParquetRecord } from './declare';
import { ColumnChunk, FileMetaData, RowGroup } from './gen';
import { ParquetSchema } from './schema';
import { ColumnChunk, FileMetaData, RowGroup } from './thrift';
/**

@@ -6,0 +6,0 @@ * A parquet cursor is used to retrieve rows from a parquet file in order

@@ -5,6 +5,6 @@ "use strict";

const Compression = require("./compression");
// tslint:disable-next-line:max-line-length
const gen_1 = require("./gen");
const schema_1 = require("./schema");
const Shred = require("./shred");
// tslint:disable-next-line:max-line-length
const thrift_1 = require("./thrift");
const Util = require("./util");

@@ -195,6 +195,6 @@ // import Fs = require('fs');

const field = schema.findField(colChunk.meta_data.path_in_schema);
const type = Util.getThriftEnum(gen_1.Type, colChunk.meta_data.type);
const type = Util.getThriftEnum(thrift_1.Type, colChunk.meta_data.type);
if (type !== field.primitiveType)
throw new Error('chunk type not matching schema: ' + type);
const compression = Util.getThriftEnum(gen_1.CompressionCodec, colChunk.meta_data.codec);
const compression = Util.getThriftEnum(thrift_1.CompressionCodec, colChunk.meta_data.codec);
const pagesOffset = +colChunk.meta_data.data_page_offset;

@@ -250,3 +250,3 @@ const pagesSize = +colChunk.meta_data.total_compressed_size;

cursor.offset += length;
const pageType = Util.getThriftEnum(gen_1.PageType, pageHeader.type);
const pageType = Util.getThriftEnum(thrift_1.PageType, pageHeader.type);
let pageData = null;

@@ -300,3 +300,3 @@ switch (pageType) {

/* read repetition levels */
const rLevelEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header.repetition_level_encoding);
const rLevelEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header.repetition_level_encoding);
// tslint:disable-next-line:prefer-array-literal

@@ -315,3 +315,3 @@ let rLevels = new Array(valueCount);

/* read definition levels */
const dLevelEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header.definition_level_encoding);
const dLevelEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header.definition_level_encoding);
// tslint:disable-next-line:prefer-array-literal

@@ -332,7 +332,7 @@ let dLevels = new Array(valueCount);

if (dlvl === column.dLevelMax) {
++valueCountNonNull;
valueCountNonNull++;
}
}
/* read values */
const valueEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header.encoding);
const valueEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header.encoding);
const values = decodeValues(column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, {

@@ -356,3 +356,3 @@ typeLength: column.typeLength,

const valueCountNonNull = valueCount - header.data_page_header_v2.num_nulls;
const valueEncoding = Util.getThriftEnum(gen_1.Encoding, header.data_page_header_v2.encoding);
const valueEncoding = Util.getThriftEnum(thrift_1.Encoding, header.data_page_header_v2.encoding);
/* read repetition levels */

@@ -409,3 +409,3 @@ // tslint:disable-next-line:prefer-array-literal

const schemaElement = schemaElements[next];
const repetitionType = next > 0 ? Util.getThriftEnum(gen_1.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
const repetitionType = next > 0 ? Util.getThriftEnum(thrift_1.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
let optional = false;

@@ -434,5 +434,5 @@ let repeated = false;

else {
let logicalType = Util.getThriftEnum(gen_1.Type, schemaElement.type);
let logicalType = Util.getThriftEnum(thrift_1.Type, schemaElement.type);
if (schemaElement.converted_type != null) {
logicalType = Util.getThriftEnum(gen_1.ConvertedType, schemaElement.converted_type);
logicalType = Util.getThriftEnum(thrift_1.ConvertedType, schemaElement.converted_type);
}

@@ -439,0 +439,0 @@ schema[schemaElement.name] = {
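
Both data-page decoders above size their value reads from the definition levels: only entries whose definition level equals the column's dLevelMax have a physical value in the page, which is what the valueCountNonNull increment in the hunks computes. A minimal sketch of that step; the helper name is illustrative:

// Entries defined all the way down to the leaf (dlvl === dLevelMax) carry a stored
// value; nulls are represented by the definition levels alone.
function countNonNull(dLevels: number[], dLevelMax: number): number {
  let valueCountNonNull = 0;
  for (const dlvl of dLevels) {
    if (dlvl === dLevelMax) valueCountNonNull++;
  }
  return valueCountNonNull;
}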

@@ -1,2 +0,2 @@

import { ElementDefinition, FieldDefinition, ParquetBuffer, ParquetCompression, ParquetRecord, SchemaDefinition } from './declare';
import { FieldDefinition, ParquetBuffer, ParquetCompression, ParquetField, ParquetRecord, SchemaDefinition } from './declare';
/**

@@ -6,5 +6,5 @@ * A parquet file schema

export declare class ParquetSchema {
schema: Record<string, ElementDefinition>;
fields: Record<string, FieldDefinition>;
fieldList: FieldDefinition[];
schema: Record<string, FieldDefinition>;
fields: Record<string, ParquetField>;
fieldList: ParquetField[];
/**

@@ -17,12 +17,13 @@ * Create a new schema from a JSON schema definition

*/
findField(path: string): FieldDefinition;
findField(path: string[]): FieldDefinition;
findField(path: string): ParquetField;
findField(path: string[]): ParquetField;
/**
* Retrieve a field definition and all the field's ancestors
*/
findFieldBranch(path: string): FieldDefinition[];
findFieldBranch(path: string[]): FieldDefinition[];
findFieldBranch(path: string): ParquetField[];
findFieldBranch(path: string[]): ParquetField[];
shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void;
materializeRecords(buffer: ParquetBuffer): ParquetRecord[];
compress(type: ParquetCompression): this;
buffer(): ParquetBuffer;
}

@@ -60,2 +60,5 @@ "use strict";

}
buffer() {
return shred_1.shredBuffer(this);
}
}

@@ -96,5 +99,7 @@ exports.ParquetSchema = ParquetSchema;

if (opts.fields) {
const cpath = path.concat([name]);
fieldList[name] = {
name,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -105,3 +110,3 @@ rLevelMax,

fieldCount: Object.keys(opts.fields).length,
fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat([name]))
fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)
};

@@ -123,2 +128,3 @@ continue;

/* add to schema */
const cpath = path.concat([name]);
fieldList[name] = {

@@ -128,3 +134,4 @@ name,

originalType: typeDef.originalType,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -131,0 +138,0 @@ encoding: opts.encoding,
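
The buffer() method added above delegates to shredBuffer and returns an empty ParquetBuffer whose columnData is pre-keyed by each field's new key. A hedged sketch of the result for a one-column schema; the shape shown is what the code above would produce, not captured output:

import { ParquetSchema } from 'parquets';

const schema = new ParquetSchema({ name: { type: 'UTF8' } });
const buf = schema.buffer();
// buf is equivalent to:
// {
//   rowCount: 0,
//   columnData: {
//     name: { dlevels: [], rlevels: [], values: [], count: 0 }
//   }
// }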

import { ParquetBuffer, ParquetRecord } from './declare';
import { ParquetSchema } from './schema';
export declare function shredBuffer(schema: ParquetSchema): ParquetBuffer;
/**

@@ -24,3 +25,2 @@ * 'Shred' a record into a list of <value, repetition_level, definition_level>

* }
*
*/

@@ -46,4 +46,3 @@ export declare function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void;

* }
*
*/
export declare function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[];
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const Types = require("./types");
function shredBuffer(schema) {
const columnData = {};
for (const field of schema.fieldList) {
columnData[field.key] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
return { rowCount: 0, columnData };
}
exports.shredBuffer = shredBuffer;
/**

@@ -25,51 +38,33 @@ * 'Shred' a record into a list of <value, repetition_level, definition_level>

* }
*
*/
function shredRecord(schema, record, buffer) {
/* shred the record, this may raise an exception */
const recordShredded = {};
for (const field of schema.fieldList) {
recordShredded[field.path.join()] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
shredRecordInternal(schema.fields, record, recordShredded, 0, 0);
const data = shredBuffer(schema).columnData;
shredRecordFields(schema.fields, record, data, 0, 0);
/* if no error during shredding, add the shredded record to the buffer */
if (!('columnData' in buffer) || !('rowCount' in buffer)) {
buffer.rowCount = 0;
buffer.columnData = {};
for (const field of schema.fieldList) {
const cd = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
buffer.columnData[field.path.join()] = cd;
}
buffer.rowCount = 1;
buffer.columnData = data;
return;
}
buffer.rowCount += 1;
for (const field of schema.fieldList) {
Array.prototype.push.apply(buffer.columnData[field.path.join()].rlevels, recordShredded[field.path.join()].rlevels);
Array.prototype.push.apply(buffer.columnData[field.path.join()].dlevels, recordShredded[field.path.join()].dlevels);
Array.prototype.push.apply(buffer.columnData[field.path.join()].values, recordShredded[field.path.join()].values);
buffer.columnData[field.path.join()].count += recordShredded[field.path.join()].count;
Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
buffer.columnData[field.key].count += data[field.key].count;
}
}
exports.shredRecord = shredRecord;
function shredRecordInternal(fields, record, data, rlvl, dlvl) {
for (const fieldName in fields) {
const field = fields[fieldName];
const fieldType = field.originalType || field.primitiveType;
function shredRecordFields(fields, record, data, rLevel, dLevel) {
for (const name in fields) {
const field = fields[name];
// fetch values
let values = [];
if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) {
if (record[fieldName].constructor === Array) {
values = record[fieldName];
if (record && (field.name in record) && record[field.name] !== undefined && record[field.name] !== null) {
if (record[field.name].constructor === Array) {
values = record[field.name];
}
else {
values.push(record[fieldName]);
values.push(record[field.name]);
}

@@ -87,8 +82,8 @@ }

if (field.isNested) {
shredRecordInternal(field.fields, null, data, rlvl, dlvl);
shredRecordFields(field.fields, null, data, rLevel, dLevel);
}
else {
data[field.path.join()].rlevels.push(rlvl);
data[field.path.join()].dlevels.push(dlvl);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rLevel);
data[field.key].dlevels.push(dLevel);
}

@@ -98,13 +93,12 @@ continue;

// push values
for (let i = 0; i < values.length; ++i) {
// tslint:disable-next-line:variable-name
const rlvl_i = i === 0 ? rlvl : field.rLevelMax;
for (let i = 0; i < values.length; i++) {
const rlvl = i === 0 ? rLevel : field.rLevelMax;
if (field.isNested) {
shredRecordInternal(field.fields, values[i], data, rlvl_i, field.dLevelMax);
shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
}
else {
data[field.path.join()].values.push(Types.toPrimitive(fieldType, values[i]));
data[field.path.join()].rlevels.push(rlvl_i);
data[field.path.join()].dlevels.push(field.dLevelMax);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rlvl);
data[field.key].dlevels.push(field.dLevelMax);
data[field.key].values.push(Types.toPrimitive(field.originalType || field.primitiveType, values[i]));
}

@@ -132,66 +126,66 @@ }

* }
*
*/
function materializeRecords(schema, buffer) {
const records = [];
for (let i = 0; i < buffer.rowCount; ++i) {
for (let i = 0; i < buffer.rowCount; i++)
records.push({});
for (const key in buffer.columnData) {
materializeColumn(schema, buffer, key, records);
}
for (const k in buffer.columnData) {
const field = schema.findField(k);
const fieldBranch = schema.findFieldBranch(k);
const values = buffer.columnData[k].values[Symbol.iterator]();
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1);
rLevels.fill(0);
for (let i = 0; i < buffer.columnData[k].count; ++i) {
const dLevel = buffer.columnData[k].dlevels[i];
const rLevel = buffer.columnData[k].rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let value = null;
if (dLevel === field.dLevelMax) {
value = Types.fromPrimitive(field.originalType || field.primitiveType, values.next().value);
}
materializeRecordField(records[rLevels[0] - 1], fieldBranch, rLevels.slice(1), dLevel, value);
}
}
return records;
}
exports.materializeRecords = materializeRecords;
function materializeRecordField(record, branch, rLevels, dLevel, value) {
const node = branch[0];
if (dLevel < node.dLevelMax) {
function materializeColumn(schema, buffer, key, records) {
const data = buffer.columnData[key];
if (!data.count)
return;
}
if (branch.length > 1) {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
const field = schema.findField(key);
const branch = schema.findFieldBranch(key);
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1).fill(0);
let vIndex = 0;
for (let i = 0; i < data.count; i++) {
const dLevel = data.dlevels[i];
const rLevel = data.rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let rIndex = 0;
let record = records[rLevels[rIndex++] - 1];
// Internal nodes
for (const step of branch) {
if (step === field)
break;
if (dLevel < step.dLevelMax)
break;
if (step.repetitionType === 'REPEATED') {
if (!(step.name in record))
record[step.name] = [];
const ix = rLevels[rIndex++];
while (record[step.name].length <= ix)
record[step.name].push({});
record = record[step.name][ix];
}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push({});
else {
record[step.name] = record[step.name] || {};
record = record[step.name];
}
materializeRecordField(record[node.name][rLevels[0]], branch.slice(1), rLevels.slice(1), dLevel, value);
}
else {
record[node.name] = record[node.name] || {};
materializeRecordField(record[node.name], branch.slice(1), rLevels, dLevel, value);
}
}
else {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
// Leaf node
if (dLevel === field.dLevelMax) {
const value = Types.fromPrimitive(field.originalType || field.primitiveType, data.values[vIndex]);
vIndex++;
if (field.repetitionType === 'REPEATED') {
if (!(field.name in record))
record[field.name] = [];
const ix = rLevels[rIndex];
while (record[field.name].length <= ix)
record[field.name].push(null);
record[field.name][ix] = value;
}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push(null);
else {
record[field.name] = value;
}
record[node.name][rLevels[0]] = value;
}
else {
record[node.name] = value;
}
}
}
//# sourceMappingURL=shred.js.map
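
To make the rewritten shredding and materialization paths concrete, here is a hedged end-to-end sketch with one required and one repeated column; the levels shown follow from the code above (Dremel-style repetition and definition levels), they are not captured output, and UTF8 values are stored internally as Buffers:

import { ParquetSchema } from 'parquets';

const schema = new ParquetSchema({
  name: { type: 'UTF8' },
  colors: { type: 'UTF8', repeated: true }
});

const buffer = schema.buffer(); // empty ParquetBuffer, new in 0.10.7
schema.shredRecord({ name: 'apple', colors: ['red', 'green'] }, buffer);

// buffer.rowCount === 1
// buffer.columnData['name']:   rlevels [0],    dlevels [0],    count 1
// buffer.columnData['colors']: rlevels [0, 1], dlevels [1, 1], count 2
//   (rlevel 1 marks a repeated value within the same record; dlevel 1 equals
//    colors.dLevelMax, so both entries carry a stored value)

const rows = schema.materializeRecords(buffer);
// rows -> [{ name: 'apple', colors: ['red', 'green'] }]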
/// <reference types="node" />
import fs = require('fs');
import { FileMetaData, PageHeader } from './gen';
import { FileMetaData, PageHeader } from './thrift';
export interface WriteStreamOptions {

@@ -5,0 +5,0 @@ flags?: string;

@@ -5,3 +5,3 @@ "use strict";

const thrift_1 = require("thrift");
const gen_1 = require("./gen");
const thrift_2 = require("./thrift");
class UFramedTransport extends thrift_1.TFramedTransport {

@@ -43,3 +43,3 @@ }

const protocol = new thrift_1.TCompactProtocol(transport);
const metadata = gen_1.FileMetaData.read(protocol);
const metadata = thrift_2.FileMetaData.read(protocol);
return { length: transport.readPos - offset, metadata };

@@ -56,3 +56,3 @@ }

const protocol = new thrift_1.TCompactProtocol(transport);
const pageHeader = gen_1.PageHeader.read(protocol);
const pageHeader = thrift_2.PageHeader.read(protocol);
return { length: transport.readPos - offset, pageHeader };

@@ -177,7 +177,7 @@ }

function fieldIndexOf(arr, elem) {
for (let j = 0; j < arr.length; ++j) {
for (let j = 0; j < arr.length; j++) {
if (arr[j].length > elem.length)
continue;
let m = true;
for (let i = 0; i < elem.length; ++i) {
for (let i = 0; i < elem.length; i++) {
if (arr[j][i] === elem[i] || arr[j][i] === '+' || arr[j][i] === '#')

@@ -184,0 +184,0 @@ continue;

@@ -5,4 +5,4 @@ /// <reference types="node" />

import { ParquetBuffer } from './declare';
import { RowGroup } from './gen';
import { ParquetSchema } from './schema';
import { RowGroup } from './thrift';
export interface ParquetWriterOptions {

@@ -9,0 +9,0 @@ baseOffset?: number;

@@ -6,5 +6,5 @@ "use strict";

const Compression = require("./compression");
const Shred = require("./shred");
// tslint:disable-next-line:max-line-length
const gen_1 = require("./gen");
const Shred = require("./shred");
const thrift_1 = require("./thrift");
const Util = require("./util");

@@ -275,9 +275,9 @@ const Int64 = require("node-int64");

/* build page header */
const header = new gen_1.PageHeader({
type: gen_1.PageType.DATA_PAGE,
data_page_header: new gen_1.DataPageHeader({
const header = new thrift_1.PageHeader({
type: thrift_1.PageType.DATA_PAGE,
data_page_header: new thrift_1.DataPageHeader({
num_values: data.count,
encoding: gen_1.Encoding[column.encoding],
definition_level_encoding: gen_1.Encoding[PARQUET_RDLVL_ENCODING],
repetition_level_encoding: gen_1.Encoding[PARQUET_RDLVL_ENCODING],
encoding: thrift_1.Encoding[column.encoding],
definition_level_encoding: thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
repetition_level_encoding: thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
}),

@@ -322,9 +322,9 @@ uncompressed_page_size: dataBuf.length,

/* build page header */
const header = new gen_1.PageHeader({
type: gen_1.PageType.DATA_PAGE_V2,
data_page_header_v2: new gen_1.DataPageHeaderV2({
const header = new thrift_1.PageHeader({
type: thrift_1.PageType.DATA_PAGE_V2,
data_page_header_v2: new thrift_1.DataPageHeaderV2({
num_values: data.count,
num_nulls: data.count - data.values.length,
num_rows: rowCount,
encoding: gen_1.Encoding[column.encoding],
encoding: thrift_1.Encoding[column.encoding],
definition_levels_byte_length: dLevelsBuf.length,

@@ -376,3 +376,3 @@ repetition_levels_byte_length: rLevelsBuf.length,

/* prepare metadata header */
const metadata = new gen_1.ColumnMetaData({
const metadata = new thrift_1.ColumnMetaData({
path_in_schema: column.path,

@@ -384,4 +384,4 @@ num_values: data.count,

total_compressed_size,
type: gen_1.Type[column.primitiveType],
codec: gen_1.CompressionCodec[column.compression]
type: thrift_1.Type[column.primitiveType],
codec: thrift_1.CompressionCodec[column.compression]
});

@@ -393,3 +393,3 @@ /* list encodings */

for (const k in encodingsSet) {
metadata.encodings.push(gen_1.Encoding[k]);
metadata.encodings.push(thrift_1.Encoding[k]);
}

@@ -405,3 +405,3 @@ /* concat metadata header and data pages */

function encodeRowGroup(schema, data, opts) {
const metadata = new gen_1.RowGroup({
const metadata = new thrift_1.RowGroup({
num_rows: data.rowCount,

@@ -417,3 +417,3 @@ columns: [],

const cchunkData = encodeColumnChunk(field, data, body.length, opts);
const cchunk = new gen_1.ColumnChunk({
const cchunk = new thrift_1.ColumnChunk({
file_offset: cchunkData.metadataOffset,

@@ -432,3 +432,3 @@ meta_data: cchunkData.metadata

function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
const metadata = new gen_1.FileMetaData({
const metadata = new thrift_1.FileMetaData({
version: PARQUET_VERSION,

@@ -442,3 +442,3 @@ created_by: 'parquets',

for (const key in userMetadata) {
const kv = new gen_1.KeyValue({
const kv = new thrift_1.KeyValue({
key,

@@ -450,3 +450,3 @@ value: userMetadata[key]

{
const schemaRoot = new gen_1.SchemaElement({
const schemaRoot = new thrift_1.SchemaElement({
name: 'root',

@@ -458,4 +458,4 @@ num_children: Object.keys(schema.fields).length

for (const field of schema.fieldList) {
const relt = gen_1.FieldRepetitionType[field.repetitionType];
const schemaElem = new gen_1.SchemaElement({
const relt = thrift_1.FieldRepetitionType[field.repetitionType];
const schemaElem = new thrift_1.SchemaElement({
name: field.name,

@@ -468,6 +468,6 @@ repetition_type: relt

else {
schemaElem.type = gen_1.Type[field.primitiveType];
schemaElem.type = thrift_1.Type[field.primitiveType];
}
if (field.originalType) {
schemaElem.converted_type = gen_1.ConvertedType[field.originalType];
schemaElem.converted_type = thrift_1.ConvertedType[field.originalType];
}

@@ -474,0 +474,0 @@ schemaElem.type_length = field.typeLength;

{
"name": "parquets",
"description": "TypeScript implementation of the Parquet file format, based on parquet.js",
"version": "0.10.6",
"version": "0.10.7",
"upstream": "0.10.1",

@@ -42,6 +42,4 @@ "homepage": "https://github.com/kbajalc/parquets",

"bson": "^4.0.2",
"debug": "^4.1.1",
"int53": "^1.0.0",
"node-int64": "^0.4.0",
"snappyjs": "^0.6.0",
"thrift": "^0.12.0",

@@ -60,6 +58,7 @@ "varint": "^5.0.0"

"@types/debug": "^4.1.4",
"@types/jest": "^24.0.16",
"@types/jest": "^24.0.17",
"@types/mocha": "^5.2.7",
"@types/node": "^10.14.14",
"@types/node": "^10.14.15",
"@types/node-int64": "^0.4.29",
"@types/snappy": "^6.0.0",
"@types/thrift": "^0.10.8",

@@ -70,2 +69,3 @@ "@types/varint": "^5.0.0",

"chai": "^4.2.0",
"debug": "^4.1.1",
"jest": "^24.8.0",

@@ -75,7 +75,6 @@ "jest-environment-node": "^24.8.0",

"lzo": "^0.4.11",
"mocha": "^6.2.0",
"node-snappy": "^0.1.4",
"object-stream": "0.0.1",
"snappy": "^6.2.3",
"ts-jest": "^24.0.2",
"ts-mocha": "^6.0.0",
"ts-node": "^8.3.0",

@@ -82,0 +81,0 @@ "tslint": "^5.18.0",

@@ -13,9 +13,8 @@ # parquets

with the [Parquet specification](https://github.com/apache/parquet-format) and is being tested
for compatibility with Apache's Java [reference implementation](https://github.com/apache/parquet-mr).
for compatibility with Apache's [reference implementation](https://github.com/apache/parquet-mr).
**WARNING**: *There are compatibility issues with the reference implementation [Apache Drill](https://drill.apache.org)*:
**WARNING**: *There are compatibility issues with the reference implementation*:
- only GZIP and SNAPPY compressions are compatible
- resolved a problem with 'optional': true columns that also have 'compression' enabled
- files with 'repeated' columns cannot be read by Drill
- columns with nested 'fields' are not compatible
- [Parquet Tools](https://github.com/apache/parquet-mr/tree/master/parquet-tools) are command-line tools that aid in the inspection of Parquet files.
- always verify that your table structure, loaded with a realistic data sample, can be read by Parquet Tools! (A minimal writer sketch follows after this section.)

@@ -22,0 +21,0 @@ **What is Parquet?**: Parquet is a column-oriented file format; it allows you to
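
Following the README's advice to check files against Parquet Tools, a minimal writer sketch; the API names come from the project's README, while the file name and row are illustrative:

import { ParquetSchema, ParquetWriter } from 'parquets';

async function writeSample() {
  const schema = new ParquetSchema({
    name:  { type: 'UTF8' },
    price: { type: 'DOUBLE', optional: true, compression: 'SNAPPY' }
  });
  const writer = await ParquetWriter.openFile(schema, 'sample.parquet');
  await writer.appendRow({ name: 'apple', price: 1.25 });
  await writer.close();
  // Then inspect the result, e.g. `parquet-tools schema sample.parquet` and
  // `parquet-tools cat sample.parquet`, before relying on cross-reader compatibility.
}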

@@ -54,3 +54,3 @@ import { PrimitiveType } from '../declare';

buf.fill(0);
for (let i = 0; i < values.length; ++i) {
for (let i = 0; i < values.length; i++) {
if (values[i]) {

@@ -65,3 +65,3 @@ buf[Math.floor(i / 8)] |= (1 << (i % 8));

const values: boolean[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const b = cursor.buffer[cursor.offset + Math.floor(i / 8)];

@@ -84,3 +84,3 @@ values.push((b & (1 << (i % 8))) > 0);

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readInt32LE(cursor.offset));

@@ -102,3 +102,3 @@ cursor.offset += 4;

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(INT53.readInt64LE(cursor.buffer, cursor.offset));

@@ -126,3 +126,3 @@ cursor.offset += 8;

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
const low = INT53.readInt64LE(cursor.buffer, cursor.offset);

@@ -150,3 +150,3 @@ const high = cursor.buffer.readUInt32LE(cursor.offset + 8);

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readFloatLE(cursor.offset));

@@ -168,3 +168,3 @@ cursor.offset += 4;

const values: number[] = [];
for (let i = 0; i < count; ++i) {
for (let i = 0; i < count; i++) {
values.push(cursor.buffer.readDoubleLE(cursor.offset));

@@ -171,0 +171,0 @@ cursor.offset += 8;

@@ -11,3 +11,3 @@ import varint = require('varint');

const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8)));
for (let b = 0; b < opts.bitWidth * values.length; ++b) {
for (let b = 0; b < opts.bitWidth * values.length; b++) {
if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) {

@@ -27,3 +27,3 @@ buf[Math.floor(b / 8)] |= (1 << (b % 8));

for (let i = 0; i < buf.length; ++i) {
for (let i = 0; i < buf.length; i++) {
buf.writeUInt8(value & 0xff, i);

@@ -107,3 +107,3 @@ value >> 8;

const values = new Array(count).fill(0);
for (let b = 0; b < opts.bitWidth * count; ++b) {
for (let b = 0; b < opts.bitWidth * count; b++) {
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) {

@@ -120,3 +120,3 @@ values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth);

let value = 0;
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); ++i) {
for (let i = 0; i < Math.ceil(opts.bitWidth / 8); i++) {
value << 8;

@@ -123,0 +123,0 @@ value += cursor.buffer[cursor.offset];

import { ParquetCompression } from './declare';
import * as Util from './util';
import zlib = require('zlib');
import snappyjs = require('snappyjs');
import snappyjs = require('./snappy');

@@ -6,0 +6,0 @@ let brotli: any;

@@ -0,1 +1,2 @@

export type ParquetCodec = 'PLAIN' | 'RLE';

@@ -43,6 +44,6 @@ export type ParquetCompression = 'UNCOMPRESSED' | 'GZIP' | 'SNAPPY' | 'LZO' | 'BROTLI' | 'LZ4';

export interface SchemaDefinition {
[string: string]: ElementDefinition;
[string: string]: FieldDefinition;
}
export interface ElementDefinition {
export interface FieldDefinition {
type?: ParquetType;

@@ -57,5 +58,6 @@ typeLength?: number;

export interface FieldDefinition {
export interface ParquetField {
name: string;
path: string[];
key: string;
primitiveType?: PrimitiveType;

@@ -71,3 +73,3 @@ originalType?: OriginalType;

fieldCount?: number;
fields?: Record<string, FieldDefinition>;
fields?: Record<string, ParquetField>;
}

@@ -74,0 +76,0 @@

@@ -12,9 +12,9 @@ declare module 'int53' {

declare module 'snappyjs' {
declare function compress(uncompressed: Buffer): Buffer;
declare function compress(uncompressed: ArrayBuffer): ArrayBuffer;
declare function compress(uncompressed: Uint8Array): Uint8Array;
declare function uncompress(compressed: Buffer): Buffer;
declare function uncompress(compressed: ArrayBuffer): ArrayBuffer;
declare function uncompress(compressed: Uint8Array): Uint8Array;
}
// declare module 'snappyjs' {
// declare function compress(uncompressed: Buffer): Buffer;
// declare function compress(uncompressed: ArrayBuffer): ArrayBuffer;
// declare function compress(uncompressed: Uint8Array): Uint8Array;
// declare function uncompress(compressed: Buffer): Buffer;
// declare function uncompress(compressed: ArrayBuffer): ArrayBuffer;
// declare function uncompress(compressed: Uint8Array): Uint8Array;
// }
import { CursorBuffer, ParquetCodecOptions, PARQUET_CODEC } from './codec';
import * as Compression from './compression';
import { FieldDefinition, ParquetBuffer, ParquetCodec, ParquetCompression, ParquetData, ParquetRecord, ParquetType, PrimitiveType, SchemaDefinition } from './declare';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, FileMetaData, PageHeader, PageType, RowGroup, SchemaElement, Type } from './gen';
import { ParquetBuffer, ParquetCodec, ParquetCompression, ParquetData, ParquetField, ParquetRecord, ParquetType, PrimitiveType, SchemaDefinition } from './declare';
import { ParquetSchema } from './schema';
import * as Shred from './shred';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, FileMetaData, PageHeader, PageType, RowGroup, SchemaElement, Type } from './thrift';
import * as Util from './util';

@@ -301,3 +301,3 @@ // import Fs = require('fs');

function decodeDataPages(buffer: Buffer, column: FieldDefinition, compression: ParquetCompression): ParquetData {
function decodeDataPages(buffer: Buffer, column: ParquetField, compression: ParquetCompression): ParquetData {
const cursor: CursorBuffer = {

@@ -348,3 +348,3 @@ buffer,

function decodeDataPage(cursor: CursorBuffer, header: PageHeader, column: FieldDefinition, compression: ParquetCompression): ParquetData {
function decodeDataPage(cursor: CursorBuffer, header: PageHeader, column: ParquetField, compression: ParquetCompression): ParquetData {
const cursorEnd = cursor.offset + header.compressed_page_size;

@@ -433,3 +433,3 @@ const valueCount = header.data_page_header.num_values;

if (dlvl === column.dLevelMax) {
++valueCountNonNull;
valueCountNonNull++;
}

@@ -466,3 +466,3 @@ }

function decodeDataPageV2(cursor: CursorBuffer, header: PageHeader, column: FieldDefinition, compression: ParquetCompression): ParquetData {
function decodeDataPageV2(cursor: CursorBuffer, header: PageHeader, column: ParquetField, compression: ParquetCompression): ParquetData {
const cursorEnd = cursor.offset + header.compressed_page_size;

@@ -469,0 +469,0 @@

import { PARQUET_CODEC } from './codec';
import { PARQUET_COMPRESSION_METHODS } from './compression';
import { ElementDefinition, FieldDefinition, ParquetBuffer, ParquetCompression, ParquetRecord, RepetitionType, SchemaDefinition } from './declare';
import { materializeRecords, shredRecord } from './shred';
import { FieldDefinition, ParquetBuffer, ParquetCompression, ParquetField, ParquetRecord, RepetitionType, SchemaDefinition } from './declare';
import { materializeRecords, shredBuffer, shredRecord } from './shred';
import { PARQUET_LOGICAL_TYPES } from './types';

@@ -11,5 +11,5 @@

export class ParquetSchema {
public schema: Record<string, ElementDefinition>;
public fields: Record<string, FieldDefinition>;
public fieldList: FieldDefinition[];
public schema: Record<string, FieldDefinition>;
public fields: Record<string, ParquetField>;
public fieldList: ParquetField[];

@@ -28,5 +28,5 @@ /**

*/
findField(path: string): FieldDefinition;
findField(path: string[]): FieldDefinition;
findField(path: any): FieldDefinition {
findField(path: string): ParquetField;
findField(path: string[]): ParquetField;
findField(path: any): ParquetField {
if (path.constructor !== Array) {

@@ -51,4 +51,4 @@ // tslint:disable-next-line:no-parameter-reassignment

*/
findFieldBranch(path: string): FieldDefinition[];
findFieldBranch(path: string[]): FieldDefinition[];
findFieldBranch(path: string): ParquetField[];
findFieldBranch(path: string[]): ParquetField[];
findFieldBranch(path: any): any[] {

@@ -83,2 +83,6 @@ if (path.constructor !== Array) {

}
buffer(): ParquetBuffer {
return shredBuffer(this);
}
}

@@ -102,4 +106,4 @@

path: string[]
): Record<string, FieldDefinition> {
const fieldList: Record<string, FieldDefinition> = {};
): Record<string, ParquetField> {
const fieldList: Record<string, ParquetField> = {};

@@ -128,5 +132,7 @@ for (const name in schema) {

if (opts.fields) {
const cpath = path.concat([name]);
fieldList[name] = {
name,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -141,3 +147,4 @@ rLevelMax,

dLevelMax,
path.concat([name]))
cpath
)
};

@@ -163,2 +170,3 @@ continue;

/* add to schema */
const cpath = path.concat([name]);
fieldList[name] = {

@@ -168,3 +176,4 @@ name,

originalType: typeDef.originalType,
path: path.concat([name]),
path: cpath,
key: cpath.join(),
repetitionType,

@@ -181,4 +190,4 @@ encoding: opts.encoding,

function listFields(fields: Record<string, FieldDefinition>): FieldDefinition[] {
let list: FieldDefinition[] = [];
function listFields(fields: Record<string, ParquetField>): ParquetField[] {
let list: ParquetField[] = [];
for (const k in fields) {

@@ -185,0 +194,0 @@ list.push(fields[k]);

@@ -1,5 +0,18 @@

import { FieldDefinition, ParquetBuffer, ParquetData, ParquetRecord } from './declare';
import { ParquetBuffer, ParquetData, ParquetField, ParquetRecord } from './declare';
import { ParquetSchema } from './schema';
import * as Types from './types';
export function shredBuffer(schema: ParquetSchema): ParquetBuffer {
const columnData: Record<string, ParquetData> = {};
for (const field of schema.fieldList) {
columnData[field.key] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
return { rowCount: 0, columnData };
}
/**

@@ -26,73 +39,43 @@ * 'Shred' a record into a list of <value, repetition_level, definition_level>

* }
*
*/
export function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {
/* shred the record, this may raise an exception */
const recordShredded: Record<string, ParquetData> = {};
for (const field of schema.fieldList) {
recordShredded[field.path.join()] = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
}
const data = shredBuffer(schema).columnData;
shredRecordInternal(schema.fields, record, recordShredded, 0, 0);
shredRecordFields(schema.fields, record, data, 0, 0);
/* if no error during shredding, add the shredded record to the buffer */
if (!('columnData' in buffer) || !('rowCount' in buffer)) {
buffer.rowCount = 0;
buffer.columnData = {};
for (const field of schema.fieldList) {
const cd: ParquetData = {
dlevels: [],
rlevels: [],
values: [],
count: 0
};
buffer.columnData[field.path.join()] = cd;
}
buffer.rowCount = 1;
buffer.columnData = data;
return;
}
buffer.rowCount += 1;
for (const field of schema.fieldList) {
Array.prototype.push.apply(
buffer.columnData[field.path.join()].rlevels,
recordShredded[field.path.join()].rlevels);
Array.prototype.push.apply(
buffer.columnData[field.path.join()].dlevels,
recordShredded[field.path.join()].dlevels);
Array.prototype.push.apply(
buffer.columnData[field.path.join()].values,
recordShredded[field.path.join()].values);
buffer.columnData[field.path.join()].count += recordShredded[field.path.join()].count;
Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
buffer.columnData[field.key].count += data[field.key].count;
}
}
function shredRecordInternal(
fields: Record<string, FieldDefinition>,
function shredRecordFields(
fields: Record<string, ParquetField>,
record: any,
data: Record<string, ParquetData>,
rlvl: number,
dlvl: number
rLevel: number,
dLevel: number
) {
for (const fieldName in fields) {
const field = fields[fieldName];
const fieldType = field.originalType || field.primitiveType;
for (const name in fields) {
const field = fields[name];
// fetch values
let values = [];
if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) {
if (record[fieldName].constructor === Array) {
values = record[fieldName];
if (record && (field.name in record) && record[field.name] !== undefined && record[field.name] !== null) {
if (record[field.name].constructor === Array) {
values = record[field.name];
} else {
values.push(record[fieldName]);
values.push(record[field.name]);
}
}
// check values

@@ -102,3 +85,2 @@ if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {

}
if (values.length > 1 && field.repetitionType !== 'REPEATED') {

@@ -111,12 +93,12 @@ throw new Error(`too many values for field: ${field.name}`);

if (field.isNested) {
shredRecordInternal(
shredRecordFields(
field.fields,
null,
data,
rlvl,
dlvl);
rLevel,
dLevel);
} else {
data[field.path.join()].rlevels.push(rlvl);
data[field.path.join()].dlevels.push(dlvl);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rLevel);
data[field.key].dlevels.push(dLevel);
}

@@ -127,18 +109,19 @@ continue;

// push values
for (let i = 0; i < values.length; ++i) {
// tslint:disable-next-line:variable-name
const rlvl_i = i === 0 ? rlvl : field.rLevelMax;
for (let i = 0; i < values.length; i++) {
const rlvl = i === 0 ? rLevel : field.rLevelMax;
if (field.isNested) {
shredRecordInternal(
shredRecordFields(
field.fields,
values[i],
data,
rlvl_i,
rlvl,
field.dLevelMax);
} else {
data[field.path.join()].values.push(Types.toPrimitive(fieldType, values[i]));
data[field.path.join()].rlevels.push(rlvl_i);
data[field.path.join()].dlevels.push(field.dLevelMax);
data[field.path.join()].count += 1;
data[field.key].count += 1;
data[field.key].rlevels.push(rlvl);
data[field.key].dlevels.push(field.dLevelMax);
data[field.key].values.push(Types.toPrimitive(
field.originalType || field.primitiveType,
values[i]
));
}

@@ -167,93 +150,63 @@ }

* }
*
*/
export function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {
const records: ParquetRecord[] = [];
for (let i = 0; i < buffer.rowCount; ++i) {
records.push({});
for (let i = 0; i < buffer.rowCount; i++) records.push({});
for (const key in buffer.columnData) {
materializeColumn(schema, buffer, key, records);
}
for (const k in buffer.columnData) {
const field = schema.findField(k);
const fieldBranch = schema.findFieldBranch(k);
const values = buffer.columnData[k].values[Symbol.iterator]();
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1);
rLevels.fill(0);
for (let i = 0; i < buffer.columnData[k].count; ++i) {
const dLevel = buffer.columnData[k].dlevels[i];
const rLevel = buffer.columnData[k].rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let value = null;
if (dLevel === field.dLevelMax) {
value = Types.fromPrimitive(
field.originalType || field.primitiveType,
values.next().value);
}
materializeRecordField(
records[rLevels[0] - 1],
fieldBranch,
rLevels.slice(1),
dLevel,
value);
}
}
return records;
}
function materializeRecordField(record: any, branch: FieldDefinition[], rLevels: number[], dLevel: number, value: any): void {
const node = branch[0];
function materializeColumn(schema: ParquetSchema, buffer: ParquetBuffer, key: string, records: ParquetRecord[]) {
const data = buffer.columnData[key];
if (!data.count) return;
if (dLevel < node.dLevelMax) {
return;
}
const field = schema.findField(key);
const branch = schema.findFieldBranch(key);
if (branch.length > 1) {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
}
// tslint:disable-next-line:prefer-array-literal
const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
let vIndex = 0;
for (let i = 0; i < data.count; i++) {
const dLevel = data.dlevels[i];
const rLevel = data.rlevels[i];
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push({});
let rIndex = 0;
let record = records[rLevels[rIndex++] - 1];
// Internal nodes
for (const step of branch) {
if (step === field) break;
if (dLevel < step.dLevelMax) break;
if (step.repetitionType === 'REPEATED') {
if (!(step.name in record)) record[step.name] = [];
const ix = rLevels[rIndex++];
while (record[step.name].length <= ix) record[step.name].push({});
record = record[step.name][ix];
} else {
record[step.name] = record[step.name] || {};
record = record[step.name];
}
materializeRecordField(
record[node.name][rLevels[0]],
branch.slice(1),
rLevels.slice(1),
dLevel,
value);
} else {
record[node.name] = record[node.name] || {};
materializeRecordField(
record[node.name],
branch.slice(1),
rLevels,
dLevel,
value);
}
} else {
if (node.repetitionType === 'REPEATED') {
if (!(node.name in record)) {
record[node.name] = [];
}
while (record[node.name].length < rLevels[0] + 1) {
record[node.name].push(null);
// Leaf node
if (dLevel === field.dLevelMax) {
const value = Types.fromPrimitive(
field.originalType || field.primitiveType,
data.values[vIndex]
);
vIndex++;
if (field.repetitionType === 'REPEATED') {
if (!(field.name in record)) record[field.name] = [];
const ix = rLevels[rIndex];
while (record[field.name].length <= ix) record[field.name].push(null);
record[field.name][ix] = value;
} else {
record[field.name] = value;
}
record[node.name][rLevels[0]] = value;
} else {
record[node.name] = value;
}
}
}
import fs = require('fs');
import { TBufferedTransport, TCompactProtocol, TFramedTransport } from 'thrift';
import { FileMetaData, PageHeader } from './gen';
import { FileMetaData, PageHeader } from './thrift';

@@ -183,6 +183,6 @@ export interface WriteStreamOptions {

export function fieldIndexOf(arr: string[][], elem: string[]): number {
for (let j = 0; j < arr.length; ++j) {
for (let j = 0; j < arr.length; j++) {
if (arr[j].length > elem.length) continue;
let m = true;
for (let i = 0; i < elem.length; ++i) {
for (let i = 0; i < elem.length; i++) {
if (arr[j][i] === elem[i] || arr[j][i] === '+' || arr[j][i] === '#') continue;

@@ -189,0 +189,0 @@ if (i >= arr[j].length && arr[j][arr[j].length - 1] === '#') continue;

@@ -5,7 +5,7 @@ import { WriteStream } from 'fs';

import * as Compression from './compression';
import { FieldDefinition, ParquetBuffer, ParquetCodec, ParquetData, PrimitiveType } from './declare';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageHeader, DataPageHeaderV2, Encoding, FieldRepetitionType, FileMetaData, KeyValue, PageHeader, PageType, RowGroup, SchemaElement, Type } from './gen';
import { ParquetBuffer, ParquetCodec, ParquetData, ParquetField, PrimitiveType } from './declare';
import { ParquetSchema } from './schema';
import * as Shred from './shred';
// tslint:disable-next-line:max-line-length
import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageHeader, DataPageHeaderV2, Encoding, FieldRepetitionType, FileMetaData, KeyValue, PageHeader, PageType, RowGroup, SchemaElement, Type } from './thrift';
import * as Util from './util';

@@ -332,3 +332,3 @@ import Int64 = require('node-int64');

function encodeDataPage(
column: FieldDefinition,
column: ParquetField,
data: ParquetData

@@ -413,3 +413,3 @@ ): {

function encodeDataPageV2(
column: FieldDefinition,
column: ParquetField,
data: ParquetData,

@@ -491,3 +491,3 @@ rowCount: number

function encodeColumnChunk(
column: FieldDefinition,
column: ParquetField,
buffer: ParquetBuffer,

@@ -494,0 +494,0 @@ offset: number,

Sorry, the diffs of the remaining 7 files are not supported yet.
