@dsnp/parquetjs
Advanced tools
Comparing version 1.3.0 to 1.3.1
@@ -23,8 +23,8 @@ export namespace PARQUET_COMPRESSION_METHODS { | ||
export function inflate(method: any, value: any): Promise<any>; | ||
declare function deflate_identity(value: any): any; | ||
declare function inflate_identity(value: any): any; | ||
declare function deflate_identity(value: any): Buffer; | ||
declare function inflate_identity(value: any): Buffer; | ||
declare function deflate_gzip(value: any): Buffer; | ||
declare function inflate_gzip(value: any): Buffer; | ||
declare function deflate_snappy(value: any): ArrayBuffer | Uint8Array | Buffer; | ||
declare function inflate_snappy(value: any): ArrayBuffer | Uint8Array | Buffer; | ||
declare function deflate_snappy(value: any): Buffer; | ||
declare function inflate_snappy(value: any): Buffer; | ||
export {}; |
@@ -28,3 +28,3 @@ 'use strict'; | ||
function deflate_identity(value) { | ||
return value; | ||
return buffer_from_result(value); | ||
} | ||
@@ -35,3 +35,4 @@ function deflate_gzip(value) { | ||
function deflate_snappy(value) { | ||
return snappy.compress(value); | ||
const compressedValue = snappy.compress(value); | ||
return buffer_from_result(compressedValue); | ||
} | ||
@@ -48,3 +49,3 @@ /** | ||
function inflate_identity(value) { | ||
return value; | ||
return buffer_from_result(value); | ||
} | ||
@@ -55,6 +56,15 @@ function inflate_gzip(value) { | ||
function inflate_snappy(value) { | ||
return snappy.uncompress(value); | ||
const uncompressedValue = snappy.uncompress(value); | ||
return buffer_from_result(uncompressedValue); | ||
} | ||
function buffer_from_result(result) { | ||
if (Buffer.isBuffer(result)) { | ||
return result; | ||
} | ||
else { | ||
return Buffer.from(result); | ||
} | ||
} | ||
exports.PARQUET_COMPRESSION_METHODS = PARQUET_COMPRESSION_METHODS; | ||
exports.deflate = deflate; | ||
exports.inflate = inflate; |
@@ -29,6 +29,6 @@ "use strict"; | ||
const decodeValues = function (type, cursor, count, opts) { | ||
opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0); | ||
const bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0); | ||
cursor.offset += 1; | ||
return rle.decodeValues(type, cursor, count, Object.assign({}, opts, { disableEnvelope: true })); | ||
return rle.decodeValues(type, cursor, count, Object.assign({}, opts, { disableEnvelope: true, bitWidth })); | ||
}; | ||
exports.decodeValues = decodeValues; |
@@ -27,6 +27,13 @@ "use strict"; | ||
} | ||
function encodeValues_INT32(values) { | ||
function encodeValues_INT32(values, opts) { | ||
const isDecimal = opts?.originalType === 'DECIMAL' || opts?.column?.originalType === 'DECIMAL'; | ||
const scale = opts?.scale || 0; | ||
let buf = Buffer.alloc(4 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
buf.writeInt32LE(values[i], i * 4); | ||
if (isDecimal) { | ||
buf.writeInt32LE(values[i] * Math.pow(10, scale), i * 4); | ||
} | ||
else { | ||
buf.writeInt32LE(values[i], i * 4); | ||
} | ||
} | ||
@@ -55,6 +62,13 @@ return buf; | ||
} | ||
function encodeValues_INT64(values) { | ||
function encodeValues_INT64(values, opts) { | ||
const isDecimal = opts?.originalType === 'DECIMAL' || opts?.column?.originalType === 'DECIMAL'; | ||
const scale = opts?.scale || 0; | ||
let buf = Buffer.alloc(8 * values.length); | ||
for (let i = 0; i < values.length; i++) { | ||
buf.writeBigInt64LE(BigInt(values[i]), i * 8); | ||
if (isDecimal) { | ||
buf.writeBigInt64LE(BigInt(Math.floor(values[i] * Math.pow(10, scale))), i * 8); | ||
} | ||
else { | ||
buf.writeBigInt64LE(BigInt(values[i]), i * 8); | ||
} | ||
} | ||
@@ -85,7 +99,6 @@ return buf; | ||
function decodeValues_DECIMAL(cursor, count, opts) { | ||
let { scale, precision } = opts; | ||
const precision = opts.precision; | ||
// Default scale to 0 per spec | ||
const scale = opts.scale || 0; | ||
const name = opts.name || undefined; | ||
if (!scale) { | ||
throw `missing option: scale (required for DECIMAL) for column: ${name}`; | ||
} | ||
if (!precision) { | ||
@@ -229,5 +242,5 @@ throw `missing option: precision (required for DECIMAL) for column: ${name}`; | ||
case "INT32": | ||
return encodeValues_INT32(values); | ||
return encodeValues_INT32(values, opts); | ||
case "INT64": | ||
return encodeValues_INT64(values); | ||
return encodeValues_INT64(values, opts); | ||
case "INT96": | ||
@@ -234,0 +247,0 @@ return encodeValues_INT96(values); |
/// <reference types="node" /> | ||
import { Cursor, Options } from './types'; | ||
export declare const encodeValues: (type: string, values: Array<number>, opts: Options) => Buffer; | ||
export declare const decodeValues: (_: string, cursor: Cursor, count: number, opts: Options) => any[]; | ||
import { Cursor } from './types'; | ||
export declare const encodeValues: (type: string, values: Array<number>, opts: { | ||
bitWidth: number; | ||
disableEnvelope?: boolean; | ||
}) => Buffer; | ||
export declare const decodeValues: (_: string, cursor: Cursor, count: number, opts: { | ||
bitWidth: number; | ||
disableEnvelope?: boolean; | ||
}) => any[]; |
@@ -8,3 +8,3 @@ /// <reference types="node" /> | ||
bitWidth: number; | ||
disableEnvelope: boolean; | ||
disableEnvelope?: boolean; | ||
primitiveType?: PrimitiveType; | ||
@@ -11,0 +11,0 @@ originalType?: OriginalType; |
@@ -0,5 +1,6 @@ | ||
/// <reference types="node" /> | ||
interface PARQUET_COMPRESSION_METHODS { | ||
[key: string]: { | ||
deflate: Function; | ||
inflate: Function; | ||
deflate: (value: any) => Buffer | Promise<Buffer>; | ||
inflate: (value: any) => Buffer | Promise<Buffer>; | ||
}; | ||
@@ -11,7 +12,7 @@ } | ||
*/ | ||
export declare function deflate(method: string, value: unknown): Promise<any>; | ||
export declare function deflate(method: string, value: unknown): Promise<Buffer>; | ||
/** | ||
* Inflate a value using compression method `method` | ||
*/ | ||
export declare function inflate(method: string, value: unknown): Promise<any>; | ||
export declare function inflate(method: string, value: unknown): Promise<Buffer>; | ||
export {}; |
@@ -40,3 +40,3 @@ "use strict"; | ||
function deflate_identity(value) { | ||
return value; | ||
return buffer_from_result(value); | ||
} | ||
@@ -47,3 +47,4 @@ function deflate_gzip(value) { | ||
function deflate_snappy(value) { | ||
return snappyjs_1.default.compress(value); | ||
const compressedValue = snappyjs_1.default.compress(value); | ||
return buffer_from_result(compressedValue); | ||
} | ||
@@ -69,10 +70,11 @@ async function deflate_brotli(value) { | ||
exports.inflate = inflate; | ||
function inflate_identity(value) { | ||
return value; | ||
async function inflate_identity(value) { | ||
return buffer_from_result(value); | ||
} | ||
function inflate_gzip(value) { | ||
async function inflate_gzip(value) { | ||
return zlib_1.default.gunzipSync(value); | ||
} | ||
function inflate_snappy(value) { | ||
return snappyjs_1.default.uncompress(value); | ||
const uncompressedValue = snappyjs_1.default.uncompress(value); | ||
return buffer_from_result(uncompressedValue); | ||
} | ||
@@ -83,1 +85,9 @@ async function inflate_brotli(value) { | ||
} | ||
function buffer_from_result(result) { | ||
if (Buffer.isBuffer(result)) { | ||
return result; | ||
} | ||
else { | ||
return Buffer.from(result); | ||
} | ||
} |
@@ -48,5 +48,5 @@ "use strict"; | ||
/** | ||
* Parquet File Format Version | ||
* Supported Parquet File Format Version for reading | ||
*/ | ||
const PARQUET_VERSION = 1; | ||
const PARQUET_VERSIONS = [1, 2]; | ||
/** | ||
@@ -173,3 +173,3 @@ * Internal type used for repetition/definition levels | ||
opts = opts || {}; | ||
if (metadata.version != PARQUET_VERSION) { | ||
if (!PARQUET_VERSIONS.includes(metadata.version)) { | ||
throw 'invalid parquet version'; | ||
@@ -848,4 +848,4 @@ } | ||
let values = decodeValues(opts.type, valueEncoding, valuesBufCursor, valueCountNonNull, { | ||
typeLength: opts.column.typeLength, | ||
bitWidth: opts.column.typeLength | ||
bitWidth: opts.column.typeLength, | ||
...opts.column | ||
}); | ||
@@ -852,0 +852,0 @@ return { |
@@ -165,2 +165,5 @@ "use strict"; | ||
if (typeDef.originalType === 'DECIMAL') { | ||
// Default scale to 0 per https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal | ||
if (typeof opts.scale === "undefined") | ||
opts.scale = 0; | ||
fieldErrors = fieldErrors.concat(errorsForDecimalOpts(typeDef.originalType, opts, nameWithPath)); | ||
@@ -206,12 +209,22 @@ } | ||
const fieldErrors = []; | ||
if (!opts.precision) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, precision is required`); | ||
if (opts.precision === undefined || opts.precision < 1) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, precision is required and must be be greater than 0`); | ||
} | ||
else if (!Number.isInteger(opts.precision)) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, precision must be an integer`); | ||
} | ||
else if (opts.precision > 18) { | ||
fieldErrors.push(`invalid precision for type: ${type}, for Column: ${columnName}, can not handle precision over 18`); | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, can not handle precision over 18`); | ||
} | ||
if (!opts.scale) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, scale is required`); | ||
if (typeof opts.scale === "undefined" || opts.scale < 0) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, scale is required to be 0 or greater`); | ||
} | ||
else if (!Number.isInteger(opts.scale)) { | ||
fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, scale must be an integer`); | ||
} | ||
// Default precision to 18 if it is undefined as that is a different error | ||
else if (opts.scale > (opts.precision || 18)) { | ||
fieldErrors.push(`invalid schema or precision for type: ${type}, for Column: ${columnName}, precision must be greater than or equal to scale`); | ||
} | ||
return fieldErrors; | ||
} |
@@ -31,3 +31,3 @@ "use strict"; | ||
const stream_1 = __importDefault(require("stream")); | ||
const parquet_types_1 = __importDefault(require("../gen-nodejs/parquet_types")); | ||
const parquet_types_1 = __importStar(require("../gen-nodejs/parquet_types")); | ||
const parquet_shredder = __importStar(require("./shred")); | ||
@@ -424,4 +424,4 @@ const parquet_util = __importStar(require("./util")); | ||
let valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { | ||
typeLength: column.typeLength, | ||
bitWidth: column.typeLength | ||
bitWidth: column.typeLength, | ||
...column | ||
}); | ||
@@ -463,4 +463,4 @@ /* encode repetition and definition levels */ | ||
let valuesBuf = encodeValues(column.primitiveType, column.encoding, values, { | ||
typeLength: column.typeLength, | ||
bitWidth: column.typeLength | ||
bitWidth: column.typeLength, | ||
...column, | ||
}); | ||
@@ -644,2 +644,9 @@ let valuesBufCompressed = await parquet_compression.deflate(column.compression, valuesBuf); | ||
} | ||
// Support Decimal | ||
switch (schemaElem.converted_type) { | ||
case (parquet_types_1.ConvertedType.DECIMAL): | ||
schemaElem.precision = field.precision; | ||
schemaElem.scale = field.scale || 0; | ||
break; | ||
} | ||
schemaElem.type_length = field.typeLength; | ||
@@ -646,0 +653,0 @@ metadata.schema.push(schemaElem); |
@@ -6,3 +6,3 @@ { | ||
"types": "dist/parquet.d.ts", | ||
"version": "1.3.0", | ||
"version": "1.3.1", | ||
"homepage": "https://github.com/LibertyDSNP/parquetjs", | ||
@@ -66,3 +66,3 @@ "license": "MIT", | ||
"lint": "echo 'Linting, it is on the TODO list...'", | ||
"test": "mocha -r ts-node/register 'test/**/*.{js,ts}'", | ||
"test": "mocha -r ts-node/register 'test/{,!(browser)/**}/*.{js,ts}'", | ||
"test:only": "mocha -r ts-node/register", | ||
@@ -69,0 +69,0 @@ "clean": "rm -Rf ./dist", |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
2747991
19360