@dsnp/parquetjs
Advanced tools
Comparing version 1.5.0 to 1.6.0
@@ -49,3 +49,3 @@ "use strict"; | ||
processQueue.forEach(async (d) => { | ||
d.resolve(buffer.slice(d.offset - start, d.offset + d.length - start)); | ||
d.resolve(buffer.subarray(d.offset - start, d.offset + d.length - start)); | ||
}); | ||
@@ -52,0 +52,0 @@ }; |
@@ -29,3 +29,3 @@ "use strict"; | ||
const decodeValues = function (type, cursor, count, opts) { | ||
const bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0); | ||
const bitWidth = cursor.buffer.subarray(cursor.offset, cursor.offset + 1).readInt8(0); | ||
cursor.offset += 1; | ||
@@ -32,0 +32,0 @@ return rle.decodeValues(type, cursor, count, Object.assign({}, opts, { disableEnvelope: true, bitWidth })); |
@@ -183,3 +183,2 @@ "use strict"; | ||
} | ||
// Waylands reminder to check again | ||
function encodeValues_BYTE_ARRAY(values) { | ||
@@ -206,3 +205,3 @@ let buf_len = 0; | ||
cursor.offset += 4; | ||
values.push(cursor.buffer.slice(cursor.offset, cursor.offset + len)); | ||
values.push(cursor.buffer.subarray(cursor.offset, cursor.offset + len)); | ||
cursor.offset += len; | ||
@@ -232,3 +231,3 @@ } | ||
for (let i = 0; i < count; ++i) { | ||
values.push(cursor.buffer.slice(cursor.offset, cursor.offset + typeLength)); | ||
values.push(cursor.buffer.subarray(cursor.offset, cursor.offset + typeLength)); | ||
cursor.offset += typeLength; | ||
@@ -235,0 +234,0 @@ } |
@@ -9,2 +9,3 @@ /// <reference types="node" /> | ||
import { Options } from './codec/types'; | ||
import { S3Client } from "@aws-sdk/client-s3"; | ||
/** | ||
@@ -56,7 +57,10 @@ * A parquet cursor is used to retrieve rows from a parquet file in order | ||
/** | ||
* Open the parquet file from S3 using the supplied aws client and params | ||
* The params have to include `Bucket` and `Key` to the file requested | ||
* This function returns a new parquet reader | ||
* Open the parquet file from S3 using the supplied aws client [, commands] and params | ||
* The params have to include `Bucket` and `Key` to the file requested, | ||
* If using v3 of the AWS SDK, combine the client and commands into an object wiht keys matching | ||
* the original module names, and do not instantiate the commands; pass them as classes/modules. | ||
* | ||
* This function returns a new parquet reader [ or throws an Error.] | ||
*/ | ||
static openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions): Promise<ParquetReader>; | ||
static openS3(client: any, params: ClientParameters, options?: BufferReaderOptions): Promise<ParquetReader>; | ||
/** | ||
@@ -135,2 +139,4 @@ * Open the parquet file from a url using the supplied request module | ||
static openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>; | ||
static openS3v3(client: S3Client, params: any, options: any): Promise<ParquetEnvelopeReader>; | ||
static streamToBuffer(body: any): Promise<Buffer>; | ||
static openUrl(url: Parameter | URL | string, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>; | ||
@@ -137,0 +143,0 @@ constructor(readFn: (offset: number, length: number, file?: string) => Promise<Buffer>, closeFn: () => unknown, fileSize: Function | number, options?: BufferReaderOptions, metadata?: FileMetaDataExt); |
@@ -42,2 +42,3 @@ "use strict"; | ||
const declare_1 = require("./declare"); | ||
const client_s3_1 = require("@aws-sdk/client-s3"); | ||
const { getBloomFiltersFor, } = bloomFilterReader; | ||
@@ -133,9 +134,19 @@ /** | ||
/** | ||
* Open the parquet file from S3 using the supplied aws client and params | ||
* The params have to include `Bucket` and `Key` to the file requested | ||
* This function returns a new parquet reader | ||
* Open the parquet file from S3 using the supplied aws client [, commands] and params | ||
* The params have to include `Bucket` and `Key` to the file requested, | ||
* If using v3 of the AWS SDK, combine the client and commands into an object wiht keys matching | ||
* the original module names, and do not instantiate the commands; pass them as classes/modules. | ||
* | ||
* This function returns a new parquet reader [ or throws an Error.] | ||
*/ | ||
static async openS3(client, params, options) { | ||
let envelopeReader = await ParquetEnvelopeReader.openS3(client, params, options); | ||
return this.openEnvelopeReader(envelopeReader, options); | ||
try { | ||
let envelopeReader = 'function' === typeof client['headObject'] ? | ||
await ParquetEnvelopeReader.openS3(client, params, options) : // S3 client v2 | ||
await ParquetEnvelopeReader.openS3v3(client, params, options); // S3 client v3 | ||
return this.openEnvelopeReader(envelopeReader, options); | ||
} | ||
catch (e) { | ||
throw new Error(`Error accessing S3 Bucket ${params.Bucket}. Message: ${e.message}`); | ||
} | ||
} | ||
@@ -382,3 +393,3 @@ /** | ||
} | ||
return Promise.resolve(buffer.slice(offset, offset + length)); | ||
return Promise.resolve(buffer.subarray(offset, offset + length)); | ||
}; | ||
@@ -401,2 +412,45 @@ let closeFn = () => ({}); | ||
} | ||
static async openS3v3(client, params, options) { | ||
const fileStat = async () => { | ||
try { | ||
let headObjectCommand = await client.send(new client_s3_1.HeadObjectCommand(params)); | ||
return Promise.resolve(headObjectCommand.ContentLength); | ||
} | ||
catch (e) { | ||
// having params match command names makes e.message clear to user | ||
return Promise.reject("rejected headObjectCommand: " + e.message); | ||
} | ||
}; | ||
const readFn = async (offset, length, file) => { | ||
if (file) { | ||
return Promise.reject("external references are not supported"); | ||
} | ||
const Range = `bytes=${offset}-${offset + length - 1}`; | ||
const input = { ...{ Range }, ...params }; | ||
const response = await client.send(new client_s3_1.GetObjectCommand(input)); | ||
const body = response.Body; | ||
if (body) { | ||
return ParquetEnvelopeReader.streamToBuffer(body); | ||
} | ||
return Buffer.of(); | ||
}; | ||
let closeFn = () => ({}); | ||
return new ParquetEnvelopeReader(readFn, closeFn, fileStat, options); | ||
} | ||
static async streamToBuffer(body) { | ||
const blob = body; | ||
if (blob.arrayBuffer !== undefined) { | ||
const arrayBuffer = await blob.arrayBuffer(); | ||
const uint8Array = new Uint8Array(arrayBuffer); | ||
return Buffer.from(uint8Array); | ||
} | ||
//Assumed to be a Readable like object | ||
const readable = body; | ||
return await new Promise((resolve, reject) => { | ||
const chunks = []; | ||
readable.on("data", (chunk) => chunks.push(chunk)); | ||
readable.on("error", reject); | ||
readable.on("end", () => resolve(Buffer.concat(chunks))); | ||
}); | ||
} | ||
static async openUrl(url, options) { | ||
@@ -607,4 +661,5 @@ let params; | ||
let trailerLen = PARQUET_MAGIC.length + 4; | ||
let trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen); | ||
if (trailerBuf.slice(4).toString() != PARQUET_MAGIC) { | ||
let offset = this.fileSize - trailerLen; | ||
let trailerBuf = await this.read(offset, trailerLen); | ||
if (trailerBuf.subarray(4).toString() != PARQUET_MAGIC) { | ||
throw 'not a valid parquet file'; | ||
@@ -666,3 +721,3 @@ } | ||
const headerOffset = cursor.offset; | ||
const headerSize = parquet_util.decodeThrift(pageHeader, cursor.buffer.slice(cursor.offset)); | ||
const headerSize = parquet_util.decodeThrift(pageHeader, cursor.buffer.subarray(cursor.offset)); | ||
cursor.offset += headerSize; | ||
@@ -741,3 +796,3 @@ const pageType = parquet_util.getThriftEnum(parquet_types_1.default.PageType, pageHeader.type); | ||
offset: 0, | ||
buffer: cursor.buffer.slice(cursor.offset, cursorEnd), | ||
buffer: cursor.buffer.subarray(cursor.offset, cursorEnd), | ||
size: cursorEnd - cursor.offset | ||
@@ -747,3 +802,3 @@ }; | ||
if (opts.compression && opts.compression !== 'UNCOMPRESSED') { | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, dictCursor.buffer.slice(dictCursor.offset, cursorEnd)); | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, dictCursor.buffer.subarray(dictCursor.offset, cursorEnd)); | ||
dictCursor = { | ||
@@ -765,3 +820,3 @@ buffer: valuesBuf, | ||
if (opts.compression && opts.compression !== 'UNCOMPRESSED') { | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd)); | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd)); | ||
valuesBufCursor = { | ||
@@ -847,3 +902,3 @@ buffer: valuesBuf, | ||
if (dataPageHeaderV2.is_compressed) { | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd)); | ||
let valuesBuf = await parquet_compression.inflate(opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd)); | ||
valuesBufCursor = { | ||
@@ -850,0 +905,0 @@ buffer: valuesBuf, |
@@ -6,3 +6,3 @@ { | ||
"types": "dist/parquet.d.ts", | ||
"version": "1.5.0", | ||
"version": "1.6.0", | ||
"homepage": "https://github.com/LibertyDSNP/parquetjs", | ||
@@ -19,2 +19,3 @@ "license": "MIT", | ||
"dependencies": { | ||
"@aws-sdk/client-s3": "^3.489.0", | ||
"@types/long": "^4.0.2", | ||
@@ -35,2 +36,3 @@ "@types/node-int64": "^0.4.29", | ||
"devDependencies": { | ||
"@smithy/util-stream": "^2.0.24", | ||
"@types/chai": "^4.3.5", | ||
@@ -43,2 +45,3 @@ "@types/json-schema": "^7.0.11", | ||
"assert": "^2.0.0", | ||
"aws-sdk-client-mock": "^3.0.1", | ||
"browserfs": "^1.4.3", | ||
@@ -45,0 +48,0 @@ "buffer": "^6.0.3", |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
3032546
20674
14
25
+ Added@aws-sdk/client-s3@^3.489.0
+ Added@aws-crypto/crc32@3.0.0(transitive)
+ Added@aws-crypto/crc32c@3.0.0(transitive)
+ Added@aws-crypto/ie11-detection@3.0.0(transitive)
+ Added@aws-crypto/sha1-browser@3.0.0(transitive)
+ Added@aws-crypto/sha256-browser@3.0.0(transitive)
+ Added@aws-crypto/sha256-js@3.0.0(transitive)
+ Added@aws-crypto/supports-web-crypto@3.0.0(transitive)
+ Added@aws-crypto/util@3.0.0(transitive)
+ Added@aws-sdk/client-s3@3.583.0(transitive)
+ Added@aws-sdk/client-sso@3.583.0(transitive)
+ Added@aws-sdk/client-sso-oidc@3.583.0(transitive)
+ Added@aws-sdk/client-sts@3.583.0(transitive)
+ Added@aws-sdk/core@3.582.0(transitive)
+ Added@aws-sdk/credential-provider-env@3.577.0(transitive)
+ Added@aws-sdk/credential-provider-http@3.582.0(transitive)
+ Added@aws-sdk/credential-provider-ini@3.583.0(transitive)
+ Added@aws-sdk/credential-provider-node@3.583.0(transitive)
+ Added@aws-sdk/credential-provider-process@3.577.0(transitive)
+ Added@aws-sdk/credential-provider-sso@3.583.0(transitive)
+ Added@aws-sdk/credential-provider-web-identity@3.577.0(transitive)
+ Added@aws-sdk/middleware-bucket-endpoint@3.577.0(transitive)
+ Added@aws-sdk/middleware-expect-continue@3.577.0(transitive)
+ Added@aws-sdk/middleware-flexible-checksums@3.577.0(transitive)
+ Added@aws-sdk/middleware-host-header@3.577.0(transitive)
+ Added@aws-sdk/middleware-location-constraint@3.577.0(transitive)
+ Added@aws-sdk/middleware-logger@3.577.0(transitive)
+ Added@aws-sdk/middleware-recursion-detection@3.577.0(transitive)
+ Added@aws-sdk/middleware-sdk-s3@3.582.0(transitive)
+ Added@aws-sdk/middleware-signing@3.577.0(transitive)
+ Added@aws-sdk/middleware-ssec@3.577.0(transitive)
+ Added@aws-sdk/middleware-user-agent@3.583.0(transitive)
+ Added@aws-sdk/region-config-resolver@3.577.0(transitive)
+ Added@aws-sdk/signature-v4-multi-region@3.582.0(transitive)
+ Added@aws-sdk/token-providers@3.577.0(transitive)
+ Added@aws-sdk/types@3.577.0(transitive)
+ Added@aws-sdk/util-arn-parser@3.568.0(transitive)
+ Added@aws-sdk/util-endpoints@3.583.0(transitive)
+ Added@aws-sdk/util-locate-window@3.568.0(transitive)
+ Added@aws-sdk/util-user-agent-browser@3.577.0(transitive)
+ Added@aws-sdk/util-user-agent-node@3.577.0(transitive)
+ Added@aws-sdk/util-utf8-browser@3.259.0(transitive)
+ Added@aws-sdk/xml-builder@3.575.0(transitive)
+ Added@smithy/abort-controller@3.0.0(transitive)
+ Added@smithy/chunked-blob-reader@3.0.0(transitive)
+ Added@smithy/chunked-blob-reader-native@3.0.0(transitive)
+ Added@smithy/config-resolver@3.0.0(transitive)
+ Added@smithy/core@2.0.1(transitive)
+ Added@smithy/credential-provider-imds@3.0.0(transitive)
+ Added@smithy/eventstream-codec@3.0.0(transitive)
+ Added@smithy/eventstream-serde-browser@3.0.0(transitive)
+ Added@smithy/eventstream-serde-config-resolver@3.0.0(transitive)
+ Added@smithy/eventstream-serde-node@3.0.0(transitive)
+ Added@smithy/eventstream-serde-universal@3.0.0(transitive)
+ Added@smithy/fetch-http-handler@3.0.1(transitive)
+ Added@smithy/hash-blob-browser@3.0.0(transitive)
+ Added@smithy/hash-node@3.0.0(transitive)
+ Added@smithy/hash-stream-node@3.0.0(transitive)
+ Added@smithy/invalid-dependency@3.0.0(transitive)
+ Added@smithy/is-array-buffer@3.0.0(transitive)
+ Added@smithy/md5-js@3.0.0(transitive)
+ Added@smithy/middleware-content-length@3.0.0(transitive)
+ Added@smithy/middleware-endpoint@3.0.0(transitive)
+ Added@smithy/middleware-retry@3.0.1(transitive)
+ Added@smithy/middleware-serde@3.0.0(transitive)
+ Added@smithy/middleware-stack@3.0.0(transitive)
+ Added@smithy/node-config-provider@3.0.0(transitive)
+ Added@smithy/node-http-handler@3.0.0(transitive)
+ Added@smithy/property-provider@3.0.0(transitive)
+ Added@smithy/protocol-http@4.0.0(transitive)
+ Added@smithy/querystring-builder@3.0.0(transitive)
+ Added@smithy/querystring-parser@3.0.0(transitive)
+ Added@smithy/service-error-classification@3.0.0(transitive)
+ Added@smithy/shared-ini-file-loader@3.0.0(transitive)
+ Added@smithy/signature-v4@3.0.0(transitive)
+ Added@smithy/smithy-client@3.0.1(transitive)
+ Added@smithy/types@3.0.0(transitive)
+ Added@smithy/url-parser@3.0.0(transitive)
+ Added@smithy/util-base64@3.0.0(transitive)
+ Added@smithy/util-body-length-browser@3.0.0(transitive)
+ Added@smithy/util-body-length-node@3.0.0(transitive)
+ Added@smithy/util-buffer-from@3.0.0(transitive)
+ Added@smithy/util-config-provider@3.0.0(transitive)
+ Added@smithy/util-defaults-mode-browser@3.0.1(transitive)
+ Added@smithy/util-defaults-mode-node@3.0.1(transitive)
+ Added@smithy/util-endpoints@2.0.0(transitive)
+ Added@smithy/util-hex-encoding@3.0.0(transitive)
+ Added@smithy/util-middleware@3.0.0(transitive)
+ Added@smithy/util-retry@3.0.0(transitive)
+ Added@smithy/util-stream@3.0.1(transitive)
+ Added@smithy/util-uri-escape@3.0.0(transitive)
+ Added@smithy/util-utf8@3.0.0(transitive)
+ Added@smithy/util-waiter@3.0.0(transitive)
+ Addedbowser@2.11.0(transitive)
+ Addedfast-xml-parser@4.2.5(transitive)
+ Addedstrnum@1.0.5(transitive)
+ Addedtslib@1.14.12.6.2(transitive)
+ Addeduuid@9.0.1(transitive)