@tokenizer/inflate
// GzipHandler.d.ts
import type { ITokenizer } from 'strtok3';
export declare class GzipHandler {
    private tokenizer;
    constructor(tokenizer: ITokenizer);
    inflate(): ReadableStream<Uint8Array>;
}
// GzipHandler.js
export class GzipHandler {
    constructor(tokenizer) {
        this.tokenizer = tokenizer;
    }
    inflate() {
        const tokenizer = this.tokenizer;
        return new ReadableStream({
            async pull(controller) {
                const buffer = new Uint8Array(1024);
                const size = await tokenizer.readBuffer(buffer, { mayBeLess: true });
                if (size === 0) {
                    controller.close();
                    return;
                }
                controller.enqueue(buffer.subarray(0, size));
            }
        }).pipeThrough(new DecompressionStream("gzip"));
    }
}
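A minimal usage sketch (not part of the package source): it assumes strtok3's fromFile() factory and a runtime with WHATWG Response and DecompressionStream (Node 18+ or a modern browser); the readGzipText helper and the file path argument are illustrative only.

import { fromFile } from 'strtok3';
import { GzipHandler } from '@tokenizer/inflate';

// Decompress a .gz file and return its contents as text (illustrative helper).
async function readGzipText(path: string): Promise<string> {
    const tokenizer = await fromFile(path); // assumed strtok3 factory
    try {
        // inflate() yields a ReadableStream<Uint8Array> of decompressed bytes;
        // Response is a convenient way to drain it into a string.
        return await new Response(new GzipHandler(tokenizer).inflate()).text();
    } finally {
        await tokenizer.close();
    }
}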
// index.d.ts
export { ZipHandler } from './ZipHandler.js';
export { GzipHandler } from './GzipHandler.js';
export type { ILocalFileHeader } from './ZipToken.js';
// index.js
export { ZipHandler } from './ZipHandler.js';
export { GzipHandler } from './GzipHandler.js';
// ZipHandler.d.ts
import type { ITokenizer } from 'strtok3';
import { type IFileHeader, type ILocalFileHeader } from "./ZipToken.js";
export type InflateFileFilterResult = {
    handler: InflatedDataHandler | false;
    stop?: boolean;
};
export type { ILocalFileHeader } from './ZipToken.js';
/**
 * Return `false` to ignore the file; return an `InflatedDataHandler` to handle the extracted data
 */
export type InflateFileFilter = (file: ILocalFileHeader) => InflateFileFilterResult;
export type InflatedDataHandler = (fileData: Uint8Array) => Promise<void>;
export declare class ZipHandler {
    private tokenizer;
    private syncBuffer;
    constructor(tokenizer: ITokenizer);
    isZip(): Promise<boolean>;
    private peekSignature;
    findEndOfCentralDirectoryLocator(): Promise<number>;
    readCentralDirectory(): Promise<IFileHeader[] | undefined>;
    unzip(fileCb: InflateFileFilter): Promise<void>;
    private iterateOverCentralDirectory;
    private inflate;
    private static decompressDeflateRaw;
    private readLocalFileHeader;
}
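A usage sketch for the unzip() / InflateFileFilter contract above, again assuming strtok3's fromFile() factory; the ".txt" filter, the in-memory Map, and the extractTextFiles helper are illustrative only.

import { fromFile } from 'strtok3';
import { ZipHandler } from '@tokenizer/inflate';

// Collect every .txt entry of a ZIP archive into memory (illustrative helper).
async function extractTextFiles(zipPath: string): Promise<Map<string, Uint8Array>> {
    const tokenizer = await fromFile(zipPath); // assumed strtok3 factory
    const extracted = new Map<string, Uint8Array>();
    try {
        const zip = new ZipHandler(tokenizer);
        if (!(await zip.isZip())) {
            throw new Error('Not a ZIP archive');
        }
        await zip.unzip(file => {
            if (!file.filename.endsWith('.txt')) {
                return { handler: false }; // ignore this entry
            }
            return {
                handler: async data => { // receives the inflated bytes
                    extracted.set(file.filename, data);
                }
            };
        });
    } finally {
        await tokenizer.close();
    }
    return extracted;
}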
// ZipHandler.js
import { StringType, UINT32_LE } from 'token-types';
import initDebug from 'debug';
import { DataDescriptor, EndOfCentralDirectoryRecordToken, FileHeader, LocalFileHeaderToken, Signature } from "./ZipToken.js";
function signatureToArray(signature) {
    const signatureBytes = new Uint8Array(UINT32_LE.len);
    UINT32_LE.put(signatureBytes, 0, signature);
    return signatureBytes;
}
const debug = initDebug('tokenizer:inflate');
const syncBufferSize = 256 * 1024;
const ddSignatureArray = signatureToArray(Signature.DataDescriptor);
const eocdSignatureBytes = signatureToArray(Signature.EndOfCentralDirectory);
export class ZipHandler {
    constructor(tokenizer) {
        this.tokenizer = tokenizer;
        this.syncBuffer = new Uint8Array(syncBufferSize);
    }
    async isZip() {
        return await this.peekSignature() === Signature.LocalFileHeader;
    }
    peekSignature() {
        return this.tokenizer.peekToken(UINT32_LE);
    }
    async findEndOfCentralDirectoryLocator() {
        const randomReadTokenizer = this.tokenizer;
        const chunkLength = Math.min(16 * 1024, randomReadTokenizer.fileInfo.size);
        const buffer = this.syncBuffer.subarray(0, chunkLength);
        await this.tokenizer.readBuffer(buffer, { position: randomReadTokenizer.fileInfo.size - chunkLength });
        // Search the buffer from end to beginning for EOCD signature
        // const signature = 0x06054b50;
        for (let i = buffer.length - 4; i >= 0; i--) {
            // Compare 4 bytes directly without calling readUInt32LE
            if (buffer[i] === eocdSignatureBytes[0] &&
                buffer[i + 1] === eocdSignatureBytes[1] &&
                buffer[i + 2] === eocdSignatureBytes[2] &&
                buffer[i + 3] === eocdSignatureBytes[3]) {
                return randomReadTokenizer.fileInfo.size - chunkLength + i;
            }
        }
        return -1;
    }
    async readCentralDirectory() {
        if (!this.tokenizer.supportsRandomAccess()) {
            debug('Cannot read central-directory without random-read support');
            return;
        }
        debug('Reading central-directory...');
        const pos = this.tokenizer.position;
        const offset = await this.findEndOfCentralDirectoryLocator();
        if (offset > 0) {
            debug('Central-directory 32-bit signature found');
            const eocdHeader = await this.tokenizer.readToken(EndOfCentralDirectoryRecordToken, offset);
            const files = [];
            this.tokenizer.setPosition(eocdHeader.offsetOfStartOfCd);
            for (let n = 0; n < eocdHeader.nrOfEntriesOfSize; ++n) {
                const entry = await this.tokenizer.readToken(FileHeader);
                if (entry.signature !== Signature.CentralFileHeader) {
                    throw new Error('Expected Central-File-Header signature');
                }
                entry.filename = await this.tokenizer.readToken(new StringType(entry.filenameLength, 'utf-8'));
                await this.tokenizer.ignore(entry.extraFieldLength);
                await this.tokenizer.ignore(entry.fileCommentLength);
                files.push(entry);
                debug(`Add central-directory file-entry: n=${n + 1}/${files.length}: filename=${files[n].filename}`);
            }
            this.tokenizer.setPosition(pos);
            return files;
        }
        this.tokenizer.setPosition(pos);
    }
    async unzip(fileCb) {
        const entries = await this.readCentralDirectory();
        if (entries) {
            // Use Central Directory to iterate over files
            return this.iterateOverCentralDirectory(entries, fileCb);
        }
        // Scan Zip files for local-file-header
        let stop = false;
        do {
            const zipHeader = await this.readLocalFileHeader();
            if (!zipHeader)
                break;
            const next = fileCb(zipHeader);
            stop = !!next.stop;
            let fileData;
            await this.tokenizer.ignore(zipHeader.extraFieldLength);
            if (zipHeader.dataDescriptor && zipHeader.compressedSize === 0) {
                const chunks = [];
                let len = syncBufferSize;
                debug('Compressed-file-size unknown, scanning for next data-descriptor-signature....');
                let nextHeaderIndex = -1;
                while (nextHeaderIndex < 0 && len === syncBufferSize) {
                    len = await this.tokenizer.peekBuffer(this.syncBuffer, { mayBeLess: true });
                    nextHeaderIndex = indexOf(this.syncBuffer.subarray(0, len), ddSignatureArray);
                    const size = nextHeaderIndex >= 0 ? nextHeaderIndex : len;
                    if (next.handler) {
                        const data = new Uint8Array(size);
                        await this.tokenizer.readBuffer(data);
                        chunks.push(data);
                    }
                    else {
                        // Move position to the next header if found, skip the whole buffer otherwise
                        await this.tokenizer.ignore(size);
                    }
                }
                debug(`Found data-descriptor-signature at pos=${this.tokenizer.position}`);
                if (next.handler) {
                    await this.inflate(zipHeader, mergeArrays(chunks), next.handler);
                }
            }
            else {
                if (next.handler) {
                    debug(`Reading compressed-file-data: ${zipHeader.compressedSize} bytes`);
                    fileData = new Uint8Array(zipHeader.compressedSize);
                    await this.tokenizer.readBuffer(fileData);
                    await this.inflate(zipHeader, fileData, next.handler);
                }
                else {
                    debug(`Ignoring compressed-file-data: ${zipHeader.compressedSize} bytes`);
                    await this.tokenizer.ignore(zipHeader.compressedSize);
                }
            }
            debug(`Reading data-descriptor at pos=${this.tokenizer.position}`);
            if (zipHeader.dataDescriptor) {
                // await this.tokenizer.ignore(DataDescriptor.len);
                const dataDescriptor = await this.tokenizer.readToken(DataDescriptor);
                if (dataDescriptor.signature !== 0x08074b50) {
                    throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - DataDescriptor.len}`);
                }
            }
        } while (!stop);
    }
    async iterateOverCentralDirectory(entries, fileCb) {
        for (const fileHeader of entries) {
            const next = fileCb(fileHeader);
            if (next.handler) {
                this.tokenizer.setPosition(fileHeader.relativeOffsetOfLocalHeader);
                const zipHeader = await this.readLocalFileHeader();
                if (zipHeader) {
                    await this.tokenizer.ignore(zipHeader.extraFieldLength);
                    const fileData = new Uint8Array(fileHeader.compressedSize);
                    await this.tokenizer.readBuffer(fileData);
                    await this.inflate(zipHeader, fileData, next.handler);
                }
            }
            if (next.stop)
                break;
        }
    }
    async inflate(zipHeader, fileData, cb) {
        if (zipHeader.compressedMethod === 0) {
            // Stored (uncompressed)
            return cb(fileData);
        }
        if (zipHeader.compressedMethod !== 8) {
            throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
        }
        debug(`Decompress filename=${zipHeader.filename}, compressed-size=${fileData.length}`);
        const uncompressedData = await ZipHandler.decompressDeflateRaw(fileData);
        return cb(uncompressedData);
    }
    static async decompressDeflateRaw(data) {
        // Wrap Uint8Array in a ReadableStream without copying
        const input = new ReadableStream({
            start(controller) {
                controller.enqueue(data);
                controller.close();
            }
        });
        const ds = new DecompressionStream("deflate-raw");
        const output = input.pipeThrough(ds);
        try {
            // Collect decompressed bytes from the output stream
            const response = new Response(output);
            const buffer = await response.arrayBuffer();
            return new Uint8Array(buffer);
        }
        catch (err) {
            // Provide ZIP-specific error context
            const message = err instanceof Error
                ? `Failed to deflate ZIP entry: ${err.message}`
                : "Unknown decompression error in ZIP entry";
            throw new TypeError(message);
        }
    }
    async readLocalFileHeader() {
        const signature = await this.tokenizer.peekToken(UINT32_LE);
        if (signature === Signature.LocalFileHeader) {
            const header = await this.tokenizer.readToken(LocalFileHeaderToken);
            header.filename = await this.tokenizer.readToken(new StringType(header.filenameLength, 'utf-8'));
            return header;
        }
        if (signature === Signature.CentralFileHeader) {
            return false;
        }
        if (signature === 0xE011CFD0) {
            throw new Error('Encrypted ZIP');
        }
        throw new Error('Unexpected signature');
    }
}
function indexOf(buffer, portion) {
    const bufferLength = buffer.length;
    const portionLength = portion.length;
    // Return -1 if the portion is longer than the buffer
    if (portionLength > bufferLength)
        return -1;
    // Search for the portion in the buffer
    for (let i = 0; i <= bufferLength - portionLength; i++) {
        let found = true;
        for (let j = 0; j < portionLength; j++) {
            if (buffer[i + j] !== portion[j]) {
                found = false;
                break;
            }
        }
        if (found) {
            return i; // Return the starting offset
        }
    }
    return -1; // Not found
}
function mergeArrays(chunks) {
    // Concatenate chunks into a single Uint8Array
    const totalLength = chunks.reduce((acc, curr) => acc + curr.length, 0);
    const mergedArray = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
        mergedArray.set(chunk, offset);
        offset += chunk.length;
    }
    return mergedArray;
}
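A sketch contrasting the two code paths above: with a random-access tokenizer (here assumed to be strtok3's fromBuffer()), readCentralDirectory() can seek straight to the end-of-central-directory record; with a purely streaming tokenizer it returns undefined and unzip() falls back to scanning local file headers. The listEntries helper is illustrative only.

import { fromBuffer } from 'strtok3';
import { ZipHandler } from '@tokenizer/inflate';

// List entry names via the central directory of an in-memory ZIP (illustrative helper).
async function listEntries(zipBytes: Uint8Array): Promise<string[]> {
    const tokenizer = fromBuffer(zipBytes); // random access, so supportsRandomAccess() is true
    const entries = await new ZipHandler(tokenizer).readCentralDirectory();
    return (entries ?? []).map(entry => entry.filename);
}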
// ZipToken.d.ts
/**
 * Ref https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
 */
import type { IGetToken } from "strtok3";
export declare const Signature: {
    LocalFileHeader: number;
    DataDescriptor: number;
    CentralFileHeader: number;
    EndOfCentralDirectory: number;
};
interface IDataDescriptor {
    signature: number;
    compressedSize: number;
    uncompressedSize: number;
}
export interface ILocalFileHeader extends IDataDescriptor {
    minVersion: number;
    dataDescriptor: boolean;
    compressedMethod: number;
    compressedSize: number;
    uncompressedSize: number;
    filenameLength: number;
    extraFieldLength: number;
    filename: string;
}
export declare const DataDescriptor: IGetToken<IDataDescriptor>;
/**
 * First part of the ZIP Local File Header
 * Offset | Bytes| Description
 * -------|------+-------------------------------------------------------------------
 * 0      | 4    | Signature (0x04034b50)
 * 4      | 2    | Minimum version needed to extract
 * 6      | 2    | Bit flag
 * 8      | 2    | Compression method
 * 10     | 2    | File last modification time (MS-DOS format)
 * 12     | 2    | File last modification date (MS-DOS format)
 * 14     | 4    | CRC-32 of uncompressed data
 * 18     | 4    | Compressed size
 * 22     | 4    | Uncompressed size
 * 26     | 2    | File name length (n)
 * 28     | 2    | Extra field length (m)
 * 30     | n    | File name
 * 30 + n | m    | Extra field
 */
export declare const LocalFileHeaderToken: IGetToken<ILocalFileHeader>;
interface IEndOfCentralDirectoryRecord {
    signature: number;
    nrOfThisDisk: number;
    nrOfThisDiskWithTheStart: number;
    nrOfEntriesOnThisDisk: number;
    nrOfEntriesOfSize: number;
    sizeOfCd: number;
    offsetOfStartOfCd: number;
    zipFileCommentLength: number;
}
/**
 * 4.3.16 End of central directory record:
 *    end of central dir signature (0x06054b50)                                       4 bytes
 *    number of this disk                                                             2 bytes
 *    number of the disk with the start of the central directory                      2 bytes
 *    total number of entries in the central directory on this disk                   2 bytes
 *    total number of entries in the central directory                                2 bytes
 *    size of the central directory                                                   4 bytes
 *    offset of start of central directory with respect to the starting disk number   4 bytes
 *    .ZIP file comment length                                                        2 bytes
 *    .ZIP file comment                                                               (variable size)
 */
export declare const EndOfCentralDirectoryRecordToken: IGetToken<IEndOfCentralDirectoryRecord>;
export interface IFileHeader extends ILocalFileHeader {
    fileCommentLength: number;
    relativeOffsetOfLocalHeader: number;
}
/**
 * File header:
 *    central file header signature     4 bytes   0   (0x02014b50)
 *    version made by                   2 bytes   4
 *    version needed to extract         2 bytes   6
 *    general purpose bit flag          2 bytes   8
 *    compression method                2 bytes  10
 *    last mod file time                2 bytes  12
 *    last mod file date                2 bytes  14
 *    crc-32                            4 bytes  16
 *    compressed size                   4 bytes  20
 *    uncompressed size                 4 bytes  24
 *    file name length                  2 bytes  28
 *    extra field length                2 bytes  30
 *    file comment length               2 bytes  32
 *    disk number start                 2 bytes  34
 *    internal file attributes          2 bytes  36
 *    external file attributes          4 bytes  38
 *    relative offset of local header   4 bytes  42
 */
export declare const FileHeader: IGetToken<IFileHeader>;
export {};
// ZipToken.js
/**
 * Ref https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
 */
import { UINT16_LE, UINT32_LE } from "token-types";
export const Signature = {
    LocalFileHeader: 0x04034b50,
    DataDescriptor: 0x08074b50,
    CentralFileHeader: 0x02014b50,
    EndOfCentralDirectory: 0x06054b50
};
export const DataDescriptor = {
    get(array) {
        return {
            signature: UINT32_LE.get(array, 0),
            compressedSize: UINT32_LE.get(array, 8),
            uncompressedSize: UINT32_LE.get(array, 12),
        };
    }, len: 16
};
/**
 * First part of the ZIP Local File Header
 * Offset | Bytes| Description
 * -------|------+-------------------------------------------------------------------
 * 0      | 4    | Signature (0x04034b50)
 * 4      | 2    | Minimum version needed to extract
 * 6      | 2    | Bit flag
 * 8      | 2    | Compression method
 * 10     | 2    | File last modification time (MS-DOS format)
 * 12     | 2    | File last modification date (MS-DOS format)
 * 14     | 4    | CRC-32 of uncompressed data
 * 18     | 4    | Compressed size
 * 22     | 4    | Uncompressed size
 * 26     | 2    | File name length (n)
 * 28     | 2    | Extra field length (m)
 * 30     | n    | File name
 * 30 + n | m    | Extra field
 */
export const LocalFileHeaderToken = {
    get(array) {
        const flags = UINT16_LE.get(array, 6);
        return {
            signature: UINT32_LE.get(array, 0),
            minVersion: UINT16_LE.get(array, 4),
            dataDescriptor: !!(flags & 0x0008),
            compressedMethod: UINT16_LE.get(array, 8),
            compressedSize: UINT32_LE.get(array, 18),
            uncompressedSize: UINT32_LE.get(array, 22),
            filenameLength: UINT16_LE.get(array, 26),
            extraFieldLength: UINT16_LE.get(array, 28),
            filename: null
        };
    }, len: 30
};
/**
 * 4.3.16 End of central directory record:
 *    end of central dir signature (0x06054b50)                                       4 bytes
 *    number of this disk                                                             2 bytes
 *    number of the disk with the start of the central directory                      2 bytes
 *    total number of entries in the central directory on this disk                   2 bytes
 *    total number of entries in the central directory                                2 bytes
 *    size of the central directory                                                   4 bytes
 *    offset of start of central directory with respect to the starting disk number   4 bytes
 *    .ZIP file comment length                                                        2 bytes
 *    .ZIP file comment                                                               (variable size)
 */
export const EndOfCentralDirectoryRecordToken = {
    get(array) {
        return {
            signature: UINT32_LE.get(array, 0),
            nrOfThisDisk: UINT16_LE.get(array, 4),
            nrOfThisDiskWithTheStart: UINT16_LE.get(array, 6),
            nrOfEntriesOnThisDisk: UINT16_LE.get(array, 8),
            nrOfEntriesOfSize: UINT16_LE.get(array, 10),
            sizeOfCd: UINT32_LE.get(array, 12),
            offsetOfStartOfCd: UINT32_LE.get(array, 16),
            zipFileCommentLength: UINT16_LE.get(array, 20),
        };
    }, len: 22
};
/**
 * File header:
 *    central file header signature     4 bytes   0   (0x02014b50)
 *    version made by                   2 bytes   4
 *    version needed to extract         2 bytes   6
 *    general purpose bit flag          2 bytes   8
 *    compression method                2 bytes  10
 *    last mod file time                2 bytes  12
 *    last mod file date                2 bytes  14
 *    crc-32                            4 bytes  16
 *    compressed size                   4 bytes  20
 *    uncompressed size                 4 bytes  24
 *    file name length                  2 bytes  28
 *    extra field length                2 bytes  30
 *    file comment length               2 bytes  32
 *    disk number start                 2 bytes  34
 *    internal file attributes          2 bytes  36
 *    external file attributes          4 bytes  38
 *    relative offset of local header   4 bytes  42
 */
export const FileHeader = {
    get(array) {
        const flags = UINT16_LE.get(array, 8);
        return {
            signature: UINT32_LE.get(array, 0),
            minVersion: UINT16_LE.get(array, 6),
            dataDescriptor: !!(flags & 0x0008),
            compressedMethod: UINT16_LE.get(array, 10),
            compressedSize: UINT32_LE.get(array, 20),
            uncompressedSize: UINT32_LE.get(array, 24),
            filenameLength: UINT16_LE.get(array, 28),
            extraFieldLength: UINT16_LE.get(array, 30),
            fileCommentLength: UINT16_LE.get(array, 32),
            relativeOffsetOfLocalHeader: UINT32_LE.get(array, 42),
            filename: null
        };
    }, len: 46
};
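A small sketch of the IGetToken pattern used above, assuming it runs alongside ZipToken.js inside the package source tree: it encodes the fixed 30-byte part of an example local file header with token-types and decodes it again with LocalFileHeaderToken. All field values are arbitrary example data.

import { UINT16_LE, UINT32_LE } from 'token-types';
import { LocalFileHeaderToken, Signature } from './ZipToken.js';

// Encode an example local-file-header at the offsets documented above...
const raw = new Uint8Array(LocalFileHeaderToken.len);
UINT32_LE.put(raw, 0, Signature.LocalFileHeader); // signature 0x04034b50
UINT16_LE.put(raw, 4, 20);                        // minimum version: 2.0
UINT16_LE.put(raw, 6, 0x0008);                    // bit flag: data descriptor follows
UINT16_LE.put(raw, 8, 8);                         // compression method: deflate
UINT32_LE.put(raw, 18, 1024);                     // compressed size (example)
UINT32_LE.put(raw, 22, 4096);                     // uncompressed size (example)
UINT16_LE.put(raw, 26, 9);                        // file name length
UINT16_LE.put(raw, 28, 0);                        // extra field length

// ...and decode it again; the variable-length file name is read separately by ZipHandler.
const header = LocalFileHeaderToken.get(raw, 0);
console.assert(header.dataDescriptor === true && header.compressedMethod === 8);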
package.json (diff):
  {
    "name": "@tokenizer/inflate",
-   "version": "0.4.0",
+   "version": "0.4.1",
    "description": "Tokenized zip support",
@@ -17,2 +17,3 @@ "type": "module",
    "build": "yarn run clean && yarn run compile",
    "prepublishOnly": "yarn run build",
    "eslint": "eslint lib test",
@@ -19,0 +20,0 @@ "lint:md": "remark -u preset-lint-recommended .",
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package

Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package

Empty package
Supply chain risk: Package does not contain any code. It may be removed, is name squatting, or the result of a faulty package publish.
Found 1 instance in 1 package