@apache-arrow/ts
Advanced tools
Comparing version 16.1.0 to 17.0.0
@@ -366,10 +366,7 @@ // Licensed to the Apache Software Foundation (ASF) under one | ||
const dictionary = dictionaries.get(id); | ||
if (isDelta || !dictionary) { | ||
const type = schema.dictionaries.get(id)!; | ||
const data = this._loadVectors(header.data, body, [type]); | ||
return (dictionary && isDelta ? dictionary.concat( | ||
new Vector(data)) : | ||
new Vector(data)).memoize() as Vector; | ||
} | ||
return dictionary.memoize(); | ||
const type = schema.dictionaries.get(id)!; | ||
const data = this._loadVectors(header.data, body, [type]); | ||
return (dictionary && isDelta ? dictionary.concat( | ||
new Vector(data)) : | ||
new Vector(data)).memoize() as Vector; | ||
} | ||
@@ -376,0 +373,0 @@ protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { |
@@ -87,2 +87,3 @@ // Licensed to the Apache Software Foundation (ASF) under one | ||
protected _recordBatchBlocks: FileBlock[] = []; | ||
protected _seenDictionaries = new Map<number, Vector>(); | ||
protected _dictionaryDeltaOffsets = new Map<number, number>(); | ||
@@ -148,2 +149,3 @@ | ||
this._recordBatchBlocks = []; | ||
this._seenDictionaries = new Map(); | ||
this._dictionaryDeltaOffsets = new Map(); | ||
@@ -264,3 +266,2 @@ | ||
protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { | ||
this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0)); | ||
const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(new Vector([dictionary])); | ||
@@ -290,10 +291,17 @@ const recordBatch = new metadata.RecordBatch(dictionary.length, nodes, bufferRegions); | ||
protected _writeDictionaries(batch: RecordBatch<T>) { | ||
for (let [id, dictionary] of batch.dictionaries) { | ||
let offset = this._dictionaryDeltaOffsets.get(id) || 0; | ||
if (offset === 0 || (dictionary = dictionary?.slice(offset)).length > 0) { | ||
for (const data of dictionary.data) { | ||
this._writeDictionaryBatch(data, id, offset > 0); | ||
offset += data.length; | ||
} | ||
for (const [id, dictionary] of batch.dictionaries) { | ||
const chunks = dictionary?.data ?? []; | ||
const prevDictionary = this._seenDictionaries.get(id); | ||
const offset = this._dictionaryDeltaOffsets.get(id) ?? 0; | ||
// * If no previous dictionary was written, write an initial DictionaryMessage. | ||
// * If the current dictionary does not share chunks with the previous dictionary, write a replacement DictionaryMessage. | ||
if (!prevDictionary || prevDictionary.data[0] !== chunks[0]) { | ||
// * If `index > 0`, then `isDelta` is true. | ||
// * If `index = 0`, then `isDelta` is false, because this is either the initial or a replacement DictionaryMessage. | ||
for (const [index, chunk] of chunks.entries()) this._writeDictionaryBatch(chunk, id, index > 0); | ||
} else if (offset < chunks.length) { | ||
for (const chunk of chunks.slice(offset)) this._writeDictionaryBatch(chunk, id, true); | ||
} | ||
this._seenDictionaries.set(id, dictionary); | ||
this._dictionaryDeltaOffsets.set(id, chunks.length); | ||
} | ||
@@ -349,2 +357,9 @@ return this; | ||
protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { | ||
if (!isDelta && this._seenDictionaries.has(id)) { | ||
throw new Error('The Arrow File format does not support replacement dictionaries. '); | ||
} | ||
return super._writeDictionaryBatch(dictionary, id, isDelta); | ||
} | ||
protected _writeFooter(schema: Schema<T>) { | ||
@@ -377,3 +392,3 @@ const buffer = Footer.encode(new Footer( | ||
private _recordBatches: RecordBatch[]; | ||
private _dictionaries: RecordBatch[]; | ||
private _recordBatchesWithDictionaries: RecordBatch[]; | ||
@@ -384,3 +399,3 @@ constructor() { | ||
this._recordBatches = []; | ||
this._dictionaries = []; | ||
this._recordBatchesWithDictionaries = []; | ||
} | ||
@@ -396,3 +411,3 @@ | ||
if (batch.dictionaries.size > 0) { | ||
this._dictionaries.push(batch); | ||
this._recordBatchesWithDictionaries.push(batch); | ||
} | ||
@@ -402,3 +417,2 @@ return this; | ||
protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { | ||
this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0)); | ||
this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `); | ||
@@ -415,5 +429,5 @@ this._write(dictionaryBatchToJSON(dictionary, id, isDelta)); | ||
public close() { | ||
if (this._dictionaries.length > 0) { | ||
if (this._recordBatchesWithDictionaries.length > 0) { | ||
this._write(`,\n "dictionaries": [\n`); | ||
for (const batch of this._dictionaries) { | ||
for (const batch of this._recordBatchesWithDictionaries) { | ||
super._writeDictionaries(batch); | ||
@@ -437,3 +451,3 @@ } | ||
this._dictionaries = []; | ||
this._recordBatchesWithDictionaries = []; | ||
this._recordBatches = []; | ||
@@ -440,0 +454,0 @@ |
{ | ||
"version": "16.1.0", | ||
"version": "17.0.0", | ||
"name": "@apache-arrow/ts", | ||
@@ -30,4 +30,4 @@ "browser": "Arrow.dom.ts", | ||
"dependencies": { | ||
"@types/node": "^20.12.7", | ||
"@swc/helpers": "^0.5.10", | ||
"@types/node": "^20.13.0", | ||
"@swc/helpers": "^0.5.11", | ||
"@types/command-line-args": "^5.2.3", | ||
@@ -34,0 +34,0 @@ "@types/command-line-usage": "^5.0.4", |
@@ -36,3 +36,3 @@ // Licensed to the Apache Software Foundation (ASF) under one | ||
if (!dictionaries) { | ||
dictionaries = generateDictionaryMap(fields); | ||
dictionaries = generateDictionaryMap(this.fields); | ||
} | ||
@@ -39,0 +39,0 @@ this.dictionaries = dictionaries; |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1185185
18287
Updated: @swc/helpers@^0.5.11
Updated: @types/node@^20.13.0