Comparing version 0.4.7 to 0.4.8
@@ -284,2 +284,33 @@ import { type Schema, Table as ArrowTable } from 'apache-arrow'; | ||
/**
 * Performs a "merge insert": new rows are joined against the rows already
 * stored in this table and the outcome of that join decides what happens
 * to each row.
 *
 * A single call can insert, update and delete rows in one transaction,
 * which makes it possible to express "insert if not exists", "upsert"
 * (update or insert), or "replace a slice of the data" (for example,
 * everything where month="january").
 *
 * Terminology: the incoming rows form the **source table**; the rows
 * already stored form the **target table**. The join puts every row in
 * one of three categories:
 *
 * - "Matched": present in both the source and the target.
 * - "Not matched": present only in the source (new data).
 * - "Not matched by source": present only in the target (old data).
 *
 * MergeInsertArgs selects the action taken for each category.
 *
 * Row order may appear to change afterwards, because an updated row is
 * deleted from the dataset and re-inserted at the end with its new values.
 *
 * @param on name of the column used to join source rows to target rows
 * @param data the new (source) rows
 * @param args options selecting the behavior for each category of rows
 */
mergeInsert: (
    on: string,
    data: Array<Record<string, unknown>> | ArrowTable,
    args: MergeInsertArgs
) => Promise<void>;
/** | ||
* List the indices on this table. | ||
@@ -319,2 +350,31 @@ */ | ||
} | ||
/**
 * Options for a merge insert. Each member decides what happens to one
 * category of rows produced by joining the source table (new data) with
 * the target table (old data). Every member is optional.
 */
export interface MergeInsertArgs {
    /**
     * When true, a row found in both the source table and the target table
     * is updated: the old row is replaced by the matching new row.
     *
     * Behavior is undefined when one row has several matches. At present
     * this produces multiple copies of the row, but that may change.
     */
    whenMatchedUpdateAll?: boolean;
    /**
     * When true, a row found only in the source table (new data) is
     * inserted into the target table.
     */
    whenNotMatchedInsertAll?: boolean;
    /**
     * When true, a row found only in the target table (old data) is deleted.
     *
     * When a string, it is treated as an SQL filter: a row is deleted only
     * if it has no match in the source table AND satisfies the filter.
     *
     * Useful for replacing a selection of existing data with new data.
     */
    whenNotMatchedBySourceDelete?: boolean | string;
}
export interface VectorIndex { | ||
@@ -428,2 +488,3 @@ columns: string[]; | ||
update(args: UpdateArgs | UpdateSqlArgs): Promise<void>; | ||
/**
 * Merge `data` into this table, joining on the column `on`; `args`
 * selects what happens to matched and unmatched rows.
 */
mergeInsert(
    on: string,
    data: Array<Record<string, unknown>> | ArrowTable,
    args: MergeInsertArgs
): Promise<void>;
/** | ||
@@ -430,0 +491,0 @@ * Clean up old versions of the table, freeing disk space. |
@@ -24,3 +24,3 @@ "use strict"; | ||
const util_1 = require("./util"); | ||
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateScalarIndex, tableCreateVectorIndex, tableCountRows, tableDelete, tableUpdate, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats, tableSchema | ||
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateScalarIndex, tableCreateVectorIndex, tableCountRows, tableDelete, tableUpdate, tableMergeInsert, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats, tableSchema | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
@@ -283,2 +283,24 @@ } = require('../native.js'); | ||
} | ||
async mergeInsert(on, data, args) { | ||
const whenMatchedUpdateAll = args.whenMatchedUpdateAll ?? false; | ||
const whenNotMatchedInsertAll = args.whenNotMatchedInsertAll ?? false; | ||
let whenNotMatchedBySourceDelete = false; | ||
let whenNotMatchedBySourceDeleteFilt = null; | ||
if (args.whenNotMatchedBySourceDelete !== undefined && args.whenNotMatchedBySourceDelete !== null) { | ||
whenNotMatchedBySourceDelete = true; | ||
if (args.whenNotMatchedBySourceDelete !== true) { | ||
whenNotMatchedBySourceDeleteFilt = args.whenNotMatchedBySourceDelete; | ||
} | ||
} | ||
const schema = await this.schema; | ||
let tbl; | ||
if (data instanceof apache_arrow_1.Table) { | ||
tbl = data; | ||
} | ||
else { | ||
tbl = (0, arrow_1.makeArrowTable)(data, { schema }); | ||
} | ||
const buffer = await (0, arrow_1.fromTableToBuffer)(tbl, this._embeddings, schema); | ||
this._tbl = await tableMergeInsert.call(this._tbl, on, whenMatchedUpdateAll, whenNotMatchedInsertAll, whenNotMatchedBySourceDelete, whenNotMatchedBySourceDeleteFilt, buffer); | ||
} | ||
/** | ||
@@ -285,0 +307,0 @@ * Clean up old versions of the table, freeing disk space. |
@@ -1,2 +0,2 @@ | ||
import { type EmbeddingFunction, type Table, type VectorIndexParams, type Connection, type ConnectionOptions, type CreateTableOptions, type VectorIndex, type WriteOptions, type IndexStats, type UpdateArgs, type UpdateSqlArgs } from '../index'; | ||
import { type EmbeddingFunction, type Table, type VectorIndexParams, type Connection, type ConnectionOptions, type CreateTableOptions, type VectorIndex, type WriteOptions, type IndexStats, type UpdateArgs, type UpdateSqlArgs, type MergeInsertArgs } from '../index'; | ||
import { Query } from '../query'; | ||
@@ -35,2 +35,3 @@ import { Table as ArrowTable } from 'apache-arrow'; | ||
filter(where: string): Query<T>; | ||
/**
 * Merge `data` into this table, joining on the column `on`; `args`
 * selects what happens to matched and unmatched rows.
 */
mergeInsert(
    on: string,
    data: Array<Record<string, unknown>> | ArrowTable,
    args: MergeInsertArgs
): Promise<void>;
add(data: Array<Record<string, unknown>> | ArrowTable): Promise<number>; | ||
@@ -37,0 +38,0 @@ overwrite(data: Array<Record<string, unknown>> | ArrowTable): Promise<number>; |
@@ -185,2 +185,42 @@ "use strict"; | ||
} | ||
async mergeInsert(on, data, args) { | ||
let tbl; | ||
if (data instanceof apache_arrow_1.Table) { | ||
tbl = data; | ||
} | ||
else { | ||
tbl = (0, index_1.makeArrowTable)(data, await this.schema); | ||
} | ||
const queryParams = { | ||
on | ||
}; | ||
if (args.whenMatchedUpdateAll ?? false) { | ||
queryParams.when_matched_update_all = 'true'; | ||
} | ||
else { | ||
queryParams.when_matched_update_all = 'false'; | ||
} | ||
if (args.whenNotMatchedInsertAll ?? false) { | ||
queryParams.when_not_matched_insert_all = 'true'; | ||
} | ||
else { | ||
queryParams.when_not_matched_insert_all = 'false'; | ||
} | ||
if (args.whenNotMatchedBySourceDelete !== false && args.whenNotMatchedBySourceDelete !== null && args.whenNotMatchedBySourceDelete !== undefined) { | ||
queryParams.when_not_matched_by_source_delete = 'true'; | ||
if (typeof args.whenNotMatchedBySourceDelete === 'string') { | ||
queryParams.when_not_matched_by_source_delete_filt = args.whenNotMatchedBySourceDelete; | ||
} | ||
} | ||
else { | ||
queryParams.when_not_matched_by_source_delete = 'false'; | ||
} | ||
const buffer = await (0, arrow_1.fromTableToStreamBuffer)(tbl, this._embeddings); | ||
const res = await this._client.post(`/v1/table/${this._name}/merge_insert/`, buffer, queryParams, 'application/vnd.apache.arrow.stream'); | ||
if (res.status !== 200) { | ||
throw new Error(`Server Error, status: ${res.status}, ` + | ||
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions | ||
`message: ${res.statusText}: ${res.data}`); | ||
} | ||
} | ||
async add(data) { | ||
@@ -187,0 +227,0 @@ let tbl; |
@@ -438,2 +438,34 @@ "use strict"; | ||
}); | ||
it('can merge insert records into the table', async function () {
    const tmpDir = await (0, temp_1.track)().mkdir('lancejs');
    const db = await lancedb.connect(tmpDir);
    const table = await db.createTable('my_table', [{ id: 1, age: 1 }, { id: 2, age: 1 }]);
    // Insert-only: id 3 is added, the existing id 2 keeps age 1.
    const insertOnly = [{ id: 2, age: 2 }, { id: 3, age: 2 }];
    await table.mergeInsert('id', insertOnly, { whenNotMatchedInsertAll: true });
    assert.equal(await table.countRows(), 3);
    const ageTwo = await table.filter('age = 2').execute();
    assert.equal(ageTwo.length, 1);
    // Upsert: id 3 is updated to age 3 and id 4 is added.
    const upsertRows = [{ id: 3, age: 3 }, { id: 4, age: 3 }];
    await table.mergeInsert('id', upsertRows, {
        whenMatchedUpdateAll: true,
        whenNotMatchedInsertAll: true
    });
    assert.equal(await table.countRows(), 4);
    const ageThree = await table.filter('age = 3').execute();
    assert.equal(ageThree.length, 2);
    // Upsert plus filtered delete: target-only rows with age < 3 are removed.
    const replacement = [{ id: 5, age: 4 }];
    await table.mergeInsert('id', replacement, {
        whenMatchedUpdateAll: true,
        whenNotMatchedInsertAll: true,
        whenNotMatchedBySourceDelete: 'age < 3'
    });
    assert.equal(await table.countRows(), 3);
    // Upsert plus unfiltered delete: only rows present in the source remain.
    await table.mergeInsert('id', replacement, {
        whenMatchedUpdateAll: true,
        whenNotMatchedInsertAll: true,
        whenNotMatchedBySourceDelete: true
    });
    assert.equal(await table.countRows(), 1);
});
it('can update records in the table', async function () { | ||
@@ -440,0 +472,0 @@ const uri = await createTestDB(); |
{ | ||
"name": "vectordb", | ||
"version": "0.4.7", | ||
"version": "0.4.8", | ||
"description": " Serverless, low-latency vector database for AI applications", | ||
@@ -20,4 +20,8 @@ "main": "dist/index.js", | ||
"type": "git", | ||
"url": "https://github.com/lancedb/lancedb/node" | ||
"url": "https://github.com/lancedb/lancedb.git" | ||
}, | ||
"homepage": "https://lancedb.github.io/lancedb/", | ||
"bugs": { | ||
"url": "https://github.com/lancedb/lancedb/issues" | ||
}, | ||
"keywords": [ | ||
@@ -85,8 +89,8 @@ "data-format", | ||
"optionalDependencies": { | ||
"@lancedb/vectordb-darwin-arm64": "0.4.7", | ||
"@lancedb/vectordb-darwin-x64": "0.4.7", | ||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.7", | ||
"@lancedb/vectordb-linux-x64-gnu": "0.4.7", | ||
"@lancedb/vectordb-win32-x64-msvc": "0.4.7" | ||
"@lancedb/vectordb-darwin-arm64": "0.4.8", | ||
"@lancedb/vectordb-darwin-x64": "0.4.8", | ||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.8", | ||
"@lancedb/vectordb-linux-x64-gnu": "0.4.8", | ||
"@lancedb/vectordb-win32-x64-msvc": "0.4.8" | ||
} | ||
} |
@@ -40,2 +40,3 @@ // Copyright 2023 Lance Developers. | ||
tableUpdate, | ||
tableMergeInsert, | ||
tableCleanupOldVersions, | ||
@@ -445,2 +446,34 @@ tableCompactFiles, | ||
/**
 * Performs a "merge insert": new rows are joined against the rows already
 * stored in this table and the outcome of that join decides what happens
 * to each row.
 *
 * A single call can insert, update and delete rows in one transaction,
 * which makes it possible to express "insert if not exists", "upsert"
 * (update or insert), or "replace a slice of the data" (for example,
 * everything where month="january").
 *
 * Terminology: the incoming rows form the **source table**; the rows
 * already stored form the **target table**. The join puts every row in
 * one of three categories:
 *
 * - "Matched": present in both the source and the target.
 * - "Not matched": present only in the source (new data).
 * - "Not matched by source": present only in the target (old data).
 *
 * MergeInsertArgs selects the action taken for each category.
 *
 * Row order may appear to change afterwards, because an updated row is
 * deleted from the dataset and re-inserted at the end with its new values.
 *
 * @param on name of the column used to join source rows to target rows
 * @param data the new (source) rows
 * @param args options selecting the behavior for each category of rows
 */
mergeInsert: (
  on: string,
  data: Array<Record<string, unknown>> | ArrowTable,
  args: MergeInsertArgs
) => Promise<void>
/** | ||
* List the indices on this table. | ||
@@ -488,2 +521,32 @@ */ | ||
/**
 * Options for a merge insert. Each member decides what happens to one
 * category of rows produced by joining the source table (new data) with
 * the target table (old data). Every member is optional.
 */
export interface MergeInsertArgs {
  /**
   * When true, a row found in both the source table and the target table
   * is updated: the old row is replaced by the matching new row.
   *
   * Behavior is undefined when one row has several matches. At present
   * this produces multiple copies of the row, but that may change.
   */
  whenMatchedUpdateAll?: boolean

  /**
   * When true, a row found only in the source table (new data) is
   * inserted into the target table.
   */
  whenNotMatchedInsertAll?: boolean

  /**
   * When true, a row found only in the target table (old data) is deleted.
   *
   * When a string, it is treated as an SQL filter: a row is deleted only
   * if it has no match in the source table AND satisfies the filter.
   *
   * Useful for replacing a selection of existing data with new data.
   */
  whenNotMatchedBySourceDelete?: boolean | string
}
export interface VectorIndex { | ||
@@ -827,2 +890,34 @@ columns: string[] | ||
/**
 * Merge `data` into this table through the native `tableMergeInsert` call.
 *
 * @param on column used to join the new rows against the existing rows
 * @param data new rows, either plain records or an Arrow table
 * @param args per-category behavior; unset flags default to false
 * @returns a promise that resolves once `this._tbl` has been replaced by
 *          the value returned from the native call
 */
async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
  const whenMatchedUpdateAll = args.whenMatchedUpdateAll ?? false
  const whenNotMatchedInsertAll = args.whenNotMatchedInsertAll ?? false

  // Deletion is enabled only by `true` or by a filter string. An explicit
  // `false` (like undefined/null) must leave it disabled; previously
  // `false` enabled deletion and was forwarded as the filter value.
  const deleteOption = args.whenNotMatchedBySourceDelete
  const whenNotMatchedBySourceDeleteFilt: string | null =
    typeof deleteOption === 'string' ? deleteOption : null
  const whenNotMatchedBySourceDelete =
    deleteOption === true || whenNotMatchedBySourceDeleteFilt !== null

  const schema = await this.schema
  // Plain records are converted to an Arrow table using this table's schema.
  const tbl: ArrowTable = data instanceof ArrowTable
    ? data
    : makeArrowTable(data, { schema })
  const buffer = await fromTableToBuffer(tbl, this._embeddings, schema)

  this._tbl = await tableMergeInsert.call(
    this._tbl,
    on,
    whenMatchedUpdateAll,
    whenNotMatchedInsertAll,
    whenNotMatchedBySourceDelete,
    whenNotMatchedBySourceDeleteFilt,
    buffer
  )
}
/** | ||
@@ -829,0 +924,0 @@ * Clean up old versions of the table, freeing disk space. |
@@ -27,3 +27,4 @@ // Copyright 2023 LanceDB Developers. | ||
type UpdateSqlArgs, | ||
makeArrowTable | ||
makeArrowTable, | ||
type MergeInsertArgs | ||
} from '../index' | ||
@@ -278,2 +279,48 @@ import { Query } from '../query' | ||
/**
 * Merge `data` into the remote table by POSTing it as an Arrow stream to
 * the table's `merge_insert` endpoint. The options in `args` are sent as
 * 'true'/'false' query parameters.
 *
 * @param on column used to join the new rows against the existing rows
 * @param data new rows, either plain records or an Arrow table
 * @param args per-category behavior; unset flags are sent as 'false'
 * @throws Error when the response status is not 200
 */
async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
  const source: ArrowTable = data instanceof ArrowTable
    ? data
    : makeArrowTable(data, await this.schema)

  const deleteOption = args.whenNotMatchedBySourceDelete
  const deleteEnabled = !(deleteOption === false || deleteOption === null || deleteOption === undefined)

  const queryParams: any = {
    on,
    when_matched_update_all: (args.whenMatchedUpdateAll ?? false) ? 'true' : 'false',
    when_not_matched_insert_all: (args.whenNotMatchedInsertAll ?? false) ? 'true' : 'false',
    when_not_matched_by_source_delete: deleteEnabled ? 'true' : 'false'
  }
  // The filter parameter is only sent when the option is a string.
  if (deleteEnabled && typeof deleteOption === 'string') {
    queryParams.when_not_matched_by_source_delete_filt = deleteOption
  }

  const payload = await fromTableToStreamBuffer(source, this._embeddings)
  const response = await this._client.post(
    `/v1/table/${this._name}/merge_insert/`,
    payload,
    queryParams,
    'application/vnd.apache.arrow.stream'
  )
  if (response.status === 200) {
    return
  }
  throw new Error(
    `Server Error, status: ${response.status}, ` +
      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
      `message: ${response.statusText}: ${response.data}`
  )
}
async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> { | ||
@@ -280,0 +327,0 @@ let tbl: ArrowTable |
@@ -534,2 +534,40 @@ // Copyright 2023 LanceDB Developers. | ||
it('can merge insert records into the table', async function () {
  const tmpDir = await track().mkdir('lancejs')
  const db = await lancedb.connect(tmpDir)
  const table = await db.createTable('my_table', [{ id: 1, age: 1 }, { id: 2, age: 1 }])

  // Insert-only: id 3 is added, the existing id 2 keeps age 1.
  const insertOnly = [{ id: 2, age: 2 }, { id: 3, age: 2 }]
  await table.mergeInsert('id', insertOnly, { whenNotMatchedInsertAll: true })
  assert.equal(await table.countRows(), 3)
  const ageTwo = await table.filter('age = 2').execute()
  assert.equal(ageTwo.length, 1)

  // Upsert: id 3 is updated to age 3 and id 4 is added.
  const upsertRows = [{ id: 3, age: 3 }, { id: 4, age: 3 }]
  await table.mergeInsert('id', upsertRows, {
    whenMatchedUpdateAll: true,
    whenNotMatchedInsertAll: true
  })
  assert.equal(await table.countRows(), 4)
  const ageThree = await table.filter('age = 3').execute()
  assert.equal(ageThree.length, 2)

  // Upsert plus filtered delete: target-only rows with age < 3 are removed.
  const replacement = [{ id: 5, age: 4 }]
  await table.mergeInsert('id', replacement, {
    whenMatchedUpdateAll: true,
    whenNotMatchedInsertAll: true,
    whenNotMatchedBySourceDelete: 'age < 3'
  })
  assert.equal(await table.countRows(), 3)

  // Upsert plus unfiltered delete: only rows present in the source remain.
  await table.mergeInsert('id', replacement, {
    whenMatchedUpdateAll: true,
    whenNotMatchedInsertAll: true,
    whenNotMatchedBySourceDelete: true
  })
  assert.equal(await table.countRows(), 1)
})
it('can update records in the table', async function () { | ||
@@ -536,0 +574,0 @@ const uri = await createTestDB() |
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
No website
Quality: Package does not have a website.
Found 1 instance in 1 package
280950
7471
0
1