Comparing version 0.1.1 to 0.1.2
@@ -1,2 +0,2 @@ | ||
import { Table as ArrowTable } from 'apache-arrow'; | ||
import { type Table as ArrowTable } from 'apache-arrow'; | ||
/** | ||
@@ -20,23 +20,109 @@ * Connect to a LanceDB instance at the given URI | ||
/** | ||
* Open a table in the database. | ||
* @param name The name of the table. | ||
*/ | ||
* Open a table in the database. | ||
* | ||
* @param name The name of the table. | ||
*/ | ||
openTable(name: string): Promise<Table>; | ||
/** | ||
* Open a table in the database. | ||
* | ||
* @param name The name of the table. | ||
* @param embeddings An embedding function to use on this Table | ||
*/ | ||
openTable<T>(name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>; | ||
/** | ||
* Creates a new Table and initialize it with new data. | ||
* | ||
* @param name The name of the table. | ||
* @param data Non-empty Array of Records to be inserted into the Table | ||
*/ | ||
createTable(name: string, data: Array<Record<string, unknown>>): Promise<Table>; | ||
/** | ||
* Creates a new Table and initialize it with new data. | ||
* | ||
* @param name The name of the table. | ||
* @param data Non-empty Array of Records to be inserted into the Table | ||
* @param embeddings An embedding function to use on this Table | ||
*/ | ||
createTable<T>(name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>; | ||
createTableArrow(name: string, table: ArrowTable): Promise<Table>; | ||
} | ||
/** | ||
* A table in a LanceDB database. | ||
*/ | ||
export declare class Table { | ||
export declare class Table<T = number[]> { | ||
private readonly _tbl; | ||
private readonly _name; | ||
private readonly _embeddings?; | ||
constructor(tbl: any, name: string); | ||
/** | ||
* @param tbl | ||
* @param name | ||
* @param embeddings An embedding function to use when interacting with this table | ||
*/ | ||
constructor(tbl: any, name: string, embeddings: EmbeddingFunction<T>); | ||
get name(): string; | ||
/** | ||
* Create a search query to find the nearest neighbors of the given query vector. | ||
* @param queryVector The query vector. | ||
*/ | ||
search(queryVector: number[]): Query; | ||
* Creates a search query to find the nearest neighbors of the given search term | ||
* @param query The query search term | ||
*/ | ||
search(query: T): Query; | ||
/** | ||
* Insert records into this Table. | ||
* | ||
* @param data Records to be inserted into the Table | ||
* @return The number of rows added to the table | ||
*/ | ||
add(data: Array<Record<string, unknown>>): Promise<number>; | ||
/** | ||
* Insert records into this Table, replacing its contents. | ||
* | ||
* @param data Records to be inserted into the Table | ||
* @return The number of rows added to the table | ||
*/ | ||
overwrite(data: Array<Record<string, unknown>>): Promise<number>; | ||
/** | ||
* Create an ANN vector index on this Table. | ||
* | ||
* @param indexParams The parameters of this Index, @see VectorIndexParams. | ||
*/ | ||
create_index(indexParams: VectorIndexParams): Promise<any>; | ||
} | ||
/**
 * Build parameters for a vector index whose `type` is 'ivf_pq'.
 * Every field except `type` is optional.
 */
interface IvfPQIndexConfig { | ||
/** | ||
* The column to be indexed | ||
*/ | ||
column?: string; | ||
/** | ||
* A unique name for the index | ||
*/ | ||
index_name?: string; | ||
/** | ||
* Metric type, L2 or Cosine | ||
*/ | ||
metric_type?: MetricType; | ||
/** | ||
* The number of partitions of this index | ||
*/ | ||
num_partitions?: number; | ||
/** | ||
* The max number of iterations for kmeans training. | ||
*/ | ||
max_iters?: number; | ||
/** | ||
* Train as optimized product quantization. | ||
*/ | ||
use_opq?: boolean; | ||
/** | ||
* Number of subvectors to build PQ code | ||
*/ | ||
num_sub_vectors?: number; | ||
/** | ||
* The number of bits used to represent one PQ centroid. | ||
*/ | ||
num_bits?: number; | ||
/** | ||
* Max number of iterations to train OPQ, if `use_opq` is true. | ||
*/ | ||
max_opq_iters?: number; | ||
/**
 * Literal tag identifying this index type.
 */
type: 'ivf_pq'; | ||
} | ||
export type VectorIndexParams = IvfPQIndexConfig; | ||
/** | ||
@@ -47,16 +133,70 @@ * A builder for nearest neighbor queries for LanceDB. | ||
private readonly _tbl; | ||
private readonly _query_vector; | ||
private readonly _queryVector; | ||
private _limit; | ||
private readonly _refine_factor?; | ||
private readonly _nprobes; | ||
private _refineFactor?; | ||
private _nprobes; | ||
private readonly _columns?; | ||
private _filter?; | ||
private readonly _metric; | ||
private _metricType?; | ||
constructor(tbl: any, queryVector: number[]); | ||
/** | ||
* Sets the number of results that will be returned | ||
* @param value number of results | ||
*/ | ||
limit(value: number): Query; | ||
/** | ||
* Refine the results by reading extra elements and re-ranking them in memory. | ||
* @param value refine factor to use in this query. | ||
*/ | ||
refineFactor(value: number): Query; | ||
/** | ||
* The number of probes used. A higher number makes search more accurate but also slower. | ||
* @param value The number of probes used. | ||
*/ | ||
nprobes(value: number): Query; | ||
/** | ||
* A filter statement to be applied to this query. | ||
* @param value A filter in the same format used by a sql WHERE clause. | ||
*/ | ||
filter(value: string): Query; | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
* The MetricType used for this Query. | ||
* @param value The metric to use. @see MetricType for the different options | ||
*/ | ||
metricType(value: MetricType): Query; | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
execute<T = Record<string, unknown>>(): Promise<T[]>; | ||
} | ||
export declare enum WriteMode { | ||
Overwrite = "overwrite", | ||
Append = "append" | ||
} | ||
/** | ||
* An embedding function that automatically creates vector representation for a given column. | ||
*/ | ||
export interface EmbeddingFunction<T> { | ||
/** | ||
* The name of the column that will be used as input for the Embedding Function. | ||
*/ | ||
sourceColumn: string; | ||
/** | ||
* Creates a vector representation for the given values. | ||
*/ | ||
embed: (data: T[]) => number[][]; | ||
} | ||
/** | ||
* Distance metrics type. | ||
*/ | ||
export declare enum MetricType { | ||
/** | ||
* Euclidean distance | ||
*/ | ||
L2 = "l2", | ||
/** | ||
* Cosine distance | ||
*/ | ||
Cosine = "cosine" | ||
} | ||
export {}; |
@@ -25,6 +25,7 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.Query = exports.Table = exports.Connection = exports.connect = void 0; | ||
exports.MetricType = exports.WriteMode = exports.Query = exports.Table = exports.Connection = exports.connect = void 0; | ||
const apache_arrow_1 = require("apache-arrow"); | ||
const arrow_1 = require("./arrow"); | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../native.js'); | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js'); | ||
/** | ||
@@ -59,46 +60,22 @@ * Connect to a LanceDB instance at the given URI | ||
} | ||
/** | ||
* Open a table in the database. | ||
* @param name The name of the table. | ||
*/ | ||
openTable(name) { | ||
openTable(name, embeddings) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const tbl = yield databaseOpenTable.call(this._db, name); | ||
return new Table(tbl, name); | ||
if (embeddings !== undefined) { | ||
return new Table(tbl, name, embeddings); | ||
} | ||
else { | ||
return new Table(tbl, name); | ||
} | ||
}); | ||
} | ||
createTable(name, data) { | ||
createTable(name, data, embeddings) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
if (data.length === 0) { | ||
throw new Error('At least one record needs to be provided'); | ||
const tbl = yield tableCreate.call(this._db, name, yield (0, arrow_1.fromRecordsToBuffer)(data, embeddings)); | ||
if (embeddings !== undefined) { | ||
return new Table(tbl, name, embeddings); | ||
} | ||
const columns = Object.keys(data[0]); | ||
const records = {}; | ||
for (const columnsKey of columns) { | ||
if (columnsKey === 'vector') { | ||
const children = new apache_arrow_1.Field('item', new apache_arrow_1.Float32()); | ||
const list = new apache_arrow_1.List(children); | ||
const listBuilder = (0, apache_arrow_1.makeBuilder)({ | ||
type: list | ||
}); | ||
const vectorSize = data[0].vector.length; | ||
for (const datum of data) { | ||
if (datum[columnsKey].length !== vectorSize) { | ||
throw new Error(`Invalid vector size, expected ${vectorSize}`); | ||
} | ||
listBuilder.append(datum[columnsKey]); | ||
} | ||
records[columnsKey] = listBuilder.finish().toVector(); | ||
} | ||
else { | ||
const values = []; | ||
for (const datum of data) { | ||
values.push(datum[columnsKey]); | ||
} | ||
records[columnsKey] = (0, apache_arrow_1.vectorFromArray)(values); | ||
} | ||
else { | ||
return new Table(tbl, name); | ||
} | ||
const table = new apache_arrow_1.Table(records); | ||
yield this.createTableArrow(name, table); | ||
return yield this.openTable(name); | ||
}); | ||
@@ -115,9 +92,7 @@ } | ||
exports.Connection = Connection; | ||
/** | ||
* A table in a LanceDB database. | ||
*/ | ||
class Table { | ||
constructor(tbl, name) { | ||
constructor(tbl, name, embeddings) { | ||
this._tbl = tbl; | ||
this._name = name; | ||
this._embeddings = embeddings; | ||
} | ||
@@ -128,8 +103,47 @@ get name() { | ||
/** | ||
* Create a search query to find the nearest neighbors of the given query vector. | ||
* @param queryVector The query vector. | ||
*/ | ||
search(queryVector) { | ||
* Creates a search query to find the nearest neighbors of the given search term | ||
* @param query The query search term | ||
*/ | ||
search(query) { | ||
let queryVector; | ||
if (this._embeddings !== undefined) { | ||
queryVector = this._embeddings.embed([query])[0]; | ||
} | ||
else { | ||
queryVector = query; | ||
} | ||
return new Query(this._tbl, queryVector); | ||
} | ||
/** | ||
* Insert records into this Table. | ||
* | ||
* @param data Records to be inserted into the Table | ||
* @return The number of rows added to the table | ||
*/ | ||
add(data) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
return tableAdd.call(this._tbl, yield (0, arrow_1.fromRecordsToBuffer)(data, this._embeddings), WriteMode.Append.toString()); | ||
}); | ||
} | ||
/** | ||
* Insert records into this Table, replacing its contents. | ||
* | ||
* @param data Records to be inserted into the Table | ||
* @return The number of rows added to the table | ||
*/ | ||
overwrite(data) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
return tableAdd.call(this._tbl, yield (0, arrow_1.fromRecordsToBuffer)(data, this._embeddings), WriteMode.Overwrite.toString()); | ||
}); | ||
} | ||
/** | ||
* Create an ANN vector index on this Table. | ||
* | ||
* @param indexParams The parameters of this Index, @see VectorIndexParams. | ||
*/ | ||
create_index(indexParams) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
return tableCreateVectorIndex.call(this._tbl, indexParams); | ||
}); | ||
} | ||
} | ||
@@ -142,11 +156,15 @@ exports.Table = Table; | ||
constructor(tbl, queryVector) { | ||
this._metric = 'L2'; | ||
this._tbl = tbl; | ||
this._query_vector = queryVector; | ||
this._queryVector = queryVector; | ||
this._limit = 10; | ||
this._nprobes = 20; | ||
this._refine_factor = undefined; | ||
this._refineFactor = undefined; | ||
this._columns = undefined; | ||
this._filter = undefined; | ||
this._metricType = undefined; | ||
} | ||
/** | ||
* Sets the number of results that will be returned | ||
* @param value number of results | ||
*/ | ||
limit(value) { | ||
@@ -156,2 +174,22 @@ this._limit = value; | ||
} | ||
/** | ||
* Refine the results by reading extra elements and re-ranking them in memory. | ||
* @param value refine factor to use in this query. | ||
*/ | ||
refineFactor(value) { | ||
this._refineFactor = value; | ||
return this; | ||
} | ||
/** | ||
* The number of probes used. A higher number makes search more accurate but also slower. | ||
* @param value The number of probes used. | ||
*/ | ||
nprobes(value) { | ||
this._nprobes = value; | ||
return this; | ||
} | ||
/** | ||
* A filter statement to be applied to this query. | ||
* @param value A filter in the same format used by a sql WHERE clause. | ||
*/ | ||
filter(value) { | ||
@@ -162,13 +200,15 @@ this._filter = value; | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
* The MetricType used for this Query. | ||
* @param value The metric to use. @see MetricType for the different options | ||
*/ | ||
metricType(value) { | ||
this._metricType = value; | ||
return this; | ||
} | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
execute() { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
let buffer; | ||
if (this._filter != null) { | ||
buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter); | ||
} | ||
else { | ||
buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit); | ||
} | ||
const buffer = yield tableSearch.call(this._tbl, this); | ||
const data = (0, apache_arrow_1.tableFromIPC)(buffer); | ||
@@ -191,1 +231,20 @@ return data.toArray().map((entry) => { | ||
exports.Query = Query; | ||
var WriteMode; | ||
(function (WriteMode) { | ||
WriteMode["Overwrite"] = "overwrite"; | ||
WriteMode["Append"] = "append"; | ||
})(WriteMode = exports.WriteMode || (exports.WriteMode = {})); | ||
/** | ||
* Distance metrics type. | ||
*/ | ||
var MetricType; | ||
(function (MetricType) { | ||
/** | ||
* Euclidean distance | ||
*/ | ||
MetricType["L2"] = "l2"; | ||
/** | ||
* Cosine distance | ||
*/ | ||
MetricType["Cosine"] = "cosine"; | ||
})(MetricType = exports.MetricType || (exports.MetricType = {})); |
@@ -29,2 +29,3 @@ "use strict"; | ||
const lancedb = require("../index"); | ||
const index_1 = require("../index"); | ||
(0, mocha_1.describe)('LanceDB client', function () { | ||
@@ -84,3 +85,3 @@ (0, mocha_1.describe)('when creating a connection to lancedb', function () { | ||
const table = yield con.openTable('vectors'); | ||
const results = yield table.search([0.1, 0.3]).filter('id == 2').execute(); | ||
const results = yield table.search([0.1, 0.1]).filter('id == 2').execute(); | ||
chai_1.assert.equal(results.length, 1); | ||
@@ -107,12 +108,105 @@ chai_1.assert.equal(results[0].id, 2); | ||
}); | ||
it('appends records to an existing table ', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const dir = yield (0, temp_1.track)().mkdir('lancejs'); | ||
const con = yield lancedb.connect(dir); | ||
const data = [ | ||
{ id: 1, vector: [0.1, 0.2], price: 10, name: 'a' }, | ||
{ id: 2, vector: [1.1, 1.2], price: 50, name: 'b' } | ||
]; | ||
const table = yield con.createTable('vectors', data); | ||
const results = yield table.search([0.1, 0.3]).execute(); | ||
chai_1.assert.equal(results.length, 2); | ||
const dataAdd = [ | ||
{ id: 3, vector: [2.1, 2.2], price: 10, name: 'c' }, | ||
{ id: 4, vector: [3.1, 3.2], price: 50, name: 'd' } | ||
]; | ||
yield table.add(dataAdd); | ||
const resultsAdd = yield table.search([0.1, 0.3]).execute(); | ||
chai_1.assert.equal(resultsAdd.length, 4); | ||
}); | ||
}); | ||
it('overwrite all records in a table', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
const table = yield con.openTable('vectors'); | ||
const results = yield table.search([0.1, 0.3]).execute(); | ||
chai_1.assert.equal(results.length, 2); | ||
const dataOver = [ | ||
{ vector: [2.1, 2.2], price: 10, name: 'foo' }, | ||
{ vector: [3.1, 3.2], price: 50, name: 'bar' } | ||
]; | ||
yield table.overwrite(dataOver); | ||
const resultsAdd = yield table.search([0.1, 0.3]).execute(); | ||
chai_1.assert.equal(resultsAdd.length, 2); | ||
}); | ||
}); | ||
}); | ||
// Vector-index suite: builds an 'ivf_pq' index on a freshly created test table.
(0, mocha_1.describe)('when creating a vector index', function () {
    // The previous title ('overwrite all records in a table') was copied from the
    // overwrite test and did not describe what this case exercises.
    it('creates an IVF_PQ index on the vector column', function () {
        return __awaiter(this, void 0, void 0, function* () {
            // createTestDB(numDimensions, numRows): 32-dimensional vectors, 300 rows.
            const uri = yield createTestDB(32, 300);
            const con = yield lancedb.connect(uri);
            const table = yield con.openTable('vectors');
            // The test passes as long as index creation resolves without throwing.
            yield table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 });
        });
    }).timeout(10000); // Timeout is high partially because GH macos runner is pretty slow
});
(0, mocha_1.describe)('when using a custom embedding function', function () { | ||
class TextEmbedding { | ||
constructor(targetColumn) { | ||
this._embedding_map = new Map([ | ||
['foo', [2.1, 2.2]], | ||
['bar', [3.1, 3.2]] | ||
]); | ||
this.sourceColumn = targetColumn; | ||
} | ||
embed(data) { | ||
return data.map(datum => { var _a; return (_a = this._embedding_map.get(datum)) !== null && _a !== void 0 ? _a : [0.0, 0.0]; }); | ||
} | ||
} | ||
it('should encode the original data into embeddings', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const dir = yield (0, temp_1.track)().mkdir('lancejs'); | ||
const con = yield lancedb.connect(dir); | ||
const embeddings = new TextEmbedding('name'); | ||
const data = [ | ||
{ price: 10, name: 'foo' }, | ||
{ price: 50, name: 'bar' } | ||
]; | ||
const table = yield con.createTable('vectors', data, embeddings); | ||
const results = yield table.search('foo').execute(); | ||
chai_1.assert.equal(results.length, 2); | ||
}); | ||
}); | ||
}); | ||
}); | ||
function createTestDB() { | ||
(0, mocha_1.describe)('Query object', function () { | ||
it('sets custom parameters', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const query = new index_1.Query(undefined, [0.1, 0.3]) | ||
.limit(1) | ||
.metricType(index_1.MetricType.Cosine) | ||
.refineFactor(100) | ||
.nprobes(20); | ||
chai_1.assert.equal(query._limit, 1); | ||
chai_1.assert.equal(query._metricType, index_1.MetricType.Cosine); | ||
chai_1.assert.equal(query._refineFactor, 100); | ||
chai_1.assert.equal(query._nprobes, 20); | ||
}); | ||
}); | ||
}); | ||
function createTestDB(numDimensions = 2, numRows = 2) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const dir = yield (0, temp_1.track)().mkdir('lancejs'); | ||
const con = yield lancedb.connect(dir); | ||
const data = [ | ||
{ id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true }, | ||
{ id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false } | ||
]; | ||
const data = []; | ||
for (let i = 0; i < numRows; i++) { | ||
const vector = []; | ||
for (let j = 0; j < numDimensions; j++) { | ||
vector.push(i + (j * 0.1)); | ||
} | ||
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector }); | ||
} | ||
yield con.createTable('vectors', data); | ||
@@ -119,0 +213,0 @@ return dir; |
@@ -9,10 +9,3 @@ { | ||
"tsc": "tsc -b", | ||
<<<<<<< HEAD | ||
"build": "tsc" | ||
======= | ||
"build": "tsc", | ||
"clean": "rm -rf data/", | ||
"create": "node dist/create.js", | ||
"query": "node dist/query.js" | ||
>>>>>>> gsilvestrin/nodejs_linux_1 | ||
}, | ||
@@ -19,0 +12,0 @@ "author": "Lance Devs", |
@@ -15,3 +15,2 @@ // Copyright 2023 Lance Developers. | ||
<<<<<<<< HEAD:node/examples/ts/src/index.ts | ||
import * as vectordb from 'vectordb'; | ||
@@ -38,14 +37,1 @@ | ||
example().then(_ => { console.log ("All done!") }) | ||
======== | ||
let nativeLib; | ||
if (process.platform === "darwin" && process.arch === "arm64") { | ||
nativeLib = require('./darwin_arm64.node') | ||
} else if (process.platform === "linux" && process.arch === "x64") { | ||
nativeLib = require('./linux-x64.node') | ||
} else { | ||
throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`) | ||
} | ||
module.exports = nativeLib | ||
>>>>>>>> gsilvestrin/nodejs_linux_1:node/native.js |
@@ -17,11 +17,25 @@ // Copyright 2023 Lance Developers. | ||
if (process.platform === "darwin" && process.arch === "arm64") { | ||
nativeLib = require('./darwin_arm64.node') | ||
} else if (process.platform === "linux" && process.arch === "x64") { | ||
nativeLib = require('./linux-x64.node') | ||
} else { | ||
throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`) | ||
/**
 * Loads the prebuilt native binding that matches the current
 * `process.platform` / `process.arch` pair.
 *
 * @returns the exports of the matching `.node` binary
 * @throws {Error} when no binary is listed for the current platform/arch pair;
 *         errors raised by `require` itself propagate unchanged
 */
function getPlatformLibrary() {
    // Strict equality for both checks: the original compared `process.arch`
    // with loose `==` while `process.platform` used `===`.
    if (process.platform === "darwin" && process.arch === "arm64") {
        return require('./aarch64-apple-darwin.node');
    } else if (process.platform === "darwin" && process.arch === "x64") {
        return require('./x86_64-apple-darwin.node');
    } else if (process.platform === "linux" && process.arch === "x64") {
        return require('./x86_64-unknown-linux-gnu.node');
    } else {
        throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
    }
}
// Try the generic './index.node' binding first. Fall back to the
// platform-specific binary only when that module cannot be found.
try {
    nativeLib = require('./index.node')
} catch (e) {
    // Guard the property access so a non-object throwable cannot mask the real failure.
    if (e != null && e.code === "MODULE_NOT_FOUND") {
        nativeLib = getPlatformLibrary();
    } else {
        // Keep the original message as the prefix and append the underlying
        // reason, which the previous generic error discarded.
        const reason = e instanceof Error ? e.message : String(e);
        throw new Error(`vectordb: failed to load native library. Please file a bug report at https://github.com/lancedb/lancedb/issues (cause: ${reason})`);
    }
}
module.exports = nativeLib | ||
{ | ||
"name": "vectordb", | ||
"version": "0.1.1", | ||
"version": "0.1.2", | ||
"description": " Serverless, low-latency vector database for AI applications", | ||
@@ -5,0 +5,0 @@ "main": "dist/index.js", |
293
src/index.ts
@@ -16,15 +16,11 @@ // Copyright 2023 Lance Developers. | ||
import { | ||
Field, | ||
Float32, | ||
List, | ||
makeBuilder, | ||
RecordBatchFileWriter, | ||
Table as ArrowTable, | ||
type Table as ArrowTable, | ||
tableFromIPC, | ||
Vector, | ||
vectorFromArray | ||
Vector | ||
} from 'apache-arrow' | ||
import { fromRecordsToBuffer } from './arrow' | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../native.js') | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js') | ||
@@ -63,46 +59,46 @@ /** | ||
/** | ||
* Open a table in the database. | ||
* @param name The name of the table. | ||
*/ | ||
async openTable (name: string): Promise<Table> { | ||
* Open a table in the database. | ||
* | ||
* @param name The name of the table. | ||
*/ | ||
async openTable (name: string): Promise<Table> | ||
/** | ||
* Open a table in the database. | ||
* | ||
* @param name The name of the table. | ||
* @param embeddings An embedding function to use on this Table | ||
*/ | ||
async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>> | ||
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> { | ||
const tbl = await databaseOpenTable.call(this._db, name) | ||
return new Table(tbl, name) | ||
if (embeddings !== undefined) { | ||
return new Table(tbl, name, embeddings) | ||
} else { | ||
return new Table(tbl, name) | ||
} | ||
} | ||
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> { | ||
if (data.length === 0) { | ||
throw new Error('At least one record needs to be provided') | ||
} | ||
/** | ||
* Creates a new Table and initialize it with new data. | ||
* | ||
* @param name The name of the table. | ||
* @param data Non-empty Array of Records to be inserted into the Table | ||
*/ | ||
const columns = Object.keys(data[0]) | ||
const records: Record<string, Vector> = {} | ||
for (const columnsKey of columns) { | ||
if (columnsKey === 'vector') { | ||
const children = new Field<Float32>('item', new Float32()) | ||
const list = new List(children) | ||
const listBuilder = makeBuilder({ | ||
type: list | ||
}) | ||
const vectorSize = (data[0].vector as any[]).length | ||
for (const datum of data) { | ||
if ((datum[columnsKey] as any[]).length !== vectorSize) { | ||
throw new Error(`Invalid vector size, expected ${vectorSize}`) | ||
} | ||
listBuilder.append(datum[columnsKey]) | ||
} | ||
records[columnsKey] = listBuilder.finish().toVector() | ||
} else { | ||
const values = [] | ||
for (const datum of data) { | ||
values.push(datum[columnsKey]) | ||
} | ||
records[columnsKey] = vectorFromArray(values) | ||
} | ||
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> | ||
/** | ||
* Creates a new Table and initialize it with new data. | ||
* | ||
* @param name The name of the table. | ||
* @param data Non-empty Array of Records to be inserted into the Table | ||
* @param embeddings An embedding function to use on this Table | ||
*/ | ||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>> | ||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> { | ||
const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings)) | ||
if (embeddings !== undefined) { | ||
return new Table(tbl, name, embeddings) | ||
} else { | ||
return new Table(tbl, name) | ||
} | ||
const table = new ArrowTable(records) | ||
await this.createTableArrow(name, table) | ||
return await this.openTable(name) | ||
} | ||
@@ -117,12 +113,18 @@ | ||
/** | ||
* A table in a LanceDB database. | ||
*/ | ||
export class Table { | ||
export class Table<T = number[]> { | ||
private readonly _tbl: any | ||
private readonly _name: string | ||
private readonly _embeddings?: EmbeddingFunction<T> | ||
constructor (tbl: any, name: string) { | ||
constructor (tbl: any, name: string) | ||
/** | ||
* @param tbl | ||
* @param name | ||
* @param embeddings An embedding function to use when interacting with this table | ||
*/ | ||
constructor (tbl: any, name: string, embeddings: EmbeddingFunction<T>) | ||
constructor (tbl: any, name: string, embeddings?: EmbeddingFunction<T>) { | ||
this._tbl = tbl | ||
this._name = name | ||
this._embeddings = embeddings | ||
} | ||
@@ -135,10 +137,95 @@ | ||
/** | ||
* Create a search query to find the nearest neighbors of the given query vector. | ||
* @param queryVector The query vector. | ||
*/ | ||
search (queryVector: number[]): Query { | ||
* Creates a search query to find the nearest neighbors of the given search term | ||
* @param query The query search term | ||
*/ | ||
search (query: T): Query { | ||
let queryVector: number[] | ||
if (this._embeddings !== undefined) { | ||
queryVector = this._embeddings.embed([query])[0] | ||
} else { | ||
queryVector = query as number[] | ||
} | ||
return new Query(this._tbl, queryVector) | ||
} | ||
/** | ||
* Insert records into this Table. | ||
* | ||
* @param data Records to be inserted into the Table | ||
* @return The number of rows added to the table | ||
*/ | ||
async add (data: Array<Record<string, unknown>>): Promise<number> { | ||
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Append.toString()) | ||
} | ||
/** | ||
* Insert records into this Table, replacing its contents. | ||
* | ||
* @param data Records to be inserted into the Table | ||
* @return The number of rows added to the table | ||
*/ | ||
async overwrite (data: Array<Record<string, unknown>>): Promise<number> { | ||
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Overwrite.toString()) | ||
} | ||
/** | ||
* Create an ANN vector index on this Table. | ||
* | ||
* @param indexParams The parameters of this Index, @see VectorIndexParams. | ||
*/ | ||
async create_index (indexParams: VectorIndexParams): Promise<any> { | ||
return tableCreateVectorIndex.call(this._tbl, indexParams) | ||
} | ||
} | ||
/**
 * Build parameters for a vector index whose `type` is 'ivf_pq'.
 * Every field except `type` is optional.
 */
interface IvfPQIndexConfig { | ||
/** | ||
* The column to be indexed | ||
*/ | ||
column?: string | ||
/** | ||
* A unique name for the index | ||
*/ | ||
index_name?: string | ||
/** | ||
* Metric type, L2 or Cosine | ||
*/ | ||
metric_type?: MetricType | ||
/** | ||
* The number of partitions of this index | ||
*/ | ||
num_partitions?: number | ||
/** | ||
* The max number of iterations for kmeans training. | ||
*/ | ||
max_iters?: number | ||
/** | ||
* Train as optimized product quantization. | ||
*/ | ||
use_opq?: boolean | ||
/** | ||
* Number of subvectors to build PQ code | ||
*/ | ||
num_sub_vectors?: number | ||
/** | ||
* The number of bits used to represent one PQ centroid. | ||
*/ | ||
num_bits?: number | ||
/** | ||
* Max number of iterations to train OPQ, if `use_opq` is true. | ||
*/ | ||
max_opq_iters?: number | ||
/**
 * Literal tag identifying this index type.
 */
type: 'ivf_pq' | ||
} | ||
export type VectorIndexParams = IvfPQIndexConfig | ||
/** | ||
@@ -149,20 +236,25 @@ * A builder for nearest neighbor queries for LanceDB. | ||
private readonly _tbl: any | ||
private readonly _query_vector: number[] | ||
private readonly _queryVector: number[] | ||
private _limit: number | ||
private readonly _refine_factor?: number | ||
private readonly _nprobes: number | ||
private _refineFactor?: number | ||
private _nprobes: number | ||
private readonly _columns?: string[] | ||
private _filter?: string | ||
private readonly _metric = 'L2' | ||
private _metricType?: MetricType | ||
constructor (tbl: any, queryVector: number[]) { | ||
this._tbl = tbl | ||
this._query_vector = queryVector | ||
this._queryVector = queryVector | ||
this._limit = 10 | ||
this._nprobes = 20 | ||
this._refine_factor = undefined | ||
this._refineFactor = undefined | ||
this._columns = undefined | ||
this._filter = undefined | ||
this._metricType = undefined | ||
} | ||
/** | ||
* Sets the number of results that will be returned | ||
* @param value number of results | ||
*/ | ||
limit (value: number): Query { | ||
@@ -173,2 +265,24 @@ this._limit = value | ||
/** | ||
* Refine the results by reading extra elements and re-ranking them in memory. | ||
* @param value refine factor to use in this query. | ||
*/ | ||
refineFactor (value: number): Query { | ||
this._refineFactor = value | ||
return this | ||
} | ||
/** | ||
* The number of probes used. A higher number makes search more accurate but also slower. | ||
* @param value The number of probes used. | ||
*/ | ||
nprobes (value: number): Query { | ||
this._nprobes = value | ||
return this | ||
} | ||
/** | ||
* A filter statement to be applied to this query. | ||
* @param value A filter in the same format used by a sql WHERE clause. | ||
*/ | ||
filter (value: string): Query { | ||
@@ -180,11 +294,15 @@ this._filter = value | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
* The MetricType used for this Query. | ||
* @param value The metric to use. @see MetricType for the different options | ||
*/ | ||
metricType (value: MetricType): Query { | ||
this._metricType = value | ||
return this | ||
} | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
async execute<T = Record<string, unknown>> (): Promise<T[]> { | ||
let buffer | ||
if (this._filter != null) { | ||
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter) | ||
} else { | ||
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit) | ||
} | ||
const buffer = await tableSearch.call(this._tbl, this) | ||
const data = tableFromIPC(buffer) | ||
@@ -204,1 +322,36 @@ return data.toArray().map((entry: Record<string, unknown>) => { | ||
} | ||
/**
 * Write mode string handed to the native `tableAdd` call:
 * `Table.add` passes `Append`, `Table.overwrite` passes `Overwrite`.
 */
export enum WriteMode { | ||
Overwrite = 'overwrite', | ||
Append = 'append' | ||
} | ||
/** | ||
* An embedding function that automatically creates vector representation for a given column. | ||
* @typeParam T type of the values passed to `embed` | ||
*/ | ||
export interface EmbeddingFunction<T> { | ||
/** | ||
* The name of the column that will be used as input for the Embedding Function. | ||
*/ | ||
sourceColumn: string | ||
/** | ||
* Creates a vector representation for the given values. | ||
* @param data values to embed | ||
* @returns an array of numeric vectors (presumably one per input value, in the same order; confirm) | ||
*/ | ||
embed: (data: T[]) => number[][] | ||
} | ||
/** | ||
* Distance metrics type. | ||
* Each member's value is the lowercase string name of the metric. | ||
*/ | ||
export enum MetricType { | ||
/** | ||
* Euclidean distance | ||
*/ | ||
L2 = 'l2', | ||
/** | ||
* Cosine distance | ||
*/ | ||
Cosine = 'cosine' | ||
} |
@@ -20,2 +20,3 @@ // Copyright 2023 Lance Developers. | ||
import * as lancedb from '../index' | ||
import { type EmbeddingFunction, MetricType, Query } from '../index' | ||
@@ -71,3 +72,3 @@ describe('LanceDB client', function () { | ||
const table = await con.openTable('vectors') | ||
const results = await table.search([0.1, 0.3]).filter('id == 2').execute() | ||
const results = await table.search([0.1, 0.1]).filter('id == 2').execute() | ||
assert.equal(results.length, 1) | ||
@@ -95,13 +96,112 @@ assert.equal(results[0].id, 2) | ||
}) | ||
// Creates a two-row table, adds two more rows, and checks the search result count before and after. | ||
it('appends records to an existing table ', async function () { | ||
const dir = await track().mkdir('lancejs') | ||
const con = await lancedb.connect(dir) | ||
// Seed rows used to create the table. | ||
const data = [ | ||
{ id: 1, vector: [0.1, 0.2], price: 10, name: 'a' }, | ||
{ id: 2, vector: [1.1, 1.2], price: 50, name: 'b' } | ||
] | ||
const table = await con.createTable('vectors', data) | ||
// Both seeded rows are expected back from the search. | ||
const results = await table.search([0.1, 0.3]).execute() | ||
assert.equal(results.length, 2) | ||
// Rows added after creation. | ||
const dataAdd = [ | ||
{ id: 3, vector: [2.1, 2.2], price: 10, name: 'c' }, | ||
{ id: 4, vector: [3.1, 3.2], price: 50, name: 'd' } | ||
] | ||
await table.add(dataAdd) | ||
// The same search is now expected to return all four rows. | ||
const resultsAdd = await table.search([0.1, 0.3]).execute() | ||
assert.equal(resultsAdd.length, 4) | ||
}) | ||
// Opens the test table, overwrites it with two new rows, and checks the search result count. | ||
it('overwrite all records in a table', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
const table = await con.openTable('vectors') | ||
const results = await table.search([0.1, 0.3]).execute() | ||
assert.equal(results.length, 2) | ||
// Replacement rows; note they carry no `id` field. | ||
const dataOver = [ | ||
{ vector: [2.1, 2.2], price: 10, name: 'foo' }, | ||
{ vector: [3.1, 3.2], price: 50, name: 'bar' } | ||
] | ||
await table.overwrite(dataOver) | ||
// NOTE(review): the expected count is 2 both before and after the overwrite, so this assertion alone | ||
// would also pass if the table contents were left unchanged; consider asserting on the returned names. | ||
const resultsAdd = await table.search([0.1, 0.3]).execute() | ||
assert.equal(resultsAdd.length, 2) | ||
}) | ||
}) | ||
describe('when creating a vector index', function () { | ||
// Builds an index over the `vector` column of a test table created with 32 dimensions and 300 rows. | ||
// The test has no explicit assertion: it passes when create_index resolves without throwing. | ||
it('creates an ivf_pq index on the vector column', async function () { | ||
const uri = await createTestDB(32, 300) | ||
const con = await lancedb.connect(uri) | ||
const table = await con.openTable('vectors') | ||
await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 }) | ||
}).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow | ||
}) | ||
describe('when using a custom embedding function', function () { | ||
/** | ||
* Test-only embedding function: looks each string up in a fixed map | ||
* and falls back to [0.0, 0.0] for values that are not in the map. | ||
*/ | ||
class TextEmbedding implements EmbeddingFunction<string> { | ||
sourceColumn: string | ||
// The parameter is named after the field it populates. | ||
constructor (sourceColumn: string) { | ||
this.sourceColumn = sourceColumn | ||
} | ||
_embedding_map = new Map<string, number[]>([ | ||
['foo', [2.1, 2.2]], | ||
['bar', [3.1, 3.2]] | ||
]) | ||
embed (data: string[]): number[][] { | ||
return data.map(datum => this._embedding_map.get(datum) ?? [0.0, 0.0]) | ||
} | ||
} | ||
it('should encode the original data into embeddings', async function () { | ||
const dir = await track().mkdir('lancejs') | ||
const con = await lancedb.connect(dir) | ||
// Embed the `name` column of each record. | ||
const embeddings = new TextEmbedding('name') | ||
// The records carry no `vector` field; only the embedding function is supplied alongside them. | ||
const data = [ | ||
{ price: 10, name: 'foo' }, | ||
{ price: 50, name: 'bar' } | ||
] | ||
const table = await con.createTable('vectors', data, embeddings) | ||
// The search is issued with a raw string rather than a vector. | ||
const results = await table.search('foo').execute() | ||
assert.equal(results.length, 2) | ||
}) | ||
}) | ||
}) | ||
async function createTestDB (): Promise<string> { | ||
// Checks that the chained Query setters store their values on the query instance. | ||
describe('Query object', function () { | ||
it('sets custom parameters', async function () { | ||
// The first constructor argument is passed as undefined; the query is never executed in this test. | ||
// The cast to a record lets the test read the underscore-prefixed fields directly. | ||
const query = new Query(undefined, [0.1, 0.3]) | ||
.limit(1) | ||
.metricType(MetricType.Cosine) | ||
.refineFactor(100) | ||
.nprobes(20) as Record<string, any> | ||
assert.equal(query._limit, 1) | ||
assert.equal(query._metricType, MetricType.Cosine) | ||
assert.equal(query._refineFactor, 100) | ||
assert.equal(query._nprobes, 20) | ||
}) | ||
}) | ||
async function createTestDB (numDimensions: number = 2, numRows: number = 2): Promise<string> { | ||
const dir = await track().mkdir('lancejs') | ||
const con = await lancedb.connect(dir) | ||
const data = [ | ||
{ id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true }, | ||
{ id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false } | ||
] | ||
const data = [] | ||
for (let i = 0; i < numRows; i++) { | ||
const vector = [] | ||
for (let j = 0; j < numDimensions; j++) { | ||
vector.push(i + (j * 0.1)) | ||
} | ||
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector }) | ||
} | ||
@@ -108,0 +208,0 @@ await con.createTable('vectors', data) |
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
24
3817
2
119931981