Comparing version 0.0.6 to 0.1.0
@@ -0,1 +1,2 @@ | ||
import { Table as ArrowTable } from 'apache-arrow'; | ||
/** | ||
@@ -5,3 +6,3 @@ * Connect to a LanceDB instance at the given URI | ||
*/ | ||
export declare function connect(uri: string): Connection; | ||
export declare function connect(uri: string): Promise<Connection>; | ||
/** | ||
@@ -18,3 +19,3 @@ * A connection to a LanceDB database. | ||
*/ | ||
tableNames(): string[]; | ||
tableNames(): Promise<string[]>; | ||
/** | ||
@@ -25,2 +26,4 @@ * Open a table in the database. | ||
openTable(name: string): Promise<Table>; | ||
createTable(name: string, data: Array<Record<string, unknown>>): Promise<Table>; | ||
createTableArrow(name: string, table: ArrowTable): Promise<Table>; | ||
} | ||
@@ -51,14 +54,11 @@ /** | ||
private readonly _columns?; | ||
private readonly _where?; | ||
private _filter?; | ||
private readonly _metric; | ||
set limit(value: number); | ||
constructor(tbl: any, queryVector: number[]); | ||
limit(value: number): Query; | ||
filter(value: string): Query; | ||
/** | ||
* Execute the query and return the results as an Array of Objects | ||
*/ | ||
execute(): Promise<unknown[]>; | ||
/** | ||
* Execute the query and return the results as an Array of the generic type provided | ||
*/ | ||
execute_cast<T>(): Promise<T[]>; | ||
execute<T = Record<string, unknown>>(): Promise<T[]>; | ||
} |
@@ -28,3 +28,3 @@ "use strict"; | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableSearch } = require('../index.node'); | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../index.node'); | ||
/** | ||
@@ -35,3 +35,5 @@ * Connect to a LanceDB instance at the given URI | ||
function connect(uri) { | ||
return new Connection(uri); | ||
return __awaiter(this, void 0, void 0, function* () { | ||
return new Connection(uri); | ||
}); | ||
} | ||
@@ -54,3 +56,5 @@ exports.connect = connect; | ||
tableNames() { | ||
return databaseTableNames.call(this._db); | ||
return __awaiter(this, void 0, void 0, function* () { | ||
return databaseTableNames.call(this._db); | ||
}); | ||
} | ||
@@ -67,2 +71,45 @@ /** | ||
} | ||
createTable(name, data) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
if (data.length === 0) { | ||
throw new Error('At least one record needs to be provided'); | ||
} | ||
const columns = Object.keys(data[0]); | ||
const records = {}; | ||
for (const columnsKey of columns) { | ||
if (columnsKey === 'vector') { | ||
const children = new apache_arrow_1.Field('item', new apache_arrow_1.Float32()); | ||
const list = new apache_arrow_1.List(children); | ||
const listBuilder = (0, apache_arrow_1.makeBuilder)({ | ||
type: list | ||
}); | ||
const vectorSize = data[0].vector.length; | ||
for (const datum of data) { | ||
if (datum[columnsKey].length !== vectorSize) { | ||
throw new Error(`Invalid vector size, expected ${vectorSize}`); | ||
} | ||
listBuilder.append(datum[columnsKey]); | ||
} | ||
records[columnsKey] = listBuilder.finish().toVector(); | ||
} | ||
else { | ||
const values = []; | ||
for (const datum of data) { | ||
values.push(datum[columnsKey]); | ||
} | ||
records[columnsKey] = (0, apache_arrow_1.vectorFromArray)(values); | ||
} | ||
} | ||
const table = new apache_arrow_1.Table(records); | ||
yield this.createTableArrow(name, table); | ||
return yield this.openTable(name); | ||
}); | ||
} | ||
createTableArrow(name, table) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const writer = apache_arrow_1.RecordBatchFileWriter.writeAll(table); | ||
yield tableCreate.call(this._db, name, Buffer.from(yield writer.toUint8Array())); | ||
return yield this.openTable(name); | ||
}); | ||
} | ||
} | ||
@@ -94,5 +141,2 @@ exports.Connection = Connection; | ||
class Query { | ||
set limit(value) { | ||
this._limit = value; | ||
} | ||
constructor(tbl, queryVector) { | ||
@@ -106,4 +150,12 @@ this._metric = 'L2'; | ||
this._columns = undefined; | ||
this._where = undefined; | ||
this._filter = undefined; | ||
} | ||
limit(value) { | ||
this._limit = value; | ||
return this; | ||
} | ||
filter(value) { | ||
this._filter = value; | ||
return this; | ||
} | ||
/** | ||
@@ -114,3 +166,9 @@ * Execute the query and return the results as an Array of Objects | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit); | ||
let buffer; | ||
if (this._filter != null) { | ||
buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter); | ||
} | ||
else { | ||
buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit); | ||
} | ||
const data = (0, apache_arrow_1.tableFromIPC)(buffer); | ||
@@ -131,11 +189,3 @@ return data.toArray().map((entry) => { | ||
} | ||
/** | ||
* Execute the query and return the results as an Array of the generic type provided | ||
*/ | ||
execute_cast() { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
return yield this.execute(); | ||
}); | ||
} | ||
} | ||
exports.Query = Query; |
@@ -27,62 +27,93 @@ "use strict"; | ||
const chai_1 = require("chai"); | ||
const temp_1 = require("temp"); | ||
const lancedb = require("../index"); | ||
(0, mocha_1.describe)('LanceDB client', function () { | ||
(0, mocha_1.describe)('open a connection to lancedb', function () { | ||
const con = lancedb.connect('.../../sample-lancedb'); | ||
(0, mocha_1.describe)('when creating a connection to lancedb', function () { | ||
it('should have a valid url', function () { | ||
chai_1.assert.equal(con.uri, '.../../sample-lancedb'); | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
chai_1.assert.equal(con.uri, uri); | ||
}); | ||
}); | ||
it('should return the existing table names', function () { | ||
chai_1.assert.deepEqual(con.tableNames(), ['my_table']); | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
chai_1.assert.deepEqual(yield con.tableNames(), ['vectors']); | ||
}); | ||
}); | ||
(0, mocha_1.describe)('open a table from a connection', function () { | ||
const tablePromise = con.openTable('my_table'); | ||
it('should have a valid name', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const table = yield tablePromise; | ||
chai_1.assert.equal(table.name, 'my_table'); | ||
}); | ||
}); | ||
(0, mocha_1.describe)('when querying an existing dataset', function () { | ||
it('should open a table', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
const table = yield con.openTable('vectors'); | ||
chai_1.assert.equal(table.name, 'vectors'); | ||
}); | ||
class MyResult { | ||
constructor() { | ||
this.vector = new Float32Array(0); | ||
this.price = 0; | ||
this.item = ''; | ||
} | ||
} | ||
it('execute a query', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const table = yield tablePromise; | ||
const builder = table.search([0.1, 0.3]); | ||
const results = yield builder.execute(); | ||
chai_1.assert.equal(results.length, 2); | ||
chai_1.assert.equal(results[0].item, 'foo'); | ||
chai_1.assert.equal(results[0].price, 10); | ||
chai_1.assert.approximately(results[0].vector[0], 3.1, 0.1); | ||
chai_1.assert.approximately(results[0].vector[1], 4.1, 0.1); | ||
}); | ||
}); | ||
it('execute a query', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
const table = yield con.openTable('vectors'); | ||
const results = yield table.search([0.1, 0.3]).execute(); | ||
chai_1.assert.equal(results.length, 2); | ||
chai_1.assert.equal(results[0].price, 10); | ||
const vector = results[0].vector; | ||
chai_1.assert.approximately(vector[0], 0.0, 0.2); | ||
chai_1.assert.approximately(vector[0], 0.1, 0.3); | ||
}); | ||
it('execute a query and type cast the result', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const table = yield tablePromise; | ||
const builder = table.search([0.1, 0.3]); | ||
const results = yield builder.execute_cast(); | ||
chai_1.assert.equal(results.length, 2); | ||
chai_1.assert.equal(results[0].item, 'foo'); | ||
chai_1.assert.equal(results[0].price, 10); | ||
chai_1.assert.approximately(results[0].vector[0], 3.1, 0.1); | ||
chai_1.assert.approximately(results[0].vector[1], 4.1, 0.1); | ||
}); | ||
}); | ||
it('limits # of results', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
const table = yield con.openTable('vectors'); | ||
const results = yield table.search([0.1, 0.3]).limit(1).execute(); | ||
chai_1.assert.equal(results.length, 1); | ||
chai_1.assert.equal(results[0].id, 1); | ||
}); | ||
it('limits # of results', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const table = yield tablePromise; | ||
const builder = table.search([0.1, 0.3]); | ||
builder.limit = 1; | ||
const results = yield builder.execute(); | ||
chai_1.assert.equal(results.length, 1); | ||
}); | ||
}); | ||
it('uses a filter', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const uri = yield createTestDB(); | ||
const con = yield lancedb.connect(uri); | ||
const table = yield con.openTable('vectors'); | ||
const results = yield table.search([0.1, 0.3]).filter('id == 2').execute(); | ||
chai_1.assert.equal(results.length, 1); | ||
chai_1.assert.equal(results[0].id, 2); | ||
}); | ||
}); | ||
}); | ||
(0, mocha_1.describe)('when creating a new dataset', function () { | ||
it('creates a new table from javascript objects', function () { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const dir = yield (0, temp_1.track)().mkdir('lancejs'); | ||
const con = yield lancedb.connect(dir); | ||
const data = [ | ||
{ id: 1, vector: [0.1, 0.2], price: 10 }, | ||
{ id: 2, vector: [1.1, 1.2], price: 50 } | ||
]; | ||
const tableName = `vectors_${Math.floor(Math.random() * 100)}`; | ||
const table = yield con.createTable(tableName, data); | ||
chai_1.assert.equal(table.name, tableName); | ||
const results = yield table.search([0.1, 0.3]).execute(); | ||
chai_1.assert.equal(results.length, 2); | ||
}); | ||
}); | ||
}); | ||
}); | ||
/**
 * Test helper: create a tracked temporary directory, connect to it and
 * create a two-row 'vectors' table in it.
 *
 * @returns The path of the temporary directory, usable as a connection URI.
 */
function createTestDB() {
    return __awaiter(this, void 0, void 0, function* () {
        const rows = [
            { id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
            { id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
        ];
        const tempDir = yield (0, temp_1.track)().mkdir('lancejs');
        const connection = yield lancedb.connect(tempDir);
        yield connection.createTable('vectors', rows);
        return tempDir;
    });
}
@@ -17,6 +17,17 @@ // Copyright 2023 Lance Developers. | ||
const lancedb = require('vectordb') | ||
async function example() { | ||
const lancedb = require('vectordb'); | ||
const db = lancedb.connect('../../sample-lancedb'); | ||
const db = lancedb.connect('../../sample-lancedb') | ||
console.log(db.tableNames()); | ||
console.log(db.tableNames()) | ||
const tbl = await db.openTable('my_table'); | ||
const query = tbl.search([0.1, 0.3]); | ||
query.limit = 20; | ||
const results = await query.execute(); | ||
console.log(results); | ||
} | ||
example(); | ||
@@ -12,4 +12,4 @@ { | ||
"dependencies": { | ||
"vectordb": "^0.0.5" | ||
"vectordb": "^0.0.6" | ||
} | ||
} |
{ | ||
"name": "vectordb", | ||
"version": "0.0.6", | ||
"version": "0.1.0", | ||
"description": " Serverless, low-latency vector database for AI applications", | ||
@@ -30,2 +30,3 @@ "main": "dist/index.js", | ||
"@types/node": "^18.16.2", | ||
"@types/temp": "^0.9.1", | ||
"@typescript-eslint/eslint-plugin": "^5.59.1", | ||
@@ -40,2 +41,3 @@ "cargo-cp-artifact": "^0.1", | ||
"mocha": "^10.2.0", | ||
"temp": "^0.9.4", | ||
"ts-node": "^10.9.1", | ||
@@ -42,0 +44,0 @@ "ts-node-dev": "^2.0.0", |
@@ -0,12 +1,25 @@ | ||
# LanceDB | ||
A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb). | ||
## Quick Start | ||
## Installation | ||
```bash | ||
npm i vectordb | ||
npm install vectordb | ||
``` | ||
See the examples folder for usage. | ||
## Usage | ||
### Basic Example | ||
```javascript | ||
const lancedb = require('vectordb'); | ||
const db = await lancedb.connect('<PATH_TO_LANCEDB_DATASET>'); | ||
const table = await db.openTable('my_table'); | ||
const results = await table.search([0.1, 0.3]).limit(20).execute(); | ||
console.log(results); | ||
``` | ||
The [examples](./examples) folder contains complete examples. | ||
## Development | ||
@@ -13,0 +26,0 @@ |
@@ -15,6 +15,16 @@ // Copyright 2023 Lance Developers. | ||
import { tableFromIPC, Vector } from 'apache-arrow' | ||
import { | ||
Field, | ||
Float32, | ||
List, | ||
makeBuilder, | ||
RecordBatchFileWriter, | ||
Table as ArrowTable, | ||
tableFromIPC, | ||
Vector, | ||
vectorFromArray | ||
} from 'apache-arrow' | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableSearch } = require('../index.node') | ||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../index.node') | ||
@@ -25,3 +35,3 @@ /** | ||
*/ | ||
export function connect (uri: string): Connection { | ||
export async function connect (uri: string): Promise<Connection> { | ||
return new Connection(uri) | ||
@@ -49,3 +59,3 @@ } | ||
*/ | ||
tableNames (): string[] { | ||
async tableNames (): Promise<string[]> { | ||
return databaseTableNames.call(this._db) | ||
@@ -62,2 +72,46 @@ } | ||
} | ||
/**
 * Create a new table from an array of plain JavaScript records and open it.
 *
 * Column names are taken from the keys of the first record only. The column
 * named 'vector' is built as an Arrow List of Float32 items and every record
 * must supply a vector of the same length as the first record's; all other
 * columns are handed to vectorFromArray as-is.
 *
 * @param name The name of the table to create.
 * @param data Non-empty array of records, one object per row.
 * @returns The table returned by createTableArrow.
 * @throws Error if `data` is empty, or if a record's vector is missing or
 *         has a different length than the first record's vector.
 */
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
  if (data.length === 0) {
    throw new Error('At least one record needs to be provided')
  }

  const records: Record<string, Vector> = {}

  for (const columnsKey of Object.keys(data[0])) {
    if (columnsKey === 'vector') {
      const listBuilder = makeBuilder({
        type: new List(new Field<Float32>('item', new Float32()))
      })

      const vectorSize = (data[0].vector as ArrayLike<unknown>).length
      for (const datum of data) {
        const vector = datum[columnsKey] as ArrayLike<unknown> | null | undefined
        // A missing vector is reported as a size mismatch rather than
        // surfacing as a TypeError on `.length`.
        if (vector == null || vector.length !== vectorSize) {
          throw new Error(`Invalid vector size, expected ${vectorSize}`)
        }
        listBuilder.append(datum[columnsKey])
      }
      records[columnsKey] = listBuilder.finish().toVector()
    } else {
      records[columnsKey] = vectorFromArray(data.map((datum) => datum[columnsKey]))
    }
  }

  const table = new ArrowTable(records)
  // createTableArrow already opens the table it created, so its result is
  // returned directly instead of opening the table a second time.
  return await this.createTableArrow(name, table)
}
/**
 * Create a new table from an Arrow table and open it.
 *
 * The Arrow table is written out with RecordBatchFileWriter, the resulting
 * bytes are wrapped in a Buffer and passed to the native tableCreate call,
 * and the table is then opened by name.
 *
 * @param name The name of the table to create.
 * @param table The Arrow table holding the data to store.
 * @returns The result of opening the table by name.
 */
async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
  const serialized = await RecordBatchFileWriter.writeAll(table).toUint8Array()
  const payload = Buffer.from(serialized)
  await tableCreate.call(this._db, name, payload)
  const opened = await this.openTable(name)
  return opened
}
} | ||
@@ -100,3 +154,3 @@ | ||
private readonly _columns?: string[] | ||
private readonly _where?: string | ||
private _filter?: string | ||
private readonly _metric = 'L2' | ||
@@ -111,11 +165,13 @@ | ||
this._columns = undefined | ||
this._where = undefined | ||
this._filter = undefined | ||
} | ||
set limit (value: number) { | ||
limit (value: number): Query { | ||
this._limit = value | ||
return this | ||
} | ||
get limit (): number { | ||
return this._limit | ||
filter (value: string): Query { | ||
this._filter = value | ||
return this | ||
} | ||
@@ -126,4 +182,9 @@ | ||
*/ | ||
async execute (): Promise<unknown[]> { | ||
const buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit) | ||
async execute<T = Record<string, unknown>> (): Promise<T[]> { | ||
let buffer; | ||
if (this._filter != null) { | ||
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter) | ||
} else { | ||
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit) | ||
} | ||
const data = tableFromIPC(buffer) | ||
@@ -139,12 +200,5 @@ return data.toArray().map((entry: Record<string, unknown>) => { | ||
}) | ||
return newObject | ||
return newObject as unknown as T | ||
}) | ||
} | ||
/** | ||
* Execute the query and return the results as an Array of the generic type provided | ||
*/ | ||
async execute_cast<T>(): Promise<T[]> { | ||
return await this.execute() as T[] | ||
} | ||
} |
@@ -17,2 +17,3 @@ // Copyright 2023 Lance Developers. | ||
import { assert } from 'chai' | ||
import { track } from 'temp' | ||
@@ -22,61 +23,87 @@ import * as lancedb from '../index' | ||
describe('LanceDB client', function () { | ||
describe('open a connection to lancedb', function () { | ||
const con = lancedb.connect('.../../sample-lancedb') | ||
describe('when creating a connection to lancedb', function () { | ||
it('should have a valid url', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
assert.equal(con.uri, uri) | ||
}) | ||
it('should have a valid url', function () { | ||
assert.equal(con.uri, '.../../sample-lancedb') | ||
it('should return the existing table names', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
assert.deepEqual(await con.tableNames(), ['vectors']) | ||
}) | ||
}) | ||
it('should return the existing table names', function () { | ||
assert.deepEqual(con.tableNames(), ['my_table']) | ||
describe('when querying an existing dataset', function () { | ||
it('should open a table', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
const table = await con.openTable('vectors') | ||
assert.equal(table.name, 'vectors') | ||
}) | ||
describe('open a table from a connection', function () { | ||
const tablePromise = con.openTable('my_table') | ||
it('execute a query', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
const table = await con.openTable('vectors') | ||
const results = await table.search([0.1, 0.3]).execute() | ||
it('should have a valid name', async function () { | ||
const table = await tablePromise | ||
assert.equal(table.name, 'my_table') | ||
}) | ||
assert.equal(results.length, 2) | ||
assert.equal(results[0].price, 10) | ||
const vector = results[0].vector as Float32Array | ||
assert.approximately(vector[0], 0.0, 0.2) | ||
assert.approximately(vector[0], 0.1, 0.3) | ||
}) | ||
class MyResult { | ||
vector: Float32Array = new Float32Array(0) | ||
price: number = 0 | ||
item: string = '' | ||
} | ||
it('limits # of results', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
const table = await con.openTable('vectors') | ||
const results = await table.search([0.1, 0.3]).limit(1).execute() | ||
assert.equal(results.length, 1) | ||
assert.equal(results[0].id, 1) | ||
}) | ||
it('execute a query', async function () { | ||
const table = await tablePromise | ||
const builder = table.search([0.1, 0.3]) | ||
const results = await builder.execute() as MyResult[] | ||
it('uses a filter', async function () { | ||
const uri = await createTestDB() | ||
const con = await lancedb.connect(uri) | ||
const table = await con.openTable('vectors') | ||
const results = await table.search([0.1, 0.3]).filter('id == 2').execute() | ||
assert.equal(results.length, 1) | ||
assert.equal(results[0].id, 2) | ||
}) | ||
}) | ||
assert.equal(results.length, 2) | ||
assert.equal(results[0].item, 'foo') | ||
assert.equal(results[0].price, 10) | ||
assert.approximately(results[0].vector[0], 3.1, 0.1) | ||
assert.approximately(results[0].vector[1], 4.1, 0.1) | ||
}) | ||
describe('when creating a new dataset', function () { | ||
it('creates a new table from javascript objects', async function () { | ||
const dir = await track().mkdir('lancejs') | ||
const con = await lancedb.connect(dir) | ||
it('execute a query and type cast the result', async function () { | ||
const table = await tablePromise | ||
const data = [ | ||
{ id: 1, vector: [0.1, 0.2], price: 10 }, | ||
{ id: 2, vector: [1.1, 1.2], price: 50 } | ||
] | ||
const builder = table.search([0.1, 0.3]) | ||
const results = await builder.execute_cast<MyResult>() | ||
assert.equal(results.length, 2) | ||
assert.equal(results[0].item, 'foo') | ||
assert.equal(results[0].price, 10) | ||
assert.approximately(results[0].vector[0], 3.1, 0.1) | ||
assert.approximately(results[0].vector[1], 4.1, 0.1) | ||
}) | ||
const tableName = `vectors_${Math.floor(Math.random() * 100)}` | ||
const table = await con.createTable(tableName, data) | ||
assert.equal(table.name, tableName) | ||
it('limits # of results', async function () { | ||
const table = await tablePromise | ||
const builder = table.search([0.1, 0.3]) | ||
builder.limit = 1 | ||
const results = await builder.execute() as MyResult[] | ||
assert.equal(results.length, 1) | ||
}) | ||
const results = await table.search([0.1, 0.3]).execute() | ||
assert.equal(results.length, 2) | ||
}) | ||
}) | ||
}) | ||
/**
 * Test helper: create a tracked temporary directory, connect to it and
 * create a two-row 'vectors' table in it.
 *
 * @returns The path of the temporary directory, usable as a connection URI.
 */
async function createTestDB (): Promise<string> {
  const rows = [
    { id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
    { id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
  ]

  const tempDir = await track().mkdir('lancejs')
  const connection = await lancedb.connect(tempDir)
  await connection.createTable('vectors', rows)
  return tempDir
}
Sorry, the diff of this file is not supported yet
50365069
683
44
17