New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign inDemoInstall
Socket

vectordb

Package Overview
Dependencies
Maintainers
2
Versions
99
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

vectordb - npm Package Compare versions

Comparing version 0.1.1 to 0.1.2

aarch64-apple-darwin.node

176

dist/index.d.ts

@@ -1,2 +0,2 @@

import { Table as ArrowTable } from 'apache-arrow';
import { type Table as ArrowTable } from 'apache-arrow';
/**

@@ -20,23 +20,109 @@ * Connect to a LanceDB instance at the given URI

/**
* Open a table in the database.
* @param name The name of the table.
*/
* Open a table in the database.
*
* @param name The name of the table.
*/
openTable(name: string): Promise<Table>;
/**
* Open a table in the database.
*
* @param name The name of the table.
* @param embeddings An embedding function to use on this Table
*/
openTable<T>(name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>;
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
*/
createTable(name: string, data: Array<Record<string, unknown>>): Promise<Table>;
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
* @param embeddings An embedding function to use on this Table
*/
createTable<T>(name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>;
createTableArrow(name: string, table: ArrowTable): Promise<Table>;
}
/**
* A table in a LanceDB database.
*/
export declare class Table {
export declare class Table<T = number[]> {
private readonly _tbl;
private readonly _name;
private readonly _embeddings?;
constructor(tbl: any, name: string);
/**
* @param tbl
* @param name
* @param embeddings An embedding function to use when interacting with this table
*/
constructor(tbl: any, name: string, embeddings: EmbeddingFunction<T>);
get name(): string;
/**
* Create a search query to find the nearest neighbors of the given query vector.
* @param queryVector The query vector.
*/
search(queryVector: number[]): Query;
* Creates a search query to find the nearest neighbors of the given search term
* @param query The query search term
*/
search(query: T): Query;
/**
* Insert records into this Table.
*
* @param data Records to be inserted into the Table
* @return The number of rows added to the table
*/
add(data: Array<Record<string, unknown>>): Promise<number>;
/**
* Insert records into this Table, replacing its contents.
*
* @param data Records to be inserted into the Table
* @return The number of rows added to the table
*/
overwrite(data: Array<Record<string, unknown>>): Promise<number>;
/**
* Create an ANN index on this Table vector index.
*
* @param indexParams The parameters of this Index, @see VectorIndexParams.
*/
create_index(indexParams: VectorIndexParams): Promise<any>;
}
/**
* Parameters for building a vector index of type `'ivf_pq'`.
*
* Every field except `type` is optional. The values used for omitted fields are
* not defined in this file (presumably defaults come from the native library — TODO confirm).
*/
interface IvfPQIndexConfig {
/**
* The column to be indexed
*/
column?: string;
/**
* A unique name for the index
*/
index_name?: string;
/**
* Metric type, L2 or Cosine
*/
metric_type?: MetricType;
/**
* The number of partitions of this index
*/
num_partitions?: number;
/**
* The max number of iterations for kmeans training.
*/
max_iters?: number;
/**
* Train as optimized product quantization.
*/
use_opq?: boolean;
/**
* Number of subvectors to build PQ code
*/
num_sub_vectors?: number;
/**
* The number of bits used to represent one PQ centroid.
*/
num_bits?: number;
/**
* Max number of iterations to train OPQ, if `use_opq` is true.
*/
max_opq_iters?: number;
/**
* Discriminant for the index kind; `'ivf_pq'` is the only value this interface accepts.
*/
type: 'ivf_pq';
}
export type VectorIndexParams = IvfPQIndexConfig;
/**

@@ -47,16 +133,70 @@ * A builder for nearest neighbor queries for LanceDB.

private readonly _tbl;
private readonly _query_vector;
private readonly _queryVector;
private _limit;
private readonly _refine_factor?;
private readonly _nprobes;
private _refineFactor?;
private _nprobes;
private readonly _columns?;
private _filter?;
private readonly _metric;
private _metricType?;
constructor(tbl: any, queryVector: number[]);
/**
* Sets the number of results that will be returned
* @param value number of results
*/
limit(value: number): Query;
/**
* Refine the results by reading extra elements and re-ranking them in memory.
* @param value refine factor to use in this query.
*/
refineFactor(value: number): Query;
/**
* The number of probes used. A higher number makes search more accurate but also slower.
* @param value The number of probes used.
*/
nprobes(value: number): Query;
/**
* A filter statement to be applied to this query.
* @param value A filter in the same format used by a sql WHERE clause.
*/
filter(value: string): Query;
/**
* Execute the query and return the results as an Array of Objects
*/
* The MetricType used for this Query.
* @param value The metric to use. @see MetricType for the different options
*/
metricType(value: MetricType): Query;
/**
* Execute the query and return the results as an Array of Objects
*/
execute<T = Record<string, unknown>>(): Promise<T[]>;
}
export declare enum WriteMode {
Overwrite = "overwrite",
Append = "append"
}
/**
* An embedding function that automatically creates vector representation for a given column.
*
* @typeParam T The type of the values accepted by `embed`.
*/
export interface EmbeddingFunction<T> {
/**
* The name of the column that will be used as input for the Embedding Function.
*/
sourceColumn: string;
/**
* Creates a vector representation for the given values.
*
* @param data The values to embed.
* @returns An array of vectors; presumably one per input value, in input order — TODO confirm
* (`Table.search` embeds a single query and uses element 0 of the result).
*/
embed: (data: T[]) => number[][];
}
/**
* Distance metrics type.
*/
export declare enum MetricType {
/**
* Euclidean distance
*/
L2 = "l2",
/**
* Cosine distance
*/
Cosine = "cosine"
}
export {};

177

dist/index.js

@@ -25,6 +25,7 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.Query = exports.Table = exports.Connection = exports.connect = void 0;
exports.MetricType = exports.WriteMode = exports.Query = exports.Table = exports.Connection = exports.connect = void 0;
const apache_arrow_1 = require("apache-arrow");
const arrow_1 = require("./arrow");
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../native.js');
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js');
/**

@@ -59,46 +60,22 @@ * Connect to a LanceDB instance at the given URI

}
/**
* Open a table in the database.
* @param name The name of the table.
*/
openTable(name) {
openTable(name, embeddings) {
return __awaiter(this, void 0, void 0, function* () {
const tbl = yield databaseOpenTable.call(this._db, name);
return new Table(tbl, name);
if (embeddings !== undefined) {
return new Table(tbl, name, embeddings);
}
else {
return new Table(tbl, name);
}
});
}
createTable(name, data) {
createTable(name, data, embeddings) {
return __awaiter(this, void 0, void 0, function* () {
if (data.length === 0) {
throw new Error('At least one record needs to be provided');
const tbl = yield tableCreate.call(this._db, name, yield (0, arrow_1.fromRecordsToBuffer)(data, embeddings));
if (embeddings !== undefined) {
return new Table(tbl, name, embeddings);
}
const columns = Object.keys(data[0]);
const records = {};
for (const columnsKey of columns) {
if (columnsKey === 'vector') {
const children = new apache_arrow_1.Field('item', new apache_arrow_1.Float32());
const list = new apache_arrow_1.List(children);
const listBuilder = (0, apache_arrow_1.makeBuilder)({
type: list
});
const vectorSize = data[0].vector.length;
for (const datum of data) {
if (datum[columnsKey].length !== vectorSize) {
throw new Error(`Invalid vector size, expected ${vectorSize}`);
}
listBuilder.append(datum[columnsKey]);
}
records[columnsKey] = listBuilder.finish().toVector();
}
else {
const values = [];
for (const datum of data) {
values.push(datum[columnsKey]);
}
records[columnsKey] = (0, apache_arrow_1.vectorFromArray)(values);
}
else {
return new Table(tbl, name);
}
const table = new apache_arrow_1.Table(records);
yield this.createTableArrow(name, table);
return yield this.openTable(name);
});

@@ -115,9 +92,7 @@ }

exports.Connection = Connection;
/**
* A table in a LanceDB database.
*/
class Table {
constructor(tbl, name) {
constructor(tbl, name, embeddings) {
this._tbl = tbl;
this._name = name;
this._embeddings = embeddings;
}

@@ -128,8 +103,47 @@ get name() {

/**
* Create a search query to find the nearest neighbors of the given query vector.
* @param queryVector The query vector.
*/
search(queryVector) {
* Creates a search query to find the nearest neighbors of the given search term
* @param query The query search term
*/
search(query) {
let queryVector;
if (this._embeddings !== undefined) {
queryVector = this._embeddings.embed([query])[0];
}
else {
queryVector = query;
}
return new Query(this._tbl, queryVector);
}
/**
* Insert records into this Table.
*
* @param data Records to be inserted into the Table
* @return The number of rows added to the table
*/
add(data) {
return __awaiter(this, void 0, void 0, function* () {
return tableAdd.call(this._tbl, yield (0, arrow_1.fromRecordsToBuffer)(data, this._embeddings), WriteMode.Append.toString());
});
}
/**
* Insert records into this Table, replacing its contents.
*
* @param data Records to be inserted into the Table
* @return The number of rows added to the table
*/
overwrite(data) {
return __awaiter(this, void 0, void 0, function* () {
return tableAdd.call(this._tbl, yield (0, arrow_1.fromRecordsToBuffer)(data, this._embeddings), WriteMode.Overwrite.toString());
});
}
/**
* Create an ANN index on this Table vector index.
*
* @param indexParams The parameters of this Index, @see VectorIndexParams.
*/
create_index(indexParams) {
return __awaiter(this, void 0, void 0, function* () {
return tableCreateVectorIndex.call(this._tbl, indexParams);
});
}
}

@@ -142,11 +156,15 @@ exports.Table = Table;

constructor(tbl, queryVector) {
this._metric = 'L2';
this._tbl = tbl;
this._query_vector = queryVector;
this._queryVector = queryVector;
this._limit = 10;
this._nprobes = 20;
this._refine_factor = undefined;
this._refineFactor = undefined;
this._columns = undefined;
this._filter = undefined;
this._metricType = undefined;
}
/***
* Sets the number of results that will be returned
* @param value number of results
*/
limit(value) {

@@ -156,2 +174,22 @@ this._limit = value;

}
/**
* Refine the results by reading extra elements and re-ranking them in memory.
* @param value refine factor to use in this query.
*/
refineFactor(value) {
this._refineFactor = value;
return this;
}
/**
* The number of probes used. A higher number makes search more accurate but also slower.
* @param value The number of probes used.
*/
nprobes(value) {
this._nprobes = value;
return this;
}
/**
* A filter statement to be applied to this query.
* @param value A filter in the same format used by a sql WHERE clause.
*/
filter(value) {

@@ -162,13 +200,15 @@ this._filter = value;

/**
* Execute the query and return the results as an Array of Objects
*/
* The MetricType used for this Query.
* @param value The metric to use. @see MetricType for the different options
*/
metricType(value) {
this._metricType = value;
return this;
}
/**
* Execute the query and return the results as an Array of Objects
*/
execute() {
return __awaiter(this, void 0, void 0, function* () {
let buffer;
if (this._filter != null) {
buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter);
}
else {
buffer = yield tableSearch.call(this._tbl, this._query_vector, this._limit);
}
const buffer = yield tableSearch.call(this._tbl, this);
const data = (0, apache_arrow_1.tableFromIPC)(buffer);

@@ -191,1 +231,20 @@ return data.toArray().map((entry) => {

exports.Query = Query;
var WriteMode;
(function (WriteMode) {
WriteMode["Overwrite"] = "overwrite";
WriteMode["Append"] = "append";
})(WriteMode = exports.WriteMode || (exports.WriteMode = {}));
/**
* Distance metrics type.
*/
var MetricType;
(function (MetricType) {
/**
* Euclidean distance
*/
MetricType["L2"] = "l2";
/**
* Cosine distance
*/
MetricType["Cosine"] = "cosine";
})(MetricType = exports.MetricType || (exports.MetricType = {}));

@@ -29,2 +29,3 @@ "use strict";

const lancedb = require("../index");
const index_1 = require("../index");
(0, mocha_1.describe)('LanceDB client', function () {

@@ -84,3 +85,3 @@ (0, mocha_1.describe)('when creating a connection to lancedb', function () {

const table = yield con.openTable('vectors');
const results = yield table.search([0.1, 0.3]).filter('id == 2').execute();
const results = yield table.search([0.1, 0.1]).filter('id == 2').execute();
chai_1.assert.equal(results.length, 1);

@@ -107,12 +108,105 @@ chai_1.assert.equal(results[0].id, 2);

});
it('appends records to an existing table ', function () {
return __awaiter(this, void 0, void 0, function* () {
const dir = yield (0, temp_1.track)().mkdir('lancejs');
const con = yield lancedb.connect(dir);
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10, name: 'a' },
{ id: 2, vector: [1.1, 1.2], price: 50, name: 'b' }
];
const table = yield con.createTable('vectors', data);
const results = yield table.search([0.1, 0.3]).execute();
chai_1.assert.equal(results.length, 2);
const dataAdd = [
{ id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
{ id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
];
yield table.add(dataAdd);
const resultsAdd = yield table.search([0.1, 0.3]).execute();
chai_1.assert.equal(resultsAdd.length, 4);
});
});
it('overwrite all records in a table', function () {
return __awaiter(this, void 0, void 0, function* () {
const uri = yield createTestDB();
const con = yield lancedb.connect(uri);
const table = yield con.openTable('vectors');
const results = yield table.search([0.1, 0.3]).execute();
chai_1.assert.equal(results.length, 2);
const dataOver = [
{ vector: [2.1, 2.2], price: 10, name: 'foo' },
{ vector: [3.1, 3.2], price: 50, name: 'bar' }
];
yield table.overwrite(dataOver);
const resultsAdd = yield table.search([0.1, 0.3]).execute();
chai_1.assert.equal(resultsAdd.length, 2);
});
});
});
(0, mocha_1.describe)('when creating a vector index', function () {
    // Smoke test: it passes as long as create_index resolves without throwing.
    // The title previously read 'overwrite all records in a table', copied from
    // the preceding test, and did not describe what is exercised here.
    it('creates an ivf_pq index on the vector column', function () {
        return __awaiter(this, void 0, void 0, function* () {
            // createTestDB(numDimensions, numRows): 32-dimensional vectors, 300 rows.
            const uri = yield createTestDB(32, 300);
            const con = yield lancedb.connect(uri);
            const table = yield con.openTable('vectors');
            yield table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 });
        });
    }).timeout(10000); // Timeout is high partially because GH macos runner is pretty slow
});
(0, mocha_1.describe)('when using a custom embedding function', function () {
class TextEmbedding {
constructor(targetColumn) {
this._embedding_map = new Map([
['foo', [2.1, 2.2]],
['bar', [3.1, 3.2]]
]);
this.sourceColumn = targetColumn;
}
embed(data) {
return data.map(datum => { var _a; return (_a = this._embedding_map.get(datum)) !== null && _a !== void 0 ? _a : [0.0, 0.0]; });
}
}
it('should encode the original data into embeddings', function () {
return __awaiter(this, void 0, void 0, function* () {
const dir = yield (0, temp_1.track)().mkdir('lancejs');
const con = yield lancedb.connect(dir);
const embeddings = new TextEmbedding('name');
const data = [
{ price: 10, name: 'foo' },
{ price: 50, name: 'bar' }
];
const table = yield con.createTable('vectors', data, embeddings);
const results = yield table.search('foo').execute();
chai_1.assert.equal(results.length, 2);
});
});
});
});
function createTestDB() {
(0, mocha_1.describe)('Query object', function () {
it('sets custom parameters', function () {
return __awaiter(this, void 0, void 0, function* () {
const query = new index_1.Query(undefined, [0.1, 0.3])
.limit(1)
.metricType(index_1.MetricType.Cosine)
.refineFactor(100)
.nprobes(20);
chai_1.assert.equal(query._limit, 1);
chai_1.assert.equal(query._metricType, index_1.MetricType.Cosine);
chai_1.assert.equal(query._refineFactor, 100);
chai_1.assert.equal(query._nprobes, 20);
});
});
});
function createTestDB(numDimensions = 2, numRows = 2) {
return __awaiter(this, void 0, void 0, function* () {
const dir = yield (0, temp_1.track)().mkdir('lancejs');
const con = yield lancedb.connect(dir);
const data = [
{ id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
{ id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
];
const data = [];
for (let i = 0; i < numRows; i++) {
const vector = [];
for (let j = 0; j < numDimensions; j++) {
vector.push(i + (j * 0.1));
}
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector });
}
yield con.createTable('vectors', data);

@@ -119,0 +213,0 @@ return dir;

@@ -9,10 +9,3 @@ {

"tsc": "tsc -b",
<<<<<<< HEAD
"build": "tsc"
=======
"build": "tsc",
"clean": "rm -rf data/",
"create": "node dist/create.js",
"query": "node dist/query.js"
>>>>>>> gsilvestrin/nodejs_linux_1
},

@@ -19,0 +12,0 @@ "author": "Lance Devs",

@@ -15,3 +15,2 @@ // Copyright 2023 Lance Developers.

<<<<<<<< HEAD:node/examples/ts/src/index.ts
import * as vectordb from 'vectordb';

@@ -38,14 +37,1 @@

example().then(_ => { console.log ("All done!") })
========
let nativeLib;
if (process.platform === "darwin" && process.arch === "arm64") {
nativeLib = require('./darwin_arm64.node')
} else if (process.platform === "linux" && process.arch === "x64") {
nativeLib = require('./linux-x64.node')
} else {
throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
}
module.exports = nativeLib
>>>>>>>> gsilvestrin/nodejs_linux_1:node/native.js

@@ -17,11 +17,25 @@ // Copyright 2023 Lance Developers.

if (process.platform === "darwin" && process.arch === "arm64") {
nativeLib = require('./darwin_arm64.node')
} else if (process.platform === "linux" && process.arch === "x64") {
nativeLib = require('./linux-x64.node')
} else {
throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
/**
 * Loads the prebuilt native binding that matches the current
 * `process.platform` / `process.arch` pair.
 *
 * @returns The exports of the matching `.node` binary.
 * @throws {Error} When no binary is listed for the current platform/architecture.
 */
function getPlatformLibrary() {
    const target = `${process.platform}_${process.arch}`;
    switch (target) {
        case "darwin_arm64":
            return require('./aarch64-apple-darwin.node');
        case "darwin_x64":
            return require('./x86_64-apple-darwin.node');
        case "linux_x64":
            return require('./x86_64-unknown-linux-gnu.node');
        default:
            throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
    }
}
// Try the generic ./index.node binding first. When that module cannot be found,
// fall back to the platform-specific binary chosen by getPlatformLibrary().
try {
    nativeLib = require('./index.node')
} catch (e) {
    if (e.code === "MODULE_NOT_FOUND") {
        nativeLib = getPlatformLibrary();
    } else {
        // Any other failure is reported with its original reason included, so the
        // resulting bug report carries the actual cause instead of a generic message.
        const reason = e instanceof Error ? e.message : String(e);
        throw new Error(`vectordb: failed to load native library (${reason}). Please file a bug report at https://github.com/lancedb/lancedb/issues`);
    }
}
module.exports = nativeLib
{
"name": "vectordb",
"version": "0.1.1",
"version": "0.1.2",
"description": " Serverless, low-latency vector database for AI applications",

@@ -5,0 +5,0 @@ "main": "dist/index.js",

@@ -16,15 +16,11 @@ // Copyright 2023 Lance Developers.

import {
Field,
Float32,
List,
makeBuilder,
RecordBatchFileWriter,
Table as ArrowTable,
type Table as ArrowTable,
tableFromIPC,
Vector,
vectorFromArray
Vector
} from 'apache-arrow'
import { fromRecordsToBuffer } from './arrow'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../native.js')
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')

@@ -63,46 +59,46 @@ /**

/**
* Open a table in the database.
* @param name The name of the table.
*/
async openTable (name: string): Promise<Table> {
* Open a table in the database.
*
* @param name The name of the table.
*/
async openTable (name: string): Promise<Table>
/**
* Open a table in the database.
*
* @param name The name of the table.
* @param embeddings An embedding function to use on this Table
*/
async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
const tbl = await databaseOpenTable.call(this._db, name)
return new Table(tbl, name)
if (embeddings !== undefined) {
return new Table(tbl, name, embeddings)
} else {
return new Table(tbl, name)
}
}
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
if (data.length === 0) {
throw new Error('At least one record needs to be provided')
}
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
*/
const columns = Object.keys(data[0])
const records: Record<string, Vector> = {}
for (const columnsKey of columns) {
if (columnsKey === 'vector') {
const children = new Field<Float32>('item', new Float32())
const list = new List(children)
const listBuilder = makeBuilder({
type: list
})
const vectorSize = (data[0].vector as any[]).length
for (const datum of data) {
if ((datum[columnsKey] as any[]).length !== vectorSize) {
throw new Error(`Invalid vector size, expected ${vectorSize}`)
}
listBuilder.append(datum[columnsKey])
}
records[columnsKey] = listBuilder.finish().toVector()
} else {
const values = []
for (const datum of data) {
values.push(datum[columnsKey])
}
records[columnsKey] = vectorFromArray(values)
}
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
* @param embeddings An embedding function to use on this Table
*/
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings))
if (embeddings !== undefined) {
return new Table(tbl, name, embeddings)
} else {
return new Table(tbl, name)
}
const table = new ArrowTable(records)
await this.createTableArrow(name, table)
return await this.openTable(name)
}

@@ -117,12 +113,18 @@

/**
* A table in a LanceDB database.
*/
export class Table {
export class Table<T = number[]> {
private readonly _tbl: any
private readonly _name: string
private readonly _embeddings?: EmbeddingFunction<T>
constructor (tbl: any, name: string) {
constructor (tbl: any, name: string)
/**
* @param tbl
* @param name
* @param embeddings An embedding function to use when interacting with this table
*/
constructor (tbl: any, name: string, embeddings: EmbeddingFunction<T>)
constructor (tbl: any, name: string, embeddings?: EmbeddingFunction<T>) {
this._tbl = tbl
this._name = name
this._embeddings = embeddings
}

@@ -135,10 +137,95 @@

/**
* Create a search query to find the nearest neighbors of the given query vector.
* @param queryVector The query vector.
*/
search (queryVector: number[]): Query {
* Creates a search query to find the nearest neighbors of the given search term
* @param query The query search term
*/
search (query: T): Query {
  const embedder = this._embeddings
  if (embedder === undefined) {
    // No embedding function configured: the query itself is used as the vector.
    return new Query(this._tbl, query as number[])
  }
  // Embed the single search term and search with the first resulting vector.
  const queryVector: number[] = embedder.embed([query])[0]
  return new Query(this._tbl, queryVector)
}
/**
 * Appends records to this Table.
 *
 * The records are first converted with `fromRecordsToBuffer`, which also receives
 * this table's embedding function (if one was supplied), and the result is handed
 * to the native `tableAdd` call in `WriteMode.Append` mode.
 *
 * @param data Records to be inserted into the Table
 * @returns The number of rows added to the table
 */
async add (data: Array<Record<string, unknown>>): Promise<number> {
  const payload = await fromRecordsToBuffer(data, this._embeddings)
  const mode = WriteMode.Append.toString()
  return tableAdd.call(this._tbl, payload, mode)
}
/**
 * Replaces the contents of this Table with the given records.
 *
 * The records are first converted with `fromRecordsToBuffer`, which also receives
 * this table's embedding function (if one was supplied), and the result is handed
 * to the native `tableAdd` call in `WriteMode.Overwrite` mode.
 *
 * @param data Records to be inserted into the Table
 * @returns The number of rows added to the table
 */
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
  const payload = await fromRecordsToBuffer(data, this._embeddings)
  const mode = WriteMode.Overwrite.toString()
  return tableAdd.call(this._tbl, payload, mode)
}
/**
 * Builds an ANN (vector) index on this Table by forwarding the parameters to the
 * native `tableCreateVectorIndex` call.
 *
 * @param indexParams The parameters of this Index, @see VectorIndexParams.
 * @returns Whatever the native call resolves to; its shape is not defined in this file.
 */
async create_index (indexParams: VectorIndexParams): Promise<any> {
  const nativeTable = this._tbl
  return tableCreateVectorIndex.call(nativeTable, indexParams)
}
}
/**
* Parameters for building a vector index of type `'ivf_pq'`.
*
* Every field except `type` is optional. The values used for omitted fields are
* not defined in this file (presumably defaults come from the native library — TODO confirm).
*/
interface IvfPQIndexConfig {
/**
* The column to be indexed
*/
column?: string
/**
* A unique name for the index
*/
index_name?: string
/**
* Metric type, L2 or Cosine
*/
metric_type?: MetricType
/**
* The number of partitions of this index
*/
num_partitions?: number
/**
* The max number of iterations for kmeans training.
*/
max_iters?: number
/**
* Train as optimized product quantization.
*/
use_opq?: boolean
/**
* Number of subvectors to build PQ code
*/
num_sub_vectors?: number
/**
* The number of bits used to represent one PQ centroid.
*/
num_bits?: number
/**
* Max number of iterations to train OPQ, if `use_opq` is true.
*/
max_opq_iters?: number
/**
* Discriminant for the index kind; `'ivf_pq'` is the only value this interface accepts.
*/
type: 'ivf_pq'
}
export type VectorIndexParams = IvfPQIndexConfig
/**

@@ -149,20 +236,25 @@ * A builder for nearest neighbor queries for LanceDB.

private readonly _tbl: any
private readonly _query_vector: number[]
private readonly _queryVector: number[]
private _limit: number
private readonly _refine_factor?: number
private readonly _nprobes: number
private _refineFactor?: number
private _nprobes: number
private readonly _columns?: string[]
private _filter?: string
private readonly _metric = 'L2'
private _metricType?: MetricType
constructor (tbl: any, queryVector: number[]) {
this._tbl = tbl
this._query_vector = queryVector
this._queryVector = queryVector
this._limit = 10
this._nprobes = 20
this._refine_factor = undefined
this._refineFactor = undefined
this._columns = undefined
this._filter = undefined
this._metricType = undefined
}
/**
* Sets the number of results that will be returned
* @param value number of results
*/
limit (value: number): Query {

@@ -173,2 +265,24 @@ this._limit = value

/**
* Refine the results by reading extra elements and re-ranking them in memory.
* @param value refine factor to use in this query.
*/
refineFactor (value: number): Query {
this._refineFactor = value
return this
}
/**
* The number of probes used. A higher number makes search more accurate but also slower.
* @param value The number of probes used.
*/
nprobes (value: number): Query {
this._nprobes = value
return this
}
/**
* A filter statement to be applied to this query.
* @param value A filter in the same format used by a sql WHERE clause.
*/
filter (value: string): Query {

@@ -180,11 +294,15 @@ this._filter = value

/**
* Execute the query and return the results as an Array of Objects
*/
* The MetricType used for this Query.
* @param value The metric to use. @see MetricType for the different options
*/
metricType (value: MetricType): Query {
this._metricType = value
return this
}
/**
* Execute the query and return the results as an Array of Objects
*/
async execute<T = Record<string, unknown>> (): Promise<T[]> {
let buffer
if (this._filter != null) {
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter)
} else {
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit)
}
const buffer = await tableSearch.call(this._tbl, this)
const data = tableFromIPC(buffer)

@@ -204,1 +322,36 @@ return data.toArray().map((entry: Record<string, unknown>) => {

}
export enum WriteMode {
Overwrite = 'overwrite',
Append = 'append'
}
/**
* An embedding function that automatically creates vector representation for a given column.
*
* @typeParam T The type of the values accepted by `embed`.
*/
export interface EmbeddingFunction<T> {
/**
* The name of the column that will be used as input for the Embedding Function.
*/
sourceColumn: string
/**
* Creates a vector representation for the given values.
*
* @param data The values to embed.
* @returns An array of vectors; presumably one per input value, in input order — TODO confirm
* (`Table.search` embeds a single query and uses element 0 of the result).
*/
embed: (data: T[]) => number[][]
}
/**
* Distance metrics type.
*/
export enum MetricType {
/**
* Euclidean distance
*/
L2 = 'l2',
/**
* Cosine distance
*/
Cosine = 'cosine'
}

@@ -20,2 +20,3 @@ // Copyright 2023 Lance Developers.

import * as lancedb from '../index'
import { type EmbeddingFunction, MetricType, Query } from '../index'

@@ -71,3 +72,3 @@ describe('LanceDB client', function () {

const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).filter('id == 2').execute()
const results = await table.search([0.1, 0.1]).filter('id == 2').execute()
assert.equal(results.length, 1)

@@ -95,13 +96,112 @@ assert.equal(results[0].id, 2)

})
it('appends records to an existing table ', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10, name: 'a' },
{ id: 2, vector: [1.1, 1.2], price: 50, name: 'b' }
]
const table = await con.createTable('vectors', data)
const results = await table.search([0.1, 0.3]).execute()
assert.equal(results.length, 2)
const dataAdd = [
{ id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
{ id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
]
await table.add(dataAdd)
const resultsAdd = await table.search([0.1, 0.3]).execute()
assert.equal(resultsAdd.length, 4)
})
it('overwrite all records in a table', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).execute()
assert.equal(results.length, 2)
const dataOver = [
{ vector: [2.1, 2.2], price: 10, name: 'foo' },
{ vector: [3.1, 3.2], price: 50, name: 'bar' }
]
await table.overwrite(dataOver)
const resultsAdd = await table.search([0.1, 0.3]).execute()
assert.equal(resultsAdd.length, 2)
})
})
describe('when creating a vector index', function () {
  // Smoke test: it passes as long as create_index resolves without throwing.
  // The title previously read 'overwrite all records in a table', copied from
  // the preceding test, and did not describe what is exercised here.
  it('creates an ivf_pq index on the vector column', async function () {
    // createTestDB(numDimensions, numRows): 32-dimensional vectors, 300 rows.
    const uri = await createTestDB(32, 300)
    const con = await lancedb.connect(uri)
    const table = await con.openTable('vectors')
    await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
  }).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
})
describe('when using a custom embedding function', function () {
class TextEmbedding implements EmbeddingFunction<string> {
sourceColumn: string
constructor (targetColumn: string) {
this.sourceColumn = targetColumn
}
_embedding_map = new Map<string, number[]>([
['foo', [2.1, 2.2]],
['bar', [3.1, 3.2]]
])
embed (data: string[]): number[][] {
return data.map(datum => this._embedding_map.get(datum) ?? [0.0, 0.0])
}
}
it('should encode the original data into embeddings', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const embeddings = new TextEmbedding('name')
const data = [
{ price: 10, name: 'foo' },
{ price: 50, name: 'bar' }
]
const table = await con.createTable('vectors', data, embeddings)
const results = await table.search('foo').execute()
assert.equal(results.length, 2)
})
})
})
async function createTestDB (): Promise<string> {
describe('Query object', function () {
it('sets custom parameters', async function () {
const query = new Query(undefined, [0.1, 0.3])
.limit(1)
.metricType(MetricType.Cosine)
.refineFactor(100)
.nprobes(20) as Record<string, any>
assert.equal(query._limit, 1)
assert.equal(query._metricType, MetricType.Cosine)
assert.equal(query._refineFactor, 100)
assert.equal(query._nprobes, 20)
})
})
async function createTestDB (numDimensions: number = 2, numRows: number = 2): Promise<string> {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const data = [
{ id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
{ id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
]
const data = []
for (let i = 0; i < numRows; i++) {
const vector = []
for (let j = 0; j < numDimensions; j++) {
vector.push(i + (j * 0.1))
}
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
}

@@ -108,0 +208,0 @@ await con.createTable('vectors', data)

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc