Skip to content

Commit 615b9d9

Browse files
AyushExel and jacoblee93
authored Sep 17, 2024
feat(community): Remove required param from LanceDB integration (#6706)
Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
1 parent 3cdb5e8 commit 615b9d9

File tree

7 files changed

+159
-77
lines changed

7 files changed

+159
-77
lines changed
 

‎examples/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
"typeorm": "^0.3.20",
103103
"typesense": "^1.5.3",
104104
"uuid": "^10.0.0",
105-
"vectordb": "^0.1.4",
105+
"vectordb": "^0.9.0",
106106
"voy-search": "0.6.2",
107107
"weaviate-ts-client": "^2.0.0",
108108
"zod": "^3.22.4",

‎examples/src/indexes/vector_stores/lancedb/fromDocs.ts

+16-11
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,29 @@ import { TextLoader } from "langchain/document_loaders/fs/text";
44
import fs from "node:fs/promises";
55
import path from "node:path";
66
import os from "node:os";
7-
import { connect } from "vectordb";
87

98
// Create docs with a loader
109
const loader = new TextLoader("src/document_loaders/example_data/example.txt");
1110
const docs = await loader.load();
1211

1312
export const run = async () => {
13+
const vectorStore = await LanceDB.fromDocuments(docs, new OpenAIEmbeddings());
14+
15+
const resultOne = await vectorStore.similaritySearch("hello world", 1);
16+
console.log(resultOne);
17+
18+
// [
19+
// Document {
20+
// pageContent: 'Foo\nBar\nBaz\n\n',
21+
// metadata: { source: 'src/document_loaders/example_data/example.txt' }
22+
// }
23+
// ]
24+
};
25+
26+
export const run_with_existing_table = async () => {
1427
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-"));
15-
const db = await connect(dir);
16-
const table = await db.createTable("vectors", [
17-
{ vector: Array(1536), text: "sample", source: "a" },
18-
]);
19-
20-
const vectorStore = await LanceDB.fromDocuments(
21-
docs,
22-
new OpenAIEmbeddings(),
23-
{ table }
24-
);
28+
29+
const vectorStore = await LanceDB.fromDocuments(docs, new OpenAIEmbeddings());
2530

2631
const resultOne = await vectorStore.similaritySearch("hello world", 1);
2732
console.log(resultOne);

‎examples/src/indexes/vector_stores/lancedb/fromTexts.ts

+13-8
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,27 @@
11
import { LanceDB } from "@langchain/community/vectorstores/lancedb";
22
import { OpenAIEmbeddings } from "@langchain/openai";
3-
import { connect } from "vectordb";
43
import * as fs from "node:fs/promises";
54
import * as path from "node:path";
65
import os from "node:os";
76

87
export const run = async () => {
9-
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-"));
10-
const db = await connect(dir);
11-
const table = await db.createTable("vectors", [
12-
{ vector: Array(1536), text: "sample", id: 1 },
13-
]);
8+
const vectorStore = await LanceDB.fromTexts(
9+
["Hello world", "Bye bye", "hello nice world"],
10+
[{ id: 2 }, { id: 1 }, { id: 3 }],
11+
new OpenAIEmbeddings()
12+
);
1413

14+
const resultOne = await vectorStore.similaritySearch("hello world", 1);
15+
console.log(resultOne);
16+
// [ Document { pageContent: 'hello nice world', metadata: { id: 3 } } ]
17+
};
18+
19+
export const run_with_existing_table = async () => {
20+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-"));
1521
const vectorStore = await LanceDB.fromTexts(
1622
["Hello world", "Bye bye", "hello nice world"],
1723
[{ id: 2 }, { id: 1 }, { id: 3 }],
18-
new OpenAIEmbeddings(),
19-
{ table }
24+
new OpenAIEmbeddings()
2025
);
2126

2227
const resultOne = await vectorStore.similaritySearch("hello world", 1);

‎libs/langchain-community/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@
207207
"typescript": "~5.1.6",
208208
"typesense": "^1.5.3",
209209
"usearch": "^1.1.1",
210-
"vectordb": "^0.1.4",
210+
"vectordb": "^0.9.0",
211211
"voy-search": "0.6.2",
212212
"weaviate-ts-client": "^1.4.0",
213213
"web-auth-library": "^1.0.3",

‎libs/langchain-community/src/vectorstores/lancedb.ts

+34-9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Table } from "vectordb";
1+
import { connect, Table, Connection, WriteMode } from "vectordb";
22
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
33
import { VectorStore } from "@langchain/core/vectorstores";
44
import { Document } from "@langchain/core/documents";
@@ -8,8 +8,11 @@ import { Document } from "@langchain/core/documents";
88
* table and an optional textKey.
99
*/
1010
export type LanceDBArgs = {
11-
table: Table;
11+
table?: Table;
1212
textKey?: string;
13+
uri?: string;
14+
tableName?: string;
15+
mode?: WriteMode;
1316
};
1417

1518
/**
@@ -18,15 +21,24 @@ export type LanceDBArgs = {
1821
* embeddings.
1922
*/
2023
export class LanceDB extends VectorStore {
21-
private table: Table;
24+
private table?: Table;
2225

2326
private textKey: string;
2427

25-
constructor(embeddings: EmbeddingsInterface, args: LanceDBArgs) {
26-
super(embeddings, args);
27-
this.table = args.table;
28+
private uri: string;
29+
30+
private tableName: string;
31+
32+
private mode?: WriteMode;
33+
34+
constructor(embeddings: EmbeddingsInterface, args?: LanceDBArgs) {
35+
super(embeddings, args || {});
36+
this.table = args?.table;
2837
this.embeddings = embeddings;
29-
this.textKey = args.textKey || "text";
38+
this.textKey = args?.textKey || "text";
39+
this.uri = args?.uri || "~/lancedb";
40+
this.tableName = args?.tableName || "langchain";
41+
this.mode = args?.mode || WriteMode.Overwrite;
3042
}
3143

3244
/**
@@ -71,6 +83,14 @@ export class LanceDB extends VectorStore {
7183
});
7284
data.push(record);
7385
}
86+
if (!this.table) {
87+
const db: Connection = await connect(this.uri);
88+
this.table = await db.createTable(this.tableName, data, {
89+
writeMode: this.mode,
90+
});
91+
92+
return;
93+
}
7494
await this.table.add(data);
7595
}
7696

@@ -85,6 +105,11 @@ export class LanceDB extends VectorStore {
85105
query: number[],
86106
k: number
87107
): Promise<[Document, number][]> {
108+
if (!this.table) {
109+
throw new Error(
110+
"Table not found. Please add vectors to the table first."
111+
);
112+
}
88113
const results = await this.table.search(query).limit(k).execute();
89114

90115
const docsAndScore: [Document, number][] = [];
@@ -119,7 +144,7 @@ export class LanceDB extends VectorStore {
119144
texts: string[],
120145
metadatas: object[] | object,
121146
embeddings: EmbeddingsInterface,
122-
dbConfig: LanceDBArgs
147+
dbConfig?: LanceDBArgs
123148
): Promise<LanceDB> {
124149
const docs: Document[] = [];
125150
for (let i = 0; i < texts.length; i += 1) {
@@ -143,7 +168,7 @@ export class LanceDB extends VectorStore {
143168
static async fromDocuments(
144169
docs: Document[],
145170
embeddings: EmbeddingsInterface,
146-
dbConfig: LanceDBArgs
171+
dbConfig?: LanceDBArgs
147172
): Promise<LanceDB> {
148173
const instance = new this(embeddings, dbConfig);
149174
await instance.addDocuments(docs);

‎libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts

+24
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,27 @@ describe("LanceDB", () => {
4545
expect(resultsTwo.length).toBe(5);
4646
});
4747
});
48+
49+
describe("LanceDB empty schema", () => {
50+
test("Test fromTexts + addDocuments", async () => {
51+
const embeddings = new OpenAIEmbeddings();
52+
const vectorStore = await LanceDB.fromTexts(
53+
["hello bye", "hello world", "bye bye"],
54+
[{ id: 1 }, { id: 2 }, { id: 3 }],
55+
embeddings
56+
);
57+
58+
const results = await vectorStore.similaritySearch("hello bye", 10);
59+
expect(results.length).toBe(3);
60+
61+
await vectorStore.addDocuments([
62+
new Document({
63+
pageContent: "a new world",
64+
metadata: { id: 4 },
65+
}),
66+
]);
67+
68+
const resultsTwo = await vectorStore.similaritySearch("hello bye", 10);
69+
expect(resultsTwo.length).toBe(4);
70+
});
71+
});

‎yarn.lock

+70-47
Original file line numberDiff line numberDiff line change
@@ -251,24 +251,6 @@ __metadata:
251251
languageName: node
252252
linkType: hard
253253

254-
"@apache-arrow/ts@npm:^12.0.0":
255-
version: 12.0.0
256-
resolution: "@apache-arrow/ts@npm:12.0.0"
257-
dependencies:
258-
"@types/command-line-args": 5.2.0
259-
"@types/command-line-usage": 5.0.2
260-
"@types/node": 18.14.5
261-
"@types/pad-left": 2.1.1
262-
command-line-args: 5.2.1
263-
command-line-usage: 6.1.3
264-
flatbuffers: 23.3.3
265-
json-bignum: ^0.0.3
266-
pad-left: ^2.1.0
267-
tslib: ^2.5.0
268-
checksum: 67b2791e14d5377b1d160a0d8390decc386e013c517713f8b9c100737a0e478a394086d91a8c846848d4e30289070a119d8e65191998f4c2555b18a29564df50
269-
languageName: node
270-
linkType: hard
271-
272254
"@apify/consts@npm:^2.13.0, @apify/consts@npm:^2.9.0":
273255
version: 2.13.0
274256
resolution: "@apify/consts@npm:2.13.0"
@@ -11112,6 +11094,41 @@ __metadata:
1111211094
languageName: node
1111311095
linkType: hard
1111411096

11097+
"@lancedb/vectordb-darwin-arm64@npm:0.4.20":
11098+
version: 0.4.20
11099+
resolution: "@lancedb/vectordb-darwin-arm64@npm:0.4.20"
11100+
conditions: os=darwin & cpu=arm64
11101+
languageName: node
11102+
linkType: hard
11103+
11104+
"@lancedb/vectordb-darwin-x64@npm:0.4.20":
11105+
version: 0.4.20
11106+
resolution: "@lancedb/vectordb-darwin-x64@npm:0.4.20"
11107+
conditions: os=darwin & cpu=x64
11108+
languageName: node
11109+
linkType: hard
11110+
11111+
"@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20":
11112+
version: 0.4.20
11113+
resolution: "@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20"
11114+
conditions: os=linux & cpu=arm64
11115+
languageName: node
11116+
linkType: hard
11117+
11118+
"@lancedb/vectordb-linux-x64-gnu@npm:0.4.20":
11119+
version: 0.4.20
11120+
resolution: "@lancedb/vectordb-linux-x64-gnu@npm:0.4.20"
11121+
conditions: os=linux & cpu=x64
11122+
languageName: node
11123+
linkType: hard
11124+
11125+
"@lancedb/vectordb-win32-x64-msvc@npm:0.4.20":
11126+
version: 0.4.20
11127+
resolution: "@lancedb/vectordb-win32-x64-msvc@npm:0.4.20"
11128+
conditions: os=win32 & cpu=x64
11129+
languageName: node
11130+
linkType: hard
11131+
1111511132
"@langchain/anthropic@*, @langchain/anthropic@workspace:*, @langchain/anthropic@workspace:libs/langchain-anthropic":
1111611133
version: 0.0.0-use.local
1111711134
resolution: "@langchain/anthropic@workspace:libs/langchain-anthropic"
@@ -11577,7 +11594,7 @@ __metadata:
1157711594
typesense: ^1.5.3
1157811595
usearch: ^1.1.1
1157911596
uuid: ^10.0.0
11580-
vectordb: ^0.1.4
11597+
vectordb: ^0.9.0
1158111598
voy-search: 0.6.2
1158211599
weaviate-ts-client: ^1.4.0
1158311600
web-auth-library: ^1.0.3
@@ -13039,6 +13056,13 @@ __metadata:
1303913056
languageName: node
1304013057
linkType: hard
1304113058

13059+
"@neon-rs/load@npm:^0.0.74":
13060+
version: 0.0.74
13061+
resolution: "@neon-rs/load@npm:0.0.74"
13062+
checksum: d26ec9b08cdf1a7c5aeefe98f77112d205d11b4005a7934b21fe8fd27528847e08e4749e7e6c3fc05ae9f701175a58c11a095ae6af449634df3991a2c82e1dfa
13063+
languageName: node
13064+
linkType: hard
13065+
1304213066
"@neondatabase/serverless@npm:0.6.0":
1304313067
version: 0.6.0
1304413068
resolution: "@neondatabase/serverless@npm:0.6.0"
@@ -20774,26 +20798,6 @@ __metadata:
2077420798
languageName: node
2077520799
linkType: hard
2077620800

20777-
"apache-arrow@npm:^12.0.0":
20778-
version: 12.0.0
20779-
resolution: "apache-arrow@npm:12.0.0"
20780-
dependencies:
20781-
"@types/command-line-args": 5.2.0
20782-
"@types/command-line-usage": 5.0.2
20783-
"@types/node": 18.14.5
20784-
"@types/pad-left": 2.1.1
20785-
command-line-args: 5.2.1
20786-
command-line-usage: 6.1.3
20787-
flatbuffers: 23.3.3
20788-
json-bignum: ^0.0.3
20789-
pad-left: ^2.1.0
20790-
tslib: ^2.5.0
20791-
bin:
20792-
arrow2csv: bin/arrow2csv.js
20793-
checksum: 3285189517c2b298cda42852321ce127754918513116eade6e4914c57983f68b6ba96605cfaa2202796d3d6e14755d3b3758f76c1374492affa3d95714eaca40
20794-
languageName: node
20795-
linkType: hard
20796-
2079720801
"apache-arrow@npm:^12.0.1":
2079820802
version: 12.0.1
2079920803
resolution: "apache-arrow@npm:12.0.1"
@@ -27133,7 +27137,7 @@ __metadata:
2713327137
typescript: ~5.1.6
2713427138
typesense: ^1.5.3
2713527139
uuid: ^10.0.0
27136-
vectordb: ^0.1.4
27140+
vectordb: ^0.9.0
2713727141
voy-search: 0.6.2
2713827142
weaviate-ts-client: ^2.0.0
2713927143
zod: ^3.22.4
@@ -42444,13 +42448,32 @@ __metadata:
4244442448
languageName: node
4244542449
linkType: hard
4244642450

42447-
"vectordb@npm:^0.1.4":
42448-
version: 0.1.4
42449-
resolution: "vectordb@npm:0.1.4"
42450-
dependencies:
42451-
"@apache-arrow/ts": ^12.0.0
42452-
apache-arrow: ^12.0.0
42453-
checksum: 8a40abf4466479b0b9e61687416b5ab232458401917bf9a1d5f3d8ea8c8320ecc5691174f4d4c0cfef0bb6c16328a9088419fd90ac85fd7267dbccdd1f9e55d7
42451+
"vectordb@npm:^0.9.0":
42452+
version: 0.9.0
42453+
resolution: "vectordb@npm:0.9.0"
42454+
dependencies:
42455+
"@lancedb/vectordb-darwin-arm64": 0.4.20
42456+
"@lancedb/vectordb-darwin-x64": 0.4.20
42457+
"@lancedb/vectordb-linux-arm64-gnu": 0.4.20
42458+
"@lancedb/vectordb-linux-x64-gnu": 0.4.20
42459+
"@lancedb/vectordb-win32-x64-msvc": 0.4.20
42460+
"@neon-rs/load": ^0.0.74
42461+
axios: ^1.4.0
42462+
peerDependencies:
42463+
"@apache-arrow/ts": ^14.0.2
42464+
apache-arrow: ^14.0.2
42465+
dependenciesMeta:
42466+
"@lancedb/vectordb-darwin-arm64":
42467+
optional: true
42468+
"@lancedb/vectordb-darwin-x64":
42469+
optional: true
42470+
"@lancedb/vectordb-linux-arm64-gnu":
42471+
optional: true
42472+
"@lancedb/vectordb-linux-x64-gnu":
42473+
optional: true
42474+
"@lancedb/vectordb-win32-x64-msvc":
42475+
optional: true
42476+
conditions: (os=darwin | os=linux | os=win32) & (cpu=x64 | cpu=arm64)
4245442477
languageName: node
4245542478
linkType: hard
4245642479

0 commit comments

Comments
 (0)
Please sign in to comment.