embeddings-splitter
Advanced tools
+1
-0
@@ -9,3 +9,4 @@ export declare function split(prompt: string): string[]; | ||
| index: typeof index; | ||
| merge: (chunks: string[], maxLen?: number) => Promise<string>; | ||
| }; | ||
| export default _default; |
+14
-1
@@ -66,2 +66,15 @@ "use strict"; | ||
| // should index chunks | ||
| exports.default = { split, index }; | ||
| const merge = (chunks, maxLen = 1800) => __awaiter(void 0, void 0, void 0, function* () { | ||
| let curLen = 0; | ||
| const context = []; | ||
| for (const chunk of chunks) { | ||
| const nTokens = tokenizer.encode(chunk).length; | ||
| curLen += nTokens + 4; | ||
| if (curLen > maxLen) { | ||
| break; | ||
| } | ||
| context.push(chunk); | ||
| } | ||
| return context.join('\n\n###\n\n'); | ||
| }); | ||
| exports.default = { split, index, merge }; |
+6
-6
| { | ||
| "name": "embeddings-splitter", | ||
| "version": "0.0.2", | ||
| "description": "A typescript library to split your long texts into smaller chunks to send them to OpenAI Embeddins", | ||
| "version": "0.0.3", | ||
| "description": "A typescript library to split your long texts into smaller chunks to send them to OpenAI Embeddings", | ||
| "main": "lib/index.js", | ||
@@ -22,3 +22,3 @@ "types": "lib/index.d.ts", | ||
| "type": "git", | ||
| "url": "git+https://github.com/different-ai/embeddings-splitter.git" | ||
| "url": "git+https://github.com/another-ai/embeddings-splitter.git" | ||
| }, | ||
@@ -30,8 +30,8 @@ "keywords": [ | ||
| ], | ||
| "author": "Different AI", | ||
| "author": "another AI", | ||
| "license": "MIT", | ||
| "bugs": { | ||
| "url": "https://github.com/different-ai/embeddings-splitter/issues" | ||
| "url": "https://github.com/another-ai/embeddings-splitter/issues" | ||
| }, | ||
| "homepage": "https://github.com/different-ai/embeddings-splitter#readme", | ||
| "homepage": "https://github.com/another-ai/embeddings-splitter#readme", | ||
| "devDependencies": { | ||
@@ -38,0 +38,0 @@ "@types/jest": "29.2.4", |
+45
-20
@@ -0,31 +1,57 @@ | ||
| <p align="center">embeddings-splitter</p> | ||
| <p align="center">A typescript library to split texts into chunks so they can be embedded with OpenAI Embeddings</p> | ||
| <a href="https://github.com/hebertcisco/ts-npm-package-boilerplate/issues/new/choose">Request Feature</a> | ||
| <p align="center">long-prompts</p> | ||
| <p align="center"> | ||
| <img alt="Issues" src="https://img.shields.io/github/issues/hebertcisco/ts-npm-package-boilerplate?style=flat&color=336791" /> | ||
| </a> | ||
| <a href="https://github.com/hebertcisco/ts-npm-package-boilerplate/pulls"> | ||
| <img alt="GitHub pull requests" src="https://img.shields.io/github/issues-pr/hebertcisco/ts-npm-package-boilerplate?style=flat&color=336791" /> | ||
| </a> | ||
| <a href="https://github.com/hebertcisco/ts-npm-package-boilerplate"> | ||
| <img alt="GitHub Downloads" src="https://img.shields.io/npm/dw/ts-npm-package-boilerplate?style=flat&color=336791" /> | ||
| </a> | ||
| <a href="https://github.com/hebertcisco/ts-npm-package-boilerplate"> | ||
| <img alt="GitHub Total Downloads" src="https://img.shields.io/npm/dt/ts-npm-package-boilerplate?color=336791&label=Total%20downloads" /> | ||
| </a> | ||
| # Getting started | ||
| <a href="https://github.com/hebertcisco/ts-npm-package-boilerplate/issues/new/choose">Request Feature</a> | ||
| ## Installation | ||
| > npm i embeddings-splitter | ||
| ## Usage | ||
| # Getting started | ||
| ### Split files | ||
| ## Installation | ||
| ```js | ||
| import { split } from 'embeddings-splitter'; | ||
| > npm i long-prompt (coming soon) | ||
| // chunks to iterate on and send to a server | ||
| const chunks = split('someVeryLongText...'); | ||
| ``` | ||
| ### Open the directory and run the script line: | ||
| ### Batch send (experimental) | ||
| ```js | ||
| coming soon | ||
| import {index} from 'embeddings-splitter'; | ||
| // used to send batches to a server in parallel | ||
| index(chunks, async (batch) => { | ||
| // this example is using Embedbase, but it can be replaced with openai.createEmbeddings | ||
| const vaultId = 'youtube video id'; | ||
| await fetch(url + '/v1/' + 'your api key', { | ||
| method: 'POST', | ||
| headers: { | ||
| Authorization: 'Bearer ' + apiKey, | ||
| 'Content-Type': 'application/json', | ||
| }, | ||
| body: JSON.stringify({ | ||
| documents: batch, | ||
| }), | ||
| }); | ||
| }); | ||
| ``` | ||
| ### Merge chunks into single string | ||
| This is useful when you want to do generative search. | ||
| ```js | ||
| import { merge } from 'embeddings-splitter'; | ||
| const chunks = ['i am a text', 'that needs to be interpreted as one ', 'for a prompt to make sense']; | ||
| const merged = await merge(chunks); | ||
| ``` | ||
| ## 🤝 Contributing | ||
@@ -39,5 +65,4 @@ | ||
| ## 📝 License | ||
| Copyright © 2023 Different AI |
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
193256
0.34%1323
1.07%68
58.14%