semantic-chunking
Advanced tools
Comparing version 2.2.4 to 2.3.0
@@ -5,9 +5,19 @@ # Changelog | ||
## [2.3.0] - 2024-11-11 | ||
### Updated | ||
- Updated `transformers.js` from v2 to v3 | ||
- Migrated quantization option from `onnxEmbeddingModelQuantized` (boolean) to `dtype` ('fp32', 'fp16', 'q8', 'q4') | ||
- Updated Web UI to use new `dtype` option | ||
## [2.2.5] - 2024-11-08 | ||
### Updated | ||
- Updated Web UI styles for smaller screens | ||
## [2.2.4] - 2024-11-08 | ||
### Fixed | ||
- Fixed issue with Web UI embedding cache not being cleared when a new model is initialized. | ||
- Fixed issue with Web UI embedding cache not being cleared when a new model is initialized | ||
## [2.2.3] - 2024-11-07 | ||
### Added | ||
- Web UI adjustments for display of truncated JSON results on screen but still allowing download of full results. | ||
- Web UI adjustments for display of truncated JSON results on screen but still allowing download of full results | ||
@@ -14,0 +24,0 @@ ## [2.2.2] - 2024-11-07 |
@@ -11,3 +11,2 @@ // =========================== | ||
import { env } from '@xenova/transformers'; | ||
import { splitBySentence } from "string-segmenter" | ||
@@ -34,3 +33,4 @@ import { DEFAULT_CONFIG } from './config.js'; | ||
onnxEmbeddingModel = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL, | ||
onnxEmbeddingModelQuantized = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL_QUANTIZED, | ||
dtype = DEFAULT_CONFIG.DTYPE, | ||
onnxEmbeddingModelQuantized, | ||
localModelPath = DEFAULT_CONFIG.LOCAL_MODEL_PATH, | ||
@@ -49,8 +49,12 @@ modelCacheDir = DEFAULT_CONFIG.MODEL_CACHE_DIR, | ||
// Set env variables if provided | ||
if (localModelPath) env.localModelPath = localModelPath; | ||
if (modelCacheDir) env.cacheDir = modelCacheDir; | ||
// if legacy boolean is used (onnxEmbeddingModelQuantized), set dtype (model precision) to 'q8' | ||
if (onnxEmbeddingModelQuantized === true) { dtype = 'q8'; } | ||
// Initialize embedding utilities | ||
const { modelName, isQuantized } = await initializeEmbeddingUtils(onnxEmbeddingModel, onnxEmbeddingModelQuantized); | ||
// Initialize embedding utilities and set optional paths | ||
const { modelName, dtype: usedDtype } = await initializeEmbeddingUtils( | ||
onnxEmbeddingModel, | ||
dtype, | ||
localModelPath, | ||
modelCacheDir | ||
); | ||
@@ -102,3 +106,3 @@ // Process each document | ||
console.log(`--------------`); | ||
console.log(chunk); | ||
console.log(chunk.substring(0, 50) + '...'); | ||
}); | ||
@@ -119,3 +123,3 @@ } | ||
console.log("--------------------"); | ||
console.log(chunk); | ||
console.log(chunk.substring(0, 50) + '...'); | ||
}); | ||
@@ -139,3 +143,3 @@ } | ||
model_name: modelName, | ||
is_model_quantized: isQuantized, | ||
dtype: usedDtype, | ||
text: prefixedChunk | ||
@@ -186,3 +190,4 @@ }; | ||
onnxEmbeddingModel = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL, | ||
onnxEmbeddingModelQuantized = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL_QUANTIZED, | ||
onnxEmbeddingModelQuantized, | ||
dtype = DEFAULT_CONFIG.DTYPE, | ||
localModelPath = DEFAULT_CONFIG.LOCAL_MODEL_PATH, | ||
@@ -201,8 +206,12 @@ modelCacheDir = DEFAULT_CONFIG.MODEL_CACHE_DIR, | ||
// Set env variables if provided | ||
if (localModelPath) env.localModelPath = localModelPath; | ||
if (modelCacheDir) env.cacheDir = modelCacheDir; | ||
// if legacy boolean is used (onnxEmbeddingModelQuantized), set dtype (model precision) to 'q8' | ||
if (onnxEmbeddingModelQuantized === true) { dtype = 'q8'; } | ||
// Initialize embedding utilities | ||
const { modelName, isQuantized } = await initializeEmbeddingUtils(onnxEmbeddingModel, onnxEmbeddingModelQuantized); | ||
// Initialize embedding utilities with the resolved precision and optional paths.
// The second argument of initializeEmbeddingUtils is `dtype`; the legacy boolean
// `onnxEmbeddingModelQuantized` has already been folded into `dtype` (true -> 'q8')
// by the check just above, so `dtype` is what must be forwarded here.
// NOTE(review): the updated initializeEmbeddingUtils returns `{ modelName, dtype }`,
// so `isQuantized` is presumably undefined after this call; the binding is kept only
// because later code in this function (not visible here) may still reference it — confirm.
const { modelName, isQuantized } = await initializeEmbeddingUtils(
    onnxEmbeddingModel,
    dtype,
    localModelPath,
    modelCacheDir
);
@@ -232,3 +241,3 @@ // Process each document | ||
console.log(`--------------`); | ||
console.log(chunk); | ||
console.log(chunk.substring(0, 50) + '...'); | ||
}); | ||
@@ -235,0 +244,0 @@ } |
@@ -11,3 +11,3 @@ export const DEFAULT_CONFIG = { | ||
ONNX_EMBEDDING_MODEL: "Xenova/all-MiniLM-L6-v2", | ||
ONNX_EMBEDDING_MODEL_QUANTIZED: true, | ||
DTYPE: 'fp32', | ||
LOCAL_MODEL_PATH: null, | ||
@@ -14,0 +14,0 @@ MODEL_CACHE_DIR: null, |
@@ -1,2 +0,2 @@ | ||
import { env, pipeline, AutoTokenizer } from '@xenova/transformers'; | ||
import { env, pipeline, AutoTokenizer } from '@huggingface/transformers'; | ||
@@ -10,10 +10,18 @@ let tokenizer; | ||
// -------------------------------------------- | ||
export async function initializeEmbeddingUtils(onnxEmbeddingModel, onnxEmbeddingModelQuantized) { | ||
export async function initializeEmbeddingUtils( | ||
onnxEmbeddingModel, | ||
dtype = 'fp32', | ||
localModelPath = null, | ||
modelCacheDir = null | ||
) { | ||
// Configure environment | ||
env.allowRemoteModels = true; | ||
if (localModelPath) env.localModelPath = localModelPath; | ||
if (modelCacheDir) env.cacheDir = modelCacheDir; | ||
tokenizer = await AutoTokenizer.from_pretrained(onnxEmbeddingModel); | ||
generateEmbedding = await pipeline('feature-extraction', onnxEmbeddingModel, { | ||
quantized: onnxEmbeddingModelQuantized, | ||
dtype: dtype, | ||
}); | ||
// Clear the embedding cache when initializing with a new model | ||
embeddingCache.clear(); | ||
@@ -23,3 +31,3 @@ | ||
modelName: onnxEmbeddingModel, | ||
isQuantized: onnxEmbeddingModelQuantized | ||
dtype: dtype | ||
}; | ||
@@ -26,0 +34,0 @@ } |
@@ -16,3 +16,3 @@ // ------------------------ | ||
let documents = []; | ||
let textFiles = ['./different.txt', './similar.txt']; | ||
let textFiles = ['./example.txt', './different.txt', './similar.txt']; | ||
@@ -42,3 +42,3 @@ // read each text file and add it to the documents array | ||
onnxEmbeddingModel: "nomic-ai/nomic-embed-text-v1.5", | ||
onnxEmbeddingModelQuantized: true, | ||
dtype: "q8", | ||
localModelPath: "../models", | ||
@@ -60,5 +60,5 @@ modelCacheDir: "../models", | ||
console.log("\n\n\n"); | ||
console.log("myTestChunks:"); | ||
console.log(myTestChunks); | ||
// console.log("myTestChunks:"); | ||
// console.log(myTestChunks); | ||
console.log("length: " + myTestChunks.length); | ||
console.log("trackedTimeSeconds: " + trackedTimeSeconds); |
{ | ||
"name": "semantic-chunking", | ||
"version": "2.2.4", | ||
"version": "2.3.0", | ||
"description": "Semantically create chunks from large texts. Useful for workflows involving large language models (LLMs).", | ||
@@ -39,3 +39,3 @@ "repository": { | ||
"dependencies": { | ||
"@xenova/transformers": "^2.17.2", | ||
"@huggingface/transformers": "^3.0.2", | ||
"cli-progress": "^3.12.0", | ||
@@ -42,0 +42,0 @@ "fs": "^0.0.1-security", |
@@ -74,3 +74,3 @@ # 🍱 semantic-chunking | ||
- `onnxEmbeddingModel`: String (optional, default `Xenova/all-MiniLM-L6-v2`) - ONNX model used for creating embeddings. | ||
- `onnxEmbeddingModelQuantized`: Boolean (optional, default `true`) - Indicates whether to use a quantized version of the embedding model. | ||
- `dtype`: String (optional, default `fp32`) - Precision of the embedding model (options: `fp32`, `fp16`, `q8`, `q4`). | ||
- `localModelPath`: String (optional, default `null`) - Local path to save and load models (example: `./models`). | ||
@@ -92,3 +92,3 @@ - `modelCacheDir`: String (optional, default `null`) - Directory to cache downloaded models (example: `./models`). | ||
- `model_name`: String - The name of the embedding model used. | ||
- `is_model_quantized`: Boolean - Indicates whether the embedding model is quantized. | ||
- `dtype`: String - The precision of the embedding model used (options: `fp32`, `fp16`, `q8`, `q4`). | ||
- `text`: String - The chunked text. | ||
@@ -203,3 +203,3 @@ - `embedding`: Array - The embedding vector (if `returnEmbedding` is `true`). | ||
- **Type**: String | ||
- **Default**: `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | ||
- **Default**: `Xenova/all-MiniLM-L6-v2` | ||
- **Description**: Specifies the model used to generate sentence embeddings. Different models may yield different qualities of embeddings, affecting the chunking quality, especially in multilingual contexts. | ||
@@ -219,14 +219,12 @@ - **Resource Link**: [ONNX Embedding Models](https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending) | ||
| Model | Quantized | Link | Size | | ||
| -------------------------------------------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ------- | | ||
| nomic-ai/nomic-embed-text-v1.5 | true | [https://huggingface.co/nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) | 138 MB | | ||
| nomic-ai/nomic-embed-text-v1.5 | false | [https://huggingface.co/nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) | 548 MB | | ||
| Xenova/all-MiniLM-L6-v2 | true | [https://huggingface.co/Xenova/all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) | 23 MB | | ||
| Xenova/all-MiniLM-L6-v2 | false | [https://huggingface.co/Xenova/all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) | 90.4 MB | | ||
| Xenova/paraphrase-multilingual-MiniLM-L12-v2 | true | [https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2) | 118 MB | | ||
| thenlper/gte-base | false | [https://huggingface.co/thenlper/gte-base](https://huggingface.co/thenlper/gte-base) | 436 MB | | ||
| Xenova/all-distilroberta-v1 | true | [https://huggingface.co/Xenova/all-distilroberta-v1](https://huggingface.co/Xenova/all-distilroberta-v1) | 82.1 MB | | ||
| Xenova/all-distilroberta-v1 | false | [https://huggingface.co/Xenova/all-distilroberta-v1](https://huggingface.co/Xenova/all-distilroberta-v1) | 326 MB | | ||
| BAAI/bge-base-en-v1.5 | false | [https://huggingface.co/BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | 436 MB | | ||
| BAAI/bge-small-en-v1.5 | false | [https://huggingface.co/BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) | 133 MB | | ||
| Model | Precision | Link | Size | | ||
| -------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------- | | ||
| nomic-ai/nomic-embed-text-v1.5 | fp32, q8 | [https://huggingface.co/nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) | 548 MB, 138 MB | | ||
| thenlper/gte-base | fp32 | [https://huggingface.co/thenlper/gte-base](https://huggingface.co/thenlper/gte-base) | 436 MB | | ||
| Xenova/all-MiniLM-L6-v2 | fp32, fp16, q8 | [https://huggingface.co/Xenova/all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) | 90 MB, 45 MB, 23 MB | | ||
| Xenova/paraphrase-multilingual-MiniLM-L12-v2 | fp32, fp16, q8 | [https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2) | 470 MB, 235 MB, 118 MB | | ||
| Xenova/all-distilroberta-v1 | fp32, fp16, q8 | [https://huggingface.co/Xenova/all-distilroberta-v1](https://huggingface.co/Xenova/all-distilroberta-v1) | 326 MB, 163 MB, 82 MB | | ||
| BAAI/bge-base-en-v1.5 | fp32 | [https://huggingface.co/BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | 436 MB | | ||
| BAAI/bge-small-en-v1.5 | fp32 | [https://huggingface.co/BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) | 133 MB | | ||
| yashvardhan7/snowflake-arctic-embed-m-onnx | fp32 | [https://huggingface.co/yashvardhan7/snowflake-arctic-embed-m-onnx](https://huggingface.co/yashvardhan7/snowflake-arctic-embed-m-onnx) | 436 MB | | ||
@@ -337,3 +335,3 @@ Each of these parameters allows you to customize the `chunkit` function to better fit the text size, content complexity, and performance requirements of your application. | ||
]; | ||
const myDocumentChunks = await chunkit(documents, { chunkPrefix: "search_document" }); | ||
const myDocumentChunks = await chunkit(documents, { chunkPrefix: "search_document", returnEmbedding: true }); | ||
``` | ||
@@ -346,3 +344,3 @@ | ||
]; | ||
const mySearchQueryChunk = await chunkit(documents, { chunkPrefix: "search_query" }); | ||
const mySearchQueryChunk = await cramit(documents, { chunkPrefix: "search_query", returnEmbedding: true }); | ||
``` | ||
@@ -349,0 +347,0 @@ |
@@ -0,0 +0,0 @@ import { createEmbedding } from './embeddingUtils.js'; |
{ | ||
"name": "semantic-chunking-webui", | ||
"version": "1.1.4", | ||
"version": "1.3.0", | ||
"lockfileVersion": 3, | ||
@@ -9,8 +9,8 @@ "requires": true, | ||
"name": "semantic-chunking-webui", | ||
"version": "1.1.4", | ||
"version": "1.3.0", | ||
"license": "ISC", | ||
"dependencies": { | ||
"cors": "^2.8.5", | ||
"express": "^4.18.3", | ||
"highlight.js": "^11.10.0" | ||
"dotenv": "^16.4.5", | ||
"express": "^4.18.3" | ||
} | ||
@@ -169,2 +169,13 @@ }, | ||
}, | ||
"node_modules/dotenv": { | ||
"version": "16.4.5", | ||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", | ||
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", | ||
"engines": { | ||
"node": ">=12" | ||
}, | ||
"funding": { | ||
"url": "https://dotenvx.com" | ||
} | ||
}, | ||
"node_modules/ee-first": { | ||
@@ -370,10 +381,2 @@ "version": "1.1.1", | ||
}, | ||
"node_modules/highlight.js": { | ||
"version": "11.10.0", | ||
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.10.0.tgz", | ||
"integrity": "sha512-SYVnVFswQER+zu1laSya563s+F8VDGt7o35d4utbamowvUNLLMovFqwCLSocpZTz3MgaSRA1IbqRWZv97dtErQ==", | ||
"engines": { | ||
"node": ">=12.0.0" | ||
} | ||
}, | ||
"node_modules/http-errors": { | ||
@@ -494,5 +497,5 @@ "version": "2.0.0", | ||
"node_modules/object-inspect": { | ||
"version": "1.13.2", | ||
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz", | ||
"integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==", | ||
"version": "1.13.3", | ||
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.3.tgz", | ||
"integrity": "sha512-kDCGIbxkDSXE3euJZZXzc6to7fCrKHNI/hSRQnRuQ+BWjFNzZwiFF8fj/6o2t2G9/jTj8PSIYTfCLelLZEeRpA==", | ||
"engines": { | ||
@@ -499,0 +502,0 @@ "node": ">= 0.4" |
{ | ||
"name": "semantic-chunking-webui", | ||
"version": "1.1.4", | ||
"version": "1.3.0", | ||
"type": "module", | ||
@@ -16,5 +16,5 @@ "description": "Web UI for semantic-chunking library", | ||
"cors": "^2.8.5", | ||
"express": "^4.18.3", | ||
"highlight.js": "^11.10.0" | ||
"dotenv": "^16.4.5", | ||
"express": "^4.18.3" | ||
} | ||
} |
@@ -136,2 +136,12 @@ // Load sample text on page load | ||
/**
 * Smoothly scroll the results panel into view on narrow viewports.
 *
 * Does nothing when the window is wider than 800px or when no
 * `.results-wrapper` element is present in the document.
 */
function scrollToResults() {
    const isNarrowViewport = window.innerWidth <= 800;
    if (!isNarrowViewport) {
        return;
    }

    const resultsWrapper = document.querySelector('.results-wrapper');
    if (resultsWrapper) {
        resultsWrapper.scrollIntoView({ behavior: 'smooth' });
    }
}
// Process form handler | ||
@@ -151,2 +161,5 @@ form.addEventListener('submit', async (e) => { | ||
// Scroll to results as soon as we show the spinner | ||
scrollToResults(); | ||
// Get form data and convert checkbox values to boolean | ||
@@ -260,2 +273,5 @@ const formData = new FormData(form); | ||
// After results are displayed, scroll to them on mobile | ||
scrollToResults(); | ||
} catch (error) { | ||
@@ -266,4 +282,4 @@ console.error('Error:', error); | ||
if (errorMessage.includes('Could not locate file:')) { | ||
errorMessage += '<br><br>Some models may not have both a quantized & non-quantized version,'; | ||
errorMessage += '<br>please toggle this option and try again, or choose a different model'; | ||
errorMessage += '<br><br>Not all models have all precision options available.'; | ||
errorMessage += '<br>Please try a different precision level and/or model and try again.'; | ||
} | ||
@@ -326,3 +342,3 @@ | ||
formData[element.name] = element.checked; | ||
} else if (element.name) { // Only process elements with names | ||
} else if (element.name) { | ||
formData[element.name] = element.value; | ||
@@ -332,7 +348,5 @@ } | ||
// No need for additional processing in generateCode since we're already | ||
// getting the actual boolean values here | ||
codeExample.textContent = generateCode(formData); | ||
modal.style.display = "block"; | ||
// Clear the highlighted state before highlighting again | ||
document.body.style.overflow = 'hidden'; // Prevent body scrolling | ||
delete codeExample.dataset.highlighted; | ||
@@ -344,3 +358,5 @@ hljs.highlightElement(codeExample); | ||
function generateCode(formData) { | ||
// No need to convert checkbox values since they're already booleans | ||
const dtypeValues = ['fp32', 'fp16', 'q8', 'q4']; | ||
const dtype = dtypeValues[parseInt(formData.dtype)]; | ||
return `// import the semantic-chunking library | ||
@@ -370,3 +386,3 @@ import { chunkit } from 'semantic-chunking'; | ||
onnxEmbeddingModel: "${formData.onnxEmbeddingModel}", | ||
onnxEmbeddingModelQuantized: ${formData.onnxEmbeddingModelQuantized}, | ||
dtype: "${dtype}", | ||
localModelPath: "./models", | ||
@@ -388,2 +404,3 @@ modelCacheDir: "./models", | ||
modal.style.display = "none"; | ||
document.body.style.overflow = ''; // Restore body scrolling | ||
}; | ||
@@ -395,2 +412,3 @@ | ||
modal.style.display = "none"; | ||
document.body.style.overflow = ''; // Restore body scrolling | ||
} | ||
@@ -419,2 +437,3 @@ }; | ||
modal.style.display = "none"; | ||
document.body.style.overflow = ''; // Restore body scrolling | ||
}; | ||
@@ -490,2 +509,30 @@ | ||
resizeToggle.classList.toggle('wrapped'); | ||
}); | ||
}); | ||
// Add this to your existing range input handlers | ||
const dtypeInput = document.getElementById('dtype'); | ||
const dtypeDisplay = dtypeInput.nextElementSibling; | ||
/**
 * Refresh the label next to the dtype (model precision) slider.
 *
 * Looks up the slider position in a fixed table and writes the matching
 * text and CSS class into the `.number` and `.description` children of
 * `dtypeDisplay`.
 *
 * @param {string|number} value - Slider position (0-3). Any other value is
 *   ignored and the display is left unchanged instead of throwing.
 */
function updateDtypeDisplay(value) {
    const dtypeValues = {
        0: { text: 'fp32 - Full Precision', class: 'precision-full' },
        1: { text: 'fp16 - Half Precision', class: 'precision-half' },
        2: { text: 'q8 - 8-bit Quantized', class: 'precision-q8' },
        3: { text: 'q4 - 4-bit Quantized', class: 'precision-q4' }
    };

    // Own-key check so out-of-range positions and inherited keys such as
    // "constructor" are rejected rather than dereferenced.
    if (!Object.prototype.hasOwnProperty.call(dtypeValues, value)) {
        return;
    }

    const dtype = dtypeValues[value];
    const number = dtypeDisplay.querySelector('.number');
    const description = dtypeDisplay.querySelector('.description');

    number.className = `number ${dtype.class}`;
    number.textContent = value;
    description.className = `description ${dtype.class}`;
    description.textContent = dtype.text;
}
// Initial update | ||
updateDtypeDisplay(dtypeInput.value); | ||
// Update on change | ||
dtypeInput.addEventListener('input', (e) => updateDtypeDisplay(e.target.value)); |
@@ -30,4 +30,8 @@ { | ||
"label": "BAAI/bge-small-en-v1.5" | ||
}, | ||
{ | ||
"value": "yashvardhan7/snowflake-arctic-embed-m-onnx", | ||
"label": "yashvardhan7/snowflake-arctic-embed-m-onnx" | ||
} | ||
] | ||
} |
@@ -6,3 +6,6 @@ import express from 'express'; | ||
import { chunkit } from '../chunkit.js'; | ||
import dotenv from 'dotenv'; | ||
dotenv.config(); | ||
const __filename = fileURLToPath(import.meta.url); | ||
@@ -34,12 +37,17 @@ const __dirname = path.dirname(__filename); | ||
try { | ||
const { documentText, documentName, ...options } = req.body; | ||
const { documentText, documentName, dtype, onnxEmbeddingModelQuantized, ...options } = req.body; | ||
// Convert dtype value to string mapping | ||
const dtypeValues = ['fp32', 'fp16', 'q8', 'q4']; | ||
const dtypeString = dtypeValues[parseInt(dtype)] || 'fp32'; | ||
// Input validation | ||
if (!documentText) { | ||
return res.status(400).json({ error: 'Document text is required' }); | ||
} | ||
// Process the text with new dtype option | ||
const documents = [{ | ||
document_name: documentName || 'sample text', | ||
document_text: documentText | ||
}]; | ||
// Convert string values to appropriate types | ||
const processedOptions = { | ||
...options, | ||
dtype: dtypeString, | ||
maxTokenSize: parseInt(options.maxTokenSize), | ||
@@ -52,3 +60,2 @@ similarityThreshold: parseFloat(options.similarityThreshold), | ||
combineChunksSimilarityThreshold: parseFloat(options.combineChunksSimilarityThreshold), | ||
onnxEmbeddingModelQuantized: options.onnxEmbeddingModelQuantized === true, | ||
returnEmbedding: options.returnEmbedding === true, | ||
@@ -61,9 +68,3 @@ returnTokenLength: options.returnTokenLength === true, | ||
// Process the text | ||
const documents = [{ | ||
document_name: documentName || 'sample text', | ||
document_text: documentText | ||
}]; | ||
const result = await chunkit(documents, processedOptions); | ||
res.json(result); | ||
@@ -70,0 +71,0 @@ } catch (error) { |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Native code
Supply chain riskContains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
3506574
114
3372
351
5
+ Added@emnapi/runtime@1.3.1(transitive)
+ Added@huggingface/jinja@0.3.2(transitive)
+ Added@huggingface/transformers@3.2.4(transitive)
+ Added@img/sharp-darwin-arm64@0.33.5(transitive)
+ Added@img/sharp-darwin-x64@0.33.5(transitive)
+ Added@img/sharp-libvips-darwin-arm64@1.0.4(transitive)
+ Added@img/sharp-libvips-darwin-x64@1.0.4(transitive)
+ Added@img/sharp-libvips-linux-arm@1.0.5(transitive)
+ Added@img/sharp-libvips-linux-arm64@1.0.4(transitive)
+ Added@img/sharp-libvips-linux-s390x@1.0.4(transitive)
+ Added@img/sharp-libvips-linux-x64@1.0.4(transitive)
+ Added@img/sharp-libvips-linuxmusl-arm64@1.0.4(transitive)
+ Added@img/sharp-libvips-linuxmusl-x64@1.0.4(transitive)
+ Added@img/sharp-linux-arm@0.33.5(transitive)
+ Added@img/sharp-linux-arm64@0.33.5(transitive)
+ Added@img/sharp-linux-s390x@0.33.5(transitive)
+ Added@img/sharp-linux-x64@0.33.5(transitive)
+ Added@img/sharp-linuxmusl-arm64@0.33.5(transitive)
+ Added@img/sharp-linuxmusl-x64@0.33.5(transitive)
+ Added@img/sharp-wasm32@0.33.5(transitive)
+ Added@img/sharp-win32-ia32@0.33.5(transitive)
+ Added@img/sharp-win32-x64@0.33.5(transitive)
+ Added@isaacs/cliui@8.0.2(transitive)
+ Added@isaacs/fs-minipass@4.0.1(transitive)
+ Added@pkgjs/parseargs@0.11.0(transitive)
+ Addedansi-regex@6.1.0(transitive)
+ Addedansi-styles@4.3.0, 6.2.1(transitive)
+ Addedbalanced-match@1.0.2(transitive)
+ Addedbrace-expansion@2.0.1(transitive)
+ Addedchownr@3.0.0(transitive)
+ Addedcross-spawn@7.0.6(transitive)
+ Addedeastasianwidth@0.2.0(transitive)
+ Addedemoji-regex@9.2.2(transitive)
+ Addedforeground-child@3.3.0(transitive)
+ Addedglob@10.4.5(transitive)
+ Addedisexe@2.0.0(transitive)
+ Addedjackspeak@3.4.3(transitive)
+ Addedlong@5.2.3(transitive)
+ Addedlru-cache@10.4.3(transitive)
+ Addedminimatch@9.0.5(transitive)
+ Addedminipass@7.1.2(transitive)
+ Addedminizlib@3.0.1(transitive)
+ Addedmkdirp@3.0.1(transitive)
+ Addedonnxruntime-common@1.20.1, 1.21.0-dev.20241205-6ed77cc374(transitive)
+ Addedonnxruntime-node@1.20.1(transitive)
+ Addedonnxruntime-web@1.21.0-dev.20241205-d27fecd3d3(transitive)
+ Addedpackage-json-from-dist@1.0.1(transitive)
+ Addedpath-key@3.1.1(transitive)
+ Addedpath-scurry@1.11.1(transitive)
+ Addedprotobufjs@7.4.0(transitive)
+ Addedrimraf@5.0.10(transitive)
+ Addedsharp@0.33.5(transitive)
+ Addedshebang-command@2.0.0(transitive)
+ Addedshebang-regex@3.0.0(transitive)
+ Addedsignal-exit@4.1.0(transitive)
+ Addedstring-width@5.1.2(transitive)
+ Addedstrip-ansi@7.1.0(transitive)
+ Addedtar@7.4.3(transitive)
+ Addedtslib@2.8.1(transitive)
+ Addedwhich@2.0.2(transitive)
+ Addedwrap-ansi@7.0.0, 8.1.0(transitive)
+ Addedyallist@5.0.0(transitive)
- Removed@xenova/transformers@^2.17.2
- Removed@huggingface/jinja@0.2.2(transitive)
- Removed@types/long@4.0.2(transitive)
- Removed@xenova/transformers@2.17.2(transitive)
- Removedb4a@1.6.7(transitive)
- Removedbare-events@2.5.0(transitive)
- Removedbare-fs@2.3.5(transitive)
- Removedbare-os@2.4.4(transitive)
- Removedbare-path@2.1.3(transitive)
- Removedbare-stream@2.6.1(transitive)
- Removedbase64-js@1.5.1(transitive)
- Removedbl@4.1.0(transitive)
- Removedbuffer@5.7.1(transitive)
- Removedchownr@1.1.4(transitive)
- Removeddecompress-response@6.0.0(transitive)
- Removeddeep-extend@0.6.0(transitive)
- Removedend-of-stream@1.4.4(transitive)
- Removedexpand-template@2.0.3(transitive)
- Removedfast-fifo@1.3.2(transitive)
- Removedfs-constants@1.0.0(transitive)
- Removedgithub-from-package@0.0.0(transitive)
- Removedieee754@1.2.1(transitive)
- Removedinherits@2.0.4(transitive)
- Removedini@1.3.8(transitive)
- Removedlong@4.0.0(transitive)
- Removedmimic-response@3.1.0(transitive)
- Removedminimist@1.2.8(transitive)
- Removedmkdirp-classic@0.5.3(transitive)
- Removednapi-build-utils@1.0.2(transitive)
- Removednode-abi@3.71.0(transitive)
- Removednode-addon-api@6.1.0(transitive)
- Removedonce@1.4.0(transitive)
- Removedonnx-proto@4.0.4(transitive)
- Removedonnxruntime-common@1.14.0(transitive)
- Removedonnxruntime-node@1.14.0(transitive)
- Removedonnxruntime-web@1.14.0(transitive)
- Removedprebuild-install@7.1.2(transitive)
- Removedprotobufjs@6.11.4(transitive)
- Removedpump@3.0.2(transitive)
- Removedqueue-tick@1.0.1(transitive)
- Removedrc@1.2.8(transitive)
- Removedreadable-stream@3.6.2(transitive)
- Removedsafe-buffer@5.2.1(transitive)
- Removedsharp@0.32.6(transitive)
- Removedsimple-concat@1.0.1(transitive)
- Removedsimple-get@4.0.1(transitive)
- Removedstreamx@2.21.1(transitive)
- Removedstring_decoder@1.3.0(transitive)
- Removedstrip-json-comments@2.0.1(transitive)
- Removedtar-fs@2.1.1, 3.0.6(transitive)
- Removedtar-stream@2.2.0, 3.1.7(transitive)
- Removedtext-decoder@1.2.3(transitive)
- Removedtunnel-agent@0.6.0(transitive)
- Removedutil-deprecate@1.0.2(transitive)
- Removedwrappy@1.0.2(transitive)