ollama-ai-provider 0.14.0 → 0.15.0
@@ -11,2 +11,18 @@ import { LanguageModelV1, ProviderV1, EmbeddingModelV1 } from '@ai-sdk/provider';
/**
 * Enables half-precision floating point values for the key/value cache, which reduces memory usage. (Default: true)
 */
f16Kv?: boolean;
/**
 * If set to true, reduces VRAM usage by trading speed for memory. (Default: false)
 */
lowVram?: boolean;
/**
 * Sets which GPU is the main one.
 */
mainGpu?: number;
/**
 * Minimum probability for a token to be considered, relative to the probability of the most likely token. (Default: 0.0)
 */
minP?: number;
/**
 * Enable Mirostat sampling for controlling perplexity. (Default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
@@ -26,2 +42,10 @@ */
/**
 * Controls whether to use Non-Uniform Memory Access (NUMA) for more efficient memory management. (Default: false)
 */
numa?: boolean;
/**
 * Sets the batch size for prompt processing, i.e. how many tokens are processed in parallel. (Default: 512)
 */
numBatch?: number;
/**
 * Sets the size of the context window used to generate the next token. (Default: 2048)
@@ -31,2 +55,22 @@ */
/**
 * Controls the number of model layers offloaded to the GPU. (Default: -1, set dynamically)
 */
numGpu?: number;
/**
 * Number of tokens from the beginning of the context to retain when the context window overflows. (Default: 4)
 */
numKeep?: number;
/**
 * Controls the number of tokens to predict in a single generation. (Default: -1, infinite generation)
 */
numPredict?: number;
/**
 * Sets the number of CPU threads to use. (Default: 0, lets the runtime decide)
 */
numThread?: number;
/**
 * If set to true, penalizes the generation of newline characters, discouraging excessive newlines. (Default: true)
 */
penalizeNewline?: boolean;
/**
 * Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)
@@ -59,2 +103,18 @@ */
topK?: number;
/**
 * Controls the cutoff for locally typical ("typical-p") sampling. (Default: 1.0, disabled)
 */
typicalP?: number;
/**
 * Locks the model in memory to prevent it from being swapped out, which can help performance. (Default: false)
 */
useMlock?: boolean;
/**
 * Enables memory mapping of the model file to reduce RAM usage. (Default: false)
 */
useMmap?: boolean;
/**
 * If true, only the model's vocabulary is loaded, not the weights. (Default: false)
 */
vocabOnly?: boolean;
}
@@ -61,0 +121,0 @@
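For consumers of the provider, these settings are passed when creating a model instance. A minimal usage sketch, assuming the package's `ollama` factory accepts the settings object as its second argument and is combined with the AI SDK's `generateText` (the model name and values here are illustrative, not defaults):

import { ollama } from 'ollama-ai-provider';
import { generateText } from 'ai';

// The settings added in 0.15.0 map to Ollama's snake_case options;
// anything left undefined is stripped before the request is sent.
const model = ollama('llama3', {
  f16Kv: true,   // half-precision key/value cache
  lowVram: true, // trade speed for lower VRAM usage
  numCtx: 4096,  // context window size
  numThread: 8,  // CPU threads
  minP: 0.05,    // minimum relative token probability
});

const { text } = await generateText({
  model,
  prompt: 'Why is the sky blue?',
});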
@@ -348,8 +348,18 @@ "use strict";
options: removeUndefined({
f16_kv: this.settings.f16Kv,
frequency_penalty: frequencyPenalty,
low_vram: this.settings.lowVram,
main_gpu: this.settings.mainGpu,
min_p: this.settings.minP,
mirostat: this.settings.mirostat,
mirostat_eta: this.settings.mirostatEta,
mirostat_tau: this.settings.mirostatTau,
num_batch: this.settings.numBatch,
num_ctx: this.settings.numCtx,
num_gpu: this.settings.numGpu,
num_keep: this.settings.numKeep,
num_predict: maxTokens,
num_thread: this.settings.numThread,
numa: this.settings.numa,
penalize_newline: this.settings.penalizeNewline,
presence_penalty: presencePenalty,
@@ -363,3 +373,7 @@ repeat_last_n: this.settings.repeatLastN,
top_k: (_b = this.settings.topK) != null ? _b : topK,
top_p: topP
top_p: topP,
typical_p: this.settings.typicalP,
use_mlock: this.settings.useMlock,
use_mmap: this.settings.useMmap,
vocab_only: this.settings.vocabOnly
})
@@ -366,0 +380,0 @@ };
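The compiled output passes the assembled options through a `removeUndefined` helper so that only explicitly set settings reach the Ollama API; the helper's body is not part of this diff. A minimal sketch of what such a helper can look like:

function removeUndefined<T extends Record<string, unknown>>(
  obj: T,
): Partial<T> {
  // Drop entries whose value is undefined so they are omitted
  // from the JSON request body sent to Ollama.
  return Object.fromEntries(
    Object.entries(obj).filter(([, value]) => value !== undefined),
  ) as Partial<T>;
}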
{
"name": "ollama-ai-provider",
"version": "0.14.0",
"version": "0.15.0",
"description": "Vercel AI Provider for running LLMs locally using Ollama",
@@ -5,0 +5,0 @@ "main": "./dist/index.js",