
ollama-ai-provider


ollama-ai-provider - npm Package Compare versions

Comparing version 0.14.0 to 0.15.0

@@ -11,2 +11,18 @@ import { LanguageModelV1, ProviderV1, EmbeddingModelV1 } from '@ai-sdk/provider';

+  /**
+   * Enables the use of half-precision floating point values for key-value memory. This helps in optimizing memory usage. (Default: true)
+   */
+  f16Kv?: boolean;
+  /**
+   * If set to true, reduces the VRAM usage by trading off speed for memory. (Default: false)
+   */
+  lowVram?: boolean;
+  /**
+   * Sets which GPU is the main one.
+   */
+  mainGpu?: number;
+  /**
+   * Minimum cumulative probability for tokens to be considered. (Default: 0.0)
+   */
+  minP?: number;
   /**
    * Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)

@@ -26,2 +42,10 @@ */

+  /**
+   * Controls whether to use Non-Uniform Memory Access (NUMA) for more efficient memory management. (Default: false)
+   */
+  numa?: boolean;
+  /**
+   * Sets the number of batches to be processed. (Default: 512)
+   */
+  numBatch?: number;
   /**
    * Sets the size of the context window used to generate the next token. (Default: 2048)
@@ -31,2 +55,22 @@ */

+  /**
+   * Controls the number of GPUs to use for the operation. (Default: -1, indicates that NumGPU should be set dynamically)
+   */
+  numGpu?: number;
+  /**
+   * Keeps a number of tokens from the context. Controls how many of the previous tokens are retained. (Default: 4)
+   */
+  numKeep?: number;
+  /**
+   * Controls the number of tokens to predict in a single generation. (Default: -1)
+   */
+  numPredict?: number;
+  /**
+   * Sets the number of CPU threads to use. (Default: 0, indicates let the runtime decide)
+   */
+  numThread?: number;
+  /**
+   * Penalizes the model for generating newline characters. If set to true, it discourages the model from generating too many newlines. (Default: true)
+   */
+  penalizeNewline?: boolean;
   /**
    * Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)

@@ -59,2 +103,18 @@ */

   topK?: number;
+  /**
+   * Controls the "typical" sampling probability. (Default: 1.0)
+   */
+  typicalP?: number;
+  /**
+   * Locks the memory to prevent swapping, which can be useful for performance optimization. (Default: false)
+   */
+  useMlock?: boolean;
+  /**
+   * Enables memory mapping to reduce RAM usage. (Default: false)
+   */
+  useMmap?: boolean;
+  /**
+   * If true, the model will only load the vocabulary without performing further computation. (Default: false)
+   */
+  vocabOnly?: boolean;
 }

@@ -61,0 +121,0 @@
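Taken together, the new type definitions expose Ollama's memory, hardware, and sampling knobs directly on the provider's settings object. A minimal sketch of how they might be used, assuming the package follows the standard Vercel AI SDK provider pattern of ollama(modelId, settings) together with generateText from the ai package; the model id and the specific values below are purely illustrative:

import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';

// Illustrative model id; any model already pulled into the local
// Ollama instance would work here.
const model = ollama('llama2', {
  // Hardware and memory tuning knobs typed in 0.15.0
  f16Kv: true,
  lowVram: false,
  mainGpu: 0,
  numBatch: 512,
  numThread: 8,
  // New sampling controls
  minP: 0.05,
  typicalP: 1.0,
});

const { text } = await generateText({
  model,
  prompt: 'Why is the sky blue?',
});
console.log(text);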

@@ -348,8 +348,18 @@ "use strict";

   options: removeUndefined({
+    f16_kv: this.settings.f16Kv,
     frequency_penalty: frequencyPenalty,
+    low_vram: this.settings.lowVram,
+    main_gpu: this.settings.mainGpu,
+    min_p: this.settings.minP,
     mirostat: this.settings.mirostat,
     mirostat_eta: this.settings.mirostatEta,
     mirostat_tau: this.settings.mirostatTau,
+    num_batch: this.settings.numBatch,
     num_ctx: this.settings.numCtx,
+    num_gpu: this.settings.numGpu,
+    num_keep: this.settings.numKeep,
     num_predict: maxTokens,
+    num_thread: this.settings.numThread,
+    numa: this.settings.numa,
+    penalize_newline: this.settings.penalizeNewline,
     presence_penalty: presencePenalty,

@@ -363,3 +373,7 @@ repeat_last_n: this.settings.repeatLastN,

     top_k: (_b = this.settings.topK) != null ? _b : topK,
-    top_p: topP
+    top_p: topP,
+    typical_p: this.settings.typicalP,
+    use_mlock: this.settings.useMlock,
+    use_mmap: this.settings.useMmap,
+    vocab_only: this.settings.vocabOnly
   })

@@ -366,0 +380,0 @@ };
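Each camelCase setting is mapped to its snake_case Ollama counterpart and funneled through a removeUndefined helper, so options the caller never set stay out of the request body and Ollama's own defaults apply. The helper's implementation is not part of this diff; a hypothetical sketch of what such a utility typically looks like (the package's actual code may differ):

// Hypothetical sketch of a removeUndefined-style helper: drops every
// key whose value is undefined so it never reaches the Ollama API.
function removeUndefined(
  obj: Record<string, unknown>
): Record<string, unknown> {
  return Object.fromEntries(
    Object.entries(obj).filter(([, value]) => value !== undefined)
  );
}

// Unset settings vanish from the payload entirely:
removeUndefined({ num_ctx: 2048, min_p: undefined }); // => { num_ctx: 2048 }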

 {
   "name": "ollama-ai-provider",
-  "version": "0.14.0",
+  "version": "0.15.0",
   "description": "Vercel AI Provider for running LLMs locally using Ollama",

@@ -5,0 +5,0 @@ "main": "./dist/index.js",
