
ollama-ai-provider


ollama-ai-provider - npm Package Compare versions

Comparing version 0.14.0 to 0.15.0

@@ -11,2 +11,18 @@ import { LanguageModelV1, ProviderV1, EmbeddingModelV1 } from '@ai-sdk/provider';

+  /**
+   * Enables the use of half-precision floating point values for key-value memory. This helps in optimizing memory usage. (Default: true)
+   */
+  f16Kv?: boolean;
+  /**
+   * If set to true, reduces the VRAM usage by trading off speed for memory. (Default: false)
+   */
+  lowVram?: boolean;
+  /**
+   * Sets which GPU is the main one.
+   */
+  mainGpu?: number;
+  /**
+   * Minimum cumulative probability for tokens to be considered. (Default: 0.0)
+   */
+  minP?: number;
   /**
    * Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)

@@ -26,2 +42,10 @@ */

+  /**
+   * Controls whether to use Non-Uniform Memory Access (NUMA) for more efficient memory management. (Default: false)
+   */
+  numa?: boolean;
+  /**
+   * Sets the number of batches to be processed. (Default: 512)
+   */
+  numBatch?: number;
   /**
    * Sets the size of the context window used to generate the next token. (Default: 2048)
@@ -31,2 +55,22 @@ */

+  /**
+   * Controls the number of GPUs to use for the operation. (Default: -1, indicates that NumGPU should be set dynamically)
+   */
+  numGpu?: number;
+  /**
+   * Keeps a number of tokens from the context. Controls how many of the previous tokens are retained. (Default: 4)
+   */
+  numKeep?: number;
+  /**
+   * Controls the number of tokens to predict in a single generation. (Default: -1)
+   */
+  numPredict?: number;
+  /**
+   * Sets the number of CPU threads to use. (Default: 0, indicates let the runtime decide)
+   */
+  numThread?: number;
+  /**
+   * Penalizes the model for generating newline characters. If set to true, it discourages the model from generating too many newlines. (Default: true)
+   */
+  penalizeNewline?: boolean;
   /**
    * Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)

@@ -59,2 +103,18 @@ */

   topK?: number;
+  /**
+   * Controls the "typical" sampling probability. (Default: 1.0)
+   */
+  typicalP?: number;
+  /**
+   * Locks the memory to prevent swapping, which can be useful for performance optimization. (Default: false)
+   */
+  useMlock?: boolean;
+  /**
+   * Enables memory mapping to reduce RAM usage. (Default: false)
+   */
+  useMmap?: boolean;
+  /**
+   * If true, the model will only load the vocabulary without performing further computation. (Default: false)
+   */
+  vocabOnly?: boolean;
 }

@@ -61,0 +121,0 @@
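Taken together, the new type definitions expose Ollama's memory, hardware, and sampling knobs directly on the provider's settings object. A minimal sketch of how they might be used, assuming the package follows the standard Vercel AI SDK provider pattern of ollama(modelId, settings) together with generateText from the ai package; the model id and the specific values below are purely illustrative:

import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';

// Illustrative model id; any model already pulled into the local
// Ollama instance would work here.
const model = ollama('llama2', {
  // Hardware and memory tuning knobs typed in 0.15.0
  f16Kv: true,
  lowVram: false,
  mainGpu: 0,
  numBatch: 512,
  numThread: 8,
  // New sampling controls
  minP: 0.05,
  typicalP: 1.0,
});

const { text } = await generateText({
  model,
  prompt: 'Why is the sky blue?',
});
console.log(text);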

@@ -348,8 +348,18 @@ "use strict";

   options: removeUndefined({
+    f16_kv: this.settings.f16Kv,
     frequency_penalty: frequencyPenalty,
+    low_vram: this.settings.lowVram,
+    main_gpu: this.settings.mainGpu,
+    min_p: this.settings.minP,
     mirostat: this.settings.mirostat,
     mirostat_eta: this.settings.mirostatEta,
     mirostat_tau: this.settings.mirostatTau,
+    num_batch: this.settings.numBatch,
     num_ctx: this.settings.numCtx,
+    num_gpu: this.settings.numGpu,
+    num_keep: this.settings.numKeep,
     num_predict: maxTokens,
+    num_thread: this.settings.numThread,
+    numa: this.settings.numa,
+    penalize_newline: this.settings.penalizeNewline,
     presence_penalty: presencePenalty,

@@ -363,3 +373,7 @@ repeat_last_n: this.settings.repeatLastN,

     top_k: (_b = this.settings.topK) != null ? _b : topK,
-    top_p: topP
+    top_p: topP,
+    typical_p: this.settings.typicalP,
+    use_mlock: this.settings.useMlock,
+    use_mmap: this.settings.useMmap,
+    vocab_only: this.settings.vocabOnly
   })

@@ -366,0 +380,0 @@ };
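Each camelCase setting is mapped to its snake_case Ollama counterpart and funneled through a removeUndefined helper, so options the caller never set stay out of the request body and Ollama's own defaults apply. The helper's implementation is not part of this diff; a hypothetical sketch of what such a utility typically looks like (the package's actual code may differ):

// Hypothetical sketch of a removeUndefined-style helper: drops every
// key whose value is undefined so it never reaches the Ollama API.
function removeUndefined(
  obj: Record<string, unknown>
): Record<string, unknown> {
  return Object.fromEntries(
    Object.entries(obj).filter(([, value]) => value !== undefined)
  );
}

// Unset settings vanish from the payload entirely:
removeUndefined({ num_ctx: 2048, min_p: undefined }); // => { num_ctx: 2048 }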

 {
   "name": "ollama-ai-provider",
-  "version": "0.14.0",
+  "version": "0.15.0",
   "description": "Vercel AI Provider for running LLMs locally using Ollama",

@@ -5,0 +5,0 @@ "main": "./dist/index.js",
