node-llama-cpp
Comparing versions 1.3.0 to 1.3.1
@@ -10,3 +10,3 @@ export declare class LlamaModel {
  * @param {number} [options.batchSize] - prompt processing batch size
- * @param {number} [options.gpuCores] - number of layers to store in VRAM
+ * @param {number} [options.gpuLayers] - number of layers to store in VRAM
  * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
@@ -20,14 +20,26 @@ * @param {boolean} [options.f16Kv] - use fp16 for KV cache
  */
- constructor({ modelPath, seed, contextSize, batchSize, gpuCores, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
+ constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
+     /** path to the model on the filesystem */
      modelPath: string;
+     /** If null, a random seed will be used */
      seed?: number | null;
+     /** text context size */
      contextSize?: number;
+     /** prompt processing batch size */
      batchSize?: number;
-     gpuCores?: number;
+     /** number of layers to store in VRAM */
+     gpuLayers?: number;
+     /** if true, reduce VRAM usage at the cost of performance */
      lowVram?: boolean;
+     /** use fp16 for KV cache */
      f16Kv?: boolean;
+     /** the llama_eval() call computes all logits, not just the last one */
      logitsAll?: boolean;
+     /** only load the vocabulary, no weights */
      vocabOnly?: boolean;
+     /** use mmap if possible */
      useMmap?: boolean;
+     /** force system to keep model in RAM */
      useMlock?: boolean;
+     /** embedding mode only */
      embedding?: boolean;
@@ -34,0 +46,0 @@ });
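For context, here is a minimal sketch of how the renamed option is used when constructing a model, based solely on the declaration above. The model path and the `gpuLayers` value are hypothetical illustrative choices; the `contextSize` default of `1024 * 4` comes from the implementation shown further down.

```typescript
import {LlamaModel} from "node-llama-cpp";

// Hypothetical model path; substitute a real model file on disk.
const model = new LlamaModel({
    modelPath: "./models/my-model.bin",
    contextSize: 4096, // text context size (the implementation defaults to 1024 * 4)
    gpuLayers: 32,     // was `gpuCores` in 1.3.0; number of layers to store in VRAM
    useMmap: true      // use mmap if possible
});
```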
@@ -13,3 +13,3 @@ import { llamaCppNode, LLAMAModel } from "./LlamaBins.js";
  * @param {number} [options.batchSize] - prompt processing batch size
- * @param {number} [options.gpuCores] - number of layers to store in VRAM
+ * @param {number} [options.gpuLayers] - number of layers to store in VRAM
  * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
@@ -23,3 +23,3 @@ * @param {boolean} [options.f16Kv] - use fp16 for KV cache
  */
- constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuCores, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
+ constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
      this._model = new LLAMAModel(modelPath, removeNullFields({
@@ -29,3 +29,3 @@ seed: seed != null ? Math.max(-1, seed) : undefined,
      batchSize,
-     gpuCores,
+     gpuLayers,
      lowVram,
@@ -32,0 +32,0 @@ f16Kv,
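The implementation forwards the renamed option straight through `removeNullFields()` into the native `LLAMAModel` binding. That helper is not part of this diff; the sketch below shows a plausible behavior for it (an assumption based on its name, not the library's actual code): drop unset options so only explicitly provided values reach llama.cpp.

```typescript
// Assumed behavior of removeNullFields(): strip keys whose value is null or
// undefined so that only explicitly provided options reach the native binding.
function removeNullFields<T extends object>(obj: T): Partial<T> {
    const result: Partial<T> = {};
    for (const [key, value] of Object.entries(obj)) {
        if (value != null)
            (result as Record<string, unknown>)[key] = value;
    }
    return result;
}
```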
  {
      "name": "node-llama-cpp",
-     "version": "1.3.0",
+     "version": "1.3.1",
      "description": "node.js bindings for llama.cpp",
@@ -5,0 +5,0 @@ "main": "dist/index.js",
@@ -28,3 +28,3 @@ # Node Llama.cpp
  import path from "path";
- import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
+ import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
@@ -58,3 +58,3 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
  import path from "path";
- import {LlamaModel, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
+ import {LlamaModel, LlamaContext, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
@@ -103,3 +103,3 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
  import path from "path";
- import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
+ import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
@@ -106,0 +106,0 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
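The README hunks only show the import line gaining `LlamaContext`; the surrounding example code is not part of this diff. The sketch below shows how the three imports could fit together, assuming the `new LlamaContext({model})` and `new LlamaChatSession({context})` constructor shapes and the `session.prompt()` method from the library's documentation; the model file name is hypothetical.

```typescript
import path from "path";
import {fileURLToPath} from "url";
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const model = new LlamaModel({
    // Hypothetical file name; point this at a model you have downloaded.
    modelPath: path.join(__dirname, "models", "model.bin")
});
const context = new LlamaContext({model});       // assumed constructor shape
const session = new LlamaChatSession({context}); // assumed constructor shape

const answer = await session.prompt("Hi there, how are you?");
console.log(answer);
```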