node-llama-cpp
Comparing versions 1.3.0 to 1.3.1
@@ -10,3 +10,3 @@ export declare class LlamaModel {
  * @param {number} [options.batchSize] - prompt processing batch size
- * @param {number} [options.gpuCores] - number of layers to store in VRAM
+ * @param {number} [options.gpuLayers] - number of layers to store in VRAM
  * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
@@ -20,14 +20,26 @@ * @param {boolean} [options.f16Kv] - use fp16 for KV cache
  */
- constructor({ modelPath, seed, contextSize, batchSize, gpuCores, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
+ constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
+     /** path to the model on the filesystem */
      modelPath: string;
+     /** If null, a random seed will be used */
      seed?: number | null;
+     /** text context size */
      contextSize?: number;
+     /** prompt processing batch size */
      batchSize?: number;
-     gpuCores?: number;
+     /** number of layers to store in VRAM */
+     gpuLayers?: number;
+     /** if true, reduce VRAM usage at the cost of performance */
      lowVram?: boolean;
+     /** use fp16 for KV cache */
      f16Kv?: boolean;
+     /** the llama_eval() call computes all logits, not just the last one */
      logitsAll?: boolean;
+     /** only load the vocabulary, no weights */
      vocabOnly?: boolean;
+     /** use mmap if possible */
      useMmap?: boolean;
+     /** force system to keep model in RAM */
      useMlock?: boolean;
+     /** embedding mode only */
      embedding?: boolean;
@@ -34,0 +46,0 @@ });
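For context, here is a minimal sketch of how the renamed option is used when constructing a model, based solely on the declaration above. The model path and the `gpuLayers` value are hypothetical illustrative choices; the `contextSize` default of `1024 * 4` comes from the implementation shown further down.

```typescript
import {LlamaModel} from "node-llama-cpp";

// Hypothetical model path; substitute a real model file on disk.
const model = new LlamaModel({
    modelPath: "./models/my-model.bin",
    contextSize: 4096, // text context size (the implementation defaults to 1024 * 4)
    gpuLayers: 32,     // was `gpuCores` in 1.3.0; number of layers to store in VRAM
    useMmap: true      // use mmap if possible
});
```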
@@ -13,3 +13,3 @@ import { llamaCppNode, LLAMAModel } from "./LlamaBins.js";
  * @param {number} [options.batchSize] - prompt processing batch size
- * @param {number} [options.gpuCores] - number of layers to store in VRAM
+ * @param {number} [options.gpuLayers] - number of layers to store in VRAM
  * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
@@ -23,3 +23,3 @@ * @param {boolean} [options.f16Kv] - use fp16 for KV cache
  */
- constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuCores, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
+ constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
      this._model = new LLAMAModel(modelPath, removeNullFields({
@@ -29,3 +29,3 @@ seed: seed != null ? Math.max(-1, seed) : undefined,
      batchSize,
-     gpuCores,
+     gpuLayers,
      lowVram,
@@ -32,0 +32,0 @@ f16Kv,
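The implementation forwards the renamed option straight through `removeNullFields()` into the native `LLAMAModel` binding. That helper is not part of this diff; the sketch below shows a plausible behavior for it (an assumption based on its name, not the library's actual code): drop unset options so only explicitly provided values reach llama.cpp.

```typescript
// Assumed behavior of removeNullFields(): strip keys whose value is null or
// undefined so that only explicitly provided options reach the native binding.
function removeNullFields<T extends object>(obj: T): Partial<T> {
    const result: Partial<T> = {};
    for (const [key, value] of Object.entries(obj)) {
        if (value != null)
            (result as Record<string, unknown>)[key] = value;
    }
    return result;
}
```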
  {
      "name": "node-llama-cpp",
-     "version": "1.3.0",
+     "version": "1.3.1",
      "description": "node.js bindings for llama.cpp",
@@ -5,0 +5,0 @@ "main": "dist/index.js",
@@ -28,3 +28,3 @@ # Node Llama.cpp
  import path from "path";
- import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
+ import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
@@ -58,3 +58,3 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
  import path from "path";
- import {LlamaModel, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
+ import {LlamaModel, LlamaContext, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
@@ -103,3 +103,3 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
  import path from "path";
- import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
+ import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
@@ -106,0 +106,0 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
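The README hunks only show the import line gaining `LlamaContext`; the surrounding example code is not part of this diff. The sketch below shows how the three imports could fit together, assuming the `new LlamaContext({model})` and `new LlamaChatSession({context})` constructor shapes and the `session.prompt()` method from the library's documentation; the model file name is hypothetical.

```typescript
import path from "path";
import {fileURLToPath} from "url";
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const model = new LlamaModel({
    // Hypothetical file name; point this at a model you have downloaded.
    modelPath: path.join(__dirname, "models", "model.bin")
});
const context = new LlamaContext({model});       // assumed constructor shape
const session = new LlamaChatSession({context}); // assumed constructor shape

const answer = await session.prompt("Hi there, how are you?");
console.log(answer);
```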