@llama-node/llama-cpp
Advanced tools
+4
-3
@@ -17,5 +17,6 @@ [package] | ||
| llama-sys = { path = "./llama-sys" } | ||
| # llm-chain-llama-sys = { path = "../../../llm-chain/llm-chain-llama/sys" } | ||
| napi = { version = "2.12.2", default-features = false, features = ["napi6", "async"] } | ||
| napi-derive = "2.12.2" | ||
| serde = { version = "1.0.163", features = ["derive"] } | ||
| serde_json = "1.0.96" | ||
| napi = { version = "2.12.4", default-features = false, features = ["napi6", "async", "serde-json"] } | ||
| napi-derive = "2.12.3" | ||
| tokio = { version = "1.26.0", features = ["full"] } | ||
@@ -22,0 +23,0 @@ futures = "0.3" |
@@ -1,2 +0,2 @@ | ||
| import { LLama, LlamaInvocation } from "../index"; | ||
| import { LLama, Generate } from "../index"; | ||
| import path from "path"; | ||
@@ -6,4 +6,5 @@ | ||
| const llama = await LLama.load( | ||
| path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| null, | ||
| { | ||
| modelPath: path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| }, | ||
| true | ||
@@ -18,3 +19,3 @@ ); | ||
| const params: LlamaInvocation = { | ||
| const params: Generate = { | ||
| nThreads: 4, | ||
@@ -21,0 +22,0 @@ nTokPredict: 2048, |
@@ -1,2 +0,2 @@ | ||
| import { LLama, LlamaContextParams, LlamaInvocation } from "../index"; | ||
| import { LLama, Generate } from "../index"; | ||
| import path from "path"; | ||
@@ -6,6 +6,5 @@ | ||
| const llama = await LLama.load( | ||
| path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| { | ||
| modelPath: path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| nCtx: 512, | ||
| nParts: -1, | ||
| nGpuLayers: 0, | ||
@@ -25,3 +24,3 @@ seed: 0, | ||
| const params: LlamaInvocation = { | ||
| const params: Generate = { | ||
| nThreads: 4, | ||
@@ -28,0 +27,0 @@ nTokPredict: 2048, |
| import { InferenceResultType } from "../index"; | ||
| import { LLama, LlamaInvocation } from "../index"; | ||
| import { LLama, Generate } from "../index"; | ||
| import path from "path"; | ||
@@ -7,7 +7,6 @@ | ||
| const llama = await LLama.load( | ||
| path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| { | ||
| modelPath: path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| nGpuLayers: 32, | ||
| nCtx: 1024, | ||
| nParts: 1, | ||
| seed: 0, | ||
@@ -30,3 +29,3 @@ f16Kv: false, | ||
| const params: LlamaInvocation = { | ||
| const params: Generate = { | ||
| nThreads: 4, | ||
@@ -33,0 +32,0 @@ nTokPredict: 2048, |
@@ -6,4 +6,5 @@ import { LLama } from "../index"; | ||
| const llama = await LLama.load( | ||
| path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| null, | ||
| { | ||
| modelPath: path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| }, | ||
| false | ||
@@ -10,0 +11,0 @@ ); |
+89
-7
@@ -20,21 +20,103 @@ /* tslint:disable */ | ||
| } | ||
| export interface LlamaInvocation { | ||
| export interface LogitBias { | ||
| token: number | ||
| bias: number | ||
| } | ||
| export interface Generate { | ||
| nThreads: number | ||
| nTokPredict: number | ||
| topK: number | ||
| /** | ||
| * logit bias for specific tokens | ||
| * Default: None | ||
| */ | ||
| logitBias?: Array<LogitBias> | ||
| /** | ||
| * top k tokens to sample from | ||
| * Range: <= 0 to use vocab size | ||
| * Default: 40 | ||
| */ | ||
| topK?: number | ||
| /** | ||
| * top p tokens to sample from | ||
| * Default: 0.95 | ||
| * 1.0 = disabled | ||
| */ | ||
| topP?: number | ||
| /** | ||
| * tail free sampling | ||
| * Default: 1.0 | ||
| * 1.0 = disabled | ||
| */ | ||
| tfsZ?: number | ||
| /** | ||
| * temperature | ||
| * Default: 0.80 | ||
| * 1.0 = disabled | ||
| */ | ||
| temp?: number | ||
| /** | ||
| * locally typical sampling | ||
| * Default: 1.0 | ||
| * 1.0 = disabled | ||
| */ | ||
| typicalP?: number | ||
| /** | ||
| * repeat penalty | ||
| * Default: 1.10 | ||
| * 1.0 = disabled | ||
| */ | ||
| repeatPenalty?: number | ||
| /** | ||
| * last n tokens to penalize | ||
| * Default: 64 | ||
| * 0 = disable penalty, -1 = context size | ||
| */ | ||
| repeatLastN?: number | ||
| /** | ||
| * frequency penalty | ||
| * Default: 0.00 | ||
| * 1.0 = disabled | ||
| */ | ||
| frequencyPenalty?: number | ||
| /** | ||
| * presence penalty | ||
| * Default: 0.00 | ||
| * 1.0 = disabled | ||
| */ | ||
| presencePenalty?: number | ||
| /** | ||
| * Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. | ||
| * Mirostat: A Neural Text Decoding Algorithm that Directly Controls Perplexity | ||
| * Default: 0 | ||
| * 0 = disabled | ||
| * 1 = mirostat 1.0 | ||
| * 2 = mirostat 2.0 | ||
| */ | ||
| mirostat?: number | ||
| /** | ||
| * The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. | ||
| * Default: 5.0 | ||
| */ | ||
| mirostatTau?: number | ||
| /** | ||
| * The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. | ||
| * Default: 0.1 | ||
| */ | ||
| mirostatEta?: number | ||
| /** | ||
| * stop sequence | ||
| * Default: None | ||
| */ | ||
| stopSequence?: string | ||
| /** | ||
| * consider newlines as a repeatable token | ||
| * Default: true | ||
| */ | ||
| penalizeNl?: boolean | ||
| /** prompt */ | ||
| prompt: string | ||
| } | ||
| export interface LlamaContextParams { | ||
| export interface ModelLoad { | ||
| modelPath: string | ||
| nCtx: number | ||
| nParts: number | ||
| nGpuLayers: number | ||
@@ -56,6 +138,6 @@ seed: number | ||
| export class LLama { | ||
| static load(path: string, params: LlamaContextParams | undefined | null, enableLogger: boolean): Promise<LLama> | ||
| getWordEmbedding(params: LlamaInvocation): Promise<Array<number>> | ||
| static load(params: Partial<LoadModel>, enableLogger: boolean): Promise<LLama> | ||
| getWordEmbedding(params: Generate): Promise<Array<number>> | ||
| tokenize(params: string): Promise<Array<number>> | ||
| inference(params: LlamaInvocation, callback: (result: InferenceResult) => void): () => void | ||
| inference(params: Generate, callback: (result: InferenceResult) => void): () => void | ||
| } |
+1
-1
| { | ||
| "name": "@llama-node/llama-cpp", | ||
| "version": "0.1.5", | ||
| "version": "0.1.6", | ||
| "main": "index.js", | ||
@@ -5,0 +5,0 @@ "types": "index.d.ts", |
@@ -164,3 +164,3 @@ import { exec, execSync } from "child_process"; | ||
| const run = async () => { | ||
| console.log("Checking environment...\n..."); | ||
| console.log("Checking environment...\n"); | ||
| checkEnv(); | ||
@@ -167,0 +167,0 @@ |
+55
-50
@@ -5,42 +5,13 @@ use std::{ffi::CStr, ptr::null_mut, slice}; | ||
| use llama_sys::{ | ||
| llama_apply_lora_from_file, llama_context, llama_context_default_params, llama_context_params, | ||
| llama_eval, llama_free, llama_get_embeddings, llama_get_logits, llama_init_from_file, | ||
| llama_n_embd, llama_n_vocab, llama_print_system_info, | ||
| llama_sample_frequency_and_presence_penalties, llama_sample_repetition_penalty, | ||
| llama_sample_tail_free, llama_sample_temperature, llama_sample_token, | ||
| llama_sample_token_greedy, llama_sample_top_k, llama_sample_top_p, llama_sample_typical, | ||
| llama_apply_lora_from_file, llama_context, llama_context_params, llama_eval, llama_free, | ||
| llama_get_embeddings, llama_get_logits, llama_init_from_file, llama_n_embd, llama_n_vocab, | ||
| llama_print_system_info, llama_sample_frequency_and_presence_penalties, | ||
| llama_sample_repetition_penalty, llama_sample_tail_free, llama_sample_temperature, | ||
| llama_sample_token, llama_sample_token_greedy, llama_sample_token_mirostat, | ||
| llama_sample_token_mirostat_v2, llama_sample_top_k, llama_sample_top_p, llama_sample_typical, | ||
| llama_token, llama_token_data, llama_token_data_array, llama_token_nl, llama_token_to_str, | ||
| }; | ||
| use crate::types::{LlamaContextParams, LlamaInvocation}; | ||
| use crate::types::{Generate, ModelLoad}; | ||
| impl LlamaContextParams { | ||
| // Returns the default parameters or the user-specified parameters. | ||
| pub fn or_default(params: &Option<LlamaContextParams>) -> llama_context_params { | ||
| match params { | ||
| Some(params) => params.clone().into(), | ||
| None => unsafe { llama_context_default_params() }, | ||
| } | ||
| } | ||
| } | ||
| impl From<LlamaContextParams> for llama_context_params { | ||
| fn from(params: LlamaContextParams) -> Self { | ||
| llama_context_params { | ||
| n_ctx: params.n_ctx, | ||
| n_parts: params.n_parts, | ||
| n_gpu_layers: params.n_gpu_layers, | ||
| seed: params.seed, | ||
| f16_kv: params.f16_kv, | ||
| logits_all: params.logits_all, | ||
| vocab_only: params.vocab_only, | ||
| use_mmap: params.use_mmap, | ||
| use_mlock: params.use_mlock, | ||
| embedding: params.embedding, | ||
| progress_callback: None, | ||
| progress_callback_user_data: null_mut(), | ||
| } | ||
| } | ||
| } | ||
| // Represents the LLamaContext which wraps FFI calls to the llama.cpp library. | ||
@@ -53,9 +24,8 @@ pub struct LLamaContext { | ||
| // Creates a new LLamaContext from the specified file and configuration parameters. | ||
| pub async fn from_file_and_params( | ||
| path: &str, | ||
| params: &Option<LlamaContextParams>, | ||
| ) -> Result<Self, napi::Error> { | ||
| let lora_params = params.as_ref().and_then(|p| p.lora.clone()); | ||
| let params = LlamaContextParams::or_default(params); | ||
| let ctx = unsafe { llama_init_from_file(path.as_ptr() as *const i8, params) }; | ||
| pub async fn from_file_and_params(params: &ModelLoad) -> Result<Self, napi::Error> { | ||
| let lora_params = ¶ms.lora; | ||
| let context_params = ModelLoad::to_llama_context_params(params); | ||
| let ctx = unsafe { | ||
| llama_init_from_file(params.model_path.as_ptr() as *const i8, context_params) | ||
| }; | ||
@@ -65,3 +35,3 @@ if ctx.is_null() { | ||
| "Failed to initialize LLama context from file: {}", | ||
| path | ||
| params.model_path, | ||
| ))); | ||
@@ -109,3 +79,3 @@ } | ||
| last_n_tokens: &mut [llama_token], | ||
| input: &LlamaInvocation, | ||
| input: &Generate, | ||
| context_params: &llama_context_params, | ||
@@ -115,6 +85,7 @@ ) -> i32 { | ||
| let top_p = input.top_p.unwrap_or(0.95) as f32; | ||
| let top_k = if input.top_k <= 0 { | ||
| let top_k = input.top_k.unwrap_or(40); | ||
| let top_k = if top_k <= 0 { | ||
| unsafe { llama_n_vocab(self.ctx) } | ||
| } else { | ||
| input.top_k | ||
| top_k | ||
| }; | ||
@@ -135,2 +106,9 @@ let tfs_z = input.tfs_z.unwrap_or(1.0) as f32; | ||
| let empty_logit_bias = Vec::new(); | ||
| let logit_bias = input.logit_bias.as_ref().unwrap_or(&empty_logit_bias); | ||
| let mirostat = input.mirostat.unwrap_or(0); | ||
| let mirostat_tau = input.mirostat_tau.unwrap_or(5.0) as f32; | ||
| let mirostat_eta = input.mirostat_eta.unwrap_or(0.1) as f32; | ||
| let n_vocab = unsafe { llama_n_vocab(self.ctx) }; | ||
@@ -140,3 +118,5 @@ let logits_ptr = unsafe { llama_get_logits(self.ctx) }; | ||
| // TODO: apply logit bias | ||
| for i in logit_bias.iter() { | ||
| logits[i.token as usize] += i.bias as f32; | ||
| } | ||
@@ -205,4 +185,29 @@ let mut candidates: Vec<llama_token_data> = Vec::with_capacity(n_vocab as usize); | ||
| id = unsafe { llama_sample_token_greedy(self.ctx, candidates_p) }; | ||
| } else if mirostat == 1 { | ||
| let mut mirostat_mu = 2.0_f32 * mirostat_tau; | ||
| let mirostat_m = 100; | ||
| unsafe { llama_sample_temperature(self.ctx, candidates_p, temp) }; | ||
| id = unsafe { | ||
| llama_sample_token_mirostat( | ||
| self.ctx, | ||
| candidates_p, | ||
| mirostat_tau, | ||
| mirostat_eta, | ||
| mirostat_m, | ||
| &mut mirostat_mu, | ||
| ) | ||
| } | ||
| } else if mirostat == 2 { | ||
| let mut mirostat_mu = 2.0_f32 * mirostat_tau; | ||
| unsafe { llama_sample_temperature(self.ctx, candidates_p, temp) }; | ||
| id = unsafe { | ||
| llama_sample_token_mirostat_v2( | ||
| self.ctx, | ||
| candidates_p, | ||
| mirostat_tau, | ||
| mirostat_eta, | ||
| &mut mirostat_mu, | ||
| ) | ||
| } | ||
| } else { | ||
| // TODO: here we just do temp for first approach, I dont understand microstat very well, will impl later | ||
| id = unsafe { | ||
@@ -251,3 +256,3 @@ llama_sample_top_k(self.ctx, candidates_p, top_k, 1); | ||
| n_past: i32, | ||
| input: &LlamaInvocation, | ||
| input: &Generate, | ||
| ) -> Result<(), napi::Error> { | ||
@@ -254,0 +259,0 @@ let res = |
+7
-7
@@ -24,3 +24,3 @@ #![deny(clippy::all)] | ||
| use tokio::sync::Mutex; | ||
| use types::{InferenceResult, InferenceResultType, LlamaContextParams, LlamaInvocation}; | ||
| use types::{InferenceResult, InferenceResultType, Generate, ModelLoad}; | ||
@@ -36,12 +36,12 @@ #[napi] | ||
| pub async fn load( | ||
| path: String, | ||
| params: Option<LlamaContextParams>, | ||
| #[napi(ts_arg_type = "Partial<LoadModel>")] params: serde_json::Value, | ||
| enable_logger: bool, | ||
| ) -> Result<LLama> { | ||
| let params = serde_json::from_value::<ModelLoad>(params).unwrap(); | ||
| let logger = LLamaLogger::get_singleton(); | ||
| logger.set_enabled(enable_logger); | ||
| Ok(Self { | ||
| llama: LLamaInternal::load(path, params, enable_logger).await?, | ||
| llama: LLamaInternal::load(params, enable_logger).await?, | ||
| }) | ||
@@ -51,3 +51,3 @@ } | ||
| #[napi] | ||
| pub async fn get_word_embedding(&self, params: LlamaInvocation) -> Result<Vec<f64>> { | ||
| pub async fn get_word_embedding(&self, params: Generate) -> Result<Vec<f64>> { | ||
| let llama = self.llama.lock().await; | ||
@@ -67,3 +67,3 @@ llama.embedding(¶ms).await | ||
| env: Env, | ||
| params: LlamaInvocation, | ||
| params: Generate, | ||
| #[napi(ts_arg_type = "(result: InferenceResult) => void")] callback: JsFunction, | ||
@@ -70,0 +70,0 @@ ) -> Result<JsFunction> { |
+8
-11
@@ -9,5 +9,3 @@ use std::sync::Arc; | ||
| tokenizer::{llama_token_eos, tokenize}, | ||
| types::{ | ||
| InferenceResult, InferenceResultType, InferenceToken, LlamaContextParams, LlamaInvocation, | ||
| }, | ||
| types::{InferenceResult, InferenceResultType, InferenceToken, Generate, ModelLoad}, | ||
| }; | ||
@@ -17,3 +15,3 @@ | ||
| context: LLamaContext, | ||
| context_params: Option<LlamaContextParams>, | ||
| context_params: ModelLoad, | ||
| } | ||
@@ -23,8 +21,7 @@ | ||
| pub async fn load( | ||
| path: String, | ||
| params: Option<LlamaContextParams>, | ||
| params: ModelLoad, | ||
| enable_logger: bool, | ||
| ) -> Result<Arc<Mutex<Self>>, napi::Error> { | ||
| ) -> Result<Arc<Mutex<LLamaInternal>>, napi::Error> { | ||
| let llama = LLamaInternal { | ||
| context: LLamaContext::from_file_and_params(&path, ¶ms).await?, | ||
| context: LLamaContext::from_file_and_params(¶ms).await?, | ||
| context_params: params, | ||
@@ -46,3 +43,3 @@ }; | ||
| pub async fn embedding(&self, input: &LlamaInvocation) -> Result<Vec<f64>, napi::Error> { | ||
| pub async fn embedding(&self, input: &Generate) -> Result<Vec<f64>, napi::Error> { | ||
| let context = &self.context; | ||
@@ -70,3 +67,3 @@ let embd_inp = tokenize(context, input.prompt.as_str(), true); | ||
| &self, | ||
| input: &LlamaInvocation, | ||
| input: &Generate, | ||
| running: Arc<Mutex<bool>>, | ||
@@ -76,3 +73,3 @@ callback: impl Fn(InferenceResult), | ||
| let context = &self.context; | ||
| let context_params_c = LlamaContextParams::or_default(&self.context_params); | ||
| let context_params_c = ModelLoad::to_llama_context_params(&self.context_params); | ||
| // Tokenize the stop sequence and input prompt. | ||
@@ -79,0 +76,0 @@ let tokenized_stop_prompt = input |
+132
-14
@@ -0,2 +1,4 @@ | ||
| use llama_sys::llama_context_params; | ||
| use napi::bindgen_prelude::*; | ||
| use serde::{Deserialize, Serialize}; | ||
@@ -26,16 +28,87 @@ #[napi(object)] | ||
| #[derive(Debug, Clone)] | ||
| pub struct LlamaInvocation { | ||
| pub struct LogitBias { | ||
| pub token: i32, | ||
| pub bias: f64, | ||
| } | ||
| #[napi(object)] | ||
| #[derive(Debug, Clone)] | ||
| pub struct Generate { | ||
| pub n_threads: i32, | ||
| pub n_tok_predict: i32, | ||
| pub top_k: i32, // 40 | ||
| pub top_p: Option<f64>, // default 0.95f, 1.0 = disabled | ||
| pub tfs_z: Option<f64>, // default 1.00f, 1.0 = disabled | ||
| pub temp: Option<f64>, // default 0.80f, 1.0 = disabled | ||
| pub typical_p: Option<f64>, // default 1.00f, 1.0 = disabled | ||
| pub repeat_penalty: Option<f64>, // default 1.10f, 1.0 = disabled | ||
| pub repeat_last_n: Option<i32>, // default 64, last n tokens to penalize (0 = disable penalty, -1 = context size) | ||
| pub frequency_penalty: Option<f64>, // default 0.00f, 1.0 = disabled | ||
| pub presence_penalty: Option<f64>, // default 0.00f, 1.0 = disabled | ||
| /// logit bias for specific tokens | ||
| /// Default: None | ||
| pub logit_bias: Option<Vec<LogitBias>>, | ||
| /// top k tokens to sample from | ||
| /// Range: <= 0 to use vocab size | ||
| /// Default: 40 | ||
| pub top_k: Option<i32>, | ||
| /// top p tokens to sample from | ||
| /// Default: 0.95 | ||
| /// 1.0 = disabled | ||
| pub top_p: Option<f64>, | ||
| /// tail free sampling | ||
| /// Default: 1.0 | ||
| /// 1.0 = disabled | ||
| pub tfs_z: Option<f64>, | ||
| /// temperature | ||
| /// Default: 0.80 | ||
| /// 1.0 = disabled | ||
| pub temp: Option<f64>, | ||
| /// locally typical sampling | ||
| /// Default: 1.0 | ||
| /// 1.0 = disabled | ||
| pub typical_p: Option<f64>, | ||
| /// repeat penalty | ||
| /// Default: 1.10 | ||
| /// 1.0 = disabled | ||
| pub repeat_penalty: Option<f64>, | ||
| /// last n tokens to penalize | ||
| /// Default: 64 | ||
| /// 0 = disable penalty, -1 = context size | ||
| pub repeat_last_n: Option<i32>, | ||
| /// frequency penalty | ||
| /// Default: 0.00 | ||
| /// 1.0 = disabled | ||
| pub frequency_penalty: Option<f64>, | ||
| /// presence penalty | ||
| /// Default: 0.00 | ||
| /// 1.0 = disabled | ||
| pub presence_penalty: Option<f64>, | ||
| /// Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. | ||
| /// Mirostat: A Neural Text Decoding Algorithm that Directly Controls Perplexity | ||
| /// Default: 0 | ||
| /// 0 = disabled | ||
| /// 1 = mirostat 1.0 | ||
| /// 2 = mirostat 2.0 | ||
| pub mirostat: Option<i32>, | ||
| /// The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. | ||
| /// Default: 5.0 | ||
| pub mirostat_tau: Option<f64>, | ||
| /// The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. | ||
| /// Default: 0.1 | ||
| pub mirostat_eta: Option<f64>, | ||
| /// stop sequence | ||
| /// Default: None | ||
| pub stop_sequence: Option<String>, | ||
| /// consider newlines as a repeatable token | ||
| /// Default: true | ||
| pub penalize_nl: Option<bool>, | ||
| /// prompt | ||
| pub prompt: String, | ||
@@ -46,6 +119,7 @@ } | ||
| #[napi(object)] | ||
| #[derive(Debug, Clone)] | ||
| pub struct LlamaContextParams { | ||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||
| #[serde(default, rename_all = "camelCase")] | ||
| pub struct ModelLoad { | ||
| pub model_path: String, | ||
| pub n_ctx: i32, | ||
| pub n_parts: i32, | ||
| pub n_gpu_layers: i32, | ||
@@ -62,4 +136,48 @@ pub seed: i32, | ||
| impl Default for ModelLoad { | ||
| fn default() -> Self { | ||
| Self { | ||
| model_path: "".to_string(), | ||
| n_ctx: 2048, | ||
| n_gpu_layers: 0, | ||
| seed: 0, | ||
| f16_kv: true, | ||
| logits_all: false, | ||
| vocab_only: false, | ||
| use_mlock: false, | ||
| embedding: false, | ||
| use_mmap: true, | ||
| lora: None, | ||
| } | ||
| } | ||
| } | ||
| impl ModelLoad { | ||
| // Returns the default parameters or the user-specified parameters. | ||
| pub fn to_llama_context_params(params: &ModelLoad) -> llama_context_params { | ||
| params.clone().into() | ||
| } | ||
| } | ||
| impl From<ModelLoad> for llama_context_params { | ||
| fn from(params: ModelLoad) -> Self { | ||
| llama_context_params { | ||
| n_ctx: params.n_ctx, | ||
| n_gpu_layers: params.n_gpu_layers, | ||
| seed: params.seed, | ||
| f16_kv: params.f16_kv, | ||
| logits_all: params.logits_all, | ||
| vocab_only: params.vocab_only, | ||
| use_mmap: params.use_mmap, | ||
| use_mlock: params.use_mlock, | ||
| embedding: params.embedding, | ||
| progress_callback: None, | ||
| progress_callback_user_data: std::ptr::null_mut(), | ||
| } | ||
| } | ||
| } | ||
| #[napi(object)] | ||
| #[derive(Debug, Clone)] | ||
| #[derive(Debug, Clone, Serialize, Deserialize, Default)] | ||
| #[serde(default, rename_all = "camelCase")] | ||
| pub struct LlamaLoraAdaptor { | ||
@@ -66,0 +184,0 @@ pub lora_adapter: String, |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
AI-detected possible typosquat
Supply chain risk: AI has identified this package as a potential typosquat of a more popular package. This suggests that the package may be intentionally mimicking another package's name, description, or other metadata.
Found 1 instance in 1 package
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
10372078
7.53%613
15.44%9
12.5%