@llama-node/llama-cpp
Advanced tools
import { exec, execSync } from "child_process";
import { existsSync } from "fs";
| const checkClang: () => boolean = () => { | ||
| try { | ||
| process.stdout.write("Checking clang..."); | ||
| execSync("clang --version"); | ||
| console.log("✅"); | ||
| return true; | ||
| } catch (error) { | ||
| return false; | ||
| } | ||
| }; | ||
| const checkGcc: () => boolean = () => { | ||
| try { | ||
| process.stdout.write("Checking gcc..."); | ||
| execSync("gcc --version"); | ||
| console.log("✅"); | ||
| return true; | ||
| } catch (error) { | ||
| return false; | ||
| } | ||
| }; | ||
| const checkEnv = () => { | ||
| // check if rustc is installed and available | ||
| try { | ||
| process.stdout.write("Checking rustc..."); | ||
| execSync("rustc --version"); | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.log("❌"); | ||
| console.error("rustc is not installed or not available in PATH"); | ||
| console.log("Please install rustc from https://rustup.rs/"); | ||
| process.exit(1); | ||
| } | ||
| // check if cargo is installed and available | ||
| try { | ||
| process.stdout.write("Checking cargo..."); | ||
| execSync("cargo --version"); | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.log("❌"); | ||
| console.error("cargo is not installed or not available in PATH"); | ||
| console.log("Please install cargo from https://rustup.rs/"); | ||
| process.exit(1); | ||
| } | ||
| // check if cmake is installed and available | ||
| try { | ||
| process.stdout.write("Checking cmake..."); | ||
| execSync("cmake --version"); | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.log("❌"); | ||
| console.error("cmake is not installed or not available in PATH"); | ||
| console.log( | ||
| "Please install cmake from https://cmake.org/install/ or your package manager. Make sure to add it to PATH." | ||
| ); | ||
| process.exit(1); | ||
| } | ||
| // check if llvm is installed and available | ||
| try { | ||
| process.stdout.write("Checking llvm..."); | ||
| execSync("llvm-config --version"); | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.log("❌"); | ||
| console.error("llvm is not installed or not available in PATH"); | ||
| console.log( | ||
| "Please install llvm from https://releases.llvm.org/download.html or your package manager. Make sure to add it to PATH." | ||
| ); | ||
| process.exit(1); | ||
| } | ||
| // check if clang or gcc is installed and available | ||
| if (!checkClang() && !checkGcc()) { | ||
| console.log("❌"); | ||
| console.error("clang or gcc is not installed or not available in PATH"); | ||
| // install clang | ||
| console.log( | ||
| "Please install clang from https://releases.llvm.org/download.html or your package manager. Make sure to add it to PATH." | ||
| ); | ||
| // or install gcc | ||
| console.log( | ||
| "Alternatively, you can install gcc from https://gcc.gnu.org/install/ or your package manager. Make sure to add it to PATH." | ||
| ); | ||
| process.exit(1); | ||
| } | ||
| // check if nvcc is installed and available | ||
| try { | ||
| process.stdout.write("Checking nvcc..."); | ||
| execSync("nvcc --version"); | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.log("❌"); | ||
| console.error("nvcc is not installed or not available in PATH"); | ||
| console.log( | ||
| "Please install nvcc from https://developer.nvidia.com/cuda-downloads or your package manager. Make sure to add it to PATH." | ||
| ); | ||
| process.exit(1); | ||
| } | ||
| }; | ||
| const compile = () => { | ||
| const buildProcess = exec( | ||
| `napi build --platform --release --features=cublas` | ||
| ); | ||
| buildProcess.stdout?.pipe(process.stdout); | ||
| buildProcess.stderr?.pipe(process.stderr); | ||
| return new Promise<boolean>((resolve, reject) => { | ||
| buildProcess.on("close", (code) => { | ||
| if (code !== 0) { | ||
| reject(code); | ||
| } else { | ||
| resolve(true); | ||
| } | ||
| }); | ||
| }); | ||
| }; | ||
| const postCompile = async () => { | ||
| const homeDir = process.env.HOME || process.env.USERPROFILE; | ||
| const extension = process.platform === "win32" ? ".dll" : ".so"; | ||
| const libPath = `${homeDir}/.llama-node/libllama${extension}`; | ||
| // check if libllama.so exists | ||
| try { | ||
| process.stdout.write("Checking libllama..."); | ||
| execSync(`ls ${libPath}`); | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.error("libllama is not found"); | ||
| console.log( | ||
| "Please make sure that libllama is compiled and installed under ~/.llama-node/" | ||
| ); | ||
| process.exit(1); | ||
| } | ||
| // check if libllama.so is under the LD_LIBRARY_PATH | ||
| try { | ||
| process.stdout.write("Checking LD_LIBRARY_PATH..."); | ||
| execSync(`echo $LD_LIBRARY_PATH | grep ${homeDir}/.llama-node`) | ||
| console.log("✅"); | ||
| } catch (error) { | ||
| console.log("\n\n"); | ||
| console.log("libllama is not under LD_LIBRARY_PATH"); | ||
| console.log("add this to your .bashrc or .zshrc:"); | ||
| console.log( | ||
| `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.llama-node` | ||
| ); | ||
| } | ||
| }; | ||
| const run = async () => { | ||
| console.log("Checking environment...\n..."); | ||
| checkEnv(); | ||
| console.log("\n\n"); | ||
| console.log("Compiling...\n"); | ||
| await compile(); | ||
| console.log("\n\n"); | ||
| console.log("Post-compiling...\n"); | ||
| await postCompile(); | ||
| console.log("Compile successful!"); | ||
| }; | ||
| run(); |
+7
-3
@@ -13,5 +13,3 @@ [package] | ||
| # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix | ||
| env_logger = "0.10.0" | ||
| log = "0.4" | ||
| once_cell = "1.17.1" | ||
| num_cpus = "1.15.0" | ||
@@ -27,4 +25,10 @@ rand = "0.8.5" | ||
| log = "0.4.17" | ||
| common-rs = { path = "../common-rs" } | ||
| [build-dependencies] | ||
| napi-build = "2.0.1" | ||
| [features] | ||
| default = [] | ||
| cublas = ["llama-sys/cublas"] |
@@ -10,2 +10,3 @@ import { LLama, LlamaContextParams, LlamaInvocation } from "../index"; | ||
| nParts: -1, | ||
| nGpuLayers: 0, | ||
| seed: 0, | ||
@@ -12,0 +13,0 @@ f16Kv: false, |
+22
-1
@@ -0,1 +1,2 @@ | ||
| import { InferenceResultType } from "../index"; | ||
| import { LLama, LlamaInvocation } from "../index"; | ||
@@ -7,3 +8,14 @@ import path from "path"; | ||
| path.resolve(process.cwd(), "../../ggml-vic7b-q5_1.bin"), | ||
| null, | ||
| { | ||
| nGpuLayers: 32, | ||
| nCtx: 1024, | ||
| nParts: 1, | ||
| seed: 0, | ||
| f16Kv: false, | ||
| logitsAll: false, | ||
| vocabOnly: false, | ||
| useMlock: false, | ||
| embedding: false, | ||
| useMmap: true, | ||
| }, | ||
| true | ||
@@ -28,4 +40,13 @@ ); | ||
| const start = Date.now(); | ||
| let count = 0; | ||
| llama.inference(params, (data) => { | ||
| count += 1; | ||
| process.stdout.write(data.data?.token ?? ""); | ||
| if (data.type === InferenceResultType.End) { | ||
| const end = Date.now(); | ||
| console.log(`\n\nToken Count: ${count}`); | ||
| console.log(`\n\nTime: ${end - start}ms`); | ||
| } | ||
| }); | ||
@@ -32,0 +53,0 @@ }; |
@@ -13,3 +13,3 @@ import { LLama } from "../index"; | ||
| llama.tokenize(template, 2048).then((data) => { | ||
| llama.tokenize(template).then((data) => { | ||
| console.log(data); | ||
@@ -16,0 +16,0 @@ }); |
+8
-1
@@ -39,2 +39,3 @@ /* tslint:disable */ | ||
| nParts: number | ||
| nGpuLayers: number | ||
| seed: number | ||
@@ -47,8 +48,14 @@ f16Kv: boolean | ||
| useMmap: boolean | ||
| lora?: LlamaLoraAdaptor | ||
| } | ||
| export interface LlamaLoraAdaptor { | ||
| loraAdapter: string | ||
| loraBase?: string | ||
| nThreads: number | ||
| } | ||
| export class LLama { | ||
| static load(path: string, params: LlamaContextParams | undefined | null, enableLogger: boolean): Promise<LLama> | ||
| getWordEmbedding(params: LlamaInvocation): Promise<Array<number>> | ||
| tokenize(params: string, nCtx: number): Promise<Array<number>> | ||
| tokenize(params: string): Promise<Array<number>> | ||
| inference(params: LlamaInvocation, callback: (result: InferenceResult) => void): () => void | ||
| } |
+90
-45
@@ -7,2 +7,3 @@ #![allow(clippy::uninlined_format_args)] | ||
| use dirs::home_dir; | ||
| use platforms::{Arch, Platform, OS}; | ||
@@ -12,6 +13,45 @@ use std::env; | ||
| struct BuildLinkInfo { | ||
| link_type: String, | ||
| #[cfg(target_os = "windows")] | ||
| link_extension_windows: String, | ||
| link_extension_nix: String, | ||
| link_out_dir: String, | ||
| cmake_link_flag: Vec<String>, | ||
| } | ||
| #[cfg(not(feature = "dynamic"))] | ||
| fn get_link_info() -> BuildLinkInfo { | ||
| BuildLinkInfo { | ||
| link_type: "static".to_owned(), | ||
| #[cfg(target_os = "windows")] | ||
| link_extension_windows: "lib".to_owned(), | ||
| link_extension_nix: "a".to_owned(), | ||
| link_out_dir: env::var("OUT_DIR").unwrap(), | ||
| cmake_link_flag: vec!["-DLLAMA_STATIC=ON".to_owned()], | ||
| } | ||
| } | ||
| #[cfg(feature = "dynamic")] | ||
| fn get_link_info() -> BuildLinkInfo { | ||
| BuildLinkInfo { | ||
| link_type: "dylib".to_owned(), | ||
| #[cfg(target_os = "windows")] | ||
| link_extension_windows: "dll".to_owned(), | ||
| link_extension_nix: "so".to_owned(), | ||
| link_out_dir: env::var("OUT_DIR").unwrap(), | ||
| cmake_link_flag: vec![ | ||
| "-DLLAMA_STATIC=OFF".to_owned(), | ||
| "-DBUILD_SHARED_LIBS=ON".to_owned(), | ||
| ], | ||
| } | ||
| } | ||
| fn main() { | ||
| let initial_dir = env::current_dir().unwrap(); | ||
| let home_dir = home_dir().unwrap(); | ||
| let llama_node_dir = home_dir.join(".llama-node"); | ||
| println!("cargo:warning=working_dir: {}", initial_dir.display()); | ||
| if !llama_node_dir.exists() { | ||
| std::fs::create_dir(&llama_node_dir).expect("Unable to create .llama-node directory"); | ||
| } | ||
@@ -28,12 +68,6 @@ let target = env::var("TARGET").unwrap(); | ||
| #[allow(unused_mut, unused_assignments)] | ||
| let mut link_type = "static"; | ||
| let build_link_info = get_link_info(); | ||
| #[cfg(feature = "dynamic")] | ||
| { | ||
| link_type = "dylib"; | ||
| } | ||
| println!("cargo:rustc-link-search={}", env::var("OUT_DIR").unwrap()); | ||
| println!("cargo:rustc-link-lib={}=llama", link_type); | ||
| println!("cargo:rustc-link-search={}", build_link_info.link_out_dir); | ||
| println!("cargo:rustc-link-lib={}=llama", build_link_info.link_type); | ||
| println!("cargo:rerun-if-changed=wrapper.h"); | ||
@@ -51,3 +85,3 @@ | ||
| Ok(b) => { | ||
| let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); | ||
| let out_path = PathBuf::from(build_link_info.link_out_dir.clone()); | ||
| b.write_to_file(out_path.join("bindings.rs")) | ||
@@ -88,17 +122,13 @@ .expect("Couldn't write bindings!"); | ||
| #[cfg(feature = "cublas")] | ||
| { | ||
| command.arg("-DLLAMA_CUBLAS=ON"); | ||
| for flag in build_link_info.cmake_link_flag { | ||
| command.arg(&flag); | ||
| } | ||
| #[allow(unused_mut, unused_assignments)] | ||
| let mut link_type = "-DLLAMA_STATIC=ON"; | ||
| #[cfg(feature = "dynamic")] | ||
| #[cfg(feature = "cublas")] | ||
| { | ||
| command.arg("-DBUILD_SHARED_LIBS=ON"); | ||
| link_type = "-DLLAMA_STATIC=OFF"; | ||
| command | ||
| .arg("-DLLAMA_CUBLAS=ON") | ||
| .arg("-DCMAKE_POSITION_INDEPENDENT_CODE=ON"); | ||
| } | ||
| command.arg(link_type); | ||
| if platform.target_os == OS::MacOS { | ||
@@ -137,21 +167,2 @@ if platform.target_arch == Arch::AArch64 { | ||
| #[allow(unused_mut, unused_assignments)] | ||
| let mut link_ext = ("lib", "a"); | ||
| #[allow(unused_mut, unused_assignments)] | ||
| let mut out_dir = env::var("OUT_DIR").unwrap(); | ||
| #[cfg(feature = "dynamic")] | ||
| { | ||
| link_ext = ("dll", "so"); | ||
| let bin_dir = initial_dir.parent().unwrap(); | ||
| let bin_dir = bin_dir.join("./@llama-node"); | ||
| println!("cargo:warning=bin_dir: {:?}", bin_dir.display()); | ||
| if !bin_dir.exists() { | ||
| std::fs::create_dir(bin_dir.clone()).unwrap(); | ||
| } | ||
| out_dir = bin_dir.to_str().unwrap().to_string(); | ||
| } | ||
| println!("cargo:warning=out_dir: {:?}", out_dir); | ||
| // move libllama.a to where Cargo expects it (OUT_DIR) | ||
@@ -161,6 +172,23 @@ #[cfg(target_os = "windows")] | ||
| std::fs::copy( | ||
| format!("Release/llama.{}", link_ext.0), | ||
| format!("{}/llama.{}", out_dir, link_ext.0), | ||
| format!("Release/llama.{}", build_link_info.link_extension_windows), | ||
| format!( | ||
| "{}/llama.{}", | ||
| build_link_info.link_out_dir, build_link_info.link_extension_windows | ||
| ), | ||
| ) | ||
| .expect("Failed to copy lib"); | ||
| #[cfg(feature = "dynamic")] | ||
| { | ||
| // move libllama.dll to llama_node_dir | ||
| std::fs::copy( | ||
| format!("Release/llama.{}", build_link_info.link_extension_windows), | ||
| format!( | ||
| "{}/llama.{}", | ||
| llama_node_dir.display(), | ||
| build_link_info.link_extension_windows | ||
| ), | ||
| ) | ||
| .expect("Failed to copy lib"); | ||
| } | ||
| } | ||
@@ -171,6 +199,23 @@ | ||
| std::fs::copy( | ||
| format!("libllama.{}", link_ext.1), | ||
| format!("{}/libllama.{}", out_dir, link_ext.1), | ||
| format!("libllama.{}", build_link_info.link_extension_nix), | ||
| format!( | ||
| "{}/libllama.{}", | ||
| build_link_info.link_out_dir, build_link_info.link_extension_nix | ||
| ), | ||
| ) | ||
| .expect("Failed to copy lib"); | ||
| #[cfg(feature = "dynamic")] | ||
| { | ||
| // move libllama.so to llama_node_dir | ||
| std::fs::copy( | ||
| format!("libllama.{}", build_link_info.link_extension_nix), | ||
| format!( | ||
| "{}/libllama.{}", | ||
| llama_node_dir.display(), | ||
| build_link_info.link_extension_nix | ||
| ), | ||
| ) | ||
| .expect("Failed to copy lib"); | ||
| } | ||
| } | ||
@@ -177,0 +222,0 @@ // clean the llama build directory to prevent Cargo from complaining during crate publish |
@@ -12,2 +12,3 @@ [package] | ||
| platforms = "3.0.2" | ||
| dirs = "5.0.1" | ||
@@ -14,0 +15,0 @@ [features] |
+2
-1
| { | ||
| "name": "@llama-node/llama-cpp", | ||
| "version": "0.1.4", | ||
| "version": "0.1.5", | ||
| "main": "index.js", | ||
@@ -28,2 +28,3 @@ "types": "index.d.ts", | ||
| "build": "napi build --platform --release", | ||
| "build:cuda": "tsx scripts/cuda-compile.mts", | ||
| "build:debug": "napi build --platform", | ||
@@ -30,0 +31,0 @@ "test": "vitest", |
+22
-10
@@ -14,3 +14,3 @@ use std::{ffi::CStr, ptr::null_mut, slice}; | ||
| use crate::types::{LlamaContextParams, LlamaInvocation, LlamaLoraAdaptor}; | ||
| use crate::types::{LlamaContextParams, LlamaInvocation}; | ||
@@ -32,2 +32,3 @@ impl LlamaContextParams { | ||
| n_parts: params.n_parts, | ||
| n_gpu_layers: params.n_gpu_layers, | ||
| seed: params.seed, | ||
@@ -56,6 +57,14 @@ f16_kv: params.f16_kv, | ||
| params: &Option<LlamaContextParams>, | ||
| lora_params: &Option<LlamaLoraAdaptor>, | ||
| ) -> Self { | ||
| ) -> Result<Self, napi::Error> { | ||
| let lora_params = params.as_ref().and_then(|p| p.lora.clone()); | ||
| let params = LlamaContextParams::or_default(params); | ||
| let ctx = unsafe { llama_init_from_file(path.as_ptr() as *const i8, params) }; | ||
| if ctx.is_null() { | ||
| return Err(napi::Error::from_reason(format!( | ||
| "Failed to initialize LLama context from file: {}", | ||
| path | ||
| ))); | ||
| } | ||
| if let Some(lora_params) = lora_params { | ||
@@ -78,15 +87,18 @@ let lora_base_path = lora_params | ||
| if err != 0 { | ||
| panic!("Failed to apply LORA adapter"); | ||
| return Err(napi::Error::from_reason(format!( | ||
| "Failed to apply lora adapter: {}", | ||
| err | ||
| ))); | ||
| } | ||
| } | ||
| Self { ctx } | ||
| Ok(Self { ctx }) | ||
| } | ||
| pub fn llama_print_system_info(&self) { | ||
| pub fn llama_print_system_info(&self) -> Result<()> { | ||
| let sys_info_c_str = unsafe { llama_print_system_info() }; | ||
| let sys_info = unsafe { CStr::from_ptr(sys_info_c_str) } | ||
| .to_str() | ||
| .unwrap() | ||
| .to_str()? | ||
| .to_owned(); | ||
| log::info!("{}", sys_info); | ||
| Ok(()) | ||
| } | ||
@@ -236,3 +248,3 @@ | ||
| input: &LlamaInvocation, | ||
| ) -> Result<(), ()> { | ||
| ) -> Result<(), napi::Error> { | ||
| let res = | ||
@@ -243,3 +255,3 @@ unsafe { llama_eval(self.ctx, tokens.as_ptr(), n_tokens, n_past, input.n_threads) }; | ||
| } else { | ||
| Err(()) | ||
| Err(napi::Error::from_reason("LLama eval failed")) | ||
| } | ||
@@ -246,0 +258,0 @@ } |
+20
-11
@@ -13,2 +13,4 @@ #![deny(clippy::all)] | ||
| use common_rs::logger::LLamaLogger; | ||
| use llama::LLamaInternal; | ||
@@ -23,3 +25,3 @@ use napi::{ | ||
| use tokio::sync::Mutex; | ||
| use types::{InferenceResult, LlamaContextParams, LlamaInvocation}; | ||
| use types::{InferenceResult, InferenceResultType, LlamaContextParams, LlamaInvocation}; | ||
@@ -39,11 +41,8 @@ #[napi] | ||
| ) -> Result<LLama> { | ||
| if enable_logger { | ||
| env_logger::builder() | ||
| .filter_level(log::LevelFilter::Info) | ||
| .parse_default_env() | ||
| .init(); | ||
| } | ||
| let logger = LLamaLogger::get_singleton(); | ||
| logger.set_enabled(enable_logger); | ||
| Ok(Self { | ||
| llama: LLamaInternal::load(path, params, enable_logger).await, | ||
| llama: LLamaInternal::load(path, params, enable_logger).await?, | ||
| }) | ||
@@ -59,5 +58,5 @@ } | ||
| #[napi] | ||
| pub async fn tokenize(&self, params: String, n_ctx: i32) -> Result<Vec<i32>> { | ||
| pub async fn tokenize(&self, params: String) -> Result<Vec<i32>> { | ||
| let llama = self.llama.lock().await; | ||
| llama.tokenize(¶ms, n_ctx as usize).await | ||
| llama.tokenize(¶ms).await | ||
| } | ||
@@ -85,5 +84,15 @@ | ||
| let llama = llama.blocking_lock(); | ||
| llama.inference(¶ms, running, |result| { | ||
| let res = llama.inference(¶ms, running, |result| { | ||
| tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking); | ||
| }); | ||
| if let Err(e) = res { | ||
| tsfn.call( | ||
| InferenceResult { | ||
| r#type: InferenceResultType::Error, | ||
| data: None, | ||
| message: Some(format!("Failed to run inference: {:?}", e)), | ||
| }, | ||
| ThreadsafeFunctionCallMode::NonBlocking, | ||
| ); | ||
| } | ||
| }); | ||
@@ -90,0 +99,0 @@ } |
+27
-53
| use std::sync::Arc; | ||
| use anyhow::Result; | ||
| use tokio::sync::Mutex; | ||
@@ -23,5 +24,5 @@ | ||
| enable_logger: bool, | ||
| ) -> Arc<Mutex<Self>> { | ||
| ) -> Result<Arc<Mutex<Self>>, napi::Error> { | ||
| let llama = LLamaInternal { | ||
| context: LLamaContext::from_file_and_params(&path, ¶ms, &None).await, | ||
| context: LLamaContext::from_file_and_params(&path, ¶ms).await?, | ||
| context_params: params, | ||
@@ -31,14 +32,12 @@ }; | ||
| if enable_logger { | ||
| llama.context.llama_print_system_info(); | ||
| llama.context.llama_print_system_info().map_err(|e| { | ||
| napi::Error::from_reason(format!("Failed to print system info: {:?}", e)) | ||
| })?; | ||
| } | ||
| Arc::new(Mutex::new(llama)) | ||
| Ok(Arc::new(Mutex::new(llama))) | ||
| } | ||
| pub async fn tokenize(&self, input: &str, n_ctx: usize) -> Result<Vec<i32>, napi::Error> { | ||
| pub async fn tokenize(&self, input: &str) -> Result<Vec<i32>, napi::Error> { | ||
| let context = &self.context; | ||
| if let Ok(data) = tokenize(context, input, n_ctx, false) { | ||
| Ok(data) | ||
| } else { | ||
| Err(napi::Error::from_reason("Failed to tokenize")) | ||
| } | ||
| Ok(tokenize(context, input, false)) | ||
| } | ||
@@ -48,10 +47,3 @@ | ||
| let context = &self.context; | ||
| let context_params_c = LlamaContextParams::or_default(&self.context_params); | ||
| let embd_inp = tokenize( | ||
| context, | ||
| input.prompt.as_str(), | ||
| context_params_c.n_ctx as usize, | ||
| true, | ||
| ) | ||
| .unwrap(); | ||
| let embd_inp = tokenize(context, input.prompt.as_str(), true); | ||
@@ -64,3 +56,3 @@ // let end_text = "\n"; | ||
| .llama_eval(embd_inp.as_slice(), embd_inp.len() as i32, 0, input) | ||
| .unwrap(); | ||
| .map_err(|e| napi::Error::from_reason(format!("Failed to evaluate input: {:?}", e)))?; | ||
@@ -81,25 +73,14 @@ let embeddings = context.llama_get_embeddings(); | ||
| callback: impl Fn(InferenceResult), | ||
| ) { | ||
| ) -> Result<(), napi::Error> { | ||
| let context = &self.context; | ||
| let context_params_c = LlamaContextParams::or_default(&self.context_params); | ||
| // Tokenize the stop sequence and input prompt. | ||
| let tokenized_stop_prompt = input.stop_sequence.as_ref().map(|stop_sequence| { | ||
| tokenize( | ||
| context, | ||
| stop_sequence, | ||
| context_params_c.n_ctx as usize, | ||
| false, | ||
| ) | ||
| .unwrap() | ||
| }); | ||
| let tokenized_stop_prompt = input | ||
| .stop_sequence | ||
| .as_ref() | ||
| .map(|stop_sequence| tokenize(context, stop_sequence, false)); | ||
| log::info!("tokenized_stop_prompt: {:?}", tokenized_stop_prompt); | ||
| let tokenized_input = tokenize( | ||
| context, | ||
| input.prompt.as_str(), | ||
| context_params_c.n_ctx as usize, | ||
| true, | ||
| ) | ||
| .unwrap(); | ||
| let tokenized_input = tokenize(context, input.prompt.as_str(), true); | ||
@@ -111,10 +92,8 @@ // Embd contains the prompt and the completion. The longer the prompt, the shorter the completion. | ||
| // Feed prompt to the model. | ||
| context | ||
| .llama_eval( | ||
| tokenized_input.as_slice(), | ||
| tokenized_input.len() as i32, | ||
| 0, | ||
| input, | ||
| ) | ||
| .unwrap(); | ||
| context.llama_eval( | ||
| tokenized_input.as_slice(), | ||
| tokenized_input.len() as i32, | ||
| 0, | ||
| input, | ||
| )?; | ||
| let token_eos = llama_token_eos(); | ||
@@ -150,8 +129,3 @@ | ||
| { | ||
| callback(InferenceResult { | ||
| r#type: InferenceResultType::Error, | ||
| data: None, | ||
| message: Some("Too many tokens predicted".to_string()), | ||
| }); | ||
| break; | ||
| return Err(napi::Error::from_reason("Too many tokens predicted")); | ||
| } | ||
@@ -188,5 +162,3 @@ | ||
| // Continue feeding the token to the model. | ||
| context | ||
| .llama_eval(&embd[n_used..], 1, n_used as i32, input) | ||
| .unwrap(); | ||
| context.llama_eval(&embd[n_used..], 1, n_used as i32, input)?; | ||
| } | ||
@@ -210,3 +182,5 @@ | ||
| }); | ||
| Ok(()) | ||
| } | ||
| } |
+3
-12
| // use crate::output::Output; | ||
| use anyhow::Result; | ||
| // use anyhow::Result; | ||
| use std::ffi::CString; | ||
@@ -31,13 +31,4 @@ use std::os::raw::c_char; | ||
| /// A Result containing a Vec of llama_tokens on success, or an error if the tokenized input is too long. | ||
| pub(crate) fn tokenize( | ||
| context: &LLamaContext, | ||
| text: &str, | ||
| context_window_size: usize, | ||
| add_bos: bool, | ||
| ) -> Result<Vec<llama_token>> { | ||
| let tokenized_input = llama_tokenize_helper(context, text, add_bos); | ||
| if tokenized_input.len() > context_window_size { | ||
| anyhow::bail!("Input too long") | ||
| } | ||
| Ok(tokenized_input) | ||
| pub(crate) fn tokenize(context: &LLamaContext, text: &str, add_bos: bool) -> Vec<llama_token> { | ||
| llama_tokenize_helper(context, text, add_bos) | ||
| } | ||
@@ -44,0 +35,0 @@ |
+2
-0
@@ -49,2 +49,3 @@ use napi::bindgen_prelude::*; | ||
| pub n_parts: i32, | ||
| pub n_gpu_layers: i32, | ||
| pub seed: i32, | ||
@@ -57,2 +58,3 @@ pub f16_kv: bool, | ||
| pub use_mmap: bool, | ||
| pub lora: Option<LlamaLoraAdaptor>, | ||
| } | ||
@@ -59,0 +61,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: The package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances in 1 package
Filesystem access
Supply chain risk: The package accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
URL strings
Supply chain risk: The package contains fragments of external URLs or IP addresses, which it may be accessing at runtime.
Found 1 instance in 1 package
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: The package accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
28
3.7%531
5.57%9645701
-35.59%5
150%8
14.29%