@llama-node/llama-cpp
Advanced tools
| import { LLama, LlamaContextParams, LlamaInvocation } from "../index"; | ||
| import path from "path"; | ||
// Example: print the embedding vector computed for a prompt.
// The model is loaded with `embedding: true`; the callback logs the numeric
// array carried in the result's `data` field.
const modelPath = path.resolve(process.cwd(), "../../ggml-vicuna-7b-4bit-rev1.bin");

const contextParams: LlamaContextParams = {
    nCtx: 512,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: true,
};

// Third argument is `enableLogger`.
const model = LLama.load(modelPath, contextParams, false);

const invocation: LlamaInvocation = {
    nThreads: 4,
    nTokPredict: 2048,
    topK: 40,
    topP: 0.1,
    temp: 0.2,
    repeatPenalty: 1,
    prompt: `Who is the president of the United States?`,
};

model.getWordEmbedding(invocation, (result) => {
    console.log(result.data);
});
| import { LLama } from "../index"; | ||
| import path from "path"; | ||
// Example: tokenize a prompt and print the resulting token ids.
// `null` is passed for the context parameters and logging is disabled.
const modelPath = path.resolve(process.cwd(), "../../ggml-vicuna-7b-4bit-rev1.bin");
const model = LLama.load(modelPath, null, false);

const text = `Who is the president of the United States?`;
const nCtx = 2048;

model.tokenize(text, nCtx, (result) => {
    console.log(result.data);
});
+55
| use crate::context::LlamaInvocation; | ||
| use napi::bindgen_prelude::*; | ||
| use std::sync::mpsc::Sender; | ||
/// Requests handled by the llama command loop.
///
/// Every variant carries the `Sender` on which the outcome of that request is reported.
| #[derive(Clone, Debug)] | ||
| pub enum LLamaCommand { | ||
/// Run inference for the given invocation.
| Inference(LlamaInvocation, Sender<InferenceResult>), | ||
/// Tokenize the text; the `usize` is the `n_ctx` value forwarded to the tokenizer.
| Tokenize(String, usize, Sender<TokenizeResult>), | ||
/// Compute the embedding vector for the invocation's prompt.
| Embedding(LlamaInvocation, Sender<EmbeddingResult>), | ||
| } | ||
/// Outcome tag attached to a `TokenizeResult`.
| #[napi] | ||
| pub enum TokenizeResultType { | ||
/// Tokenization failed; the accompanying `data` is empty.
| Error, | ||
/// Tokenization succeeded; the accompanying `data` holds the tokens.
| Data, | ||
| } | ||
/// Result handed to the JavaScript callback of `tokenize`.
| #[napi(object)] | ||
| pub struct TokenizeResult { | ||
/// Whether tokenization succeeded.
| pub r#type: TokenizeResultType, | ||
/// The tokens on success; an empty vector when `type` is `Error`.
| pub data: Vec<i32>, | ||
| } | ||
/// Payload of an `InferenceResult`.
| #[napi(object)] | ||
| #[derive(Clone, Debug)] | ||
| pub struct InferenceToken { | ||
/// Text of the token.
| pub token: String, | ||
/// NOTE(review): presumably set on the last token of a run; confirm against the
/// inference loop, which is not part of this view.
| pub completed: bool, | ||
| } | ||
/// Outcome tag attached to an `InferenceResult`.
///
/// NOTE(review): the producer of these values is not visible here; the variant notes below
/// are inferred from the names and should be confirmed against the inference loop.
| #[napi] | ||
| pub enum InferenceResultType { | ||
/// Presumably reports a failure.
| Error, | ||
/// Presumably accompanies a produced token.
| Data, | ||
/// Presumably marks the end of the stream.
| End, | ||
| } | ||
/// Result handed to the JavaScript callback of `inference`.
| #[napi(object)] | ||
| pub struct InferenceResult { | ||
/// Kind of result this value represents.
| pub r#type: InferenceResultType, | ||
/// Optional token payload.
| pub data: Option<InferenceToken>, | ||
/// Optional text message. NOTE(review): presumably only set for errors; confirm.
| pub message: Option<String>, | ||
| } | ||
/// Outcome tag attached to an `EmbeddingResult`.
| #[napi] | ||
| pub enum EmbeddingResultType { | ||
/// The embeddings could not be read; the accompanying `data` is empty.
| Error, | ||
/// The accompanying `data` holds the embedding vector.
| Data, | ||
| } | ||
/// Result handed to the JavaScript callback of `get_word_embedding`.
| #[napi(object)] | ||
| pub struct EmbeddingResult { | ||
/// Whether the embedding vector could be produced.
| pub r#type: EmbeddingResultType, | ||
/// Embedding values (the native `f32` values widened to `f64`); empty when `type` is `Error`.
| pub data: Vec<f64>, | ||
| } | ||
+1
-0
@@ -20,2 +20,3 @@ [package] | ||
| llm-chain-llama-sys = { git = "https://github.com/hlhr202/llm-chain.git", branch = "feature/fix-cross-compile" } | ||
| # llm-chain-llama-sys = { path = "../../../llm-chain/llm-chain-llama/sys" } | ||
| napi = { version = "2.12.2", default-features = false, features = ["napi6", "async"] } | ||
@@ -22,0 +23,0 @@ napi-derive = "2.12.2" |
+23
-0
@@ -26,2 +26,10 @@ /* tslint:disable */ | ||
| } | ||
/** Outcome tag of a `TokenizeResult`. */
| export const enum TokenizeResultType { | ||
| Error = 0, | ||
| Data = 1 | ||
| } | ||
/** Value passed to the `tokenize` callback; `data` is the list of tokens. */
| export interface TokenizeResult { | ||
| type: TokenizeResultType | ||
| data: Array<number> | ||
| } | ||
| export interface InferenceToken { | ||
@@ -41,6 +49,21 @@ token: string | ||
| } | ||
/** Outcome tag of an `EmbeddingResult`. */
| export const enum EmbeddingResultType { | ||
| Error = 0, | ||
| Data = 1 | ||
| } | ||
/** Value passed to the `getWordEmbedding` callback; `data` is the embedding vector. */
| export interface EmbeddingResult { | ||
| type: EmbeddingResultType | ||
| data: Array<number> | ||
| } | ||
/** Native llama binding; instance methods report their results through the supplied callback. */
| export class LLama { | ||
/** Loads the model file at `path`; `params` may be omitted by passing `null` or `undefined`. */
| static load(path: string, params: LlamaContextParams | undefined | null, enableLogger: boolean): LLama | ||
/** Computes the embedding for `input` and passes it to `callback`. */
| getWordEmbedding(input: LlamaInvocation, | ||
| callback: (result: EmbeddingResult) => void): void | ||
/** Tokenizes the string `params` and passes the tokens to `callback`. */
| tokenize(params: string, | ||
| nCtx: number, | ||
| callback: (result: | ||
| { type: TokenizeResultType, data: number[] } | ||
| ) => void): void | ||
/** Runs inference for `input`, reporting results through `callback`. */
| inference(input: LlamaInvocation, | ||
| callback: (result: InferenceResult) => void): void | ||
| } | ||
+3
-1
@@ -255,5 +255,7 @@ /* tslint:disable */ | ||
| const { InferenceResultType, LLama } = nativeBinding | ||
| const { TokenizeResultType, InferenceResultType, EmbeddingResultType, LLama } = nativeBinding | ||
| module.exports.TokenizeResultType = TokenizeResultType | ||
| module.exports.InferenceResultType = InferenceResultType | ||
| module.exports.EmbeddingResultType = EmbeddingResultType | ||
| module.exports.LLama = LLama |
+1
-1
| { | ||
| "name": "@llama-node/llama-cpp", | ||
| "version": "0.0.21", | ||
| "version": "0.0.22", | ||
| "main": "index.js", | ||
@@ -5,0 +5,0 @@ "types": "index.d.ts", |
+17
-7
@@ -1,2 +0,2 @@ | ||
| use std::{ffi::CStr, ptr::null_mut}; | ||
| use std::{ffi::CStr, ptr::null_mut, slice}; | ||
@@ -6,4 +6,4 @@ use anyhow::Result; | ||
| llama_context, llama_context_default_params, llama_context_params, llama_eval, llama_free, | ||
| llama_init_from_file, llama_print_system_info, llama_sample_top_p_top_k, llama_token, | ||
| llama_token_to_str, | ||
| llama_get_embeddings, llama_init_from_file, llama_n_embd, llama_print_system_info, | ||
| llama_sample_top_p_top_k, llama_token, llama_token_to_str, | ||
| }; | ||
@@ -36,2 +36,3 @@ | ||
| pub embedding: bool, | ||
| // pub use_mmap: bool, | ||
| } | ||
@@ -62,2 +63,3 @@ | ||
| progress_callback_user_data: null_mut(), | ||
| // use_mmap: params.use_mmap, | ||
| } | ||
@@ -74,6 +76,3 @@ } | ||
| // Creates a new LLamaContext from the specified file and configuration parameters. | ||
| pub fn from_file_and_params( | ||
| path: &str, | ||
| params: &Option<LlamaContextParams>, | ||
| ) -> Self { | ||
| pub fn from_file_and_params(path: &str, params: &Option<LlamaContextParams>) -> Self { | ||
| let params = LlamaContextParams::or_default(params); | ||
@@ -125,2 +124,13 @@ let ctx = unsafe { llama_init_from_file(path.as_ptr() as *const i8, params) }; | ||
| pub fn llama_get_embeddings(&self) -> Result<Vec<f32>, ()> { | ||
| unsafe { | ||
| let embd_size = llama_n_embd(self.ctx); | ||
| let embd_ptr = llama_get_embeddings(self.ctx); | ||
| if embd_ptr.is_null() { | ||
| return Err(()); | ||
| } | ||
| Ok(slice::from_raw_parts(embd_ptr, embd_size as usize).to_vec()) | ||
| } | ||
| } | ||
| // Evaluates the given tokens with the specified configuration. | ||
@@ -127,0 +137,0 @@ pub fn llama_eval( |
+89
-12
@@ -10,12 +10,19 @@ #![deny(clippy::all)] | ||
| mod tokenizer; | ||
| mod types; | ||
| use std::sync::{mpsc::channel, Arc}; | ||
| use std::{ | ||
| sync::{mpsc::channel, Arc}, | ||
| thread, time, | ||
| }; | ||
| use context::{LlamaContextParams, LlamaInvocation}; | ||
| use llama::{InferenceResult, LLamaChannel}; | ||
| use llama::LLamaChannel; | ||
| use napi::{ | ||
| bindgen_prelude::*, | ||
| threadsafe_function::{ErrorStrategy, ThreadsafeFunction, ThreadsafeFunctionCallMode}, | ||
| threadsafe_function::{ | ||
| ErrorStrategy, ThreadSafeCallContext, ThreadsafeFunction, ThreadsafeFunctionCallMode, | ||
| }, | ||
| JsFunction, | ||
| }; | ||
| use types::{InferenceResult, TokenizeResult, EmbeddingResult}; | ||
@@ -54,3 +61,3 @@ #[napi] | ||
| _ => { | ||
| std::thread::yield_now(); | ||
| thread::yield_now(); | ||
| } | ||
@@ -63,2 +70,68 @@ } | ||
| #[napi(ts_args_type = "input: LlamaInvocation, | ||
| callback: (result: EmbeddingResult) => void")] | ||
| pub fn get_word_embedding(&self, input: LlamaInvocation, callback: JsFunction) -> Result<()> { | ||
| let tsfn: ThreadsafeFunction<EmbeddingResult, ErrorStrategy::Fatal> = | ||
| callback.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))?; | ||
| let (embeddings_sender, embeddings_receiver) = channel(); | ||
| let llama_channel = self.llama_channel.clone(); | ||
| llama_channel.embedding(input, embeddings_sender); | ||
| thread::spawn(move || { | ||
| loop { | ||
| let result = embeddings_receiver.recv(); | ||
| match result { | ||
| Ok(result) => { | ||
| tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking); | ||
| } | ||
| Err(_) => { | ||
| break; | ||
| } | ||
| } | ||
| } | ||
| thread::sleep(time::Duration::from_millis(300)); // wait for end signal | ||
| tsfn.abort().unwrap(); | ||
| }); | ||
| Ok(()) | ||
| } | ||
| #[napi(ts_args_type = "params: string, | ||
| nCtx: number, | ||
| callback: (result: | ||
| { type: TokenizeResultType, data: number[] } | ||
| ) => void")] | ||
| pub fn tokenize(&self, params: String, n_ctx: i32, callback: JsFunction) -> Result<()> { | ||
| let (tokenize_sender, tokenize_receiver) = channel::<TokenizeResult>(); | ||
| let tsfn: ThreadsafeFunction<TokenizeResult, ErrorStrategy::Fatal> = callback | ||
| .create_threadsafe_function(0, |ctx: ThreadSafeCallContext<TokenizeResult>| { | ||
| Ok(vec![ctx.value]) | ||
| })?; | ||
| let llama_channel = self.llama_channel.clone(); | ||
| llama_channel.tokenize(params, n_ctx as usize, tokenize_sender); | ||
| thread::spawn(move || { | ||
| 'waiting_tokenize: loop { | ||
| let recv = tokenize_receiver.recv(); | ||
| match recv { | ||
| Ok(callback) => { | ||
| tsfn.call(callback, ThreadsafeFunctionCallMode::Blocking); | ||
| break 'waiting_tokenize; | ||
| } | ||
| _ => { | ||
| thread::yield_now(); | ||
| } | ||
| } | ||
| } | ||
| thread::sleep(time::Duration::from_millis(300)); // wait for end signal | ||
| tsfn.abort().unwrap(); | ||
| }); | ||
| Ok(()) | ||
| } | ||
| #[napi(ts_args_type = "input: LlamaInvocation, | ||
| callback: (result: InferenceResult) => void")] | ||
@@ -73,12 +146,16 @@ pub fn inference(&self, input: LlamaInvocation, callback: JsFunction) -> Result<()> { | ||
| std::thread::spawn(move || loop { | ||
| let result = inference_receiver.recv(); | ||
| match result { | ||
| Ok(result) => { | ||
| tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking); | ||
| thread::spawn(move || { | ||
| loop { | ||
| let result = inference_receiver.recv(); | ||
| match result { | ||
| Ok(result) => { | ||
| tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking); | ||
| } | ||
| Err(_) => { | ||
| break; | ||
| } | ||
| } | ||
| Err(_) => { | ||
| break; | ||
| } | ||
| } | ||
| thread::sleep(time::Duration::from_millis(300)); // wait for end signal | ||
| tsfn.abort().unwrap(); | ||
| }); | ||
@@ -85,0 +162,0 @@ |
+75
-26
@@ -1,2 +0,1 @@ | ||
| use napi::bindgen_prelude::*; | ||
| use std::{ | ||
@@ -13,2 +12,6 @@ sync::{ | ||
| tokenizer::{embedding_to_output, llama_token_eos, tokenize}, | ||
| types::{ | ||
| EmbeddingResult, EmbeddingResultType, InferenceResult, InferenceResultType, InferenceToken, | ||
| LLamaCommand, TokenizeResult, TokenizeResultType, | ||
| }, | ||
| }; | ||
@@ -27,33 +30,61 @@ | ||
| #[derive(Clone, Debug)] | ||
| pub enum LLamaCommand { | ||
| Inference(LlamaInvocation, Sender<InferenceResult>), | ||
| } | ||
| impl LLamaInternal { | ||
| pub fn tokenize(&self, input: &str, n_ctx: usize, sender: &Sender<TokenizeResult>) { | ||
| if let Ok(data) = tokenize(&self.context, input, n_ctx, false) { | ||
| sender | ||
| .send(TokenizeResult { | ||
| data, | ||
| r#type: TokenizeResultType::Data, | ||
| }) | ||
| .unwrap(); | ||
| } else { | ||
| sender | ||
| .send(TokenizeResult { | ||
| data: vec![], | ||
| r#type: TokenizeResultType::Error, | ||
| }) | ||
| .unwrap(); | ||
| } | ||
| } | ||
| #[napi(object)] | ||
| #[derive(Clone, Debug)] | ||
| pub struct InferenceToken { | ||
| pub token: String, | ||
| pub completed: bool, | ||
| } | ||
| pub fn embedding(&self, input: &LlamaInvocation, sender: &Sender<EmbeddingResult>) { | ||
| let context_params_c = LlamaContextParams::or_default(&self.context_params); | ||
| let input_ctx = &self.context; | ||
| let embd_inp = tokenize( | ||
| input_ctx, | ||
| input.prompt.as_str(), | ||
| context_params_c.n_ctx as usize, | ||
| true, | ||
| ) | ||
| .unwrap(); | ||
| #[napi] | ||
| pub enum InferenceResultType { | ||
| Error, | ||
| Data, | ||
| End, | ||
| } | ||
| // let end_text = "\n"; | ||
| // let end_token = | ||
| // tokenize(input_ctx, end_text, context_params_c.n_ctx as usize, false).unwrap(); | ||
| #[napi(object)] | ||
| pub struct InferenceResult { | ||
| pub r#type: InferenceResultType, | ||
| pub data: Option<InferenceToken>, | ||
| pub message: Option<String>, | ||
| } | ||
| input_ctx | ||
| .llama_eval(embd_inp.as_slice(), embd_inp.len() as i32, 0, input) | ||
| .unwrap(); | ||
| impl LLamaInternal { | ||
| let embeddings = input_ctx.llama_get_embeddings(); | ||
| if let Ok(embeddings) = embeddings { | ||
| sender | ||
| .send(EmbeddingResult { | ||
| r#type: EmbeddingResultType::Data, | ||
| data: embeddings.iter().map(|&x| x as f64).collect(), | ||
| }) | ||
| .unwrap(); | ||
| } else { | ||
| sender | ||
| .send(EmbeddingResult { | ||
| r#type: EmbeddingResultType::Error, | ||
| data: vec![], | ||
| }) | ||
| .unwrap(); | ||
| } | ||
| } | ||
| pub fn inference(&self, input: &LlamaInvocation, sender: &Sender<InferenceResult>) { | ||
| let context_params_c = LlamaContextParams::or_default(&self.context_params); | ||
| log::info!("inference: {:?}", input); | ||
| log::info!("context_params: {:?}", context_params_c); | ||
| let input_ctx = &self.context; | ||
@@ -193,2 +224,14 @@ // Tokenize the stop sequence and input prompt. | ||
| pub fn tokenize(&self, input: String, n_ctx: usize, sender: Sender<TokenizeResult>) { | ||
| self.command_sender | ||
| .send(LLamaCommand::Tokenize(input, n_ctx, sender)) | ||
| .unwrap(); | ||
| } | ||
| pub fn embedding(&self, params: LlamaInvocation, sender: Sender<EmbeddingResult>) { | ||
| self.command_sender | ||
| .send(LLamaCommand::Embedding(params, sender)) | ||
| .unwrap(); | ||
| } | ||
| pub fn inference(&self, params: LlamaInvocation, sender: Sender<InferenceResult>) { | ||
@@ -230,2 +273,8 @@ self.command_sender | ||
| } | ||
| Ok(LLamaCommand::Embedding(params, sender)) => { | ||
| llama.embedding(¶ms, &sender); | ||
| } | ||
| Ok(LLamaCommand::Tokenize(text, n_ctx, sender)) => { | ||
| llama.tokenize(&text, n_ctx, &sender); | ||
| } | ||
| Err(TryRecvError::Disconnected) => { | ||
@@ -232,0 +281,0 @@ break 'llama_loop; |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system and could potentially read sensitive data.
Found 1 instance in 1 package
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system and could potentially read sensitive data.
Found 1 instance in 1 package
10705781
1.89%20
17.65%508
14.67%