New Research: Supply Chain Attack on Axios Pulls Malicious Dependency from npm. Details
Socket
Book a DemoSign in
Socket

@llama-node/llama-cpp

Package Overview
Dependencies
Maintainers
1
Versions
25
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@llama-node/llama-cpp - npm Package Compare versions

Comparing version
0.0.21
to
0.0.22
+33
example/embedding.ts
// Example: compute a word embedding for a prompt with the llama.cpp bindings.
import { LLama, LlamaContextParams, LlamaInvocation } from "../index";
import path from "path";

// The model file lives two directories above the working directory.
const modelPath = path.resolve(process.cwd(), "../../ggml-vicuna-7b-4bit-rev1.bin");

// Context configuration: embedding mode enabled, everything else minimal.
const contextParams: LlamaContextParams = {
  nCtx: 512,
  nParts: -1,
  seed: 0,
  f16Kv: false,
  logitsAll: false,
  vocabOnly: false,
  useMlock: false,
  embedding: true,
};

// Third argument disables the native logger.
const llama = LLama.load(modelPath, contextParams, false);

const invocation: LlamaInvocation = {
  nThreads: 4,
  nTokPredict: 2048,
  topK: 40,
  topP: 0.1,
  temp: 0.2,
  repeatPenalty: 1,
  prompt: `Who is the president of the United States?`,
};

// Print the embedding payload when the native side invokes the callback.
llama.getWordEmbedding(invocation, (result) => {
  console.log(result.data);
});
// Example: tokenize a prompt with the llama.cpp bindings.
import { LLama } from "../index";
import path from "path";

// Load the model with default context params (null) and the logger disabled.
const llama = LLama.load(
  path.resolve(process.cwd(), "../../ggml-vicuna-7b-4bit-rev1.bin"),
  null,
  false
);

const text = `Who is the president of the United States?`;

// Tokenize with a 2048-token context and print the resulting token ids.
llama.tokenize(text, 2048, (result) => {
  console.log(result.data);
});
use crate::context::LlamaInvocation;
use napi::bindgen_prelude::*;
use std::sync::mpsc::Sender;
/// Commands dispatched to the background worker thread that owns the
/// llama context. Each variant carries the `Sender` used to stream
/// results back to the requesting side.
#[derive(Clone, Debug)]
pub enum LLamaCommand {
    // Run text generation for the given invocation parameters.
    Inference(LlamaInvocation, Sender<InferenceResult>),
    // Tokenize the string using the given context size.
    Tokenize(String, usize, Sender<TokenizeResult>),
    // Compute an embedding for the invocation's prompt.
    Embedding(LlamaInvocation, Sender<EmbeddingResult>),
}

/// Discriminator exported to JS for `TokenizeResult`.
/// Variant order fixes the numeric values seen from JS (Error = 0, Data = 1).
#[napi]
pub enum TokenizeResultType {
    Error,
    Data,
}

/// Outcome of a tokenize request; `data` holds the token ids
/// (empty when `type` is `Error`).
#[napi(object)]
pub struct TokenizeResult {
    pub r#type: TokenizeResultType,
    pub data: Vec<i32>,
}

/// A single generated token streamed to JS during inference.
#[napi(object)]
#[derive(Clone, Debug)]
pub struct InferenceToken {
    pub token: String,
    // True when this token ends the generation.
    pub completed: bool,
}

/// Discriminator exported to JS for `InferenceResult`
/// (Error = 0, Data = 1, End = 2 by declaration order).
#[napi]
pub enum InferenceResultType {
    Error,
    Data,
    End,
}

/// One message in the inference result stream; `data` is present for
/// `Data` results, `message` may describe an error.
#[napi(object)]
pub struct InferenceResult {
    pub r#type: InferenceResultType,
    pub data: Option<InferenceToken>,
    pub message: Option<String>,
}

/// Discriminator exported to JS for `EmbeddingResult` (Error = 0, Data = 1).
#[napi]
pub enum EmbeddingResultType {
    Error,
    Data,
}

/// Outcome of an embedding request; `data` is the embedding vector as f64
/// (empty when `type` is `Error`).
#[napi(object)]
pub struct EmbeddingResult {
    pub r#type: EmbeddingResultType,
    pub data: Vec<f64>,
}
+1
-0

@@ -20,2 +20,3 @@ [package]

llm-chain-llama-sys = { git = "https://github.com/hlhr202/llm-chain.git", branch = "feature/fix-cross-compile" }
# llm-chain-llama-sys = { path = "../../../llm-chain/llm-chain-llama/sys" }
napi = { version = "2.12.2", default-features = false, features = ["napi6", "async"] }

@@ -22,0 +23,0 @@ napi-derive = "2.12.2"

@@ -26,2 +26,10 @@ /* tslint:disable */

}
/** Discriminator for {@link TokenizeResult}: `Error` = 0, `Data` = 1. */
export const enum TokenizeResultType {
  Error = 0,
  Data = 1
}
/** Result delivered to the `LLama.tokenize` callback; `data` holds token ids. */
export interface TokenizeResult {
  type: TokenizeResultType
  data: Array<number>
}
export interface InferenceToken {

@@ -41,6 +49,21 @@ token: string

}
/** Discriminator for {@link EmbeddingResult}: `Error` = 0, `Data` = 1. */
export const enum EmbeddingResultType {
  Error = 0,
  Data = 1
}
/** Result delivered to the `LLama.getWordEmbedding` callback; `data` is the embedding vector. */
export interface EmbeddingResult {
  type: EmbeddingResultType
  data: Array<number>
}
/** Native binding over a llama.cpp model; results are delivered via callbacks. */
export class LLama {
  /** Load a model from `path`; `params` may be null for defaults; `enableLogger` toggles native logging. */
  static load(path: string, params: LlamaContextParams | undefined | null, enableLogger: boolean): LLama
  /** Compute an embedding for the invocation's prompt and pass it to `callback`. */
  getWordEmbedding(input: LlamaInvocation,
callback: (result: EmbeddingResult) => void): void
  /** Tokenize `params` with context size `nCtx`; token ids arrive via `callback`. */
  tokenize(params: string,
nCtx: number,
callback: (result:
{ type: TokenizeResultType, data: number[] }
) => void): void
  /** Run text generation; results stream to `callback` until an `End` result. */
  inference(input: LlamaInvocation,
callback: (result: InferenceResult) => void): void
}
+3
-1

@@ -255,5 +255,7 @@ /* tslint:disable */

const { InferenceResultType, LLama } = nativeBinding
const { TokenizeResultType, InferenceResultType, EmbeddingResultType, LLama } = nativeBinding
module.exports.TokenizeResultType = TokenizeResultType
module.exports.InferenceResultType = InferenceResultType
module.exports.EmbeddingResultType = EmbeddingResultType
module.exports.LLama = LLama
{
"name": "@llama-node/llama-cpp",
"version": "0.0.21",
"version": "0.0.22",
"main": "index.js",

@@ -5,0 +5,0 @@ "types": "index.d.ts",

@@ -1,2 +0,2 @@

use std::{ffi::CStr, ptr::null_mut};
use std::{ffi::CStr, ptr::null_mut, slice};

@@ -6,4 +6,4 @@ use anyhow::Result;

llama_context, llama_context_default_params, llama_context_params, llama_eval, llama_free,
llama_init_from_file, llama_print_system_info, llama_sample_top_p_top_k, llama_token,
llama_token_to_str,
llama_get_embeddings, llama_init_from_file, llama_n_embd, llama_print_system_info,
llama_sample_top_p_top_k, llama_token, llama_token_to_str,
};

@@ -36,2 +36,3 @@

pub embedding: bool,
// pub use_mmap: bool,
}

@@ -62,2 +63,3 @@

progress_callback_user_data: null_mut(),
// use_mmap: params.use_mmap,
}

@@ -74,6 +76,3 @@ }

// Creates a new LLamaContext from the specified file and configuration parameters.
pub fn from_file_and_params(
path: &str,
params: &Option<LlamaContextParams>,
) -> Self {
pub fn from_file_and_params(path: &str, params: &Option<LlamaContextParams>) -> Self {
let params = LlamaContextParams::or_default(params);

@@ -125,2 +124,13 @@ let ctx = unsafe { llama_init_from_file(path.as_ptr() as *const i8, params) };

/// Read the embedding vector out of the native llama context.
///
/// Returns `Err(())` when the native embeddings pointer is null
/// (e.g. the context was not created with embeddings enabled —
/// NOTE(review): presumed from the null check; confirm against llama.cpp docs).
pub fn llama_get_embeddings(&self) -> Result<Vec<f32>, ()> {
    unsafe {
        let embd_size = llama_n_embd(self.ctx);
        let embd_ptr = llama_get_embeddings(self.ctx);
        if embd_ptr.is_null() {
            return Err(());
        }
        // SAFETY: relies on the native API returning a buffer of at least
        // `llama_n_embd` floats when the pointer is non-null — TODO confirm
        // against the llama.cpp headers.
        Ok(slice::from_raw_parts(embd_ptr, embd_size as usize).to_vec())
    }
}
// Evaluates the given tokens with the specified configuration.

@@ -127,0 +137,0 @@ pub fn llama_eval(

@@ -10,12 +10,19 @@ #![deny(clippy::all)]

mod tokenizer;
mod types;
use std::sync::{mpsc::channel, Arc};
use std::{
sync::{mpsc::channel, Arc},
thread, time,
};
use context::{LlamaContextParams, LlamaInvocation};
use llama::{InferenceResult, LLamaChannel};
use llama::LLamaChannel;
use napi::{
bindgen_prelude::*,
threadsafe_function::{ErrorStrategy, ThreadsafeFunction, ThreadsafeFunctionCallMode},
threadsafe_function::{
ErrorStrategy, ThreadSafeCallContext, ThreadsafeFunction, ThreadsafeFunctionCallMode,
},
JsFunction,
};
use types::{InferenceResult, TokenizeResult, EmbeddingResult};

@@ -54,3 +61,3 @@ #[napi]

_ => {
std::thread::yield_now();
thread::yield_now();
}

@@ -63,2 +70,68 @@ }

/// Asynchronously compute a word embedding for `input.prompt`.
///
/// Queues an embedding command on the worker channel, then spawns a bridge
/// thread that forwards each `EmbeddingResult` received on the mpsc channel
/// to the JS `callback` through a threadsafe function. Returns immediately.
#[napi(ts_args_type = "input: LlamaInvocation,
callback: (result: EmbeddingResult) => void")]
pub fn get_word_embedding(&self, input: LlamaInvocation, callback: JsFunction) -> Result<()> {
    // Wrap the JS callback so it can be invoked from a non-JS thread.
    let tsfn: ThreadsafeFunction<EmbeddingResult, ErrorStrategy::Fatal> =
        callback.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))?;
    let (embeddings_sender, embeddings_receiver) = channel();
    let llama_channel = self.llama_channel.clone();
    llama_channel.embedding(input, embeddings_sender);
    thread::spawn(move || {
        // Forward results until the worker drops its sender (recv errors out).
        loop {
            let result = embeddings_receiver.recv();
            match result {
                Ok(result) => {
                    tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking);
                }
                Err(_) => {
                    break;
                }
            }
        }
        thread::sleep(time::Duration::from_millis(300)); // wait for end signal
        // Release the threadsafe function so Node can shut down cleanly.
        tsfn.abort().unwrap();
    });
    Ok(())
}
/// Asynchronously tokenize `params` with context size `n_ctx`.
///
/// Queues a tokenize command on the worker channel and spawns a bridge
/// thread that delivers the single `TokenizeResult` to the JS `callback`
/// via a threadsafe function, then tears the function down.
#[napi(ts_args_type = "params: string,
nCtx: number,
callback: (result:
{ type: TokenizeResultType, data: number[] }
) => void")]
pub fn tokenize(&self, params: String, n_ctx: i32, callback: JsFunction) -> Result<()> {
    let (tokenize_sender, tokenize_receiver) = channel::<TokenizeResult>();
    // Wrap the JS callback so it can be invoked from a non-JS thread.
    let tsfn: ThreadsafeFunction<TokenizeResult, ErrorStrategy::Fatal> = callback
        .create_threadsafe_function(0, |ctx: ThreadSafeCallContext<TokenizeResult>| {
            Ok(vec![ctx.value])
        })?;
    let llama_channel = self.llama_channel.clone();
    llama_channel.tokenize(params, n_ctx as usize, tokenize_sender);
    thread::spawn(move || {
        // Exactly one result is expected, so exit the loop after delivery.
        'waiting_tokenize: loop {
            let recv = tokenize_receiver.recv();
            match recv {
                Ok(callback) => {
                    tsfn.call(callback, ThreadsafeFunctionCallMode::Blocking);
                    break 'waiting_tokenize;
                }
                _ => {
                    thread::yield_now();
                }
            }
        }
        thread::sleep(time::Duration::from_millis(300)); // wait for end signal
        // Release the threadsafe function so Node can shut down cleanly.
        tsfn.abort().unwrap();
    });
    Ok(())
}
#[napi(ts_args_type = "input: LlamaInvocation,
callback: (result: InferenceResult) => void")]

@@ -73,12 +146,16 @@ pub fn inference(&self, input: LlamaInvocation, callback: JsFunction) -> Result<()> {

std::thread::spawn(move || loop {
let result = inference_receiver.recv();
match result {
Ok(result) => {
tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking);
thread::spawn(move || {
loop {
let result = inference_receiver.recv();
match result {
Ok(result) => {
tsfn.call(result, ThreadsafeFunctionCallMode::NonBlocking);
}
Err(_) => {
break;
}
}
Err(_) => {
break;
}
}
thread::sleep(time::Duration::from_millis(300)); // wait for end signal
tsfn.abort().unwrap();
});

@@ -85,0 +162,0 @@

@@ -1,2 +0,1 @@

use napi::bindgen_prelude::*;
use std::{

@@ -13,2 +12,6 @@ sync::{

tokenizer::{embedding_to_output, llama_token_eos, tokenize},
types::{
EmbeddingResult, EmbeddingResultType, InferenceResult, InferenceResultType, InferenceToken,
LLamaCommand, TokenizeResult, TokenizeResultType,
},
};

@@ -27,33 +30,61 @@

#[derive(Clone, Debug)]
pub enum LLamaCommand {
Inference(LlamaInvocation, Sender<InferenceResult>),
}
impl LLamaInternal {
/// Tokenize `input` with context size `n_ctx` and send the outcome on `sender`.
///
/// On success sends a `Data` result carrying the token ids; on failure sends
/// an `Error` result with an empty vector.
pub fn tokenize(&self, input: &str, n_ctx: usize, sender: &Sender<TokenizeResult>) {
    // Final `false` argument presumably skips prepending the BOS token —
    // NOTE(review): confirm against the `tokenize` helper's signature.
    if let Ok(data) = tokenize(&self.context, input, n_ctx, false) {
        sender
            .send(TokenizeResult {
                data,
                r#type: TokenizeResultType::Data,
            })
            .unwrap();
    } else {
        sender
            .send(TokenizeResult {
                data: vec![],
                r#type: TokenizeResultType::Error,
            })
            .unwrap();
    }
}
#[napi(object)]
#[derive(Clone, Debug)]
pub struct InferenceToken {
pub token: String,
pub completed: bool,
}
pub fn embedding(&self, input: &LlamaInvocation, sender: &Sender<EmbeddingResult>) {
let context_params_c = LlamaContextParams::or_default(&self.context_params);
let input_ctx = &self.context;
let embd_inp = tokenize(
input_ctx,
input.prompt.as_str(),
context_params_c.n_ctx as usize,
true,
)
.unwrap();
#[napi]
pub enum InferenceResultType {
Error,
Data,
End,
}
// let end_text = "\n";
// let end_token =
// tokenize(input_ctx, end_text, context_params_c.n_ctx as usize, false).unwrap();
#[napi(object)]
pub struct InferenceResult {
pub r#type: InferenceResultType,
pub data: Option<InferenceToken>,
pub message: Option<String>,
}
input_ctx
.llama_eval(embd_inp.as_slice(), embd_inp.len() as i32, 0, input)
.unwrap();
impl LLamaInternal {
let embeddings = input_ctx.llama_get_embeddings();
if let Ok(embeddings) = embeddings {
sender
.send(EmbeddingResult {
r#type: EmbeddingResultType::Data,
data: embeddings.iter().map(|&x| x as f64).collect(),
})
.unwrap();
} else {
sender
.send(EmbeddingResult {
r#type: EmbeddingResultType::Error,
data: vec![],
})
.unwrap();
}
}
pub fn inference(&self, input: &LlamaInvocation, sender: &Sender<InferenceResult>) {
let context_params_c = LlamaContextParams::or_default(&self.context_params);
log::info!("inference: {:?}", input);
log::info!("context_params: {:?}", context_params_c);
let input_ctx = &self.context;

@@ -193,2 +224,14 @@ // Tokenize the stop sequence and input prompt.

/// Queue a tokenize request on the worker thread; the result arrives on `sender`.
pub fn tokenize(&self, input: String, n_ctx: usize, sender: Sender<TokenizeResult>) {
    self.command_sender
        .send(LLamaCommand::Tokenize(input, n_ctx, sender))
        .unwrap();
}

/// Queue an embedding request on the worker thread; the result arrives on `sender`.
pub fn embedding(&self, params: LlamaInvocation, sender: Sender<EmbeddingResult>) {
    self.command_sender
        .send(LLamaCommand::Embedding(params, sender))
        .unwrap();
}
pub fn inference(&self, params: LlamaInvocation, sender: Sender<InferenceResult>) {

@@ -230,2 +273,8 @@ self.command_sender

}
Ok(LLamaCommand::Embedding(params, sender)) => {
llama.embedding(&params, &sender);
}
Ok(LLamaCommand::Tokenize(text, n_ctx, sender)) => {
llama.tokenize(&text, n_ctx, &sender);
}
Err(TryRecvError::Disconnected) => {

@@ -232,0 +281,0 @@ break 'llama_loop;

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet