@@ -27,4 +27,4 @@ /* tslint:disable */
		export const enum TokenizeResultType {
		Error = 0,
		Data = 1
		Error = 'Error',
		Data = 'Data'
		}
		@@ -40,5 +40,5 @@ export interface TokenizeResult {
		export const enum InferenceResultType {
		Error = 0,
		Data = 1,
		End = 2
		Error = 'Error',
		Data = 'Data',
		End = 'End'
		}
		@@ -51,4 +51,4 @@ export interface InferenceResult {
		export const enum EmbeddingResultType {
		Error = 0,
		Data = 1
		Error = 'Error',
		Data = 'Data'
		}
		@@ -61,11 +61,5 @@ export interface EmbeddingResult {
		static load(path: string, params: LlamaContextParams | undefined | null, enableLogger: boolean): LLama
		getWordEmbedding(input: LlamaInvocation,
		callback: (result: EmbeddingResult) => void): void
		tokenize(params: string,
		nCtx: number,
		callback: (result:
		{ type: TokenizeResultType, data: number[] }
		) => void): void
		inference(input: LlamaInvocation,
		callback: (result: InferenceResult) => void): void
		getWordEmbedding(input: LlamaInvocation, callback: (result: EmbeddingResult) => void): void
		tokenize(params: string, nCtx: number, callback: (result: TokenizeResult) => void): void
		inference(input: LlamaInvocation, callback: (result: InferenceResult) => void): void
		}

+1

-1

package.json

		{
		"name": "@llama-node/llama-cpp",
		"version": "0.0.22",
		"version": "0.0.23",
		"main": "index.js",
		@@ -5,0 +5,0 @@ "types": "index.d.ts",

+20

-13

src/lib.rs

		@@ -26,3 +26,3 @@ #![deny(clippy::all)]
		};
		use types::{InferenceResult, TokenizeResult, EmbeddingResult};
		use types::{EmbeddingResult, InferenceResult, TokenizeResult};

		@@ -68,5 +68,8 @@ #[napi]

		#[napi(ts_args_type = "input: LlamaInvocation,
		callback: (result: EmbeddingResult) => void")]
		pub fn get_word_embedding(&self, input: LlamaInvocation, callback: JsFunction) -> Result<()> {
		#[napi]
		pub fn get_word_embedding(
		&self,
		input: LlamaInvocation,
		#[napi(ts_arg_type = "(result: EmbeddingResult) => void")] callback: JsFunction,
		) -> Result<()> {
		let tsfn: ThreadsafeFunction<EmbeddingResult, ErrorStrategy::Fatal> =
		@@ -98,8 +101,9 @@ callback.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))?;

		#[napi(ts_args_type = "params: string,
		nCtx: number,
		callback: (result:
		{ type: TokenizeResultType, data: number[] }
		) => void")]
		pub fn tokenize(&self, params: String, n_ctx: i32, callback: JsFunction) -> Result<()> {
		#[napi]
		pub fn tokenize(
		&self,
		params: String,
		n_ctx: i32,
		#[napi(ts_arg_type = "(result: TokenizeResult) => void")] callback: JsFunction,
		) -> Result<()> {
		let (tokenize_sender, tokenize_receiver) = channel::<TokenizeResult>();
		@@ -136,5 +140,8 @@

		#[napi(ts_args_type = "input: LlamaInvocation,
		callback: (result: InferenceResult) => void")]
		pub fn inference(&self, input: LlamaInvocation, callback: JsFunction) -> Result<()> {
		#[napi]
		pub fn inference(
		&self,
		input: LlamaInvocation,
		#[napi(ts_arg_type = "(result: InferenceResult) => void")] callback: JsFunction,
		) -> Result<()> {
		let tsfn: ThreadsafeFunction<InferenceResult, ErrorStrategy::Fatal> =
		@@ -141,0 +148,0 @@ callback.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))?;

+29

-21

src/llama.rs

		@@ -11,3 +11,3 @@ use std::{
		context::{LLamaContext, LlamaContextParams, LlamaInvocation},
		tokenizer::{embedding_to_output, llama_token_eos, tokenize},
		tokenizer::{llama_token_eos, tokenize},
		types::{
		@@ -126,4 +126,2 @@ EmbeddingResult, EmbeddingResultType, InferenceResult, InferenceResultType, InferenceToken,

		log::info!("hard coded token_eos: {}", token_eos);

		// Generate remaining tokens.
		@@ -133,2 +131,3 @@ let mut n_remaining = context_params_c.n_ctx - tokenized_input.len() as i32;
		let mut stop_sequence_i = 0;
		let mut completed = false;
		while n_remaining > 0 {
		@@ -140,9 +139,3 @@ let tok = input_ctx.llama_sample(embd.as_slice(), n_used as i32, input);
		if tok == token_eos {
		sender
		.send(InferenceResult {
		r#type: InferenceResultType::End,
		data: None,
		message: None,
		})
		.unwrap();
		completed = true;
		break;
		@@ -167,9 +160,3 @@ }
		if stop_sequence_i >= tokenized_stop_prompt.len() {
		sender
		.send(InferenceResult {
		r#type: InferenceResultType::End,
		data: None,
		message: None,
		})
		.unwrap();
		completed = true;
		break;
		@@ -201,6 +188,27 @@ }
		}
		embedding_to_output(
		input_ctx,
		&embd[tokenized_input.len()..n_used + 1 - stop_sequence_i],
		);

		if completed {
		sender
		.send(InferenceResult {
		r#type: InferenceResultType::Data,
		data: Some(InferenceToken {
		token: "\n\n<end>\n".to_string(),
		completed: true,
		}),
		message: None,
		})
		.unwrap();
		}

		sender
		.send(InferenceResult {
		r#type: InferenceResultType::End,
		data: None,
		message: None,
		})
		.unwrap();
		// embedding_to_output(
		// input_ctx,
		// &embd[tokenized_input.len()..n_used + 1 - stop_sequence_i],
		// );
		}
		@@ -207,0 +215,0 @@ }

+3

-3

src/types.rs

		@@ -12,3 +12,3 @@ use crate::context::LlamaInvocation;

		#[napi]
		#[napi(string_enum)]
		pub enum TokenizeResultType {
		@@ -32,3 +32,3 @@ Error,

		#[napi]
		#[napi(string_enum)]
		pub enum InferenceResultType {
		@@ -47,3 +47,3 @@ Error,

		#[napi]
		#[napi(string_enum)]
		pub enum EmbeddingResultType {
		@@ -50,0 +50,0 @@ Error,

@llama-node/llama-cpp.darwin-arm64.node

Sorry, the diff of this file is not supported yet

@llama-node/llama-cpp.darwin-x64.node

Sorry, the diff of this file is not supported yet

@llama-node/llama-cpp.linux-x64-gnu.node

Sorry, the diff of this file is not supported yet

@llama-node/llama-cpp.win32-x64-msvc.node

Sorry, the diff of this file is not supported yet

@llama-node/llama-cpp - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics