@forwardimpact/libllm - npm Package Compare versions

+79

src/hallucination.js

		import { tool } from "@forwardimpact/libtype";

		/**
		 * Tells whether a tool-call function name is one of the pseudo-tool names
		 * that models emit when they hallucinate a parallel wrapper call.
		 * @param {string} name - Function name taken from a tool call
		 * @returns {boolean} True for "multi_tool_use.parallel" or "parallel"
		 */
		function isParallelHallucination(name) {
		  return name === "multi_tool_use.parallel" || name === "parallel";
		}

		/**
		 * Reads the function name of a nested tool use and removes a leading
		 * "functions." namespace prefix when present.
		 * @param {object} nestedTool - Nested tool use object; `recipient_name` is
		 *   preferred, `name` is the fallback
		 * @returns {string} Function name without the prefix, or "" when neither
		 *   field is set
		 */
		function extractNestedName(nestedTool) {
		  const prefix = "functions.";
		  const rawName = nestedTool.recipient_name || nestedTool.name || "";
		  // Slice by the prefix length rather than a hard-coded character count.
		  return rawName.startsWith(prefix) ? rawName.slice(prefix.length) : rawName;
		}

		/**
		 * Builds a ToolCall from one entry of a hallucinated parallel call.
		 *
		 * The new call id is derived from the parent id and the entry's position so
		 * that every expanded call gets a distinct id.
		 * @param {object} nestedTool - Nested tool use; arguments are read from
		 *   `parameters`, falling back to `arguments`, then to an empty object
		 * @param {string} parentId - Id of the wrapping tool call
		 * @param {number} index - Position of this entry in the wrapper's list
		 * @returns {object} Result of `tool.ToolCall.fromObject` for the entry
		 */
		function convertNestedToolUse(nestedTool, parentId, index) {
		  const nestedArgs = nestedTool.parameters || nestedTool.arguments || {};
		  // Arguments that already arrive as a JSON string must be passed through;
		  // stringifying them again would wrap the payload in an extra layer of quotes.
		  const serializedArgs =
		    typeof nestedArgs === "string" ? nestedArgs : JSON.stringify(nestedArgs);
		  return tool.ToolCall.fromObject({
		    id: `${parentId}_${index}`,
		    type: "function",
		    function: {
		      name: extractNestedName(nestedTool),
		      arguments: serializedArgs,
		    },
		  });
		}

		/**
		 * Unpacks a hallucinated parallel tool call into one tool call per entry of
		 * its `tool_uses` argument.
		 * @param {object} toolCall - The wrapping tool call; `function.arguments` is
		 *   expected to be a JSON string
		 * @returns {object[]} The expanded tool calls (empty when `tool_uses` is
		 *   missing or empty), or `[toolCall]` when expansion throws
		 */
		function expandParallelToolCall(toolCall) {
		  try {
		    const args = JSON.parse(toolCall.function.arguments || "{}");
		    const toolUses = args.tool_uses || [];
		    return toolUses.map((nested, i) =>
		      convertNestedToolUse(nested, toolCall.id, i),
		    );
		  } catch {
		    // Best effort: if parsing or conversion fails, hand back the original
		    // call untouched (it will likely fail downstream).
		    return [toolCall];
		  }
		}

		/**
		 * Replaces hallucinated "multi_tool_use.parallel" / "parallel" wrapper calls
		 * with the individual tool calls they contain.
		 *
		 * Some models occasionally answer with a single pseudo-tool call whose
		 * arguments bundle several real tool calls. Every such wrapper is expanded in
		 * place; all other calls are passed through unchanged and keep their order.
		 * @see https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
		 * @param {object[]} toolCalls - Tool calls taken from an LLM response
		 * @returns {object[]} Tool calls with wrappers expanded; a missing or empty
		 *   input is returned as-is
		 */
		export function fixMultiToolUseParallel(toolCalls) {
		  const isEmpty = !toolCalls?.length;
		  if (isEmpty) {
		    return toolCalls;
		  }

		  return toolCalls.flatMap((call) =>
		    isParallelHallucination(call.function?.name)
		      ? expandParallelToolCall(call)
		      : [call],
		  );
		}

+391

src/index.js

		import { readFile } from "node:fs/promises";
		import { common, llm } from "@forwardimpact/libtype";
		import {
		countTokens,
		createTokenizer,
		createRetry,
		} from "@forwardimpact/libutil";
		import { fixMultiToolUseParallel } from "./hallucination.js";

		// Note: getBudget has moved to @forwardimpact/libmemory as getModelBudget
		// This re-export is deprecated and will be removed in a future version
		export { getBudget } from "./models.js";

		/**
		* Default base URL for the GitHub Models inference API.
		* The value already ends in /inference, so normalizeBaseUrl returns it unchanged.
		* @type {string}
		*/
		export const DEFAULT_BASE_URL = "https://models.github.ai/inference";

		/**
		 * Makes sure a GitHub Models base URL points at its /inference path.
		 *
		 * URLs that do not mention models.github.ai, or that already contain
		 * /inference, are returned untouched.
		 * @param {string} baseUrl - Base URL for the LLM API
		 * @returns {string} Base URL, with /inference appended when it was missing
		 */
		function normalizeBaseUrl(baseUrl) {
		  const isGitHubModels = baseUrl.includes("models.github.ai");
		  const hasInferencePath = baseUrl.includes("/inference");
		  if (!isGitHubModels || hasInferencePath) {
		    return baseUrl;
		  }

		  // Drop a single trailing slash so the result has no "//" before the suffix.
		  const withoutTrailingSlash = baseUrl.replace(/\/$/, "");
		  return `${withoutTrailingSlash}/inference`;
		}

		/**
		 * LLM API client that talks to OpenAI-compatible endpoints with direct HTTP
		 * calls. It covers chat completions, embeddings (TEI or OpenAI format), the
		 * GitHub Models catalog, token counting and image description.
		 */
		export class LlmApi {
		  #model;
		  #baseURL;
		  #embeddingBaseURL;
		  #useTeiEmbeddings;
		  #headers;
		  #fetch;
		  #tokenizer;
		  #retry;
		  #temperature;

		  /**
		   * Creates a new LLM API instance
		   * @param {string} token - LLM API token, sent as a Bearer token
		   * @param {string} model - Default model to use for completions
		   * @param {string} baseUrl - Base URL for the LLM API
		   * @param {string} embeddingBaseUrl - Base URL for embeddings (TEI endpoint or OpenAI-compatible); falsy falls back to baseUrl
		   * @param {import("@forwardimpact/libutil").Retry} retry - Retry instance for handling transient errors
		   * @param {(url: string, options?: object) => Promise<Response>} fetchFn - HTTP client function (defaults to fetch if not provided)
		   * @param {() => object} tokenizerFn - Factory that returns the tokenizer used for counting tokens
		   * @param {number} [temperature] - Temperature for completions
		   * @throws {Error} If baseUrl or retry is missing, or fetchFn/tokenizerFn is not a function
		   */
		  constructor(
		    token,
		    model,
		    baseUrl,
		    embeddingBaseUrl,
		    retry,
		    fetchFn = fetch,
		    tokenizerFn = createTokenizer,
		    temperature = 0.3,
		  ) {
		    if (!baseUrl) throw new Error("baseUrl is required");
		    if (!retry) throw new Error("retry is required");
		    if (typeof fetchFn !== "function") {
		      throw new Error("Invalid fetch function");
		    }
		    if (typeof tokenizerFn !== "function") {
		      throw new Error("Invalid tokenizer function");
		    }

		    this.#model = model;
		    this.#baseURL = normalizeBaseUrl(baseUrl);
		    // Store the embedding URL in normalized form. The TEI decision below is
		    // made on the normalized value, so requests must use that same value;
		    // otherwise an un-normalized URL would be posted to the wrong path.
		    this.#embeddingBaseURL = embeddingBaseUrl
		      ? normalizeBaseUrl(embeddingBaseUrl)
		      : this.#baseURL;
		    // TEI format is used only when an embedding URL was given and it differs
		    // from the LLM base URL.
		    this.#useTeiEmbeddings = this.#embeddingBaseURL !== this.#baseURL;
		    this.#headers = {
		      Authorization: `Bearer ${token}`,
		      "Content-Type": "application/json",
		      Accept: "application/vnd.github+json",
		      "X-GitHub-Api-Version": "2022-11-28",
		    };
		    this.#fetch = fetchFn;
		    this.#tokenizer = tokenizerFn();
		    this.#retry = retry;
		    this.#temperature = temperature;
		  }

		  /**
		   * Throws an Error with HTTP status and a snippet of the response body when response is not OK
		   * @param {Response} response - Fetch API response
		   * @returns {Promise<void>}
		   * @throws {Error} With enriched message including at most 200 characters of the body
		   */
		  async #throwIfNotOk(response) {
		    if (response.ok) return;
		    let errorDetails = "";
		    try {
		      const text = await response.text();
		      errorDetails = text ? `: ${text.substring(0, 200)}` : "";
		    } catch {
		      // Deliberate best effort: the status line is still reported when the
		      // body cannot be read.
		    }
		    throw new Error(
		      `HTTP ${response.status}: ${response.statusText}${errorDetails}`,
		    );
		  }

		  /**
		   * Creates chat completions using the LLM API
		   *
		   * The instance model and temperature override any same-named fields of
		   * the window.
		   * @param {import("@forwardimpact/libtype").memory.Window[]} window - Memory window
		   * @returns {Promise<import("@forwardimpact/libtype").llm.CompletionsResponse>} Completion response
		   * @throws {Error} When the HTTP response is not OK
		   */
		  async createCompletions(window) {
		    const body = {
		      ...window,
		      model: this.#model,
		      temperature: this.#temperature,
		    };

		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#baseURL}/chat/completions`, {
		        method: "POST",
		        headers: this.#headers,
		        body: JSON.stringify(body),
		      }),
		    );

		    await this.#throwIfNotOk(response);

		    const json = await response.json();

		    // Fix hallucinated multi_tool_use.parallel calls before converting to protobuf
		    for (const choice of json.choices || []) {
		      if (choice.message?.tool_calls) {
		        choice.message.tool_calls = fixMultiToolUseParallel(
		          choice.message.tool_calls,
		        );
		      }
		    }

		    return llm.CompletionsResponse.fromObject(json);
		  }

		  /**
		   * Creates embeddings via TEI or OpenAI-compatible endpoint.
		   * Uses TEI format when an embedding base URL was explicitly set and
		   * differs from the LLM base URL; otherwise uses the OpenAI-compatible
		   * /embeddings endpoint on the LLM base URL.
		   * @param {string[]} input - Array of text strings to embed
		   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
		   */
		  async createEmbeddings(input) {
		    if (this.#useTeiEmbeddings) {
		      return this.#createTeiEmbeddings(input);
		    }
		    return this.#createOpenAIEmbeddings(input);
		  }

		  /**
		   * TEI (Text Embeddings Inference) format: POST /embed
		   *
		   * Only a Content-Type header is sent; the Authorization header is not
		   * forwarded to this endpoint.
		   * @param {string[]} input - Array of text strings to embed
		   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
		   */
		  async #createTeiEmbeddings(input) {
		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#embeddingBaseURL}/embed`, {
		        method: "POST",
		        headers: { "Content-Type": "application/json" },
		        body: JSON.stringify({ inputs: input }),
		      }),
		    );

		    await this.#throwIfNotOk(response);
		    const json = await response.json();

		    // TEI returns [[0.1, 0.2, ...]]; wrap it in the OpenAI-style envelope.
		    // The model name is fixed and usage is reported as zero because the
		    // response carries neither.
		    return common.Embeddings.fromObject({
		      object: "list",
		      data: json.map((embedding, index) => ({
		        object: "embedding",
		        index,
		        embedding,
		      })),
		      model: "bge-small-en-v1.5",
		      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
		    });
		  }

		  /**
		   * OpenAI-compatible format: POST /embeddings
		   * @param {string[]} input - Array of text strings to embed
		   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
		   */
		  async #createOpenAIEmbeddings(input) {
		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#embeddingBaseURL}/embeddings`, {
		        method: "POST",
		        headers: this.#headers,
		        body: JSON.stringify({
		          input,
		          model: this.#model,
		        }),
		      }),
		    );

		    await this.#throwIfNotOk(response);
		    const json = await response.json();

		    // Fill in envelope fields the endpoint may omit.
		    return common.Embeddings.fromObject({
		      object: json.object || "list",
		      data: json.data.map((item) => ({
		        object: item.object || "embedding",
		        index: item.index,
		        embedding: item.embedding,
		      })),
		      model: json.model || this.#model,
		      usage: json.usage || {
		        prompt_tokens: 0,
		        completion_tokens: 0,
		        total_tokens: 0,
		      },
		    });
		  }

		  /**
		   * Lists models available to the current user
		   * @returns {Promise<object[]>} Array of available models
		   * @throws {Error} When the HTTP response is not OK
		   */
		  async listModels() {
		    // GitHub Models catalog is at the root domain, not org-specific
		    const catalogUrl = "https://models.github.ai/catalog/models";
		    const response = await this.#fetch(catalogUrl, {
		      method: "GET",
		      headers: this.#headers,
		    });

		    await this.#throwIfNotOk(response);
		    const json = await response.json();
		    return json;
		  }

		  /**
		   * Counts tokens in the given text using the tokenizer
		   * @param {string} text - The text to count tokens for
		   * @returns {number} Number of tokens in the text
		   */
		  countTokens(text) {
		    return countTokens(text, this.#tokenizer);
		  }

		  /**
		   * Converts an image to text description using vision capabilities
		   * @param {string|Buffer} file - Path to the image file or a Buffer containing the image data
		   * @param {string} [prompt] - Optional text prompt to guide the description
		   * @param {string} [model] - Model to use for image-to-text conversion, defaults to instance model
		   * @param {string} [systemPrompt] - System prompt to set context for the description
		   * @param {number} [max_tokens] - Maximum tokens to generate in the description
		   * @param {string} [mimeType] - The mime type of the file. Used only when file is a Buffer (default image/png); for a path it is derived from the extension
		   * @returns {Promise<string>} Text description of the image, or "" when the response has no content
		   * @throws {Error} When the HTTP response is not OK
		   */
		  async imageToText(
		    file,
		    prompt = "Describe this image in detail.",
		    model = this.#model,
		    systemPrompt = "You are an AI assistant that describes images accurately and in detail.",
		    max_tokens = 1000,
		    mimeType = "image/png",
		  ) {
		    let buffer;
		    let resolvedMimeType = mimeType;
		    if (Buffer.isBuffer(file)) {
		      buffer = file;
		    } else {
		      buffer = await readFile(file);
		      // For paths the extension decides the mime type; "jpg" maps to "jpeg".
		      const extension = file.split(".").pop().toLowerCase();
		      resolvedMimeType = `image/${extension === "jpg" ? "jpeg" : extension}`;
		    }

		    const base64 = buffer.toString("base64");

		    const body = {
		      model,
		      messages: [
		        {
		          role: "system",
		          content: systemPrompt,
		        },
		        {
		          role: "user",
		          content: [
		            {
		              type: "text",
		              text: prompt,
		            },
		            {
		              type: "image_url",
		              image_url: {
		                url: `data:${resolvedMimeType};base64,${base64}`,
		              },
		            },
		          ],
		        },
		      ],
		      max_tokens,
		    };

		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#baseURL}/chat/completions`, {
		        method: "POST",
		        headers: this.#headers,
		        body: JSON.stringify(body),
		      }),
		    );

		    await this.#throwIfNotOk(response);

		    const json = await response.json();
		    // A response without a choices array yields "" instead of a TypeError.
		    return json.choices?.[0]?.message?.content || "";
		  }
		}

		/**
		 * Creates a fetch function that honours the HTTPS_PROXY (or https_proxy)
		 * environment variable.
		 *
		 * Without a proxy setting the global fetch is returned unchanged. With one,
		 * the returned wrapper adds a `proxy` option to every request; whether that
		 * option is honoured depends on the runtime's fetch implementation.
		 * @param {object} [process] - Process object for environment variable access
		 * @returns {(url: string, options?: object) => Promise<Response>} Fetch function with proxy support
		 */
		export function createProxyAwareFetch(process = globalThis.process) {
		  const httpsProxy = process.env.HTTPS_PROXY || process.env.https_proxy;

		  if (!httpsProxy) {
		    return fetch;
		  }

		  return (url, options = {}) =>
		    fetch(url, {
		      ...options,
		      proxy: httpsProxy,
		    });
		}

		/**
		 * Builds an LlmApi wired with the default collaborators: a fresh retry
		 * helper, a proxy-aware fetch and the standard tokenizer factory.
		 * @param {string} token - LLM API token
		 * @param {string} model - Model to use
		 * @param {string} baseUrl - Base URL for the LLM API (required, e.g. https://models.github.ai/orgs/{org})
		 * @param {string|null} embeddingBaseUrl - Base URL for embeddings (null falls back to baseUrl with OpenAI-compatible format)
		 * @param {number} [temperature] - Temperature for completions
		 * @param {(url: string, options?: object) => Promise<Response>} [fetchFn] - HTTP client function
		 * @param {() => object} [tokenizerFn] - Tokenizer factory function
		 * @returns {LlmApi} Configured LlmApi instance
		 * @throws {Error} If baseUrl is missing
		 */
		export function createLlmApi(
		  token,
		  model,
		  baseUrl,
		  embeddingBaseUrl,
		  temperature = 0.3,
		  fetchFn = createProxyAwareFetch(),
		  tokenizerFn = createTokenizer,
		) {
		  if (baseUrl) {
		    // Argument order follows the LlmApi constructor, which takes the retry
		    // helper before fetch/tokenizer and the temperature last.
		    const constructorArgs = [
		      token,
		      model,
		      baseUrl,
		      embeddingBaseUrl,
		      createRetry(),
		      fetchFn,
		      tokenizerFn,
		      temperature,
		    ];
		    return new LlmApi(...constructorArgs);
		  }

		  throw new Error(
		    "baseUrl is required. Set LLM_BASE_URL to https://models.github.ai/orgs/{YOUR_ORG} for org-level PATs.",
		  );
		}

		/**
		 * Scales a vector to unit (Euclidean) length without modifying the input.
		 * @param {number[]} vector - Vector to normalize
		 * @returns {number[]} New vector of length 1, or a copy of the input when
		 *   its magnitude is zero
		 */
		export function normalizeVector(vector) {
		  let sumOfSquares = 0;
		  vector.forEach((component) => {
		    sumOfSquares += component * component;
		  });

		  const length = Math.sqrt(sumOfSquares);
		  if (length !== 0) {
		    return vector.map((component) => component / length);
		  }

		  // A zero vector has no direction; hand back a copy instead of dividing by 0.
		  return vector.slice();
		}

+66

src/models.js

		/**
		* Static map of model names to their context window token budgets.
		* Keys are model names with a provider prefix (for example "openai/gpt-5");
		* values are token counts. getBudget looks models up here and throws for
		* names that are not listed.
		* Seeded from GitHub Models API via `./scripts/env.sh node scripts/models.js`
		* @type {Map<string, number>}
		*/
		export const BUDGETS = new Map([
		["ai21-labs/ai21-jamba-1.5-large", 262144],
		["cohere/cohere-command-a", 131072],
		["cohere/cohere-command-r-08-2024", 131072],
		["cohere/cohere-command-r-plus-08-2024", 131072],
		["deepseek/deepseek-r1", 128000],
		["deepseek/deepseek-r1-0528", 128000],
		["deepseek/deepseek-v3-0324", 128000],
		["meta/llama-3.2-11b-vision-instruct", 128000],
		["meta/llama-3.2-90b-vision-instruct", 128000],
		["meta/llama-3.3-70b-instruct", 128000],
		["meta/llama-4-maverick-17b-128e-instruct-fp8", 1000000],
		["meta/llama-4-scout-17b-16e-instruct", 10000000],
		["meta/meta-llama-3.1-405b-instruct", 131072],
		["meta/meta-llama-3.1-8b-instruct", 131072],
		["microsoft/mai-ds-r1", 128000],
		["microsoft/phi-4", 16384],
		["microsoft/phi-4-mini-instruct", 128000],
		["microsoft/phi-4-mini-reasoning", 128000],
		["microsoft/phi-4-multimodal-instruct", 128000],
		["microsoft/phi-4-reasoning", 32768],
		["mistral-ai/codestral-2501", 256000],
		["mistral-ai/ministral-3b", 131072],
		["mistral-ai/mistral-medium-2505", 128000],
		["mistral-ai/mistral-small-2503", 128000],
		["openai/gpt-4.1", 1048576],
		["openai/gpt-4.1-mini", 1048576],
		["openai/gpt-4.1-nano", 1048576],
		["openai/gpt-4o", 131072],
		["openai/gpt-4o-mini", 131072],
		["openai/gpt-5", 200000],
		["openai/gpt-5-chat", 200000],
		["openai/gpt-5-mini", 200000],
		["openai/gpt-5-nano", 200000],
		["openai/o1", 200000],
		["openai/o1-mini", 128000],
		["openai/o1-preview", 128000],
		["openai/o3", 200000],
		["openai/o3-mini", 200000],
		["openai/o4-mini", 200000],
		["openai/text-embedding-3-large", 8191],
		["openai/text-embedding-3-small", 8191],
		["xai/grok-3", 131072],
		["xai/grok-3-mini", 131072],
		]);

		/**
		 * Looks up the context window token budget of a model.
		 * @param {string} model - Model name with provider prefix (e.g., 'openai/gpt-5')
		 * @returns {number} Token budget for the model
		 * @throws {Error} If the model has no entry in BUDGETS; the message lists
		 *   every known model name
		 */
		export function getBudget(model) {
		  const tokens = BUDGETS.get(model);
		  if (tokens) {
		    return tokens;
		  }

		  const knownModels = Array.from(BUDGETS.keys()).join(", ");
		  throw new Error(`Unknown model: ${model}. Known models: ${knownModels}`);
		}

+11

-2

package.json

		{
		"name": "@forwardimpact/libllm",
		"version": "0.1.84",
		"version": "0.1.85",
		"description": "LLM API client for OpenAI-compatible endpoints",
		@@ -8,6 +8,15 @@ "license": "Apache-2.0",
		"type": "module",
		"main": "index.js",
		"main": "./src/index.js",
		"exports": {
		".": "./src/index.js",
		"./bin/fit-completion.js": "./bin/fit-completion.js"
		},
		"bin": {
		"fit-completion": "./bin/fit-completion.js"
		},
		"files": [
		"src/**/*.js",
		"bin/**/*.js",
		"README.md"
		],
		"engines": {
		@@ -14,0 +23,0 @@ "bun": ">=1.2.0",

-79

hallucination.js

		import { tool } from "@forwardimpact/libtype";

		/**
		 * Tells whether a tool-call function name is one of the pseudo-tool names
		 * that models emit when they hallucinate a parallel wrapper call.
		 * @param {string} name - Function name taken from a tool call
		 * @returns {boolean} True for "multi_tool_use.parallel" or "parallel"
		 */
		function isParallelHallucination(name) {
		  return name === "multi_tool_use.parallel" || name === "parallel";
		}

		/**
		 * Reads the function name of a nested tool use and removes a leading
		 * "functions." namespace prefix when present.
		 * @param {object} nestedTool - Nested tool use object; `recipient_name` is
		 *   preferred, `name` is the fallback
		 * @returns {string} Function name without the prefix, or "" when neither
		 *   field is set
		 */
		function extractNestedName(nestedTool) {
		  const prefix = "functions.";
		  const rawName = nestedTool.recipient_name || nestedTool.name || "";
		  // Slice by the prefix length rather than a hard-coded character count.
		  return rawName.startsWith(prefix) ? rawName.slice(prefix.length) : rawName;
		}

		/**
		 * Builds a ToolCall from one entry of a hallucinated parallel call.
		 *
		 * The new call id is derived from the parent id and the entry's position so
		 * that every expanded call gets a distinct id.
		 * @param {object} nestedTool - Nested tool use; arguments are read from
		 *   `parameters`, falling back to `arguments`, then to an empty object
		 * @param {string} parentId - Id of the wrapping tool call
		 * @param {number} index - Position of this entry in the wrapper's list
		 * @returns {object} Result of `tool.ToolCall.fromObject` for the entry
		 */
		function convertNestedToolUse(nestedTool, parentId, index) {
		  const nestedArgs = nestedTool.parameters || nestedTool.arguments || {};
		  // Arguments that already arrive as a JSON string must be passed through;
		  // stringifying them again would wrap the payload in an extra layer of quotes.
		  const serializedArgs =
		    typeof nestedArgs === "string" ? nestedArgs : JSON.stringify(nestedArgs);
		  return tool.ToolCall.fromObject({
		    id: `${parentId}_${index}`,
		    type: "function",
		    function: {
		      name: extractNestedName(nestedTool),
		      arguments: serializedArgs,
		    },
		  });
		}

		/**
		 * Unpacks a hallucinated parallel tool call into one tool call per entry of
		 * its `tool_uses` argument.
		 * @param {object} toolCall - The wrapping tool call; `function.arguments` is
		 *   expected to be a JSON string
		 * @returns {object[]} The expanded tool calls (empty when `tool_uses` is
		 *   missing or empty), or `[toolCall]` when expansion throws
		 */
		function expandParallelToolCall(toolCall) {
		  try {
		    const args = JSON.parse(toolCall.function.arguments || "{}");
		    const toolUses = args.tool_uses || [];
		    return toolUses.map((nested, i) =>
		      convertNestedToolUse(nested, toolCall.id, i),
		    );
		  } catch {
		    // Best effort: if parsing or conversion fails, hand back the original
		    // call untouched (it will likely fail downstream).
		    return [toolCall];
		  }
		}

		/**
		 * Replaces hallucinated "multi_tool_use.parallel" / "parallel" wrapper calls
		 * with the individual tool calls they contain.
		 *
		 * Some models occasionally answer with a single pseudo-tool call whose
		 * arguments bundle several real tool calls. Every such wrapper is expanded in
		 * place; all other calls are passed through unchanged and keep their order.
		 * @see https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
		 * @param {object[]} toolCalls - Tool calls taken from an LLM response
		 * @returns {object[]} Tool calls with wrappers expanded; a missing or empty
		 *   input is returned as-is
		 */
		export function fixMultiToolUseParallel(toolCalls) {
		  const isEmpty = !toolCalls?.length;
		  if (isEmpty) {
		    return toolCalls;
		  }

		  return toolCalls.flatMap((call) =>
		    isParallelHallucination(call.function?.name)
		      ? expandParallelToolCall(call)
		      : [call],
		  );
		}

-391

index.js

		import { readFile } from "node:fs/promises";
		import { common, llm } from "@forwardimpact/libtype";
		import {
		countTokens,
		createTokenizer,
		createRetry,
		} from "@forwardimpact/libutil";
		import { fixMultiToolUseParallel } from "./hallucination.js";

		// Note: getBudget has moved to @forwardimpact/libmemory as getModelBudget
		// This re-export is deprecated and will be removed in a future version
		export { getBudget } from "./models.js";

		/**
		* Default base URL for the GitHub Models inference API.
		* The value already ends in /inference, so normalizeBaseUrl returns it unchanged.
		* @type {string}
		*/
		export const DEFAULT_BASE_URL = "https://models.github.ai/inference";

		/**
		 * Makes sure a GitHub Models base URL points at its /inference path.
		 *
		 * URLs that do not mention models.github.ai, or that already contain
		 * /inference, are returned untouched.
		 * @param {string} baseUrl - Base URL for the LLM API
		 * @returns {string} Base URL, with /inference appended when it was missing
		 */
		function normalizeBaseUrl(baseUrl) {
		  const isGitHubModels = baseUrl.includes("models.github.ai");
		  const hasInferencePath = baseUrl.includes("/inference");
		  if (!isGitHubModels || hasInferencePath) {
		    return baseUrl;
		  }

		  // Drop a single trailing slash so the result has no "//" before the suffix.
		  const withoutTrailingSlash = baseUrl.replace(/\/$/, "");
		  return `${withoutTrailingSlash}/inference`;
		}

		/**
		 * LLM API client that talks to OpenAI-compatible endpoints with direct HTTP
		 * calls. It covers chat completions, embeddings (TEI or OpenAI format), the
		 * GitHub Models catalog, token counting and image description.
		 */
		export class LlmApi {
		  #model;
		  #baseURL;
		  #embeddingBaseURL;
		  #useTeiEmbeddings;
		  #headers;
		  #fetch;
		  #tokenizer;
		  #retry;
		  #temperature;

		  /**
		   * Creates a new LLM API instance
		   * @param {string} token - LLM API token, sent as a Bearer token
		   * @param {string} model - Default model to use for completions
		   * @param {string} baseUrl - Base URL for the LLM API
		   * @param {string} embeddingBaseUrl - Base URL for embeddings (TEI endpoint or OpenAI-compatible); falsy falls back to baseUrl
		   * @param {import("@forwardimpact/libutil").Retry} retry - Retry instance for handling transient errors
		   * @param {(url: string, options?: object) => Promise<Response>} fetchFn - HTTP client function (defaults to fetch if not provided)
		   * @param {() => object} tokenizerFn - Factory that returns the tokenizer used for counting tokens
		   * @param {number} [temperature] - Temperature for completions
		   * @throws {Error} If baseUrl or retry is missing, or fetchFn/tokenizerFn is not a function
		   */
		  constructor(
		    token,
		    model,
		    baseUrl,
		    embeddingBaseUrl,
		    retry,
		    fetchFn = fetch,
		    tokenizerFn = createTokenizer,
		    temperature = 0.3,
		  ) {
		    if (!baseUrl) throw new Error("baseUrl is required");
		    if (!retry) throw new Error("retry is required");
		    if (typeof fetchFn !== "function") {
		      throw new Error("Invalid fetch function");
		    }
		    if (typeof tokenizerFn !== "function") {
		      throw new Error("Invalid tokenizer function");
		    }

		    this.#model = model;
		    this.#baseURL = normalizeBaseUrl(baseUrl);
		    // Store the embedding URL in normalized form. The TEI decision below is
		    // made on the normalized value, so requests must use that same value;
		    // otherwise an un-normalized URL would be posted to the wrong path.
		    this.#embeddingBaseURL = embeddingBaseUrl
		      ? normalizeBaseUrl(embeddingBaseUrl)
		      : this.#baseURL;
		    // TEI format is used only when an embedding URL was given and it differs
		    // from the LLM base URL.
		    this.#useTeiEmbeddings = this.#embeddingBaseURL !== this.#baseURL;
		    this.#headers = {
		      Authorization: `Bearer ${token}`,
		      "Content-Type": "application/json",
		      Accept: "application/vnd.github+json",
		      "X-GitHub-Api-Version": "2022-11-28",
		    };
		    this.#fetch = fetchFn;
		    this.#tokenizer = tokenizerFn();
		    this.#retry = retry;
		    this.#temperature = temperature;
		  }

		  /**
		   * Throws an Error with HTTP status and a snippet of the response body when response is not OK
		   * @param {Response} response - Fetch API response
		   * @returns {Promise<void>}
		   * @throws {Error} With enriched message including at most 200 characters of the body
		   */
		  async #throwIfNotOk(response) {
		    if (response.ok) return;
		    let errorDetails = "";
		    try {
		      const text = await response.text();
		      errorDetails = text ? `: ${text.substring(0, 200)}` : "";
		    } catch {
		      // Deliberate best effort: the status line is still reported when the
		      // body cannot be read.
		    }
		    throw new Error(
		      `HTTP ${response.status}: ${response.statusText}${errorDetails}`,
		    );
		  }

		  /**
		   * Creates chat completions using the LLM API
		   *
		   * The instance model and temperature override any same-named fields of
		   * the window.
		   * @param {import("@forwardimpact/libtype").memory.Window[]} window - Memory window
		   * @returns {Promise<import("@forwardimpact/libtype").llm.CompletionsResponse>} Completion response
		   * @throws {Error} When the HTTP response is not OK
		   */
		  async createCompletions(window) {
		    const body = {
		      ...window,
		      model: this.#model,
		      temperature: this.#temperature,
		    };

		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#baseURL}/chat/completions`, {
		        method: "POST",
		        headers: this.#headers,
		        body: JSON.stringify(body),
		      }),
		    );

		    await this.#throwIfNotOk(response);

		    const json = await response.json();

		    // Fix hallucinated multi_tool_use.parallel calls before converting to protobuf
		    for (const choice of json.choices || []) {
		      if (choice.message?.tool_calls) {
		        choice.message.tool_calls = fixMultiToolUseParallel(
		          choice.message.tool_calls,
		        );
		      }
		    }

		    return llm.CompletionsResponse.fromObject(json);
		  }

		  /**
		   * Creates embeddings via TEI or OpenAI-compatible endpoint.
		   * Uses TEI format when an embedding base URL was explicitly set and
		   * differs from the LLM base URL; otherwise uses the OpenAI-compatible
		   * /embeddings endpoint on the LLM base URL.
		   * @param {string[]} input - Array of text strings to embed
		   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
		   */
		  async createEmbeddings(input) {
		    if (this.#useTeiEmbeddings) {
		      return this.#createTeiEmbeddings(input);
		    }
		    return this.#createOpenAIEmbeddings(input);
		  }

		  /**
		   * TEI (Text Embeddings Inference) format: POST /embed
		   *
		   * Only a Content-Type header is sent; the Authorization header is not
		   * forwarded to this endpoint.
		   * @param {string[]} input - Array of text strings to embed
		   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
		   */
		  async #createTeiEmbeddings(input) {
		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#embeddingBaseURL}/embed`, {
		        method: "POST",
		        headers: { "Content-Type": "application/json" },
		        body: JSON.stringify({ inputs: input }),
		      }),
		    );

		    await this.#throwIfNotOk(response);
		    const json = await response.json();

		    // TEI returns [[0.1, 0.2, ...]]; wrap it in the OpenAI-style envelope.
		    // The model name is fixed and usage is reported as zero because the
		    // response carries neither.
		    return common.Embeddings.fromObject({
		      object: "list",
		      data: json.map((embedding, index) => ({
		        object: "embedding",
		        index,
		        embedding,
		      })),
		      model: "bge-small-en-v1.5",
		      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
		    });
		  }

		  /**
		   * OpenAI-compatible format: POST /embeddings
		   * @param {string[]} input - Array of text strings to embed
		   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
		   */
		  async #createOpenAIEmbeddings(input) {
		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#embeddingBaseURL}/embeddings`, {
		        method: "POST",
		        headers: this.#headers,
		        body: JSON.stringify({
		          input,
		          model: this.#model,
		        }),
		      }),
		    );

		    await this.#throwIfNotOk(response);
		    const json = await response.json();

		    // Fill in envelope fields the endpoint may omit.
		    return common.Embeddings.fromObject({
		      object: json.object || "list",
		      data: json.data.map((item) => ({
		        object: item.object || "embedding",
		        index: item.index,
		        embedding: item.embedding,
		      })),
		      model: json.model || this.#model,
		      usage: json.usage || {
		        prompt_tokens: 0,
		        completion_tokens: 0,
		        total_tokens: 0,
		      },
		    });
		  }

		  /**
		   * Lists models available to the current user
		   * @returns {Promise<object[]>} Array of available models
		   * @throws {Error} When the HTTP response is not OK
		   */
		  async listModels() {
		    // GitHub Models catalog is at the root domain, not org-specific
		    const catalogUrl = "https://models.github.ai/catalog/models";
		    const response = await this.#fetch(catalogUrl, {
		      method: "GET",
		      headers: this.#headers,
		    });

		    await this.#throwIfNotOk(response);
		    const json = await response.json();
		    return json;
		  }

		  /**
		   * Counts tokens in the given text using the tokenizer
		   * @param {string} text - The text to count tokens for
		   * @returns {number} Number of tokens in the text
		   */
		  countTokens(text) {
		    return countTokens(text, this.#tokenizer);
		  }

		  /**
		   * Converts an image to text description using vision capabilities
		   * @param {string|Buffer} file - Path to the image file or a Buffer containing the image data
		   * @param {string} [prompt] - Optional text prompt to guide the description
		   * @param {string} [model] - Model to use for image-to-text conversion, defaults to instance model
		   * @param {string} [systemPrompt] - System prompt to set context for the description
		   * @param {number} [max_tokens] - Maximum tokens to generate in the description
		   * @param {string} [mimeType] - The mime type of the file. Used only when file is a Buffer (default image/png); for a path it is derived from the extension
		   * @returns {Promise<string>} Text description of the image, or "" when the response has no content
		   * @throws {Error} When the HTTP response is not OK
		   */
		  async imageToText(
		    file,
		    prompt = "Describe this image in detail.",
		    model = this.#model,
		    systemPrompt = "You are an AI assistant that describes images accurately and in detail.",
		    max_tokens = 1000,
		    mimeType = "image/png",
		  ) {
		    let buffer;
		    let resolvedMimeType = mimeType;
		    if (Buffer.isBuffer(file)) {
		      buffer = file;
		    } else {
		      buffer = await readFile(file);
		      // For paths the extension decides the mime type; "jpg" maps to "jpeg".
		      const extension = file.split(".").pop().toLowerCase();
		      resolvedMimeType = `image/${extension === "jpg" ? "jpeg" : extension}`;
		    }

		    const base64 = buffer.toString("base64");

		    const body = {
		      model,
		      messages: [
		        {
		          role: "system",
		          content: systemPrompt,
		        },
		        {
		          role: "user",
		          content: [
		            {
		              type: "text",
		              text: prompt,
		            },
		            {
		              type: "image_url",
		              image_url: {
		                url: `data:${resolvedMimeType};base64,${base64}`,
		              },
		            },
		          ],
		        },
		      ],
		      max_tokens,
		    };

		    const response = await this.#retry.execute(() =>
		      this.#fetch(`${this.#baseURL}/chat/completions`, {
		        method: "POST",
		        headers: this.#headers,
		        body: JSON.stringify(body),
		      }),
		    );

		    await this.#throwIfNotOk(response);

		    const json = await response.json();
		    // A response without a choices array yields "" instead of a TypeError.
		    return json.choices?.[0]?.message?.content || "";
		  }
		}

		/**
		 * Creates a fetch function that honours the HTTPS_PROXY (or https_proxy)
		 * environment variable.
		 *
		 * Without a proxy setting the global fetch is returned unchanged. With one,
		 * the returned wrapper adds a `proxy` option to every request; whether that
		 * option is honoured depends on the runtime's fetch implementation.
		 * @param {object} [process] - Process object for environment variable access
		 * @returns {(url: string, options?: object) => Promise<Response>} Fetch function with proxy support
		 */
		export function createProxyAwareFetch(process = globalThis.process) {
		  const httpsProxy = process.env.HTTPS_PROXY || process.env.https_proxy;

		  if (!httpsProxy) {
		    return fetch;
		  }

		  return (url, options = {}) =>
		    fetch(url, {
		      ...options,
		      proxy: httpsProxy,
		    });
		}

		/**
		 * Builds an LlmApi wired with the default collaborators: a fresh retry
		 * helper, a proxy-aware fetch and the standard tokenizer factory.
		 * @param {string} token - LLM API token
		 * @param {string} model - Model to use
		 * @param {string} baseUrl - Base URL for the LLM API (required, e.g. https://models.github.ai/orgs/{org})
		 * @param {string|null} embeddingBaseUrl - Base URL for embeddings (null falls back to baseUrl with OpenAI-compatible format)
		 * @param {number} [temperature] - Temperature for completions
		 * @param {(url: string, options?: object) => Promise<Response>} [fetchFn] - HTTP client function
		 * @param {() => object} [tokenizerFn] - Tokenizer factory function
		 * @returns {LlmApi} Configured LlmApi instance
		 * @throws {Error} If baseUrl is missing
		 */
		export function createLlmApi(
		  token,
		  model,
		  baseUrl,
		  embeddingBaseUrl,
		  temperature = 0.3,
		  fetchFn = createProxyAwareFetch(),
		  tokenizerFn = createTokenizer,
		) {
		  if (baseUrl) {
		    // Argument order follows the LlmApi constructor, which takes the retry
		    // helper before fetch/tokenizer and the temperature last.
		    const constructorArgs = [
		      token,
		      model,
		      baseUrl,
		      embeddingBaseUrl,
		      createRetry(),
		      fetchFn,
		      tokenizerFn,
		      temperature,
		    ];
		    return new LlmApi(...constructorArgs);
		  }

		  throw new Error(
		    "baseUrl is required. Set LLM_BASE_URL to https://models.github.ai/orgs/{YOUR_ORG} for org-level PATs.",
		  );
		}

		/**
		 * Scales a vector to unit (Euclidean) length without modifying the input.
		 * @param {number[]} vector - Vector to normalize
		 * @returns {number[]} New vector of length 1, or a copy of the input when
		 *   its magnitude is zero
		 */
		export function normalizeVector(vector) {
		  let sumOfSquares = 0;
		  vector.forEach((component) => {
		    sumOfSquares += component * component;
		  });

		  const length = Math.sqrt(sumOfSquares);
		  if (length !== 0) {
		    return vector.map((component) => component / length);
		  }

		  // A zero vector has no direction; hand back a copy instead of dividing by 0.
		  return vector.slice();
		}

-66

models.js

		/**
		* Static map of model names to their context window token budgets.
		* Keys are model names with a provider prefix (for example "openai/gpt-5");
		* values are token counts. getBudget looks models up here and throws for
		* names that are not listed.
		* Seeded from GitHub Models API via `./scripts/env.sh node scripts/models.js`
		* @type {Map<string, number>}
		*/
		export const BUDGETS = new Map([
		["ai21-labs/ai21-jamba-1.5-large", 262144],
		["cohere/cohere-command-a", 131072],
		["cohere/cohere-command-r-08-2024", 131072],
		["cohere/cohere-command-r-plus-08-2024", 131072],
		["deepseek/deepseek-r1", 128000],
		["deepseek/deepseek-r1-0528", 128000],
		["deepseek/deepseek-v3-0324", 128000],
		["meta/llama-3.2-11b-vision-instruct", 128000],
		["meta/llama-3.2-90b-vision-instruct", 128000],
		["meta/llama-3.3-70b-instruct", 128000],
		["meta/llama-4-maverick-17b-128e-instruct-fp8", 1000000],
		["meta/llama-4-scout-17b-16e-instruct", 10000000],
		["meta/meta-llama-3.1-405b-instruct", 131072],
		["meta/meta-llama-3.1-8b-instruct", 131072],
		["microsoft/mai-ds-r1", 128000],
		["microsoft/phi-4", 16384],
		["microsoft/phi-4-mini-instruct", 128000],
		["microsoft/phi-4-mini-reasoning", 128000],
		["microsoft/phi-4-multimodal-instruct", 128000],
		["microsoft/phi-4-reasoning", 32768],
		["mistral-ai/codestral-2501", 256000],
		["mistral-ai/ministral-3b", 131072],
		["mistral-ai/mistral-medium-2505", 128000],
		["mistral-ai/mistral-small-2503", 128000],
		["openai/gpt-4.1", 1048576],
		["openai/gpt-4.1-mini", 1048576],
		["openai/gpt-4.1-nano", 1048576],
		["openai/gpt-4o", 131072],
		["openai/gpt-4o-mini", 131072],
		["openai/gpt-5", 200000],
		["openai/gpt-5-chat", 200000],
		["openai/gpt-5-mini", 200000],
		["openai/gpt-5-nano", 200000],
		["openai/o1", 200000],
		["openai/o1-mini", 128000],
		["openai/o1-preview", 128000],
		["openai/o3", 200000],
		["openai/o3-mini", 200000],
		["openai/o4-mini", 200000],
		["openai/text-embedding-3-large", 8191],
		["openai/text-embedding-3-small", 8191],
		["xai/grok-3", 131072],
		["xai/grok-3-mini", 131072],
		]);

		/**
		* Looks up the context window token budget registered for a model
		* @param {string} model - Model name with provider prefix (e.g., 'openai/gpt-5')
		* @returns {number} Token budget for the model
		* @throws {Error} If BUDGETS holds no truthy entry for the model; the message lists every known model
		*/
		export function getBudget(model) {
		  const tokens = BUDGETS.get(model);
		  if (tokens) {
		    return tokens;
		  }

		  // Unknown model: include the full list of keys so the caller can spot a typo.
		  const knownModels = [...BUDGETS.keys()].join(", ");
		  throw new Error(`Unknown model: ${model}. Known models: ${knownModels}`);
		}

-460

test/libllm-api.test.js

		import { test, describe, beforeEach, mock } from "node:test";
		import assert from "node:assert";

		import { LlmApi, DEFAULT_BASE_URL } from "../index.js";
		import { Retry } from "@forwardimpact/libutil";

		const EMBEDDING_BASE_URL = "http://localhost:8090";

		describe("LlmApi", () => {
		let mockFetch;
		let llmApi;
		let retry;

		beforeEach(() => {
		  // Rebuild the mock and the client for every test so recorded fetch calls
		  // never carry over from one test to the next.
		  mockFetch = mock.fn();
		  retry = new Retry();
		  const constructorArgs = [
		    "test-token",
		    "gpt-4",
		    DEFAULT_BASE_URL,
		    EMBEDDING_BASE_URL,
		    retry,
		    mockFetch,
		  ];
		  llmApi = new LlmApi(...constructorArgs);
		});

		test("creates LlmApi with token and model", () => {
		  // Smoke test: the fixture built in beforeEach really is an LlmApi.
		  const isLlmApi = llmApi instanceof LlmApi;
		  assert.ok(isLlmApi);
		});

		test("createCompletions makes correct API call", async () => {
		  // Successful response whose json() resolves to a minimal chat completion.
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      id: "test-id",
		      object: "chat.completion",
		      choices: [{ message: { role: "assistant", content: "Hello" } }],
		      usage: { total_tokens: 10 },
		    })),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  const result = await llmApi.createCompletions(
		    [{ role: "user", content: "Hello" }],
		    undefined, // tools
		    0.5, // temperature
		    100, // max_tokens
		  );

		  // Exactly one request must go out, as an authorized POST to the chat
		  // completions endpoint under the base URL.
		  assert.strictEqual(mockFetch.mock.callCount(), 1);
		  const [requestUrl, requestInit] = mockFetch.mock.calls[0].arguments;
		  assert.strictEqual(requestUrl, `${DEFAULT_BASE_URL}/chat/completions`);
		  assert.strictEqual(requestInit.method, "POST");
		  assert.ok(requestInit.headers.Authorization.includes("test-token"));
		  assert.strictEqual(result.id, "test-id");
		});

		test("createCompletions uses default model when not specified", async () => {
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      id: "test-id",
		      object: "chat.completion",
		      choices: [],
		      usage: { total_tokens: 10 },
		    })),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  await llmApi.createCompletions([{ role: "user", content: "Hello" }]);

		  // The serialized request body must name the model the fixture was built with.
		  const requestInit = mockFetch.mock.calls[0].arguments[1];
		  const { model } = JSON.parse(requestInit.body);
		  assert.strictEqual(model, "gpt-4");
		});

		test("createCompletions throws error on HTTP error", async () => {
		  // Failed response: the mock offers text() for the error body and no json().
		  const notFound = {
		    ok: false,
		    status: 404,
		    statusText: "Not Found",
		    text: mock.fn(async () => "Error details"),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => notFound);

		  const attempt = () =>
		    llmApi.createCompletions([{ role: "user", content: "Hello" }]);

		  await assert.rejects(attempt, { message: /HTTP 404: Not Found/ });
		});

		test("createCompletions throws error immediately on non-retryable HTTP error", async () => {
		  const badRequest = {
		    ok: false,
		    status: 400,
		    statusText: "Bad Request",
		    text: mock.fn(async () => "Invalid request details"),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => badRequest);

		  const attempt = () =>
		    llmApi.createCompletions([{ role: "user", content: "Hello" }]);
		  await assert.rejects(attempt, { message: /HTTP 400: Bad Request/ });

		  // A single recorded fetch proves the 400 was not retried.
		  assert.strictEqual(mockFetch.mock.callCount(), 1);
		});

		test("createCompletions fixes multi_tool_use.parallel hallucination", async () => {
		  // One bogus wrapper call that bundles two real tool invocations.
		  const buildParallelCall = () => ({
		    id: "call_abc123",
		    type: "function",
		    function: {
		      name: "multi_tool_use.parallel",
		      arguments: JSON.stringify({
		        tool_uses: [
		          {
		            recipient_name: "functions.get_ontology",
		            parameters: {},
		          },
		          {
		            recipient_name: "functions.get_subjects",
		            parameters: { type: "schema:Person" },
		          },
		        ],
		      }),
		    },
		  });
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      id: "test-id",
		      object: "chat.completion",
		      choices: [
		        {
		          message: {
		            role: "assistant",
		            content: "Planning to call tools...",
		            tool_calls: [buildParallelCall()],
		          },
		        },
		      ],
		      usage: { total_tokens: 100 },
		    })),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  const result = await llmApi.createCompletions({
		    messages: [{ role: "user", content: "Query the graph" }],
		  });

		  // The wrapper must be replaced by one call per nested tool use.
		  assert.strictEqual(result.choices[0].message.tool_calls.length, 2);

		  // Each expanded call drops the "functions." prefix and gets an id made of
		  // the wrapper id plus its position.
		  const firstCall = result.choices[0].message.tool_calls[0];
		  assert.strictEqual(firstCall.function.name, "get_ontology");
		  assert.strictEqual(firstCall.id, "call_abc123_0");
		  assert.deepStrictEqual(JSON.parse(firstCall.function.arguments), {});

		  const secondCall = result.choices[0].message.tool_calls[1];
		  assert.strictEqual(secondCall.function.name, "get_subjects");
		  assert.strictEqual(secondCall.id, "call_abc123_1");
		  assert.deepStrictEqual(JSON.parse(secondCall.function.arguments), {
		    type: "schema:Person",
		  });
		});

		test("createCompletions fixes parallel hallucination (short form)", async () => {
		  // Same hallucination, but the wrapper is named just "parallel" and the
		  // nested tool name carries no "functions." prefix.
		  const buildParallelCall = () => ({
		    id: "call_xyz",
		    type: "function",
		    function: {
		      name: "parallel",
		      arguments: JSON.stringify({
		        tool_uses: [
		          {
		            recipient_name: "search_content",
		            parameters: { query: "test" },
		          },
		        ],
		      }),
		    },
		  });
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      id: "test-id",
		      object: "chat.completion",
		      choices: [
		        {
		          message: {
		            role: "assistant",
		            content: "",
		            tool_calls: [buildParallelCall()],
		          },
		        },
		      ],
		      usage: { total_tokens: 50 },
		    })),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  const result = await llmApi.createCompletions({
		    messages: [{ role: "user", content: "Search" }],
		  });

		  assert.strictEqual(result.choices[0].message.tool_calls.length, 1);
		  const expandedName = result.choices[0].message.tool_calls[0].function.name;
		  assert.strictEqual(expandedName, "search_content");
		});

		test("createCompletions preserves normal tool calls", async () => {
		  // Control case: an ordinary tool call must come back untouched.
		  const buildNormalCall = () => ({
		    id: "call_normal",
		    type: "function",
		    function: {
		      name: "search_content",
		      arguments: '{"query":"test"}',
		    },
		  });
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      id: "test-id",
		      object: "chat.completion",
		      choices: [
		        {
		          message: {
		            role: "assistant",
		            content: "",
		            tool_calls: [buildNormalCall()],
		          },
		        },
		      ],
		      usage: { total_tokens: 30 },
		    })),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  const result = await llmApi.createCompletions({
		    messages: [{ role: "user", content: "Test" }],
		  });

		  assert.strictEqual(result.choices[0].message.tool_calls.length, 1);
		  const returnedName = result.choices[0].message.tool_calls[0].function.name;
		  assert.strictEqual(returnedName, "search_content");
		  const returnedId = result.choices[0].message.tool_calls[0].id;
		  assert.strictEqual(returnedId, "call_normal");
		});

		test("createEmbeddings makes correct TEI API call", async () => {
		  // The mocked TEI response is a bare array of vectors, one per input text.
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => [
		      [0.1, 0.2, 0.3],
		      [0.4, 0.5, 0.6],
		    ]),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  const texts = ["Hello", "World"];
		  const result = await llmApi.createEmbeddings(texts);

		  // Request side: one POST to <embedding base>/embed.
		  assert.strictEqual(mockFetch.mock.callCount(), 1);
		  const [requestUrl, requestInit] = mockFetch.mock.calls[0].arguments;
		  assert.strictEqual(requestUrl, `${EMBEDDING_BASE_URL}/embed`);
		  assert.strictEqual(requestInit.method, "POST");

		  // The body carries the texts under "inputs" and names no model.
		  const payload = JSON.parse(requestInit.body);
		  assert.deepStrictEqual(payload.inputs, texts);
		  assert.strictEqual(payload.model, undefined);

		  // No Authorization header is expected on this endpoint.
		  const { headers } = requestInit;
		  assert.strictEqual(headers.Authorization, undefined);
		  assert.strictEqual(headers["Content-Type"], "application/json");

		  // Response side: vectors are wrapped into data[i].embedding entries.
		  assert.strictEqual(result.data.length, 2);
		  assert.deepStrictEqual(result.data[0].embedding, [0.1, 0.2, 0.3]);
		  assert.deepStrictEqual(result.data[1].embedding, [0.4, 0.5, 0.6]);
		  assert.strictEqual(result.model, "bge-small-en-v1.5");
		});

		test("createEmbeddings retries on 429 status", async () => {
		  const rateLimited = {
		    ok: false,
		    status: 429,
		    statusText: "Too Many Requests",
		  };
		  const succeeded = {
		    ok: true,
		    json: mock.fn(async () => [[0.1, 0.2, 0.3]]),
		  };

		  // The first fetch is rate limited; every later one succeeds.
		  let attempts = 0;
		  mockFetch.mock.mockImplementation(() => {
		    attempts += 1;
		    return Promise.resolve(attempts === 1 ? rateLimited : succeeded);
		  });

		  const result = await llmApi.createEmbeddings(["Hello"]);

		  // At least two fetches means the 429 triggered a retry.
		  assert.ok(mockFetch.mock.callCount() >= 2);
		  assert.strictEqual(result.data.length, 1);
		});

		test("createEmbeddings throws error immediately on non-retryable HTTP error", async () => {
		  const badRequest = {
		    ok: false,
		    status: 400,
		    statusText: "Bad Request",
		    text: mock.fn(async () => "Invalid request details"),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => badRequest);

		  const attempt = () => llmApi.createEmbeddings(["Hello"]);
		  await assert.rejects(attempt, { message: /HTTP 400: Bad Request/ });

		  // A single recorded fetch proves the 400 was not retried.
		  assert.strictEqual(mockFetch.mock.callCount(), 1);
		});

		test("LlmApi falls back to baseUrl when embeddingBaseUrl is null", () => {
		  // Only construction is exercised: passing null as the embedding URL must
		  // still yield an LlmApi instance.
		  const fetchStub = mock.fn();
		  const retryHelper = new Retry();
		  const constructorArgs = [
		    "test-token",
		    "gpt-4",
		    DEFAULT_BASE_URL,
		    null,
		    retryHelper,
		    fetchStub,
		  ];

		  const llm = new LlmApi(...constructorArgs);

		  assert.ok(llm instanceof LlmApi);
		});

		test("createEmbeddings uses OpenAI-compatible format when embeddingBaseUrl matches baseUrl", async () => {
		  // Dedicated client built with a null embedding URL, unlike the shared fixture.
		  const fetchStub = mock.fn();
		  const retryHelper = new Retry();
		  const client = new LlmApi(
		    "test-token",
		    "gpt-4",
		    DEFAULT_BASE_URL,
		    null,
		    retryHelper,
		    fetchStub,
		  );

		  // OpenAI-style embeddings envelope returned by the mocked endpoint.
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      object: "list",
		      data: [{ object: "embedding", index: 0, embedding: [0.1, 0.2, 0.3] }],
		      model: "text-embedding-ada-002",
		      usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
		    })),
		  };
		  fetchStub.mock.mockImplementationOnce(async () => mockResponse);

		  const result = await client.createEmbeddings(["Hello"]);

		  // Request side: one POST to .../embeddings rather than .../embed.
		  assert.strictEqual(fetchStub.mock.callCount(), 1);
		  const [requestUrl, requestInit] = fetchStub.mock.calls[0].arguments;
		  assert.ok(requestUrl.endsWith("/embeddings"));
		  assert.ok(!requestUrl.endsWith("/embed"));
		  assert.strictEqual(requestInit.method, "POST");

		  // The body uses "input" (singular) and names the configured model.
		  const payload = JSON.parse(requestInit.body);
		  assert.deepStrictEqual(payload.input, ["Hello"]);
		  assert.strictEqual(payload.model, "gpt-4");

		  // Response side: data and model come straight from the envelope.
		  assert.strictEqual(result.data.length, 1);
		  assert.deepStrictEqual(result.data[0].embedding, [0.1, 0.2, 0.3]);
		  assert.strictEqual(result.model, "text-embedding-ada-002");
		});

		test("listModels makes correct API call", async () => {
		  const mockResponse = {
		    ok: true,
		    json: mock.fn(async () => ({
		      data: [
		        { id: "gpt-4", object: "model" },
		        { id: "gpt-3.5-turbo", object: "model" },
		      ],
		    })),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => mockResponse);

		  const result = await llmApi.listModels();

		  // The expected URL is the base URL with "/inference" swapped for "/catalog/models".
		  const expectedUrl = DEFAULT_BASE_URL.replace("/inference", "/catalog/models");
		  assert.strictEqual(mockFetch.mock.callCount(), 1);
		  const [requestUrl, requestInit] = mockFetch.mock.calls[0].arguments;
		  assert.strictEqual(requestUrl, expectedUrl);
		  assert.strictEqual(requestInit.method, "GET");

		  // Models come back in the order the endpoint listed them.
		  assert.strictEqual(result.data.length, 2);
		  assert.strictEqual(result.data[0].id, "gpt-4");
		  assert.strictEqual(result.data[1].id, "gpt-3.5-turbo");
		});

		test("listModels throws error on HTTP error", async () => {
		  const unauthorized = {
		    ok: false,
		    status: 401,
		    statusText: "Unauthorized",
		    text: mock.fn(async () => "Auth error details"),
		  };
		  mockFetch.mock.mockImplementationOnce(async () => unauthorized);

		  // The rejection message must carry both the status code and the status text.
		  const attempt = () => llmApi.listModels();
		  await assert.rejects(attempt, { message: /HTTP 401: Unauthorized/ });
		});
		});

-98

test/libllm-instance.test.js

		import { test, describe, beforeEach, mock } from "node:test";
		import assert from "node:assert";

		import { LlmApi, DEFAULT_BASE_URL } from "../index.js";
		import { Retry } from "@forwardimpact/libutil";

		const EMBEDDING_BASE_URL = "http://localhost:8090";

		describe("LlmApi instance methods", () => {
		  let llmApi;
		  let retry;

		  // No test in this suite inspects fetch, so the mock stays local to the hook.
		  beforeEach(() => {
		    const mockFetch = mock.fn();
		    retry = new Retry();
		    const constructorArgs = [
		      "test-token",
		      "gpt-4",
		      DEFAULT_BASE_URL,
		      EMBEDDING_BASE_URL,
		      retry,
		      mockFetch,
		    ];
		    llmApi = new LlmApi(...constructorArgs);
		  });

		  test("countTokens returns token count for text", () => {
		    const tokenCount = llmApi.countTokens("Hello, world!");

		    assert.strictEqual(typeof tokenCount, "number");
		    assert.ok(tokenCount > 0);
		  });

		  test("countTokens handles empty text", () => {
		    // An empty string must count as exactly zero tokens.
		    assert.strictEqual(llmApi.countTokens(""), 0);
		  });

		  test("countTokens handles longer text", () => {
		    // Counted in the same order as before: short text first, then long text.
		    const shortCount = llmApi.countTokens("Hello");
		    const longCount = llmApi.countTokens(
		      "Hello, this is a much longer text that should have more tokens",
		    );

		    assert.ok(longCount > shortCount);
		  });
		});

		describe("Proxy Support", () => {
		  // NOTE(review): ../index.js is also imported statically at the top of this
		  // file, so each dynamic import below should resolve to the same
		  // already-loaded module. Confirm the re-import is only meant to obtain
		  // createLlmApi.
		  test("createLlmApi creates LlmApi instance with default fetch", async () => {
		    const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
		      await import("../index.js");

		    // No fetchFn is passed, so the factory's default fetch is used.
		    const llm = createLlmApi(
		      "test-token",
		      "gpt-4",
		      DEFAULT_BASE_URL,
		      EMBEDDING_BASE_URL,
		    );

		    assert.ok(llm instanceof LlmApi);
		  });

		  test("createLlmApi works without embeddingBaseUrl", async () => {
		    const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
		      await import("../index.js");

		    // embeddingBaseUrl is omitted entirely (undefined), not passed as null.
		    const llm = createLlmApi("test-token", "gpt-4", DEFAULT_BASE_URL);
		    assert.ok(llm instanceof LlmApi);
		  });

		  test("createLlmApi works when HTTPS_PROXY environment variable is set", async () => {
		    // Remember the previous value so the finally block can restore it.
		    const originalProxy = process.env.HTTPS_PROXY;
		    process.env.HTTPS_PROXY = "http://proxy.example.com:3128";

		    try {
		      const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
		        await import("../index.js");

		      const llm = createLlmApi(
		        "test-token",
		        "gpt-4",
		        DEFAULT_BASE_URL,
		        EMBEDDING_BASE_URL,
		      );

		      assert.ok(llm instanceof LlmApi);
		    } finally {
		      // Restore the exact prior state. Compare against undefined rather than
		      // truthiness, so a variable that was set to "" is put back instead of
		      // removed. When it was unset, delete it: process.env turns assigned
		      // values into strings, so assigning undefined would not unset it.
		      if (originalProxy === undefined) {
		        delete process.env.HTTPS_PROXY;
		      } else {
		        process.env.HTTPS_PROXY = originalProxy;
		      }
		    }
		  });
		});

@forwardimpact/libllm - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics