@forwardimpact/libllm
Advanced tools
| import { tool } from "@forwardimpact/libtype"; | ||
/**
 * Tells whether a tool-call function name is one of the pseudo-tool names
 * that wrap several real tool calls.
 * @param {string} name - Function name taken from a tool call
 * @returns {boolean} True for "multi_tool_use.parallel" or "parallel"
 */
function isParallelHallucination(name) {
  switch (name) {
    case "multi_tool_use.parallel":
    case "parallel":
      return true;
    default:
      return false;
  }
}
/**
 * Reads the function name of a nested tool use and removes a leading
 * "functions." namespace when one is present.
 * @param {object} nestedTool - Nested tool use object; `recipient_name` is
 *   preferred, `name` is the fallback
 * @returns {string} Function name without the prefix, or "" when neither
 *   field holds a value
 */
function extractNestedName(nestedTool) {
  const prefix = "functions.";
  const candidate = nestedTool.recipient_name || nestedTool.name || "";
  if (!candidate.startsWith(prefix)) {
    return candidate;
  }
  return candidate.slice(prefix.length);
}
/**
 * Converts one nested tool use from a parallel pseudo-call into a proper
 * ToolCall object.
 *
 * The nested arguments are read from `parameters`, then `arguments`, and
 * default to an empty object. Object values are serialized to JSON. A value
 * that is already a string is passed through unchanged so that a JSON string
 * is not encoded a second time.
 * @param {object} nestedTool - Nested tool use from the parallel call
 * @param {string} parentId - ID of the wrapping tool call
 * @param {number} index - Position of this tool use in the wrapper's list
 * @returns {object} ToolCall built by `tool.ToolCall.fromObject`, with the
 *   ID `${parentId}_${index}`
 */
function convertNestedToolUse(nestedTool, parentId, index) {
  const nestedArgs = nestedTool.parameters || nestedTool.arguments || {};
  // Standard tool calls carry `arguments` as a JSON string; stringifying it
  // again would wrap it in quotes and break the consumer's JSON.parse.
  const serializedArgs =
    typeof nestedArgs === "string" ? nestedArgs : JSON.stringify(nestedArgs);
  return tool.ToolCall.fromObject({
    id: `${parentId}_${index}`,
    type: "function",
    function: {
      name: extractNestedName(nestedTool),
      arguments: serializedArgs,
    },
  });
}
/**
 * Unpacks a parallel pseudo-call into the individual tool calls listed under
 * `tool_uses` in its JSON arguments.
 * @param {object} toolCall - The multi_tool_use.parallel tool call
 * @returns {object[]} One converted tool call per nested entry (empty when
 *   there are none), or the untouched original in a single-element array
 *   when parsing or conversion throws
 */
function expandParallelToolCall(toolCall) {
  let expanded;
  try {
    const payload = JSON.parse(toolCall.function.arguments || "{}");
    const nestedUses = payload.tool_uses || [];
    expanded = nestedUses.map((entry, position) =>
      convertNestedToolUse(entry, toolCall.id, position),
    );
  } catch {
    // Leave the call as it was; it will likely be rejected downstream.
    return [toolCall];
  }
  return expanded;
}
/**
 * Replaces hallucinated multi_tool_use.parallel tool calls with the real
 * tool calls they wrap.
 *
 * Some OpenAI models occasionally emit a pseudo-tool call named
 * "multi_tool_use.parallel" or "parallel" whose arguments bundle several
 * tool calls. Each such entry is expanded in place; every other entry is
 * kept as-is and order is preserved.
 * @see https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
 * @param {object[]} toolCalls - Tool call objects from an LLM response
 * @returns {object[]} Tool calls with parallel wrappers expanded; a missing
 *   or empty input is returned unchanged
 */
export function fixMultiToolUseParallel(toolCalls) {
  if (toolCalls == null || !toolCalls.length) {
    return toolCalls;
  }
  return toolCalls.flatMap((call) =>
    isParallelHallucination(call.function?.name)
      ? expandParallelToolCall(call)
      : [call],
  );
}
+391
| import { readFile } from "node:fs/promises"; | ||
| import { common, llm } from "@forwardimpact/libtype"; | ||
| import { | ||
| countTokens, | ||
| createTokenizer, | ||
| createRetry, | ||
| } from "@forwardimpact/libutil"; | ||
| import { fixMultiToolUseParallel } from "./hallucination.js"; | ||
| // Note: getBudget has moved to @forwardimpact/libmemory as getModelBudget | ||
| // This re-export is deprecated and will be removed in a future version | ||
| export { getBudget } from "./models.js"; | ||
| /** | ||
| * Default base URL for GitHub Models API | ||
| * @type {string} | ||
| */ | ||
| export const DEFAULT_BASE_URL = "https://models.github.ai/inference"; | ||
/**
 * Normalizes a base URL so request paths can be appended with a single "/".
 *
 * Trailing slashes are removed from every URL. For GitHub Models hosts
 * (any URL containing "models.github.ai") the "/inference" segment is
 * appended when it is not already present.
 * @param {string} baseUrl - Base URL for the LLM API
 * @returns {string} Normalized base URL without a trailing slash
 */
function normalizeBaseUrl(baseUrl) {
  // Strip trailing slashes for all hosts; callers build paths as
  // `${base}/chat/completions`, so a retained slash would yield "//".
  let end = baseUrl.length;
  while (end > 0 && baseUrl[end - 1] === "/") {
    end -= 1;
  }
  const trimmed = baseUrl.slice(0, end);
  // For GitHub Models, ensure /inference is appended if not present
  if (trimmed.includes("models.github.ai") && !trimmed.includes("/inference")) {
    return `${trimmed}/inference`;
  }
  return trimmed;
}
/**
 * LLM API client that talks directly over HTTP to OpenAI-compatible
 * endpoints.
 *
 * All network access goes through the injected fetch function. Every request
 * except listModels is wrapped in the injected retry instance.
 */
export class LlmApi {
  #model;
  #baseURL;
  #embeddingBaseURL;
  #useTeiEmbeddings;
  #headers;
  #fetch;
  #tokenizer;
  #retry;
  #temperature;

  /**
   * Creates a new LLM API instance
   * @param {string} token - LLM API token, sent as a Bearer token
   * @param {string} model - Default model to use for completions
   * @param {string} baseUrl - Base URL for the LLM API
   * @param {string} embeddingBaseUrl - Base URL for embeddings (TEI endpoint or OpenAI-compatible)
   * @param {import("@forwardimpact/libutil").Retry} retry - Retry instance for handling transient errors
   * @param {(url: string, options?: object) => Promise<Response>} fetchFn - HTTP client function (defaults to fetch if not provided)
   * @param {() => object} tokenizerFn - Tokenizer factory; called once here to build the tokenizer used by countTokens
   * @param {number} [temperature] - Temperature for completions
   * @throws {Error} If baseUrl or retry is missing, or fetchFn/tokenizerFn is not a function
   */
  constructor(
    token,
    model,
    baseUrl,
    embeddingBaseUrl,
    retry,
    fetchFn = fetch,
    tokenizerFn = createTokenizer,
    temperature = 0.3,
  ) {
    if (!baseUrl) throw new Error("baseUrl is required");
    if (!retry) throw new Error("retry is required");
    if (typeof fetchFn !== "function")
      throw new Error("Invalid fetch function");
    if (typeof tokenizerFn !== "function")
      throw new Error("Invalid tokenizer function");

    this.#model = model;
    this.#baseURL = normalizeBaseUrl(baseUrl);
    // TEI mode applies only when an embedding URL is given and it differs
    // from the LLM base URL after normalization.
    this.#useTeiEmbeddings =
      !!embeddingBaseUrl &&
      normalizeBaseUrl(embeddingBaseUrl) !== this.#baseURL;
    // Outside TEI mode the embedding endpoint IS the LLM endpoint, so use the
    // normalized base URL. Storing the raw embedding URL here would drop the
    // "/inference" segment that normalization adds for GitHub Models.
    this.#embeddingBaseURL = this.#useTeiEmbeddings
      ? embeddingBaseUrl
      : this.#baseURL;
    this.#headers = {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28",
    };
    this.#fetch = fetchFn;
    this.#tokenizer = tokenizerFn();
    this.#retry = retry;
    this.#temperature = temperature;
  }

  /**
   * Throws an Error with HTTP status and a snippet of the response body when response is not OK
   * @param {Response} response - Fetch API response
   * @returns {Promise<void>}
   * @throws {Error} With enriched message including up to 200 characters of the body
   */
  async #throwIfNotOk(response) {
    if (response.ok) return;
    let errorDetails = "";
    try {
      const text = await response.text();
      errorDetails = text ? `: ${text.substring(0, 200)}` : "";
    } catch {
      // Body is best-effort context only; the status line is still reported.
    }
    throw new Error(
      `HTTP ${response.status}: ${response.statusText}${errorDetails}`,
    );
  }

  /**
   * Creates chat completions using the LLM API.
   *
   * The window's fields form the request body; `model` and `temperature`
   * are always overridden with this instance's values. Hallucinated
   * multi_tool_use.parallel tool calls in the response are expanded before
   * the response object is built.
   * @param {import("@forwardimpact/libtype").memory.Window[]} window - Memory window
   * @returns {Promise<import("@forwardimpact/libtype").llm.CompletionsResponse>} Completion response
   * @throws {Error} If the HTTP response is not OK
   */
  async createCompletions(window) {
    const body = {
      ...window,
      model: this.#model,
      temperature: this.#temperature,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // Fix hallucinated multi_tool_use.parallel calls before converting to protobuf
    for (const choice of json.choices || []) {
      if (choice.message?.tool_calls) {
        choice.message.tool_calls = fixMultiToolUseParallel(
          choice.message.tool_calls,
        );
      }
    }
    return llm.CompletionsResponse.fromObject(json);
  }

  /**
   * Creates embeddings via TEI or OpenAI-compatible endpoint.
   * Uses TEI format when the embedding base URL is explicitly set to a
   * different URL than the LLM base URL; otherwise uses the
   * OpenAI-compatible /embeddings endpoint on the LLM base URL.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   * @throws {Error} If the HTTP response is not OK
   */
  async createEmbeddings(input) {
    if (this.#useTeiEmbeddings) {
      return this.#createTeiEmbeddings(input);
    }
    return this.#createOpenAIEmbeddings(input);
  }

  /**
   * TEI (Text Embeddings Inference) format: POST /embed
   * No Authorization header is sent on this path.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createTeiEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ inputs: input }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // TEI returns [[0.1, 0.2, ...]]; the model label and usage below are
    // fixed values, not read from the response.
    return common.Embeddings.fromObject({
      object: "list",
      data: json.map((embedding, index) => ({
        object: "embedding",
        index,
        embedding,
      })),
      model: "bge-small-en-v1.5",
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    });
  }

  /**
   * OpenAI-compatible format: POST /embeddings
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createOpenAIEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embeddings`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify({
          input,
          model: this.#model,
        }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    return common.Embeddings.fromObject({
      object: json.object || "list",
      data: json.data.map((item) => ({
        object: item.object || "embedding",
        index: item.index,
        embedding: item.embedding,
      })),
      model: json.model || this.#model,
      usage: json.usage || {
        prompt_tokens: 0,
        completion_tokens: 0,
        total_tokens: 0,
      },
    });
  }

  /**
   * Lists models available to the current user
   * @returns {Promise<object[]>} Array of available models
   * @throws {Error} If the HTTP response is not OK
   */
  async listModels() {
    // GitHub Models catalog is at the root domain, not org-specific
    const catalogUrl = "https://models.github.ai/catalog/models";
    const response = await this.#fetch(catalogUrl, {
      method: "GET",
      headers: this.#headers,
    });
    await this.#throwIfNotOk(response);
    const json = await response.json();
    return json;
  }

  /**
   * Counts tokens in the given text using the tokenizer
   * @param {string} text - The text to count tokens for
   * @returns {number} Number of tokens in the text
   */
  countTokens(text) {
    return countTokens(text, this.#tokenizer);
  }

  /**
   * Converts an image to text description using vision capabilities
   * @param {string|Buffer} file - Path to the image file or a Buffer containing the image data
   * @param {string} [prompt] - Optional text prompt to guide the description
   * @param {string} [model] - Model to use for image-to-text conversion, defaults to instance model
   * @param {string} [systemPrompt] - System prompt to set context for the description
   * @param {number} [max_tokens] - Maximum tokens to generate in the description
   * @param {string} [mimeType] - The mime type of the file. Used only when file is a Buffer (default image/png); for a path it is always derived from the extension
   * @returns {Promise<string>} Text description of the image, or "" when the response carries no content
   * @throws {Error} If the file cannot be read or the HTTP response is not OK
   */
  async imageToText(
    file,
    prompt = "Describe this image in detail.",
    model = this.#model,
    systemPrompt = "You are an AI assistant that describes images accurately and in detail.",
    max_tokens = 1000,
    mimeType = "image/png",
  ) {
    let buffer;
    if (Buffer.isBuffer(file)) {
      buffer = file;
    } else {
      buffer = await readFile(file);
      const extension = file.split(".").pop().toLowerCase();
      mimeType = `image/${extension === "jpg" ? "jpeg" : extension}`;
    }
    const base64 = buffer.toString("base64");
    const body = {
      model: model,
      messages: [
        {
          role: "system",
          content: systemPrompt,
        },
        {
          role: "user",
          content: [
            {
              type: "text",
              text: prompt,
            },
            {
              type: "image_url",
              image_url: {
                url: `data:${mimeType};base64,${base64}`,
              },
            },
          ],
        },
      ],
      max_tokens,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // A successful response without `choices` yields "" rather than a TypeError.
    return json.choices?.[0]?.message?.content || "";
  }
}
/**
 * Builds a fetch function that forwards the HTTPS proxy configured in the
 * environment, if any.
 *
 * HTTPS_PROXY is consulted first, then https_proxy. Without either, the
 * global fetch is returned untouched; otherwise the returned wrapper adds a
 * `proxy` option to every request.
 * @param {object} [process] - Process object for environment variable access
 * @returns {(url: string, options?: object) => Promise<Response>} Fetch function with proxy support
 */
export function createProxyAwareFetch(process = global.process) {
  const { env } = process;
  const proxyUrl = env.HTTPS_PROXY || env.https_proxy;
  return proxyUrl
    ? (url, options = {}) => fetch(url, { ...options, proxy: proxyUrl })
    : fetch;
}
/**
 * Builds an LlmApi instance, supplying a fresh retry instance and default
 * fetch/tokenizer dependencies where none are given.
 * @param {string} token - LLM API token
 * @param {string} model - Model to use
 * @param {string} baseUrl - Base URL for the LLM API (required, e.g. https://models.github.ai/orgs/{org})
 * @param {string|null} embeddingBaseUrl - Base URL for embeddings (null falls back to baseUrl with OpenAI-compatible format)
 * @param {number} [temperature] - Temperature for completions
 * @param {(url: string, options?: object) => Promise<Response>} [fetchFn] - HTTP client function
 * @param {() => object} [tokenizerFn] - Tokenizer factory function
 * @returns {LlmApi} Configured LlmApi instance
 * @throws {Error} If baseUrl is missing
 */
export function createLlmApi(
  token,
  model,
  baseUrl,
  embeddingBaseUrl,
  temperature = 0.3,
  fetchFn = createProxyAwareFetch(),
  tokenizerFn = createTokenizer,
) {
  if (baseUrl) {
    // Note the constructor takes retry before fetch/tokenizer and
    // temperature last, unlike this factory's parameter order.
    return new LlmApi(
      token,
      model,
      baseUrl,
      embeddingBaseUrl,
      createRetry(),
      fetchFn,
      tokenizerFn,
      temperature,
    );
  }
  throw new Error(
    "baseUrl is required. Set LLM_BASE_URL to https://models.github.ai/orgs/{YOUR_ORG} for org-level PATs.",
  );
}
/**
 * Scales a vector so its Euclidean length becomes 1.
 * @param {number[]} vector - Vector to normalize
 * @returns {number[]} New array with each component divided by the vector's
 *   length; a zero-length vector is returned as an unscaled copy
 */
export function normalizeVector(vector) {
  let sumOfSquares = 0;
  vector.forEach((component) => {
    sumOfSquares += component * component;
  });
  const length = Math.sqrt(sumOfSquares);
  // Dividing by zero would produce NaN components, so copy instead.
  return length === 0
    ? vector.slice()
    : vector.map((component) => component / length);
}
/**
 * Lookup table from provider-prefixed model name to its context window
 * token budget.
 * Seeded from GitHub Models API via `./scripts/env.sh node scripts/models.js`
 * @type {Map<string, number>}
 */
export const BUDGETS = new Map(
  // Keys are non-numeric strings, so Object.entries keeps the listed order.
  Object.entries({
    "ai21-labs/ai21-jamba-1.5-large": 262144,
    "cohere/cohere-command-a": 131072,
    "cohere/cohere-command-r-08-2024": 131072,
    "cohere/cohere-command-r-plus-08-2024": 131072,
    "deepseek/deepseek-r1": 128000,
    "deepseek/deepseek-r1-0528": 128000,
    "deepseek/deepseek-v3-0324": 128000,
    "meta/llama-3.2-11b-vision-instruct": 128000,
    "meta/llama-3.2-90b-vision-instruct": 128000,
    "meta/llama-3.3-70b-instruct": 128000,
    "meta/llama-4-maverick-17b-128e-instruct-fp8": 1000000,
    "meta/llama-4-scout-17b-16e-instruct": 10000000,
    "meta/meta-llama-3.1-405b-instruct": 131072,
    "meta/meta-llama-3.1-8b-instruct": 131072,
    "microsoft/mai-ds-r1": 128000,
    "microsoft/phi-4": 16384,
    "microsoft/phi-4-mini-instruct": 128000,
    "microsoft/phi-4-mini-reasoning": 128000,
    "microsoft/phi-4-multimodal-instruct": 128000,
    "microsoft/phi-4-reasoning": 32768,
    "mistral-ai/codestral-2501": 256000,
    "mistral-ai/ministral-3b": 131072,
    "mistral-ai/mistral-medium-2505": 128000,
    "mistral-ai/mistral-small-2503": 128000,
    "openai/gpt-4.1": 1048576,
    "openai/gpt-4.1-mini": 1048576,
    "openai/gpt-4.1-nano": 1048576,
    "openai/gpt-4o": 131072,
    "openai/gpt-4o-mini": 131072,
    "openai/gpt-5": 200000,
    "openai/gpt-5-chat": 200000,
    "openai/gpt-5-mini": 200000,
    "openai/gpt-5-nano": 200000,
    "openai/o1": 200000,
    "openai/o1-mini": 128000,
    "openai/o1-preview": 128000,
    "openai/o3": 200000,
    "openai/o3-mini": 200000,
    "openai/o4-mini": 200000,
    "openai/text-embedding-3-large": 8191,
    "openai/text-embedding-3-small": 8191,
    "xai/grok-3": 131072,
    "xai/grok-3-mini": 131072,
  }),
);
/**
 * Looks up the context window token budget registered for a model.
 * @param {string} model - Model name with provider prefix (e.g., 'openai/gpt-5')
 * @returns {number} Token budget for the model
 * @throws {Error} If BUDGETS has no (truthy) entry for the model; the
 *   message lists every known model name
 */
export function getBudget(model) {
  const budget = BUDGETS.get(model);
  if (budget) {
    return budget;
  }
  const knownModels = Array.from(BUDGETS.keys()).join(", ");
  throw new Error(`Unknown model: ${model}. Known models: ${knownModels}`);
}
+11
-2
| { | ||
| "name": "@forwardimpact/libllm", | ||
| "version": "0.1.84", | ||
| "version": "0.1.85", | ||
| "description": "LLM API client for OpenAI-compatible endpoints", | ||
@@ -8,6 +8,15 @@ "license": "Apache-2.0", | ||
| "type": "module", | ||
| "main": "index.js", | ||
| "main": "./src/index.js", | ||
| "exports": { | ||
| ".": "./src/index.js", | ||
| "./bin/fit-completion.js": "./bin/fit-completion.js" | ||
| }, | ||
| "bin": { | ||
| "fit-completion": "./bin/fit-completion.js" | ||
| }, | ||
| "files": [ | ||
| "src/**/*.js", | ||
| "bin/**/*.js", | ||
| "README.md" | ||
| ], | ||
| "engines": { | ||
@@ -14,0 +23,0 @@ "bun": ">=1.2.0", |
| import { tool } from "@forwardimpact/libtype"; | ||
/**
 * Tells whether a tool-call function name is one of the pseudo-tool names
 * that wrap several real tool calls.
 * @param {string} name - Function name taken from a tool call
 * @returns {boolean} True for "multi_tool_use.parallel" or "parallel"
 */
function isParallelHallucination(name) {
  switch (name) {
    case "multi_tool_use.parallel":
    case "parallel":
      return true;
    default:
      return false;
  }
}
/**
 * Reads the function name of a nested tool use and removes a leading
 * "functions." namespace when one is present.
 * @param {object} nestedTool - Nested tool use object; `recipient_name` is
 *   preferred, `name` is the fallback
 * @returns {string} Function name without the prefix, or "" when neither
 *   field holds a value
 */
function extractNestedName(nestedTool) {
  const prefix = "functions.";
  const candidate = nestedTool.recipient_name || nestedTool.name || "";
  if (!candidate.startsWith(prefix)) {
    return candidate;
  }
  return candidate.slice(prefix.length);
}
/**
 * Converts one nested tool use from a parallel pseudo-call into a proper
 * ToolCall object.
 *
 * The nested arguments are read from `parameters`, then `arguments`, and
 * default to an empty object. Object values are serialized to JSON. A value
 * that is already a string is passed through unchanged so that a JSON string
 * is not encoded a second time.
 * @param {object} nestedTool - Nested tool use from the parallel call
 * @param {string} parentId - ID of the wrapping tool call
 * @param {number} index - Position of this tool use in the wrapper's list
 * @returns {object} ToolCall built by `tool.ToolCall.fromObject`, with the
 *   ID `${parentId}_${index}`
 */
function convertNestedToolUse(nestedTool, parentId, index) {
  const nestedArgs = nestedTool.parameters || nestedTool.arguments || {};
  // Standard tool calls carry `arguments` as a JSON string; stringifying it
  // again would wrap it in quotes and break the consumer's JSON.parse.
  const serializedArgs =
    typeof nestedArgs === "string" ? nestedArgs : JSON.stringify(nestedArgs);
  return tool.ToolCall.fromObject({
    id: `${parentId}_${index}`,
    type: "function",
    function: {
      name: extractNestedName(nestedTool),
      arguments: serializedArgs,
    },
  });
}
/**
 * Unpacks a parallel pseudo-call into the individual tool calls listed under
 * `tool_uses` in its JSON arguments.
 * @param {object} toolCall - The multi_tool_use.parallel tool call
 * @returns {object[]} One converted tool call per nested entry (empty when
 *   there are none), or the untouched original in a single-element array
 *   when parsing or conversion throws
 */
function expandParallelToolCall(toolCall) {
  let expanded;
  try {
    const payload = JSON.parse(toolCall.function.arguments || "{}");
    const nestedUses = payload.tool_uses || [];
    expanded = nestedUses.map((entry, position) =>
      convertNestedToolUse(entry, toolCall.id, position),
    );
  } catch {
    // Leave the call as it was; it will likely be rejected downstream.
    return [toolCall];
  }
  return expanded;
}
/**
 * Replaces hallucinated multi_tool_use.parallel tool calls with the real
 * tool calls they wrap.
 *
 * Some OpenAI models occasionally emit a pseudo-tool call named
 * "multi_tool_use.parallel" or "parallel" whose arguments bundle several
 * tool calls. Each such entry is expanded in place; every other entry is
 * kept as-is and order is preserved.
 * @see https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
 * @param {object[]} toolCalls - Tool call objects from an LLM response
 * @returns {object[]} Tool calls with parallel wrappers expanded; a missing
 *   or empty input is returned unchanged
 */
export function fixMultiToolUseParallel(toolCalls) {
  if (toolCalls == null || !toolCalls.length) {
    return toolCalls;
  }
  return toolCalls.flatMap((call) =>
    isParallelHallucination(call.function?.name)
      ? expandParallelToolCall(call)
      : [call],
  );
}
-391
| import { readFile } from "node:fs/promises"; | ||
| import { common, llm } from "@forwardimpact/libtype"; | ||
| import { | ||
| countTokens, | ||
| createTokenizer, | ||
| createRetry, | ||
| } from "@forwardimpact/libutil"; | ||
| import { fixMultiToolUseParallel } from "./hallucination.js"; | ||
| // Note: getBudget has moved to @forwardimpact/libmemory as getModelBudget | ||
| // This re-export is deprecated and will be removed in a future version | ||
| export { getBudget } from "./models.js"; | ||
| /** | ||
| * Default base URL for GitHub Models API | ||
| * @type {string} | ||
| */ | ||
| export const DEFAULT_BASE_URL = "https://models.github.ai/inference"; | ||
/**
 * Normalizes a base URL so request paths can be appended with a single "/".
 *
 * Trailing slashes are removed from every URL. For GitHub Models hosts
 * (any URL containing "models.github.ai") the "/inference" segment is
 * appended when it is not already present.
 * @param {string} baseUrl - Base URL for the LLM API
 * @returns {string} Normalized base URL without a trailing slash
 */
function normalizeBaseUrl(baseUrl) {
  // Strip trailing slashes for all hosts; callers build paths as
  // `${base}/chat/completions`, so a retained slash would yield "//".
  let end = baseUrl.length;
  while (end > 0 && baseUrl[end - 1] === "/") {
    end -= 1;
  }
  const trimmed = baseUrl.slice(0, end);
  // For GitHub Models, ensure /inference is appended if not present
  if (trimmed.includes("models.github.ai") && !trimmed.includes("/inference")) {
    return `${trimmed}/inference`;
  }
  return trimmed;
}
/**
 * LLM API client that talks directly over HTTP to OpenAI-compatible
 * endpoints.
 *
 * All network access goes through the injected fetch function. Every request
 * except listModels is wrapped in the injected retry instance.
 */
export class LlmApi {
  #model;
  #baseURL;
  #embeddingBaseURL;
  #useTeiEmbeddings;
  #headers;
  #fetch;
  #tokenizer;
  #retry;
  #temperature;

  /**
   * Creates a new LLM API instance
   * @param {string} token - LLM API token, sent as a Bearer token
   * @param {string} model - Default model to use for completions
   * @param {string} baseUrl - Base URL for the LLM API
   * @param {string} embeddingBaseUrl - Base URL for embeddings (TEI endpoint or OpenAI-compatible)
   * @param {import("@forwardimpact/libutil").Retry} retry - Retry instance for handling transient errors
   * @param {(url: string, options?: object) => Promise<Response>} fetchFn - HTTP client function (defaults to fetch if not provided)
   * @param {() => object} tokenizerFn - Tokenizer factory; called once here to build the tokenizer used by countTokens
   * @param {number} [temperature] - Temperature for completions
   * @throws {Error} If baseUrl or retry is missing, or fetchFn/tokenizerFn is not a function
   */
  constructor(
    token,
    model,
    baseUrl,
    embeddingBaseUrl,
    retry,
    fetchFn = fetch,
    tokenizerFn = createTokenizer,
    temperature = 0.3,
  ) {
    if (!baseUrl) throw new Error("baseUrl is required");
    if (!retry) throw new Error("retry is required");
    if (typeof fetchFn !== "function")
      throw new Error("Invalid fetch function");
    if (typeof tokenizerFn !== "function")
      throw new Error("Invalid tokenizer function");

    this.#model = model;
    this.#baseURL = normalizeBaseUrl(baseUrl);
    // TEI mode applies only when an embedding URL is given and it differs
    // from the LLM base URL after normalization.
    this.#useTeiEmbeddings =
      !!embeddingBaseUrl &&
      normalizeBaseUrl(embeddingBaseUrl) !== this.#baseURL;
    // Outside TEI mode the embedding endpoint IS the LLM endpoint, so use the
    // normalized base URL. Storing the raw embedding URL here would drop the
    // "/inference" segment that normalization adds for GitHub Models.
    this.#embeddingBaseURL = this.#useTeiEmbeddings
      ? embeddingBaseUrl
      : this.#baseURL;
    this.#headers = {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28",
    };
    this.#fetch = fetchFn;
    this.#tokenizer = tokenizerFn();
    this.#retry = retry;
    this.#temperature = temperature;
  }

  /**
   * Throws an Error with HTTP status and a snippet of the response body when response is not OK
   * @param {Response} response - Fetch API response
   * @returns {Promise<void>}
   * @throws {Error} With enriched message including up to 200 characters of the body
   */
  async #throwIfNotOk(response) {
    if (response.ok) return;
    let errorDetails = "";
    try {
      const text = await response.text();
      errorDetails = text ? `: ${text.substring(0, 200)}` : "";
    } catch {
      // Body is best-effort context only; the status line is still reported.
    }
    throw new Error(
      `HTTP ${response.status}: ${response.statusText}${errorDetails}`,
    );
  }

  /**
   * Creates chat completions using the LLM API.
   *
   * The window's fields form the request body; `model` and `temperature`
   * are always overridden with this instance's values. Hallucinated
   * multi_tool_use.parallel tool calls in the response are expanded before
   * the response object is built.
   * @param {import("@forwardimpact/libtype").memory.Window[]} window - Memory window
   * @returns {Promise<import("@forwardimpact/libtype").llm.CompletionsResponse>} Completion response
   * @throws {Error} If the HTTP response is not OK
   */
  async createCompletions(window) {
    const body = {
      ...window,
      model: this.#model,
      temperature: this.#temperature,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // Fix hallucinated multi_tool_use.parallel calls before converting to protobuf
    for (const choice of json.choices || []) {
      if (choice.message?.tool_calls) {
        choice.message.tool_calls = fixMultiToolUseParallel(
          choice.message.tool_calls,
        );
      }
    }
    return llm.CompletionsResponse.fromObject(json);
  }

  /**
   * Creates embeddings via TEI or OpenAI-compatible endpoint.
   * Uses TEI format when the embedding base URL is explicitly set to a
   * different URL than the LLM base URL; otherwise uses the
   * OpenAI-compatible /embeddings endpoint on the LLM base URL.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   * @throws {Error} If the HTTP response is not OK
   */
  async createEmbeddings(input) {
    if (this.#useTeiEmbeddings) {
      return this.#createTeiEmbeddings(input);
    }
    return this.#createOpenAIEmbeddings(input);
  }

  /**
   * TEI (Text Embeddings Inference) format: POST /embed
   * No Authorization header is sent on this path.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createTeiEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ inputs: input }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // TEI returns [[0.1, 0.2, ...]]; the model label and usage below are
    // fixed values, not read from the response.
    return common.Embeddings.fromObject({
      object: "list",
      data: json.map((embedding, index) => ({
        object: "embedding",
        index,
        embedding,
      })),
      model: "bge-small-en-v1.5",
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    });
  }

  /**
   * OpenAI-compatible format: POST /embeddings
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createOpenAIEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embeddings`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify({
          input,
          model: this.#model,
        }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    return common.Embeddings.fromObject({
      object: json.object || "list",
      data: json.data.map((item) => ({
        object: item.object || "embedding",
        index: item.index,
        embedding: item.embedding,
      })),
      model: json.model || this.#model,
      usage: json.usage || {
        prompt_tokens: 0,
        completion_tokens: 0,
        total_tokens: 0,
      },
    });
  }

  /**
   * Lists models available to the current user
   * @returns {Promise<object[]>} Array of available models
   * @throws {Error} If the HTTP response is not OK
   */
  async listModels() {
    // GitHub Models catalog is at the root domain, not org-specific
    const catalogUrl = "https://models.github.ai/catalog/models";
    const response = await this.#fetch(catalogUrl, {
      method: "GET",
      headers: this.#headers,
    });
    await this.#throwIfNotOk(response);
    const json = await response.json();
    return json;
  }

  /**
   * Counts tokens in the given text using the tokenizer
   * @param {string} text - The text to count tokens for
   * @returns {number} Number of tokens in the text
   */
  countTokens(text) {
    return countTokens(text, this.#tokenizer);
  }

  /**
   * Converts an image to text description using vision capabilities
   * @param {string|Buffer} file - Path to the image file or a Buffer containing the image data
   * @param {string} [prompt] - Optional text prompt to guide the description
   * @param {string} [model] - Model to use for image-to-text conversion, defaults to instance model
   * @param {string} [systemPrompt] - System prompt to set context for the description
   * @param {number} [max_tokens] - Maximum tokens to generate in the description
   * @param {string} [mimeType] - The mime type of the file. Used only when file is a Buffer (default image/png); for a path it is always derived from the extension
   * @returns {Promise<string>} Text description of the image, or "" when the response carries no content
   * @throws {Error} If the file cannot be read or the HTTP response is not OK
   */
  async imageToText(
    file,
    prompt = "Describe this image in detail.",
    model = this.#model,
    systemPrompt = "You are an AI assistant that describes images accurately and in detail.",
    max_tokens = 1000,
    mimeType = "image/png",
  ) {
    let buffer;
    if (Buffer.isBuffer(file)) {
      buffer = file;
    } else {
      buffer = await readFile(file);
      const extension = file.split(".").pop().toLowerCase();
      mimeType = `image/${extension === "jpg" ? "jpeg" : extension}`;
    }
    const base64 = buffer.toString("base64");
    const body = {
      model: model,
      messages: [
        {
          role: "system",
          content: systemPrompt,
        },
        {
          role: "user",
          content: [
            {
              type: "text",
              text: prompt,
            },
            {
              type: "image_url",
              image_url: {
                url: `data:${mimeType};base64,${base64}`,
              },
            },
          ],
        },
      ],
      max_tokens,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // A successful response without `choices` yields "" rather than a TypeError.
    return json.choices?.[0]?.message?.content || "";
  }
}
/**
 * Builds a fetch function that forwards the configured HTTPS proxy, if any.
 *
 * Reads `HTTPS_PROXY` first and falls back to `https_proxy`. With neither set,
 * the global `fetch` is returned as-is; otherwise the returned wrapper adds a
 * `proxy` option (overriding any caller-supplied `proxy`) to every request.
 * NOTE(review): `proxy` is a runtime-specific fetch extension — confirm the
 * target runtime honours it.
 * @param {object} [process] - Process object for environment variable access
 * @returns {(url: string, options?: object) => Promise<Response>} Fetch function with proxy support
 */
export function createProxyAwareFetch(process = global.process) {
  const proxyUrl = process.env.HTTPS_PROXY || process.env.https_proxy;

  if (proxyUrl) {
    return (url, options = {}) => fetch(url, { ...options, proxy: proxyUrl });
  }

  return fetch;
}
/**
 * Convenience factory that wires an LlmApi together with default collaborators.
 *
 * A fresh retry helper is created via `createRetry()` for every instance.
 * @param {string} token - LLM API token
 * @param {string} model - Model to use
 * @param {string} baseUrl - Base URL for the LLM API (required, e.g. https://models.github.ai/orgs/{org})
 * @param {string|null} embeddingBaseUrl - Base URL for embeddings (null falls back to baseUrl with OpenAI-compatible format)
 * @param {number} [temperature] - Temperature for completions
 * @param {(url: string, options?: object) => Promise<Response>} [fetchFn] - HTTP client function
 * @param {() => object} [tokenizerFn] - Tokenizer factory function
 * @returns {LlmApi} Configured LlmApi instance
 * @throws {Error} If baseUrl is missing or empty
 */
export function createLlmApi(
  token,
  model,
  baseUrl,
  embeddingBaseUrl,
  temperature = 0.3,
  fetchFn = createProxyAwareFetch(),
  tokenizerFn = createTokenizer,
) {
  const hasBaseUrl = Boolean(baseUrl);
  if (!hasBaseUrl) {
    const message =
      "baseUrl is required. Set LLM_BASE_URL to https://models.github.ai/orgs/{YOUR_ORG} for org-level PATs.";
    throw new Error(message);
  }

  // Note the constructor takes temperature last, after the injected collaborators
  const constructorArgs = [
    token,
    model,
    baseUrl,
    embeddingBaseUrl,
    createRetry(),
    fetchFn,
    tokenizerFn,
    temperature,
  ];
  return new LlmApi(...constructorArgs);
}
/**
 * Scales a vector so that its Euclidean length becomes 1.
 *
 * A vector whose magnitude is zero cannot be scaled and is returned as a copy.
 * The input is never mutated.
 * @param {number[]} vector - Vector to normalize
 * @returns {number[]} Normalized vector
 */
export function normalizeVector(vector) {
  let sumOfSquares = 0;
  vector.forEach((component) => {
    sumOfSquares = sumOfSquares + component * component;
  });

  const length = Math.sqrt(sumOfSquares);
  if (length !== 0) {
    return vector.map((component) => component / length);
  }

  // Zero vector: hand back a copy rather than dividing by zero
  return vector.slice();
}
-66
/**
 * Lookup table from provider-prefixed model name to its context window size in tokens.
 * Seeded from GitHub Models API via `./scripts/env.sh node scripts/models.js`
 * @type {Map<string, number>}
 */
export const BUDGETS = new Map([
  // AI21 Labs
  ["ai21-labs/ai21-jamba-1.5-large", 262_144],

  // Cohere
  ["cohere/cohere-command-a", 131_072],
  ["cohere/cohere-command-r-08-2024", 131_072],
  ["cohere/cohere-command-r-plus-08-2024", 131_072],

  // DeepSeek
  ["deepseek/deepseek-r1", 128_000],
  ["deepseek/deepseek-r1-0528", 128_000],
  ["deepseek/deepseek-v3-0324", 128_000],

  // Meta
  ["meta/llama-3.2-11b-vision-instruct", 128_000],
  ["meta/llama-3.2-90b-vision-instruct", 128_000],
  ["meta/llama-3.3-70b-instruct", 128_000],
  ["meta/llama-4-maverick-17b-128e-instruct-fp8", 1_000_000],
  ["meta/llama-4-scout-17b-16e-instruct", 10_000_000],
  ["meta/meta-llama-3.1-405b-instruct", 131_072],
  ["meta/meta-llama-3.1-8b-instruct", 131_072],

  // Microsoft
  ["microsoft/mai-ds-r1", 128_000],
  ["microsoft/phi-4", 16_384],
  ["microsoft/phi-4-mini-instruct", 128_000],
  ["microsoft/phi-4-mini-reasoning", 128_000],
  ["microsoft/phi-4-multimodal-instruct", 128_000],
  ["microsoft/phi-4-reasoning", 32_768],

  // Mistral AI
  ["mistral-ai/codestral-2501", 256_000],
  ["mistral-ai/ministral-3b", 131_072],
  ["mistral-ai/mistral-medium-2505", 128_000],
  ["mistral-ai/mistral-small-2503", 128_000],

  // OpenAI
  ["openai/gpt-4.1", 1_048_576],
  ["openai/gpt-4.1-mini", 1_048_576],
  ["openai/gpt-4.1-nano", 1_048_576],
  ["openai/gpt-4o", 131_072],
  ["openai/gpt-4o-mini", 131_072],
  ["openai/gpt-5", 200_000],
  ["openai/gpt-5-chat", 200_000],
  ["openai/gpt-5-mini", 200_000],
  ["openai/gpt-5-nano", 200_000],
  ["openai/o1", 200_000],
  ["openai/o1-mini", 128_000],
  ["openai/o1-preview", 128_000],
  ["openai/o3", 200_000],
  ["openai/o3-mini", 200_000],
  ["openai/o4-mini", 200_000],
  ["openai/text-embedding-3-large", 8_191],
  ["openai/text-embedding-3-small", 8_191],

  // xAI
  ["xai/grok-3", 131_072],
  ["xai/grok-3-mini", 131_072],
]);
/**
 * Looks up the context window token budget registered for a model.
 * @param {string} model - Model name with provider prefix (e.g., 'openai/gpt-5')
 * @returns {number} Token budget for the model
 * @throws {Error} If no (truthy) budget is registered for the model in BUDGETS; the message lists every known model
 */
export function getBudget(model) {
  const budget = BUDGETS.get(model);
  if (budget) {
    return budget;
  }

  const knownModels = Array.from(BUDGETS.keys()).join(", ");
  throw new Error(`Unknown model: ${model}. Known models: ${knownModels}`);
}
| import { test, describe, beforeEach, mock } from "node:test"; | ||
| import assert from "node:assert"; | ||
| import { LlmApi, DEFAULT_BASE_URL } from "../index.js"; | ||
| import { Retry } from "@forwardimpact/libutil"; | ||
const EMBEDDING_BASE_URL = "http://localhost:8090";

// Exercises LlmApi's HTTP-facing methods against a mocked fetch function.
describe("LlmApi", () => {
  let mockFetch;
  let llmApi;
  let retry;

  // Successful response stub whose json() resolves to the given payload
  const jsonResponse = (payload) => ({
    ok: true,
    json: mock.fn(() => Promise.resolve(payload)),
  });

  // Failed response stub whose text() resolves to the given details
  const failureResponse = (status, statusText, details) => ({
    ok: false,
    status,
    statusText,
    text: mock.fn(() => Promise.resolve(details)),
  });

  // Queues a single response on the shared mock fetch
  const respondOnce = (response) =>
    mockFetch.mock.mockImplementationOnce(() => Promise.resolve(response));

  // Chat completion stub carrying one assistant message with tool calls
  const toolCallCompletion = (content, toolCalls, totalTokens) =>
    jsonResponse({
      id: "test-id",
      object: "chat.completion",
      choices: [
        {
          message: {
            role: "assistant",
            content,
            tool_calls: toolCalls,
          },
        },
      ],
      usage: { total_tokens: totalTokens },
    });

  // Single function-type tool call entry
  const functionCall = (id, name, args) => ({
    id,
    type: "function",
    function: { name, arguments: args },
  });

  beforeEach(() => {
    mockFetch = mock.fn();
    retry = new Retry();
    llmApi = new LlmApi(
      "test-token",
      "gpt-4",
      DEFAULT_BASE_URL,
      EMBEDDING_BASE_URL,
      retry,
      mockFetch,
    );
  });

  test("creates LlmApi with token and model", () => {
    assert.ok(llmApi instanceof LlmApi);
  });

  test("createCompletions makes correct API call", async () => {
    respondOnce(
      jsonResponse({
        id: "test-id",
        object: "chat.completion",
        choices: [{ message: { role: "assistant", content: "Hello" } }],
        usage: { total_tokens: 10 },
      }),
    );

    const result = await llmApi.createCompletions(
      [{ role: "user", content: "Hello" }],
      undefined,
      0.5,
      100,
    );

    assert.strictEqual(mockFetch.mock.callCount(), 1);
    const [url, options] = mockFetch.mock.calls[0].arguments;
    assert.strictEqual(url, `${DEFAULT_BASE_URL}/chat/completions`);
    assert.strictEqual(options.method, "POST");
    assert.ok(options.headers.Authorization.includes("test-token"));
    assert.strictEqual(result.id, "test-id");
  });

  test("createCompletions uses default model when not specified", async () => {
    respondOnce(
      jsonResponse({
        id: "test-id",
        object: "chat.completion",
        choices: [],
        usage: { total_tokens: 10 },
      }),
    );

    await llmApi.createCompletions([{ role: "user", content: "Hello" }]);

    const [, options] = mockFetch.mock.calls[0].arguments;
    const { model } = JSON.parse(options.body);
    assert.strictEqual(model, "gpt-4");
  });

  test("createCompletions throws error on HTTP error", async () => {
    respondOnce(failureResponse(404, "Not Found", "Error details"));

    const messages = [{ role: "user", content: "Hello" }];
    await assert.rejects(() => llmApi.createCompletions(messages), {
      message: /HTTP 404: Not Found/,
    });
  });

  test("createCompletions throws error immediately on non-retryable HTTP error", async () => {
    respondOnce(failureResponse(400, "Bad Request", "Invalid request details"));

    const messages = [{ role: "user", content: "Hello" }];
    await assert.rejects(() => llmApi.createCompletions(messages), {
      message: /HTTP 400: Bad Request/,
    });
    assert.strictEqual(mockFetch.mock.callCount(), 1);
  });

  test("createCompletions fixes multi_tool_use.parallel hallucination", async () => {
    const parallelArgs = JSON.stringify({
      tool_uses: [
        {
          recipient_name: "functions.get_ontology",
          parameters: {},
        },
        {
          recipient_name: "functions.get_subjects",
          parameters: { type: "schema:Person" },
        },
      ],
    });
    respondOnce(
      toolCallCompletion(
        "Planning to call tools...",
        [functionCall("call_abc123", "multi_tool_use.parallel", parallelArgs)],
        100,
      ),
    );

    const messages = [{ role: "user", content: "Query the graph" }];
    const result = await llmApi.createCompletions({ messages });

    const toolCalls = result.choices[0].message.tool_calls;
    assert.strictEqual(toolCalls.length, 2);

    const [call0, call1] = [toolCalls[0], toolCalls[1]];
    assert.strictEqual(call0.function.name, "get_ontology");
    assert.strictEqual(call0.id, "call_abc123_0");
    assert.deepStrictEqual(JSON.parse(call0.function.arguments), {});

    assert.strictEqual(call1.function.name, "get_subjects");
    assert.strictEqual(call1.id, "call_abc123_1");
    assert.deepStrictEqual(JSON.parse(call1.function.arguments), {
      type: "schema:Person",
    });
  });

  test("createCompletions fixes parallel hallucination (short form)", async () => {
    const parallelArgs = JSON.stringify({
      tool_uses: [
        {
          recipient_name: "search_content",
          parameters: { query: "test" },
        },
      ],
    });
    respondOnce(
      toolCallCompletion(
        "",
        [functionCall("call_xyz", "parallel", parallelArgs)],
        50,
      ),
    );

    const result = await llmApi.createCompletions({
      messages: [{ role: "user", content: "Search" }],
    });

    const toolCalls = result.choices[0].message.tool_calls;
    assert.strictEqual(toolCalls.length, 1);
    assert.strictEqual(toolCalls[0].function.name, "search_content");
  });

  test("createCompletions preserves normal tool calls", async () => {
    respondOnce(
      toolCallCompletion(
        "",
        [functionCall("call_normal", "search_content", '{"query":"test"}')],
        30,
      ),
    );

    const result = await llmApi.createCompletions({
      messages: [{ role: "user", content: "Test" }],
    });

    const toolCalls = result.choices[0].message.tool_calls;
    assert.strictEqual(toolCalls.length, 1);
    assert.strictEqual(toolCalls[0].function.name, "search_content");
    assert.strictEqual(toolCalls[0].id, "call_normal");
  });

  test("createEmbeddings makes correct TEI API call", async () => {
    respondOnce(
      jsonResponse([
        [0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
      ]),
    );

    const texts = ["Hello", "World"];
    const result = await llmApi.createEmbeddings(texts);

    assert.strictEqual(mockFetch.mock.callCount(), 1);
    const [url, options] = mockFetch.mock.calls[0].arguments;
    assert.strictEqual(url, `${EMBEDDING_BASE_URL}/embed`);
    assert.strictEqual(options.method, "POST");

    const body = JSON.parse(options.body);
    assert.deepStrictEqual(body.inputs, texts);
    assert.strictEqual(body.model, undefined);
    assert.strictEqual(options.headers.Authorization, undefined);
    assert.strictEqual(options.headers["Content-Type"], "application/json");

    assert.strictEqual(result.data.length, 2);
    assert.deepStrictEqual(result.data[0].embedding, [0.1, 0.2, 0.3]);
    assert.deepStrictEqual(result.data[1].embedding, [0.4, 0.5, 0.6]);
    assert.strictEqual(result.model, "bge-small-en-v1.5");
  });

  test("createEmbeddings retries on 429 status", async () => {
    const retryResponse = {
      ok: false,
      status: 429,
      statusText: "Too Many Requests",
    };
    const successResponse = jsonResponse([[0.1, 0.2, 0.3]]);

    // First attempt is rate-limited, every later attempt succeeds
    let attempts = 0;
    mockFetch.mock.mockImplementation(() => {
      attempts += 1;
      return Promise.resolve(attempts === 1 ? retryResponse : successResponse);
    });

    const result = await llmApi.createEmbeddings(["Hello"]);

    assert(mockFetch.mock.callCount() >= 2);
    assert.strictEqual(result.data.length, 1);
  });

  test("createEmbeddings throws error immediately on non-retryable HTTP error", async () => {
    respondOnce(failureResponse(400, "Bad Request", "Invalid request details"));

    const texts = ["Hello"];
    await assert.rejects(() => llmApi.createEmbeddings(texts), {
      message: /HTTP 400: Bad Request/,
    });
    assert.strictEqual(mockFetch.mock.callCount(), 1);
  });

  test("LlmApi falls back to baseUrl when embeddingBaseUrl is null", () => {
    const llm = new LlmApi(
      "test-token",
      "gpt-4",
      DEFAULT_BASE_URL,
      null,
      new Retry(),
      mock.fn(),
    );

    assert.ok(llm instanceof LlmApi);
  });

  test("createEmbeddings uses OpenAI-compatible format when embeddingBaseUrl matches baseUrl", async () => {
    const oaiMockFetch = mock.fn();
    const oaiLlm = new LlmApi(
      "test-token",
      "gpt-4",
      DEFAULT_BASE_URL,
      null,
      new Retry(),
      oaiMockFetch,
    );

    const oaiResponse = jsonResponse({
      object: "list",
      data: [{ object: "embedding", index: 0, embedding: [0.1, 0.2, 0.3] }],
      model: "text-embedding-ada-002",
      usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
    });
    oaiMockFetch.mock.mockImplementationOnce(() =>
      Promise.resolve(oaiResponse),
    );

    const result = await oaiLlm.createEmbeddings(["Hello"]);

    assert.strictEqual(oaiMockFetch.mock.callCount(), 1);
    const [url, options] = oaiMockFetch.mock.calls[0].arguments;
    assert.ok(url.endsWith("/embeddings"));
    assert.ok(!url.endsWith("/embed"));
    assert.strictEqual(options.method, "POST");

    const body = JSON.parse(options.body);
    assert.deepStrictEqual(body.input, ["Hello"]);
    assert.strictEqual(body.model, "gpt-4");

    assert.strictEqual(result.data.length, 1);
    assert.deepStrictEqual(result.data[0].embedding, [0.1, 0.2, 0.3]);
    assert.strictEqual(result.model, "text-embedding-ada-002");
  });

  test("listModels makes correct API call", async () => {
    respondOnce(
      jsonResponse({
        data: [
          { id: "gpt-4", object: "model" },
          { id: "gpt-3.5-turbo", object: "model" },
        ],
      }),
    );

    const result = await llmApi.listModels();

    assert.strictEqual(mockFetch.mock.callCount(), 1);
    const [url, options] = mockFetch.mock.calls[0].arguments;
    assert.strictEqual(
      url,
      DEFAULT_BASE_URL.replace("/inference", "/catalog/models"),
    );
    assert.strictEqual(options.method, "GET");

    assert.strictEqual(result.data.length, 2);
    assert.strictEqual(result.data[0].id, "gpt-4");
    assert.strictEqual(result.data[1].id, "gpt-3.5-turbo");
  });

  test("listModels throws error on HTTP error", async () => {
    respondOnce(failureResponse(401, "Unauthorized", "Auth error details"));

    await assert.rejects(() => llmApi.listModels(), {
      message: /HTTP 401: Unauthorized/,
    });
  });
});
| import { test, describe, beforeEach, mock } from "node:test"; | ||
| import assert from "node:assert"; | ||
| import { LlmApi, DEFAULT_BASE_URL } from "../index.js"; | ||
| import { Retry } from "@forwardimpact/libutil"; | ||
const EMBEDDING_BASE_URL = "http://localhost:8090";

// Covers LlmApi methods that work locally and never touch the mocked fetch.
describe("LlmApi instance methods", () => {
  let llmApi;

  beforeEach(() => {
    llmApi = new LlmApi(
      "test-token",
      "gpt-4",
      DEFAULT_BASE_URL,
      EMBEDDING_BASE_URL,
      new Retry(),
      mock.fn(),
    );
  });

  test("countTokens returns token count for text", () => {
    const count = llmApi.countTokens("Hello, world!");

    assert.strictEqual(typeof count, "number");
    assert(count > 0);
  });

  test("countTokens handles empty text", () => {
    assert.strictEqual(llmApi.countTokens(""), 0);
  });

  test("countTokens handles longer text", () => {
    const shortCount = llmApi.countTokens("Hello");
    const longCount = llmApi.countTokens(
      "Hello, this is a much longer text that should have more tokens",
    );

    assert(longCount > shortCount);
  });
});
// Verifies that the createLlmApi factory builds an instance with its default
// fetch, with and without an HTTPS proxy configured in the environment.
describe("Proxy Support", () => {
  test("createLlmApi creates LlmApi instance with default fetch", async () => {
    const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
      await import("../index.js");

    const llm = createLlmApi(
      "test-token",
      "gpt-4",
      DEFAULT_BASE_URL,
      EMBEDDING_BASE_URL,
    );

    assert.ok(llm instanceof LlmApi);
  });

  test("createLlmApi works without embeddingBaseUrl", async () => {
    const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
      await import("../index.js");

    const llm = createLlmApi("test-token", "gpt-4", DEFAULT_BASE_URL);

    assert.ok(llm instanceof LlmApi);
  });

  test("createLlmApi works when HTTPS_PROXY environment variable is set", async () => {
    const originalProxy = process.env.HTTPS_PROXY;
    process.env.HTTPS_PROXY = "http://proxy.example.com:3128";

    try {
      const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
        await import("../index.js");

      const llm = createLlmApi(
        "test-token",
        "gpt-4",
        DEFAULT_BASE_URL,
        EMBEDDING_BASE_URL,
      );

      assert.ok(llm instanceof LlmApi);
    } finally {
      // Distinguish "was unset" from "was set to an empty string" so the
      // environment is restored exactly as it was found.
      if (originalProxy === undefined) {
        delete process.env.HTTPS_PROXY;
      } else {
        process.env.HTTPS_PROXY = originalProxy;
      }
    }
  });
});
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
4
-50%31365
-34.04%6
-25%551
-46.4%11
10%