Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

@forwardimpact/libllm

Package Overview
Dependencies
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@forwardimpact/libllm - npm Package Compare versions

Comparing version
0.1.84
to
0.1.85
+79
src/hallucination.js
import { tool } from "@forwardimpact/libtype";
/**
 * Tells whether a tool-call function name is one of the pseudo-tool names
 * ("multi_tool_use.parallel" or "parallel") used by a hallucinated parallel
 * wrapper call.
 * @param {string} name - Function name taken from a tool call
 * @returns {boolean} True when the name exactly matches one of the wrapper names
 */
function isParallelHallucination(name) {
  const wrapperNames = ["multi_tool_use.parallel", "parallel"];
  return wrapperNames.includes(name);
}
/**
 * Reads the function name of a nested tool_use entry and removes a leading
 * "functions." prefix when present.
 * @param {object} nestedTool - Nested tool use object; `recipient_name` is preferred over `name`
 * @returns {string} Function name without the prefix, or "" when neither field is set
 */
function extractNestedName(nestedTool) {
  const prefix = "functions.";
  const rawName = nestedTool.recipient_name || nestedTool.name || "";
  if (rawName.startsWith(prefix)) {
    return rawName.slice(prefix.length);
  }
  return rawName;
}
/**
 * Converts a nested tool_use to a proper ToolCall object.
 *
 * The nested arguments are taken from `parameters`, falling back to
 * `arguments`, then to an empty object. The resulting tool call carries its
 * arguments as a JSON string.
 * @param {object} nestedTool - Nested tool use from parallel call
 * @param {string} parentId - Parent tool call ID
 * @param {number} index - Index of this tool in the array
 * @returns {object} Proper ToolCall object with id `${parentId}_${index}`
 */
function convertNestedToolUse(nestedTool, parentId, index) {
  const nestedArgs = nestedTool.parameters || nestedTool.arguments || {};
  // A nested call may already provide its arguments as a serialized JSON
  // string; stringifying that again would double-encode it, so pass it through.
  const serializedArgs =
    typeof nestedArgs === "string" ? nestedArgs : JSON.stringify(nestedArgs);
  return tool.ToolCall.fromObject({
    id: `${parentId}_${index}`,
    type: "function",
    function: {
      name: extractNestedName(nestedTool),
      arguments: serializedArgs,
    },
  });
}
/**
 * Expands a hallucinated parallel tool call into proper individual tool calls.
 *
 * The wrapper's arguments are parsed as JSON and each entry of `tool_uses`
 * becomes its own tool call. When the call cannot be expanded (invalid JSON,
 * no `tool_uses` array, or a nested entry that cannot be converted) the
 * original call is returned untouched instead of being dropped.
 * @param {object} toolCall - The multi_tool_use.parallel tool call
 * @returns {object[]} Array of proper tool calls, or `[toolCall]` when expansion is not possible
 */
function expandParallelToolCall(toolCall) {
  try {
    const args = JSON.parse(toolCall.function.arguments || "{}");
    const toolUses = args.tool_uses;
    if (!Array.isArray(toolUses)) {
      // Not the wrapper shape: keep the call rather than silently losing it
      // (will likely fail downstream, same as the parse-failure path).
      return [toolCall];
    }
    return toolUses.map((nested, i) =>
      convertNestedToolUse(nested, toolCall.id, i),
    );
  } catch {
    // If parsing or conversion fails, keep the original (will likely fail downstream)
    return [toolCall];
  }
}
/**
 * Repairs hallucinated multi_tool_use.parallel tool calls from OpenAI models.
 *
 * Models sometimes emit a pseudo-tool call named "multi_tool_use.parallel" or
 * "parallel" whose arguments wrap several real tool calls. Each such wrapper
 * is replaced in place by its expansion; all other calls pass through
 * unchanged and in their original order.
 * @see https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
 * @param {object[]} toolCalls - Array of tool call objects from LLM response
 * @returns {object[]} New array with wrapper calls expanded; a missing or empty input is returned as-is
 */
export function fixMultiToolUseParallel(toolCalls) {
  const hasCalls = Boolean(toolCalls?.length);
  if (!hasCalls) {
    return toolCalls;
  }
  return toolCalls.flatMap((call) =>
    isParallelHallucination(call.function?.name)
      ? expandParallelToolCall(call)
      : [call],
  );
}
import { readFile } from "node:fs/promises";
import { common, llm } from "@forwardimpact/libtype";
import {
countTokens,
createTokenizer,
createRetry,
} from "@forwardimpact/libutil";
import { fixMultiToolUseParallel } from "./hallucination.js";
// Note: getBudget has moved to @forwardimpact/libmemory as getModelBudget
// This re-export is deprecated and will be removed in a future version
export { getBudget } from "./models.js";
/**
 * Default base URL for the GitHub Models API, already including the
 * `/inference` path segment.
 * @type {string}
 */
export const DEFAULT_BASE_URL = "https://models.github.ai/inference";
/**
 * Normalizes a base URL so request paths can be appended to it directly.
 *
 * Trailing slashes are removed from every URL, so appending "/chat/completions"
 * or "/embed" never yields a double slash. For GitHub Models URLs
 * (containing "models.github.ai") "/inference" is appended when the URL does
 * not already contain it.
 * @param {string} baseUrl - Base URL for the LLM API
 * @returns {string} Normalized base URL without trailing slash
 */
function normalizeBaseUrl(baseUrl) {
  const trimmed = baseUrl.replace(/\/+$/, "");
  // For GitHub Models, ensure /inference is appended if not present
  if (trimmed.includes("models.github.ai") && !trimmed.includes("/inference")) {
    return `${trimmed}/inference`;
  }
  return trimmed;
}
/**
 * LLM API client with direct HTTP calls to OpenAI-compatible endpoints.
 *
 * Chat completions, image descriptions and embeddings are sent through the
 * injected retry helper. Any non-OK HTTP response is turned into an Error
 * carrying the status and the first 200 characters of the response body.
 */
export class LlmApi {
  #model;
  #baseURL;
  #embeddingBaseURL;
  #useTeiEmbeddings;
  #headers;
  #fetch;
  #tokenizer;
  #retry;
  #temperature;

  /**
   * Creates a new LLM API instance
   * @param {string} token - LLM API token
   * @param {string} model - Default model to use for completions
   * @param {string} baseUrl - Base URL for the LLM API
   * @param {string} embeddingBaseUrl - Base URL for embeddings (TEI endpoint or OpenAI-compatible)
   * @param {import("@forwardimpact/libutil").Retry} retry - Retry instance for handling transient errors
   * @param {(url: string, options?: object) => Promise<Response>} fetchFn - HTTP client function (defaults to fetch if not provided)
   * @param {() => object} tokenizerFn - Tokenizer factory; called once to obtain the tokenizer
   * @param {number} [temperature] - Temperature for completions
   * @throws {Error} If baseUrl or retry is missing, or fetchFn/tokenizerFn is not a function
   */
  constructor(
    token,
    model,
    baseUrl,
    embeddingBaseUrl,
    retry,
    fetchFn = fetch,
    tokenizerFn = createTokenizer,
    temperature = 0.3,
  ) {
    if (!baseUrl) throw new Error("baseUrl is required");
    if (!retry) throw new Error("retry is required");
    if (typeof fetchFn !== "function") {
      throw new Error("Invalid fetch function");
    }
    if (typeof tokenizerFn !== "function") {
      throw new Error("Invalid tokenizer function");
    }

    this.#model = model;
    this.#baseURL = normalizeBaseUrl(baseUrl);

    // Store the embedding URL in the same normalized form that is used for
    // the comparison below. Otherwise an embedding URL that normalizes to the
    // LLM base URL would select the OpenAI-compatible path but still post to
    // the un-normalized address.
    const embeddingUrl = embeddingBaseUrl
      ? normalizeBaseUrl(embeddingBaseUrl)
      : "";
    this.#embeddingBaseURL = embeddingUrl || this.#baseURL;
    // TEI format is used only when a distinct embedding endpoint was given.
    this.#useTeiEmbeddings = this.#embeddingBaseURL !== this.#baseURL;

    this.#headers = {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28",
    };
    this.#fetch = fetchFn;
    this.#tokenizer = tokenizerFn();
    this.#retry = retry;
    this.#temperature = temperature;
  }

  /**
   * Throws an Error with HTTP status and a snippet of the response body when response is not OK
   * @param {Response} response - Fetch API response
   * @returns {Promise<void>}
   * @throws {Error} With enriched message including body snippet (at most 200 characters)
   */
  async #throwIfNotOk(response) {
    if (response.ok) return;
    let errorDetails = "";
    try {
      const text = await response.text();
      errorDetails = text ? `: ${text.substring(0, 200)}` : "";
    } catch {
      // Best effort: the status line alone is still reported when the body
      // cannot be read.
    }
    throw new Error(
      `HTTP ${response.status}: ${response.statusText}${errorDetails}`,
    );
  }

  /**
   * Creates chat completions using the LLM API.
   *
   * The window's own properties are spread into the request body; `model` and
   * `temperature` are then set from this instance and override same-named
   * properties of the window.
   * @param {import("@forwardimpact/libtype").memory.Window[]} window - Memory window
   * @returns {Promise<import("@forwardimpact/libtype").llm.CompletionsResponse>} Completion response
   * @throws {Error} When the HTTP response is not OK
   */
  async createCompletions(window) {
    const body = {
      ...window,
      model: this.#model,
      temperature: this.#temperature,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // Fix hallucinated multi_tool_use.parallel calls before converting to protobuf
    for (const choice of json.choices || []) {
      if (choice.message?.tool_calls) {
        choice.message.tool_calls = fixMultiToolUseParallel(
          choice.message.tool_calls,
        );
      }
    }
    return llm.CompletionsResponse.fromObject(json);
  }

  /**
   * Creates embeddings via TEI or OpenAI-compatible endpoint.
   * Uses TEI format when an embedding base URL was given that differs from
   * the LLM base URL after normalization; otherwise uses the
   * OpenAI-compatible /embeddings endpoint on the LLM base URL.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async createEmbeddings(input) {
    if (this.#useTeiEmbeddings) {
      return this.#createTeiEmbeddings(input);
    }
    return this.#createOpenAIEmbeddings(input);
  }

  /**
   * TEI (Text Embeddings Inference) format: POST /embed
   *
   * Sent without the authorization headers. The result reports a fixed model
   * name and zeroed usage counters.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createTeiEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ inputs: input }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // TEI returns [[0.1, 0.2, ...]]
    return common.Embeddings.fromObject({
      object: "list",
      data: json.map((embedding, index) => ({
        object: "embedding",
        index,
        embedding,
      })),
      model: "bge-small-en-v1.5",
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    });
  }

  /**
   * OpenAI-compatible format: POST /embeddings
   *
   * Missing `object`, `model` and `usage` fields of the response are filled
   * with defaults.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createOpenAIEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embeddings`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify({
          input,
          model: this.#model,
        }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    return common.Embeddings.fromObject({
      object: json.object || "list",
      data: json.data.map((item) => ({
        object: item.object || "embedding",
        index: item.index,
        embedding: item.embedding,
      })),
      model: json.model || this.#model,
      usage: json.usage || {
        prompt_tokens: 0,
        completion_tokens: 0,
        total_tokens: 0,
      },
    });
  }

  /**
   * Lists models available to the current user.
   *
   * Unlike the other requests, this call is made directly and not through
   * the retry helper.
   * @returns {Promise<object[]>} Array of available models
   * @throws {Error} When the HTTP response is not OK
   */
  async listModels() {
    // GitHub Models catalog is at the root domain, not org-specific
    const catalogUrl = "https://models.github.ai/catalog/models";
    const response = await this.#fetch(catalogUrl, {
      method: "GET",
      headers: this.#headers,
    });
    await this.#throwIfNotOk(response);
    return response.json();
  }

  /**
   * Counts tokens in the given text using the tokenizer
   * @param {string} text - The text to count tokens for
   * @returns {number} Number of tokens in the text
   */
  countTokens(text) {
    return countTokens(text, this.#tokenizer);
  }

  /**
   * Converts an image to text description using vision capabilities
   * @param {string|Buffer} file - Path to the image file or a Buffer containing the image data
   * @param {string} [prompt] - Optional text prompt to guide the description
   * @param {string} [model] - Model to use for image-to-text conversion, defaults to instance model
   * @param {string} [systemPrompt] - System prompt to set context for the description
   * @param {number} [max_tokens] - Maximum tokens to generate in the description
   * @param {string} [mimeType] - Mime type used when file is a Buffer (defaults to image/png). For a path the mime type is always derived from the file extension.
   * @returns {Promise<string>} Text description of the image, or "" when the response has no content
   * @throws {Error} When the HTTP response is not OK
   */
  async imageToText(
    file,
    prompt = "Describe this image in detail.",
    model = this.#model,
    systemPrompt = "You are an AI assistant that describes images accurately and in detail.",
    max_tokens = 1000,
    mimeType = "image/png",
  ) {
    let buffer;
    let resolvedMimeType = mimeType;
    if (Buffer.isBuffer(file)) {
      buffer = file;
    } else {
      buffer = await readFile(file);
      const extension = file.split(".").pop().toLowerCase();
      // "jpg" is not a registered subtype; the mime type is image/jpeg.
      resolvedMimeType = `image/${extension === "jpg" ? "jpeg" : extension}`;
    }
    const base64 = buffer.toString("base64");
    const body = {
      model,
      messages: [
        {
          role: "system",
          content: systemPrompt,
        },
        {
          role: "user",
          content: [
            {
              type: "text",
              text: prompt,
            },
            {
              type: "image_url",
              image_url: {
                url: `data:${resolvedMimeType};base64,${base64}`,
              },
            },
          ],
        },
      ],
      max_tokens,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // A response without a choices array yields "" instead of a TypeError.
    return json.choices?.[0]?.message?.content || "";
  }
}
/**
 * Builds a fetch function that honours the HTTPS_PROXY environment variable
 * (upper-case spelling first, then lower-case `https_proxy`).
 *
 * Without a proxy setting the global fetch itself is returned. With one, the
 * returned wrapper forwards every call to fetch and adds a `proxy` option
 * holding the proxy URL.
 * @param {object} [process] - Process object for environment variable access
 * @returns {(url: string, options?: object) => Promise<Response>} Fetch function with proxy support
 */
export function createProxyAwareFetch(process = global.process) {
  const { env } = process;
  const proxyUrl = env.HTTPS_PROXY || env.https_proxy;
  if (proxyUrl) {
    return (url, options = {}) => fetch(url, { ...options, proxy: proxyUrl });
  }
  return fetch;
}
/**
 * Factory that wires an LlmApi instance with a freshly created retry helper
 * and, unless overridden, a proxy-aware fetch and the default tokenizer factory.
 * @param {string} token - LLM API token
 * @param {string} model - Model to use
 * @param {string} baseUrl - Base URL for the LLM API (required, e.g. https://models.github.ai/orgs/{org})
 * @param {string|null} embeddingBaseUrl - Base URL for embeddings (null falls back to baseUrl with OpenAI-compatible format)
 * @param {number} [temperature] - Temperature for completions
 * @param {(url: string, options?: object) => Promise<Response>} [fetchFn] - HTTP client function
 * @param {() => object} [tokenizerFn] - Tokenizer factory function
 * @returns {LlmApi} Configured LlmApi instance
 * @throws {Error} If baseUrl is missing
 */
export function createLlmApi(
  token,
  model,
  baseUrl,
  embeddingBaseUrl,
  temperature = 0.3,
  fetchFn = createProxyAwareFetch(),
  tokenizerFn = createTokenizer,
) {
  if (baseUrl) {
    // Note the argument order: LlmApi takes the retry helper before fetch and
    // tokenizer, and temperature last.
    return new LlmApi(
      token,
      model,
      baseUrl,
      embeddingBaseUrl,
      createRetry(),
      fetchFn,
      tokenizerFn,
      temperature,
    );
  }
  throw new Error(
    "baseUrl is required. Set LLM_BASE_URL to https://models.github.ai/orgs/{YOUR_ORG} for org-level PATs.",
  );
}
/**
 * Scales a vector to unit (Euclidean) length.
 * @param {number[]} vector - Vector to normalize
 * @returns {number[]} New vector of length 1, or a copy of the input when its magnitude is 0
 */
export function normalizeVector(vector) {
  const squaredSum = vector.reduce(
    (total, component) => total + component * component,
    0,
  );
  const length = Math.sqrt(squaredSum);
  // A zero vector has no direction; hand back a copy rather than dividing by 0.
  return length === 0
    ? vector.slice()
    : vector.map((component) => component / length);
}
/**
* Static map of model names to their context window token budgets
* Seeded from GitHub Models API via `./scripts/env.sh node scripts/models.js`
* @type {Map<string, number>}
*/
export const BUDGETS = new Map([
["ai21-labs/ai21-jamba-1.5-large", 262144],
["cohere/cohere-command-a", 131072],
["cohere/cohere-command-r-08-2024", 131072],
["cohere/cohere-command-r-plus-08-2024", 131072],
["deepseek/deepseek-r1", 128000],
["deepseek/deepseek-r1-0528", 128000],
["deepseek/deepseek-v3-0324", 128000],
["meta/llama-3.2-11b-vision-instruct", 128000],
["meta/llama-3.2-90b-vision-instruct", 128000],
["meta/llama-3.3-70b-instruct", 128000],
["meta/llama-4-maverick-17b-128e-instruct-fp8", 1000000],
["meta/llama-4-scout-17b-16e-instruct", 10000000],
["meta/meta-llama-3.1-405b-instruct", 131072],
["meta/meta-llama-3.1-8b-instruct", 131072],
["microsoft/mai-ds-r1", 128000],
["microsoft/phi-4", 16384],
["microsoft/phi-4-mini-instruct", 128000],
["microsoft/phi-4-mini-reasoning", 128000],
["microsoft/phi-4-multimodal-instruct", 128000],
["microsoft/phi-4-reasoning", 32768],
["mistral-ai/codestral-2501", 256000],
["mistral-ai/ministral-3b", 131072],
["mistral-ai/mistral-medium-2505", 128000],
["mistral-ai/mistral-small-2503", 128000],
["openai/gpt-4.1", 1048576],
["openai/gpt-4.1-mini", 1048576],
["openai/gpt-4.1-nano", 1048576],
["openai/gpt-4o", 131072],
["openai/gpt-4o-mini", 131072],
["openai/gpt-5", 200000],
["openai/gpt-5-chat", 200000],
["openai/gpt-5-mini", 200000],
["openai/gpt-5-nano", 200000],
["openai/o1", 200000],
["openai/o1-mini", 128000],
["openai/o1-preview", 128000],
["openai/o3", 200000],
["openai/o3-mini", 200000],
["openai/o4-mini", 200000],
["openai/text-embedding-3-large", 8191],
["openai/text-embedding-3-small", 8191],
["xai/grok-3", 131072],
["xai/grok-3-mini", 131072],
]);
/**
 * Looks up the context window token budget of a model in BUDGETS.
 * @param {string} model - Model name with provider prefix (e.g., 'openai/gpt-5')
 * @returns {number} Token budget for the model
 * @throws {Error} If model is not found in BUDGETS; the message lists all known models
 */
export function getBudget(model) {
  const budget = BUDGETS.get(model);
  if (budget) {
    return budget;
  }
  const knownModels = Array.from(BUDGETS.keys()).join(", ");
  throw new Error(`Unknown model: ${model}. Known models: ${knownModels}`);
}
+11
-2
{
"name": "@forwardimpact/libllm",
"version": "0.1.84",
"version": "0.1.85",
"description": "LLM API client for OpenAI-compatible endpoints",

@@ -8,6 +8,15 @@ "license": "Apache-2.0",

"type": "module",
"main": "index.js",
"main": "./src/index.js",
"exports": {
".": "./src/index.js",
"./bin/fit-completion.js": "./bin/fit-completion.js"
},
"bin": {
"fit-completion": "./bin/fit-completion.js"
},
"files": [
"src/**/*.js",
"bin/**/*.js",
"README.md"
],
"engines": {

@@ -14,0 +23,0 @@ "bun": ">=1.2.0",

-79
import { tool } from "@forwardimpact/libtype";
/**
 * Tells whether a tool-call function name is one of the pseudo-tool names
 * ("multi_tool_use.parallel" or "parallel") used by a hallucinated parallel
 * wrapper call.
 * @param {string} name - Function name taken from a tool call
 * @returns {boolean} True when the name exactly matches one of the wrapper names
 */
function isParallelHallucination(name) {
  const wrapperNames = ["multi_tool_use.parallel", "parallel"];
  return wrapperNames.includes(name);
}
/**
 * Reads the function name of a nested tool_use entry and removes a leading
 * "functions." prefix when present.
 * @param {object} nestedTool - Nested tool use object; `recipient_name` is preferred over `name`
 * @returns {string} Function name without the prefix, or "" when neither field is set
 */
function extractNestedName(nestedTool) {
  const prefix = "functions.";
  const rawName = nestedTool.recipient_name || nestedTool.name || "";
  if (rawName.startsWith(prefix)) {
    return rawName.slice(prefix.length);
  }
  return rawName;
}
/**
 * Converts a nested tool_use to a proper ToolCall object.
 *
 * The nested arguments are taken from `parameters`, falling back to
 * `arguments`, then to an empty object. The resulting tool call carries its
 * arguments as a JSON string.
 * @param {object} nestedTool - Nested tool use from parallel call
 * @param {string} parentId - Parent tool call ID
 * @param {number} index - Index of this tool in the array
 * @returns {object} Proper ToolCall object with id `${parentId}_${index}`
 */
function convertNestedToolUse(nestedTool, parentId, index) {
  const nestedArgs = nestedTool.parameters || nestedTool.arguments || {};
  // A nested call may already provide its arguments as a serialized JSON
  // string; stringifying that again would double-encode it, so pass it through.
  const serializedArgs =
    typeof nestedArgs === "string" ? nestedArgs : JSON.stringify(nestedArgs);
  return tool.ToolCall.fromObject({
    id: `${parentId}_${index}`,
    type: "function",
    function: {
      name: extractNestedName(nestedTool),
      arguments: serializedArgs,
    },
  });
}
/**
 * Expands a hallucinated parallel tool call into proper individual tool calls.
 *
 * The wrapper's arguments are parsed as JSON and each entry of `tool_uses`
 * becomes its own tool call. When the call cannot be expanded (invalid JSON,
 * no `tool_uses` array, or a nested entry that cannot be converted) the
 * original call is returned untouched instead of being dropped.
 * @param {object} toolCall - The multi_tool_use.parallel tool call
 * @returns {object[]} Array of proper tool calls, or `[toolCall]` when expansion is not possible
 */
function expandParallelToolCall(toolCall) {
  try {
    const args = JSON.parse(toolCall.function.arguments || "{}");
    const toolUses = args.tool_uses;
    if (!Array.isArray(toolUses)) {
      // Not the wrapper shape: keep the call rather than silently losing it
      // (will likely fail downstream, same as the parse-failure path).
      return [toolCall];
    }
    return toolUses.map((nested, i) =>
      convertNestedToolUse(nested, toolCall.id, i),
    );
  } catch {
    // If parsing or conversion fails, keep the original (will likely fail downstream)
    return [toolCall];
  }
}
/**
 * Repairs hallucinated multi_tool_use.parallel tool calls from OpenAI models.
 *
 * Models sometimes emit a pseudo-tool call named "multi_tool_use.parallel" or
 * "parallel" whose arguments wrap several real tool calls. Each such wrapper
 * is replaced in place by its expansion; all other calls pass through
 * unchanged and in their original order.
 * @see https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
 * @param {object[]} toolCalls - Array of tool call objects from LLM response
 * @returns {object[]} New array with wrapper calls expanded; a missing or empty input is returned as-is
 */
export function fixMultiToolUseParallel(toolCalls) {
  const hasCalls = Boolean(toolCalls?.length);
  if (!hasCalls) {
    return toolCalls;
  }
  return toolCalls.flatMap((call) =>
    isParallelHallucination(call.function?.name)
      ? expandParallelToolCall(call)
      : [call],
  );
}
import { readFile } from "node:fs/promises";
import { common, llm } from "@forwardimpact/libtype";
import {
countTokens,
createTokenizer,
createRetry,
} from "@forwardimpact/libutil";
import { fixMultiToolUseParallel } from "./hallucination.js";
// Note: getBudget has moved to @forwardimpact/libmemory as getModelBudget
// This re-export is deprecated and will be removed in a future version
export { getBudget } from "./models.js";
/**
 * Default base URL for the GitHub Models API, already including the
 * `/inference` path segment.
 * @type {string}
 */
export const DEFAULT_BASE_URL = "https://models.github.ai/inference";
/**
 * Normalizes a base URL so request paths can be appended to it directly.
 *
 * Trailing slashes are removed from every URL, so appending "/chat/completions"
 * or "/embed" never yields a double slash. For GitHub Models URLs
 * (containing "models.github.ai") "/inference" is appended when the URL does
 * not already contain it.
 * @param {string} baseUrl - Base URL for the LLM API
 * @returns {string} Normalized base URL without trailing slash
 */
function normalizeBaseUrl(baseUrl) {
  const trimmed = baseUrl.replace(/\/+$/, "");
  // For GitHub Models, ensure /inference is appended if not present
  if (trimmed.includes("models.github.ai") && !trimmed.includes("/inference")) {
    return `${trimmed}/inference`;
  }
  return trimmed;
}
/**
 * LLM API client with direct HTTP calls to OpenAI-compatible endpoints.
 *
 * Chat completions, image descriptions and embeddings are sent through the
 * injected retry helper. Any non-OK HTTP response is turned into an Error
 * carrying the status and the first 200 characters of the response body.
 */
export class LlmApi {
  #model;
  #baseURL;
  #embeddingBaseURL;
  #useTeiEmbeddings;
  #headers;
  #fetch;
  #tokenizer;
  #retry;
  #temperature;

  /**
   * Creates a new LLM API instance
   * @param {string} token - LLM API token
   * @param {string} model - Default model to use for completions
   * @param {string} baseUrl - Base URL for the LLM API
   * @param {string} embeddingBaseUrl - Base URL for embeddings (TEI endpoint or OpenAI-compatible)
   * @param {import("@forwardimpact/libutil").Retry} retry - Retry instance for handling transient errors
   * @param {(url: string, options?: object) => Promise<Response>} fetchFn - HTTP client function (defaults to fetch if not provided)
   * @param {() => object} tokenizerFn - Tokenizer factory; called once to obtain the tokenizer
   * @param {number} [temperature] - Temperature for completions
   * @throws {Error} If baseUrl or retry is missing, or fetchFn/tokenizerFn is not a function
   */
  constructor(
    token,
    model,
    baseUrl,
    embeddingBaseUrl,
    retry,
    fetchFn = fetch,
    tokenizerFn = createTokenizer,
    temperature = 0.3,
  ) {
    if (!baseUrl) throw new Error("baseUrl is required");
    if (!retry) throw new Error("retry is required");
    if (typeof fetchFn !== "function") {
      throw new Error("Invalid fetch function");
    }
    if (typeof tokenizerFn !== "function") {
      throw new Error("Invalid tokenizer function");
    }

    this.#model = model;
    this.#baseURL = normalizeBaseUrl(baseUrl);

    // Store the embedding URL in the same normalized form that is used for
    // the comparison below. Otherwise an embedding URL that normalizes to the
    // LLM base URL would select the OpenAI-compatible path but still post to
    // the un-normalized address.
    const embeddingUrl = embeddingBaseUrl
      ? normalizeBaseUrl(embeddingBaseUrl)
      : "";
    this.#embeddingBaseURL = embeddingUrl || this.#baseURL;
    // TEI format is used only when a distinct embedding endpoint was given.
    this.#useTeiEmbeddings = this.#embeddingBaseURL !== this.#baseURL;

    this.#headers = {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28",
    };
    this.#fetch = fetchFn;
    this.#tokenizer = tokenizerFn();
    this.#retry = retry;
    this.#temperature = temperature;
  }

  /**
   * Throws an Error with HTTP status and a snippet of the response body when response is not OK
   * @param {Response} response - Fetch API response
   * @returns {Promise<void>}
   * @throws {Error} With enriched message including body snippet (at most 200 characters)
   */
  async #throwIfNotOk(response) {
    if (response.ok) return;
    let errorDetails = "";
    try {
      const text = await response.text();
      errorDetails = text ? `: ${text.substring(0, 200)}` : "";
    } catch {
      // Best effort: the status line alone is still reported when the body
      // cannot be read.
    }
    throw new Error(
      `HTTP ${response.status}: ${response.statusText}${errorDetails}`,
    );
  }

  /**
   * Creates chat completions using the LLM API.
   *
   * The window's own properties are spread into the request body; `model` and
   * `temperature` are then set from this instance and override same-named
   * properties of the window.
   * @param {import("@forwardimpact/libtype").memory.Window[]} window - Memory window
   * @returns {Promise<import("@forwardimpact/libtype").llm.CompletionsResponse>} Completion response
   * @throws {Error} When the HTTP response is not OK
   */
  async createCompletions(window) {
    const body = {
      ...window,
      model: this.#model,
      temperature: this.#temperature,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // Fix hallucinated multi_tool_use.parallel calls before converting to protobuf
    for (const choice of json.choices || []) {
      if (choice.message?.tool_calls) {
        choice.message.tool_calls = fixMultiToolUseParallel(
          choice.message.tool_calls,
        );
      }
    }
    return llm.CompletionsResponse.fromObject(json);
  }

  /**
   * Creates embeddings via TEI or OpenAI-compatible endpoint.
   * Uses TEI format when an embedding base URL was given that differs from
   * the LLM base URL after normalization; otherwise uses the
   * OpenAI-compatible /embeddings endpoint on the LLM base URL.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async createEmbeddings(input) {
    if (this.#useTeiEmbeddings) {
      return this.#createTeiEmbeddings(input);
    }
    return this.#createOpenAIEmbeddings(input);
  }

  /**
   * TEI (Text Embeddings Inference) format: POST /embed
   *
   * Sent without the authorization headers. The result reports a fixed model
   * name and zeroed usage counters.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createTeiEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ inputs: input }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // TEI returns [[0.1, 0.2, ...]]
    return common.Embeddings.fromObject({
      object: "list",
      data: json.map((embedding, index) => ({
        object: "embedding",
        index,
        embedding,
      })),
      model: "bge-small-en-v1.5",
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    });
  }

  /**
   * OpenAI-compatible format: POST /embeddings
   *
   * Missing `object`, `model` and `usage` fields of the response are filled
   * with defaults.
   * @param {string[]} input - Array of text strings to embed
   * @returns {Promise<import("@forwardimpact/libtype").common.Embeddings>} Embeddings response
   */
  async #createOpenAIEmbeddings(input) {
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#embeddingBaseURL}/embeddings`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify({
          input,
          model: this.#model,
        }),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    return common.Embeddings.fromObject({
      object: json.object || "list",
      data: json.data.map((item) => ({
        object: item.object || "embedding",
        index: item.index,
        embedding: item.embedding,
      })),
      model: json.model || this.#model,
      usage: json.usage || {
        prompt_tokens: 0,
        completion_tokens: 0,
        total_tokens: 0,
      },
    });
  }

  /**
   * Lists models available to the current user.
   *
   * Unlike the other requests, this call is made directly and not through
   * the retry helper.
   * @returns {Promise<object[]>} Array of available models
   * @throws {Error} When the HTTP response is not OK
   */
  async listModels() {
    // GitHub Models catalog is at the root domain, not org-specific
    const catalogUrl = "https://models.github.ai/catalog/models";
    const response = await this.#fetch(catalogUrl, {
      method: "GET",
      headers: this.#headers,
    });
    await this.#throwIfNotOk(response);
    return response.json();
  }

  /**
   * Counts tokens in the given text using the tokenizer
   * @param {string} text - The text to count tokens for
   * @returns {number} Number of tokens in the text
   */
  countTokens(text) {
    return countTokens(text, this.#tokenizer);
  }

  /**
   * Converts an image to text description using vision capabilities
   * @param {string|Buffer} file - Path to the image file or a Buffer containing the image data
   * @param {string} [prompt] - Optional text prompt to guide the description
   * @param {string} [model] - Model to use for image-to-text conversion, defaults to instance model
   * @param {string} [systemPrompt] - System prompt to set context for the description
   * @param {number} [max_tokens] - Maximum tokens to generate in the description
   * @param {string} [mimeType] - Mime type used when file is a Buffer (defaults to image/png). For a path the mime type is always derived from the file extension.
   * @returns {Promise<string>} Text description of the image, or "" when the response has no content
   * @throws {Error} When the HTTP response is not OK
   */
  async imageToText(
    file,
    prompt = "Describe this image in detail.",
    model = this.#model,
    systemPrompt = "You are an AI assistant that describes images accurately and in detail.",
    max_tokens = 1000,
    mimeType = "image/png",
  ) {
    let buffer;
    let resolvedMimeType = mimeType;
    if (Buffer.isBuffer(file)) {
      buffer = file;
    } else {
      buffer = await readFile(file);
      const extension = file.split(".").pop().toLowerCase();
      // "jpg" is not a registered subtype; the mime type is image/jpeg.
      resolvedMimeType = `image/${extension === "jpg" ? "jpeg" : extension}`;
    }
    const base64 = buffer.toString("base64");
    const body = {
      model,
      messages: [
        {
          role: "system",
          content: systemPrompt,
        },
        {
          role: "user",
          content: [
            {
              type: "text",
              text: prompt,
            },
            {
              type: "image_url",
              image_url: {
                url: `data:${resolvedMimeType};base64,${base64}`,
              },
            },
          ],
        },
      ],
      max_tokens,
    };
    const response = await this.#retry.execute(() =>
      this.#fetch(`${this.#baseURL}/chat/completions`, {
        method: "POST",
        headers: this.#headers,
        body: JSON.stringify(body),
      }),
    );
    await this.#throwIfNotOk(response);
    const json = await response.json();
    // A response without a choices array yields "" instead of a TypeError.
    return json.choices?.[0]?.message?.content || "";
  }
}
/**
 * Builds a fetch function that honours the HTTPS_PROXY environment variable
 * (upper-case spelling first, then lower-case `https_proxy`).
 *
 * Without a proxy setting the global fetch itself is returned. With one, the
 * returned wrapper forwards every call to fetch and adds a `proxy` option
 * holding the proxy URL.
 * @param {object} [process] - Process object for environment variable access
 * @returns {(url: string, options?: object) => Promise<Response>} Fetch function with proxy support
 */
export function createProxyAwareFetch(process = global.process) {
  const { env } = process;
  const proxyUrl = env.HTTPS_PROXY || env.https_proxy;
  if (proxyUrl) {
    return (url, options = {}) => fetch(url, { ...options, proxy: proxyUrl });
  }
  return fetch;
}
/**
 * Factory that wires an LlmApi instance with a freshly created retry helper
 * and, unless overridden, a proxy-aware fetch and the default tokenizer factory.
 * @param {string} token - LLM API token
 * @param {string} model - Model to use
 * @param {string} baseUrl - Base URL for the LLM API (required, e.g. https://models.github.ai/orgs/{org})
 * @param {string|null} embeddingBaseUrl - Base URL for embeddings (null falls back to baseUrl with OpenAI-compatible format)
 * @param {number} [temperature] - Temperature for completions
 * @param {(url: string, options?: object) => Promise<Response>} [fetchFn] - HTTP client function
 * @param {() => object} [tokenizerFn] - Tokenizer factory function
 * @returns {LlmApi} Configured LlmApi instance
 * @throws {Error} If baseUrl is missing
 */
export function createLlmApi(
  token,
  model,
  baseUrl,
  embeddingBaseUrl,
  temperature = 0.3,
  fetchFn = createProxyAwareFetch(),
  tokenizerFn = createTokenizer,
) {
  if (baseUrl) {
    // Note the argument order: LlmApi takes the retry helper before fetch and
    // tokenizer, and temperature last.
    return new LlmApi(
      token,
      model,
      baseUrl,
      embeddingBaseUrl,
      createRetry(),
      fetchFn,
      tokenizerFn,
      temperature,
    );
  }
  throw new Error(
    "baseUrl is required. Set LLM_BASE_URL to https://models.github.ai/orgs/{YOUR_ORG} for org-level PATs.",
  );
}
/**
 * Scales a vector to unit (Euclidean) length.
 * @param {number[]} vector - Vector to normalize
 * @returns {number[]} New vector of length 1, or a copy of the input when its magnitude is 0
 */
export function normalizeVector(vector) {
  const squaredSum = vector.reduce(
    (total, component) => total + component * component,
    0,
  );
  const length = Math.sqrt(squaredSum);
  // A zero vector has no direction; hand back a copy rather than dividing by 0.
  return length === 0
    ? vector.slice()
    : vector.map((component) => component / length);
}
/**
* Static map of model names to their context window token budgets
* Seeded from GitHub Models API via `./scripts/env.sh node scripts/models.js`
* @type {Map<string, number>}
*/
export const BUDGETS = new Map([
["ai21-labs/ai21-jamba-1.5-large", 262144],
["cohere/cohere-command-a", 131072],
["cohere/cohere-command-r-08-2024", 131072],
["cohere/cohere-command-r-plus-08-2024", 131072],
["deepseek/deepseek-r1", 128000],
["deepseek/deepseek-r1-0528", 128000],
["deepseek/deepseek-v3-0324", 128000],
["meta/llama-3.2-11b-vision-instruct", 128000],
["meta/llama-3.2-90b-vision-instruct", 128000],
["meta/llama-3.3-70b-instruct", 128000],
["meta/llama-4-maverick-17b-128e-instruct-fp8", 1000000],
["meta/llama-4-scout-17b-16e-instruct", 10000000],
["meta/meta-llama-3.1-405b-instruct", 131072],
["meta/meta-llama-3.1-8b-instruct", 131072],
["microsoft/mai-ds-r1", 128000],
["microsoft/phi-4", 16384],
["microsoft/phi-4-mini-instruct", 128000],
["microsoft/phi-4-mini-reasoning", 128000],
["microsoft/phi-4-multimodal-instruct", 128000],
["microsoft/phi-4-reasoning", 32768],
["mistral-ai/codestral-2501", 256000],
["mistral-ai/ministral-3b", 131072],
["mistral-ai/mistral-medium-2505", 128000],
["mistral-ai/mistral-small-2503", 128000],
["openai/gpt-4.1", 1048576],
["openai/gpt-4.1-mini", 1048576],
["openai/gpt-4.1-nano", 1048576],
["openai/gpt-4o", 131072],
["openai/gpt-4o-mini", 131072],
["openai/gpt-5", 200000],
["openai/gpt-5-chat", 200000],
["openai/gpt-5-mini", 200000],
["openai/gpt-5-nano", 200000],
["openai/o1", 200000],
["openai/o1-mini", 128000],
["openai/o1-preview", 128000],
["openai/o3", 200000],
["openai/o3-mini", 200000],
["openai/o4-mini", 200000],
["openai/text-embedding-3-large", 8191],
["openai/text-embedding-3-small", 8191],
["xai/grok-3", 131072],
["xai/grok-3-mini", 131072],
]);
/**
 * Looks up the context window token budget of a model in BUDGETS.
 * @param {string} model - Model name with provider prefix (e.g., 'openai/gpt-5')
 * @returns {number} Token budget for the model
 * @throws {Error} If model is not found in BUDGETS; the message lists all known models
 */
export function getBudget(model) {
  const budget = BUDGETS.get(model);
  if (budget) {
    return budget;
  }
  const knownModels = Array.from(BUDGETS.keys()).join(", ");
  throw new Error(`Unknown model: ${model}. Known models: ${knownModels}`);
}
import { test, describe, beforeEach, mock } from "node:test";
import assert from "node:assert";
import { LlmApi, DEFAULT_BASE_URL } from "../index.js";
import { Retry } from "@forwardimpact/libutil";
// Base URL handed to LlmApi as its fourth constructor argument in this suite;
// the TEI embeddings test expects requests at `${EMBEDDING_BASE_URL}/embed`.
const EMBEDDING_BASE_URL = "http://localhost:8090";
describe("LlmApi", () => {
// Shared fixtures, rebuilt before every test so that mock call history and
// the LlmApi instance never leak from one test into the next.
let mockFetch;
let llmApi;
let retry;

beforeEach(() => {
  mockFetch = mock.fn();
  retry = new Retry();
  // Positional constructor arguments: token, model, base URL,
  // embedding base URL, retry helper, fetch implementation.
  const constructorArgs = [
    "test-token",
    "gpt-4",
    DEFAULT_BASE_URL,
    EMBEDDING_BASE_URL,
    retry,
    mockFetch,
  ];
  llmApi = new LlmApi(...constructorArgs);
});
// Sanity check: the fixture built in beforeEach is an LlmApi instance.
test("creates LlmApi with token and model", () => {
  const isLlmApi = llmApi instanceof LlmApi;
  assert.ok(isLlmApi);
});
// Checks that createCompletions performs exactly one POST to the chat
// completions endpoint, sends the token in the Authorization header, and
// returns the parsed response.
test("createCompletions makes correct API call", async () => {
  const response = {
    ok: true,
    json: mock.fn(async () => ({
      id: "test-id",
      object: "chat.completion",
      choices: [{ message: { role: "assistant", content: "Hello" } }],
      usage: { total_tokens: 10 },
    })),
  };
  mockFetch.mock.mockImplementationOnce(async () => response);

  const result = await llmApi.createCompletions(
    [{ role: "user", content: "Hello" }],
    undefined, // tools
    0.5, // temperature
    100, // max_tokens
  );

  assert.strictEqual(mockFetch.mock.callCount(), 1);
  const firstCall = mockFetch.mock.calls[0];
  const [endpoint, init] = firstCall.arguments;
  assert.strictEqual(endpoint, `${DEFAULT_BASE_URL}/chat/completions`);
  assert.strictEqual(init.method, "POST");
  assert.ok(init.headers.Authorization.includes("test-token"));
  assert.strictEqual(result.id, "test-id");
});
// Checks that the request body carries the model given to the constructor
// ("gpt-4") when the call itself does not name a model.
test("createCompletions uses default model when not specified", async () => {
  const response = {
    ok: true,
    json: mock.fn(async () => ({
      id: "test-id",
      object: "chat.completion",
      choices: [],
      usage: { total_tokens: 10 },
    })),
  };
  mockFetch.mock.mockImplementationOnce(async () => response);

  await llmApi.createCompletions([{ role: "user", content: "Hello" }]);

  const init = mockFetch.mock.calls[0].arguments[1];
  const payload = JSON.parse(init.body);
  assert.strictEqual(payload.model, "gpt-4");
});
// A non-ok 404 response must surface as a rejection whose message contains
// the status code and status text.
test("createCompletions throws error on HTTP error", async () => {
  const notFound = {
    ok: false,
    status: 404,
    statusText: "Not Found",
    text: mock.fn(async () => "Error details"),
  };
  mockFetch.mock.mockImplementationOnce(async () => notFound);

  const attempt = () =>
    llmApi.createCompletions([{ role: "user", content: "Hello" }]);
  await assert.rejects(attempt, { message: /HTTP 404: Not Found/ });
});
// A 400 response must reject without a second fetch attempt: the mock is
// expected to have been called exactly once.
test("createCompletions throws error immediately on non-retryable HTTP error", async () => {
  const badRequest = {
    ok: false,
    status: 400,
    statusText: "Bad Request",
    text: mock.fn(async () => "Invalid request details"),
  };
  mockFetch.mock.mockImplementationOnce(async () => badRequest);

  const attempt = () =>
    llmApi.createCompletions([{ role: "user", content: "Hello" }]);
  await assert.rejects(attempt, { message: /HTTP 400: Bad Request/ });

  assert.strictEqual(mockFetch.mock.callCount(), 1);
});
// A response containing a single "multi_tool_use.parallel" tool call must come
// back as one tool call per nested tool use: names lose the "functions."
// prefix, ids become `${parentId}_${index}`, and parameters are re-serialised
// as the arguments string.
// NOTE(review): this test passes an options object ({ messages }) whereas
// earlier tests in this suite pass messages positionally — confirm which
// call shape createCompletions expects.
test("createCompletions fixes multi_tool_use.parallel hallucination", async () => {
  const buildCompletion = () => {
    const toolUses = [
      {
        recipient_name: "functions.get_ontology",
        parameters: {},
      },
      {
        recipient_name: "functions.get_subjects",
        parameters: { type: "schema:Person" },
      },
    ];
    const parallelCall = {
      id: "call_abc123",
      type: "function",
      function: {
        name: "multi_tool_use.parallel",
        arguments: JSON.stringify({ tool_uses: toolUses }),
      },
    };
    return {
      id: "test-id",
      object: "chat.completion",
      choices: [
        {
          message: {
            role: "assistant",
            content: "Planning to call tools...",
            tool_calls: [parallelCall],
          },
        },
      ],
      usage: { total_tokens: 100 },
    };
  };
  const response = { ok: true, json: mock.fn(async () => buildCompletion()) };
  mockFetch.mock.mockImplementationOnce(async () => response);

  const result = await llmApi.createCompletions({
    messages: [{ role: "user", content: "Query the graph" }],
  });

  const toolCalls = result.choices[0].message.tool_calls;
  assert.strictEqual(toolCalls.length, 2);

  const ontologyCall = toolCalls[0];
  assert.strictEqual(ontologyCall.function.name, "get_ontology");
  assert.strictEqual(ontologyCall.id, "call_abc123_0");
  assert.deepStrictEqual(JSON.parse(ontologyCall.function.arguments), {});

  const subjectsCall = toolCalls[1];
  assert.strictEqual(subjectsCall.function.name, "get_subjects");
  assert.strictEqual(subjectsCall.id, "call_abc123_1");
  assert.deepStrictEqual(JSON.parse(subjectsCall.function.arguments), {
    type: "schema:Person",
  });
});
// Same expansion as the long-form case, but the hallucinated call is named
// just "parallel" and the nested recipient name carries no "functions." prefix.
test("createCompletions fixes parallel hallucination (short form)", async () => {
  const buildCompletion = () => {
    const parallelCall = {
      id: "call_xyz",
      type: "function",
      function: {
        name: "parallel",
        arguments: JSON.stringify({
          tool_uses: [
            {
              recipient_name: "search_content",
              parameters: { query: "test" },
            },
          ],
        }),
      },
    };
    return {
      id: "test-id",
      object: "chat.completion",
      choices: [
        {
          message: {
            role: "assistant",
            content: "",
            tool_calls: [parallelCall],
          },
        },
      ],
      usage: { total_tokens: 50 },
    };
  };
  const response = { ok: true, json: mock.fn(async () => buildCompletion()) };
  mockFetch.mock.mockImplementationOnce(async () => response);

  const result = await llmApi.createCompletions({
    messages: [{ role: "user", content: "Search" }],
  });

  const toolCalls = result.choices[0].message.tool_calls;
  assert.strictEqual(toolCalls.length, 1);
  assert.strictEqual(toolCalls[0].function.name, "search_content");
});
// An ordinary tool call (not a parallel hallucination) must pass through with
// its count, function name and id unchanged.
test("createCompletions preserves normal tool calls", async () => {
  const buildCompletion = () => {
    const normalCall = {
      id: "call_normal",
      type: "function",
      function: {
        name: "search_content",
        arguments: '{"query":"test"}',
      },
    };
    return {
      id: "test-id",
      object: "chat.completion",
      choices: [
        {
          message: {
            role: "assistant",
            content: "",
            tool_calls: [normalCall],
          },
        },
      ],
      usage: { total_tokens: 30 },
    };
  };
  const response = { ok: true, json: mock.fn(async () => buildCompletion()) };
  mockFetch.mock.mockImplementationOnce(async () => response);

  const result = await llmApi.createCompletions({
    messages: [{ role: "user", content: "Test" }],
  });

  const toolCalls = result.choices[0].message.tool_calls;
  assert.strictEqual(toolCalls.length, 1);
  assert.strictEqual(toolCalls[0].function.name, "search_content");
  assert.strictEqual(toolCalls[0].id, "call_normal");
});
// With a dedicated embedding base URL, createEmbeddings must POST
// `{ inputs }` to `${EMBEDDING_BASE_URL}/embed` with no model field and no
// Authorization header, and wrap the raw vector array into
// `{ data: [{ embedding }], model }`.
test("createEmbeddings makes correct TEI API call", async () => {
  const response = {
    ok: true,
    json: mock.fn(async () => [
      [0.1, 0.2, 0.3],
      [0.4, 0.5, 0.6],
    ]),
  };
  mockFetch.mock.mockImplementationOnce(async () => response);

  const inputs = ["Hello", "World"];
  const result = await llmApi.createEmbeddings(inputs);

  // Request shape
  assert.strictEqual(mockFetch.mock.callCount(), 1);
  const firstCall = mockFetch.mock.calls[0];
  const [endpoint, init] = firstCall.arguments;
  assert.strictEqual(endpoint, `${EMBEDDING_BASE_URL}/embed`);
  assert.strictEqual(init.method, "POST");
  const payload = JSON.parse(init.body);
  assert.deepStrictEqual(payload.inputs, inputs);
  assert.strictEqual(payload.model, undefined);
  assert.strictEqual(init.headers.Authorization, undefined);
  assert.strictEqual(init.headers["Content-Type"], "application/json");

  // Response shape
  assert.strictEqual(result.data.length, 2);
  assert.deepStrictEqual(result.data[0].embedding, [0.1, 0.2, 0.3]);
  assert.deepStrictEqual(result.data[1].embedding, [0.4, 0.5, 0.6]);
  assert.strictEqual(result.model, "bge-small-en-v1.5");
});
// The first fetch answers 429 and every later one succeeds; the call must
// still resolve, and fetch must have been invoked at least twice.
test("createEmbeddings retries on 429 status", async () => {
  const rateLimited = {
    ok: false,
    status: 429,
    statusText: "Too Many Requests",
  };
  const succeeded = {
    ok: true,
    json: mock.fn(async () => [[0.1, 0.2, 0.3]]),
  };

  let attempts = 0;
  mockFetch.mock.mockImplementation(async () => {
    attempts += 1;
    return attempts === 1 ? rateLimited : succeeded;
  });

  const result = await llmApi.createEmbeddings(["Hello"]);

  assert(mockFetch.mock.callCount() >= 2);
  assert.strictEqual(result.data.length, 1);
});
// A 400 response from the embedding endpoint must reject after exactly one
// fetch attempt.
test("createEmbeddings throws error immediately on non-retryable HTTP error", async () => {
  const badRequest = {
    ok: false,
    status: 400,
    statusText: "Bad Request",
    text: mock.fn(async () => "Invalid request details"),
  };
  mockFetch.mock.mockImplementationOnce(async () => badRequest);

  const attempt = () => llmApi.createEmbeddings(["Hello"]);
  await assert.rejects(attempt, { message: /HTTP 400: Bad Request/ });

  assert.strictEqual(mockFetch.mock.callCount(), 1);
});
// Constructing LlmApi with a null embedding base URL must succeed.
// NOTE(review): only construction is asserted here; the fallback to baseUrl
// named in the title is not observed by this test.
test("LlmApi falls back to baseUrl when embeddingBaseUrl is null", () => {
  const teiMockFetch = mock.fn();
  const teiRetry = new Retry();
  const constructorArgs = [
    "test-token",
    "gpt-4",
    DEFAULT_BASE_URL,
    null,
    teiRetry,
    teiMockFetch,
  ];
  const llm = new LlmApi(...constructorArgs);
  assert.ok(llm instanceof LlmApi);
});
// With a null embedding base URL, createEmbeddings must POST `{ input, model }`
// to a URL ending in "/embeddings" and return the provider's response fields
// (data, model) as received.
test("createEmbeddings uses OpenAI-compatible format when embeddingBaseUrl matches baseUrl", async () => {
  const oaiMockFetch = mock.fn();
  const oaiRetry = new Retry();
  const oaiLlm = new LlmApi(
    "test-token",
    "gpt-4",
    DEFAULT_BASE_URL,
    null,
    oaiRetry,
    oaiMockFetch,
  );

  const response = {
    ok: true,
    json: mock.fn(async () => ({
      object: "list",
      data: [{ object: "embedding", index: 0, embedding: [0.1, 0.2, 0.3] }],
      model: "text-embedding-ada-002",
      usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
    })),
  };
  oaiMockFetch.mock.mockImplementationOnce(async () => response);

  const result = await oaiLlm.createEmbeddings(["Hello"]);

  // Request shape
  assert.strictEqual(oaiMockFetch.mock.callCount(), 1);
  const firstCall = oaiMockFetch.mock.calls[0];
  const [endpoint, init] = firstCall.arguments;
  assert.ok(endpoint.endsWith("/embeddings"));
  assert.ok(!endpoint.endsWith("/embed"));
  assert.strictEqual(init.method, "POST");
  const payload = JSON.parse(init.body);
  assert.deepStrictEqual(payload.input, ["Hello"]);
  assert.strictEqual(payload.model, "gpt-4");

  // Response shape
  assert.strictEqual(result.data.length, 1);
  assert.deepStrictEqual(result.data[0].embedding, [0.1, 0.2, 0.3]);
  assert.strictEqual(result.model, "text-embedding-ada-002");
});
// listModels must issue one GET to the catalog URL (DEFAULT_BASE_URL with
// "/inference" swapped for "/catalog/models") and return the parsed list.
test("listModels makes correct API call", async () => {
  const response = {
    ok: true,
    json: mock.fn(async () => ({
      data: [
        { id: "gpt-4", object: "model" },
        { id: "gpt-3.5-turbo", object: "model" },
      ],
    })),
  };
  mockFetch.mock.mockImplementationOnce(async () => response);

  const result = await llmApi.listModels();

  assert.strictEqual(mockFetch.mock.callCount(), 1);
  const firstCall = mockFetch.mock.calls[0];
  const [endpoint, init] = firstCall.arguments;
  const expectedUrl = DEFAULT_BASE_URL.replace("/inference", "/catalog/models");
  assert.strictEqual(endpoint, expectedUrl);
  assert.strictEqual(init.method, "GET");

  assert.strictEqual(result.data.length, 2);
  assert.strictEqual(result.data[0].id, "gpt-4");
  assert.strictEqual(result.data[1].id, "gpt-3.5-turbo");
});
// A 401 response must surface as a rejection naming the status code and text.
test("listModels throws error on HTTP error", async () => {
  const unauthorized = {
    ok: false,
    status: 401,
    statusText: "Unauthorized",
    text: mock.fn(async () => "Auth error details"),
  };
  mockFetch.mock.mockImplementationOnce(async () => unauthorized);

  const attempt = () => llmApi.listModels();
  await assert.rejects(attempt, { message: /HTTP 401: Unauthorized/ });
});
});
import { test, describe, beforeEach, mock } from "node:test";
import assert from "node:assert";
import { LlmApi, DEFAULT_BASE_URL } from "../index.js";
import { Retry } from "@forwardimpact/libutil";
// Embedding base URL passed as the fourth argument to LlmApi / createLlmApi
// in the tests below.
const EMBEDDING_BASE_URL = "http://localhost:8090";
describe("LlmApi instance methods", () => {
// Fixtures rebuilt before every test; the fetch mock is local to the hook
// because no test in this suite inspects it.
let llmApi;
let retry;

beforeEach(() => {
  const mockFetch = mock.fn();
  retry = new Retry();
  const constructorArgs = [
    "test-token",
    "gpt-4",
    DEFAULT_BASE_URL,
    EMBEDDING_BASE_URL,
    retry,
    mockFetch,
  ];
  llmApi = new LlmApi(...constructorArgs);
});
// countTokens must return a positive number for non-empty text.
test("countTokens returns token count for text", () => {
  const tokenCount = llmApi.countTokens("Hello, world!");
  assert.strictEqual(typeof tokenCount, "number");
  assert(tokenCount > 0);
});
// The empty string must count as exactly zero tokens.
test("countTokens handles empty text", () => {
  const tokenCount = llmApi.countTokens("");
  assert.strictEqual(tokenCount, 0);
});
// A longer sentence must yield strictly more tokens than a single word.
test("countTokens handles longer text", () => {
  const shortCount = llmApi.countTokens("Hello");
  const longCount = llmApi.countTokens(
    "Hello, this is a much longer text that should have more tokens",
  );
  assert(longCount > shortCount);
});
});
describe("Proxy Support", () => {
// The createLlmApi factory, called without a retry helper or fetch
// implementation, must still return an LlmApi instance.
test("createLlmApi creates LlmApi instance with default fetch", async () => {
  const {
    createLlmApi: buildLlmApi,
    LlmApi: LlmApiClass,
    DEFAULT_BASE_URL: baseUrl,
  } = await import("../index.js");

  const llm = buildLlmApi("test-token", "gpt-4", baseUrl, EMBEDDING_BASE_URL);

  assert.ok(llm instanceof LlmApiClass);
});
// The factory must also work when the embedding base URL argument is omitted.
test("createLlmApi works without embeddingBaseUrl", async () => {
  const {
    createLlmApi: buildLlmApi,
    LlmApi: LlmApiClass,
    DEFAULT_BASE_URL: baseUrl,
  } = await import("../index.js");

  const llm = buildLlmApi("test-token", "gpt-4", baseUrl);

  assert.ok(llm instanceof LlmApiClass);
});
// The factory must still return an LlmApi instance while HTTPS_PROXY is set.
// The variable is overridden only for the duration of the test and restored
// in `finally` so a failing assertion cannot leak the proxy setting.
test("createLlmApi works when HTTPS_PROXY environment variable is set", async () => {
  const originalProxy = process.env.HTTPS_PROXY;
  process.env.HTTPS_PROXY = "http://proxy.example.com:3128";
  try {
    const { createLlmApi, LlmApi, DEFAULT_BASE_URL } =
      await import("../index.js");
    const llm = createLlmApi(
      "test-token",
      "gpt-4",
      DEFAULT_BASE_URL,
      EMBEDDING_BASE_URL,
    );
    assert.ok(llm instanceof LlmApi);
  } finally {
    // Restore the exact prior state: delete only when the variable was truly
    // unset. A truthiness check would wrongly delete a variable that was
    // originally set to the empty string.
    if (originalProxy === undefined) {
      delete process.env.HTTPS_PROXY;
    } else {
      process.env.HTTPS_PROXY = originalProxy;
    }
  }
});
});