| // src/middleware/context-compaction/prompt.ts | ||
/** Opening guard of the system prompt: demands a plain-text answer and forbids tool calls. */
var NO_TOOLS_PREAMBLE = [
  "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.",
  "- Do NOT use any tools or functions.",
  "- You already have all the context you need in the conversation below.",
  "- Tool calls will be REJECTED and your response will be discarded.",
  "- Your entire response must be plain text containing the summary.",
  ""
].join("\n");

/** Main summarization instructions, listing the nine sections the summary should contain. */
var SUMMARY_INSTRUCTION = [
  "Your task is to create a detailed summary of the conversation history provided below. This summary will replace the older portion of the conversation so that work can continue without losing important context.",
  "Your summary should include the following sections:",
  "1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail.",
  "2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.",
  "3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include code snippets where applicable and a summary of why each file is important.",
  "4. Errors and Fixes: List all errors encountered and how they were fixed. Include any user feedback on corrections.",
  "5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.",
  "6. All User Messages: List ALL user messages that are not tool results. These are critical for understanding user feedback and changing intent.",
  "7. Pending Tasks: Outline any pending tasks that were explicitly requested.",
  "8. Current Work: Describe precisely what was being worked on most recently, including file names and code snippets where applicable.",
  "9. Optional Next Step: List the next step that should be taken, directly in line with the most recent user requests. If the last task was concluded, only list next steps that are explicitly requested.",
  "Be thorough and precise. Technical details, file paths, and code patterns are essential for continuing work without losing context."
].join("\n");

/** Closing reminder appended after the length hint. */
var NO_TOOLS_TRAILER = "\n\nREMINDER: Do NOT call any tools. Respond with plain text only containing the summary.";

/**
 * Builds the two-message prompt sent to the summarizer.
 *
 * @param serializedHistory Text rendering of the history to summarize; placed in the user message.
 * @param targetTokens Approximate summary length, interpolated into the system message.
 * @returns An array with one system message (preamble, instructions, length hint, trailer)
 *   followed by one user message holding a single text part.
 */
function buildCompactionPrompt(serializedHistory, targetTokens) {
  const lengthHint = `\nTarget summary length: approximately ${targetTokens} tokens. Be concise but do not omit important details.`;
  const systemMessage = {
    role: "system",
    content: [NO_TOOLS_PREAMBLE, SUMMARY_INSTRUCTION, lengthHint, NO_TOOLS_TRAILER].join("")
  };
  const userMessage = {
    role: "user",
    content: [
      { type: "text", text: `Here is the conversation history to summarize:\n${serializedHistory}` }
    ]
  };
  return [systemMessage, userMessage];
}
| // src/middleware/context-compaction/model-message-utils.ts | ||
// Fixed character costs charged for parts whose real size is not measured.
var PLACEHOLDER_IMAGE_CHARS = 1e3;
var PLACEHOLDER_FILE_CHARS = 200;
var PLACEHOLDER_APPROVAL_CHARS = 80;

/**
 * Estimates the token count of a message list as ceil(total characters / 4).
 *
 * @param messages Messages to measure.
 * @returns The estimated token count (0 for an empty list).
 */
function estimateModelMessageTokens(messages) {
  let totalChars = 0;
  for (const message of messages) totalChars += messageCharCount(message);
  return Math.ceil(totalChars / 4);
}

/**
 * Character cost of one message: the string length for system messages and
 * string content, otherwise the sum of the per-part costs.
 */
function messageCharCount(msg) {
  const { content } = msg;
  if (msg.role === "system" || typeof content === "string") {
    return content.length;
  }
  let total = 0;
  for (const part of content) total += partCharCount(part);
  return total;
}

/**
 * Character cost of one content part. Text and reasoning count their text,
 * images and files use fixed placeholders, tool calls/results count the tool
 * name plus the payload, and everything else (approval parts and unknown
 * types) costs PLACEHOLDER_APPROVAL_CHARS.
 */
function partCharCount(part) {
  const kind = part.type;
  if (kind === "text" || kind === "reasoning") return part.text.length;
  if (kind === "image") return PLACEHOLDER_IMAGE_CHARS;
  if (kind === "file") return PLACEHOLDER_FILE_CHARS;
  if (kind === "tool-call") return part.toolName.length + safeJsonLength(part.input);
  if (kind === "tool-result") return part.toolName.length + toolResultLength(part.output);
  return PLACEHOLDER_APPROVAL_CHARS;
}

/**
 * Length of the JSON encoding of `value` (null/undefined are encoded as "").
 * Returns 50 when the value cannot be encoded.
 */
function safeJsonLength(value) {
  const FALLBACK_LENGTH = 50;
  try {
    const encoded = JSON.stringify(value ?? "");
    // `encoded` is undefined for functions/symbols; the property access then
    // throws and lands in the fallback below.
    return encoded.length;
  } catch {
    return FALLBACK_LENGTH;
  }
}

/**
 * Character cost of a tool-result output object, keyed on its `type`.
 * Non-object outputs and unknown types cost a flat 50.
 */
function toolResultLength(output) {
  if (output == null || typeof output !== "object") return 50;
  const textLength = (raw) => String(raw ?? "").length;
  const { type } = output;
  if (type === "text") return textLength(output.value);
  if (type === "json" || type === "content") return safeJsonLength(output.value);
  if (type === "execution-denied") return textLength(output.reason) + 30;
  if (type === "error-text") return textLength(output.value) + 20;
  if (type === "error-json") return safeJsonLength(output.value) + 20;
  return 50;
}
/**
 * Splits a conversation into the leading system messages, the older history,
 * and the recent window that is kept as-is.
 *
 * The recent window starts as the last `keepRecentMessages` non-system
 * messages and is widened backwards until no older message shares a tool-call
 * id or approval id with a kept message.
 *
 * @param messages Full message list.
 * @param keepRecentMessages Number of trailing non-system messages to keep.
 * @returns `{ systemPrefix, olderHistory, recentWindow }`; `olderHistory` is
 *   empty when there are no more than `keepRecentMessages` non-system messages.
 */
function splitModelMessages(messages, keepRecentMessages) {
  const firstNonSystem = messages.findIndex((m) => m.role !== "system");
  const prefixLength = firstNonSystem === -1 ? messages.length : firstNonSystem;
  const systemPrefix = messages.slice(0, prefixLength);
  const conversation = messages.slice(prefixLength);
  if (conversation.length <= keepRecentMessages) {
    return { systemPrefix, olderHistory: [], recentWindow: conversation };
  }

  let boundary = conversation.length - keepRecentMessages;
  // Each pass either moves the boundary to an earlier index or stops; the
  // pass budget is a hard cap on top of that.
  for (let passesLeft = conversation.length + 1; passesLeft > 0; passesLeft--) {
    const keptIds = /* @__PURE__ */ new Set();
    for (const kept of conversation.slice(boundary)) {
      collectIds(kept, keptIds);
    }
    if (keptIds.size === 0) break;
    const earliestLinked = conversation
      .slice(0, boundary)
      .findIndex((older) => messageReferencesAny(older, keptIds));
    if (earliestLinked === -1) break;
    boundary = earliestLinked;
  }

  return {
    systemPrefix,
    olderHistory: conversation.slice(0, boundary),
    recentWindow: conversation.slice(boundary)
  };
}

/** Adds every tool-call id and approval id found in `msg` to `ids`; system, user and string-content messages contribute nothing. */
function collectIds(msg, ids) {
  const hasNoParts = msg.role === "system" || msg.role === "user" || typeof msg.content === "string";
  if (hasNoParts) return;
  for (const part of msg.content) addPartIds(part, ids);
}

/** Reports whether any part of `msg` carries an id contained in `ids`; always false for system, user and string-content messages. */
function messageReferencesAny(msg, ids) {
  if (msg.role === "system" || msg.role === "user" || typeof msg.content === "string") {
    return false;
  }
  for (const part of msg.content) {
    if (partReferencesAny(part, ids)) return true;
  }
  return false;
}

/** Records the id of a tool-call/tool-result part (`toolCallId`) or an approval part (`approvalId`). */
function addPartIds(part, ids) {
  switch (part.type) {
    case "tool-call":
    case "tool-result":
      ids.add(part.toolCallId);
      break;
    case "tool-approval-request":
    case "tool-approval-response":
      ids.add(part.approvalId);
      break;
  }
}

/** Reports whether the part's tool-call id or approval id is in `ids`; other part types never match. */
function partReferencesAny(part, ids) {
  switch (part.type) {
    case "tool-call":
    case "tool-result":
      return ids.has(part.toolCallId);
    case "tool-approval-request":
    case "tool-approval-response":
      return ids.has(part.approvalId);
    default:
      return false;
  }
}
/**
 * Joins the text of every `type: "text"` entry in `result.content` with
 * newlines and trims the outcome.
 *
 * @param result Generation result exposing a `content` array.
 * @returns The trimmed text, or `null` when nothing but whitespace remains.
 */
function extractSummaryText(result) {
  const pieces = result.content.reduce((collected, entry) => {
    if (entry.type === "text") collected.push(entry.text);
    return collected;
  }, []);
  const joined = pieces.join("\n").trim();
  return joined === "" ? null : joined;
}
| // src/middleware/context-compaction/serialize.ts | ||
/**
 * Renders a message list as plain text, one block per message separated by a
 * blank line.
 *
 * @param messages Messages to render.
 * @returns The transcript text ("" for an empty list).
 */
function serializeModelMessages(messages) {
  return messages.map(serializeMessage).join("\n\n");
}

/**
 * Renders one message as `[ROLE]` on the first line followed by its content.
 * Array content is rendered part by part; parts that render to an empty
 * string are left out.
 */
function serializeMessage(msg) {
  const header = `[${msg.role.toUpperCase()}]`;
  if (msg.role === "system" || typeof msg.content === "string") {
    return `${header}\n${msg.content}`;
  }
  const lines = [];
  for (const part of msg.content) {
    const rendered = serializePart(part);
    if (rendered) lines.push(rendered);
  }
  return `${header}\n${lines.join("\n")}`;
}

/**
 * Renders a single content part. Text is emitted verbatim; every other known
 * part type becomes a bracketed marker. Unknown part types render as "".
 */
function serializePart(part) {
  switch (part.type) {
    case "text":
      return part.text;
    case "reasoning":
      return `[reasoning: ${part.text}]`;
    case "image":
      return "[image]";
    case "file": {
      const suffix = part.filename ? `: ${part.filename}` : "";
      return `[file${suffix}]`;
    }
    case "tool-call":
      return `[tool-call: ${part.toolName}(${safeJsonString(part.input)})]`;
    case "tool-result":
      return `[tool-result: ${part.toolName} \u2192 ${serializeToolOutput(part.output)}]`;
    case "tool-approval-request":
      return `[tool-approval-request: id=${part.approvalId}]`;
    case "tool-approval-response":
      return `[tool-approval-response: id=${part.approvalId} approved=${part.approved}]`;
    default:
      return "";
  }
}

/**
 * JSON-encodes `value` (null/undefined are encoded as ""), always returning a
 * string.
 *
 * @returns The JSON text, or the literal `"[unserializable]"` (quotes
 *   included) when the value cannot be encoded.
 */
function safeJsonString(value) {
  const UNSERIALIZABLE = '"[unserializable]"';
  try {
    const encoded = JSON.stringify(value ?? "");
    // JSON.stringify does not throw for functions and symbols; it returns
    // undefined, which would otherwise leak out as a non-string result.
    return typeof encoded === "string" ? encoded : UNSERIALIZABLE;
  } catch {
    return UNSERIALIZABLE;
  }
}

/**
 * Renders a tool-result output object according to its `type`. Non-object
 * outputs and unknown types render as "".
 */
function serializeToolOutput(output) {
  if (output == null || typeof output !== "object") return "";
  const o = output;
  switch (o.type) {
    case "text":
      return String(o.value ?? "");
    case "json":
    case "content":
      return safeJsonString(o.value);
    case "execution-denied":
      return `denied: ${String(o.reason ?? "")}`;
    case "error-text":
      return `error: ${String(o.value ?? "")}`;
    case "error-json":
      return `error: ${safeJsonString(o.value)}`;
    default:
      return "";
  }
}
| // src/middleware/context-compaction/validate.ts | ||
var DEFAULT_RESERVED_OUTPUT = 16384;
var DEFAULT_THRESHOLD_PCT = 0.8;

/**
 * Validates the options passed to `compactMessages` and throws on the first
 * problem found.
 *
 * Range checks are written as "not inside the valid range" so that NaN and
 * other values that fail every numeric comparison are rejected rather than
 * silently accepted.
 *
 * @param options Options object; reads `maxContextTokens`,
 *   `reservedOutputTokens`, `autoCompactThresholdPct`, `summaryTargetTokens`,
 *   `keepRecentMessages`, `summarize` and `summaryModel`.
 * @throws {Error} When a value is out of range, or when not exactly one of
 *   `summaryModel` / `summarize` is supplied.
 */
function validateConfig(options) {
  const { maxContextTokens } = options;
  if (typeof maxContextTokens !== "number" || !(maxContextTokens > 0)) {
    throw new Error(
      "[compactMessages] maxContextTokens must be a positive number"
    );
  }

  const reservedOutputTokens = options.reservedOutputTokens ?? DEFAULT_RESERVED_OUTPUT;
  if (!(reservedOutputTokens >= 0)) {
    throw new Error(
      "[compactMessages] reservedOutputTokens must be non-negative"
    );
  }
  if (!(reservedOutputTokens < maxContextTokens)) {
    throw new Error(
      `[compactMessages] reservedOutputTokens (${reservedOutputTokens}) must be less than maxContextTokens (${maxContextTokens})`
    );
  }

  const thresholdPct = options.autoCompactThresholdPct ?? DEFAULT_THRESHOLD_PCT;
  if (!(thresholdPct > 0 && thresholdPct <= 1)) {
    throw new Error(
      "[compactMessages] autoCompactThresholdPct must be in (0, 1]"
    );
  }

  const { summaryTargetTokens, keepRecentMessages } = options;
  if (summaryTargetTokens !== void 0) {
    if (!(summaryTargetTokens > 0 && summaryTargetTokens < maxContextTokens)) {
      throw new Error(
        "[compactMessages] summaryTargetTokens must be > 0 and < maxContextTokens"
      );
    }
  }
  if (keepRecentMessages !== void 0) {
    if (!Number.isInteger(keepRecentMessages) || keepRecentMessages < 1) {
      throw new Error(
        "[compactMessages] keepRecentMessages must be a positive integer"
      );
    }
  }

  const hasSummarize = typeof options.summarize === "function";
  const hasSummaryModel = options.summaryModel != null;
  if (hasSummarize && hasSummaryModel) {
    throw new Error(
      "[compactMessages] Pass exactly one of `summaryModel` or `summarize`, not both"
    );
  }
  if (!hasSummarize && !hasSummaryModel) {
    throw new Error(
      "[compactMessages] Either `summaryModel` or `summarize` must be provided"
    );
  }
}
| // src/middleware/context-compaction/compact-messages.ts | ||
var DEFAULT_THRESHOLD_PCT2 = 0.8;
var DEFAULT_SUMMARY_TARGET_PCT = 0.05;
var DEFAULT_RESERVED_OUTPUT2 = 16384;
var DEFAULT_KEEP_RECENT = 1;

/**
 * Replaces the older part of a conversation with a generated summary once the
 * estimated size exceeds `maxContextTokens * autoCompactThresholdPct -
 * reservedOutputTokens`.
 *
 * @param options Compaction options. `messages` and `maxContextTokens` are
 *   read directly; `autoCompactThresholdPct` (0.8), `reservedOutputTokens`
 *   (16384), `keepRecentMessages` (1), `estimateTokens`
 *   (`estimateModelMessageTokens`), `onCompactionFailure` ("passthrough") and
 *   `summaryTargetTokens` (floor of 5% of `maxContextTokens`) have the
 *   defaults shown. The summary comes from `options.summarize` when present,
 *   otherwise from `options.summaryModel`.
 * @returns The original `messages` array when no compaction is needed, when
 *   there is no older history to summarize, or when summarization fails in
 *   "passthrough" mode. Otherwise a new array: system prefix, a user message
 *   holding the summary, an assistant "Understood." message, then the recent
 *   window.
 * @throws {Error} When the options are invalid; when system prefix + summary
 *   target + recent window cannot fit in `maxContextTokens` (thrown
 *   regardless of `onCompactionFailure`); and, with
 *   `onCompactionFailure: "throw"`, when the summarizer fails, returns
 *   nothing usable, or yields an oversize result.
 */
async function compactMessages(options) {
  validateConfig(options);
  const {
    messages,
    maxContextTokens,
    autoCompactThresholdPct = DEFAULT_THRESHOLD_PCT2,
    reservedOutputTokens = DEFAULT_RESERVED_OUTPUT2,
    keepRecentMessages = DEFAULT_KEEP_RECENT,
    estimateTokens = estimateModelMessageTokens,
    onCompactionFailure = "passthrough"
  } = options;
  const summaryTargetTokens = options.summaryTargetTokens ?? Math.floor(maxContextTokens * DEFAULT_SUMMARY_TARGET_PCT);
  const thresholdPct = clamp(autoCompactThresholdPct, 0, 1);
  const threshold = maxContextTokens * thresholdPct - reservedOutputTokens;
  const estimated = estimateTokens(messages);
  if (estimated <= threshold) {
    return messages;
  }

  const { systemPrefix, olderHistory, recentWindow } = splitModelMessages(
    messages,
    keepRecentMessages
  );
  if (olderHistory.length === 0) {
    // Everything is either system prefix or pinned to the recent window.
    return messages;
  }

  // Fail fast before paying for a summary that could never fit.
  const sysTokens = estimateTokens(systemPrefix);
  const recentTokens = estimateTokens(recentWindow);
  const projected = sysTokens + summaryTargetTokens + recentTokens;
  if (projected > maxContextTokens) {
    throw new Error(
      `[compactMessages] Configuration cannot fit: systemPrefix (${sysTokens}) + summaryTarget (${summaryTargetTokens}) + recentWindow (${recentTokens}) = ${projected} exceeds maxContextTokens (${maxContextTokens}). Reduce keepRecentMessages, lower summaryTargetTokens, or trim the system prefix.`
    );
  }

  let summaryText;
  try {
    if (options.summarize) {
      summaryText = await options.summarize(olderHistory, summaryTargetTokens);
    } else {
      summaryText = await defaultSummarize(
        options.summaryModel,
        olderHistory,
        summaryTargetTokens
      );
    }
  } catch (error) {
    if (onCompactionFailure === "throw") throw error;
    return messages;
  }

  // A custom summarizer may resolve to a non-string; treat that like empty
  // output so the failure policy applies instead of a TypeError from .trim().
  if (typeof summaryText !== "string" || summaryText.trim() === "") {
    if (onCompactionFailure === "throw") {
      throw new Error(
        "[compactMessages] Summarizer produced empty output"
      );
    }
    return messages;
  }

  const compacted = [
    ...systemPrefix,
    { role: "user", content: summaryText },
    { role: "assistant", content: "Understood." },
    ...recentWindow
  ];
  const compactedTokens = estimateTokens(compacted);
  if (compactedTokens > maxContextTokens) {
    if (onCompactionFailure === "throw") {
      throw new Error(
        `[compactMessages] Summarizer returned oversize output: compacted prompt is ${compactedTokens} tokens, exceeds maxContextTokens (${maxContextTokens})`
      );
    }
    return messages;
  }
  return compacted;
}
/**
 * Summarizes `olderHistory` with the given model: serializes the history,
 * wraps it in the compaction prompt, and calls `model.doGenerate` with
 * `maxOutputTokens` set to `targetTokens` and no tools.
 *
 * @param model Object exposing `doGenerate(options)`.
 * @param olderHistory Messages to summarize.
 * @param targetTokens Summary length budget.
 * @returns The extracted summary text.
 * @throws {Error} When the result contains no text content.
 */
async function defaultSummarize(model, olderHistory, targetTokens) {
  const prompt = buildCompactionPrompt(
    serializeModelMessages(olderHistory),
    targetTokens
  );
  const callOptions = {
    prompt,
    maxOutputTokens: targetTokens,
    tools: void 0,
    toolChoice: void 0,
    responseFormat: void 0,
    inputFormat: "messages",
    mode: { type: "regular" }
  };
  const summary = extractSummaryText(await model.doGenerate(callOptions));
  if (summary) {
    return summary;
  }
  throw new Error(
    "[compactMessages] Summary model returned no text content"
  );
}
/** Limits `value` to the range [`min`, `max`]; NaN propagates as NaN. */
function clamp(value, min, max) {
  const atLeastMin = Math.max(min, value);
  return Math.min(max, atLeastMin);
}
| export { | ||
| compactMessages | ||
| }; |
| "use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _nullishCoalesce(lhs, rhsFn) { if (lhs != null) { return lhs; } else { return rhsFn(); } }// src/middleware/context-compaction/prompt.ts | ||
/** Opening guard of the system prompt: demands a plain-text answer and forbids tool calls. */
var NO_TOOLS_PREAMBLE = [
  "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.",
  "- Do NOT use any tools or functions.",
  "- You already have all the context you need in the conversation below.",
  "- Tool calls will be REJECTED and your response will be discarded.",
  "- Your entire response must be plain text containing the summary.",
  ""
].join("\n");

/** Main summarization instructions, listing the nine sections the summary should contain. */
var SUMMARY_INSTRUCTION = [
  "Your task is to create a detailed summary of the conversation history provided below. This summary will replace the older portion of the conversation so that work can continue without losing important context.",
  "Your summary should include the following sections:",
  "1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail.",
  "2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.",
  "3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include code snippets where applicable and a summary of why each file is important.",
  "4. Errors and Fixes: List all errors encountered and how they were fixed. Include any user feedback on corrections.",
  "5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.",
  "6. All User Messages: List ALL user messages that are not tool results. These are critical for understanding user feedback and changing intent.",
  "7. Pending Tasks: Outline any pending tasks that were explicitly requested.",
  "8. Current Work: Describe precisely what was being worked on most recently, including file names and code snippets where applicable.",
  "9. Optional Next Step: List the next step that should be taken, directly in line with the most recent user requests. If the last task was concluded, only list next steps that are explicitly requested.",
  "Be thorough and precise. Technical details, file paths, and code patterns are essential for continuing work without losing context."
].join("\n");

/** Closing reminder appended after the length hint. */
var NO_TOOLS_TRAILER = "\n\nREMINDER: Do NOT call any tools. Respond with plain text only containing the summary.";

/**
 * Builds the two-message prompt sent to the summarizer.
 *
 * @param serializedHistory Text rendering of the history to summarize; placed in the user message.
 * @param targetTokens Approximate summary length, interpolated into the system message.
 * @returns An array with one system message (preamble, instructions, length hint, trailer)
 *   followed by one user message holding a single text part.
 */
function buildCompactionPrompt(serializedHistory, targetTokens) {
  const lengthHint = `\nTarget summary length: approximately ${targetTokens} tokens. Be concise but do not omit important details.`;
  const systemMessage = {
    role: "system",
    content: [NO_TOOLS_PREAMBLE, SUMMARY_INSTRUCTION, lengthHint, NO_TOOLS_TRAILER].join("")
  };
  const userMessage = {
    role: "user",
    content: [
      { type: "text", text: `Here is the conversation history to summarize:\n${serializedHistory}` }
    ]
  };
  return [systemMessage, userMessage];
}
| // src/middleware/context-compaction/model-message-utils.ts | ||
// Fixed character costs charged for parts whose real size is not measured.
var PLACEHOLDER_IMAGE_CHARS = 1e3;
var PLACEHOLDER_FILE_CHARS = 200;
var PLACEHOLDER_APPROVAL_CHARS = 80;

/**
 * Estimates the token count of a message list as ceil(total characters / 4).
 *
 * @param messages Messages to measure.
 * @returns The estimated token count (0 for an empty list).
 */
function estimateModelMessageTokens(messages) {
  let totalChars = 0;
  for (const message of messages) totalChars += messageCharCount(message);
  return Math.ceil(totalChars / 4);
}

/**
 * Character cost of one message: the string length for system messages and
 * string content, otherwise the sum of the per-part costs.
 */
function messageCharCount(msg) {
  const content = msg.content;
  if (msg.role === "system" || typeof content === "string") {
    return content.length;
  }
  let total = 0;
  for (const part of content) total += partCharCount(part);
  return total;
}

/**
 * Character cost of one content part. Text and reasoning count their text,
 * images and files use fixed placeholders, tool calls/results count the tool
 * name plus the payload, and everything else (approval parts and unknown
 * types) costs PLACEHOLDER_APPROVAL_CHARS.
 */
function partCharCount(part) {
  const kind = part.type;
  if (kind === "text" || kind === "reasoning") return part.text.length;
  if (kind === "image") return PLACEHOLDER_IMAGE_CHARS;
  if (kind === "file") return PLACEHOLDER_FILE_CHARS;
  if (kind === "tool-call") return part.toolName.length + safeJsonLength(part.input);
  if (kind === "tool-result") return part.toolName.length + toolResultLength(part.output);
  return PLACEHOLDER_APPROVAL_CHARS;
}

/**
 * Length of the JSON encoding of `value` (null/undefined are encoded as "").
 * Returns 50 when the value cannot be encoded.
 */
function safeJsonLength(value) {
  const FALLBACK_LENGTH = 50;
  try {
    const encoded = JSON.stringify(value != null ? value : "");
    // `encoded` is undefined for functions/symbols; the property access then
    // throws and lands in the fallback below.
    return encoded.length;
  } catch (err) {
    return FALLBACK_LENGTH;
  }
}

/**
 * Character cost of a tool-result output object, keyed on its `type`.
 * Non-object outputs and unknown types cost a flat 50.
 */
function toolResultLength(output) {
  if (output == null || typeof output !== "object") return 50;
  const textLength = (raw) => String(raw != null ? raw : "").length;
  const type = output.type;
  if (type === "text") return textLength(output.value);
  if (type === "json" || type === "content") return safeJsonLength(output.value);
  if (type === "execution-denied") return textLength(output.reason) + 30;
  if (type === "error-text") return textLength(output.value) + 20;
  if (type === "error-json") return safeJsonLength(output.value) + 20;
  return 50;
}
/**
 * Splits a conversation into the leading system messages, the older history,
 * and the recent window that is kept as-is.
 *
 * The recent window starts as the last `keepRecentMessages` non-system
 * messages and is widened backwards until no older message shares a tool-call
 * id or approval id with a kept message.
 *
 * @param messages Full message list.
 * @param keepRecentMessages Number of trailing non-system messages to keep.
 * @returns `{ systemPrefix, olderHistory, recentWindow }`; `olderHistory` is
 *   empty when there are no more than `keepRecentMessages` non-system messages.
 */
function splitModelMessages(messages, keepRecentMessages) {
  const firstNonSystem = messages.findIndex((m) => m.role !== "system");
  const prefixLength = firstNonSystem === -1 ? messages.length : firstNonSystem;
  const systemPrefix = messages.slice(0, prefixLength);
  const conversation = messages.slice(prefixLength);
  if (conversation.length <= keepRecentMessages) {
    return { systemPrefix, olderHistory: [], recentWindow: conversation };
  }

  let boundary = conversation.length - keepRecentMessages;
  // Each pass either moves the boundary to an earlier index or stops; the
  // pass budget is a hard cap on top of that.
  for (let passesLeft = conversation.length + 1; passesLeft > 0; passesLeft--) {
    const keptIds = /* @__PURE__ */ new Set();
    for (const kept of conversation.slice(boundary)) {
      collectIds(kept, keptIds);
    }
    if (keptIds.size === 0) break;
    const earliestLinked = conversation
      .slice(0, boundary)
      .findIndex((older) => messageReferencesAny(older, keptIds));
    if (earliestLinked === -1) break;
    boundary = earliestLinked;
  }

  return {
    systemPrefix,
    olderHistory: conversation.slice(0, boundary),
    recentWindow: conversation.slice(boundary)
  };
}

/** Adds every tool-call id and approval id found in `msg` to `ids`; system, user and string-content messages contribute nothing. */
function collectIds(msg, ids) {
  const hasNoParts = msg.role === "system" || msg.role === "user" || typeof msg.content === "string";
  if (hasNoParts) return;
  for (const part of msg.content) addPartIds(part, ids);
}

/** Reports whether any part of `msg` carries an id contained in `ids`; always false for system, user and string-content messages. */
function messageReferencesAny(msg, ids) {
  if (msg.role === "system" || msg.role === "user" || typeof msg.content === "string") {
    return false;
  }
  for (const part of msg.content) {
    if (partReferencesAny(part, ids)) return true;
  }
  return false;
}

/** Records the id of a tool-call/tool-result part (`toolCallId`) or an approval part (`approvalId`). */
function addPartIds(part, ids) {
  switch (part.type) {
    case "tool-call":
    case "tool-result":
      ids.add(part.toolCallId);
      break;
    case "tool-approval-request":
    case "tool-approval-response":
      ids.add(part.approvalId);
      break;
  }
}

/** Reports whether the part's tool-call id or approval id is in `ids`; other part types never match. */
function partReferencesAny(part, ids) {
  switch (part.type) {
    case "tool-call":
    case "tool-result":
      return ids.has(part.toolCallId);
    case "tool-approval-request":
    case "tool-approval-response":
      return ids.has(part.approvalId);
    default:
      return false;
  }
}
/**
 * Joins the text of every `type: "text"` entry in `result.content` with
 * newlines and trims the outcome.
 *
 * @param result Generation result exposing a `content` array.
 * @returns The trimmed text, or `null` when nothing but whitespace remains.
 */
function extractSummaryText(result) {
  const pieces = result.content.reduce((collected, entry) => {
    if (entry.type === "text") collected.push(entry.text);
    return collected;
  }, []);
  const joined = pieces.join("\n").trim();
  return joined === "" ? null : joined;
}
| // src/middleware/context-compaction/serialize.ts | ||
/**
 * Renders a message list as plain text, one block per message separated by a
 * blank line.
 *
 * @param messages Messages to render.
 * @returns The transcript text ("" for an empty list).
 */
function serializeModelMessages(messages) {
  return messages.map(serializeMessage).join("\n\n");
}

/**
 * Renders one message as `[ROLE]` on the first line followed by its content.
 * Array content is rendered part by part; parts that render to an empty
 * string are left out.
 */
function serializeMessage(msg) {
  const header = `[${msg.role.toUpperCase()}]`;
  if (msg.role === "system" || typeof msg.content === "string") {
    return `${header}\n${msg.content}`;
  }
  const lines = [];
  for (const part of msg.content) {
    const rendered = serializePart(part);
    if (rendered) lines.push(rendered);
  }
  return `${header}\n${lines.join("\n")}`;
}

/**
 * Renders a single content part. Text is emitted verbatim; every other known
 * part type becomes a bracketed marker. Unknown part types render as "".
 */
function serializePart(part) {
  switch (part.type) {
    case "text":
      return part.text;
    case "reasoning":
      return `[reasoning: ${part.text}]`;
    case "image":
      return "[image]";
    case "file": {
      const suffix = part.filename ? `: ${part.filename}` : "";
      return `[file${suffix}]`;
    }
    case "tool-call":
      return `[tool-call: ${part.toolName}(${safeJsonString(part.input)})]`;
    case "tool-result":
      return `[tool-result: ${part.toolName} \u2192 ${serializeToolOutput(part.output)}]`;
    case "tool-approval-request":
      return `[tool-approval-request: id=${part.approvalId}]`;
    case "tool-approval-response":
      return `[tool-approval-response: id=${part.approvalId} approved=${part.approved}]`;
    default:
      return "";
  }
}

/**
 * JSON-encodes `value` (null/undefined are encoded as ""), always returning a
 * string.
 *
 * @returns The JSON text, or the literal `"[unserializable]"` (quotes
 *   included) when the value cannot be encoded.
 */
function safeJsonString(value) {
  const UNSERIALIZABLE = '"[unserializable]"';
  try {
    const encoded = JSON.stringify(value != null ? value : "");
    // JSON.stringify does not throw for functions and symbols; it returns
    // undefined, which would otherwise leak out as a non-string result.
    return typeof encoded === "string" ? encoded : UNSERIALIZABLE;
  } catch (err) {
    return UNSERIALIZABLE;
  }
}

/**
 * Renders a tool-result output object according to its `type`. Non-object
 * outputs and unknown types render as "".
 */
function serializeToolOutput(output) {
  if (output == null || typeof output !== "object") return "";
  const o = output;
  const asText = (raw) => String(raw != null ? raw : "");
  switch (o.type) {
    case "text":
      return asText(o.value);
    case "json":
    case "content":
      return safeJsonString(o.value);
    case "execution-denied":
      return `denied: ${asText(o.reason)}`;
    case "error-text":
      return `error: ${asText(o.value)}`;
    case "error-json":
      return `error: ${safeJsonString(o.value)}`;
    default:
      return "";
  }
}
| // src/middleware/context-compaction/validate.ts | ||
var DEFAULT_RESERVED_OUTPUT = 16384;
var DEFAULT_THRESHOLD_PCT = 0.8;

/**
 * Validates the options passed to `compactMessages` and throws on the first
 * problem found.
 *
 * Range checks are written as "not inside the valid range" so that NaN and
 * other values that fail every numeric comparison are rejected rather than
 * silently accepted.
 *
 * @param options Options object; reads `maxContextTokens`,
 *   `reservedOutputTokens`, `autoCompactThresholdPct`, `summaryTargetTokens`,
 *   `keepRecentMessages`, `summarize` and `summaryModel`.
 * @throws {Error} When a value is out of range, or when not exactly one of
 *   `summaryModel` / `summarize` is supplied.
 */
function validateConfig(options) {
  const maxContextTokens = options.maxContextTokens;
  if (typeof maxContextTokens !== "number" || !(maxContextTokens > 0)) {
    throw new Error(
      "[compactMessages] maxContextTokens must be a positive number"
    );
  }

  const reservedOutputTokens = options.reservedOutputTokens != null ? options.reservedOutputTokens : DEFAULT_RESERVED_OUTPUT;
  if (!(reservedOutputTokens >= 0)) {
    throw new Error(
      "[compactMessages] reservedOutputTokens must be non-negative"
    );
  }
  if (!(reservedOutputTokens < maxContextTokens)) {
    throw new Error(
      `[compactMessages] reservedOutputTokens (${reservedOutputTokens}) must be less than maxContextTokens (${maxContextTokens})`
    );
  }

  const thresholdPct = options.autoCompactThresholdPct != null ? options.autoCompactThresholdPct : DEFAULT_THRESHOLD_PCT;
  if (!(thresholdPct > 0 && thresholdPct <= 1)) {
    throw new Error(
      "[compactMessages] autoCompactThresholdPct must be in (0, 1]"
    );
  }

  const summaryTargetTokens = options.summaryTargetTokens;
  if (summaryTargetTokens !== void 0) {
    if (!(summaryTargetTokens > 0 && summaryTargetTokens < maxContextTokens)) {
      throw new Error(
        "[compactMessages] summaryTargetTokens must be > 0 and < maxContextTokens"
      );
    }
  }
  const keepRecentMessages = options.keepRecentMessages;
  if (keepRecentMessages !== void 0) {
    if (!Number.isInteger(keepRecentMessages) || keepRecentMessages < 1) {
      throw new Error(
        "[compactMessages] keepRecentMessages must be a positive integer"
      );
    }
  }

  const hasSummarize = typeof options.summarize === "function";
  const hasSummaryModel = options.summaryModel != null;
  if (hasSummarize && hasSummaryModel) {
    throw new Error(
      "[compactMessages] Pass exactly one of `summaryModel` or `summarize`, not both"
    );
  }
  if (!hasSummarize && !hasSummaryModel) {
    throw new Error(
      "[compactMessages] Either `summaryModel` or `summarize` must be provided"
    );
  }
}
| // src/middleware/context-compaction/compact-messages.ts | ||
var DEFAULT_THRESHOLD_PCT2 = 0.8;
var DEFAULT_SUMMARY_TARGET_PCT = 0.05;
var DEFAULT_RESERVED_OUTPUT2 = 16384;
var DEFAULT_KEEP_RECENT = 1;

/**
 * Replaces the older part of a conversation with a generated summary once the
 * estimated size exceeds `maxContextTokens * autoCompactThresholdPct -
 * reservedOutputTokens`.
 *
 * @param options Compaction options. `messages` and `maxContextTokens` are
 *   read directly; `autoCompactThresholdPct` (0.8), `reservedOutputTokens`
 *   (16384), `keepRecentMessages` (1), `estimateTokens`
 *   (`estimateModelMessageTokens`), `onCompactionFailure` ("passthrough") and
 *   `summaryTargetTokens` (floor of 5% of `maxContextTokens`) have the
 *   defaults shown. The summary comes from `options.summarize` when present,
 *   otherwise from `options.summaryModel`.
 * @returns The original `messages` array when no compaction is needed, when
 *   there is no older history to summarize, or when summarization fails in
 *   "passthrough" mode. Otherwise a new array: system prefix, a user message
 *   holding the summary, an assistant "Understood." message, then the recent
 *   window.
 * @throws {Error} When the options are invalid; when system prefix + summary
 *   target + recent window cannot fit in `maxContextTokens` (thrown
 *   regardless of `onCompactionFailure`); and, with
 *   `onCompactionFailure: "throw"`, when the summarizer fails, returns
 *   nothing usable, or yields an oversize result.
 */
async function compactMessages(options) {
  validateConfig(options);
  const {
    messages,
    maxContextTokens,
    autoCompactThresholdPct = DEFAULT_THRESHOLD_PCT2,
    reservedOutputTokens = DEFAULT_RESERVED_OUTPUT2,
    keepRecentMessages = DEFAULT_KEEP_RECENT,
    estimateTokens = estimateModelMessageTokens,
    onCompactionFailure = "passthrough"
  } = options;
  const summaryTargetTokens = options.summaryTargetTokens != null
    ? options.summaryTargetTokens
    : Math.floor(maxContextTokens * DEFAULT_SUMMARY_TARGET_PCT);
  const thresholdPct = clamp(autoCompactThresholdPct, 0, 1);
  const threshold = maxContextTokens * thresholdPct - reservedOutputTokens;
  const estimated = estimateTokens(messages);
  if (estimated <= threshold) {
    return messages;
  }

  const { systemPrefix, olderHistory, recentWindow } = splitModelMessages(
    messages,
    keepRecentMessages
  );
  if (olderHistory.length === 0) {
    // Everything is either system prefix or pinned to the recent window.
    return messages;
  }

  // Fail fast before paying for a summary that could never fit.
  const sysTokens = estimateTokens(systemPrefix);
  const recentTokens = estimateTokens(recentWindow);
  const projected = sysTokens + summaryTargetTokens + recentTokens;
  if (projected > maxContextTokens) {
    throw new Error(
      `[compactMessages] Configuration cannot fit: systemPrefix (${sysTokens}) + summaryTarget (${summaryTargetTokens}) + recentWindow (${recentTokens}) = ${projected} exceeds maxContextTokens (${maxContextTokens}). Reduce keepRecentMessages, lower summaryTargetTokens, or trim the system prefix.`
    );
  }

  let summaryText;
  try {
    if (options.summarize) {
      summaryText = await options.summarize(olderHistory, summaryTargetTokens);
    } else {
      summaryText = await defaultSummarize(
        options.summaryModel,
        olderHistory,
        summaryTargetTokens
      );
    }
  } catch (error) {
    if (onCompactionFailure === "throw") throw error;
    return messages;
  }

  // A custom summarizer may resolve to a non-string; treat that like empty
  // output so the failure policy applies instead of a TypeError from .trim().
  if (typeof summaryText !== "string" || summaryText.trim() === "") {
    if (onCompactionFailure === "throw") {
      throw new Error(
        "[compactMessages] Summarizer produced empty output"
      );
    }
    return messages;
  }

  const compacted = [
    ...systemPrefix,
    { role: "user", content: summaryText },
    { role: "assistant", content: "Understood." },
    ...recentWindow
  ];
  const compactedTokens = estimateTokens(compacted);
  if (compactedTokens > maxContextTokens) {
    if (onCompactionFailure === "throw") {
      throw new Error(
        `[compactMessages] Summarizer returned oversize output: compacted prompt is ${compactedTokens} tokens, exceeds maxContextTokens (${maxContextTokens})`
      );
    }
    return messages;
  }
  return compacted;
}
/**
 * Summarizes `olderHistory` with the given model: serializes the history,
 * wraps it in the compaction prompt, and calls `model.doGenerate` with
 * `maxOutputTokens` set to `targetTokens` and no tools.
 *
 * @param model Object exposing `doGenerate(options)`.
 * @param olderHistory Messages to summarize.
 * @param targetTokens Summary length budget.
 * @returns The extracted summary text.
 * @throws {Error} When the result contains no text content.
 */
async function defaultSummarize(model, olderHistory, targetTokens) {
  const prompt = buildCompactionPrompt(
    serializeModelMessages(olderHistory),
    targetTokens
  );
  const callOptions = {
    prompt,
    maxOutputTokens: targetTokens,
    tools: void 0,
    toolChoice: void 0,
    responseFormat: void 0,
    inputFormat: "messages",
    mode: { type: "regular" }
  };
  const summary = extractSummaryText(await model.doGenerate(callOptions));
  if (summary) {
    return summary;
  }
  throw new Error(
    "[compactMessages] Summary model returned no text content"
  );
}
/** Limits `value` to the range [`min`, `max`]; NaN propagates as NaN. */
function clamp(value, min, max) {
  const atLeastMin = Math.max(min, value);
  return Math.min(max, atLeastMin);
}
| exports.compactMessages = compactMessages; |
| "use strict";Object.defineProperty(exports, "__esModule", {value: true}); | ||
| var _chunkN5EFHCHEcjs = require('../chunk-N5EFHCHE.cjs'); | ||
| var _chunkB76NYX22cjs = require('../chunk-B76NYX22.cjs'); | ||
| exports.createContextCompaction = _chunkN5EFHCHEcjs.createContextCompaction; | ||
| exports.compactMessages = _chunkB76NYX22cjs.compactMessages; |
@@ -1,20 +0,13 @@ | ||
| import { LanguageModelV3Prompt, LanguageModelV3Middleware } from '@ai-sdk/provider'; | ||
| import { LanguageModelV3 } from '@ai-sdk/provider'; | ||
| import { ModelMessage } from 'ai'; | ||
| /** | ||
| * Configuration for the context compaction middleware. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * import { createContextCompaction } from 'agentool/context-compaction'; | ||
| * import { wrapLanguageModel } from 'ai'; | ||
| * | ||
| * const model = wrapLanguageModel({ | ||
| * model: anthropic('claude-sonnet-4-20250514'), | ||
| * middleware: createContextCompaction({ | ||
| * maxContextTokens: 200_000, | ||
| * }), | ||
| * }); | ||
| * ``` | ||
| * Custom summarizer callback. Receives the older history slice | ||
| * (already split off from the leading system prefix and recent | ||
| * window) and the target summary token budget. | ||
| */ | ||
| interface ContextCompactionConfig { | ||
| type CompactSummarizer = (olderHistory: ModelMessage[], targetTokens: number) => Promise<string>; | ||
| interface BaseCompactOptions { | ||
| /** Conversation messages to compact. */ | ||
| messages: ModelMessage[]; | ||
| /** Model's max context window in tokens. Required. */ | ||
@@ -24,41 +17,68 @@ maxContextTokens: number; | ||
| * Trigger compaction when estimated usage exceeds this fraction of | ||
| * the context window (0–1). Default: `0.80`. | ||
| * the context window (0–1). Default: `0.8`. | ||
| */ | ||
| autoCompactThresholdPct?: number; | ||
| /** | ||
| * Target summary size as a fraction of `maxContextTokens` (0–1). | ||
| * Default: `0.05`. | ||
| * Target summary size in tokens. | ||
| * Default: `floor(maxContextTokens * 0.05)`. | ||
| */ | ||
| summaryTargetPct?: number; | ||
| summaryTargetTokens?: number; | ||
| /** Tokens reserved for model output. Default: `16384`. */ | ||
| reservedOutputTokens?: number; | ||
| /** | ||
| * Custom token estimator. Receives the full prompt array and must | ||
| * return an estimated token count. | ||
| * Default: character-count / 4 heuristic. | ||
| * Number of trailing messages to keep verbatim. The boundary is | ||
| * automatically extended backwards to preserve tool-call / | ||
| * tool-result / tool-approval pairs. Default: `1`. | ||
| */ | ||
| estimateTokens?: (prompt: LanguageModelV3Prompt) => number; | ||
| keepRecentMessages?: number; | ||
| /** | ||
| * Custom summarizer. When provided, the middleware calls this | ||
| * instead of using the underlying model for summarization. | ||
| * Custom token estimator over `ModelMessage[]`. Pass a | ||
| * provider-specific tokenizer (e.g. tiktoken) for accuracy. | ||
| * Default: char-count / 4 heuristic. | ||
| */ | ||
| summarize?: (messages: LanguageModelV3Prompt, targetTokens: number) => Promise<string>; | ||
| estimateTokens?: (messages: ModelMessage[]) => number; | ||
| /** | ||
| * What to do when summarization fails. | ||
| * - `'passthrough'` (default): proceed with the original, uncompacted prompt. | ||
| * - `'throw'`: throw the summarization error. | ||
| * What to do when summarization fails or produces oversize output. | ||
| * - `'passthrough'` (default): return the original `messages`. | ||
| * - `'throw'`: throw the underlying error. | ||
| */ | ||
| onCompactionFailure?: 'passthrough' | 'throw'; | ||
| } | ||
| type CompactMessagesOptions = (BaseCompactOptions & { | ||
| summaryModel: LanguageModelV3; | ||
| summarize?: never; | ||
| }) | (BaseCompactOptions & { | ||
| summarize: CompactSummarizer; | ||
| summaryModel?: never; | ||
| }); | ||
| /** | ||
| * Create a context-compaction middleware for the Vercel AI SDK. | ||
| * Compact a conversation by summarizing older history while | ||
| * preserving the leading system prefix and the most recent turns. | ||
| * | ||
| * Wrap any language model with this middleware via | ||
| * `wrapLanguageModel({ model, middleware })`. When the prompt | ||
| * exceeds `maxContextTokens * autoCompactThresholdPct`, the | ||
| * middleware transparently summarizes older history while | ||
| * preserving system messages and the most recent turns. | ||
| * Returns the **same `messages` reference** (===) when no | ||
| * compaction is needed, so callers can use identity to detect a | ||
| * no-op. When compaction occurs, returns a new array shaped as: | ||
| * | ||
| * `[ ...systemPrefix, user(summary), assistant('Understood.'), ...recentWindow ]` | ||
| * | ||
| * The synthetic `user → assistant` ack pair preserves role | ||
| * alternation, which is required by Anthropic and Google providers. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * import { compactMessages } from 'agentool/context-compaction'; | ||
| * import { openai } from '@ai-sdk/openai'; | ||
| * import { generateText } from 'ai'; | ||
| * | ||
| * const model = openai('gpt-5'); | ||
| * messages = await compactMessages({ | ||
| * messages, | ||
| * summaryModel: openai('gpt-5-mini'), | ||
| * maxContextTokens: 400_000, | ||
| * }); | ||
| * const result = await generateText({ model, messages }); | ||
| * ``` | ||
| */ | ||
| declare function createContextCompaction(config: ContextCompactionConfig): LanguageModelV3Middleware; | ||
| declare function compactMessages(options: CompactMessagesOptions): Promise<ModelMessage[]>; | ||
| export { type ContextCompactionConfig, createContextCompaction }; | ||
| export { type CompactMessagesOptions, type CompactSummarizer, compactMessages }; |
@@ -1,20 +0,13 @@ | ||
| import { LanguageModelV3Prompt, LanguageModelV3Middleware } from '@ai-sdk/provider'; | ||
| import { LanguageModelV3 } from '@ai-sdk/provider'; | ||
| import { ModelMessage } from 'ai'; | ||
| /** | ||
| * Configuration for the context compaction middleware. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * import { createContextCompaction } from 'agentool/context-compaction'; | ||
| * import { wrapLanguageModel } from 'ai'; | ||
| * | ||
| * const model = wrapLanguageModel({ | ||
| * model: anthropic('claude-sonnet-4-20250514'), | ||
| * middleware: createContextCompaction({ | ||
| * maxContextTokens: 200_000, | ||
| * }), | ||
| * }); | ||
| * ``` | ||
| * Custom summarizer callback. Receives the older history slice | ||
| * (already split off from the leading system prefix and recent | ||
| * window) and the target summary token budget. | ||
| */ | ||
| interface ContextCompactionConfig { | ||
| type CompactSummarizer = (olderHistory: ModelMessage[], targetTokens: number) => Promise<string>; | ||
| interface BaseCompactOptions { | ||
| /** Conversation messages to compact. */ | ||
| messages: ModelMessage[]; | ||
| /** Model's max context window in tokens. Required. */ | ||
@@ -24,41 +17,68 @@ maxContextTokens: number; | ||
| * Trigger compaction when estimated usage exceeds this fraction of | ||
| * the context window (0–1). Default: `0.80`. | ||
| * the context window (0–1). Default: `0.8`. | ||
| */ | ||
| autoCompactThresholdPct?: number; | ||
| /** | ||
| * Target summary size as a fraction of `maxContextTokens` (0–1). | ||
| * Default: `0.05`. | ||
| * Target summary size in tokens. | ||
| * Default: `floor(maxContextTokens * 0.05)`. | ||
| */ | ||
| summaryTargetPct?: number; | ||
| summaryTargetTokens?: number; | ||
| /** Tokens reserved for model output. Default: `16384`. */ | ||
| reservedOutputTokens?: number; | ||
| /** | ||
| * Custom token estimator. Receives the full prompt array and must | ||
| * return an estimated token count. | ||
| * Default: character-count / 4 heuristic. | ||
| * Number of trailing messages to keep verbatim. The boundary is | ||
| * automatically extended backwards to preserve tool-call / | ||
| * tool-result / tool-approval pairs. Default: `1`. | ||
| */ | ||
| estimateTokens?: (prompt: LanguageModelV3Prompt) => number; | ||
| keepRecentMessages?: number; | ||
| /** | ||
| * Custom summarizer. When provided, the middleware calls this | ||
| * instead of using the underlying model for summarization. | ||
| * Custom token estimator over `ModelMessage[]`. Pass a | ||
| * provider-specific tokenizer (e.g. tiktoken) for accuracy. | ||
| * Default: char-count / 4 heuristic. | ||
| */ | ||
| summarize?: (messages: LanguageModelV3Prompt, targetTokens: number) => Promise<string>; | ||
| estimateTokens?: (messages: ModelMessage[]) => number; | ||
| /** | ||
| * What to do when summarization fails. | ||
| * - `'passthrough'` (default): proceed with the original, uncompacted prompt. | ||
| * - `'throw'`: throw the summarization error. | ||
| * What to do when summarization fails or produces oversize output. | ||
| * - `'passthrough'` (default): return the original `messages`. | ||
| * - `'throw'`: throw the underlying error. | ||
| */ | ||
| onCompactionFailure?: 'passthrough' | 'throw'; | ||
| } | ||
| type CompactMessagesOptions = (BaseCompactOptions & { | ||
| summaryModel: LanguageModelV3; | ||
| summarize?: never; | ||
| }) | (BaseCompactOptions & { | ||
| summarize: CompactSummarizer; | ||
| summaryModel?: never; | ||
| }); | ||
| /** | ||
| * Create a context-compaction middleware for the Vercel AI SDK. | ||
| * Compact a conversation by summarizing older history while | ||
| * preserving the leading system prefix and the most recent turns. | ||
| * | ||
| * Wrap any language model with this middleware via | ||
| * `wrapLanguageModel({ model, middleware })`. When the prompt | ||
| * exceeds `maxContextTokens * autoCompactThresholdPct`, the | ||
| * middleware transparently summarizes older history while | ||
| * preserving system messages and the most recent turns. | ||
| * Returns the **same `messages` reference** (===) when no | ||
| * compaction is needed, so callers can use identity to detect a | ||
| * no-op. When compaction occurs, returns a new array shaped as: | ||
| * | ||
| * `[ ...systemPrefix, user(summary), assistant('Understood.'), ...recentWindow ]` | ||
| * | ||
| * The synthetic `user → assistant` ack pair preserves role | ||
| * alternation, which is required by Anthropic and Google providers. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * import { compactMessages } from 'agentool/context-compaction'; | ||
| * import { openai } from '@ai-sdk/openai'; | ||
| * import { generateText } from 'ai'; | ||
| * | ||
| * const model = openai('gpt-5'); | ||
| * messages = await compactMessages({ | ||
| * messages, | ||
| * summaryModel: openai('gpt-5-mini'), | ||
| * maxContextTokens: 400_000, | ||
| * }); | ||
| * const result = await generateText({ model, messages }); | ||
| * ``` | ||
| */ | ||
| declare function createContextCompaction(config: ContextCompactionConfig): LanguageModelV3Middleware; | ||
| declare function compactMessages(options: CompactMessagesOptions): Promise<ModelMessage[]>; | ||
| export { type ContextCompactionConfig, createContextCompaction }; | ||
| export { type CompactMessagesOptions, type CompactSummarizer, compactMessages }; |
| import { | ||
| createContextCompaction | ||
| } from "../chunk-VCP53KEZ.js"; | ||
| compactMessages | ||
| } from "../chunk-6CI3UDOJ.js"; | ||
| export { | ||
| createContextCompaction | ||
| compactMessages | ||
| }; |
+2
-2
@@ -18,3 +18,3 @@ "use strict";Object.defineProperty(exports, "__esModule", {value: true}); | ||
| var _chunkN5EFHCHEcjs = require('./chunk-N5EFHCHE.cjs'); | ||
| var _chunkB76NYX22cjs = require('./chunk-B76NYX22.cjs'); | ||
@@ -173,2 +173,2 @@ | ||
| exports.askUser = _chunkKUFZFNPTcjs.askUser; exports.askUserPrompt = _chunkKUFZFNPTcjs.getPrompt; exports.bash = _chunkCXBWF5ONcjs.bash; exports.bashPrompt = _chunkCXBWF5ONcjs.getPrompt; exports.createAskUser = _chunkKUFZFNPTcjs.createAskUser; exports.createBash = _chunkCXBWF5ONcjs.createBash; exports.createContextCompaction = _chunkN5EFHCHEcjs.createContextCompaction; exports.createDiff = _chunkOYLTQJXTcjs.createDiff; exports.createEdit = _chunk6ULQG2W2cjs.createEdit; exports.createGlob = _chunkYCWJVQYOcjs.createGlob; exports.createGrep = _chunkRIGL3JTScjs.createGrep; exports.createHttpRequest = _chunk5T3SQYI4cjs.createHttpRequest; exports.createLsp = _chunkNQIV6LBHcjs.createLsp; exports.createMemory = _chunkLNAR3NJQcjs.createMemory; exports.createMultiEdit = _chunkVPV6WG5Vcjs.createMultiEdit; exports.createRead = _chunkHG5T47NAcjs.createRead; exports.createSleep = _chunkJYTOARJVcjs.createSleep; exports.createTaskCreate = _chunkSFDZRLSXcjs.createTaskCreate; exports.createTaskGet = _chunkXGDE7S2Dcjs.createTaskGet; exports.createTaskList = _chunk3FT4ZPB2cjs.createTaskList; exports.createTaskUpdate = _chunkT6STO7PScjs.createTaskUpdate; exports.createToolSearch = _chunk2JBLVFB7cjs.createToolSearch; exports.createWebFetch = _chunkG6ZVJA4Vcjs.createWebFetch; exports.createWebSearch = _chunkCM3VRCNXcjs.createWebSearch; exports.createWrite = _chunkABXTBB2Ncjs.createWrite; exports.diff = _chunkOYLTQJXTcjs.diff; exports.diffPrompt = _chunkOYLTQJXTcjs.getPrompt; exports.edit = _chunk6ULQG2W2cjs.edit; exports.editPrompt = _chunk6ULQG2W2cjs.getPrompt; exports.glob = _chunkYCWJVQYOcjs.glob; exports.globPrompt = _chunkYCWJVQYOcjs.getPrompt; exports.grep = _chunkRIGL3JTScjs.grep; exports.grepPrompt = _chunkRIGL3JTScjs.getPrompt; exports.httpRequest = _chunk5T3SQYI4cjs.httpRequest; exports.httpRequestPrompt = _chunk5T3SQYI4cjs.getPrompt; exports.lsp = _chunkNQIV6LBHcjs.lsp; exports.lspPrompt = _chunkNQIV6LBHcjs.getPrompt; exports.memory = _chunkLNAR3NJQcjs.memory; 
exports.memoryPrompt = _chunkLNAR3NJQcjs.getPrompt; exports.multiEdit = _chunkVPV6WG5Vcjs.multiEdit; exports.multiEditPrompt = _chunkVPV6WG5Vcjs.getPrompt; exports.read = _chunkHG5T47NAcjs.read; exports.readPrompt = _chunkHG5T47NAcjs.getPrompt; exports.sleep = _chunkJYTOARJVcjs.sleep; exports.sleepPrompt = _chunkJYTOARJVcjs.getPrompt; exports.taskCreate = _chunkSFDZRLSXcjs.taskCreate; exports.taskCreatePrompt = _chunkSFDZRLSXcjs.getPrompt; exports.taskGet = _chunkXGDE7S2Dcjs.taskGet; exports.taskGetPrompt = _chunkXGDE7S2Dcjs.getPrompt; exports.taskList = _chunk3FT4ZPB2cjs.taskList; exports.taskListPrompt = _chunk3FT4ZPB2cjs.getPrompt; exports.taskUpdate = _chunkT6STO7PScjs.taskUpdate; exports.taskUpdatePrompt = _chunkT6STO7PScjs.getPrompt; exports.toolSearch = _chunk2JBLVFB7cjs.toolSearch; exports.toolSearchPrompt = _chunk2JBLVFB7cjs.getPrompt; exports.webFetch = _chunkG6ZVJA4Vcjs.webFetch; exports.webFetchPrompt = _chunkG6ZVJA4Vcjs.getPrompt; exports.webSearch = _chunkCM3VRCNXcjs.webSearch; exports.webSearchPrompt = _chunkCM3VRCNXcjs.getPrompt; exports.write = _chunkABXTBB2Ncjs.write; exports.writePrompt = _chunkABXTBB2Ncjs.getPrompt; | ||
| exports.askUser = _chunkKUFZFNPTcjs.askUser; exports.askUserPrompt = _chunkKUFZFNPTcjs.getPrompt; exports.bash = _chunkCXBWF5ONcjs.bash; exports.bashPrompt = _chunkCXBWF5ONcjs.getPrompt; exports.compactMessages = _chunkB76NYX22cjs.compactMessages; exports.createAskUser = _chunkKUFZFNPTcjs.createAskUser; exports.createBash = _chunkCXBWF5ONcjs.createBash; exports.createDiff = _chunkOYLTQJXTcjs.createDiff; exports.createEdit = _chunk6ULQG2W2cjs.createEdit; exports.createGlob = _chunkYCWJVQYOcjs.createGlob; exports.createGrep = _chunkRIGL3JTScjs.createGrep; exports.createHttpRequest = _chunk5T3SQYI4cjs.createHttpRequest; exports.createLsp = _chunkNQIV6LBHcjs.createLsp; exports.createMemory = _chunkLNAR3NJQcjs.createMemory; exports.createMultiEdit = _chunkVPV6WG5Vcjs.createMultiEdit; exports.createRead = _chunkHG5T47NAcjs.createRead; exports.createSleep = _chunkJYTOARJVcjs.createSleep; exports.createTaskCreate = _chunkSFDZRLSXcjs.createTaskCreate; exports.createTaskGet = _chunkXGDE7S2Dcjs.createTaskGet; exports.createTaskList = _chunk3FT4ZPB2cjs.createTaskList; exports.createTaskUpdate = _chunkT6STO7PScjs.createTaskUpdate; exports.createToolSearch = _chunk2JBLVFB7cjs.createToolSearch; exports.createWebFetch = _chunkG6ZVJA4Vcjs.createWebFetch; exports.createWebSearch = _chunkCM3VRCNXcjs.createWebSearch; exports.createWrite = _chunkABXTBB2Ncjs.createWrite; exports.diff = _chunkOYLTQJXTcjs.diff; exports.diffPrompt = _chunkOYLTQJXTcjs.getPrompt; exports.edit = _chunk6ULQG2W2cjs.edit; exports.editPrompt = _chunk6ULQG2W2cjs.getPrompt; exports.glob = _chunkYCWJVQYOcjs.glob; exports.globPrompt = _chunkYCWJVQYOcjs.getPrompt; exports.grep = _chunkRIGL3JTScjs.grep; exports.grepPrompt = _chunkRIGL3JTScjs.getPrompt; exports.httpRequest = _chunk5T3SQYI4cjs.httpRequest; exports.httpRequestPrompt = _chunk5T3SQYI4cjs.getPrompt; exports.lsp = _chunkNQIV6LBHcjs.lsp; exports.lspPrompt = _chunkNQIV6LBHcjs.getPrompt; exports.memory = _chunkLNAR3NJQcjs.memory; exports.memoryPrompt = 
_chunkLNAR3NJQcjs.getPrompt; exports.multiEdit = _chunkVPV6WG5Vcjs.multiEdit; exports.multiEditPrompt = _chunkVPV6WG5Vcjs.getPrompt; exports.read = _chunkHG5T47NAcjs.read; exports.readPrompt = _chunkHG5T47NAcjs.getPrompt; exports.sleep = _chunkJYTOARJVcjs.sleep; exports.sleepPrompt = _chunkJYTOARJVcjs.getPrompt; exports.taskCreate = _chunkSFDZRLSXcjs.taskCreate; exports.taskCreatePrompt = _chunkSFDZRLSXcjs.getPrompt; exports.taskGet = _chunkXGDE7S2Dcjs.taskGet; exports.taskGetPrompt = _chunkXGDE7S2Dcjs.getPrompt; exports.taskList = _chunk3FT4ZPB2cjs.taskList; exports.taskListPrompt = _chunk3FT4ZPB2cjs.getPrompt; exports.taskUpdate = _chunkT6STO7PScjs.taskUpdate; exports.taskUpdatePrompt = _chunkT6STO7PScjs.getPrompt; exports.toolSearch = _chunk2JBLVFB7cjs.toolSearch; exports.toolSearchPrompt = _chunk2JBLVFB7cjs.getPrompt; exports.webFetch = _chunkG6ZVJA4Vcjs.webFetch; exports.webFetchPrompt = _chunkG6ZVJA4Vcjs.getPrompt; exports.webSearch = _chunkCM3VRCNXcjs.webSearch; exports.webSearchPrompt = _chunkCM3VRCNXcjs.getPrompt; exports.write = _chunkABXTBB2Ncjs.write; exports.writePrompt = _chunkABXTBB2Ncjs.getPrompt; |
+1
-1
@@ -19,3 +19,3 @@ export { BashConfig, bash, bashPrompt, createBash } from './bash/index.cjs'; | ||
| export { HttpRequestConfig, createHttpRequest, httpRequest, httpRequestPrompt } from './http-request/index.cjs'; | ||
| export { ContextCompactionConfig, createContextCompaction } from './context-compaction/index.cjs'; | ||
| export { CompactMessagesOptions, CompactSummarizer, compactMessages } from './context-compaction/index.cjs'; | ||
| export { AskUserConfig, askUser, askUserPrompt, createAskUser } from './ask-user/index.cjs'; | ||
@@ -22,0 +22,0 @@ export { SleepConfig, createSleep, sleep, sleepPrompt } from './sleep/index.cjs'; |
+1
-1
@@ -19,3 +19,3 @@ export { BashConfig, bash, bashPrompt, createBash } from './bash/index.js'; | ||
| export { HttpRequestConfig, createHttpRequest, httpRequest, httpRequestPrompt } from './http-request/index.js'; | ||
| export { ContextCompactionConfig, createContextCompaction } from './context-compaction/index.js'; | ||
| export { CompactMessagesOptions, CompactSummarizer, compactMessages } from './context-compaction/index.js'; | ||
| export { AskUserConfig, askUser, askUserPrompt, createAskUser } from './ask-user/index.js'; | ||
@@ -22,0 +22,0 @@ export { SleepConfig, createSleep, sleep, sleepPrompt } from './sleep/index.js'; |
+3
-3
@@ -17,4 +17,4 @@ import { | ||
| import { | ||
| createContextCompaction | ||
| } from "./chunk-VCP53KEZ.js"; | ||
| compactMessages | ||
| } from "./chunk-6CI3UDOJ.js"; | ||
| import { | ||
@@ -116,5 +116,5 @@ askUser, | ||
| getPrompt as bashPrompt, | ||
| compactMessages, | ||
| createAskUser, | ||
| createBash, | ||
| createContextCompaction, | ||
| createDiff, | ||
@@ -121,0 +121,0 @@ createEdit, |
+2
-2
| { | ||
| "name": "agentool", | ||
| "version": "1.2.0", | ||
| "version": "1.3.0", | ||
| "type": "module", | ||
| "description": "21 AI agent tools + context-compaction middleware as standalone Vercel AI SDK modules", | ||
| "description": "21 AI agent tools + context-compaction helper as standalone Vercel AI SDK modules", | ||
| "author": "Z-M-Huang", | ||
@@ -7,0 +7,0 @@ "license": "Apache-2.0", |
+98
-20
@@ -5,3 +5,3 @@ <div align="center"> | ||
| **21 AI agent tools + context-compaction middleware for the [Vercel AI SDK](https://sdk.vercel.ai/).** | ||
| **21 AI agent tools + context-compaction helper for the [Vercel AI SDK](https://sdk.vercel.ai/).** | ||
@@ -545,31 +545,109 @@ <p> | ||
| ### context-compaction (middleware) | ||
| ### context-compaction (function) | ||
| Transparent context compaction middleware for `wrapLanguageModel()`. When the prompt exceeds a configurable threshold, it automatically summarizes older conversation history while preserving system messages and recent turns. | ||
| > **Breaking in 1.3.0:** the `createContextCompaction` middleware was removed in favor of a pure `compactMessages` function. The middleware couldn't persist compacted state back to the caller, so every over-threshold turn re-summarized — costly and cache-busting. The function form returns the new messages and the caller assigns it back. See [migration](#migration-from-12x). | ||
| `compactMessages` summarizes older conversation history when usage crosses a threshold, preserving the leading system prefix and the most recent turns. It works with any provider the AI SDK supports (OpenAI / Anthropic / Google / Mistral / xAI / etc.) because it operates on the unified `ModelMessage[]` shape. | ||
| ```typescript | ||
| import { createContextCompaction } from 'agentool/context-compaction'; | ||
| import { wrapLanguageModel, generateText } from 'ai'; | ||
| import { compactMessages } from 'agentool/context-compaction'; | ||
| import { generateText, type ModelMessage } from 'ai'; | ||
| import { openai } from '@ai-sdk/openai'; | ||
| const model = openai('gpt-5'); | ||
| let messages: ModelMessage[] = []; | ||
| // Each turn: | ||
| messages.push({ role: 'user', content: userInput }); | ||
| messages = await compactMessages({ | ||
| messages, | ||
| summaryModel: openai('gpt-5-mini'), // cheap summarizer (can differ from main model) | ||
| maxContextTokens: 400_000, | ||
| }); | ||
| const result = await generateText({ model, messages }); | ||
| messages.push(...result.response.messages); | ||
| ``` | ||
| When usage is under the threshold, `compactMessages` returns the **same `messages` reference** (`===`), so calling it on every turn is cheap. | ||
| **Output shape after compaction:** | ||
| ``` | ||
| [ ...leadingSystemPrefix, | ||
| { role: 'user', content: <summary> }, | ||
| { role: 'assistant', content: 'Understood.' }, | ||
| ...lastNMessages ] | ||
| ``` | ||
| The synthetic `user → assistant` ack pair preserves role alternation (required by Anthropic / Google providers). | ||
| **Tool-chain safety:** the recent-window boundary auto-extends backwards so `tool-call` / `tool-result` / `tool-approval-request` / `tool-approval-response` IDs are never split across the summarization boundary. This avoids the AI SDK's `MissingToolResultsError`. | ||
| **Cross-provider example (Anthropic):** | ||
| ```typescript | ||
| import { anthropic } from '@ai-sdk/anthropic'; | ||
| messages = await compactMessages({ | ||
| messages, | ||
| summaryModel: anthropic('claude-sonnet-4-20250514'), | ||
| maxContextTokens: 200_000, | ||
| }); | ||
| ``` | ||
| **Custom summarizer (e.g. local model, cached, or anything else):** | ||
| ```typescript | ||
| messages = await compactMessages({ | ||
| messages, | ||
| maxContextTokens: 200_000, | ||
| summarize: async (older, targetTokens) => { | ||
| return callMyOwnSummarizer(older, targetTokens); | ||
| }, | ||
| }); | ||
| ``` | ||
| **Options:** | ||
| | Option | Type | Default | Description | | ||
| |---|---|---|---| | ||
| | `messages` | `ModelMessage[]` | required | Conversation to compact | | ||
| | `maxContextTokens` | `number` | required | Model's context window | | ||
| | `summaryModel` *or* `summarize` | `LanguageModelV3` *or* fn | required | Exactly one | | ||
| | `autoCompactThresholdPct` | `number` (0-1) | `0.8` | Trigger threshold | | ||
| | `summaryTargetTokens` | `number` | `floor(maxContextTokens * 0.05)` | Target summary size | | ||
| | `reservedOutputTokens` | `number` | `16384` | Tokens reserved for output | | ||
| | `keepRecentMessages` | `number` | `1` | Last N messages kept verbatim (extended for tool-chain safety) | | ||
| | `estimateTokens` | `(msgs) => number` | char/4 heuristic | Custom token estimator (use a real tokenizer for accuracy) | | ||
| | `onCompactionFailure` | `'passthrough' \| 'throw'` | `'passthrough'` | What to do when summarization fails or returns oversize text | | ||
| **Caveats:** | ||
| - Default token estimation is char/4 — coarse but provider-agnostic. For accuracy pass `estimateTokens` with a provider-specific tokenizer (e.g. `tiktoken` for OpenAI, `@anthropic-ai/tokenizer` for Anthropic). | ||
| - Multimodal content (images, files) is reduced to placeholders during summarization. Original binary data is gone from the persisted compacted history. | ||
| - Summary-of-summary degradation accumulates over very long sessions. Periodic session restarts are recommended for long-lived agents. | ||
| - Mid-conversation `system` messages are NOT hoisted — only the leading contiguous system prefix is preserved as system. | ||
| #### Migration from 1.2.x | ||
| Before (v1.2.x): | ||
| ```typescript | ||
| import { createContextCompaction } from 'agentool/context-compaction'; | ||
| import { wrapLanguageModel } from 'ai'; | ||
| const model = wrapLanguageModel({ | ||
| model: anthropic('claude-sonnet-4-20250514'), | ||
| middleware: createContextCompaction({ | ||
| maxContextTokens: 200_000, // model's context window (required) | ||
| autoCompactThresholdPct: 0.80, // compact when 80% full (default) | ||
| summaryTargetPct: 0.05, // summarize to 5% of context (default) | ||
| }), | ||
| middleware: createContextCompaction({ maxContextTokens: 200_000 }), | ||
| }); | ||
| const { text } = await generateText({ model, messages }); | ||
| ``` | ||
| // Use the wrapped model normally — compaction is transparent | ||
| const { text } = await generateText({ | ||
| model, | ||
| tools: { bash, read, edit }, | ||
| maxSteps: 20, | ||
| prompt: 'Find and fix the bug in src/auth.ts', | ||
| After (v1.3.0): | ||
| ```typescript | ||
| import { compactMessages } from 'agentool/context-compaction'; | ||
| const model = anthropic('claude-sonnet-4-20250514'); | ||
| messages = await compactMessages({ | ||
| messages, | ||
| summaryModel: model, | ||
| maxContextTokens: 200_000, | ||
| }); | ||
| const { text } = await generateText({ model, messages }); | ||
| ``` | ||
| **Config:** `maxContextTokens` (number, required), `autoCompactThresholdPct?` (0-1), `summaryTargetPct?` (0-1), `reservedOutputTokens?` (number), `estimateTokens?` (function), `summarize?` (function), `onCompactionFailure?` (`'passthrough'` | `'throw'`) | ||
| --- | ||
@@ -705,3 +783,3 @@ | ||
| lsp, createLsp, | ||
| createContextCompaction, // middleware, not a tool | ||
| compactMessages, // helper function, not a tool | ||
| askUser, createAskUser, | ||
@@ -733,3 +811,3 @@ sleep, createSleep, | ||
| import { lsp } from 'agentool/lsp'; | ||
| import { createContextCompaction } from 'agentool/context-compaction'; // middleware | ||
| import { compactMessages } from 'agentool/context-compaction'; // helper function | ||
| import { askUser } from 'agentool/ask-user'; | ||
@@ -736,0 +814,0 @@ import { sleep } from 'agentool/sleep'; |
// CommonJS prologue plus two transpiler-injected helpers that stand in for `??` and `?.`:
// - _nullishCoalesce(lhs, rhsFn): returns lhs unless it is null/undefined, otherwise returns rhsFn().
// - _optionalChain(ops): ops[0] is the starting value, followed by (op, fn) pairs. For 'access' /
//   'optionalAccess' the previous value is remembered as the receiver and fn(value) becomes the new value;
//   for 'call' / 'optionalCall' fn receives a caller bound to that remembered receiver. An optional step
//   on a null/undefined value ends the chain early with undefined.
| "use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _nullishCoalesce(lhs, rhsFn) { if (lhs != null) { return lhs; } else { return rhsFn(); } } function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }// src/middleware/context-compaction/prompt.ts | ||
// Opening fragment of the summarization system prompt: tells the model to answer with plain text
// only and not to call tools. buildCompactionPrompt places it first in the system message.
| var NO_TOOLS_PREAMBLE = `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools. | ||
| - Do NOT use any tools or functions. | ||
| - You already have all the context you need in the conversation below. | ||
| - Tool calls will be REJECTED and your response will be discarded. | ||
| - Your entire response must be plain text containing the summary. | ||
| `; | ||
// Main summarization instruction: states the task and enumerates the nine sections the summary
// should contain. Concatenated directly after NO_TOOLS_PREAMBLE.
| var SUMMARY_INSTRUCTION = `Your task is to create a detailed summary of the conversation history provided below. This summary will replace the older portion of the conversation so that work can continue without losing important context. | ||
| Your summary should include the following sections: | ||
| 1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail. | ||
| 2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed. | ||
| 3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include code snippets where applicable and a summary of why each file is important. | ||
| 4. Errors and Fixes: List all errors encountered and how they were fixed. Include any user feedback on corrections. | ||
| 5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts. | ||
| 6. All User Messages: List ALL user messages that are not tool results. These are critical for understanding user feedback and changing intent. | ||
| 7. Pending Tasks: Outline any pending tasks that were explicitly requested. | ||
| 8. Current Work: Describe precisely what was being worked on most recently, including file names and code snippets where applicable. | ||
| 9. Optional Next Step: List the next step that should be taken, directly in line with the most recent user requests. If the last task was concluded, only list next steps that are explicitly requested. | ||
| Be thorough and precise. Technical details, file paths, and code patterns are essential for continuing work without losing context.`; | ||
// Closing reminder appended as the last fragment of the system prompt, repeating the no-tools rule.
| var NO_TOOLS_TRAILER = "\n\nREMINDER: Do NOT call any tools. Respond with plain text only containing the summary."; | ||
/**
 * Build the two-message prompt used for the summarization request.
 *
 * @param serializedHistory - Text of the older conversation; interpolated into the user message.
 * @param targetTokens - Value interpolated into the "Target summary length" line of the system message.
 * @returns An array of two messages: a system message whose content is
 *   NO_TOOLS_PREAMBLE + SUMMARY_INSTRUCTION + the target-length line + NO_TOOLS_TRAILER,
 *   followed by a user message with a single text part containing the history.
 */
| function buildCompactionPrompt(serializedHistory, targetTokens) { | ||
| const systemContent = NO_TOOLS_PREAMBLE + SUMMARY_INSTRUCTION + ` | ||
| Target summary length: approximately ${targetTokens} tokens. Be concise but do not omit important details.` + NO_TOOLS_TRAILER; | ||
// System content is a plain string; user content is an array of parts.
| return [ | ||
| { role: "system", content: systemContent }, | ||
| { | ||
| role: "user", | ||
| content: [ | ||
| { | ||
| type: "text", | ||
| text: `Here is the conversation history to summarize: | ||
| ${serializedHistory}` | ||
| } | ||
| ] | ||
| } | ||
| ]; | ||
| } | ||
| // src/middleware/context-compaction/serialize.ts | ||
/**
 * Estimate the token count of a prompt with a 4-characters-per-token heuristic.
 *
 * @param prompt - Array of prompt messages.
 * @returns The summed character count of all messages divided by 4, rounded up.
 */
function estimateTokens(prompt) {
  let chars = 0;
  for (const msg of prompt) {
    chars += messageCharCount(msg);
  }
  return Math.ceil(chars / 4);
}
/**
 * Count the characters one message contributes to the estimate.
 *
 * @param msg - A prompt message; `content` is either a string or an array of parts.
 * @returns Character count: string length for string content, otherwise the sum over parts.
 */
function messageCharCount(msg) {
  // System messages carry a string. Any other string content is measured the same way
  // instead of being iterated character by character as if each character were a part.
  if (msg.role === "system" || typeof msg.content === "string") {
    return msg.content.length;
  }
  let chars = 0;
  for (const part of msg.content) {
    switch (part.type) {
      case "text":
      case "reasoning":
        chars += part.text.length;
        break;
      case "tool-call":
        // jsonLength tolerates an absent input; JSON.stringify(undefined) is undefined.
        chars += part.toolName.length + jsonLength(part.input);
        break;
      case "tool-result":
        chars += part.toolName.length + toolResultLength(part.output);
        break;
      default:
        // Files and unrecognized parts are charged a flat placeholder cost.
        chars += 20;
        break;
    }
  }
  return chars;
}
/**
 * Length of the JSON encoding of a value.
 *
 * @returns The encoded length, or 0 when the value has no JSON representation.
 */
function jsonLength(value) {
  const json = JSON.stringify(value);
  return json == null ? 0 : json.length;
}
/**
 * Count the characters of a tool-result output.
 *
 * Error outputs ("error-text" / "error-json") are measured like their non-error
 * counterparts, and "content" outputs are summed per part, so large outputs of
 * those kinds are no longer charged a flat 20 characters.
 *
 * @param output - Tool result output object discriminated by `type`.
 * @returns Character count; 20 for output types that are not recognized.
 */
function toolResultLength(output) {
  switch (output.type) {
    case "text":
    case "error-text":
      return String(output.value != null ? output.value : "").length;
    case "json":
    case "error-json":
      return jsonLength(output.value != null ? output.value : "");
    case "execution-denied":
      return (output.reason != null ? output.reason : "").length + 20;
    case "content": {
      if (!Array.isArray(output.value)) return 20;
      let chars = 0;
      for (const item of output.value) {
        // Non-text items get the same placeholder cost as file parts.
        chars += item && item.type === "text" && typeof item.text === "string" ? item.text.length : 20;
      }
      return chars;
    }
    default:
      return 20;
  }
}
/**
 * Render a list of messages as plain text for the summarization request.
 *
 * @param messages - Array of prompt messages.
 * @returns The serialized messages separated by a blank line.
 */
function serializePrompt(messages) {
  return messages.map(serializeMessage).join("\n\n");
}
/**
 * Render one message as an upper-cased role label followed by its content.
 *
 * @param msg - A prompt message; `content` is either a string or an array of parts.
 * @returns "[ROLE]" on the first line, then the content (one line per non-empty part).
 */
function serializeMessage(msg) {
  const label = msg.role.toUpperCase();
  // String content (always the case for system messages) is emitted verbatim
  // rather than being iterated as if each character were a part.
  if (msg.role === "system" || typeof msg.content === "string") {
    return `[${label}]\n${msg.content}`;
  }
  const parts = [];
  for (const part of msg.content) {
    const s = serializePart(part);
    if (s) parts.push(s);
  }
  return `[${label}]\n${parts.join("\n")}`;
}
/**
 * Render a single content part.
 *
 * @param part - Content part discriminated by `type`.
 * @returns Text for the part, or "" for part types that are not recognized.
 */
function serializePart(part) {
  switch (part.type) {
    case "text":
      return part.text;
    case "reasoning":
      return `[reasoning: ${part.text}]`;
    case "file":
      return `[file${part.filename ? `: ${part.filename}` : ""}]`;
    case "tool-call":
      // stringifyJson avoids printing the literal "undefined" for calls without input.
      return `[tool-call: ${part.toolName}(${stringifyJson(part.input)})]`;
    case "tool-result":
      return `[tool-result: ${part.toolName} \u2192 ${serializeToolOutput(part.output)}]`;
    case "tool-approval-response":
      return `[tool-approval: ${part.approved ? "approved" : "denied"}]`;
    default:
      return "";
  }
}
/**
 * JSON-encode a value, yielding "" when the value has no JSON representation.
 */
function stringifyJson(value) {
  const json = JSON.stringify(value);
  return json == null ? "" : json;
}
/**
 * Render a tool-result output.
 *
 * Error outputs are kept (prefixed with "error: ") and "content" outputs are
 * rendered part by part, so tool failures remain visible in the transcript
 * that is handed to the summarizer.
 *
 * @param output - Tool result output object discriminated by `type`.
 * @returns Text for the output, or "" for output types that are not recognized.
 */
function serializeToolOutput(output) {
  switch (output.type) {
    case "text":
      return String(output.value != null ? output.value : "");
    case "json":
      return stringifyJson(output.value != null ? output.value : "");
    case "error-text":
      return `error: ${String(output.value != null ? output.value : "")}`;
    case "error-json":
      return `error: ${stringifyJson(output.value != null ? output.value : "")}`;
    case "execution-denied":
      return `denied: ${output.reason != null ? output.reason : ""}`;
    case "content": {
      if (!Array.isArray(output.value)) return "";
      const rendered = [];
      for (const item of output.value) {
        if (item && item.type === "text" && typeof item.text === "string") {
          rendered.push(item.text);
        } else if (item && item.type) {
          // Non-text items are represented by a placeholder naming their type.
          rendered.push(`[${item.type}]`);
        }
      }
      return rendered.join("\n");
    }
    default:
      return "";
  }
}
/**
 * Collect the text parts of a generation result into one trimmed string.
 *
 * @param result - Generation result whose `content` is an array of parts.
 * @returns The text of all "text" parts joined by newlines and trimmed, or null when that is empty.
 */
function extractSummaryText(result) {
  const pieces = [];
  for (const item of result.content) {
    if (item.type === "text") {
      pieces.push(item.text);
    }
  }
  const joined = pieces.join("\n").trim();
  return joined === "" ? null : joined;
}
| function splitPrompt(prompt, recentTokenBudget, tokenEstimator) { | ||
| const systemMessages = []; | ||
| const nonSystem = []; | ||
| for (const msg of prompt) { | ||
| if (msg.role === "system") { | ||
| systemMessages.push(msg); | ||
| } else { | ||
| nonSystem.push(msg); | ||
| } | ||
| } | ||
| let recentTokens = 0; | ||
| let splitIndex = nonSystem.length; | ||
| for (let i = nonSystem.length - 1; i >= 0; i--) { | ||
| const msgTokens = tokenEstimator([nonSystem[i]]); | ||
| if (recentTokens + msgTokens > recentTokenBudget && i < nonSystem.length - 1) { | ||
| splitIndex = i + 1; | ||
| break; | ||
| } | ||
| recentTokens += msgTokens; | ||
| if (i === 0) { | ||
| splitIndex = 0; | ||
| } | ||
| } | ||
| while (splitIndex > 0 && _optionalChain([nonSystem, 'access', _ => _[splitIndex], 'optionalAccess', _2 => _2.role]) === "tool") { | ||
| splitIndex--; | ||
| } | ||
| return { | ||
| systemMessages, | ||
| olderHistory: nonSystem.slice(0, splitIndex), | ||
| recentWindow: nonSystem.slice(splitIndex) | ||
| }; | ||
| } | ||
| // src/middleware/context-compaction/index.ts | ||
// Defaults applied when the corresponding config field is null or undefined.
var DEFAULT_THRESHOLD_PCT = 0.8;
var DEFAULT_SUMMARY_TARGET_PCT = 0.05;
var DEFAULT_RESERVED_OUTPUT = 16384;
// Fraction of maxContextTokens used as the token budget of the recent window.
var RECENT_WINDOW_PCT = 0.2;
/**
 * Create a language-model middleware object that compacts long prompts before
 * they reach the model.
 *
 * @param config - Options: `maxContextTokens` (required, positive), and optional
 *   `autoCompactThresholdPct`, `summaryTargetPct` (both clamped to 0..1),
 *   `reservedOutputTokens`, `estimateTokens`, `summarize`, `onCompactionFailure`.
 * @returns An object with `specificationVersion: "v3"`, `wrapGenerate` and `wrapStream`.
 * @throws Error when `maxContextTokens` is falsy or not greater than zero.
 */
function createContextCompaction(config) {
  const windowSize = config.maxContextTokens;
  if (!windowSize || windowSize <= 0) {
    throw new Error(
      "[context-compaction] maxContextTokens must be a positive number"
    );
  }
  const orDefault = (value, fallback) => (value != null ? value : fallback);
  const thresholdPct = clamp(orDefault(config.autoCompactThresholdPct, DEFAULT_THRESHOLD_PCT), 0, 1);
  const summaryTargetPct = clamp(orDefault(config.summaryTargetPct, DEFAULT_SUMMARY_TARGET_PCT), 0, 1);
  const reservedOutput = orDefault(config.reservedOutputTokens, DEFAULT_RESERVED_OUTPUT);
  const tokenEstimator = orDefault(config.estimateTokens, estimateTokens);
  const onFailure = orDefault(config.onCompactionFailure, "passthrough");
  // NOTE(review): when reservedOutput >= windowSize * thresholdPct this value is <= 0,
  // so any prompt with a positive estimate is treated as over the threshold.
  const threshold = windowSize * thresholdPct - reservedOutput;
  const summaryTargetTokens = Math.floor(windowSize * summaryTargetPct);
  const recentWindowBudget = Math.floor(windowSize * RECENT_WINDOW_PCT);
  // Shared by both hooks; `config.summarize` is read at call time.
  const runCompaction = (model, params) =>
    compactIfNeeded(
      model,
      params,
      threshold,
      summaryTargetTokens,
      recentWindowBudget,
      tokenEstimator,
      config.summarize,
      onFailure
    );
  return {
    specificationVersion: "v3",
    wrapGenerate: async ({ doGenerate, model, params }) => {
      const compacted = await runCompaction(model, params);
      // Without compaction, defer to the provided doGenerate; otherwise call the model with the new params.
      return compacted ? model.doGenerate(compacted) : doGenerate();
    },
    wrapStream: async ({ doStream, model, params }) => {
      const compacted = await runCompaction(model, params);
      return compacted ? model.doStream(compacted) : doStream();
    }
  };
}
/**
 * Replace the older part of a prompt with a summary when the prompt is over the threshold.
 *
 * @param model - Model used by the default summarizer.
 * @param params - Call parameters; `params.prompt` is the message array.
 * @param threshold - Estimated-token count at or below which nothing is done.
 * @param summaryTargetTokens - Target length handed to the summarizer.
 * @param recentWindowBudget - Token budget for the messages kept verbatim.
 * @param tokenEstimator - Function estimating tokens for a message array.
 * @param customSummarize - Optional summarizer `(olderHistory, targetTokens)`; used instead of the default when truthy.
 * @param onFailure - "throw" rethrows/raises on summarization failure; any other value yields null.
 * @returns A copy of `params` with the compacted prompt, or null when no compaction was applied.
 */
async function compactIfNeeded(model, params, threshold, summaryTargetTokens, recentWindowBudget, tokenEstimator, customSummarize, onFailure) {
  if (tokenEstimator(params.prompt) <= threshold) {
    return null;
  }
  const pieces = splitPrompt(params.prompt, recentWindowBudget, tokenEstimator);
  if (pieces.olderHistory.length === 0) {
    // Nothing older than the recent window, so there is nothing to summarize.
    return null;
  }
  const rethrow = onFailure === "throw";
  let summaryText;
  try {
    summaryText = customSummarize
      ? await customSummarize(pieces.olderHistory, summaryTargetTokens)
      : await defaultSummarize(model, pieces.olderHistory, summaryTargetTokens);
  } catch (error) {
    if (rethrow) {
      throw error;
    }
    return null;
  }
  if (!summaryText) {
    if (rethrow) {
      throw new Error(
        "[context-compaction] Summarization produced no text output"
      );
    }
    return null;
  }
  // The summary is injected as a user turn followed by a short assistant acknowledgement.
  const summaryTurn = {
    role: "user",
    content: [{ type: "text", text: summaryText }]
  };
  const ackTurn = {
    role: "assistant",
    content: [{ type: "text", text: "Understood." }]
  };
  return {
    ...params,
    prompt: [...pieces.systemMessages, summaryTurn, ackTurn, ...pieces.recentWindow]
  };
}
/**
 * Summarize older history by calling the model's `doGenerate` with the compaction prompt.
 *
 * @param model - Model exposing `doGenerate`.
 * @param olderHistory - Messages to summarize.
 * @param targetTokens - Target summary length; also passed as `maxOutputTokens`.
 * @returns The summary text.
 * @throws Error when the result contains no text content.
 */
async function defaultSummarize(model, olderHistory, targetTokens) {
  const callOptions = {
    prompt: buildCompactionPrompt(serializePrompt(olderHistory), targetTokens),
    maxOutputTokens: targetTokens,
    // Strip everything that could cause tool calls or structured output
    tools: void 0,
    toolChoice: void 0,
    responseFormat: void 0,
    inputFormat: "messages",
    mode: { type: "regular" }
  };
  const summary = extractSummaryText(await model.doGenerate(callOptions));
  if (summary) {
    return summary;
  }
  throw new Error(
    "[context-compaction] Model returned no text content during summarization"
  );
}
/**
 * Restrict a number to the range [min, max].
 *
 * @returns `value` raised to at least `min`, then lowered to at most `max`.
 */
function clamp(value, min, max) {
  const raised = Math.max(min, value);
  return Math.min(max, raised);
}
| exports.createContextCompaction = createContextCompaction; |
| // src/middleware/context-compaction/prompt.ts | ||
// Opening fragment of the summarization system prompt: tells the model to answer with plain text
// only and not to call tools. buildCompactionPrompt places it first in the system message.
| var NO_TOOLS_PREAMBLE = `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools. | ||
| - Do NOT use any tools or functions. | ||
| - You already have all the context you need in the conversation below. | ||
| - Tool calls will be REJECTED and your response will be discarded. | ||
| - Your entire response must be plain text containing the summary. | ||
| `; | ||
// Main summarization instruction: states the task and enumerates the nine sections the summary
// should contain. Concatenated directly after NO_TOOLS_PREAMBLE.
| var SUMMARY_INSTRUCTION = `Your task is to create a detailed summary of the conversation history provided below. This summary will replace the older portion of the conversation so that work can continue without losing important context. | ||
| Your summary should include the following sections: | ||
| 1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail. | ||
| 2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed. | ||
| 3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include code snippets where applicable and a summary of why each file is important. | ||
| 4. Errors and Fixes: List all errors encountered and how they were fixed. Include any user feedback on corrections. | ||
| 5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts. | ||
| 6. All User Messages: List ALL user messages that are not tool results. These are critical for understanding user feedback and changing intent. | ||
| 7. Pending Tasks: Outline any pending tasks that were explicitly requested. | ||
| 8. Current Work: Describe precisely what was being worked on most recently, including file names and code snippets where applicable. | ||
| 9. Optional Next Step: List the next step that should be taken, directly in line with the most recent user requests. If the last task was concluded, only list next steps that are explicitly requested. | ||
| Be thorough and precise. Technical details, file paths, and code patterns are essential for continuing work without losing context.`; | ||
// Closing reminder appended as the last fragment of the system prompt, repeating the no-tools rule.
| var NO_TOOLS_TRAILER = "\n\nREMINDER: Do NOT call any tools. Respond with plain text only containing the summary."; | ||
/**
 * Build the two-message prompt used for the summarization request.
 *
 * @param serializedHistory - Text of the older conversation; interpolated into the user message.
 * @param targetTokens - Value interpolated into the "Target summary length" line of the system message.
 * @returns An array of two messages: a system message whose content is
 *   NO_TOOLS_PREAMBLE + SUMMARY_INSTRUCTION + the target-length line + NO_TOOLS_TRAILER,
 *   followed by a user message with a single text part containing the history.
 */
| function buildCompactionPrompt(serializedHistory, targetTokens) { | ||
| const systemContent = NO_TOOLS_PREAMBLE + SUMMARY_INSTRUCTION + ` | ||
| Target summary length: approximately ${targetTokens} tokens. Be concise but do not omit important details.` + NO_TOOLS_TRAILER; | ||
// System content is a plain string; user content is an array of parts.
| return [ | ||
| { role: "system", content: systemContent }, | ||
| { | ||
| role: "user", | ||
| content: [ | ||
| { | ||
| type: "text", | ||
| text: `Here is the conversation history to summarize: | ||
| ${serializedHistory}` | ||
| } | ||
| ] | ||
| } | ||
| ]; | ||
| } | ||
| // src/middleware/context-compaction/serialize.ts | ||
/**
 * Estimate the token count of a prompt with a 4-characters-per-token heuristic.
 *
 * @param prompt - Array of prompt messages.
 * @returns The summed character count of all messages divided by 4, rounded up.
 */
function estimateTokens(prompt) {
  let chars = 0;
  for (const msg of prompt) {
    chars += messageCharCount(msg);
  }
  return Math.ceil(chars / 4);
}
/**
 * Count the characters one message contributes to the estimate.
 *
 * @param msg - A prompt message; `content` is either a string or an array of parts.
 * @returns Character count: string length for string content, otherwise the sum over parts.
 */
function messageCharCount(msg) {
  // System messages carry a string. Any other string content is measured the same way
  // instead of being iterated character by character as if each character were a part.
  if (msg.role === "system" || typeof msg.content === "string") {
    return msg.content.length;
  }
  let chars = 0;
  for (const part of msg.content) {
    switch (part.type) {
      case "text":
      case "reasoning":
        chars += part.text.length;
        break;
      case "tool-call":
        // jsonLength tolerates an absent input; JSON.stringify(undefined) is undefined.
        chars += part.toolName.length + jsonLength(part.input);
        break;
      case "tool-result":
        chars += part.toolName.length + toolResultLength(part.output);
        break;
      default:
        // Files and unrecognized parts are charged a flat placeholder cost.
        chars += 20;
        break;
    }
  }
  return chars;
}
/**
 * Length of the JSON encoding of a value.
 *
 * @returns The encoded length, or 0 when the value has no JSON representation.
 */
function jsonLength(value) {
  const json = JSON.stringify(value);
  return json == null ? 0 : json.length;
}
/**
 * Count the characters of a tool-result output.
 *
 * Error outputs ("error-text" / "error-json") are measured like their non-error
 * counterparts, and "content" outputs are summed per part, so large outputs of
 * those kinds are no longer charged a flat 20 characters.
 *
 * @param output - Tool result output object discriminated by `type`.
 * @returns Character count; 20 for output types that are not recognized.
 */
function toolResultLength(output) {
  switch (output.type) {
    case "text":
    case "error-text":
      return String(output.value ?? "").length;
    case "json":
    case "error-json":
      return jsonLength(output.value ?? "");
    case "execution-denied":
      return (output.reason ?? "").length + 20;
    case "content": {
      if (!Array.isArray(output.value)) return 20;
      let chars = 0;
      for (const item of output.value) {
        // Non-text items get the same placeholder cost as file parts.
        chars += item?.type === "text" && typeof item.text === "string" ? item.text.length : 20;
      }
      return chars;
    }
    default:
      return 20;
  }
}
/**
 * Render a list of messages as plain text for the summarization request.
 *
 * @param messages - Array of prompt messages.
 * @returns The serialized messages separated by a blank line.
 */
function serializePrompt(messages) {
  return messages.map(serializeMessage).join("\n\n");
}
/**
 * Render one message as an upper-cased role label followed by its content.
 *
 * @param msg - A prompt message; `content` is either a string or an array of parts.
 * @returns "[ROLE]" on the first line, then the content (one line per non-empty part).
 */
function serializeMessage(msg) {
  const label = msg.role.toUpperCase();
  // String content (always the case for system messages) is emitted verbatim
  // rather than being iterated as if each character were a part.
  if (msg.role === "system" || typeof msg.content === "string") {
    return `[${label}]\n${msg.content}`;
  }
  const parts = [];
  for (const part of msg.content) {
    const s = serializePart(part);
    if (s) parts.push(s);
  }
  return `[${label}]\n${parts.join("\n")}`;
}
/**
 * Render a single content part.
 *
 * @param part - Content part discriminated by `type`.
 * @returns Text for the part, or "" for part types that are not recognized.
 */
function serializePart(part) {
  switch (part.type) {
    case "text":
      return part.text;
    case "reasoning":
      return `[reasoning: ${part.text}]`;
    case "file":
      return `[file${part.filename ? `: ${part.filename}` : ""}]`;
    case "tool-call":
      // stringifyJson avoids printing the literal "undefined" for calls without input.
      return `[tool-call: ${part.toolName}(${stringifyJson(part.input)})]`;
    case "tool-result":
      return `[tool-result: ${part.toolName} \u2192 ${serializeToolOutput(part.output)}]`;
    case "tool-approval-response":
      return `[tool-approval: ${part.approved ? "approved" : "denied"}]`;
    default:
      return "";
  }
}
/**
 * JSON-encode a value, yielding "" when the value has no JSON representation.
 */
function stringifyJson(value) {
  const json = JSON.stringify(value);
  return json == null ? "" : json;
}
/**
 * Render a tool-result output.
 *
 * Error outputs are kept (prefixed with "error: ") and "content" outputs are
 * rendered part by part, so tool failures remain visible in the transcript
 * that is handed to the summarizer.
 *
 * @param output - Tool result output object discriminated by `type`.
 * @returns Text for the output, or "" for output types that are not recognized.
 */
function serializeToolOutput(output) {
  switch (output.type) {
    case "text":
      return String(output.value ?? "");
    case "json":
      return stringifyJson(output.value ?? "");
    case "error-text":
      return `error: ${String(output.value ?? "")}`;
    case "error-json":
      return `error: ${stringifyJson(output.value ?? "")}`;
    case "execution-denied":
      return `denied: ${output.reason ?? ""}`;
    case "content": {
      if (!Array.isArray(output.value)) return "";
      const rendered = [];
      for (const item of output.value) {
        if (item?.type === "text" && typeof item.text === "string") {
          rendered.push(item.text);
        } else if (item?.type) {
          // Non-text items are represented by a placeholder naming their type.
          rendered.push(`[${item.type}]`);
        }
      }
      return rendered.join("\n");
    }
    default:
      return "";
  }
}
/**
 * Collect the text parts of a generation result into one trimmed string.
 *
 * @param result - Generation result whose `content` is an array of parts.
 * @returns The text of all "text" parts joined by newlines and trimmed, or null when that is empty.
 */
function extractSummaryText(result) {
  const pieces = [];
  for (const item of result.content) {
    if (item.type === "text") {
      pieces.push(item.text);
    }
  }
  const joined = pieces.join("\n").trim();
  return joined === "" ? null : joined;
}
| function splitPrompt(prompt, recentTokenBudget, tokenEstimator) { | ||
| const systemMessages = []; | ||
| const nonSystem = []; | ||
| for (const msg of prompt) { | ||
| if (msg.role === "system") { | ||
| systemMessages.push(msg); | ||
| } else { | ||
| nonSystem.push(msg); | ||
| } | ||
| } | ||
| let recentTokens = 0; | ||
| let splitIndex = nonSystem.length; | ||
| for (let i = nonSystem.length - 1; i >= 0; i--) { | ||
| const msgTokens = tokenEstimator([nonSystem[i]]); | ||
| if (recentTokens + msgTokens > recentTokenBudget && i < nonSystem.length - 1) { | ||
| splitIndex = i + 1; | ||
| break; | ||
| } | ||
| recentTokens += msgTokens; | ||
| if (i === 0) { | ||
| splitIndex = 0; | ||
| } | ||
| } | ||
| while (splitIndex > 0 && nonSystem[splitIndex]?.role === "tool") { | ||
| splitIndex--; | ||
| } | ||
| return { | ||
| systemMessages, | ||
| olderHistory: nonSystem.slice(0, splitIndex), | ||
| recentWindow: nonSystem.slice(splitIndex) | ||
| }; | ||
| } | ||
| // src/middleware/context-compaction/index.ts | ||
// Defaults applied when the corresponding config field is null or undefined.
var DEFAULT_THRESHOLD_PCT = 0.8;
var DEFAULT_SUMMARY_TARGET_PCT = 0.05;
var DEFAULT_RESERVED_OUTPUT = 16384;
// Fraction of maxContextTokens used as the token budget of the recent window.
var RECENT_WINDOW_PCT = 0.2;
/**
 * Create a language-model middleware object that compacts long prompts before
 * they reach the model.
 *
 * @param config - Options: `maxContextTokens` (required, positive), and optional
 *   `autoCompactThresholdPct`, `summaryTargetPct` (both clamped to 0..1),
 *   `reservedOutputTokens`, `estimateTokens`, `summarize`, `onCompactionFailure`.
 * @returns An object with `specificationVersion: "v3"`, `wrapGenerate` and `wrapStream`.
 * @throws Error when `maxContextTokens` is falsy or not greater than zero.
 */
function createContextCompaction(config) {
  const windowSize = config.maxContextTokens;
  if (!windowSize || windowSize <= 0) {
    throw new Error(
      "[context-compaction] maxContextTokens must be a positive number"
    );
  }
  const rawThresholdPct = config.autoCompactThresholdPct ?? DEFAULT_THRESHOLD_PCT;
  const thresholdPct = clamp(rawThresholdPct, 0, 1);
  const rawSummaryPct = config.summaryTargetPct ?? DEFAULT_SUMMARY_TARGET_PCT;
  const summaryTargetPct = clamp(rawSummaryPct, 0, 1);
  const reservedOutput = config.reservedOutputTokens ?? DEFAULT_RESERVED_OUTPUT;
  const tokenEstimator = config.estimateTokens ?? estimateTokens;
  const onFailure = config.onCompactionFailure ?? "passthrough";
  // NOTE(review): when reservedOutput >= windowSize * thresholdPct this value is <= 0,
  // so any prompt with a positive estimate is treated as over the threshold.
  const threshold = windowSize * thresholdPct - reservedOutput;
  const summaryTargetTokens = Math.floor(windowSize * summaryTargetPct);
  const recentWindowBudget = Math.floor(windowSize * RECENT_WINDOW_PCT);
  // Shared by both hooks; `config.summarize` is read at call time.
  const runCompaction = (model, params) =>
    compactIfNeeded(
      model,
      params,
      threshold,
      summaryTargetTokens,
      recentWindowBudget,
      tokenEstimator,
      config.summarize,
      onFailure
    );
  return {
    specificationVersion: "v3",
    wrapGenerate: async ({ doGenerate, model, params }) => {
      const compacted = await runCompaction(model, params);
      // Without compaction, defer to the provided doGenerate; otherwise call the model with the new params.
      return compacted ? model.doGenerate(compacted) : doGenerate();
    },
    wrapStream: async ({ doStream, model, params }) => {
      const compacted = await runCompaction(model, params);
      return compacted ? model.doStream(compacted) : doStream();
    }
  };
}
/**
 * Replace the older part of a prompt with a summary when the prompt is over the threshold.
 *
 * @param model - Model used by the default summarizer.
 * @param params - Call parameters; `params.prompt` is the message array.
 * @param threshold - Estimated-token count at or below which nothing is done.
 * @param summaryTargetTokens - Target length handed to the summarizer.
 * @param recentWindowBudget - Token budget for the messages kept verbatim.
 * @param tokenEstimator - Function estimating tokens for a message array.
 * @param customSummarize - Optional summarizer `(olderHistory, targetTokens)`; used instead of the default when truthy.
 * @param onFailure - "throw" rethrows/raises on summarization failure; any other value yields null.
 * @returns A copy of `params` with the compacted prompt, or null when no compaction was applied.
 */
async function compactIfNeeded(model, params, threshold, summaryTargetTokens, recentWindowBudget, tokenEstimator, customSummarize, onFailure) {
  if (tokenEstimator(params.prompt) <= threshold) {
    return null;
  }
  const pieces = splitPrompt(params.prompt, recentWindowBudget, tokenEstimator);
  if (pieces.olderHistory.length === 0) {
    // Nothing older than the recent window, so there is nothing to summarize.
    return null;
  }
  const rethrow = onFailure === "throw";
  let summaryText;
  try {
    summaryText = customSummarize
      ? await customSummarize(pieces.olderHistory, summaryTargetTokens)
      : await defaultSummarize(model, pieces.olderHistory, summaryTargetTokens);
  } catch (error) {
    if (rethrow) {
      throw error;
    }
    return null;
  }
  if (!summaryText) {
    if (rethrow) {
      throw new Error(
        "[context-compaction] Summarization produced no text output"
      );
    }
    return null;
  }
  // The summary is injected as a user turn followed by a short assistant acknowledgement.
  const summaryTurn = {
    role: "user",
    content: [{ type: "text", text: summaryText }]
  };
  const ackTurn = {
    role: "assistant",
    content: [{ type: "text", text: "Understood." }]
  };
  return {
    ...params,
    prompt: [...pieces.systemMessages, summaryTurn, ackTurn, ...pieces.recentWindow]
  };
}
/**
 * Summarize older history by calling the model's `doGenerate` with the compaction prompt.
 *
 * @param model - Model exposing `doGenerate`.
 * @param olderHistory - Messages to summarize.
 * @param targetTokens - Target summary length; also passed as `maxOutputTokens`.
 * @returns The summary text.
 * @throws Error when the result contains no text content.
 */
async function defaultSummarize(model, olderHistory, targetTokens) {
  const callOptions = {
    prompt: buildCompactionPrompt(serializePrompt(olderHistory), targetTokens),
    maxOutputTokens: targetTokens,
    // Strip everything that could cause tool calls or structured output
    tools: void 0,
    toolChoice: void 0,
    responseFormat: void 0,
    inputFormat: "messages",
    mode: { type: "regular" }
  };
  const summary = extractSummaryText(await model.doGenerate(callOptions));
  if (summary) {
    return summary;
  }
  throw new Error(
    "[context-compaction] Model returned no text content during summarization"
  );
}
/**
 * Restrict a number to the range [min, max].
 *
 * @returns `value` raised to at least `min`, then lowered to at most `max`.
 */
function clamp(value, min, max) {
  const raised = Math.max(min, value);
  return Math.min(max, raised);
}
| export { | ||
| createContextCompaction | ||
| }; |
375971
3.5%7375
2.84%846
10.16%