| // providers/condense.mjs — increment 0013 (US-005). | ||
| // Plan-state-aware message-history condensing for local providers. The old proxy logic | ||
| // kept head+tail and replaced the dropped middle with a semantically-empty | ||
| // "[Earlier conversation condensed]" filler. That could drop the turn where plan mode | ||
| // was established, so a weak local model loses the plan and re-enters plan mode in a | ||
| // loop. This version (a) never drops a plan-mode turn (or the assistant turn that | ||
| // follows it), and (b) replaces each dropped run with a short STRUCTURED summary | ||
| // (turn count + tool names) instead of empty filler. Pure + dependency-free. | ||
// Case-insensitive, unanchored pattern for plan-mode tool names: "enter" or "exit",
// then "plan" and "mode", each optionally preceded by an underscore
// (e.g. EnterPlanMode, exit_plan_mode). No /g flag, so .test() keeps no lastIndex state.
const PLAN_RE = /(?:enter|exit)_?plan_?mode/i;
/**
 * Approximate the size of one message in characters (used for budgeting only).
 *
 * Counts, per content block: the `text` of text blocks, the JSON-serialized `input`
 * of blocks that carry one (tool_use), and the `content` payload of `tool_result`
 * blocks. Tool results were previously ignored, so tool-heavy histories looked far
 * smaller than they really are.
 *
 * @param {any} m - A message whose `content` is a string or an array of blocks.
 * @returns {number} Character estimate; 0 for missing/unsupported content.
 */
function msgChars(m) {
  if (!m) return 0;
  return contentChars(m.content);
}

// Character estimate for a `content` value: a plain string or an array of blocks.
function contentChars(content) {
  if (typeof content === 'string') return content.length;
  if (!Array.isArray(content)) return 0;
  let total = 0;
  for (const block of content) {
    if (!block) continue;
    if (typeof block.text === 'string' && block.text.length > 0) {
      total += block.text.length;
    } else if (block.input) {
      // JSON.stringify may yield undefined for non-serializable values; count that as 0.
      total += (JSON.stringify(block.input) ?? '').length;
    } else if (block.type === 'tool_result') {
      // A tool_result payload is itself a string or a nested block array.
      total += contentChars(block.content);
    }
  }
  return total;
}
/**
 * Report whether a message contains a plan-mode tool call.
 *
 * A message qualifies when its `content` is an array holding at least one
 * `tool_use` block whose `name` matches PLAN_RE (Enter/ExitPlanMode variants).
 *
 * @param {any} m - Candidate message.
 * @returns {boolean} true if a matching tool_use block is present.
 */
export function isPlanTurn(m) {
  const blocks = m ? m.content : undefined;
  if (!Array.isArray(blocks)) return false;
  for (const block of blocks) {
    if (!block || block.type !== 'tool_use') continue;
    if (PLAN_RE.test(block.name || '')) return true;
  }
  return false;
}
/**
 * Collect the names of all tool_use blocks in a message, in order of appearance.
 *
 * @param {any} m - Message whose `content` may be an array of blocks.
 * @returns {string[]} Tool names (duplicates kept); empty when there is no block array.
 */
function toolNamesIn(m) {
  const names = [];
  if (m && Array.isArray(m.content)) {
    for (const block of m.content) {
      // Blocks without a truthy name are ignored.
      if (block && block.type === 'tool_use' && block.name) names.push(block.name);
    }
  }
  return names;
}
/**
 * Condense a message history so it roughly fits `maxChars`, never dropping plan-mode turns.
 *
 * Kept verbatim: the first two messages, a tail sized from the average message length,
 * every plan-mode turn, and the message directly after each plan-mode turn. Each
 * contiguous run of dropped messages is replaced by one `user` summary message
 * (turn count plus up to 8 distinct tool names).
 *
 * @param {any[]} messages - Conversation history; never mutated.
 * @param {{ maxChars?: number }} [options] - Approximate character budget (default 16000).
 * @returns {{ messages: any[], dropped: number }} When nothing is condensed, `messages`
 *   is the input array itself and `dropped` is 0; otherwise a new array is returned.
 */
export function condenseMessages(messages, { maxChars = 16000 } = {}) {
  const untouched = { messages, dropped: 0 };
  if (!Array.isArray(messages) || messages.length <= 4) return untouched;

  const count = messages.length;
  let totalChars = 0;
  for (const msg of messages) totalChars += msgChars(msg);
  if (totalChars <= maxChars) return untouched;

  // How many messages of average size fit the budget (never fewer than 4).
  const avgChars = totalChars / count;
  const keepBudget = Math.max(4, Math.min(count, Math.floor(maxChars / avgChars)));
  if (keepBudget >= count) return untouched;

  // Pinned indices: head (0, 1), the tail (last keepBudget - 2), and plan turns + successor.
  const pinned = new Set([0, 1]);
  const tailStart = Math.max(2, count - (keepBudget - 2));
  for (let idx = tailStart; idx < count; idx += 1) pinned.add(idx);
  for (let idx = 0; idx < count; idx += 1) {
    if (!isPlanTurn(messages[idx])) continue;
    pinned.add(idx);
    if (idx + 1 < count) pinned.add(idx + 1);
  }

  const condensed = [];
  let dropped = 0;
  let skipped = [];

  // Replace the pending run of skipped messages (if any) with one structured summary.
  const flushSkipped = () => {
    const turns = skipped.length;
    if (turns === 0) return;
    const toolList = [...new Set(skipped.flatMap((msg) => toolNamesIn(msg)))].slice(0, 8).join(', ');
    const plural = turns > 1 ? 's' : '';
    const toolNote = toolList ? `; tools used: ${toolList}` : '';
    condensed.push({
      role: 'user',
      content: `[Condensed ${turns} earlier turn${plural}${toolNote}]`,
    });
    dropped += turns;
    skipped = [];
  };

  for (let idx = 0; idx < count; idx += 1) {
    if (!pinned.has(idx)) {
      skipped.push(messages[idx]);
      continue;
    }
    flushSkipped();
    condensed.push(messages[idx]);
  }
  flushSkipped();

  return { messages: condensed, dropped };
}
+26
-1
@@ -60,3 +60,3 @@ #!/usr/bin/env node | ||
| export function parseArgs(argv) { | ||
| const opts = { provider: 'auto', port: 9090, host: null, model: null, help: false, freeOnly: false, token: null, rpm: 60, passthrough: [], fullMcp: false, localFidelity: null }; | ||
| const opts = { provider: 'auto', port: 9090, host: null, model: null, help: false, freeOnly: false, token: null, rpm: 60, passthrough: [], fullMcp: false, localFidelity: null, localAgentic: false }; | ||
@@ -99,2 +99,8 @@ for (let i = 0; i < argv.length; i++) { | ||
| opts.fullMcp = true; | ||
| } else if (arg === '--local-agentic') { | ||
| // 0013/US-006: preset for agentic local coding — keep the Skill tool (--full-mcp), | ||
| // balanced skill-fidelity, and agentic env defaults (see applyLocalAgenticEnv). | ||
| opts.localAgentic = true; | ||
| opts.fullMcp = true; | ||
| if (!opts.localFidelity) opts.localFidelity = 'balanced'; | ||
| } else if (arg === '--local-fidelity' || arg.startsWith('--local-fidelity=')) { | ||
@@ -112,2 +118,12 @@ // Local skill-fidelity tier: lean | balanced | full (default balanced). 0010. | ||
/**
 * 0013/US-006: fill in the agentic-local environment defaults.
 *
 * Only variables that are currently null/undefined are written, so explicit user
 * settings (including empty strings) are left alone. Does nothing unless
 * `opts.localAgentic` is truthy.
 *
 * @param {Record<string, any>} env - Environment object; mutated in place.
 * @param {{ localAgentic?: boolean, localFidelity?: string }} [opts] - Parsed CLI options.
 * @returns {Record<string, any>} The same `env` object that was passed in.
 */
export function applyLocalAgenticEnv(env, opts = {}) {
  if (!(opts && opts.localAgentic)) return env;

  const fallbacks = {
    LOCAL_REFUSAL_RETRY: 'on',
    LOCAL_NUM_CTX: '65536',
    LOCAL_FIDELITY: opts.localFidelity || 'balanced',
  };
  for (const [key, value] of Object.entries(fallbacks)) {
    if (env[key] === null || env[key] === undefined) env[key] = value;
  }
  return env;
}
| // Decide whether to auto-strip global MCP servers for the current session. | ||
@@ -264,2 +280,4 @@ // Local providers always get suppressed unless the user opts out or already passed an MCP flag. | ||
| --full-mcp Keep all globally-configured MCP servers (default on local: suppress global MCP) | ||
| --local-agentic Preset for agentic local coding: keeps the Skill tool (--full-mcp), balanced | ||
| fidelity, refusal-retry on, 64K ctx, + guidance to relax hook-heavy repos. | ||
| --local-fidelity <tier> Local skill-fidelity: lean | balanced | full (default balanced). | ||
@@ -542,2 +560,9 @@ balanced re-injects a compact skill catalog so skills auto-trigger on local models. | ||
| // 0013/US-006: agentic-local preset — set env defaults + surface hook-relaxation guidance. | ||
| if (opts.localAgentic) { | ||
| applyLocalAgenticEnv(process.env, opts); | ||
| console.log(`${C.green('[anymodel]')} ${C.bold('--local-agentic')}: LOCAL_REFUSAL_RETRY=on, LOCAL_NUM_CTX=${process.env.LOCAL_NUM_CTX}, LOCAL_FIDELITY=${process.env.LOCAL_FIDELITY}, full MCP (Skill tool kept)`); | ||
| console.log(`${C.yellow('[anymodel]')} For SpecWeave/hook-heavy repos, relax local-hostile gates: set ${C.bold('incrementAssist.mandatory=false')} and drop "ALWAYS plan mode" / "SKILL FIRST (BLOCKING)" from the project CLAUDE.md — a local model cannot satisfy hard meta-directives and will loop.`); | ||
| } | ||
| let providerName = opts.provider; | ||
@@ -544,0 +569,0 @@ if (providerName === 'auto') { |
+76
-0
@@ -125,2 +125,3 @@ # Running Claude Code locally through AnyModel → LMStudio | ||
| | Project `./.claude/skills/*/SKILL.md` | ✅ loaded | | ||
| | Foreign skill roots (`.agents` / `.codex` / `.gemini` / `.agent`, cwd + `$HOME`) | ✅ bridged ([see below](#universal-skill-discovery)) | | ||
| | Project `./.claude/agents/*.md` | ✅ loaded | | ||
@@ -199,2 +200,48 @@ | Project `CLAUDE.md` | ✅ loaded | | ||
| ## Universal skill discovery | ||
| SKILL.md is one shared open standard — Claude Code, OpenAI/Codex, Gemini/Antigravity, | ||
| Cursor, and Copilot all write the same `<name>/SKILL.md` format. Since `anymodel@1.16.0`, | ||
| AnyModel auto-discovers skills from the **other ecosystems'** roots and bridges them into | ||
| the bundled client with **zero format translation**. No flags needed. | ||
| It scans these foreign roots, in precedence order, under **both** the project cwd **and** | ||
| `$HOME`: | ||
| | Root | Ecosystem | | ||
| |---|---| | ||
| | `.agents/skills/` | cross-tool interop (Codex, Cursor, Copilot, Goose, Gemini CLI) | | ||
| | `.codex/skills/` | OpenAI Codex | | ||
| | `.gemini/skills/` | Gemini CLI | | ||
| | `.agent/skills/` | Google Antigravity (singular) | | ||
| Each discovered `<root>/<name>/SKILL.md` is symlinked into a per-session temp | ||
| `.claude/skills` shadow that AnyModel passes to the client via `--add-dir`, so the | ||
| client's native SKILL.md reader picks it up. You'll see a launch line like: | ||
| ``` | ||
| [anymodel] Bridged 3 skill(s) from .agents/.codex/.gemini: my-skill, codex-skill, gem-skill | ||
| ``` | ||
| **Rules:** | ||
| - Project `./.claude/skills/<name>` **wins** on a name collision (case-insensitive); | ||
| among foreign roots the first one wins, and shadowed duplicates are logged. | ||
| - Symlinked skill entries must resolve *inside* their scanned root (an untrusted repo | ||
| can't point a "skill" at `~/.ssh`); escapers are skipped. | ||
| - Unlinkable skills (e.g. Windows without symlink privilege) are logged, never silently | ||
| dropped. | ||
| ### `ANYMODEL_SKILL_ROOTS` — add or override discovery roots | ||
| Colon-separated paths add extra skill-discovery roots beyond the conventional ones: | ||
| ```bash | ||
| # absolute roots used as-is; relative roots resolve against cwd | ||
| ANYMODEL_SKILL_ROOTS="$HOME/work/shared-skills:./vendor/skills" anymodel | ||
| ``` | ||
| Use it to share one skill library across projects, or to point at a non-standard layout. | ||
| Roots are de-duped (order preserved) and the project's own `.claude/skills` still wins on | ||
| any name collision. | ||
| ## Troubleshooting | ||
@@ -290,2 +337,31 @@ | ||
| ## Local agentic profile (recommended for real coding loops) | ||
| Local coding models are strong at single tool calls but degrade on long, multi-turn agentic | ||
| tasks — they lose plan state and revert to RLHF refusals ("I can't browse / deploy"). The | ||
| `--local-agentic` preset tunes the proxy for that workload: | ||
| ```bash | ||
| anymodel proxy lmstudio --model qwen/qwen3-coder-30b --local-agentic | ||
| ``` | ||
| It sets (without overriding anything you set explicitly): | ||
| | Setting | Value | Why | | ||
| |---|---|---| | ||
| | `--full-mcp` | implied | keeps the **Skill tool** so SpecWeave/skill hooks are satisfiable | | ||
| | `LOCAL_FIDELITY` | `balanced` | re-inject the skill catalog + behavioral core every turn | | ||
| | `LOCAL_REFUSAL_RETRY` | `on` | re-issue once with a "use your tools" nudge on a capability-disclaimer refusal | | ||
| | `LOCAL_NUM_CTX` | `65536` | Qwen3-Coder's recommended agentic budget — keeps plan state from being truncated | | ||
| **Also relax hook-heavy repos.** In a SpecWeave (or similar) project, the surviving `CLAUDE.md` | ||
| can impose directives that a local model cannot satisfy, which produces the plan-mode loop. For local sessions: | ||
| set `incrementAssist.mandatory=false` in `.specweave/config.json`, drop the "ALWAYS enter plan | ||
| mode (MANDATORY)" and "SKILL FIRST = BLOCKING PRECONDITION" language, and keep `--full-mcp` | ||
| whenever the project depends on the Skill tool (the preset does this for you). | ||
| **Realistic expectations.** Local 30B is great for bounded edits, refactors, exploration, and | ||
| tight tool-attached loops under ~65K context — not a drop-in autonomous Claude for long | ||
| multi-turn tasks with screenshot verification. | ||
| ## Further reading | ||
@@ -292,0 +368,0 @@ |
+1
-1
| { | ||
| "name": "anymodel", | ||
| "version": "1.16.0", | ||
| "version": "1.16.1", | ||
| "description": "Universal AI model proxy — route any coding tool through OpenRouter, Ollama, LMStudio, llama.cpp, or any LLM provider", | ||
@@ -5,0 +5,0 @@ "type": "module", |
+16
-0
@@ -51,2 +51,18 @@ // OpenAI provider for anymodel | ||
// US-004 (0013): detect an RLHF capability-disclaimer refusal ("I can't browse the
// internet", "I cannot deploy / run code", "I'm unable to access the URL"). A local
// coding model emits these as prose even with tools attached, dead-ending the loop.
//
// Shape: "I" + up to 30 chars + a negation ("can't", "cannot", "unable to",
// "don't have (the) ability/access") + up to 15 chars + a capability verb, all within
// one sentence (no . ! ? or newline in between). Both the ASCII apostrophe (') and the
// typographic one (U+2019) are accepted, since models often emit curly quotes.
const REFUSAL_RE = /\bI\b[^.!?\n]{0,30}?\b(?:can['\u2019]?t|cannot|can ?not|unable to|am unable to|don['\u2019]?t have (?:the )?(?:ability|access))\b[^.!?\n]{0,15}?\b(?:access|browse|visit|deploy|run|execute|open|reach|connect|provide|create)\b/i;

/**
 * Check whether a piece of model output reads as a capability-disclaimer refusal.
 *
 * @param {unknown} text - Model output; anything other than a non-empty string yields false.
 * @returns {boolean} true when the text matches REFUSAL_RE.
 */
export function isCapabilityRefusal(text) {
  return typeof text === 'string' && text.length > 0 && REFUSAL_RE.test(text);
}
/**
 * US-004: decide whether the proxy should re-issue the request once with a
 * "use your tools" nudge.
 *
 * All of the following must hold: the retry is enabled, the turn ended with
 * stop reason 'end_turn', tools were attached (so the model had an alternative to
 * disclaiming), and the response text is a capability refusal.
 *
 * @param {{ enabled?: any, stopReason?: string, hasTools?: any, text?: any }} [params]
 *   `enabled` defaults to true when omitted.
 * @returns {boolean} true when a single nudged retry is warranted.
 */
export function shouldRetryRefusal({ enabled = true, stopReason, hasTools, text } = {}) {
  if (!enabled) return false;
  if (stopReason !== 'end_turn') return false;
  if (!hasTools) return false;
  return isCapabilityRefusal(text);
}
| // P1.2: translate an Anthropic content-block array into OpenAI message content. | ||
@@ -53,0 +69,0 @@ // Returns a plain STRING when every block is text (keeps text-only turns |
+21
-0
@@ -110,2 +110,22 @@ # AnyModel | ||
| ### Universal Skills (1.16.0+) | ||
| `SKILL.md` is one shared open standard — Claude Code, OpenAI/Codex, Gemini/Antigravity, Cursor, and Copilot all read the same format (a `<name>/SKILL.md` directory with YAML frontmatter + Markdown body). AnyModel auto-discovers your skills no matter which tool's convention you used, with zero format translation. | ||
| At launch, AnyModel scans these roots in both the project working directory **and** `$HOME`: | ||
| ``` | ||
| .claude/skills/ .agents/skills/ .codex/skills/ .gemini/skills/ .agent/skills/ | ||
| ``` | ||
| Each discovered skill is symlinked into a per-session temp `.claude/skills` shadow that is passed to the client via `--add-dir`, so the client's native SKILL.md reader and progressive disclosure handle everything. | ||
| - **Project wins on collision** — a project `.claude/skills/<name>` shadows a foreign-root skill of the same name. | ||
| - **Duplicates and unlinkable skills are logged** — foreign-root name collisions and any skills that can't be symlinked are surfaced, not silently dropped. | ||
| - **Add or override roots** with `ANYMODEL_SKILL_ROOTS` — a colon-separated list of paths (absolute, or relative to the working directory) merged into discovery. | ||
| ```bash | ||
| ANYMODEL_SKILL_ROOTS=/opt/shared/skills:/Users/me/extra/skills npx anymodel | ||
| ``` | ||
| ### OpenAI-Compatible APIs | ||
@@ -168,2 +188,3 @@ | ||
| | `ANYMODEL_TOKEN` | — | Auth token for remote mode | | ||
| | `ANYMODEL_SKILL_ROOTS` | — | Colon-separated paths (absolute, or relative to the working directory) added to skill discovery roots | |
| | `OLLAMA_NUM_CTX` | `8192` | Ollama context window size | | ||
@@ -170,0 +191,0 @@ | `OLLAMA_KEEP_ALIVE` | `30m` | How long Ollama keeps model in GPU memory | |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Environment variable access
Supply chain riskPackage accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
13300451
0.1%20
5.26%5533
2.33%208
11.23%74
5.71%