@avcodes/mi - npm Package Compare versions

Comparing version

1.4.0

1.5.0

+11

-9

index.mjs

		@@ -36,11 +36,13 @@ #!/usr/bin/env node
		* Call the chat API in a loop, executing tool calls, until the model
		* returns a plain text reply.
		* returns a plain text reply. Streams content tokens to stdout as they arrive.
		*/
		async function run(messages) { while (true) {

		/* POST to the completions endpoint; parse the JSON response. */
		const response = await fetch(`${(process.env.OPENAI_BASE_URL || 'https://api.openai.com').replace(/\/+$/, '')}/v1/chat/completions`, { method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${process.env.OPENAI_API_KEY}` }, body: JSON.stringify({ model: process.env.MODEL || 'gpt-5.4', messages, tools: toolsDef }) }).then(res => res.json());
		/* POST with stream:true; on any non-2xx response (!res.ok), throw using the message from the JSON error body, falling back to the HTTP status. */
		const res = await fetch(`${(process.env.OPENAI_BASE_URL || 'https://api.openai.com').replace(/\/+$/, '')}/v1/chat/completions`, { method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${process.env.OPENAI_API_KEY}` }, body: JSON.stringify({ model: process.env.MODEL || 'gpt-5.4', messages, tools: toolsDef, stream: true }) }); if (!res.ok) { const e = await res.json().catch(()=>({})); throw new Error(e.error?.message || `HTTP ${res.status}`); }

		/* Throw on API error; push the message, return content once no tool calls remain. */
		if (response.error) throw new Error(response.error.message || JSON.stringify(response.error)); const message = response.choices?.[0]?.message; if (!message) throw new Error(JSON.stringify(response)); messages.push(message); if (!message.tool_calls) return message.content;
		/* Iterate SSE deltas: write content tokens to stdout, merge tool_call fragments by index into one assistant message. */
		const message = { role: 'assistant', content: '' }, dec = new TextDecoder(); let buf = '';
		for await (const chunk of res.body) { buf += dec.decode(chunk, {stream:true}); let i; while ((i = buf.indexOf('\n\n')) >= 0) { const ev = buf.slice(0, i); buf = buf.slice(i+2); for (const line of ev.split('\n')) { if (!line.startsWith('data: ')) continue; const d = line.slice(6); if (d === '[DONE]') continue; let p; try { p = JSON.parse(d); } catch { continue; } if (p.error) throw new Error(p.error.message || JSON.stringify(p.error)); const delta = p.choices?.[0]?.delta; if (!delta) continue; if (delta.content) { process.stdout.write(delta.content); message.content += delta.content; } if (delta.tool_calls) { message.tool_calls ||= []; for (const tc of delta.tool_calls) { const t = message.tool_calls[tc.index] ||= { id:'', type:'function', function:{name:'',arguments:''} }; if (tc.id) t.id = tc.id; if (tc.type) t.type = tc.type; if (tc.function?.name) t.function.name += tc.function.name; if (tc.function?.arguments) t.function.arguments += tc.function.arguments; } } } } }
		if (message.content) process.stdout.write('\n'); messages.push(message); if (!message.tool_calls) return;

		@@ -67,9 +69,9 @@ for (const toolCall of message.tool_calls) {

		if (getArg('-p')) { history.push({ role: 'user', content: getArg('-p') }); console.log(await run(history)); process.exit(0); }
		if (getArg('-p')) { history.push({ role: 'user', content: getArg('-p') }); await run(history); process.exit(0); }

		if (!process.stdin.isTTY) { let inputStr = ''; for await (const chunk of process.stdin) inputStr += chunk; history.push({ role: 'user', content: inputStr.trim() }); console.log(await run(history)); process.exit(0); }
		if (!process.stdin.isTTY) { let inputStr = ''; for await (const chunk of process.stdin) inputStr += chunk; history.push({ role: 'user', content: inputStr.trim() }); await run(history); process.exit(0); }

		/* Set up the readline interface and enter the interactive REPL. */
		const readLine = createInterface({ input: process.stdin, output: process.stdout }); const promptUser = query => new Promise(resolve => readLine.question(query, resolve));
		const readLine = createInterface({ input: process.stdin, output: process.stdout }); const promptUser = query => new Promise(resolve => readLine.question(query, resolve)); const ver = JSON.parse(readFileSync(DIR+'package.json','utf8')).version; console.log('\x1b[38;5;208m◰ mi\x1b[90m/'+ver+'\x1b[0m');

		readLine.on('close', () => process.exit(0)); while (true) { const input = await promptUser('\n> '); if (input === '/reset') { history.splice(1); continue; } if (input.trim()) { history.push({ role: 'user', content: input }); console.log(await run(history)); } }
		readLine.on('close', () => process.exit(0)); while (true) { const input = await promptUser('\n> '); if (input === '/reset') { history.splice(1); continue; } if (input.trim()) { history.push({ role: 'user', content: input }); process.stdout.write('\x1b[90m─────\x1b[0m\n'); try { await run(history); } catch(e) { console.error('\x1b[31m✗ ' + e.message + '\x1b[0m'); history.pop(); } } }

+2

-2

package.json

		{
		"name": "@avcodes/mi",
		"version": "1.4.0",
		"description": "agentic coding in 27 loc. a loop, two tools, and an llm.",
		"version": "1.5.0",
		"description": "agentic coding in 29 loc. a loop, two tools, and an llm.",
		"type": "module",
		@@ -6,0 +6,0 @@ "bin": {

+20

-18

README.md

		@@ -5,7 +5,8 @@ ![Splash image](./assets/splash.png)

		agentic coding in 27 loc. a loop, two tools, and an llm.
		agentic coding in 29 loc. a loop, two tools, and an llm.

		## features

		- `bash` (optional `timeout=<ms>` kills after delay, `bg=truthy` detaches and returns pid+log) and `skill` tools — file I/O goes through `bash` (`cat`, `sed -i`, heredocs)
		- streaming: assistant tokens stream to stdout as they arrive (SSE), no waiting for the full reply
		- `bash` (optional `timeout=<ms>` kills after delay, `bg=truthy` detaches and returns pid+log) and `skill` tools; file I/O goes through `bash` (`cat`, `sed -i`, heredocs)
		- `skill` tool loads `SKILL.md` playbooks from bundled `skills/` and `~/.agents/skills/` (descriptions auto-advertised in system prompt so the model matches tasks to skills)
		@@ -16,3 +17,3 @@ - bundled skills: `plan`, `tasks`, `delegate`, `verify`, `debug`, `tdd`
		- automatic ingestion of `AGENTS.md` if it exists in current directory
		- chat REPL by default with interactive `/reset` command to clear history
		- chat REPL by default with version banner, interactive `/reset` command, and error recovery (failed requests pop the user message instead of crashing)
		- graceful `SIGINT` (Ctrl+C) handling for bash child processes
		@@ -55,3 +56,3 @@ - non-interactive mode with `-p 'prompt'` arg
		|-----|---------|------|
		| `OPENAI_API_KEY` | — | api key |
		| `OPENAI_API_KEY` | (none) | api key |
		| `OPENAI_BASE_URL` | `https://api.openai.com` | api base url (ollama, lmstudio, litellm, etc) |
		@@ -76,3 +77,3 @@ | `MODEL` | `gpt-5.4` | model name |

		`bash` gives the agent access to the entire system: git, curl, compilers, package managers, and file I/O (via `cat`, `sed -n`, `sed -i`, heredocs — the system prompt teaches the patterns). optional `timeout=<ms>` kills the process after the given delay and resolves with `[timeout]`. optional `bg=truthy` runs the command detached and returns `pid:X log:/tmp/mi-*.log` immediately. `skill` gives the agent specialized workflows loaded on demand from markdown playbooks. every tool returns a string because that's what goes back into the conversation.
		`bash` gives the agent access to the entire system: git, curl, compilers, package managers, and file I/O (via `cat`, `sed -n`, `sed -i`, heredocs; the system prompt teaches the patterns). optional `timeout=<ms>` kills the process after the given delay and resolves with `[timeout]`. optional `bg=truthy` runs the command detached and returns `pid:X log:/tmp/mi-*.log` immediately. `skill` gives the agent specialized workflows loaded on demand from markdown playbooks. every tool returns a string because that's what goes back into the conversation.

		@@ -110,14 +111,14 @@ ### tool definitions

		each iteration makes a single call to the chat completions endpoint. the model receives the full message history and the tool definitions:
		each iteration makes a single call to the chat completions endpoint. the model receives the full message history and the tool definitions, and we ask for an SSE stream so tokens arrive incrementally:

		```js
		const r = await fetch(`${base}/v1/chat/completions`, {
		const res = await fetch(`${base}/v1/chat/completions`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
		body: JSON.stringify({ model, messages: msgs, tools: defs }),
		}).then(r => r.json());
		const msg = r.choices[0].message;
		body: JSON.stringify({ model, messages: msgs, tools: defs, stream: true }),
		});
		// iterate res.body, parse `data: {...}` events, accumulate deltas into one message
		```

		the response message either has `content` (a text reply to the user) or `tool_calls` (the model wants to use tools). this is the decision point that drives the whole loop.
		the stream emits `delta` chunks: `delta.content` is partial text (write straight to stdout as it arrives), `delta.tool_calls[i]` are partial tool-call fragments (id/name first, then `arguments` in pieces; merge by `index`). once `[DONE]` arrives, the assembled message either has `content` (a text reply) or `tool_calls` (the model wants to use tools). this is the decision point that drives the whole loop.

		@@ -131,5 +132,5 @@ ### the agentic loop
		while (true) {
		const msg = await callLLM(msgs); // make the api call
		msgs.push(msg); // add assistant response to history
		if (!msg.tool_calls) return msg.content; // no tools? we're done
		const msg = await streamLLM(msgs); // stream tokens to stdout, return assembled message
		msgs.push(msg); // add assistant response to history
		if (!msg.tool_calls) return; // no tools? we're done (text already streamed)
		// otherwise, execute tools and continue...
		@@ -140,3 +141,3 @@ }

		the loop exits only when the model decides it has enough information to respond directly. the model might call tools once or twenty times, it drives its own execution. this is what makes it agentic: the llm decides when it's done, not the code.
		the loop exits only when the model decides it has enough information to respond directly. the model might call tools once or twenty times; it drives its own execution. this is what makes it agentic: the llm decides when it's done, not the code. note that text content is written to stdout during the stream, so `run()` doesn't return it; the user already saw it.

		@@ -160,3 +161,3 @@ ### tool execution

		the outer shell is a simple read-eval-print loop. it reads user input, pushes it as a user message, calls `run()`, and prints the result:
		the outer shell is a simple read-eval-print loop. it reads user input, pushes it as a user message, and calls `run()`, which streams the response to stdout itself:

		@@ -168,3 +169,4 @@ ```js
		hist.push({ role: 'user', content: input });
		console.log(await run(hist));
		try { await run(hist); }
		catch (e) { console.error('✗ ' + e.message); hist.pop(); }
		}
		@@ -174,3 +176,3 @@ }

		there's also a one-shot mode (`-p 'prompt'`) that skips the repl and exits after a single run. both modes use the same `run()` function. the agentic loop doesn't care where the prompt came from.
		there's also a one-shot mode (`-p 'prompt'`) that skips the repl and exits after a single run. both modes use the same `run()` function. streaming works the same way; tokens are written to stdout whether it is a terminal or a pipe. the agentic loop doesn't care where the prompt came from.

		@@ -177,0 +179,0 @@ ### putting it together

+2

-2

skills/explore/SKILL.md

		@@ -29,8 +29,8 @@ ---
		- `SCOPE:` paths actually read
		- `ANSWER:` cluster-local answer in 1–3 sentences. No `file:line` references allowed in this field — every line number lives in `CITATIONS` only. If you feel the urge to write "line N" in prose, stop and put it in `CITATIONS`. Write `not found here` if the question doesn't resolve in this scope.
		- `ANSWER:` cluster-local answer in 1–3 sentences. No `file:line` references allowed in this field — every line number lives in `CITATIONS` only. If you feel the urge to write "line N" in prose, stop and put it in `CITATIONS`. Write `not found here` if the question doesn't resolve in this scope — use `STATUS: complete` (you definitively searched) and leave `CITATIONS` empty.
		- `CITATIONS:` one `path/to/file.ext:<line> — <quoted line or excerpt>` per claim in `ANSWER`
		- `FOLLOW_UPS:` paths outside the cluster that should be checked next, if any

		5. While subagents run, draft FOLLOW_UPS you'll pursue if clusters return "not found here". Check completion with `kill -0 <pid> 2>/dev/null` (exit 0 = still running, exit 1 = done) — do NOT use `wait` (detached children are unreachable by `wait`). Once a pid exits, `cat /tmp/mi-explore-<cluster>.md` to read the result. Compose the final answer by stitching cluster-local `ANSWER`s together. The final output MUST include every `file:line` from every subagent's `CITATIONS` block — one per line, verbatim. If you find yourself summarizing or compressing `CITATIONS`, stop and list them explicitly instead. If every subagent returned `not found here`, say so plainly — either the question was mis-scoped or the cluster partition missed the right directory.
		5. While subagents run, maintain a deduped list of FOLLOW_UPS — paths not yet assigned to any cluster — to pursue if clusters come back empty. Check completion with `kill -0 <pid> 2>/dev/null` (exit 0 = still running, exit 1 = done) — do NOT use `wait` (detached children are unreachable by `wait`). Once a pid exits, `cat /tmp/mi-explore-<cluster>.md` to read the result; if the file is missing or any required field is absent, treat that cluster as `blocked` and fold its scope into FOLLOW_UPS. After all pids exit, read any FOLLOW_UPS paths directly — do not spawn new subagents. Synthesize the final answer from cluster-local `ANSWER`s into a single coherent prose response, not a per-cluster list; omit clusters that returned `not found here` from the prose and surface any unresolved FOLLOW_UPS as "also worth checking". Append a flat CITATIONS block with every `file:line` from every subagent's `CITATIONS` — one per line, verbatim; if you find yourself compressing, stop. If every cluster returned `not found here`, say so plainly — either the question was mis-scoped or the cluster partition missed the right directory.

		On a single-GPU local endpoint subagents serialize at the model server — this pattern saves context, not wall-clock. On a hosted endpoint the fan-out is genuinely parallel.

@avcodes/mi - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics