@forwardimpact/libeval
Advanced tools
+1
-1
| { | ||
| "name": "@forwardimpact/libeval", | ||
| "version": "0.1.1", | ||
| "version": "0.1.2", | ||
| "description": "Process Claude Code stream-json output into structured traces", | ||
@@ -5,0 +5,0 @@ "license": "Apache-2.0", |
+14
-2
@@ -19,2 +19,4 @@ /** | ||
| * @param {string} [deps.permissionMode] - SDK permission mode | ||
| * @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced | ||
| * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md) | ||
| */ | ||
@@ -29,2 +31,4 @@ constructor({ | ||
| permissionMode, | ||
| onLine, | ||
| settingSources, | ||
| }) { | ||
@@ -48,2 +52,4 @@ if (!cwd) throw new Error("cwd is required"); | ||
| this.permissionMode = permissionMode ?? "bypassPermissions"; | ||
| this.onLine = onLine ?? null; | ||
| this.settingSources = settingSources ?? []; | ||
| this.sessionId = null; | ||
@@ -73,2 +79,3 @@ this.buffer = []; | ||
| allowDangerouslySkipPermissions: true, | ||
| settingSources: this.settingSources, | ||
| }, | ||
@@ -79,2 +86,3 @@ })) { | ||
| this.buffer.push(line); | ||
| if (this.onLine) this.onLine(line); | ||
@@ -93,3 +101,6 @@ if (message.type === "system" && message.subtype === "init") { | ||
| const success = !error && stopReason === "success"; | ||
| // If the SDK already emitted a successful result, honour it even when the | ||
| // stream throws afterwards (e.g. "Credit balance is too low" during | ||
| // cleanup). Only treat errors as fatal when no result was received yet. | ||
| const success = stopReason === "success"; | ||
| return { success, text, sessionId: this.sessionId, error }; | ||
@@ -116,2 +127,3 @@ } | ||
| this.buffer.push(line); | ||
| if (this.onLine) this.onLine(line); | ||
@@ -127,3 +139,3 @@ if (message.type === "result") { | ||
| const success = !error && stopReason === "success"; | ||
| const success = stopReason === "success"; | ||
| return { success, text, error }; | ||
@@ -130,0 +142,0 @@ } |
@@ -65,2 +65,3 @@ import { readFileSync, createWriteStream } from "node:fs"; | ||
| allowedTools, | ||
| settingSources: ["project"], | ||
| }); | ||
@@ -67,0 +68,0 @@ |
+13
-75
| import { createWriteStream } from "fs"; | ||
| import { createTraceCollector } from "@forwardimpact/libeval"; | ||
| import { PassThrough } from "node:stream"; | ||
| import { pipeline } from "node:stream/promises"; | ||
| import { createTeeWriter } from "../tee-writer.js"; | ||
@@ -15,42 +17,14 @@ /** | ||
| const fileStream = outputPath ? createWriteStream(outputPath) : null; | ||
| const collector = createTraceCollector(); | ||
| const turnsEmitted = { count: 0 }; | ||
| // TeeWriter requires a fileStream; when no output file is specified, | ||
| // use a PassThrough as a no-op sink (NDJSON is not saved). | ||
| const sink = fileStream ?? new PassThrough(); | ||
| const tee = createTeeWriter({ | ||
| fileStream: sink, | ||
| textStream: process.stdout, | ||
| mode: "raw", | ||
| }); | ||
| try { | ||
| let buffer = ""; | ||
| for await (const chunk of process.stdin) { | ||
| buffer += chunk.toString("utf8"); | ||
| let newlineIdx; | ||
| while ((newlineIdx = buffer.indexOf("\n")) !== -1) { | ||
| const line = buffer.slice(0, newlineIdx); | ||
| buffer = buffer.slice(newlineIdx + 1); | ||
| if (fileStream) { | ||
| fileStream.write(line + "\n"); | ||
| } | ||
| collector.addLine(line); | ||
| flushNewTurns(collector, turnsEmitted); | ||
| } | ||
| } | ||
| // Process any remaining data without a trailing newline | ||
| if (buffer.trim()) { | ||
| if (fileStream) { | ||
| fileStream.write(buffer + "\n"); | ||
| } | ||
| collector.addLine(buffer); | ||
| flushNewTurns(collector, turnsEmitted); | ||
| } | ||
| // Emit the result summary at the end | ||
| if (collector.result) { | ||
| const text = collector.toText(); | ||
| const lastNewline = text.lastIndexOf("\n---"); | ||
| if (lastNewline !== -1) { | ||
| process.stdout.write(text.slice(lastNewline) + "\n"); | ||
| } | ||
| } | ||
| await pipeline(process.stdin, tee); | ||
| } finally { | ||
@@ -65,37 +39,1 @@ if (fileStream) { | ||
| } | ||
| /** | ||
| * Write text for any new turns that haven't been emitted yet. | ||
| * @param {import("@forwardimpact/libeval").TraceCollector} collector | ||
| * @param {{ count: number }} turnsEmitted | ||
| */ | ||
| function flushNewTurns(collector, turnsEmitted) { | ||
| const turns = collector.turns; | ||
| while (turnsEmitted.count < turns.length) { | ||
| const turn = turns[turnsEmitted.count]; | ||
| turnsEmitted.count++; | ||
| if (turn.role === "assistant") { | ||
| for (const block of turn.content) { | ||
| if (block.type === "text") { | ||
| process.stdout.write(block.text + "\n"); | ||
| } else if (block.type === "tool_use") { | ||
| const inputSummary = summarizeInput(block.input); | ||
| process.stdout.write(`> Tool: ${block.name} ${inputSummary}\n`); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| /** | ||
| * Summarize tool input for text display, truncated to keep logs readable. | ||
| * @param {object} input - Tool input object | ||
| * @returns {string} Truncated summary | ||
| */ | ||
| function summarizeInput(input) { | ||
| if (!input || typeof input !== "object") return ""; | ||
| const json = JSON.stringify(input); | ||
| if (json.length <= 200) return json; | ||
| return json.slice(0, 197) + "..."; | ||
| } |
+37
-16
@@ -39,2 +39,6 @@ /** | ||
| this.maxTurns = maxTurns ?? 20; | ||
| /** @type {"agent"|"supervisor"} */ | ||
| this.currentSource = "agent"; | ||
| /** @type {number} */ | ||
| this.currentTurn = 0; | ||
| } | ||
@@ -49,4 +53,5 @@ | ||
| // Turn 0: Agent receives the task and starts working | ||
| this.currentSource = "agent"; | ||
| this.currentTurn = 0; | ||
| let agentResult = await this.agentRunner.run(task); | ||
| this.emitTagged("agent", 0); | ||
@@ -64,2 +69,4 @@ if (agentResult.error) { | ||
| this.currentSource = "supervisor"; | ||
| this.currentTurn = turn; | ||
| let supervisorResult; | ||
@@ -71,3 +78,2 @@ if (turn === 1) { | ||
| } | ||
| this.emitTagged("supervisor", turn); | ||
@@ -85,4 +91,5 @@ if (supervisorResult.error) { | ||
| // Supervisor's response becomes the agent's next input | ||
| this.currentSource = "agent"; | ||
| this.currentTurn = turn; | ||
| agentResult = await this.agentRunner.resume(supervisorResult.text); | ||
| this.emitTagged("agent", turn); | ||
@@ -100,15 +107,14 @@ if (agentResult.error) { | ||
| /** | ||
| * Drain a runner's buffered output and re-emit each line tagged with | ||
| * source and turn metadata. | ||
| * @param {"agent"|"supervisor"} source | ||
| * @param {number} turn | ||
| * Emit a single NDJSON line tagged with the current source and turn. | ||
| * Called in real-time via the AgentRunner onLine callback. | ||
| * @param {string} line - Raw NDJSON line from the runner | ||
| */ | ||
| emitTagged(source, turn) { | ||
| const runner = | ||
| source === "agent" ? this.agentRunner : this.supervisorRunner; | ||
| for (const line of runner.drainOutput()) { | ||
| const event = JSON.parse(line); | ||
| const tagged = { source, turn, event }; | ||
| this.output.write(JSON.stringify(tagged) + "\n"); | ||
| } | ||
| emitLine(line) { | ||
| const event = JSON.parse(line); | ||
| const tagged = { | ||
| source: this.currentSource, | ||
| turn: this.currentTurn, | ||
| event, | ||
| }; | ||
| this.output.write(JSON.stringify(tagged) + "\n"); | ||
| } | ||
@@ -152,2 +158,7 @@ | ||
| }) { | ||
| // Forward-reference: onLine captures `supervisor` before construction completes. | ||
| // This is safe because onLine is only called during run(), after construction. | ||
| let supervisor; | ||
| const onLine = (line) => supervisor.emitLine(line); | ||
| const agentRunner = createAgentRunner({ | ||
@@ -160,2 +171,4 @@ cwd: agentCwd, | ||
| allowedTools, | ||
| onLine, | ||
| settingSources: ["project"], | ||
| }); | ||
@@ -170,5 +183,13 @@ | ||
| allowedTools: ["Read", "Glob", "Grep"], | ||
| onLine, | ||
| settingSources: ["project"], | ||
| }); | ||
| return new Supervisor({ agentRunner, supervisorRunner, output, maxTurns }); | ||
| supervisor = new Supervisor({ | ||
| agentRunner, | ||
| supervisorRunner, | ||
| output, | ||
| maxTurns, | ||
| }); | ||
| return supervisor; | ||
| } |
@@ -84,2 +84,3 @@ import { describe, test } from "node:test"; | ||
| assert.strictEqual(runner.permissionMode, "bypassPermissions"); | ||
| assert.deepStrictEqual(runner.settingSources, []); | ||
| assert.strictEqual(runner.sessionId, null); | ||
@@ -149,2 +150,3 @@ }); | ||
| permissionMode: "plan", | ||
| settingSources: ["project"], | ||
| }); | ||
@@ -161,2 +163,3 @@ | ||
| assert.strictEqual(captured.options.allowDangerouslySkipPermissions, true); | ||
| assert.deepStrictEqual(captured.options.settingSources, ["project"]); | ||
| }); | ||
@@ -287,2 +290,24 @@ | ||
| test("run() succeeds when SDK throws after emitting successful result", async () => { | ||
| async function* creditExhaustedQuery() { | ||
| yield { type: "system", subtype: "init", session_id: "sess-credit" }; | ||
| yield { type: "assistant", content: "Analysis complete." }; | ||
| yield { type: "result", subtype: "success", result: "Done." }; | ||
| throw new Error("Credit balance is too low"); | ||
| } | ||
| const output = new PassThrough(); | ||
| const runner = new AgentRunner({ | ||
| cwd: "/tmp", | ||
| query: () => creditExhaustedQuery(), | ||
| output, | ||
| }); | ||
| const result = await runner.run("Task"); | ||
| assert.strictEqual(result.success, true); | ||
| assert.strictEqual(result.text, "Done."); | ||
| assert.ok(result.error); | ||
| assert.match(result.error.message, /Credit balance/); | ||
| }); | ||
| test("createAgentRunner factory returns an AgentRunner instance", () => { | ||
@@ -289,0 +314,0 @@ const runner = createAgentRunner({ |
@@ -32,3 +32,2 @@ import { describe, test } from "node:test"; | ||
| const resp = responses[callIndex++]; | ||
| // Buffer messages for drainOutput | ||
| const msgs = messages?.[callIndex - 1] ?? [ | ||
@@ -38,3 +37,5 @@ { type: "assistant", content: resp.text }, | ||
| for (const m of msgs) { | ||
| runner.buffer.push(JSON.stringify(m)); | ||
| const line = JSON.stringify(m); | ||
| runner.buffer.push(line); | ||
| if (runner.onLine) runner.onLine(line); | ||
| } | ||
@@ -55,3 +56,5 @@ runner.sessionId = "mock-session"; | ||
| for (const m of msgs) { | ||
| runner.buffer.push(JSON.stringify(m)); | ||
| const line = JSON.stringify(m); | ||
| runner.buffer.push(line); | ||
| if (runner.onLine) runner.onLine(line); | ||
| } | ||
@@ -217,2 +220,4 @@ return { success: resp.success ?? true, text: resp.text }; | ||
| }); | ||
| agentRunner.onLine = (line) => supervisor.emitLine(line); | ||
| supervisorRunner.onLine = (line) => supervisor.emitLine(line); | ||
@@ -265,2 +270,4 @@ await supervisor.run("Task"); | ||
| }); | ||
| agentRunner.onLine = (line) => supervisor.emitLine(line); | ||
| supervisorRunner.onLine = (line) => supervisor.emitLine(line); | ||
@@ -281,3 +288,3 @@ await supervisor.run("Task"); | ||
| test("drains agent output and emits summary when agent errors on turn 0", async () => { | ||
| test("emits agent output and summary when agent errors on turn 0", async () => { | ||
| const agentMessages = [[{ type: "assistant", content: "Partial work" }]]; | ||
@@ -305,2 +312,4 @@ const agentRunner = createMockRunner( | ||
| }); | ||
| agentRunner.onLine = (line) => supervisor.emitLine(line); | ||
| supervisorRunner.onLine = (line) => supervisor.emitLine(line); | ||
@@ -307,0 +316,0 @@ const result = await supervisor.run("Task"); |
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
87200
1%2133
0.47%0
-100%