@forwardimpact/libeval - npm Package Compare versions

Comparing version

0.1.1

0.1.2

+1

-1

package.json

		{
		"name": "@forwardimpact/libeval",
		"version": "0.1.1",
		"version": "0.1.2",
		"description": "Process Claude Code stream-json output into structured traces",
		@@ -5,0 +5,0 @@ "license": "Apache-2.0",

+14

-2

src/agent-runner.js

		@@ -19,2 +19,4 @@ /**
		* @param {string} [deps.permissionMode] - SDK permission mode
		* @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
		* @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
		*/
		@@ -29,2 +31,4 @@ constructor({
		permissionMode,
		onLine,
		settingSources,
		}) {
		@@ -48,2 +52,4 @@ if (!cwd) throw new Error("cwd is required");
		this.permissionMode = permissionMode ?? "bypassPermissions";
		this.onLine = onLine ?? null;
		this.settingSources = settingSources ?? [];
		this.sessionId = null;
		@@ -73,2 +79,3 @@ this.buffer = [];
		allowDangerouslySkipPermissions: true,
		settingSources: this.settingSources,
		},
		@@ -79,2 +86,3 @@ })) {
		this.buffer.push(line);
		if (this.onLine) this.onLine(line);

		@@ -93,3 +101,6 @@ if (message.type === "system" && message.subtype === "init") {

		const success = !error && stopReason === "success";
		// If the SDK already emitted a successful result, honour it even when the
		// stream throws afterwards (e.g. "Credit balance is too low" during
		// cleanup). Only treat errors as fatal when no result was received yet.
		const success = stopReason === "success";
		return { success, text, sessionId: this.sessionId, error };
		@@ -116,2 +127,3 @@ }
		this.buffer.push(line);
		if (this.onLine) this.onLine(line);

		@@ -127,3 +139,3 @@ if (message.type === "result") {

		const success = !error && stopReason === "success";
		const success = stopReason === "success";
		return { success, text, error };
		@@ -130,0 +142,0 @@ }

+1

-0

src/commands/run.js

		@@ -65,2 +65,3 @@ import { readFileSync, createWriteStream } from "node:fs";
		allowedTools,
		settingSources: ["project"],
		});
		@@ -67,0 +68,0 @@

+13

-75

src/commands/tee.js

		import { createWriteStream } from "fs";
		import { createTraceCollector } from "@forwardimpact/libeval";
		import { PassThrough } from "node:stream";
		import { pipeline } from "node:stream/promises";
		import { createTeeWriter } from "../tee-writer.js";

		@@ -15,42 +17,14 @@ /**
		const fileStream = outputPath ? createWriteStream(outputPath) : null;
		const collector = createTraceCollector();
		const turnsEmitted = { count: 0 };

		// TeeWriter requires a fileStream; when no output file is specified,
		// use a PassThrough as a no-op sink (NDJSON is not saved).
		const sink = fileStream ?? new PassThrough();
		const tee = createTeeWriter({
		fileStream: sink,
		textStream: process.stdout,
		mode: "raw",
		});

		try {
		let buffer = "";

		for await (const chunk of process.stdin) {
		buffer += chunk.toString("utf8");

		let newlineIdx;
		while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
		const line = buffer.slice(0, newlineIdx);
		buffer = buffer.slice(newlineIdx + 1);

		if (fileStream) {
		fileStream.write(line + "\n");
		}

		collector.addLine(line);
		flushNewTurns(collector, turnsEmitted);
		}
		}

		// Process any remaining data without a trailing newline
		if (buffer.trim()) {
		if (fileStream) {
		fileStream.write(buffer + "\n");
		}
		collector.addLine(buffer);
		flushNewTurns(collector, turnsEmitted);
		}

		// Emit the result summary at the end
		if (collector.result) {
		const text = collector.toText();
		const lastNewline = text.lastIndexOf("\n---");
		if (lastNewline !== -1) {
		process.stdout.write(text.slice(lastNewline) + "\n");
		}
		}
		await pipeline(process.stdin, tee);
		} finally {
		@@ -65,37 +39,1 @@ if (fileStream) {
		}

		/**
		* Write text for any new turns that haven't been emitted yet.
		* @param {import("@forwardimpact/libeval").TraceCollector} collector
		* @param {{ count: number }} turnsEmitted
		*/
		function flushNewTurns(collector, turnsEmitted) {
		const turns = collector.turns;
		while (turnsEmitted.count < turns.length) {
		const turn = turns[turnsEmitted.count];
		turnsEmitted.count++;

		if (turn.role === "assistant") {
		for (const block of turn.content) {
		if (block.type === "text") {
		process.stdout.write(block.text + "\n");
		} else if (block.type === "tool_use") {
		const inputSummary = summarizeInput(block.input);
		process.stdout.write(`> Tool: ${block.name} ${inputSummary}\n`);
		}
		}
		}
		}
		}

		/**
		* Summarize tool input for text display, truncated to keep logs readable.
		* @param {object} input - Tool input object
		* @returns {string} Truncated summary
		*/
		function summarizeInput(input) {
		if (!input \|\| typeof input !== "object") return "";
		const json = JSON.stringify(input);
		if (json.length <= 200) return json;
		return json.slice(0, 197) + "...";
		}

+37

-16

src/supervisor.js

		@@ -39,2 +39,6 @@ /**
		this.maxTurns = maxTurns ?? 20;
		/** @type {"agent"\|"supervisor"} */
		this.currentSource = "agent";
		/** @type {number} */
		this.currentTurn = 0;
		}
		@@ -49,4 +53,5 @@
		// Turn 0: Agent receives the task and starts working
		this.currentSource = "agent";
		this.currentTurn = 0;
		let agentResult = await this.agentRunner.run(task);
		this.emitTagged("agent", 0);

		@@ -64,2 +69,4 @@ if (agentResult.error) {

		this.currentSource = "supervisor";
		this.currentTurn = turn;
		let supervisorResult;
		@@ -71,3 +78,2 @@ if (turn === 1) {
		}
		this.emitTagged("supervisor", turn);

		@@ -85,4 +91,5 @@ if (supervisorResult.error) {
		// Supervisor's response becomes the agent's next input
		this.currentSource = "agent";
		this.currentTurn = turn;
		agentResult = await this.agentRunner.resume(supervisorResult.text);
		this.emitTagged("agent", turn);

		@@ -100,15 +107,14 @@ if (agentResult.error) {
		/**
		* Drain a runner's buffered output and re-emit each line tagged with
		* source and turn metadata.
		* @param {"agent"\|"supervisor"} source
		* @param {number} turn
		* Emit a single NDJSON line tagged with the current source and turn.
		* Called in real-time via the AgentRunner onLine callback.
		* @param {string} line - Raw NDJSON line from the runner
		*/
		emitTagged(source, turn) {
		const runner =
		source === "agent" ? this.agentRunner : this.supervisorRunner;
		for (const line of runner.drainOutput()) {
		const event = JSON.parse(line);
		const tagged = { source, turn, event };
		this.output.write(JSON.stringify(tagged) + "\n");
		}
		emitLine(line) {
		const event = JSON.parse(line);
		const tagged = {
		source: this.currentSource,
		turn: this.currentTurn,
		event,
		};
		this.output.write(JSON.stringify(tagged) + "\n");
		}
		@@ -152,2 +158,7 @@
		}) {
		// Forward-reference: onLine captures `supervisor` before construction completes.
		// This is safe because onLine is only called during run(), after construction.
		let supervisor;
		const onLine = (line) => supervisor.emitLine(line);

		const agentRunner = createAgentRunner({
		@@ -160,2 +171,4 @@ cwd: agentCwd,
		allowedTools,
		onLine,
		settingSources: ["project"],
		});
		@@ -170,5 +183,13 @@
		allowedTools: ["Read", "Glob", "Grep"],
		onLine,
		settingSources: ["project"],
		});

		return new Supervisor({ agentRunner, supervisorRunner, output, maxTurns });
		supervisor = new Supervisor({
		agentRunner,
		supervisorRunner,
		output,
		maxTurns,
		});
		return supervisor;
		}

+25

-0

test/agent-runner.test.js

		@@ -84,2 +84,3 @@ import { describe, test } from "node:test";
		assert.strictEqual(runner.permissionMode, "bypassPermissions");
		assert.deepStrictEqual(runner.settingSources, []);
		assert.strictEqual(runner.sessionId, null);
		@@ -149,2 +150,3 @@ });
		permissionMode: "plan",
		settingSources: ["project"],
		});
		@@ -161,2 +163,3 @@
		assert.strictEqual(captured.options.allowDangerouslySkipPermissions, true);
		assert.deepStrictEqual(captured.options.settingSources, ["project"]);
		});
		@@ -287,2 +290,24 @@

		test("run() succeeds when SDK throws after emitting successful result", async () => {
		async function* creditExhaustedQuery() {
		yield { type: "system", subtype: "init", session_id: "sess-credit" };
		yield { type: "assistant", content: "Analysis complete." };
		yield { type: "result", subtype: "success", result: "Done." };
		throw new Error("Credit balance is too low");
		}

		const output = new PassThrough();
		const runner = new AgentRunner({
		cwd: "/tmp",
		query: () => creditExhaustedQuery(),
		output,
		});

		const result = await runner.run("Task");
		assert.strictEqual(result.success, true);
		assert.strictEqual(result.text, "Done.");
		assert.ok(result.error);
		assert.match(result.error.message, /Credit balance/);
		});

		test("createAgentRunner factory returns an AgentRunner instance", () => {
		@@ -289,0 +314,0 @@ const runner = createAgentRunner({

+13

-4

test/supervisor.test.js

		@@ -32,3 +32,2 @@ import { describe, test } from "node:test";
		const resp = responses[callIndex++];
		// Buffer messages for drainOutput
		const msgs = messages?.[callIndex - 1] ?? [
		@@ -38,3 +37,5 @@ { type: "assistant", content: resp.text },
		for (const m of msgs) {
		runner.buffer.push(JSON.stringify(m));
		const line = JSON.stringify(m);
		runner.buffer.push(line);
		if (runner.onLine) runner.onLine(line);
		}
		@@ -55,3 +56,5 @@ runner.sessionId = "mock-session";
		for (const m of msgs) {
		runner.buffer.push(JSON.stringify(m));
		const line = JSON.stringify(m);
		runner.buffer.push(line);
		if (runner.onLine) runner.onLine(line);
		}
		@@ -217,2 +220,4 @@ return { success: resp.success ?? true, text: resp.text };
		});
		agentRunner.onLine = (line) => supervisor.emitLine(line);
		supervisorRunner.onLine = (line) => supervisor.emitLine(line);

		@@ -265,2 +270,4 @@ await supervisor.run("Task");
		});
		agentRunner.onLine = (line) => supervisor.emitLine(line);
		supervisorRunner.onLine = (line) => supervisor.emitLine(line);

		@@ -281,3 +288,3 @@ await supervisor.run("Task");

		test("drains agent output and emits summary when agent errors on turn 0", async () => {
		test("emits agent output and summary when agent errors on turn 0", async () => {
		const agentMessages = [[{ type: "assistant", content: "Partial work" }]];
		@@ -305,2 +312,4 @@ const agentRunner = createMockRunner(
		});
		agentRunner.onLine = (line) => supervisor.emitLine(line);
		supervisorRunner.onLine = (line) => supervisor.emitLine(line);

		@@ -307,0 +316,0 @@ const result = await supervisor.run("Task");

@forwardimpact/libeval - npm Package Compare versions

Fixed alerts

Improved metrics