Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

@forwardimpact/libeval

Package Overview
Dependencies
Maintainers
1
Versions
53
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@forwardimpact/libeval - npm Package Compare versions

Comparing version
0.1.5
to
0.1.6
+1
-1
package.json
{
"name": "@forwardimpact/libeval",
"version": "0.1.5",
"version": "0.1.6",
"description": "Process Claude Code stream-json output into structured traces",

@@ -5,0 +5,0 @@ "license": "Apache-2.0",

@@ -56,2 +56,11 @@ /**

this.currentTurn = 0;
/**
* Set to true when any supervisor message contains the success signal.
* The SDK result text only reflects the last assistant message, so when
* the supervisor writes EVALUATION_SUCCESSFUL in an early message and
* then continues with follow-up work, the result text won't contain it.
* This flag captures the signal from the full message stream.
* @type {boolean}
*/
this.successSignalSeen = false;
}

@@ -70,2 +79,3 @@

this.currentTurn = 0;
this.successSignalSeen = false;
let supervisorResult = await this.supervisorRunner.run(task);

@@ -78,5 +88,8 @@

// The supervisor's turn is fully complete (all tool calls executed) by the
// time we check the signal — no work is interrupted.
if (isSuccessful(supervisorResult.text)) {
// Check for the success signal in either the SDK result text or the
// streamed message content. The SDK result text only reflects the last
// assistant message, so when the supervisor writes EVALUATION_SUCCESSFUL
// early and then continues (e.g. filing issues), we must also check the
// flag set by emitLine during streaming.
if (this.successSignalSeen || isSuccessful(supervisorResult.text)) {
this.emitSummary({ success: true, turns: 0 });

@@ -112,2 +125,3 @@ return { success: true, turns: 0 };

this.currentTurn = turn;
this.successSignalSeen = false;
supervisorResult = await this.supervisorRunner.resume(supervisorPrompt);

@@ -120,4 +134,5 @@

// The supervisor's turn is fully complete — check for success signal.
if (isSuccessful(supervisorResult.text)) {
// The supervisor's turn is fully complete — check for success signal
// in either the SDK result text or streamed messages.
if (this.successSignalSeen || isSuccessful(supervisorResult.text)) {
this.emitSummary({ success: true, turns: turn });

@@ -150,2 +165,5 @@ return { success: true, turns: turn };

* Called in real-time via the AgentRunner onLine callback.
*
* When the current source is the supervisor, also scans assistant text
* content for the EVALUATION_SUCCESSFUL signal and sets successSignalSeen.
* @param {string} line - Raw NDJSON line from the runner

@@ -161,2 +179,17 @@ */

this.output.write(JSON.stringify(tagged) + "\n");
// Scan supervisor assistant messages for the success signal in real time.
// The SDK result text only reflects the final assistant message, but the
// supervisor may write EVALUATION_SUCCESSFUL in an earlier message and
// then continue with follow-up tool calls.
if (this.currentSource === "supervisor" && event.type === "assistant") {
const content = event.message?.content ?? event.content ?? [];
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === "text" && isSuccessful(block.text)) {
this.successSignalSeen = true;
}
}
}
}
}

@@ -163,0 +196,0 @@

@@ -41,2 +41,9 @@ /**

// Unwrap combined supervised trace format {source, turn, event}.
// The Supervisor emits this wrapper; when replayed through addLine the
// inner event is the one we need.
if (event.event && !event.type && typeof event.source === "string") {
event = event.event;
}
switch (event.type) {

@@ -43,0 +50,0 @@ case "system":

@@ -192,2 +192,63 @@ import { describe, test } from "node:test";

test("detects EVALUATION_SUCCESSFUL in streamed messages when result text differs", async () => {
  // Scenario under test: the supervisor emits EVALUATION_SUCCESSFUL in an
  // earlier streamed message and then keeps working (e.g. filing issues), so
  // the final result text — which here is only the summary — lacks the signal.

  // Builds a streamed assistant event carrying a single text block.
  const assistantText = (text) => ({
    type: "assistant",
    message: {
      content: [{ type: "text", text }],
    },
  });
  const summaryText = "## Summary\n\nAll issues filed.";

  const agentRunner = createMockRunner([
    { text: "I installed the packages." },
  ]);

  const supervisorRunner = createMockRunner(
    [
      { text: "Welcome! Please install the packages." },
      // Final result text for the second supervisor call: summary only,
      // deliberately without the success signal.
      { text: summaryText },
    ],
    [
      undefined, // turn 0: use default
      [
        // The signal appears only in this earlier streamed message.
        assistantText(
          "Good work.\n\nEVALUATION_SUCCESSFUL\n\nNow filing issues.",
        ),
        assistantText(summaryText),
      ],
    ],
  );

  const sink = new PassThrough();
  const supervisor = new Supervisor({
    agentRunner,
    supervisorRunner,
    output: sink,
    maxTurns: 10,
  });

  // Route every line produced by either runner through the supervisor so
  // that emitLine gets to see the streamed messages.
  for (const runner of [agentRunner, supervisorRunner]) {
    runner.onLine = (line) => supervisor.emitLine(line);
  }

  const { success, turns } = await supervisor.run("Install stuff");

  assert.strictEqual(success, true);
  assert.strictEqual(turns, 1);
});
test("runs multiple turns before completion", async () => {

@@ -194,0 +255,0 @@ const agentRunner = createMockRunner([

@@ -152,2 +152,98 @@ import { describe, test } from "node:test";

test("unwraps combined supervised trace format {source, turn, event}", () => {
  // Serializes an inner event inside the {source, turn, event} envelope.
  const envelope = (source, turn, event) =>
    JSON.stringify({ source, turn, event });

  const collector = new TraceCollector();

  const wrappedLines = [
    // System init
    envelope("agent", 0, {
      type: "system",
      subtype: "init",
      session_id: "sess-supervised",
      model: "claude-opus-4-6",
      tools: ["Bash"],
    }),
    // Assistant message
    envelope("agent", 1, {
      type: "assistant",
      message: {
        content: [{ type: "text", text: "I ran the tests." }],
        usage: { input_tokens: 100, output_tokens: 50 },
      },
    }),
    // Tool result
    envelope("agent", 1, {
      type: "user",
      message: {
        role: "user",
        content: [
          {
            type: "tool_result",
            tool_use_id: "toolu_sup",
            content: "All tests passed",
          },
        ],
      },
    }),
    // Result event
    envelope("supervisor", 1, {
      type: "result",
      subtype: "success",
      total_cost_usd: 0.44,
      duration_ms: 30000,
      num_turns: 2,
    }),
  ];

  for (const line of wrappedLines) {
    collector.addLine(line);
  }

  const { metadata, turns, summary } = collector.toJSON();

  // Session id comes from the wrapped system init event.
  assert.strictEqual(metadata.sessionId, "sess-supervised");

  // The assistant message and the tool result each become one turn.
  assert.strictEqual(turns.length, 2);
  assert.strictEqual(turns[0].role, "assistant");
  assert.strictEqual(turns[0].content[0].text, "I ran the tests.");
  assert.strictEqual(turns[1].role, "tool_result");
  assert.strictEqual(turns[1].content, "All tests passed");

  // Summary fields come from the wrapped result event.
  assert.strictEqual(summary.result, "success");
  assert.strictEqual(summary.totalCostUsd, 0.44);
});
test("skips orchestrator summary lines from supervised traces", () => {
  // An orchestrator summary line: it has no inner `event` field, and the
  // test expects the collector to record no turn for it.
  const orchestratorSummary = {
    source: "orchestrator",
    type: "summary",
    success: true,
    turns: 3,
  };

  const collector = new TraceCollector();
  collector.addLine(JSON.stringify(orchestratorSummary));

  // The line should be silently skipped, leaving the trace empty.
  const { turns } = collector.toJSON();
  assert.strictEqual(turns.length, 0);
});
test("skips rate_limit_event and unknown types", () => {

@@ -154,0 +250,0 @@ const collector = new TraceCollector();