@forwardimpact/libeval - npm Package Compare versions

Comparing version

0.1.30

0.1.31

+7

-2

bin/fit-trace.js

		@@ -185,8 +185,13 @@ #!/usr/bin/env node
		description:
		"Split a combined trace into per-source files (one per agent or supervisor)",
		"Split a combined trace into per-source files following the `trace--<case>--<participant>.<role>.ndjson` convention",
		options: {
		mode: {
		type: "string",
		description: "Execution mode: run (no-op), supervise, or facilitate",
		description: "Execution mode: run, supervise, or facilitate",
		},
		case: {
		type: "string",
		description:
		"Case identifier embedded in output filenames (default: default)",
		},
		"output-dir": {
		@@ -193,0 +198,0 @@ type: "string",

+1

-1

package.json

		{
		"name": "@forwardimpact/libeval",
		"version": "0.1.30",
		"version": "0.1.31",
		"description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
		@@ -5,0 +5,0 @@ "keywords": [

+24

-51

src/commands/trace.js

		@@ -155,7 +155,18 @@ import { readFileSync, writeFileSync, mkdirSync } from "node:fs";

		/** Valid agent source name pattern: lowercase letter, then lowercase alphanumeric or hyphen */
		/** Valid source name pattern: lowercase letter, then lowercase alphanumeric or hyphen. */
		const VALID_SOURCE_NAME = /^[a-z][a-z0-9-]*$/;

		/** Sources whose name is itself a structural role; classified into the role they represent. */
		const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);

		/**
		* Split a combined NDJSON trace into per-source files.
		* Split a combined NDJSON trace into per-source files using the
		* `trace--<case>--<participant>.<role>.ndjson` convention.
		*
		* Each valid envelope source becomes one output file. Structural sources
		* (`agent`, `supervisor`, `facilitator`) classify into the matching role and
		* use their own name as participant; profile-named sources (e.g.
		* `staff-engineer`) classify as agents with the profile in the participant
		* slot. Orchestrator events and invalid source names are dropped.
		*
		* @param {object} values - Parsed option values
		@@ -170,10 +181,7 @@ * @param {string[]} args - [file]
		if (!mode) throw new Error("split: --mode is required");

		if (mode === "run") {
		process.stdout.write(
		"run mode: trace is already in final form, no split needed\n",
		);
		return;
		if (!["run", "supervise", "facilitate"].includes(mode)) {
		throw new Error(`split: invalid --mode "${mode}"`);
		}

		const caseId = values.case ?? "default";
		const outputDir = values["output-dir"] || dirname(file);
		@@ -184,7 +192,10 @@ mkdirSync(outputDir, { recursive: true });

		if (mode === "supervise") {
		writeBucket(buckets, "agent", outputDir);
		writeBucket(buckets, "supervisor", outputDir);
		} else if (mode === "facilitate") {
		splitFacilitated(buckets, outputDir);
		for (const [source, lines] of buckets.entries()) {
		if (!VALID_SOURCE_NAME.test(source)) continue;
		const role = STRUCTURAL_ROLES.has(source) ? source : "agent";
		const outPath = join(
		outputDir,
		`trace--${caseId}--${source}.${role}.ndjson`,
		);
		writeFileSync(outPath, lines.join("\n") + "\n");
		}
		@@ -225,40 +236,2 @@ }

		/**
		 * Emit the facilitated-mode split: one file for the facilitator, one file
		 * per agent source, and a single `trace-agent.ndjson` holding every agent
		 * line concatenated in bucket iteration order.
		 *
		 * A source counts as an agent when it is not `facilitator` and its name
		 * matches `VALID_SOURCE_NAME`; everything else is left out.
		 *
		 * @param {Map<string, string[]>} buckets - Trace lines keyed by source name
		 * @param {string} outputDir - Directory the files are written into
		 */
		function splitFacilitated(buckets, outputDir) {
		  writeBucket(buckets, "facilitator", outputDir);

		  const isAgentSource = (source) =>
		    source !== "facilitator" && VALID_SOURCE_NAME.test(source);

		  // Write each agent's own file and remember its lines for the combined file.
		  const perAgentLines = [];
		  for (const [source, lines] of buckets) {
		    if (!isAgentSource(source)) continue;
		    writeBucket(buckets, source, outputDir);
		    perAgentLines.push(lines ?? []);
		  }

		  // The combined file is only produced when at least one agent line exists.
		  const merged = perAgentLines.flat();
		  if (merged.length === 0) return;

		  const combinedPath = join(outputDir, "trace-agent.ndjson");
		  writeFileSync(combinedPath, `${merged.join("\n")}\n`);
		}

		/**
		 * Write a single source bucket to a `trace-{name}.ndjson` file.
		 *
		 * Nothing is written when the bucket is missing or holds no lines, so no
		 * empty files are created. Lines are joined with `\n` and the file ends
		 * with a trailing newline.
		 *
		 * @param {Map<string, string[]>} buckets - Trace lines keyed by source name
		 * @param {string} name - Source name; selects the bucket and names the file
		 * @param {string} outputDir - Directory the file is written into
		 * @returns {void}
		 */
		function writeBucket(buckets, name, outputDir) {
		  const lines = buckets.get(name);
		  // Skip absent or empty buckets instead of creating an empty file.
		  if (!lines || lines.length === 0) return;
		  const outPath = join(outputDir, `trace-${name}.ndjson`);
		  writeFileSync(outPath, `${lines.join("\n")}\n`);
		}

		// --- Shared helpers ---
		@@ -265,0 +238,0 @@

+15

-8

src/trace-github.js

		@@ -68,4 +68,6 @@ import { execSync } from "node:child_process";
		*
		* Tries artifact names in order: combined-trace, agent-trace.
		* The artifact zip is downloaded and extracted to the output directory.
		* When `opts.name` is set, looks up that exact artifact. Otherwise picks the
		* best match from the unified `trace--<case>--<participant>.<role>` naming
		* convention: prefer a `.raw` artifact (combined log), then any `.agent`,
		* then the first `trace--*` artifact found.
		*
		@@ -88,9 +90,14 @@ * @param {number|string} runId
		// Find the trace artifact.
		const preferredNames = opts.name
		? [opts.name]
		: ["combined-trace", "agent-trace"];
		let artifact = null;
		for (const name of preferredNames) {
		artifact = artifacts.find((a) => a.name === name);
		if (artifact) break;
		if (opts.name) {
		artifact = artifacts.find((a) => a.name === opts.name);
		} else {
		const traceArtifacts = artifacts.filter((a) =>
		a.name.startsWith("trace--"),
		);
		artifact =
		traceArtifacts.find((a) => a.name.endsWith(".raw")) ??
		traceArtifacts.find((a) => a.name.endsWith(".agent")) ??
		traceArtifacts[0] ??
		null;
		}
		@@ -97,0 +104,0 @@

@forwardimpact/libeval - npm Package Compare versions

Improved metrics

Worsened metrics