@forwardimpact/libeval
Advanced tools
+7
-2
@@ -185,8 +185,13 @@ #!/usr/bin/env node | ||
| description: | ||
| "Split a combined trace into per-source files (one per agent or supervisor)", | ||
| "Split a combined trace into per-source files following the `trace--<case>--<participant>.<role>.ndjson` convention", | ||
| options: { | ||
| mode: { | ||
| type: "string", | ||
| description: "Execution mode: run (no-op), supervise, or facilitate", | ||
| description: "Execution mode: run, supervise, or facilitate", | ||
| }, | ||
| case: { | ||
| type: "string", | ||
| description: | ||
| "Case identifier embedded in output filenames (default: default)", | ||
| }, | ||
| "output-dir": { | ||
@@ -193,0 +198,0 @@ type: "string", |
+1
-1
| { | ||
| "name": "@forwardimpact/libeval", | ||
| "version": "0.1.30", | ||
| "version": "0.1.31", | ||
| "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
+24
-51
@@ -155,7 +155,18 @@ import { readFileSync, writeFileSync, mkdirSync } from "node:fs"; | ||
| /** Valid agent source name pattern: lowercase letter, then lowercase alphanumeric or hyphen */ | ||
| /** Valid source name pattern: lowercase letter, then lowercase alphanumeric or hyphen. */ | ||
| const VALID_SOURCE_NAME = /^[a-z][a-z0-9-]*$/; | ||
| /** Sources whose name is itself a structural role; classified into the role they represent. */ | ||
| const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]); | ||
| /** | ||
| * Split a combined NDJSON trace into per-source files. | ||
| * Split a combined NDJSON trace into per-source files using the | ||
| * `trace--<case>--<participant>.<role>.ndjson` convention. | ||
| * | ||
| * Each valid envelope source becomes one output file. Structural sources | ||
| * (`agent`, `supervisor`, `facilitator`) classify into the matching role and | ||
| * use their own name as participant; profile-named sources (e.g. | ||
| * `staff-engineer`) classify as agents with the profile in the participant | ||
| * slot. Orchestrator events and invalid source names are dropped. | ||
| * | ||
| * @param {object} values - Parsed option values | ||
@@ -170,10 +181,7 @@ * @param {string[]} args - [file] | ||
| if (!mode) throw new Error("split: --mode is required"); | ||
| if (mode === "run") { | ||
| process.stdout.write( | ||
| "run mode: trace is already in final form, no split needed\n", | ||
| ); | ||
| return; | ||
| if (!["run", "supervise", "facilitate"].includes(mode)) { | ||
| throw new Error(`split: invalid --mode "${mode}"`); | ||
| } | ||
| const caseId = values.case ?? "default"; | ||
| const outputDir = values["output-dir"] || dirname(file); | ||
@@ -184,7 +192,10 @@ mkdirSync(outputDir, { recursive: true }); | ||
| if (mode === "supervise") { | ||
| writeBucket(buckets, "agent", outputDir); | ||
| writeBucket(buckets, "supervisor", outputDir); | ||
| } else if (mode === "facilitate") { | ||
| splitFacilitated(buckets, outputDir); | ||
| for (const [source, lines] of buckets.entries()) { | ||
| if (!VALID_SOURCE_NAME.test(source)) continue; | ||
| const role = STRUCTURAL_ROLES.has(source) ? source : "agent"; | ||
| const outPath = join( | ||
| outputDir, | ||
| `trace--${caseId}--${source}.${role}.ndjson`, | ||
| ); | ||
| writeFileSync(outPath, lines.join("\n") + "\n"); | ||
| } | ||
@@ -225,40 +236,2 @@ } | ||
/**
 * Emit the facilitated-mode split of a bucketed trace.
 *
 * Three kinds of output are produced, in this order:
 *   1. the `facilitator` bucket, via `writeBucket`;
 *   2. one file per remaining source whose name matches
 *      `VALID_SOURCE_NAME`, also via `writeBucket`;
 *   3. `trace-agent.ndjson`, holding the lines of every source from step 2,
 *      concatenated source by source in bucket iteration order (the lines
 *      are not re-sorted or interleaved).
 *
 * The combined file is skipped when no lines were collected.
 *
 * NOTE(review): a source literally named `agent` would get a per-source file
 * at the same path as the combined file, which is written last — confirm
 * whether such a source can occur in facilitate mode.
 *
 * @param {Map<string, string[]>} buckets - Trace lines keyed by source name
 * @param {string} outputDir - Directory that receives the output files
 */
function splitFacilitated(buckets, outputDir) {
  writeBucket(buckets, "facilitator", outputDir);

  // Everything that is not the facilitator and has a well-formed name is
  // treated as an agent participant.
  const participants = [];
  for (const source of buckets.keys()) {
    if (source === "facilitator") continue;
    if (!VALID_SOURCE_NAME.test(source)) continue;
    participants.push(source);
  }

  for (const participant of participants) {
    writeBucket(buckets, participant, outputDir);
  }

  // Gather every participant's lines into one combined agent trace.
  const merged = [];
  for (const participant of participants) {
    for (const line of buckets.get(participant) ?? []) {
      merged.push(line);
    }
  }
  if (merged.length === 0) return;

  const combinedPath = join(outputDir, "trace-agent.ndjson");
  writeFileSync(combinedPath, `${merged.join("\n")}\n`);
}
/**
 * Persist one source's lines as `trace-{name}.ndjson` inside `outputDir`.
 *
 * Nothing is written when the bucket is absent or holds no lines. Otherwise
 * the lines are joined with newlines and terminated by a trailing newline.
 *
 * @param {Map<string, string[]>} buckets - Trace lines keyed by source name
 * @param {string} name - Source name; selects the bucket and names the file
 * @param {string} outputDir - Directory that receives the output file
 */
function writeBucket(buckets, name, outputDir) {
  const entries = buckets.get(name);
  if (entries && entries.length !== 0) {
    const target = join(outputDir, `trace-${name}.ndjson`);
    writeFileSync(target, `${entries.join("\n")}\n`);
  }
}
| // --- Shared helpers --- | ||
@@ -265,0 +238,0 @@ |
+15
-8
@@ -68,4 +68,6 @@ import { execSync } from "node:child_process"; | ||
| * | ||
| * Tries artifact names in order: combined-trace, agent-trace. | ||
| * The artifact zip is downloaded and extracted to the output directory. | ||
| * When `opts.name` is set, looks up that exact artifact. Otherwise picks the | ||
| * best match from the unified `trace--<case>--<participant>.<role>` naming | ||
| * convention: prefer a `*.raw` artifact (combined log), then any `*.agent`, | ||
| * then the first `trace--*` artifact found. | ||
| * | ||
@@ -88,9 +90,14 @@ * @param {number|string} runId | ||
| // Find the trace artifact. | ||
| const preferredNames = opts.name | ||
| ? [opts.name] | ||
| : ["combined-trace", "agent-trace"]; | ||
| let artifact = null; | ||
| for (const name of preferredNames) { | ||
| artifact = artifacts.find((a) => a.name === name); | ||
| if (artifact) break; | ||
| if (opts.name) { | ||
| artifact = artifacts.find((a) => a.name === opts.name); | ||
| } else { | ||
| const traceArtifacts = artifacts.filter((a) => | ||
| a.name.startsWith("trace--"), | ||
| ); | ||
| artifact = | ||
| traceArtifacts.find((a) => a.name.endsWith(".raw")) ?? | ||
| traceArtifacts.find((a) => a.name.endsWith(".agent")) ?? | ||
| traceArtifacts[0] ?? | ||
| null; | ||
| } | ||
@@ -97,0 +104,0 @@ |
173790
0.08%4542
-0.22%