langsmith
Advanced tools
Comparing version 0.2.16-rc.5 to 0.2.16-rc.6
@@ -5,2 +5,2 @@ export { Client, type ClientConfig, type LangSmithTracingClientInterface, } from "./client.js"; | ||
export { overrideFetchImplementation } from "./singletons/fetch.js"; | ||
export declare const __version__ = "0.2.16-rc.5"; | ||
export declare const __version__ = "0.2.16-rc.6"; |
@@ -5,2 +5,2 @@ export { Client, } from "./client.js"; | ||
// Update using yarn bump-version | ||
export const __version__ = "0.2.16-rc.5"; | ||
export const __version__ = "0.2.16-rc.6"; |
@@ -83,3 +83,3 @@ import { Attachments, BaseRun, KVMap, RunCreate } from "./schemas.js"; | ||
private static getDefaultConfig; | ||
static getSharedClient(): Client; | ||
private static getSharedClient; | ||
createChild(config: RunTreeConfig): RunTree; | ||
@@ -86,0 +86,0 @@ end(outputs?: KVMap, error?: string, endTime?: number, metadata?: KVMap): Promise<void>; |
@@ -37,2 +37,3 @@ export interface TracerSession { | ||
presigned_url: string; | ||
mime_type?: string; | ||
} | ||
@@ -227,2 +228,3 @@ export type AttachmentData = Uint8Array | ArrayBuffer; | ||
s3_url: string; | ||
mime_type?: string; | ||
} | ||
@@ -229,0 +231,0 @@ export interface RawExample extends BaseExample { |
@@ -7,2 +7,3 @@ /// <reference types="node" resolution-mode="require"/> | ||
import { RunTree } from "../../run_trees.js"; | ||
export declare const DEFAULT_TEST_CLIENT: Client; | ||
export type TestWrapperAsyncLocalStorageData = { | ||
@@ -9,0 +10,0 @@ enableTestTracking?: boolean; |
import { AsyncLocalStorage } from "node:async_hooks"; | ||
import { Client } from "../../client.js"; | ||
import { getEnvironmentVariable } from "../env.js"; | ||
import { isTracingEnabled } from "../../env.js"; | ||
export const DEFAULT_TEST_CLIENT = new Client(); | ||
export const testWrapperAsyncLocalStorageInstance = new AsyncLocalStorage(); | ||
@@ -5,0 +7,0 @@ export function trackingEnabled(context) { |
@@ -6,2 +6,3 @@ /// <reference types="jest" /> | ||
import type { LangSmithJestlikeWrapperConfig, LangSmithJestlikeWrapperParams, LangSmithJestDescribeWrapper } from "./types.js"; | ||
export declare const STRIP_ANSI_REGEX: RegExp; | ||
export declare function logFeedback(feedback: EvaluationResult): void; | ||
@@ -8,0 +9,0 @@ export declare function logOutput(output: Record<string, unknown>): void; |
@@ -9,10 +9,9 @@ /* eslint-disable import/no-extraneous-dependencies */ | ||
import { getCurrentRunTree, traceable } from "../../traceable.js"; | ||
import { RunTree } from "../../run_trees.js"; | ||
import { randomName } from "../../evaluation/_random_name.js"; | ||
import { toBeRelativeCloseTo, toBeAbsoluteCloseTo, toBeSemanticCloseTo, } from "./matchers.js"; | ||
import { evaluatorLogFeedbackPromises, testWrapperAsyncLocalStorageInstance, _logTestFeedback, syncExamplePromises, trackingEnabled, } from "./globals.js"; | ||
import { evaluatorLogFeedbackPromises, testWrapperAsyncLocalStorageInstance, _logTestFeedback, syncExamplePromises, trackingEnabled, DEFAULT_TEST_CLIENT, } from "./globals.js"; | ||
import { wrapExpect } from "./vendor/chain.js"; | ||
const UUID5_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8"; | ||
// From https://stackoverflow.com/a/29497680 | ||
const STRIP_ANSI_REGEX = | ||
export const STRIP_ANSI_REGEX = | ||
// eslint-disable-next-line no-control-regex | ||
@@ -182,3 +181,3 @@ /[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g; | ||
return function (datasetName, fn, experimentConfig) { | ||
const client = experimentConfig?.client ?? RunTree.getSharedClient(); | ||
const client = experimentConfig?.client ?? DEFAULT_TEST_CLIENT; | ||
return method(datasetName, () => { | ||
@@ -253,4 +252,58 @@ const suiteUuid = v4(); | ||
const testFeedback = []; | ||
let testReturnValue; | ||
const onFeedbackLogged = (feedback) => testFeedback.push(feedback); | ||
let loggedOutput; | ||
const setLoggedOutput = (value) => { | ||
if (loggedOutput !== undefined) { | ||
console.warn(`[WARN]: New "logOutput()" call will override output set by previous "logOutput()" call.`); | ||
} | ||
loggedOutput = value; | ||
}; | ||
let exampleId; | ||
const runTestFn = async () => { | ||
const testContext = testWrapperAsyncLocalStorageInstance.getStore(); | ||
if (testContext === undefined) { | ||
throw new Error("Could not identify test context. Please contact us for help."); | ||
} | ||
try { | ||
const res = await testFn({ | ||
inputs: testInput, | ||
expected: testOutput, | ||
}); | ||
_logTestFeedback({ | ||
exampleId, | ||
feedback: { key: "pass", score: true }, | ||
context: testContext, | ||
runTree: trackingEnabled(testContext) | ||
? getCurrentRunTree() | ||
: undefined, | ||
client: testContext.client, | ||
}); | ||
if (res != null) { | ||
if (loggedOutput !== undefined) { | ||
console.warn(`[WARN]: Returned value from test function will override output set by previous "logOutput()" call.`); | ||
} | ||
loggedOutput = | ||
typeof res === "object" | ||
? res | ||
: { result: res }; | ||
} | ||
return loggedOutput; | ||
} | ||
catch (e) { | ||
_logTestFeedback({ | ||
exampleId, | ||
feedback: { key: "pass", score: false }, | ||
context: testContext, | ||
runTree: trackingEnabled(testContext) | ||
? getCurrentRunTree() | ||
: undefined, | ||
client: testContext.client, | ||
}); | ||
const rawError = e; | ||
const strippedErrorMessage = e.message.replace(STRIP_ANSI_REGEX, ""); | ||
const langsmithFriendlyError = new Error(strippedErrorMessage); | ||
langsmithFriendlyError.rawJestError = rawError; | ||
throw langsmithFriendlyError; | ||
} | ||
}; | ||
try { | ||
@@ -273,7 +326,9 @@ if (trackingEnabled(context)) { | ||
} | ||
const testClient = client; | ||
const exampleId = getExampleId(dataset.name, inputs, expected); | ||
// Create or update the example in the background | ||
exampleId = getExampleId(dataset.name, inputs, expected); | ||
// TODO: Create or update the example in the background | ||
// Currently run end time has to be after example modified time | ||
// for examples to render properly, so we must modify the example | ||
// first before running the test. | ||
if (syncExamplePromises.get(exampleId) === undefined) { | ||
syncExamplePromises.set(exampleId, syncExample({ | ||
syncExamplePromises.set(exampleId, await syncExample({ | ||
client, | ||
@@ -288,12 +343,2 @@ exampleId, | ||
} | ||
// .enterWith is OK here | ||
testWrapperAsyncLocalStorageInstance.enterWith({ | ||
...context, | ||
currentExample: { | ||
inputs, | ||
outputs: expected, | ||
id: exampleId, | ||
}, | ||
client: testClient, | ||
}); | ||
const traceableOptions = { | ||
@@ -305,3 +350,3 @@ reference_example_id: exampleId, | ||
}, | ||
client: testClient, | ||
client, | ||
tracingEnabled: true, | ||
@@ -312,51 +357,22 @@ name, | ||
// provide both to the user-defined test function | ||
const tracedFunction = traceable(async (_) => { | ||
const testContext = testWrapperAsyncLocalStorageInstance.getStore(); | ||
if (testContext === undefined) { | ||
throw new Error("Could not identify test context. Please contact us for help."); | ||
} | ||
try { | ||
const res = await testWrapperAsyncLocalStorageInstance.run({ | ||
...testContext, | ||
setLoggedOutput: (value) => { | ||
if (loggedOutput !== undefined) { | ||
console.warn(`[WARN]: New "logOutput()" call will override output set by previous "logOutput()" call.`); | ||
} | ||
loggedOutput = value; | ||
}, | ||
onFeedbackLogged: (feedback) => testFeedback.push(feedback), | ||
}, async () => { | ||
return testFn({ | ||
inputs: testInput, | ||
expected: testOutput, | ||
}); | ||
}); | ||
_logTestFeedback({ | ||
exampleId, | ||
feedback: { key: "pass", score: true }, | ||
context: testContext, | ||
runTree: getCurrentRunTree(), | ||
client: testClient, | ||
}); | ||
return res; | ||
} | ||
catch (e) { | ||
_logTestFeedback({ | ||
exampleId, | ||
feedback: { key: "pass", score: false }, | ||
context: testContext, | ||
runTree: getCurrentRunTree(), | ||
client: testClient, | ||
}); | ||
const rawError = e; | ||
const strippedErrorMessage = e.message.replace(STRIP_ANSI_REGEX, ""); | ||
const langsmithFriendlyError = new Error(strippedErrorMessage); | ||
langsmithFriendlyError.rawJestError = rawError; | ||
throw langsmithFriendlyError; | ||
} | ||
}, { ...traceableOptions, ...config }); | ||
const tracedFunction = traceable(async () => { | ||
return testWrapperAsyncLocalStorageInstance.run({ | ||
...context, | ||
currentExample: { | ||
inputs, | ||
outputs: expected, | ||
id: exampleId, | ||
}, | ||
setLoggedOutput, | ||
onFeedbackLogged, | ||
}, runTestFn); | ||
}, { | ||
...traceableOptions, | ||
...config, | ||
}); | ||
try { | ||
testReturnValue = await tracedFunction(testInput); | ||
await tracedFunction(testInput); | ||
} | ||
catch (e) { | ||
// Extract raw Jest error from LangSmith formatted one and throw | ||
if (e.rawJestError !== undefined) { | ||
@@ -369,3 +385,3 @@ throw e.rawJestError; | ||
else { | ||
testReturnValue = | ||
try { | ||
await testWrapperAsyncLocalStorageInstance.run({ | ||
@@ -377,27 +393,16 @@ ...context, | ||
}, | ||
setLoggedOutput: (value) => { | ||
if (loggedOutput !== undefined) { | ||
console.warn(`[WARN]: New "logOutput()" call will override output set by previous "logOutput()" call.`); | ||
} | ||
loggedOutput = value; | ||
}, | ||
onFeedbackLogged: (feedback) => testFeedback.push(feedback), | ||
}, async () => { | ||
return testFn({ | ||
inputs: testInput, | ||
expected: testOutput, | ||
}); | ||
}); | ||
setLoggedOutput, | ||
onFeedbackLogged, | ||
}, runTestFn); | ||
} | ||
catch (e) { | ||
// Extract raw Jest error from LangSmith formatted one and throw | ||
if (e.rawJestError !== undefined) { | ||
throw e.rawJestError; | ||
} | ||
throw e; | ||
} | ||
} | ||
} | ||
finally { | ||
if (testReturnValue != null) { | ||
if (loggedOutput !== undefined) { | ||
console.warn(`[WARN]: Returned value from test function will override output set by previous "logOutput()" call.`); | ||
} | ||
loggedOutput = | ||
typeof testReturnValue === "object" | ||
? testReturnValue | ||
: { result: testReturnValue }; | ||
} | ||
await fs.mkdir(path.dirname(resultsPath), { recursive: true }); | ||
@@ -404,0 +409,0 @@ await fs.writeFile(resultsPath, JSON.stringify({ |
@@ -6,3 +6,12 @@ import { Table } from "console-table-printer"; | ||
import * as fs from "node:fs/promises"; | ||
const RESERVED_KEYS = ["Name", "Result", "Inputs", "Expected", "Actual"]; | ||
import { STRIP_ANSI_REGEX } from "./index.js"; | ||
const FEEDBACK_COLLAPSE_THRESHOLD = 64; | ||
const RESERVED_KEYS = [ | ||
"Name", | ||
"Result", | ||
"Inputs", | ||
"Expected", | ||
"Actual", | ||
"pass", | ||
]; | ||
function formatTestName(name, duration) { | ||
@@ -134,2 +143,4 @@ if (duration != null) { | ||
} | ||
const feedbackKeysTotalLength = [...feedbackKeys].reduce((l, key) => l + key.length, 0); | ||
const collapseFeedbackColumn = feedbackKeysTotalLength > FEEDBACK_COLLAPSE_THRESHOLD; | ||
for (const key of feedbackKeys) { | ||
@@ -146,25 +157,44 @@ const scores = rows | ||
const deviation = (score - mean) / stdDev; | ||
let coloredKey; | ||
let coloredScore; | ||
if (isNaN(deviation)) { | ||
coloredKey = chalk.white(`${key}:`); | ||
coloredScore = chalk.white(score); | ||
} | ||
else if (deviation <= -1) { | ||
coloredKey = chalk.redBright(`${key}:`); | ||
coloredScore = chalk.redBright(score); | ||
} | ||
else if (deviation < -0.5) { | ||
coloredKey = chalk.red(`${key}:`); | ||
coloredScore = chalk.red(score); | ||
} | ||
else if (deviation < 0) { | ||
coloredKey = chalk.yellow(`${key}:`); | ||
coloredScore = chalk.yellow(score); | ||
} | ||
else if (deviation === 0) { | ||
coloredKey = chalk.white(`${key}:`); | ||
coloredScore = chalk.white(score); | ||
} | ||
else if (deviation <= 0.5) { | ||
coloredKey = chalk.green(`${key}:`); | ||
coloredScore = chalk.green(score); | ||
} | ||
else { | ||
coloredKey = chalk.greenBright(`${key}:`); | ||
coloredScore = chalk.greenBright(score); | ||
} | ||
row[0][key] = coloredScore; | ||
if (collapseFeedbackColumn) { | ||
delete row[0][key]; | ||
if (row[0].Feedback === undefined) { | ||
row[0].Feedback = `${coloredKey} ${coloredScore}`; | ||
} | ||
else { | ||
row[0].Feedback = `${row[0].Feedback}\n${coloredKey} ${coloredScore}`; | ||
} | ||
} | ||
else { | ||
row[0][key] = coloredScore; | ||
} | ||
} | ||
@@ -174,11 +204,25 @@ } | ||
} | ||
const defaultColumns = [ | ||
{ name: "Test", alignment: "left", maxLen: 48 }, | ||
{ name: "Inputs", alignment: "left" }, | ||
{ name: "Reference Outputs", alignment: "left" }, | ||
{ name: "Outputs", alignment: "left" }, | ||
{ name: "Status", alignment: "left" }, | ||
]; | ||
if (collapseFeedbackColumn) { | ||
const feedbackColumnLength = rows.reduce((max, [row]) => { | ||
const maxFeedbackLineLength = row.Feedback?.split("\n").reduce((max, feedbackLine) => { | ||
return Math.max(max, feedbackLine.replace(STRIP_ANSI_REGEX, "").length); | ||
}, 0) ?? 0; | ||
return Math.max(max, maxFeedbackLineLength); | ||
}, 0); | ||
defaultColumns.push({ | ||
name: "Feedback", | ||
alignment: "left", | ||
minLen: feedbackColumnLength + 10, | ||
}); | ||
} | ||
console.log(); | ||
const table = new Table({ | ||
columns: [ | ||
{ name: "Test", alignment: "left", maxLen: 48 }, | ||
{ name: "Inputs", alignment: "left" }, | ||
{ name: "Reference Outputs", alignment: "left" }, | ||
{ name: "Outputs", alignment: "left" }, | ||
{ name: "Status", alignment: "left" }, | ||
], | ||
columns: defaultColumns, | ||
colorMap: { | ||
@@ -185,0 +229,0 @@ grey: "\x1b[90m", |
{ | ||
"name": "langsmith", | ||
"version": "0.2.16-rc.5", | ||
"version": "0.2.16-rc.6", | ||
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.", | ||
@@ -5,0 +5,0 @@ "packageManager": "yarn@1.22.19", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
921762
23406