langsmith
Advanced tools
Comparing version 0.2.15-rc.5 to 0.2.15-rc.6
@@ -5,2 +5,2 @@ export { Client, type ClientConfig, type LangSmithTracingClientInterface, } from "./client.js"; | ||
export { overrideFetchImplementation } from "./singletons/fetch.js"; | ||
export declare const __version__ = "0.2.15-rc.5"; | ||
export declare const __version__ = "0.2.15-rc.6"; |
@@ -5,2 +5,2 @@ export { Client, } from "./client.js"; | ||
// Update using yarn bump-version | ||
export const __version__ = "0.2.15-rc.5"; | ||
export const __version__ = "0.2.15-rc.6"; |
@@ -5,13 +5,13 @@ /// <reference types="node" resolution-mode="require"/> | ||
import { Client, CreateProjectParams } from "../client.js"; | ||
import { EvaluationResult } from "../evaluation/evaluator.js"; | ||
import { RunTree } from "../run_trees.js"; | ||
export type JestAsyncLocalStorageData = { | ||
enableTestTracking?: boolean; | ||
dataset?: Dataset; | ||
examples?: (Example & { | ||
inputHash: string; | ||
outputHash: string; | ||
})[]; | ||
createdAt: string; | ||
projectConfig?: Partial<CreateProjectParams>; | ||
project?: TracerSession; | ||
currentExample?: Partial<Example>; | ||
currentExample?: Partial<Example> & { | ||
syncPromise?: Promise<Example>; | ||
}; | ||
client: Client; | ||
@@ -23,1 +23,10 @@ suiteUuid: string; | ||
export declare function trackingEnabled(context: JestAsyncLocalStorageData): boolean; | ||
export declare const evaluatorLogFeedbackPromises: Set<unknown>; | ||
export declare const syncExamplePromises: Map<any, any>; | ||
export declare function logFeedback(params: { | ||
exampleId: string; | ||
feedback: EvaluationResult; | ||
context: JestAsyncLocalStorageData; | ||
runTree: RunTree; | ||
client: Client; | ||
}): void; |
import { AsyncLocalStorage } from "node:async_hooks"; | ||
import { getEnvironmentVariable } from "../utils/env.js"; | ||
import { isTracingEnabled } from "../env.js"; | ||
export const jestAsyncLocalStorageInstance = new AsyncLocalStorage(); | ||
/**
 * Decide whether LangSmith test tracking is active for a Jest context.
 *
 * Precedence, highest first:
 *   1. `context.enableTestTracking`, when it is an explicit boolean
 *      (so an explicit `false` disables tracking).
 *   2. The `LANGSMITH_TEST_TRACKING` environment variable, when it is the
 *      string "false".
 *   3. Whatever `isTracingEnabled()` reports.
 *
 * @param context Jest async-local-storage data for the current suite/test.
 * @returns `true` when tracking should be performed, `false` otherwise.
 */
export function trackingEnabled(context) {
    // Only a real boolean counts as an override; `undefined` falls through.
    if (typeof context.enableTestTracking === "boolean") {
        return context.enableTestTracking;
    }
    if (getEnvironmentVariable("LANGSMITH_TEST_TRACKING") === "false") {
        return false;
    }
    return isTracingEnabled();
}
export const evaluatorLogFeedbackPromises = new Set(); | ||
export const syncExamplePromises = new Map(); | ||
/**
 * Queue an evaluation-feedback upload for a test run without blocking the
 * caller.
 *
 * Does nothing when `trackingEnabled(context)` is false. Otherwise it starts
 * an async task that first awaits whatever is registered under `exampleId`
 * in `syncExamplePromises` (possibly nothing), then calls
 * `client.logEvaluationFeedback(feedback, runTree)` if a client was given.
 * The task's promise is stored in `evaluatorLogFeedbackPromises` so it can
 * be awaited later; this function itself returns immediately.
 *
 * @param params.exampleId Key used to look up the pending example sync.
 * @param params.feedback  Evaluation result to log.
 * @param params.context   Jest async-local-storage data for the test.
 * @param params.runTree   Run the feedback is attached to.
 * @param params.client    Client used to send the feedback (may be absent).
 */
export function logFeedback(params) {
    const { exampleId, feedback, context, runTree, client } = params;
    if (!trackingEnabled(context)) {
        return;
    }
    const submitFeedback = async () => {
        // Let any pending sync for this example settle before logging.
        await syncExamplePromises.get(exampleId);
        await client?.logEvaluationFeedback(feedback, runTree);
    };
    evaluatorLogFeedbackPromises.add(submitFeedback());
}
@@ -30,8 +30,9 @@ /// <reference types="jest" /> | ||
}; | ||
export type LangSmithJestWrapperConfig = Partial<RunTreeConfig> & { | ||
n?: number; | ||
export type LangSmithJestWrapperConfig = Partial<Omit<RunTreeConfig, "client">> & { | ||
iterations?: number; | ||
enableTestTracking?: boolean; | ||
}; | ||
export type LangSmithJestWrapperParams<I, O> = { | ||
inputs: I; | ||
outputs: O; | ||
expected: O; | ||
config?: LangSmithJestWrapperConfig; | ||
@@ -41,18 +42,18 @@ }; | ||
inputs: I; | ||
outputs: O; | ||
expected: O; | ||
}) => unknown | Promise<unknown>, params: LangSmithJestWrapperParams<I, O>, timeout?: number) => void; | ||
declare const lsTest: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestWrapperParams<I, O>, testFn: (data: { | ||
inputs: I; | ||
outputs: O; | ||
expected: O; | ||
}) => unknown | Promise<unknown>, timeout?: number) => void) & { | ||
only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestWrapperParams<I, O>, testFn: (data: { | ||
inputs: I; | ||
outputs: O; | ||
expected: O; | ||
}) => unknown | Promise<unknown>, timeout?: number) => void) & { | ||
each: <I_1 extends KVMap, O_1 extends KVMap>(table: "*" | { | ||
each: <I_1 extends KVMap, O_1 extends KVMap>(table: { | ||
inputs: I_1; | ||
outputs: O_1; | ||
expected: O_1; | ||
}[], config?: LangSmithJestWrapperConfig) => (name: string, fn: (params: { | ||
inputs: I_1; | ||
outputs: O_1; | ||
expected: O_1; | ||
}) => unknown | Promise<unknown>, timeout?: number) => void; | ||
@@ -62,18 +63,18 @@ }; | ||
inputs: I; | ||
outputs: O; | ||
expected: O; | ||
}) => unknown | Promise<unknown>, timeout?: number) => void) & { | ||
each: <I_1 extends KVMap, O_1 extends KVMap>(table: "*" | { | ||
each: <I_1 extends KVMap, O_1 extends KVMap>(table: { | ||
inputs: I_1; | ||
outputs: O_1; | ||
expected: O_1; | ||
}[], config?: LangSmithJestWrapperConfig) => (name: string, fn: (params: { | ||
inputs: I_1; | ||
outputs: O_1; | ||
expected: O_1; | ||
}) => unknown | Promise<unknown>, timeout?: number) => void; | ||
}; | ||
each: <I_1 extends KVMap, O_1 extends KVMap>(table: "*" | { | ||
each: <I_1 extends KVMap, O_1 extends KVMap>(table: { | ||
inputs: I_1; | ||
outputs: O_1; | ||
expected: O_1; | ||
}[], config?: LangSmithJestWrapperConfig) => (name: string, fn: (params: { | ||
inputs: I_1; | ||
outputs: O_1; | ||
expected: O_1; | ||
}) => unknown | Promise<unknown>, timeout?: number) => void; | ||
@@ -80,0 +81,0 @@ }; |
/* eslint-disable import/no-extraneous-dependencies */ | ||
/* eslint-disable @typescript-eslint/no-namespace */ | ||
import { expect, test, describe, beforeAll } from "@jest/globals"; | ||
import { expect, test, describe, beforeAll, afterAll } from "@jest/globals"; | ||
import crypto from "crypto"; | ||
import { v4 } from "uuid"; | ||
import { traceable } from "../traceable.js"; | ||
import { v4, v5 } from "uuid"; | ||
import { getCurrentRunTree, traceable } from "../traceable.js"; | ||
import { RunTree } from "../run_trees.js"; | ||
import { randomName } from "../evaluation/_random_name.js"; | ||
import { toBeRelativeCloseTo, toBeAbsoluteCloseTo, toBeSemanticCloseTo, } from "./matchers.js"; | ||
import { jestAsyncLocalStorageInstance, trackingEnabled, } from "./globals.js"; | ||
import { evaluatorLogFeedbackPromises, jestAsyncLocalStorageInstance, logFeedback, syncExamplePromises, trackingEnabled, } from "./globals.js"; | ||
import { wrapExpect } from "./vendor/chain.js"; | ||
const UUID5_NAMESPACE = "6ba7b810-9dad-11d1-80b4-00c04fd430c8"; | ||
// From https://stackoverflow.com/a/29497680 | ||
const STRIP_ANSI_REGEX = | ||
// eslint-disable-next-line no-control-regex | ||
/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g; | ||
expect.extend({ | ||
@@ -63,6 +68,43 @@ toBeRelativeCloseTo, | ||
const datasetSetupInfo = new Map(); | ||
const fetchExamplesPromises = new Map(); | ||
const createExamplePromises = new Map(); | ||
/**
 * Derive a deterministic example id from a dataset name and an
 * input/output pair.
 *
 * The inputs and outputs are hashed with `objectHash` (missing outputs are
 * hashed as `{}`), combined with the dataset name into a JSON string, and
 * that string is turned into a UUID v5 under `UUID5_NAMESPACE`. The same
 * arguments therefore always produce the same id.
 *
 * @param datasetName Name of the dataset the example belongs to.
 * @param inputs      Example inputs.
 * @param outputs     Example reference outputs; may be null/undefined.
 * @returns The UUID v5 string identifying the example.
 */
function getExampleId(datasetName, inputs, outputs) {
    const inputsHash = objectHash(inputs);
    const outputsHash = objectHash(outputs ?? {});
    // Key order is part of the identifier: changing it changes every id.
    const identifier = JSON.stringify({ datasetName, inputsHash, outputsHash });
    return v5(identifier, UUID5_NAMESPACE);
}
/**
 * Make sure an example with the given id exists remotely and matches the
 * local test case, creating or updating it as needed.
 *
 * Flow:
 *   - Read the example by id.
 *   - If its inputs, outputs (missing outputs compared as `{}`) or dataset
 *     id differ from the supplied values, update it.
 *   - If reading/updating fails with an error whose message contains
 *     "not found", create the example instead.
 *   - Any other failure is rethrown unchanged.
 *
 * @param params.client    Client exposing readExample / updateExample /
 *                         createExample.
 * @param params.exampleId Id of the example to sync.
 * @param params.inputs    Expected example inputs.
 * @param params.outputs   Expected example outputs (may be null/undefined).
 * @param params.metadata  Metadata sent on update and create.
 * @param params.createdAt Creation timestamp used on create; defaults to now.
 * @param params.datasetId Dataset the example must belong to.
 * @returns The example as read, or the newly created example. When an
 *          update was issued, the returned object is still the one read
 *          before the update.
 * @throws Whatever the client threw, when it is not a "not found" error.
 */
async function syncExample(params) {
    const { client, exampleId, inputs, outputs, metadata, createdAt, datasetId } = params;
    let example;
    try {
        example = await client.readExample(exampleId);
        const isOutOfDate = objectHash(example.inputs) !== objectHash(inputs) ||
            objectHash(example.outputs ?? {}) !== objectHash(outputs ?? {}) ||
            example.dataset_id !== datasetId;
        if (isOutOfDate) {
            await client.updateExample(exampleId, {
                inputs,
                outputs,
                metadata,
                dataset_id: datasetId,
            });
        }
    }
    catch (e) {
        // Duck-type the message: a thrown value without a string `message`
        // (null, a string, a plain object) must be rethrown as-is rather than
        // triggering a TypeError here that would hide the original failure.
        const message = typeof e?.message === "string" ? e.message : "";
        if (!message.includes("not found")) {
            throw e;
        }
        example = await client.createExample(inputs, outputs, {
            exampleId,
            datasetId,
            createdAt: new Date(createdAt ?? new Date()),
            metadata,
        });
    }
    return example;
}
async function runDatasetSetup(context) { | ||
const { client: testClient, suiteName: datasetName, projectConfig, suiteUuid, } = context; | ||
const { client: testClient, suiteName: datasetName, projectConfig } = context; | ||
let storageValue; | ||
@@ -91,10 +133,5 @@ if (!trackingEnabled(context)) { | ||
} | ||
if (fetchExamplesPromises.get(suiteUuid) === undefined) { | ||
fetchExamplesPromises.set(suiteUuid, fetchExamples(testClient, datasetName)); | ||
} | ||
const examples = await fetchExamplesPromises.get(suiteUuid); | ||
const project = await _createProject(testClient, dataset.id, projectConfig); | ||
storageValue = { | ||
dataset, | ||
examples, | ||
project, | ||
@@ -106,16 +143,5 @@ client: testClient, | ||
} | ||
/**
 * Collect every example of a dataset, annotating each with content hashes.
 *
 * Iterates the async iterable returned by
 * `testClient.listExamples({ datasetName })` and, for each example, adds
 * `inputHash` (hash of its inputs) and `outputHash` (hash of its outputs,
 * or of `{}` when outputs are missing), both computed with `objectHash`.
 *
 * @param testClient  Client exposing `listExamples`.
 * @param datasetName Name of the dataset to list.
 * @returns Array of examples, each extended with `inputHash` and `outputHash`.
 */
async function fetchExamples(testClient, datasetName) {
    const hashedExamples = [];
    const listing = testClient.listExamples({ datasetName });
    for await (const item of listing) {
        const inputHash = objectHash(item.inputs);
        const outputHash = objectHash(item.outputs ?? {});
        hashedExamples.push({ ...item, inputHash, outputHash });
    }
    return hashedExamples;
}
function wrapDescribeMethod(method) { | ||
return function (datasetName, fn, experimentConfig) { | ||
const client = experimentConfig?.client ?? RunTree.getSharedClient(); | ||
return method(datasetName, () => { | ||
@@ -126,3 +152,3 @@ const suiteUuid = v4(); | ||
suiteName: datasetName, | ||
client: experimentConfig?.client ?? RunTree.getSharedClient(), | ||
client, | ||
createdAt: new Date().toISOString(), | ||
@@ -135,2 +161,9 @@ projectConfig: experimentConfig, | ||
}); | ||
afterAll(async () => { | ||
await Promise.all([ | ||
client.awaitPendingTraceBatches(), | ||
...syncExamplePromises.values(), | ||
...evaluatorLogFeedbackPromises.values(), | ||
]); | ||
}); | ||
/** | ||
@@ -162,4 +195,8 @@ * We cannot rely on setting AsyncLocalStorage in beforeAll or beforeEach, | ||
const context = jestAsyncLocalStorageInstance.getStore(); | ||
const { config, inputs, outputs } = lsParams; | ||
const totalRuns = config?.n ?? 1; | ||
if (context !== undefined && | ||
lsParams.config?.enableTestTracking !== undefined) { | ||
context.enableTestTracking = lsParams.config.enableTestTracking; | ||
} | ||
const { config, inputs, expected } = lsParams; | ||
const totalRuns = config?.iterations ?? 1; | ||
for (let i = 0; i < totalRuns; i += 1) { | ||
@@ -175,12 +212,7 @@ // Jest will not group tests under the same "describe" group if you await the test and | ||
} | ||
const { examples, dataset, createdAt, project, client } = datasetSetupInfo.get(context.suiteUuid); | ||
const { dataset, createdAt, project, client } = datasetSetupInfo.get(context.suiteUuid); | ||
const testInput = inputs; | ||
const testOutput = outputs; | ||
const inputHash = objectHash(testInput); | ||
const outputHash = objectHash(testOutput ?? {}); | ||
const testOutput = expected; | ||
if (trackingEnabled(context)) { | ||
const missingFields = []; | ||
if (examples === undefined) { | ||
missingFields.push("examples"); | ||
} | ||
if (dataset === undefined) { | ||
@@ -200,24 +232,15 @@ missingFields.push("dataset"); | ||
} | ||
const testClient = config?.client ?? client; | ||
let example = (examples ?? []).find((example) => { | ||
return (example.inputHash === inputHash && | ||
example.outputHash === outputHash); | ||
}); | ||
if (example === undefined) { | ||
// Avoid creating multiple of the same example | ||
// when running the same test case multiple times | ||
// Jest runs other tests serially | ||
const exampleKey = [ | ||
context.suiteUuid, | ||
inputHash, | ||
outputHash, | ||
].join(":"); | ||
if (createExamplePromises.get(exampleKey) === undefined) { | ||
createExamplePromises.set(exampleKey, testClient.createExample(testInput, testOutput, { | ||
datasetId: dataset?.id, | ||
createdAt: new Date(createdAt ?? new Date()), | ||
})); | ||
} | ||
const newExample = await createExamplePromises.get(exampleKey); | ||
example = { ...newExample, inputHash, outputHash }; | ||
const testClient = client; | ||
const exampleId = getExampleId(dataset.name, inputs, expected); | ||
// Create or update the example in the background | ||
if (syncExamplePromises.get(exampleId) === undefined) { | ||
syncExamplePromises.set(exampleId, syncExample({ | ||
client, | ||
exampleId, | ||
datasetId: dataset.id, | ||
inputs, | ||
outputs: expected, | ||
metadata: {}, | ||
createdAt, | ||
})); | ||
} | ||
@@ -227,17 +250,18 @@ // .enterWith is OK here | ||
...context, | ||
currentExample: example, | ||
currentExample: { | ||
inputs, | ||
outputs: expected, | ||
id: exampleId, | ||
}, | ||
client: testClient, | ||
}); | ||
const traceableOptions = { | ||
reference_example_id: example.id, | ||
reference_example_id: exampleId, | ||
project_name: project.name, | ||
metadata: { | ||
...config?.metadata, | ||
example_version: example.modified_at | ||
? new Date(example.modified_at).toISOString() | ||
: new Date(example.created_at).toISOString(), | ||
}, | ||
client: testClient, | ||
tracingEnabled: true, | ||
name: "Unit test", | ||
name, | ||
}; | ||
@@ -247,9 +271,40 @@ // Pass inputs into traceable so tracing works correctly but | ||
const tracedFunction = traceable(async (_) => { | ||
return testFn({ | ||
inputs: testInput, | ||
outputs: testOutput, | ||
}); | ||
try { | ||
const res = await testFn({ | ||
inputs: testInput, | ||
expected: testOutput, | ||
}); | ||
logFeedback({ | ||
exampleId: exampleId, | ||
feedback: { key: "pass", score: true }, | ||
context, | ||
runTree: getCurrentRunTree(), | ||
client: testClient, | ||
}); | ||
return res; | ||
} | ||
catch (e) { | ||
logFeedback({ | ||
exampleId: exampleId, | ||
feedback: { key: "pass", score: false }, | ||
context, | ||
runTree: getCurrentRunTree(), | ||
client: testClient, | ||
}); | ||
const rawError = e; | ||
const strippedErrorMessage = e.message.replace(STRIP_ANSI_REGEX, ""); | ||
const langsmithFriendlyError = new Error(strippedErrorMessage); | ||
langsmithFriendlyError.rawJestError = rawError; | ||
throw langsmithFriendlyError; | ||
} | ||
}, { ...traceableOptions, ...config }); | ||
await tracedFunction(testInput); | ||
await testClient.awaitPendingTraceBatches(); | ||
try { | ||
await tracedFunction(testInput); | ||
} | ||
catch (e) { | ||
if (e.rawJestError !== undefined) { | ||
throw e.rawJestError; | ||
} | ||
throw e; | ||
} | ||
} | ||
@@ -264,3 +319,3 @@ else { | ||
inputs: testInput, | ||
outputs: testOutput, | ||
expected: testOutput, | ||
}); | ||
@@ -279,47 +334,6 @@ } | ||
return function (name, fn, timeout) { | ||
if (table === "*") { | ||
/* | ||
* Jest doesn't allow async setup before declaring tests, so we can't | ||
* fetch dataset examples before running the test. | ||
* beforeAll() does not run before test declarations. | ||
* datasetSetupInfo will not be populated until inside a test. | ||
*/ | ||
method(`${name} (pulling from LangSmith dataset "${context.suiteName}")`, async () => { | ||
if (fetchExamplesPromises.get(context.suiteUuid) === undefined) { | ||
fetchExamplesPromises.set(context.suiteUuid, fetchExamples(context.client, context.suiteName)); | ||
} | ||
const examples = await fetchExamplesPromises.get(context.suiteUuid); | ||
const testMethodPromises = []; | ||
for (let i = 0; i < examples.length; i += 1) { | ||
const example = examples[i]; | ||
// Context gets overwritten by Jest, so reset it here. | ||
jestAsyncLocalStorageInstance.enterWith(context); | ||
// Use wrapTestMethod to get the traceable spans to properly appear. | ||
// The test method gets executed without being awaited internally, | ||
// but we want to await it to catch errors properly so we store the | ||
// promises and await them at the end of the overarching test to | ||
// properly catch errors. | ||
wrapTestMethod(async (_, fn, timeout) => { | ||
const testPromise = Promise.race([ | ||
fn(), | ||
timeout > 0 | ||
? new Promise((_, reject) => setTimeout(() => reject(new Error(`Test run #${i} over LangSmith dataset timed out.`)), timeout)) | ||
: Promise.resolve(), | ||
]); | ||
testMethodPromises.push(testPromise); | ||
})(`${name}, item ${i}`, { inputs: example.inputs, outputs: example.outputs, config }, fn, timeout); | ||
} | ||
await Promise.all(testMethodPromises); | ||
// TODO: Fix pending test issue caused by traces being sent after the | ||
// test promises resolve. | ||
}, | ||
// Handle timeouts individually within the test | ||
0); | ||
for (let i = 0; i < table.length; i += 1) { | ||
const example = table[i]; | ||
wrapTestMethod(method)(`${name}, item ${i}`, { inputs: example.inputs, expected: example.expected, config }, fn, timeout); | ||
} | ||
else { | ||
for (let i = 0; i < table.length; i += 1) { | ||
const example = table[i]; | ||
wrapTestMethod(method)(`${name}, item ${i}`, { inputs: example.inputs, outputs: example.outputs, config }, fn, timeout); | ||
} | ||
} | ||
}; | ||
@@ -326,0 +340,0 @@ } |
import { getCurrentRunTree, traceable } from "../../traceable.js"; | ||
import { jestAsyncLocalStorageInstance, trackingEnabled } from "../globals.js"; | ||
import { jestAsyncLocalStorageInstance, logFeedback, trackingEnabled, } from "../globals.js"; | ||
export async function evaluatedBy(actual, evaluator) { | ||
@@ -12,7 +12,2 @@ const context = jestAsyncLocalStorageInstance.getStore(); | ||
reference_example_id: context.currentExample.id, | ||
metadata: { | ||
example_version: context.currentExample.modified_at | ||
? new Date(context.currentExample.modified_at).toISOString() | ||
: new Date(context.currentExample.created_at ?? new Date()).toISOString(), | ||
}, | ||
client: context.client, | ||
@@ -26,3 +21,9 @@ tracingEnabled: true, | ||
}); | ||
await context.client?.logEvaluationFeedback(evalResult, runTree); | ||
logFeedback({ | ||
exampleId: context.currentExample.id, | ||
feedback: evalResult, | ||
context, | ||
runTree, | ||
client: context.client, | ||
}); | ||
return evalResult.score; | ||
@@ -29,0 +30,0 @@ } |
{ | ||
"name": "langsmith", | ||
"version": "0.2.15-rc.5", | ||
"version": "0.2.15-rc.6", | ||
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.", | ||
@@ -5,0 +5,0 @@ "packageManager": "yarn@1.22.19", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
857181
22078