autoevals
Advanced tools
Comparing version 0.0.34 to 0.0.35
@@ -33,2 +33,5 @@ var __defProp = Object.defineProperty; | ||
// js/index.ts | ||
import { Score, ScorerArgs, Scorer } from "@braintrust/core"; | ||
// js/llm.ts | ||
@@ -59,4 +62,12 @@ import * as yaml from "js-yaml"; | ||
var PROXY_URL = "https://braintrustproxy.com/v1"; | ||
/**
 * Creates an OpenAI SDK client from caller-supplied auth options.
 *
 * A falsy `openAiApiKey` falls back to `Env.OPENAI_API_KEY`, and a falsy
 * `openAiBaseUrl` falls back to `PROXY_URL`. The organization id is passed
 * through unchanged.
 *
 * @param options Object that may carry `openAiApiKey`, `openAiOrganizationId`
 *   and `openAiBaseUrl`.
 * @returns A new `OpenAI` client instance.
 */
function buildOpenAIClient(options) {
  const suppliedKey = options.openAiApiKey;
  const organization = options.openAiOrganizationId;
  const suppliedBaseUrl = options.openAiBaseUrl;
  const clientConfig = {
    apiKey: suppliedKey || Env.OPENAI_API_KEY,
    organization,
    baseURL: suppliedBaseUrl || PROXY_URL
  };
  return new OpenAI(clientConfig);
}
async function cachedChatCompletion(params, options) { | ||
const { cache, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = options; | ||
const { cache } = options; | ||
return await currentSpanTraced( | ||
@@ -71,10 +82,3 @@ "OpenAI Completion", | ||
} else { | ||
const openai = new OpenAI({ | ||
apiKey: openAiApiKey || Env.OPENAI_API_KEY, | ||
organization: openAiOrganizationId, | ||
baseURL: openAiBaseUrl || PROXY_URL | ||
}); | ||
if (openai === null) { | ||
throw new Error("OPENAI_API_KEY not set"); | ||
} | ||
const openai = buildOpenAIClient(options); | ||
const completion = await openai.chat.completions.create(params); | ||
@@ -435,2 +439,3 @@ await (cache == null ? void 0 : cache.set(params, completion)); | ||
import levenshtein from "js-levenshtein"; | ||
import cossim from "compute-cosine-similarity"; | ||
var LevenshteinScorer = (args) => { | ||
@@ -447,6 +452,63 @@ if (args.expected === void 0) { | ||
return { | ||
name: "levenshtein", | ||
name: "Levenshtein", | ||
score | ||
}; | ||
}; | ||
/**
 * Scorer that embeds `args.output` and `args.expected` and compares the two
 * embeddings with cosine similarity.
 *
 * `args.prefix` (default "") is prepended to both texts before embedding,
 * `args.model` defaults to "text-embedding-ada-002", and the similarity is
 * rescaled by `scaleScore` against `args.expectedMin` (default 0.7).
 *
 * @param args Scorer arguments plus the OpenAI auth options consumed by
 *   `buildOpenAIClient`.
 * @returns `{ name, score, error }`; `error` is set only when the similarity
 *   helper returned `null`, in which case 0 is scaled instead.
 * @throws Error when `args.expected` is undefined.
 */
var EmbeddingDistance = async (args) => {
  if (args.expected === void 0) {
    throw new Error("EmbeddingDistance requires an expected value");
  }
  const orDefault = (value, fallback) => value != null ? value : fallback;
  const prefix = orDefault(args.prefix, "");
  const expectedMin = orDefault(args.expectedMin, 0.7);
  // Build both texts before creating the client, as the original did.
  const outputText = `${prefix}${args.output}`;
  const expectedText = `${prefix}${args.expected}`;
  const client = buildOpenAIClient(args);
  const requests = [];
  for (const input of [outputText, expectedText]) {
    requests.push(
      embed(client, {
        input,
        model: orDefault(args.model, "text-embedding-ada-002")
      })
    );
  }
  // Both embedding requests are in flight concurrently.
  const responses = await Promise.all(requests);
  const similarity = cossim(
    responses[0].data[0].embedding,
    responses[1].data[0].embedding
  );
  return {
    name: "EmbeddingDistance",
    score: scaleScore(orDefault(similarity, 0), expectedMin),
    error: similarity === null ? "EmbeddingDistance failed" : void 0
  };
};
/**
 * Rescales a raw similarity so that `expectedMin` maps to 0 and a perfect
 * similarity of 1 maps to 1, clamping the result to the [0, 1] range.
 *
 * @param {number} score Raw similarity (EmbeddingDistance passes the cosine
 *   similarity, or 0 when it could not be computed).
 * @param {number} expectedMin Similarity at or below which the result is 0.
 * @returns {number} The scaled score in [0, 1].
 */
function scaleScore(score, expectedMin) {
  // A floor at or above 1 leaves nothing to scale over; the plain formula
  // would divide by zero (0 / 0 -> NaN for a perfect score), so only a
  // perfect similarity counts.
  if (expectedMin >= 1) {
    return score >= 1 ? 1 : 0;
  }
  const scaled = (score - expectedMin) / (1 - expectedMin);
  // Floating-point rounding can push a cosine similarity slightly above 1,
  // so clamp the top as well as the bottom.
  return Math.min(Math.max(scaled, 0), 1);
}
/**
 * Requests an embedding from the given OpenAI client inside an
 * "OpenAI Embedding" traced span.
 *
 * The span log receives the input text, the first returned embedding vector,
 * the remaining request parameters as metadata, and token usage metrics when
 * the response carries a `usage` object.
 *
 * @param openai Client exposing `embeddings.create`.
 * @param params Embedding request parameters, including `input`.
 * @returns The full response returned by `openai.embeddings.create`.
 */
async function embed(openai, params) {
  const logEmbeddingCall = async (spanLog) => {
    const response = await openai.embeddings.create(params);
    const vector = response.data[0].embedding;
    const input = params.input;
    // Everything except `input` is recorded as span metadata.
    const metadata = __spreadValues({}, __objRest(params, ["input"]));
    const usage = response.usage;
    const hasUsage = usage != null;
    spanLog({
      input,
      output: vector,
      metadata,
      metrics: {
        tokens: hasUsage ? usage.total_tokens : void 0,
        prompt_tokens: hasUsage ? usage.prompt_tokens : void 0
      }
    });
    return response;
  };
  return await currentSpanTraced("OpenAI Embedding", logEmbeddingCall);
}
@@ -530,2 +592,3 @@ // js/number.ts | ||
ClosedQA, | ||
EmbeddingDistance, | ||
Factuality, | ||
@@ -541,2 +604,5 @@ Humor, | ||
Possible, | ||
Score, | ||
Scorer, | ||
ScorerArgs, | ||
Security, | ||
@@ -543,0 +609,0 @@ Sql, |
@@ -29,3 +29,3 @@ /** | ||
*/ | ||
export * from "./base.js"; | ||
export { Score, ScorerArgs, Scorer } from "@braintrust/core"; | ||
export * from "./llm.js"; | ||
@@ -32,0 +32,0 @@ export * from "./string.js"; |
@@ -29,3 +29,2 @@ /** | ||
*/ | ||
export * from "./base.js"; | ||
export * from "./llm.js"; | ||
@@ -32,0 +31,0 @@ export * from "./string.js"; |
@@ -1,2 +0,2 @@ | ||
import { Scorer } from "./base.js"; | ||
import { Scorer } from "@braintrust/core"; | ||
/** | ||
@@ -3,0 +3,0 @@ * A simple scorer that compares JSON objects, using a customizable comparison method for strings |
@@ -1,12 +0,9 @@ | ||
import { Score, Scorer, ScorerArgs } from "./base.js"; | ||
import { ChatCache } from "./oai.js"; | ||
import { Score, Scorer, ScorerArgs } from "@braintrust/core"; | ||
import { ChatCache, OpenAIAuth } from "./oai.js"; | ||
import { templates } from "./templates.js"; | ||
import { ChatCompletionCreateParams, ChatCompletionMessageParam } from "openai/resources/index.mjs"; | ||
interface LLMArgs { | ||
type LLMArgs = { | ||
maxTokens?: number; | ||
temperature?: number; | ||
openAiApiKey?: string; | ||
openAiOrganizationId?: string; | ||
openAiBaseUrl?: string; | ||
} | ||
} & OpenAIAuth; | ||
export declare function buildClassificationFunctions(useCoT: boolean): { | ||
@@ -13,0 +10,0 @@ name: string; |
@@ -38,2 +38,5 @@ var __defProp = Object.defineProperty; | ||
// js/index.ts | ||
import { Score, ScorerArgs, Scorer } from "@braintrust/core"; | ||
// js/llm.ts | ||
@@ -59,4 +62,12 @@ import * as yaml from "js-yaml"; | ||
var PROXY_URL = "https://braintrustproxy.com/v1"; | ||
/**
 * Creates an OpenAI SDK client from caller-supplied auth options.
 *
 * A falsy `openAiApiKey` falls back to `Env.OPENAI_API_KEY`, and a falsy
 * `openAiBaseUrl` falls back to `PROXY_URL`. The organization id is passed
 * through unchanged.
 *
 * @param options Object that may carry `openAiApiKey`, `openAiOrganizationId`
 *   and `openAiBaseUrl`.
 * @returns A new `OpenAI` client instance.
 */
function buildOpenAIClient(options) {
  const suppliedKey = options.openAiApiKey;
  const organization = options.openAiOrganizationId;
  const suppliedBaseUrl = options.openAiBaseUrl;
  const clientConfig = {
    apiKey: suppliedKey || Env.OPENAI_API_KEY,
    organization,
    baseURL: suppliedBaseUrl || PROXY_URL
  };
  return new OpenAI(clientConfig);
}
async function cachedChatCompletion(params, options) { | ||
const { cache, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = options; | ||
const { cache } = options; | ||
return await currentSpanTraced( | ||
@@ -71,10 +82,3 @@ "OpenAI Completion", | ||
} else { | ||
const openai = new OpenAI({ | ||
apiKey: openAiApiKey || Env.OPENAI_API_KEY, | ||
organization: openAiOrganizationId, | ||
baseURL: openAiBaseUrl || PROXY_URL | ||
}); | ||
if (openai === null) { | ||
throw new Error("OPENAI_API_KEY not set"); | ||
} | ||
const openai = buildOpenAIClient(options); | ||
const completion = await openai.chat.completions.create(params); | ||
@@ -435,2 +439,3 @@ await (cache == null ? void 0 : cache.set(params, completion)); | ||
import levenshtein from "js-levenshtein"; | ||
import cossim from "compute-cosine-similarity"; | ||
var LevenshteinScorer = (args) => { | ||
@@ -447,6 +452,63 @@ if (args.expected === void 0) { | ||
return { | ||
name: "levenshtein", | ||
name: "Levenshtein", | ||
score | ||
}; | ||
}; | ||
/**
 * Scorer that embeds `args.output` and `args.expected` and compares the two
 * embeddings with cosine similarity.
 *
 * `args.prefix` (default "") is prepended to both texts before embedding,
 * `args.model` defaults to "text-embedding-ada-002", and the similarity is
 * rescaled by `scaleScore` against `args.expectedMin` (default 0.7).
 *
 * @param args Scorer arguments plus the OpenAI auth options consumed by
 *   `buildOpenAIClient`.
 * @returns `{ name, score, error }`; `error` is set only when the similarity
 *   helper returned `null`, in which case 0 is scaled instead.
 * @throws Error when `args.expected` is undefined.
 */
var EmbeddingDistance = async (args) => {
  if (args.expected === void 0) {
    throw new Error("EmbeddingDistance requires an expected value");
  }
  const orDefault = (value, fallback) => value != null ? value : fallback;
  const prefix = orDefault(args.prefix, "");
  const expectedMin = orDefault(args.expectedMin, 0.7);
  // Build both texts before creating the client, as the original did.
  const outputText = `${prefix}${args.output}`;
  const expectedText = `${prefix}${args.expected}`;
  const client = buildOpenAIClient(args);
  const requests = [];
  for (const input of [outputText, expectedText]) {
    requests.push(
      embed(client, {
        input,
        model: orDefault(args.model, "text-embedding-ada-002")
      })
    );
  }
  // Both embedding requests are in flight concurrently.
  const responses = await Promise.all(requests);
  const similarity = cossim(
    responses[0].data[0].embedding,
    responses[1].data[0].embedding
  );
  return {
    name: "EmbeddingDistance",
    score: scaleScore(orDefault(similarity, 0), expectedMin),
    error: similarity === null ? "EmbeddingDistance failed" : void 0
  };
};
/**
 * Rescales a raw similarity so that `expectedMin` maps to 0 and a perfect
 * similarity of 1 maps to 1, clamping the result to the [0, 1] range.
 *
 * @param {number} score Raw similarity (EmbeddingDistance passes the cosine
 *   similarity, or 0 when it could not be computed).
 * @param {number} expectedMin Similarity at or below which the result is 0.
 * @returns {number} The scaled score in [0, 1].
 */
function scaleScore(score, expectedMin) {
  // A floor at or above 1 leaves nothing to scale over; the plain formula
  // would divide by zero (0 / 0 -> NaN for a perfect score), so only a
  // perfect similarity counts.
  if (expectedMin >= 1) {
    return score >= 1 ? 1 : 0;
  }
  const scaled = (score - expectedMin) / (1 - expectedMin);
  // Floating-point rounding can push a cosine similarity slightly above 1,
  // so clamp the top as well as the bottom.
  return Math.min(Math.max(scaled, 0), 1);
}
/**
 * Requests an embedding from the given OpenAI client inside an
 * "OpenAI Embedding" traced span.
 *
 * The span log receives the input text, the first returned embedding vector,
 * the remaining request parameters as metadata, and token usage metrics when
 * the response carries a `usage` object.
 *
 * @param openai Client exposing `embeddings.create`.
 * @param params Embedding request parameters, including `input`.
 * @returns The full response returned by `openai.embeddings.create`.
 */
async function embed(openai, params) {
  const logEmbeddingCall = async (spanLog) => {
    const response = await openai.embeddings.create(params);
    const vector = response.data[0].embedding;
    const input = params.input;
    // Everything except `input` is recorded as span metadata.
    const metadata = __spreadValues({}, __objRest(params, ["input"]));
    const usage = response.usage;
    const hasUsage = usage != null;
    spanLog({
      input,
      output: vector,
      metadata,
      metrics: {
        tokens: hasUsage ? usage.total_tokens : void 0,
        prompt_tokens: hasUsage ? usage.prompt_tokens : void 0
      }
    });
    return response;
  };
  return await currentSpanTraced("OpenAI Embedding", logEmbeddingCall);
}
@@ -533,2 +595,3 @@ // js/number.ts | ||
ClosedQA, | ||
EmbeddingDistance, | ||
Factuality, | ||
@@ -544,2 +607,5 @@ Humor, | ||
Possible, | ||
Score, | ||
Scorer, | ||
ScorerArgs, | ||
Security, | ||
@@ -546,0 +612,0 @@ Sql, |
@@ -1,2 +0,2 @@ | ||
import { Scorer } from "./base.js"; | ||
import { Scorer } from "@braintrust/core"; | ||
/** | ||
@@ -3,0 +3,0 @@ * A simple scorer that compares numbers by normalizing their difference. |
import { ChatCompletion, ChatCompletionCreateParams, ChatCompletionMessageParam } from "openai/resources/index.mjs"; | ||
import { OpenAI } from "openai"; | ||
export interface CachedLLMParams { | ||
@@ -19,4 +20,5 @@ model: string; | ||
} | ||
/**
 * Builds an OpenAI SDK client from the supplied auth options.
 *
 * The implementation falls back to `Env.OPENAI_API_KEY` when no API key is
 * given and to the Braintrust proxy URL when no base URL is given.
 *
 * @param options API key, organization id and base URL overrides.
 * @returns A configured `OpenAI` client.
 */
export declare function buildOpenAIClient(options: OpenAIAuth): OpenAI; | ||
/**
 * Runs a chat completion inside an "OpenAI Completion" traced span, using the
 * optional `cache` from `options`.
 *
 * When the request goes to OpenAI, the client is created with
 * `buildOpenAIClient(options)` and the completion is stored through
 * `cache.set` if a cache was supplied.
 * NOTE(review): the cache-lookup branch is not visible here; presumably a
 * cached completion is returned without calling OpenAI. Confirm in oai.ts.
 *
 * @param params Chat completion request parameters.
 * @param options Optional cache plus OpenAI auth options.
 * @returns The chat completion.
 */
export declare function cachedChatCompletion(params: CachedLLMParams, options: { | ||
cache?: ChatCache; | ||
} & OpenAIAuth): Promise<ChatCompletion>; |
@@ -25,5 +25,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { | ||
const PROXY_URL = "https://braintrustproxy.com/v1"; | ||
/**
 * Creates an OpenAI SDK client from caller-supplied auth options.
 *
 * A falsy `openAiApiKey` falls back to `Env.OPENAI_API_KEY`, and a falsy
 * `openAiBaseUrl` falls back to `PROXY_URL`. The organization id is passed
 * through unchanged.
 *
 * @param options Object that may carry `openAiApiKey`, `openAiOrganizationId`
 *   and `openAiBaseUrl`.
 * @returns A new `OpenAI` client instance.
 */
export function buildOpenAIClient(options) {
    const suppliedKey = options.openAiApiKey;
    const organization = options.openAiOrganizationId;
    const suppliedBaseUrl = options.openAiBaseUrl;
    const clientConfig = {
        apiKey: suppliedKey || Env.OPENAI_API_KEY,
        organization,
        baseURL: suppliedBaseUrl || PROXY_URL,
    };
    return new OpenAI(clientConfig);
}
export function cachedChatCompletion(params, options) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const { cache, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = options; | ||
const { cache } = options; | ||
return yield currentSpanTraced("OpenAI Completion", (spanLog) => __awaiter(this, void 0, void 0, function* () { | ||
@@ -37,10 +45,3 @@ var _a, _b, _c; | ||
else { | ||
const openai = new OpenAI({ | ||
apiKey: openAiApiKey || Env.OPENAI_API_KEY, | ||
organization: openAiOrganizationId, | ||
baseURL: openAiBaseUrl || PROXY_URL, | ||
}); | ||
if (openai === null) { | ||
throw new Error("OPENAI_API_KEY not set"); | ||
} | ||
const openai = buildOpenAIClient(options); | ||
const completion = yield openai.chat.completions.create(params); | ||
@@ -47,0 +48,0 @@ yield (cache === null || cache === void 0 ? void 0 : cache.set(params, completion)); |
@@ -1,2 +0,3 @@ | ||
import { Scorer } from "./base.js"; | ||
import { Scorer } from "@braintrust/core"; | ||
import { OpenAIAuth } from "./oai.js"; | ||
/** | ||
@@ -6,1 +7,6 @@ * A simple scorer that uses the Levenshtein distance to compare two strings. | ||
export declare const LevenshteinScorer: Scorer<string, {}>; | ||
/**
 * A scorer that embeds the output and the expected value with an OpenAI
 * embedding model and scores them by cosine similarity.
 *
 * Options:
 * - `prefix`: text prepended to both strings before embedding (default "").
 * - `expectedMin`: similarity that maps to a score of 0; the similarity is
 *   rescaled against it (default 0.7).
 * - `model`: embedding model name (default "text-embedding-ada-002").
 *
 * The implementation throws when no expected value is supplied.
 */
export declare const EmbeddingDistance: Scorer<string, { | ||
prefix?: string; | ||
expectedMin?: number; | ||
model?: string; | ||
} & OpenAIAuth>; |
@@ -0,2 +1,25 @@ | ||
// Downlevel async helper (the shape emitted by the TypeScript compiler for
// async functions): reuses an existing `this.__awaiter` when one is present,
// otherwise defines it here.
// It runs `generator` with `thisArg` / `_arguments` and returns a promise
// (constructor `P`, defaulting to `Promise`) that resolves with the
// generator's final value and rejects if stepping the generator throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { | ||
// Wraps a yielded value in a `P` promise unless it already is one.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } | ||
return new (P || (P = Promise))(function (resolve, reject) { | ||
// Resume the generator with the settled value; a thrown error rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } | ||
// Throw the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } | ||
// Either finish with the return value or wait on the next yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } | ||
// Instantiate the generator and start driving it.
step((generator = generator.apply(thisArg, _arguments || [])).next()); | ||
}); | ||
}; | ||
// Downlevel object-rest helper (the shape emitted by the TypeScript compiler
// for `const { a, ...rest } = obj`): reuses an existing `this.__rest` when one
// is present, otherwise defines it here.
// Returns a new object holding the properties of `s` whose keys are not
// listed in `e`.
var __rest = (this && this.__rest) || function (s, e) { | ||
var t = {}; | ||
// Copy own enumerable string-keyed properties that are not excluded.
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0) | ||
t[p] = s[p]; | ||
// Copy enumerable symbol-keyed properties too, where the runtime supports symbols.
if (s != null && typeof Object.getOwnPropertySymbols === "function") | ||
for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) { | ||
if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i])) | ||
t[p[i]] = s[p[i]]; | ||
} | ||
return t; | ||
}; | ||
import levenshtein from "js-levenshtein"; | ||
import { buildOpenAIClient } from "./oai.js"; | ||
import { currentSpanTraced } from "./util.js"; | ||
import cossim from "compute-cosine-similarity"; | ||
/** | ||
@@ -16,5 +39,54 @@ * A simple scorer that uses the Levenshtein distance to compare two strings. | ||
return { | ||
name: "levenshtein", | ||
name: "Levenshtein", | ||
score, | ||
}; | ||
}; | ||
/**
 * Scorer that embeds `args.output` and `args.expected` and compares the two
 * embeddings with cosine similarity.
 *
 * `args.prefix` (default "") is prepended to both texts before embedding,
 * `args.model` defaults to "text-embedding-ada-002", and the similarity is
 * rescaled by `scaleScore` against `args.expectedMin` (default 0.7).
 *
 * @param args Scorer arguments plus the OpenAI auth options consumed by
 *   `buildOpenAIClient`.
 * @returns A promise of `{ name, score, error }`; `error` is set only when the
 *   similarity helper returned `null`, in which case 0 is scaled instead.
 * @throws Error (as a rejection) when `args.expected` is undefined.
 */
export const EmbeddingDistance = (args) => __awaiter(void 0, void 0, void 0, function* () {
    if (args.expected === undefined) {
        throw new Error("EmbeddingDistance requires an expected value");
    }
    const orDefault = (value, fallback) => (value !== null && value !== void 0 ? value : fallback);
    const prefix = orDefault(args.prefix, "");
    const expectedMin = orDefault(args.expectedMin, 0.7);
    // Build both texts before creating the client, as the original did.
    const outputText = `${prefix}${args.output}`;
    const expectedText = `${prefix}${args.expected}`;
    const client = buildOpenAIClient(args);
    const requests = [];
    for (const input of [outputText, expectedText]) {
        requests.push(embed(client, {
            input,
            model: orDefault(args.model, "text-embedding-ada-002"),
        }));
    }
    // Both embedding requests are in flight concurrently.
    const responses = yield Promise.all(requests);
    const similarity = cossim(responses[0].data[0].embedding, responses[1].data[0].embedding);
    return {
        name: "EmbeddingDistance",
        score: scaleScore(orDefault(similarity, 0), expectedMin),
        error: similarity === null ? "EmbeddingDistance failed" : undefined,
    };
});
/**
 * Rescales a raw similarity so that `expectedMin` maps to 0 and a perfect
 * similarity of 1 maps to 1, clamping the result to the [0, 1] range.
 *
 * @param {number} score Raw similarity (EmbeddingDistance passes the cosine
 *   similarity, or 0 when it could not be computed).
 * @param {number} expectedMin Similarity at or below which the result is 0.
 * @returns {number} The scaled score in [0, 1].
 */
function scaleScore(score, expectedMin) {
    // A floor at or above 1 leaves nothing to scale over; the plain formula
    // would divide by zero (0 / 0 -> NaN for a perfect score), so only a
    // perfect similarity counts.
    if (expectedMin >= 1) {
        return score >= 1 ? 1 : 0;
    }
    const scaled = (score - expectedMin) / (1 - expectedMin);
    // Floating-point rounding can push a cosine similarity slightly above 1,
    // so clamp the top as well as the bottom.
    return Math.min(Math.max(scaled, 0), 1);
}
/**
 * Requests an embedding from the given OpenAI client inside an
 * "OpenAI Embedding" traced span.
 *
 * The span log receives the input text, the first returned embedding vector,
 * the remaining request parameters as metadata, and token usage metrics when
 * the response carries a `usage` object.
 *
 * @param openai Client exposing `embeddings.create`.
 * @param params Embedding request parameters, including `input`.
 * @returns A promise of the full response from `openai.embeddings.create`.
 */
function embed(openai, params) {
    return __awaiter(this, void 0, void 0, function* () {
        const logEmbeddingCall = (spanLog) => __awaiter(this, void 0, void 0, function* () {
            const response = yield openai.embeddings.create(params);
            const vector = response.data[0].embedding;
            const input = params.input;
            // Everything except `input` is recorded as span metadata.
            const metadata = Object.assign({}, __rest(params, ["input"]));
            const usage = response.usage;
            const hasUsage = usage !== null && usage !== void 0;
            spanLog({
                input,
                output: vector,
                metadata,
                metrics: {
                    tokens: hasUsage ? usage.total_tokens : void 0,
                    prompt_tokens: hasUsage ? usage.prompt_tokens : void 0,
                },
            });
            return response;
        });
        return yield currentSpanTraced("OpenAI Embedding", logEmbeddingCall);
    });
}
{ | ||
"name": "autoevals", | ||
"version": "0.0.34", | ||
"version": "0.0.35", | ||
"description": "Universal library for evaluating AI models", | ||
@@ -30,4 +30,4 @@ "main": "jsdist/bundle.js", | ||
"test": "jest", | ||
"prepublishOnly": "./scripts/node_prepublish.py", | ||
"postpublish": "./scripts/node_postpublish.py" | ||
"prepublishOnly": "../scripts/node_prepublish_autoevals.py", | ||
"postpublish": "../scripts/node_postpublish_autoevals.py" | ||
}, | ||
@@ -49,3 +49,5 @@ "author": "", | ||
"dependencies": { | ||
"@braintrust/core": "^0.0.6", | ||
"@types/node": "^20.4.4", | ||
"compute-cosine-similarity": "^1.1.0", | ||
"esbuild": "^0.19.1", | ||
@@ -52,0 +54,0 @@ "js-levenshtein": "^1.1.6", |
@@ -92,4 +92,4 @@ # AutoEvals | ||
- Embedding distance | ||
- [ ] BERTScore | ||
- [ ] Ada Embedding distance | ||
@@ -96,0 +96,0 @@ ### Heuristic |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
398456
3549
9
1
+ Added @braintrust/core@^0.0.6
+ Added @braintrust/core@0.0.6 (transitive)
+ Added compute-cosine-similarity@1.1.0 (transitive)
+ Added compute-dot@1.1.0 (transitive)
+ Added compute-l2norm@1.1.0 (transitive)
+ Added validate.io-array@1.0.6 (transitive)
+ Added validate.io-function@1.0.2 (transitive)