autoevals - npm package version comparison

Comparing version 0.0.31 to 0.0.32
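Two changes stand out in this release: the compiled output switches from CommonJS wrappers to native ES module syntax, and the LLM-based scorers gain an openAiBaseUrl option that is forwarded to the OpenAI client as baseURL, falling back to the built-in proxy URL (https://braintrustproxy.com/v1) when omitted. Below is a minimal sketch of how the new option might be passed to one of the exported scorers; the argument names mirror the LLMArgs declaration further down, but treat the exact call shape as an assumption rather than documented API.

import { Factuality } from "autoevals";

// Hypothetical usage: route scorer traffic to a custom OpenAI-compatible
// endpoint instead of the default braintrustproxy.com proxy.
const result = await Factuality({
  input: "Which country has the highest population?",
  output: "People's Republic of China",
  expected: "China",
  openAiBaseUrl: "https://my-openai-host.example.com/v1", // assumed placeholder URL
});
console.log(result.name, result.score);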


jsdist/base.js

@@ -1,2 +0,1 @@

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
export {};

@@ -78,4 +78,5 @@ var __defProp = Object.defineProperty;

// js/oai.ts
var PROXY_URL = "https://braintrustproxy.com/v1";
async function cachedChatCompletion(params, options) {
const { cache, openAiApiKey, openAiOrganizationId } = options;
const { cache, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = options;
return await currentSpan().traced("OpenAI Completion", async (span) => {

@@ -90,3 +91,4 @@ var _b, _c, _d;

apiKey: openAiApiKey || Env.OPENAI_API_KEY,
organization: openAiOrganizationId
organization: openAiOrganizationId,
baseURL: openAiBaseUrl || PROXY_URL
});

@@ -257,3 +259,4 @@ if (openai === null) {

openAiApiKey,
openAiOrganizationId
openAiOrganizationId,
openAiBaseUrl
} = _a, remaining = __objRest(_a, [

@@ -264,3 +267,4 @@ "name",

"openAiApiKey",
"openAiOrganizationId"
"openAiOrganizationId",
"openAiBaseUrl"
]);

@@ -320,3 +324,4 @@ const _b = remaining, {

openAiApiKey,
openAiOrganizationId
openAiOrganizationId,
openAiBaseUrl
}

@@ -323,0 +328,0 @@ );

@@ -1,6 +0,3 @@

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Env = void 0;
exports.Env = {
export const Env = {
OPENAI_API_KEY: undefined,
};

@@ -1,2 +0,1 @@

"use strict";
/**

@@ -30,22 +29,7 @@ * AutoEvals is a tool to quickly and easily evaluate AI model outputs.

*/
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __exportStar = (this && this.__exportStar) || function(m, exports) {
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
Object.defineProperty(exports, "__esModule", { value: true });
__exportStar(require("./base.js"), exports);
__exportStar(require("./llm.js"), exports);
__exportStar(require("./string.js"), exports);
__exportStar(require("./number.js"), exports);
__exportStar(require("./json.js"), exports);
__exportStar(require("./templates.js"), exports);
export * from "./base.js";
export * from "./llm.js";
export * from "./string.js";
export * from "./number.js";
export * from "./json.js";
export * from "./templates.js";

@@ -1,2 +0,1 @@

"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -11,6 +10,4 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }

};
Object.defineProperty(exports, "__esModule", { value: true });
exports.JSONDiff = void 0;
const number_js_1 = require("./number.js");
const string_js_1 = require("./string.js");
import { NumericDiff } from "./number.js";
import { LevenshteinScorer } from "./string.js";
/**

@@ -20,3 +17,3 @@ * A simple scorer that compares JSON objects, using a customizable comparison method for strings

*/
const JSONDiff = ({ output, expected, stringScorer = string_js_1.LevenshteinScorer, numberScorer = number_js_1.NumericDiff, }) => __awaiter(void 0, void 0, void 0, function* () {
export const JSONDiff = ({ output, expected, stringScorer = LevenshteinScorer, numberScorer = NumericDiff, }) => __awaiter(void 0, void 0, void 0, function* () {
return {

@@ -27,3 +24,2 @@ name: "JSONDiff",

});
exports.JSONDiff = JSONDiff;
function jsonDiff(o1, o2, stringScorer, numberScorer) {

@@ -30,0 +26,0 @@ return __awaiter(this, void 0, void 0, function* () {
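JSONDiff keeps its default comparison methods (LevenshteinScorer for strings, NumericDiff for numbers) and now imports them with ESM syntax. A small usage sketch; the custom-scorer parameters are spelled out only for illustration, since they default to the same values:

import { JSONDiff, LevenshteinScorer, NumericDiff } from "autoevals";

// Compare two JSON-like values field by field: strings are scored with
// Levenshtein similarity and numbers with a normalized difference.
const { score } = await JSONDiff({
  output: { name: "Ada", age: 36 },
  expected: { name: "Ada Lovelace", age: 36 },
  stringScorer: LevenshteinScorer,
  numberScorer: NumericDiff,
});
console.log(score); // between 0 and 1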

@@ -1,2 +0,1 @@

"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -11,4 +10,3 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }

};
Object.defineProperty(exports, "__esModule", { value: true });
const json_js_1 = require("./json.js");
import { JSONDiff } from "./json.js";
test("JSON String Test", () => __awaiter(void 0, void 0, void 0, function* () {

@@ -27,3 +25,3 @@ const cases = [

for (const { a, b, expected } of cases) {
const score = (yield (0, json_js_1.JSONDiff)({ output: a, expected: b })).score;
const score = (yield JSONDiff({ output: a, expected: b })).score;
expect(score).toBeCloseTo(expected);

@@ -52,5 +50,5 @@ }

for (const { a, b, expected } of cases) {
const score = (yield (0, json_js_1.JSONDiff)({ output: a, expected: b })).score;
const score = (yield JSONDiff({ output: a, expected: b })).score;
expect(score).toBeCloseTo(expected);
}
}));
import { Score, Scorer, ScorerArgs } from "./base.js";
import { ChatCache } from "./oai.js";
import { templates } from "./templates.js";
import { ChatCompletionCreateParams, ChatCompletionMessage } from "openai/resources/index.mjs";
import { ChatCompletionCreateParams, ChatCompletionMessageParam } from "openai/resources/index.mjs";
interface LLMArgs {

@@ -10,2 +10,3 @@ maxTokens?: number;

openAiOrganizationId?: string;
openAiBaseUrl?: string;
}

@@ -31,3 +32,3 @@ export declare function buildClassificationFunctions(useCoT: boolean): {

model: string;
messages: ChatCompletionMessage[];
messages: ChatCompletionMessageParam[];
choiceScores: Record<string, number>;

@@ -34,0 +35,0 @@ classificationFunctions: ChatCompletionCreateParams.Function[];

@@ -1,25 +0,1 @@

"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -45,11 +21,6 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }

};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Translation = exports.Summary = exports.Sql = exports.Security = exports.Possible = exports.Factuality = exports.Humor = exports.ClosedQA = exports.Battle = exports.LLMClassifierFromSpecFile = exports.LLMClassifierFromSpec = exports.LLMClassifierFromTemplate = exports.OpenAIClassifier = exports.buildClassificationFunctions = void 0;
const yaml = __importStar(require("js-yaml"));
const mustache_1 = __importDefault(require("mustache"));
const oai_js_1 = require("./oai.js");
const templates_js_1 = require("./templates.js");
import * as yaml from "js-yaml";
import mustache from "mustache";
import { cachedChatCompletion } from "./oai.js";
import { templates } from "./templates.js";
const NO_COT_SUFFIX = "Answer the question by calling `select_choice` with a single choice from {{__choices}}.";

@@ -80,3 +51,3 @@ const COT_SUFFIX = "Answer the question by calling `select_choice` with your reasoning in a step-by-step matter to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Select a single choice by setting the `choice` parameter to a single choice from {{__choices}}.";

};
function buildClassificationFunctions(useCoT) {
export function buildClassificationFunctions(useCoT) {
return [

@@ -90,6 +61,5 @@ {

}
exports.buildClassificationFunctions = buildClassificationFunctions;
function OpenAIClassifier(args) {
export function OpenAIClassifier(args) {
return __awaiter(this, void 0, void 0, function* () {
const { name, output, expected, openAiApiKey, openAiOrganizationId } = args, remaining = __rest(args, ["name", "output", "expected", "openAiApiKey", "openAiOrganizationId"]);
const { name, output, expected, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = args, remaining = __rest(args, ["name", "output", "expected", "openAiApiKey", "openAiOrganizationId", "openAiBaseUrl"]);
const { messages: messagesArg, model, choiceScores, classificationFunctions, maxTokens, temperature, cache } = remaining, remainingRenderArgs = __rest(remaining, ["messages", "model", "choiceScores", "classificationFunctions", "maxTokens", "temperature", "cache"]);

@@ -112,7 +82,7 @@ let found = false;

expected }, remainingRenderArgs);
const messages = messagesArg.map((m) => (Object.assign(Object.assign({}, m), { content: m.content && mustache_1.default.render(m.content, renderArgs) })));
const messages = messagesArg.map((m) => (Object.assign(Object.assign({}, m), { content: m.content && mustache.render(m.content, renderArgs) })));
let ret = null;
let validityScore = 1;
try {
const resp = yield (0, oai_js_1.cachedChatCompletion)(Object.assign({ model,
const resp = yield cachedChatCompletion(Object.assign({ model,
messages, functions: classificationFunctions, function_call: { name: "select_choice" } }, extraArgs), {

@@ -122,2 +92,3 @@ cache,

openAiOrganizationId,
openAiBaseUrl,
});

@@ -142,3 +113,2 @@ if (resp.choices.length > 0) {

}
exports.OpenAIClassifier = OpenAIClassifier;
function parseResponse(resp, choiceScores) {

@@ -171,3 +141,3 @@ var _a;

}
function LLMClassifierFromTemplate({ name, promptTemplate, choiceScores, model = "gpt-3.5-turbo", useCoT: useCoTArg, temperature, }) {
export function LLMClassifierFromTemplate({ name, promptTemplate, choiceScores, model = "gpt-3.5-turbo", useCoT: useCoTArg, temperature, }) {
const choiceStrings = Object.keys(choiceScores);

@@ -200,4 +170,3 @@ const ret = (runtimeArgs) => __awaiter(this, void 0, void 0, function* () {

}
exports.LLMClassifierFromTemplate = LLMClassifierFromTemplate;
function LLMClassifierFromSpec(name, spec) {
export function LLMClassifierFromSpec(name, spec) {
return LLMClassifierFromTemplate({

@@ -212,10 +181,8 @@ name,

}
exports.LLMClassifierFromSpec = LLMClassifierFromSpec;
function LLMClassifierFromSpecFile(name, templateName) {
const doc = yaml.load(templates_js_1.templates[templateName]);
export function LLMClassifierFromSpecFile(name, templateName) {
const doc = yaml.load(templates[templateName]);
return LLMClassifierFromSpec(name, doc);
}
exports.LLMClassifierFromSpecFile = LLMClassifierFromSpecFile;
function buildLLMClassifier(name, templateName) {
if (!(templateName in templates_js_1.templates)) {
if (!(templateName in templates)) {
throw new Error(`Model template ${name} not found`);

@@ -229,3 +196,3 @@ }

*/
exports.Battle = buildLLMClassifier("Battle", "battle");
export const Battle = buildLLMClassifier("Battle", "battle");
/**

@@ -235,27 +202,27 @@ * Test whether an output answers the `input` using knowledge built into the model.

*/
exports.ClosedQA = buildLLMClassifier("ClosedQA", "closed_q_a");
export const ClosedQA = buildLLMClassifier("ClosedQA", "closed_q_a");
/**
* Test whether an output is funny.
*/
exports.Humor = buildLLMClassifier("Humor", "humor");
export const Humor = buildLLMClassifier("Humor", "humor");
/**
* Test whether an output is factual, compared to an original (`expected`) value.
*/
exports.Factuality = buildLLMClassifier("Factuality", "factuality");
export const Factuality = buildLLMClassifier("Factuality", "factuality");
/**
* Test whether an output is a possible solution to the challenge posed in the input.
*/
exports.Possible = buildLLMClassifier("Possible", "possible");
export const Possible = buildLLMClassifier("Possible", "possible");
/**
* Test whether an output is malicious.
*/
exports.Security = buildLLMClassifier("Security", "security");
export const Security = buildLLMClassifier("Security", "security");
/**
* Test whether a SQL query is semantically the same as a reference (output) query.
*/
exports.Sql = buildLLMClassifier("Sql", "sql");
export const Sql = buildLLMClassifier("Sql", "sql");
/**
* Test whether an output is a better summary of the `input` than the original (`expected`) value.
*/
exports.Summary = buildLLMClassifier("Summary", "summary");
export const Summary = buildLLMClassifier("Summary", "summary");
/**

@@ -265,2 +232,2 @@ * Test whether an `output` is as good of a translation of the `input` in the specified `language`

*/
exports.Translation = buildLLMClassifier("Translation", "translation");
export const Translation = buildLLMClassifier("Translation", "translation");
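The prebuilt classifiers above (Battle, ClosedQA, Factuality, and so on) are all produced by buildLLMClassifier, which delegates to LLMClassifierFromTemplate. A hedged sketch of defining a custom classifier with the same helper; the prompt text, choice scores, and classifier name are made up for illustration:

import { LLMClassifierFromTemplate } from "autoevals";

// Hypothetical custom classifier that grades whether a response is polite.
const Politeness = LLMClassifierFromTemplate({
  name: "Politeness",
  promptTemplate: "Is the following response polite?\n\nResponse: {{output}}",
  choiceScores: { Yes: 1, No: 0 },
  useCoT: true,
});

const res = await Politeness({ output: "Thanks so much for your help!" });
console.log(res.score);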

@@ -78,4 +78,5 @@ var __defProp = Object.defineProperty;

// js/oai.ts
var PROXY_URL = "https://braintrustproxy.com/v1";
async function cachedChatCompletion(params, options) {
const { cache, openAiApiKey, openAiOrganizationId } = options;
const { cache, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = options;
return await currentSpan().traced("OpenAI Completion", async (span) => {

@@ -90,3 +91,4 @@ var _b, _c, _d;

apiKey: openAiApiKey || Env.OPENAI_API_KEY,
organization: openAiOrganizationId
organization: openAiOrganizationId,
baseURL: openAiBaseUrl || PROXY_URL
});

@@ -257,3 +259,4 @@ if (openai === null) {

openAiApiKey,
openAiOrganizationId
openAiOrganizationId,
openAiBaseUrl
} = _a, remaining = __objRest(_a, [

@@ -264,3 +267,4 @@ "name",

"openAiApiKey",
"openAiOrganizationId"
"openAiOrganizationId",
"openAiBaseUrl"
]);

@@ -320,3 +324,4 @@ const _b = remaining, {

openAiApiKey,
openAiOrganizationId
openAiOrganizationId,
openAiBaseUrl
}

@@ -323,0 +328,0 @@ );

@@ -1,8 +0,5 @@

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.NumericDiff = void 0;
/**
* A simple scorer that compares numbers by normalizing their difference.
*/
const NumericDiff = (args) => {
export const NumericDiff = (args) => {
const { output, expected } = args;

@@ -21,2 +18,1 @@ if (expected === undefined) {

};
exports.NumericDiff = NumericDiff;

@@ -1,7 +0,7 @@

import { ChatCompletion, ChatCompletionCreateParams, ChatCompletionMessage } from "openai/resources/index.mjs";
import { ChatCompletion, ChatCompletionCreateParams, ChatCompletionMessageParam } from "openai/resources/index.mjs";
export interface CachedLLMParams {
model: string;
messages: ChatCompletionMessage[];
messages: ChatCompletionMessageParam[];
functions?: ChatCompletionCreateParams.Function[];
function_call?: ChatCompletionCreateParams.FunctionCallOption;
function_call?: ChatCompletionCreateParams["function_call"];
temperature?: number;

@@ -17,2 +17,3 @@ max_tokens?: number;

openAiOrganizationId?: string;
openAiBaseUrl?: string;
}

@@ -19,0 +20,0 @@ export declare function cachedChatCompletion(params: CachedLLMParams, options: {
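For completeness, a sketch of calling the lower-level cachedChatCompletion helper directly with the new option. Note that oai.js is not re-exported from the package root in this diff, so the deep import path below is an assumption; the model and message values are placeholders:

// Assumed deep import path; cachedChatCompletion is defined in jsdist/oai.js
// but is not part of the root re-exports shown above.
import { cachedChatCompletion } from "autoevals/jsdist/oai.js";

const completion = await cachedChatCompletion(
  {
    model: "gpt-3.5-turbo",
    messages: [{ role: "user", content: "Say hello." }],
  },
  {
    openAiApiKey: process.env.OPENAI_API_KEY,
    openAiBaseUrl: "https://my-openai-host.example.com/v1", // assumed placeholder URL
  }
);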

@@ -1,2 +0,1 @@

"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -22,11 +21,10 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }

};
Object.defineProperty(exports, "__esModule", { value: true });
exports.cachedChatCompletion = void 0;
const openai_1 = require("openai");
const env_js_1 = require("./env.js");
const util_js_1 = require("./util.js");
function cachedChatCompletion(params, options) {
import { OpenAI } from "openai";
import { Env } from "./env.js";
import { currentSpan } from "./util.js";
const PROXY_URL = "https://braintrustproxy.com/v1";
export function cachedChatCompletion(params, options) {
return __awaiter(this, void 0, void 0, function* () {
const { cache, openAiApiKey, openAiOrganizationId } = options;
return yield (0, util_js_1.currentSpan)().traced("OpenAI Completion", (span) => __awaiter(this, void 0, void 0, function* () {
const { cache, openAiApiKey, openAiOrganizationId, openAiBaseUrl } = options;
return yield currentSpan().traced("OpenAI Completion", (span) => __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c;

@@ -39,5 +37,6 @@ let cached = false;

else {
const openai = new openai_1.OpenAI({
apiKey: openAiApiKey || env_js_1.Env.OPENAI_API_KEY,
const openai = new OpenAI({
apiKey: openAiApiKey || Env.OPENAI_API_KEY,
organization: openAiOrganizationId,
baseURL: openAiBaseUrl || PROXY_URL,
});

@@ -66,2 +65,1 @@ if (openai === null) {

}
exports.cachedChatCompletion = cachedChatCompletion;

@@ -1,12 +0,6 @@

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.LevenshteinScorer = void 0;
const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
import levenshtein from "js-levenshtein";
/**
* A simple scorer that uses the Levenshtein distance to compare two strings.
*/
const LevenshteinScorer = (args) => {
export const LevenshteinScorer = (args) => {
if (args.expected === undefined) {

@@ -19,3 +13,3 @@ throw new Error("LevenshteinScorer requires an expected value");

if (maxLen > 0) {
score = 1 - (0, js_levenshtein_1.default)(output, expected) / maxLen;
score = 1 - levenshtein(output, expected) / maxLen;
}

@@ -27,2 +21,1 @@ return {

};
exports.LevenshteinScorer = LevenshteinScorer;

@@ -1,26 +0,20 @@

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
import battle from "../templates/battle.yaml";
import closed_q_a from "../templates/closed_q_a.yaml";
import factuality from "../templates/factuality.yaml";
import humor from "../templates/humor.yaml";
import possible from "../templates/possible.yaml";
import security from "../templates/security.yaml";
import sql from "../templates/sql.yaml";
import summary from "../templates/summary.yaml";
import translation from "../templates/translation.yaml";
export const templates = {
battle,
closed_q_a,
factuality,
humor,
possible,
security,
sql,
summary,
translation,
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.templates = void 0;
const battle_yaml_1 = __importDefault(require("../templates/battle.yaml"));
const closed_q_a_yaml_1 = __importDefault(require("../templates/closed_q_a.yaml"));
const factuality_yaml_1 = __importDefault(require("../templates/factuality.yaml"));
const humor_yaml_1 = __importDefault(require("../templates/humor.yaml"));
const possible_yaml_1 = __importDefault(require("../templates/possible.yaml"));
const security_yaml_1 = __importDefault(require("../templates/security.yaml"));
const sql_yaml_1 = __importDefault(require("../templates/sql.yaml"));
const summary_yaml_1 = __importDefault(require("../templates/summary.yaml"));
const translation_yaml_1 = __importDefault(require("../templates/translation.yaml"));
exports.templates = {
battle: battle_yaml_1.default,
closed_q_a: closed_q_a_yaml_1.default,
factuality: factuality_yaml_1.default,
humor: humor_yaml_1.default,
possible: possible_yaml_1.default,
security: security_yaml_1.default,
sql: sql_yaml_1.default,
summary: summary_yaml_1.default,
translation: translation_yaml_1.default,
};
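The templates table above is what LLMClassifierFromSpecFile indexes into by key, loading the matching bundled YAML spec with js-yaml. A short sketch of building a scorer from one of those specs; the custom name is arbitrary, while the second argument must be one of the keys of templates:

import { LLMClassifierFromSpecFile } from "autoevals";

// Builds a classifier from the bundled "factuality" YAML template.
const MyFactuality = LLMClassifierFromSpecFile("MyFactuality", "factuality");

const { score } = await MyFactuality({
  input: "What color is the sky?",
  output: "Blue",
  expected: "The sky is blue.",
});
console.log(score);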

@@ -1,6 +0,3 @@

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.currentSpan = exports.NoopSpan = void 0;
/* This is copy/pasted from braintrust-sdk*/
class NoopSpan {
export class NoopSpan {
constructor() {

@@ -27,4 +24,3 @@ this.kind = "span";

}
exports.NoopSpan = NoopSpan;
function currentSpan() {
export function currentSpan() {
if (globalThis.__inherited_braintrust_state) {

@@ -37,2 +33,1 @@ return globalThis.__inherited_braintrust_state.currentSpan.getStore();

}
exports.currentSpan = currentSpan;

@@ -1,2 +0,1 @@

"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -11,5 +10,4 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }

};
Object.defineProperty(exports, "__esModule", { value: true });
const number_js_1 = require("./number.js");
const string_js_1 = require("./string.js");
import { NumericDiff } from "./number.js";
import { LevenshteinScorer } from "./string.js";
test("Levenshtein Test", () => __awaiter(void 0, void 0, void 0, function* () {

@@ -28,3 +26,3 @@ const cases = [

for (const { a, b, expected } of cases) {
const score = (yield (0, string_js_1.LevenshteinScorer)({ output: a, expected: b })).score;
const score = (yield LevenshteinScorer({ output: a, expected: b })).score;
expect(score).toBeCloseTo(expected);

@@ -43,5 +41,5 @@ }

console.log(a, b, expected);
const score = (yield (0, number_js_1.NumericDiff)({ output: a, expected: b })).score;
const score = (yield NumericDiff({ output: a, expected: b })).score;
expect(score).toBeCloseTo(expected);
}
}));
{
"name": "autoevals",
"version": "0.0.31",
"version": "0.0.32",
"description": "Universal library for evaluating AI models",

@@ -5,0 +5,0 @@ "main": "jsdist/bundle.js",

(The message "Sorry, the diff of this file is not supported yet" appears for nine files.)
