autoevals
Advanced tools
Comparing version 0.0.22 to 0.0.23
@@ -169,5 +169,36 @@ var __defProp = Object.defineProperty; | ||
// js/llm.ts | ||
var NO_COT_SUFFIX = `Answer the question by printing only a single choice from {{__choices}} (without quotes or punctuation) corresponding to the correct answer with no other text.`; | ||
var COT_SUFFIX = `Write out in a step by step manner your reasoning to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Then print only a single choice from {{__choices}} (without quotes or punctuation) on its own line corresponding to the correct answer. At the end, repeat just the answer by itself on a new line formatted as "Answer=X"`; | ||
// Prompt suffix used when chain-of-thought is off; it is appended to the prompt template after a newline.
// `{{__choices}}` is a template placeholder — presumably rendered with the allowed choice strings; confirm against the caller.
var NO_COT_SUFFIX = "Answer the question by calling `select_choice` with a single choice from {{__choices}}.";
// Prompt suffix used when chain-of-thought is on; appended the same way as NO_COT_SUFFIX.
// Wording fix: "step-by-step manner" (previously "matter"), matching the `reasons` description in COT_RESPONSE_SCHEMA.
var COT_SUFFIX = "Answer the question by calling `select_choice` with your reasoning in a step-by-step manner to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Select a single choice by setting the `choice` parameter to a single choice from {{__choices}}.";
// Model names that OpenAIClassifier iterates over when checking the requested model.
// NOTE(review): the matching rule itself is outside this excerpt — confirm whether it is exact or prefix matching.
var SUPPORTED_MODELS = ["gpt-3.5-turbo", "gpt-4"]; | ||
// Argument schema for the `select_choice` function when chain-of-thought is off:
// an object with one required string property, `choice`.
var PLAIN_RESPONSE_SCHEMA = { | ||
properties: { | ||
choice: { description: "The choice", title: "Choice", type: "string" } | ||
}, | ||
required: ["choice"], | ||
title: "FunctionResponse", | ||
type: "object" | ||
}; | ||
// Argument schema for the `select_choice` function when chain-of-thought is on:
// a required `reasons` array of strings, declared before the required string `choice`.
var COT_RESPONSE_SCHEMA = { | ||
properties: { | ||
reasons: { | ||
description: "Write out in a step by step manner your reasoning to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset.", | ||
items: { type: "string" }, | ||
title: "Reasons", | ||
type: "array" | ||
}, | ||
choice: { description: "The choice", title: "Choice", type: "string" } | ||
}, | ||
required: ["reasons", "choice"], | ||
title: "CoTResponse", | ||
type: "object" | ||
}; | ||
/**
 * Builds the function-definition list passed as `functions` to the chat completion request.
 *
 * @param useCoT When truthy, the function's parameters are COT_RESPONSE_SCHEMA
 *   (reasons + choice); otherwise PLAIN_RESPONSE_SCHEMA (choice only).
 * @returns A one-element array describing the `select_choice` function. The
 *   `parameters` value is the module-level schema object itself, not a copy.
 */
function buildClassificationFunctions(useCoT) { | ||
return [ | ||
{ | ||
// This name is the one OpenAIClassifier forces via `function_call: { name: "select_choice" }`.
name: "select_choice", | ||
description: "Call this function to select a choice.", | ||
parameters: useCoT ? COT_RESPONSE_SCHEMA : PLAIN_RESPONSE_SCHEMA | ||
} | ||
]; | ||
} | ||
async function OpenAIClassifier(args) { | ||
@@ -180,4 +211,4 @@ const _a = args, { | ||
model, | ||
parseScoreFn, | ||
choiceScores, | ||
classificationFunctions, | ||
maxTokens, | ||
@@ -194,4 +225,4 @@ temperature, | ||
"model", | ||
"parseScoreFn", | ||
"choiceScores", | ||
"classificationFunctions", | ||
"maxTokens", | ||
@@ -230,3 +261,5 @@ "temperature", | ||
model, | ||
messages | ||
messages, | ||
functions: classificationFunctions, | ||
function_call: { name: "select_choice" } | ||
}, extraArgs), | ||
@@ -242,7 +275,3 @@ { | ||
name | ||
}, parseResponse( | ||
resp.choices[0].message.content, | ||
parseScoreFn, | ||
choiceScores | ||
)); | ||
}, parseResponse(resp.choices[0].message, choiceScores)); | ||
} else { | ||
@@ -255,7 +284,7 @@ throw new Error("Empty response from OpenAI"); | ||
score: 0, | ||
error | ||
error: `${error}` | ||
}; | ||
} | ||
} | ||
function parseResponse(resp, parseScoreFn, choiceScores) { | ||
function parseResponse(resp, choiceScores) { | ||
let score = 0; | ||
@@ -265,4 +294,4 @@ let error = void 0; | ||
try { | ||
metadata["rationale"] = `${resp}`; | ||
const choice = parseScoreFn(resp); | ||
metadata["rationale"] = `${resp.content}`; | ||
const choice = JSON.parse(resp.function_call.arguments)["choice"].trim(); | ||
metadata["choice"] = choice; | ||
@@ -276,3 +305,3 @@ if (choiceScores[choice] !== void 0) { | ||
score = 0; | ||
error = e; | ||
error = `${e}`; | ||
} | ||
@@ -298,18 +327,3 @@ return { | ||
const prompt = promptTemplate + "\n" + (useCoT ? COT_SUFFIX : NO_COT_SUFFIX); | ||
let maxTokens = void 0; | ||
let parseScoreFn = (resp) => resp.trim(); | ||
if (useCoT) { | ||
parseScoreFn = (resp) => { | ||
const answers = [...resp.matchAll(/Answer\s*=\s*(.*)/g)]; | ||
if (answers && answers.length > 0) { | ||
return answers[answers.length - 1][1].trim(); | ||
} else if (choiceStrings.includes(resp.trim())) { | ||
return resp.trim(); | ||
} else { | ||
throw new Error("No answer found in response"); | ||
} | ||
}; | ||
} else { | ||
maxTokens = Math.max(...choiceStrings.map((c) => c.length)); | ||
} | ||
let maxTokens = 512; | ||
const messages = [ | ||
@@ -324,4 +338,4 @@ { | ||
messages, | ||
parseScoreFn, | ||
choiceScores, | ||
classificationFunctions: buildClassificationFunctions(useCoT), | ||
model, | ||
@@ -357,3 +371,3 @@ maxTokens, | ||
return LLMClassifierFromSpecFile( | ||
templateName, | ||
name, | ||
templateName | ||
@@ -493,4 +507,5 @@ ); | ||
Translation, | ||
buildClassificationFunctions, | ||
templates | ||
}; | ||
//# sourceMappingURL=bundle.js.map |
import { Score, Scorer, ScorerArgs } from "./base.js"; | ||
import { ChatCompletionRequestMessage } from "openai"; | ||
import { ChatCompletionFunctions, ChatCompletionRequestMessage } from "openai"; | ||
import { ChatCache } from "./oai.js"; | ||
@@ -11,2 +11,18 @@ import { templates } from "./templates.js"; | ||
} | ||
/**
 * Returns the function definitions (a single `select_choice` entry) used for
 * function-calling classification.
 *
 * NOTE(review): the declared `parameters` shape lists only `choice`, while the
 * JavaScript implementation returns a schema that also contains `reasons` when
 * `useCoT` is true — confirm that this narrower declaration is intended.
 *
 * @param useCoT Whether the chain-of-thought schema should be used.
 */
export declare function buildClassificationFunctions(useCoT: boolean): { | ||
name: string; | ||
description: string; | ||
parameters: { | ||
properties: { | ||
choice: { | ||
description: string; | ||
title: string; | ||
type: string; | ||
}; | ||
}; | ||
required: string[]; | ||
title: string; | ||
type: string; | ||
}; | ||
}[]; | ||
export type OpenAIClassifierArgs<RenderArgs> = { | ||
@@ -16,4 +32,4 @@ name: string; | ||
messages: ChatCompletionRequestMessage[]; | ||
parseScoreFn: (resp: string) => string; | ||
choiceScores: Record<string, number>; | ||
classificationFunctions: ChatCompletionFunctions[]; | ||
cache?: ChatCache; | ||
@@ -20,0 +36,0 @@ } & LLMArgs & RenderArgs; |
@@ -49,3 +49,3 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.Translation = exports.Summary = exports.Sql = exports.Security = exports.Possible = exports.Factuality = exports.Humor = exports.ClosedQA = exports.Battle = exports.LLMClassifierFromSpecFile = exports.LLMClassifierFromSpec = exports.LLMClassifierFromTemplate = exports.OpenAIClassifier = void 0; | ||
exports.Translation = exports.Summary = exports.Sql = exports.Security = exports.Possible = exports.Factuality = exports.Humor = exports.ClosedQA = exports.Battle = exports.LLMClassifierFromSpecFile = exports.LLMClassifierFromSpec = exports.LLMClassifierFromTemplate = exports.OpenAIClassifier = exports.buildClassificationFunctions = void 0; | ||
const yaml = __importStar(require("js-yaml")); | ||
@@ -55,8 +55,40 @@ const mustache_1 = __importDefault(require("mustache")); | ||
const templates_js_1 = require("./templates.js"); | ||
const NO_COT_SUFFIX = `Answer the question by printing only a single choice from {{__choices}} (without quotes or punctuation) corresponding to the correct answer with no other text.`; | ||
const COT_SUFFIX = `Write out in a step by step manner your reasoning to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Then print only a single choice from {{__choices}} (without quotes or punctuation) on its own line corresponding to the correct answer. At the end, repeat just the answer by itself on a new line formatted as "Answer=X"`; | ||
// Prompt suffix used when chain-of-thought is off; it is appended to the prompt template after a newline.
// `{{__choices}}` is a template placeholder — presumably rendered with the allowed choice strings; confirm against the caller.
const NO_COT_SUFFIX = "Answer the question by calling `select_choice` with a single choice from {{__choices}}.";
// Prompt suffix used when chain-of-thought is on; appended the same way as NO_COT_SUFFIX.
// Wording fix: "step-by-step manner" (previously "matter"), matching the `reasons` description in COT_RESPONSE_SCHEMA.
const COT_SUFFIX = "Answer the question by calling `select_choice` with your reasoning in a step-by-step manner to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Select a single choice by setting the `choice` parameter to a single choice from {{__choices}}.";
// Model names that OpenAIClassifier iterates over when checking the requested model.
// NOTE(review): the matching rule itself is outside this excerpt — confirm whether it is exact or prefix matching.
const SUPPORTED_MODELS = ["gpt-3.5-turbo", "gpt-4"]; | ||
// Argument schema for the `select_choice` function when chain-of-thought is off:
// an object with one required string property, `choice`.
const PLAIN_RESPONSE_SCHEMA = { | ||
properties: { | ||
choice: { description: "The choice", title: "Choice", type: "string" }, | ||
}, | ||
required: ["choice"], | ||
title: "FunctionResponse", | ||
type: "object", | ||
}; | ||
// Argument schema for the `select_choice` function when chain-of-thought is on:
// a required `reasons` array of strings, declared before the required string `choice`.
const COT_RESPONSE_SCHEMA = { | ||
properties: { | ||
reasons: { | ||
description: "Write out in a step by step manner your reasoning to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset.", | ||
items: { type: "string" }, | ||
title: "Reasons", | ||
type: "array", | ||
}, | ||
choice: { description: "The choice", title: "Choice", type: "string" }, | ||
}, | ||
required: ["reasons", "choice"], | ||
title: "CoTResponse", | ||
type: "object", | ||
}; | ||
/**
 * Builds the function-definition list passed as `functions` to the chat completion request.
 *
 * @param useCoT When truthy, the function's parameters are COT_RESPONSE_SCHEMA
 *   (reasons + choice); otherwise PLAIN_RESPONSE_SCHEMA (choice only).
 * @returns A one-element array describing the `select_choice` function. The
 *   `parameters` value is the module-level schema object itself, not a copy.
 */
function buildClassificationFunctions(useCoT) { | ||
return [ | ||
{ | ||
// This name is the one OpenAIClassifier forces via `function_call: { name: "select_choice" }`.
name: "select_choice", | ||
description: "Call this function to select a choice.", | ||
parameters: useCoT ? COT_RESPONSE_SCHEMA : PLAIN_RESPONSE_SCHEMA, | ||
}, | ||
]; | ||
} | ||
exports.buildClassificationFunctions = buildClassificationFunctions; | ||
function OpenAIClassifier(args) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const { name, output, expected, messages: messagesArg, model, parseScoreFn, choiceScores, maxTokens, temperature, cache, openAiApiKey, openAiOrganizationId } = args, remainingRenderArgs = __rest(args, ["name", "output", "expected", "messages", "model", "parseScoreFn", "choiceScores", "maxTokens", "temperature", "cache", "openAiApiKey", "openAiOrganizationId"]); | ||
const { name, output, expected, messages: messagesArg, model, choiceScores, classificationFunctions, maxTokens, temperature, cache, openAiApiKey, openAiOrganizationId } = args, remainingRenderArgs = __rest(args, ["name", "output", "expected", "messages", "model", "choiceScores", "classificationFunctions", "maxTokens", "temperature", "cache", "openAiApiKey", "openAiOrganizationId"]); | ||
let found = false; | ||
@@ -81,3 +113,3 @@ for (const m of SUPPORTED_MODELS) { | ||
const resp = yield (0, oai_js_1.cachedChatCompletion)(Object.assign({ model, | ||
messages }, extraArgs), { | ||
messages, functions: classificationFunctions, function_call: { name: "select_choice" } }, extraArgs), { | ||
cache, | ||
@@ -88,3 +120,3 @@ openAiApiKey, | ||
if (resp.choices.length > 0) { | ||
return Object.assign({ name }, parseResponse(resp.choices[0].message.content, parseScoreFn, choiceScores)); | ||
return Object.assign({ name }, parseResponse(resp.choices[0].message, choiceScores)); | ||
} | ||
@@ -99,3 +131,3 @@ else { | ||
score: 0, | ||
error, | ||
error: `${error}`, | ||
}; | ||
@@ -106,3 +138,3 @@ } | ||
exports.OpenAIClassifier = OpenAIClassifier; | ||
function parseResponse(resp, parseScoreFn, choiceScores) { | ||
function parseResponse(resp, choiceScores) { | ||
let score = 0; | ||
@@ -112,4 +144,4 @@ let error = undefined; | ||
try { | ||
metadata["rationale"] = `${resp}`; | ||
const choice = parseScoreFn(resp); | ||
metadata["rationale"] = `${resp.content}`; | ||
const choice = JSON.parse(resp.function_call.arguments)["choice"].trim(); | ||
metadata["choice"] = choice; | ||
@@ -125,3 +157,3 @@ if (choiceScores[choice] !== undefined) { | ||
score = 0; | ||
error = e; | ||
error = `${e}`; | ||
} | ||
@@ -140,21 +172,3 @@ return { | ||
const prompt = promptTemplate + "\n" + (useCoT ? COT_SUFFIX : NO_COT_SUFFIX); | ||
let maxTokens = undefined; | ||
let parseScoreFn = (resp) => resp.trim(); | ||
if (useCoT) { | ||
parseScoreFn = (resp) => { | ||
const answers = [...resp.matchAll(/Answer\s*=\s*(.*)/g)]; | ||
if (answers && answers.length > 0) { | ||
return answers[answers.length - 1][1].trim(); | ||
} | ||
else if (choiceStrings.includes(resp.trim())) { | ||
return resp.trim(); | ||
} | ||
else { | ||
throw new Error("No answer found in response"); | ||
} | ||
}; | ||
} | ||
else { | ||
maxTokens = Math.max(...choiceStrings.map((c) => c.length)); | ||
} | ||
let maxTokens = 512; | ||
const messages = [ | ||
@@ -168,5 +182,3 @@ { | ||
messages, | ||
parseScoreFn, | ||
choiceScores, | ||
model, | ||
choiceScores, classificationFunctions: buildClassificationFunctions(useCoT), model, | ||
maxTokens, | ||
@@ -200,3 +212,3 @@ temperature, __choices: choiceStrings }, runtimeArgs), { | ||
} | ||
return LLMClassifierFromSpecFile(templateName, templateName); | ||
return LLMClassifierFromSpecFile(name, templateName); | ||
} | ||
@@ -203,0 +215,0 @@ /** |
@@ -167,5 +167,36 @@ var __defProp = Object.defineProperty; | ||
// js/llm.ts | ||
var NO_COT_SUFFIX = `Answer the question by printing only a single choice from {{__choices}} (without quotes or punctuation) corresponding to the correct answer with no other text.`; | ||
var COT_SUFFIX = `Write out in a step by step manner your reasoning to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Then print only a single choice from {{__choices}} (without quotes or punctuation) on its own line corresponding to the correct answer. At the end, repeat just the answer by itself on a new line formatted as "Answer=X"`; | ||
// Prompt suffix used when chain-of-thought is off; it is appended to the prompt template after a newline.
// `{{__choices}}` is a template placeholder — presumably rendered with the allowed choice strings; confirm against the caller.
var NO_COT_SUFFIX = "Answer the question by calling `select_choice` with a single choice from {{__choices}}.";
// Prompt suffix used when chain-of-thought is on; appended the same way as NO_COT_SUFFIX.
// Wording fix: "step-by-step manner" (previously "matter"), matching the `reasons` description in COT_RESPONSE_SCHEMA.
var COT_SUFFIX = "Answer the question by calling `select_choice` with your reasoning in a step-by-step manner to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset. Select a single choice by setting the `choice` parameter to a single choice from {{__choices}}.";
// Model names that OpenAIClassifier iterates over when checking the requested model.
// NOTE(review): the matching rule itself is outside this excerpt — confirm whether it is exact or prefix matching.
var SUPPORTED_MODELS = ["gpt-3.5-turbo", "gpt-4"]; | ||
// Argument schema for the `select_choice` function when chain-of-thought is off:
// an object with one required string property, `choice`.
var PLAIN_RESPONSE_SCHEMA = { | ||
properties: { | ||
choice: { description: "The choice", title: "Choice", type: "string" } | ||
}, | ||
required: ["choice"], | ||
title: "FunctionResponse", | ||
type: "object" | ||
}; | ||
// Argument schema for the `select_choice` function when chain-of-thought is on:
// a required `reasons` array of strings, declared before the required string `choice`.
var COT_RESPONSE_SCHEMA = { | ||
properties: { | ||
reasons: { | ||
description: "Write out in a step by step manner your reasoning to be sure that your conclusion is correct. Avoid simply stating the correct answer at the outset.", | ||
items: { type: "string" }, | ||
title: "Reasons", | ||
type: "array" | ||
}, | ||
choice: { description: "The choice", title: "Choice", type: "string" } | ||
}, | ||
required: ["reasons", "choice"], | ||
title: "CoTResponse", | ||
type: "object" | ||
}; | ||
/**
 * Builds the function-definition list passed as `functions` to the chat completion request.
 *
 * @param useCoT When truthy, the function's parameters are COT_RESPONSE_SCHEMA
 *   (reasons + choice); otherwise PLAIN_RESPONSE_SCHEMA (choice only).
 * @returns A one-element array describing the `select_choice` function. The
 *   `parameters` value is the module-level schema object itself, not a copy.
 */
function buildClassificationFunctions(useCoT) { | ||
return [ | ||
{ | ||
// This name is the one OpenAIClassifier forces via `function_call: { name: "select_choice" }`.
name: "select_choice", | ||
description: "Call this function to select a choice.", | ||
parameters: useCoT ? COT_RESPONSE_SCHEMA : PLAIN_RESPONSE_SCHEMA | ||
} | ||
]; | ||
} | ||
async function OpenAIClassifier(args) { | ||
@@ -178,4 +209,4 @@ const _a = args, { | ||
model, | ||
parseScoreFn, | ||
choiceScores, | ||
classificationFunctions, | ||
maxTokens, | ||
@@ -192,4 +223,4 @@ temperature, | ||
"model", | ||
"parseScoreFn", | ||
"choiceScores", | ||
"classificationFunctions", | ||
"maxTokens", | ||
@@ -228,3 +259,5 @@ "temperature", | ||
model, | ||
messages | ||
messages, | ||
functions: classificationFunctions, | ||
function_call: { name: "select_choice" } | ||
}, extraArgs), | ||
@@ -240,7 +273,3 @@ { | ||
name | ||
}, parseResponse( | ||
resp.choices[0].message.content, | ||
parseScoreFn, | ||
choiceScores | ||
)); | ||
}, parseResponse(resp.choices[0].message, choiceScores)); | ||
} else { | ||
@@ -253,7 +282,7 @@ throw new Error("Empty response from OpenAI"); | ||
score: 0, | ||
error | ||
error: `${error}` | ||
}; | ||
} | ||
} | ||
function parseResponse(resp, parseScoreFn, choiceScores) { | ||
function parseResponse(resp, choiceScores) { | ||
let score = 0; | ||
@@ -263,4 +292,4 @@ let error = void 0; | ||
try { | ||
metadata["rationale"] = `${resp}`; | ||
const choice = parseScoreFn(resp); | ||
metadata["rationale"] = `${resp.content}`; | ||
const choice = JSON.parse(resp.function_call.arguments)["choice"].trim(); | ||
metadata["choice"] = choice; | ||
@@ -274,3 +303,3 @@ if (choiceScores[choice] !== void 0) { | ||
score = 0; | ||
error = e; | ||
error = `${e}`; | ||
} | ||
@@ -296,18 +325,3 @@ return { | ||
const prompt = promptTemplate + "\n" + (useCoT ? COT_SUFFIX : NO_COT_SUFFIX); | ||
let maxTokens = void 0; | ||
let parseScoreFn = (resp) => resp.trim(); | ||
if (useCoT) { | ||
parseScoreFn = (resp) => { | ||
const answers = [...resp.matchAll(/Answer\s*=\s*(.*)/g)]; | ||
if (answers && answers.length > 0) { | ||
return answers[answers.length - 1][1].trim(); | ||
} else if (choiceStrings.includes(resp.trim())) { | ||
return resp.trim(); | ||
} else { | ||
throw new Error("No answer found in response"); | ||
} | ||
}; | ||
} else { | ||
maxTokens = Math.max(...choiceStrings.map((c) => c.length)); | ||
} | ||
let maxTokens = 512; | ||
const messages = [ | ||
@@ -322,4 +336,4 @@ { | ||
messages, | ||
parseScoreFn, | ||
choiceScores, | ||
classificationFunctions: buildClassificationFunctions(useCoT), | ||
model, | ||
@@ -355,3 +369,3 @@ maxTokens, | ||
return LLMClassifierFromSpecFile( | ||
templateName, | ||
name, | ||
templateName | ||
@@ -494,4 +508,5 @@ ); | ||
Translation, | ||
buildClassificationFunctions, | ||
templates | ||
}; | ||
//# sourceMappingURL=node.js.map |
@@ -1,5 +0,7 @@ | ||
import { ChatCompletionRequestMessage, CreateChatCompletionResponse } from "openai"; | ||
import { ChatCompletionFunctions, ChatCompletionRequestMessage, CreateChatCompletionRequestFunctionCall, CreateChatCompletionResponse } from "openai"; | ||
export interface CachedLLMParams { | ||
model: string; | ||
messages: ChatCompletionRequestMessage[]; | ||
functions?: ChatCompletionFunctions[]; | ||
function_call?: CreateChatCompletionRequestFunctionCall; | ||
temperature?: number; | ||
@@ -6,0 +8,0 @@ max_tokens?: number; |
@@ -1,2 +0,7 @@ | ||
import { Battle, LLMClassifierFromTemplate, OpenAIClassifier } from "../js/llm"; | ||
import { | ||
Battle, | ||
LLMClassifierFromTemplate, | ||
OpenAIClassifier, | ||
buildClassificationFunctions, | ||
} from "../js/llm"; | ||
import { ChatCompletionRequestMessage } from "openai"; | ||
@@ -28,7 +33,7 @@ import { ChatCache } from "../js/oai"; | ||
Title 1: {{output}} | ||
Title 2: {{expected}} | ||
1: {{output}} | ||
2: {{expected}} | ||
Please discuss each title briefly (one line for pros, one for cons), and then pick which one you think more accurately | ||
summarizes the issue by writing "Winner: 1" or "Winner: 2", and then a short rationale for your choice`, | ||
Please discuss each title briefly (one line for pros, one for cons), and then answer the question by calling | ||
the select_choice function with "1" or "2".`, | ||
}, | ||
@@ -52,3 +57,4 @@ ]; | ||
parseScoreFn: parseBestTitle, | ||
choiceScores: { 1: 1, 2: 0 }, | ||
choiceScores: { "1": 1, "2": 0 }, | ||
classificationFunctions: buildClassificationFunctions(true), | ||
page_content, | ||
@@ -55,0 +61,0 @@ maxTokens: 500, |
{ | ||
"name": "autoevals", | ||
"version": "0.0.22", | ||
"version": "0.0.23", | ||
"description": "Universal library for evaluating AI models", | ||
@@ -5,0 +5,0 @@ "main": "jsdist/bundle.js", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
265611
50
3012