Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

autoevals

Package Overview
Dependencies
Maintainers
1
Versions
110
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

autoevals - npm Package Compare versions

Comparing version 0.0.57 to 0.0.58

17

jsdist/index.d.ts

@@ -177,5 +177,20 @@ import { ScorerArgs, Score, Scorer } from '@braintrust/core';

pairwiseScorer?: Scorer<string, {}>;
allowExtraEntities?: boolean;
}>;
/**
* A scorer that uses OpenAI's moderation API to determine if AI response contains ANY flagged content.
*
* @param args
* @param args.threshold Optional. Threshold to use to determine whether content has exceeded threshold. By
* default, it uses OpenAI's default. (Using `flagged` from the response payload.)
* @param args.categories Optional. Specific categories to look for. If not set, all categories will
* be considered.
* @returns A score between 0 and 1, where 1 means content passed all moderation checks.
*/
declare const Moderation: Scorer<string, {
threshold?: number;
} & OpenAIAuth>;
/**
* A simple scorer that compares numbers by normalizing their difference.

@@ -199,2 +214,2 @@ */

export { Battle, ClosedQA, EmbeddingSimilarity, Evaluators, Factuality, Humor, JSONDiff, type LLMClassifierArgs, LLMClassifierFromSpec, LLMClassifierFromSpecFile, LLMClassifierFromTemplate, Levenshtein, LevenshteinScorer, ListContains, type ModelGradedSpec, NumericDiff, OpenAIClassifier, type OpenAIClassifierArgs, Possible, Security, Sql, Summary, Translation, buildClassificationFunctions, templates };
export { Battle, ClosedQA, EmbeddingSimilarity, Evaluators, Factuality, Humor, JSONDiff, type LLMArgs, type LLMClassifierArgs, LLMClassifierFromSpec, LLMClassifierFromSpecFile, LLMClassifierFromTemplate, Levenshtein, LevenshteinScorer, ListContains, type ModelGradedSpec, Moderation, NumericDiff, OpenAIClassifier, type OpenAIClassifierArgs, Possible, Security, Sql, Summary, Translation, buildClassificationFunctions, templates };

@@ -46,2 +46,3 @@ "use strict";

ListContains: () => ListContains,
Moderation: () => Moderation,
NumericDiff: () => NumericDiff,

@@ -496,3 +497,3 @@ OpenAIClassifier: () => OpenAIClassifier,

var ListContains = async (args) => {
const { output, expected } = args;
const { output, expected, allowExtraEntities } = args;
if (expected === void 0) {

@@ -542,3 +543,4 @@ throw new Error("ListContains requires an expected value");

).filter((pair) => pair !== null);
const avgScore = pairs.reduce((acc, pair) => acc + pair.score, 0) / Math.max(output.length, expected.length);
const denominator = allowExtraEntities ? expected.length : Math.max(output.length, expected.length);
const avgScore = pairs.reduce((acc, pair) => acc + pair.score, 0) / denominator;
return {

@@ -553,2 +555,40 @@ name: "ListContains",

// js/moderation.ts
var MODERATION_NAME = "Moderation";
/**
 * Convert a single OpenAI moderation result into a binary pass/fail score.
 *
 * Without a threshold, OpenAI's own `flagged` verdict decides: flagged
 * content scores 0, otherwise 1. With a threshold, the content fails (0)
 * if any category score is strictly greater than the threshold.
 */
function computeScore(result, threshold) {
  if (threshold === undefined) {
    return result.flagged ? 0 : 1;
  }
  const anyExceeded = Object.values(result.category_scores).some(
    (categoryScore) => categoryScore > threshold
  );
  return anyExceeded ? 0 : 1;
}
/**
 * Moderation scorer: sends `args.output` to OpenAI's moderation endpoint and
 * scores 1 if the content passes all checks, 0 otherwise (see computeScore).
 * An optional `args.threshold` overrides OpenAI's default `flagged` verdict.
 */
var Moderation = async (args) => {
  const { output, threshold } = args;
  const openai = buildOpenAIClient(args);
  const moderationResults = await openai.moderations.create({
    input: output
  });
  const [result] = moderationResults.results;
  return {
    name: MODERATION_NAME,
    score: computeScore(result, threshold),
    metadata: {
      threshold,
      // @NOTE: `as unknown ...` is intentional. See https://stackoverflow.com/a/57280262
      category_scores: result.category_scores || void 0
    }
  };
};
// Pin the function's `name` to the scorer's display name so it stays
// "Moderation" regardless of how the binding is transformed by bundlers.
Object.defineProperty(Moderation, "name", {
  value: MODERATION_NAME,
  configurable: true
});
// js/number.ts

@@ -677,2 +717,3 @@ var NumericDiff = (args) => {

ListContains,
Moderation,
NumericDiff,

@@ -679,0 +720,0 @@ OpenAIClassifier,

6

package.json
{
"name": "autoevals",
"version": "0.0.57",
"version": "0.0.58",
"description": "Universal library for evaluating AI models",

@@ -52,4 +52,6 @@ "main": "./jsdist/index.js",

"mustache": "^4.2.0",
"openai": "4.23.0"
"openai": "4.23.0",
"zod": "^3.22.4",
"zod-to-json-schema": "^3.22.5"
}
}

@@ -85,2 +85,3 @@ # Autoevals

- Factuality
- Moderation
- Security

@@ -92,2 +93,18 @@ - Summarization

### RAGAS
- Context precision
- Context relevancy
- Context recall
- Context entities recall
- [ ] Faithfulness
- [ ] Answer relevance
- [ ] Answer semantic similarity
- [ ] Answer correctness
- [ ] Aspect critique
### Composite
- Semantic list contains
### Embeddings

@@ -94,0 +111,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc