Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

autoevals

Package Overview
Dependencies
Maintainers
1
Versions
110
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

autoevals - npm Package Compare versions

Comparing version 0.0.57 to 0.0.58

17

jsdist/index.d.ts

@@ -177,5 +177,20 @@ import { ScorerArgs, Score, Scorer } from '@braintrust/core';

pairwiseScorer?: Scorer<string, {}>;
allowExtraEntities?: boolean;
}>;
/**
* A scorer that uses OpenAI's moderation API to determine if AI response contains ANY flagged content.
*
* @param args
* @param args.threshold Optional. Threshold to use to determine whether content has exceeded threshold. By
* default, it uses OpenAI's default. (Using `flagged` from the response payload.)
* @param args.categories Optional. Specific categories to look for. If not set, all categories will
* be considered.
* @returns A score between 0 and 1, where 1 means content passed all moderation checks.
*/
declare const Moderation: Scorer<string, {
threshold?: number;
} & OpenAIAuth>;
/**
* A simple scorer that compares numbers by normalizing their difference.

@@ -199,2 +214,2 @@ */

export { Battle, ClosedQA, EmbeddingSimilarity, Evaluators, Factuality, Humor, JSONDiff, type LLMClassifierArgs, LLMClassifierFromSpec, LLMClassifierFromSpecFile, LLMClassifierFromTemplate, Levenshtein, LevenshteinScorer, ListContains, type ModelGradedSpec, NumericDiff, OpenAIClassifier, type OpenAIClassifierArgs, Possible, Security, Sql, Summary, Translation, buildClassificationFunctions, templates };
export { Battle, ClosedQA, EmbeddingSimilarity, Evaluators, Factuality, Humor, JSONDiff, type LLMArgs, type LLMClassifierArgs, LLMClassifierFromSpec, LLMClassifierFromSpecFile, LLMClassifierFromTemplate, Levenshtein, LevenshteinScorer, ListContains, type ModelGradedSpec, Moderation, NumericDiff, OpenAIClassifier, type OpenAIClassifierArgs, Possible, Security, Sql, Summary, Translation, buildClassificationFunctions, templates };

@@ -46,2 +46,3 @@ "use strict";

ListContains: () => ListContains,
Moderation: () => Moderation,
NumericDiff: () => NumericDiff,

@@ -496,3 +497,3 @@ OpenAIClassifier: () => OpenAIClassifier,

var ListContains = async (args) => {
const { output, expected } = args;
const { output, expected, allowExtraEntities } = args;
if (expected === void 0) {

@@ -542,3 +543,4 @@ throw new Error("ListContains requires an expected value");

).filter((pair) => pair !== null);
const avgScore = pairs.reduce((acc, pair) => acc + pair.score, 0) / Math.max(output.length, expected.length);
const denominator = allowExtraEntities ? expected.length : Math.max(output.length, expected.length);
const avgScore = pairs.reduce((acc, pair) => acc + pair.score, 0) / denominator;
return {

@@ -553,2 +555,40 @@ name: "ListContains",

// js/moderation.ts
var MODERATION_NAME = "Moderation";
/**
 * Convert a single OpenAI moderation result into a binary pass/fail score.
 *
 * Without a threshold, OpenAI's own `flagged` verdict decides: flagged
 * content scores 0, otherwise 1. With a threshold, the content fails (0)
 * if any category score is strictly greater than the threshold.
 */
function computeScore(result, threshold) {
  if (threshold === undefined) {
    return result.flagged ? 0 : 1;
  }
  const anyExceeded = Object.values(result.category_scores).some(
    (categoryScore) => categoryScore > threshold
  );
  return anyExceeded ? 0 : 1;
}
/**
 * Moderation scorer: sends `args.output` to OpenAI's moderation endpoint and
 * scores 1 if the content passes all checks, 0 otherwise (see computeScore).
 * An optional `args.threshold` overrides OpenAI's default `flagged` verdict.
 */
var Moderation = async (args) => {
  const { output, threshold } = args;
  const openai = buildOpenAIClient(args);
  const moderationResults = await openai.moderations.create({
    input: output
  });
  const [result] = moderationResults.results;
  return {
    name: MODERATION_NAME,
    score: computeScore(result, threshold),
    metadata: {
      threshold,
      // @NOTE: `as unknown ...` is intentional. See https://stackoverflow.com/a/57280262
      category_scores: result.category_scores || void 0
    }
  };
};
// Pin the function's `name` to the scorer's display name so it stays
// "Moderation" regardless of how the binding is transformed by bundlers.
Object.defineProperty(Moderation, "name", {
  value: MODERATION_NAME,
  configurable: true
});
// js/number.ts

@@ -677,2 +717,3 @@ var NumericDiff = (args) => {

ListContains,
Moderation,
NumericDiff,

@@ -679,0 +720,0 @@ OpenAIClassifier,

6

package.json
{
"name": "autoevals",
"version": "0.0.57",
"version": "0.0.58",
"description": "Universal library for evaluating AI models",

@@ -52,4 +52,6 @@ "main": "./jsdist/index.js",

"mustache": "^4.2.0",
"openai": "4.23.0"
"openai": "4.23.0",
"zod": "^3.22.4",
"zod-to-json-schema": "^3.22.5"
}
}

@@ -85,2 +85,3 @@ # Autoevals

- Factuality
- Moderation
- Security

@@ -92,2 +93,18 @@ - Summarization

### RAGAS
- Context precision
- Context relevancy
- Context recall
- Context entities recall
- [ ] Faithfulness
- [ ] Answer relevance
- [ ] Answer semantic similarity
- [ ] Answer correctness
- [ ] Aspect critique
### Composite
- Semantic list contains
### Embeddings

@@ -94,0 +111,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc