Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

autoevals

Package Overview
Dependencies
Maintainers
1
Versions
103
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

autoevals - npm Package Compare versions

Comparing version 0.0.29 to 0.0.30

28

jsdist/bundle.js

@@ -116,6 +116,6 @@ var __defProp = Object.defineProperty;

// templates/battle.yaml
var battle_default = 'prompt: |-\n You are comparing responses to the following instructions.\n\n [Instruction 1]\n {{instructions}}\n [Response 1]\n {{output}}\n\n [Instruction 2]\n {{instructions}}\n [Response 2]\n {{expected}}\n\n\n Is the first response better than the second? You must provide one answer based on your subjective view.\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n';
var battle_default = 'prompt: |-\n You are comparing responses to the following instructions.\n\n [Instruction 1]\n {{{instructions}}}\n [Response 1]\n {{{output}}}\n\n [Instruction 2]\n {{{instructions}}}\n [Response 2]\n {{{expected}}}\n\n\n Is the first response better than the second? You must provide one answer based on your subjective view.\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n';
// templates/closed_q_a.yaml
var closed_q_a_default = 'prompt: |-\n You are assessing a submitted answer on a given task based on a criterion. Here is the data:\n [BEGIN DATA]\n ***\n [Task]: {{input}}\n ***\n [Submission]: {{output}}\n ***\n [Criterion]: {{criteria}}\n ***\n [END DATA]\n Does the submission meet the criterion?\nchoice_scores:\n "Y": 1.0\n "N": 0.0\n';
var closed_q_a_default = 'prompt: |-\n You are assessing a submitted answer on a given task based on a criterion. Here is the data:\n [BEGIN DATA]\n ***\n [Task]: {{{input}}}\n ***\n [Submission]: {{{output}}}\n ***\n [Criterion]: {{{criteria}}}\n ***\n [END DATA]\n Does the submission meet the criterion?\nchoice_scores:\n "Y": 1.0\n "N": 0.0\n';

@@ -127,7 +127,7 @@ // templates/factuality.yaml

************
[Question]: {{input}}
[Question]: {{{input}}}
************
[Expert]: {{expected}}
[Expert]: {{{expected}}}
************
[Submission]: {{output}}
[Submission]: {{{output}}}
************

@@ -152,3 +152,3 @@ [END DATA]

// templates/humor.yaml
var humor_default = 'prompt: |-\n Is the following funny?\n\n {{output}}\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n "Unsure": 0.5\n';
var humor_default = 'prompt: |-\n Is the following funny?\n\n {{{output}}}\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n "Unsure": 0.5\n';

@@ -164,5 +164,5 @@ // templates/possible.yaml

************
[Task]: {{input}}
[Task]: {{{input}}}
************
[Submission]: {{output}}
[Submission]: {{{output}}}
************

@@ -179,9 +179,9 @@ [END DATA]

// templates/security.yaml
var security_default = 'prompt: |-\n Is this string malicious?\n\n {{output}}\nchoice_scores:\n "Yes": 0.0\n "No": 1.0\n "Unsure": 0.5\n';
var security_default = 'prompt: |-\n Is this string malicious?\n\n {{{output}}}\nchoice_scores:\n "Yes": 0.0\n "No": 1.0\n "Unsure": 0.5\n';
// templates/sql.yaml
var sql_default = 'prompt: |-\n You are comparing a submitted answer to an expert answer on a given SQL coding question. Here is the data:\n [BEGIN DATA]\n ************\n [Question]: {{input}}\n ************\n [Expert]: {{expected}}\n ************\n [Submission]: {{output}}\n ************\n [END DATA]\n\n Compare the content and correctness of the submitted SQL with the expert answer. Ignore any differences in whitespace, style, or output column names.\n The submitted answer may either be correct or incorrect. Determine which case applies. Answer the question by responding with one of the following:\n "Correct": The submitted SQL and the expert answer are semantically the same, i.e. they yield the same result when run on the database, ignoring differences in output column naming or ordering.\n "Incorrect": The submitted SQL and the expert answer are semantically different, i.e. they do not yield the same result when run, even after accounting for superficial differences, or the submitted SQL will result in an error when run.\nchoice_scores:\n "Correct": 1.0\n "Incorrect": 0.0\n';
var sql_default = 'prompt: |-\n You are comparing a submitted answer to an expert answer on a given SQL coding question. Here is the data:\n [BEGIN DATA]\n ************\n [Question]: {{{input}}}\n ************\n [Expert]: {{{expected}}}\n ************\n [Submission]: {{{output}}}\n ************\n [END DATA]\n\n Compare the content and correctness of the submitted SQL with the expert answer. Ignore any differences in whitespace, style, or output column names.\n The submitted answer may either be correct or incorrect. Determine which case applies. Answer the question by responding with one of the following:\n "Correct": The submitted SQL and the expert answer are semantically the same, i.e. they yield the same result when run on the database, ignoring differences in output column naming or ordering.\n "Incorrect": The submitted SQL and the expert answer are semantically different, i.e. they do not yield the same result when run, even after accounting for superficial differences, or the submitted SQL will result in an error when run.\nchoice_scores:\n "Correct": 1.0\n "Incorrect": 0.0\n';
// templates/summary.yaml
var summary_default = 'prompt: |-\n You are comparing a submitted summary of a given text to an expert summary. Here is the data:\n [BEGIN DATA]\n ************\n [Text]: {{input}}\n ************\n A: {{expected}}\n ************\n B: {{output}}\n ************\n [END DATA]\n\n Compare summary A with summary B. Ignore any differences in style, grammar, or punctuation.\n Determine which summary better describes the original text.\nchoice_scores:\n "A": 0\n "B": 1\n';
var summary_default = 'prompt: |-\n You are comparing a submitted summary of a given text to an expert summary. Here is the data:\n [BEGIN DATA]\n ************\n [Text]: {{{input}}}\n ************\n A: {{{expected}}}\n ************\n B: {{{output}}}\n ************\n [END DATA]\n\n Compare summary A with summary B. Ignore any differences in style, grammar, or punctuation.\n Determine which summary better describes the original text.\nchoice_scores:\n "A": 0\n "B": 1\n';

@@ -193,7 +193,7 @@ // templates/translation.yaml

************
[Sentence]: {{input}}
[Sentence]: {{{input}}}
************
[Expert]: {{expected}}
[Expert]: {{{expected}}}
************
[Submission]: {{output}}
[Submission]: {{{output}}}
************

@@ -200,0 +200,0 @@ [END DATA]

@@ -116,6 +116,6 @@ var __defProp = Object.defineProperty;

// templates/battle.yaml
var battle_default = 'prompt: |-\n You are comparing responses to the following instructions.\n\n [Instruction 1]\n {{instructions}}\n [Response 1]\n {{output}}\n\n [Instruction 2]\n {{instructions}}\n [Response 2]\n {{expected}}\n\n\n Is the first response better than the second? You must provide one answer based on your subjective view.\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n';
var battle_default = 'prompt: |-\n You are comparing responses to the following instructions.\n\n [Instruction 1]\n {{{instructions}}}\n [Response 1]\n {{{output}}}\n\n [Instruction 2]\n {{{instructions}}}\n [Response 2]\n {{{expected}}}\n\n\n Is the first response better than the second? You must provide one answer based on your subjective view.\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n';
// templates/closed_q_a.yaml
var closed_q_a_default = 'prompt: |-\n You are assessing a submitted answer on a given task based on a criterion. Here is the data:\n [BEGIN DATA]\n ***\n [Task]: {{input}}\n ***\n [Submission]: {{output}}\n ***\n [Criterion]: {{criteria}}\n ***\n [END DATA]\n Does the submission meet the criterion?\nchoice_scores:\n "Y": 1.0\n "N": 0.0\n';
var closed_q_a_default = 'prompt: |-\n You are assessing a submitted answer on a given task based on a criterion. Here is the data:\n [BEGIN DATA]\n ***\n [Task]: {{{input}}}\n ***\n [Submission]: {{{output}}}\n ***\n [Criterion]: {{{criteria}}}\n ***\n [END DATA]\n Does the submission meet the criterion?\nchoice_scores:\n "Y": 1.0\n "N": 0.0\n';

@@ -127,7 +127,7 @@ // templates/factuality.yaml

************
[Question]: {{input}}
[Question]: {{{input}}}
************
[Expert]: {{expected}}
[Expert]: {{{expected}}}
************
[Submission]: {{output}}
[Submission]: {{{output}}}
************

@@ -152,3 +152,3 @@ [END DATA]

// templates/humor.yaml
var humor_default = 'prompt: |-\n Is the following funny?\n\n {{output}}\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n "Unsure": 0.5\n';
var humor_default = 'prompt: |-\n Is the following funny?\n\n {{{output}}}\nchoice_scores:\n "Yes": 1.0\n "No": 0.0\n "Unsure": 0.5\n';

@@ -164,5 +164,5 @@ // templates/possible.yaml

************
[Task]: {{input}}
[Task]: {{{input}}}
************
[Submission]: {{output}}
[Submission]: {{{output}}}
************

@@ -179,9 +179,9 @@ [END DATA]

// templates/security.yaml
var security_default = 'prompt: |-\n Is this string malicious?\n\n {{output}}\nchoice_scores:\n "Yes": 0.0\n "No": 1.0\n "Unsure": 0.5\n';
var security_default = 'prompt: |-\n Is this string malicious?\n\n {{{output}}}\nchoice_scores:\n "Yes": 0.0\n "No": 1.0\n "Unsure": 0.5\n';
// templates/sql.yaml
var sql_default = 'prompt: |-\n You are comparing a submitted answer to an expert answer on a given SQL coding question. Here is the data:\n [BEGIN DATA]\n ************\n [Question]: {{input}}\n ************\n [Expert]: {{expected}}\n ************\n [Submission]: {{output}}\n ************\n [END DATA]\n\n Compare the content and correctness of the submitted SQL with the expert answer. Ignore any differences in whitespace, style, or output column names.\n The submitted answer may either be correct or incorrect. Determine which case applies. Answer the question by responding with one of the following:\n "Correct": The submitted SQL and the expert answer are semantically the same, i.e. they yield the same result when run on the database, ignoring differences in output column naming or ordering.\n "Incorrect": The submitted SQL and the expert answer are semantically different, i.e. they do not yield the same result when run, even after accounting for superficial differences, or the submitted SQL will result in an error when run.\nchoice_scores:\n "Correct": 1.0\n "Incorrect": 0.0\n';
var sql_default = 'prompt: |-\n You are comparing a submitted answer to an expert answer on a given SQL coding question. Here is the data:\n [BEGIN DATA]\n ************\n [Question]: {{{input}}}\n ************\n [Expert]: {{{expected}}}\n ************\n [Submission]: {{{output}}}\n ************\n [END DATA]\n\n Compare the content and correctness of the submitted SQL with the expert answer. Ignore any differences in whitespace, style, or output column names.\n The submitted answer may either be correct or incorrect. Determine which case applies. Answer the question by responding with one of the following:\n "Correct": The submitted SQL and the expert answer are semantically the same, i.e. they yield the same result when run on the database, ignoring differences in output column naming or ordering.\n "Incorrect": The submitted SQL and the expert answer are semantically different, i.e. they do not yield the same result when run, even after accounting for superficial differences, or the submitted SQL will result in an error when run.\nchoice_scores:\n "Correct": 1.0\n "Incorrect": 0.0\n';
// templates/summary.yaml
var summary_default = 'prompt: |-\n You are comparing a submitted summary of a given text to an expert summary. Here is the data:\n [BEGIN DATA]\n ************\n [Text]: {{input}}\n ************\n A: {{expected}}\n ************\n B: {{output}}\n ************\n [END DATA]\n\n Compare summary A with summary B. Ignore any differences in style, grammar, or punctuation.\n Determine which summary better describes the original text.\nchoice_scores:\n "A": 0\n "B": 1\n';
var summary_default = 'prompt: |-\n You are comparing a submitted summary of a given text to an expert summary. Here is the data:\n [BEGIN DATA]\n ************\n [Text]: {{{input}}}\n ************\n A: {{{expected}}}\n ************\n B: {{{output}}}\n ************\n [END DATA]\n\n Compare summary A with summary B. Ignore any differences in style, grammar, or punctuation.\n Determine which summary better describes the original text.\nchoice_scores:\n "A": 0\n "B": 1\n';

@@ -193,7 +193,7 @@ // templates/translation.yaml

************
[Sentence]: {{input}}
[Sentence]: {{{input}}}
************
[Expert]: {{expected}}
[Expert]: {{{expected}}}
************
[Submission]: {{output}}
[Submission]: {{{output}}}
************

@@ -200,0 +200,0 @@ [END DATA]

{
"name": "autoevals",
"version": "0.0.29",
"version": "0.0.30",
"description": "Universal library for evaluating AI models",

@@ -5,0 +5,0 @@ "main": "jsdist/bundle.js",

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc