@formula-monks/kurt
Advanced tools
Comparing version 1.0.0 to 1.1.0
@@ -66,2 +66,3 @@ import type { RequireExactlyOne } from "type-fest"; | ||
private makeMessages; | ||
private makeSamplingOptions; | ||
} | ||
@@ -128,3 +129,85 @@ export type KurtMessage = { | ||
systemPrompt?: string; | ||
/** | ||
* Default sampling options to use, for any generation method call which | ||
 * does not specify different sampling options to override these ones. | ||
*/ | ||
sampling?: KurtSamplingOptions; | ||
} | ||
/** | ||
* Options which control how output tokens are sampled from the underlying LLM. | ||
*/ | ||
export type KurtSamplingOptions = Partial<typeof KurtSamplingOptionsDefault>; | ||
/**
 * The default values to use for `KurtSamplingOptions` when the application
 * doesn't specify any explicit values to override them.
 *
 * These values are loosely based on the defaults for major LLM providers,
 * erring on the side of more conservative choices where there is variance.
 *
 * Kurt has uniform defaults no matter which LLM you select, rather than
 * using defaults which vary from one LLM provider to another, to make it
 * easier to "compare apples to apples" when using different LLMs with Kurt.
 */
export declare const KurtSamplingOptionsDefault: {
    /**
     * Maximum number of output tokens to sample from the model.
     *
     * This is meant to be a cost control measure, to protect against scenarios
     * where the model might get "stuck" and generate excessive output.
     *
     * When the model hits the output limit, whatever it has generated will
     * be cut off abruptly - the model has no awareness of this limit or how
     * concise its output needs to be, so if you need more concise output,
     * you'll need to include that in the (user or system) prompt instructions,
     * rather than relying on this parameter alone.
     */
    maxOutputTokens: number;
    /**
     * A factor to increase the amount of randomness in final token sampling.
     *
     * Along with `topP`, this parameter can control the amount of
     * variation, "creativity", and topic drift of the generated output.
     * Higher values for each of these parameters will increase the variation,
     * but most LLM vendors recommend only adjusting one and not the other.
     * If you know what you're doing, then adjusting both may be helpful.
     *
     * Using a temperature value near 0 will cause sampling to almost always
     * choose the "most likely" next token from the "top tokens" set,
     * while increasing toward a value near 1 will make sampling more random.
     *
     * In all cases, the sampling occurs within the set of "top tokens" that
     * were above the cutoff thresholds introduced by `topK` and `topP`, so
     * even high temperatures will be constrained by those thresholds.
     * NOTE(review): `topK` is referenced here but is not declared among these
     * options - confirm whether it is configurable elsewhere.
     *
     * Some models allow for values higher than 1, and the precise meaning
     * of this parameter is not consistently defined for all models,
     * so as much as Kurt would like to make the behavior uniform across
     * all supported LLMs, in practice you may need to tune this parameter
     * differently for different models used by your application.
     */
    temperature: number;
    /**
     * The width of the "probability"-based filter for initial token sampling.
     *
     * Along with `temperature`, this parameter can control the amount of
     * variation, "creativity", and topic drift of the generated output.
     * Higher values for each of these parameters will increase the variation,
     * but most LLM vendors recommend only adjusting one and not the other.
     * If you know what you're doing, then adjusting both may be helpful.
     *
     * This parameter specifies the inclusiveness of the probability threshold
     * that must be met in order to consider a token for inclusion in the
     * "top tokens" set that is to be sampled from. This threshold filtering
     * happens before the `temperature` parameter is applied for sampling.
     *
     * Therefore, narrowing or widening this value will narrow/widen the set of
     * tokens that are considered for sampling, and decreasing or increasing
     * the temperature will modify how the selection happens within that set.
     *
     * Valid values are greater than 0 and less than or equal to 1.
     * A value of 1 means that all tokens are considered for sampling,
     * without any "top tokens" filtering being applied before sampling.
     */
    topP: number;
};
export interface KurtGenerateNaturalLanguageOptions { | ||
@@ -168,2 +251,10 @@ /** | ||
extraMessages?: KurtMessage[]; | ||
/** | ||
* Sampling options to use for this generation. | ||
* | ||
* Any options not specified here will be taken from the options given | ||
* in the constructor call for this Kurt instance if present there, or | ||
* otherwise from the `KurtSamplingOptionsDefault` values. | ||
*/ | ||
sampling?: KurtSamplingOptions; | ||
} | ||
@@ -170,0 +261,0 @@ export type KurtGenerateStructuredDataOptions<I extends KurtSchemaInner> = KurtGenerateNaturalLanguageOptions & { |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.Kurt = void 0; | ||
exports.KurtSamplingOptionsDefault = exports.Kurt = void 0; | ||
const KurtStream_1 = require("./KurtStream"); | ||
@@ -35,2 +35,3 @@ /** | ||
messages: this.adapter.transformToRawMessages(this.makeMessages(options)), | ||
sampling: this.makeSamplingOptions(options.sampling), | ||
tools: {}, | ||
@@ -53,2 +54,3 @@ }))); | ||
messages: this.adapter.transformToRawMessages(this.makeMessages(options)), | ||
sampling: this.makeSamplingOptions(options.sampling), | ||
tools: { | ||
@@ -88,2 +90,3 @@ structured_data: this.adapter.transformToRawTool({ | ||
messages: this.adapter.transformToRawMessages(this.makeMessages(options)), | ||
sampling: this.makeSamplingOptions(options.sampling), | ||
tools: Object.fromEntries(Object.entries(options.tools).map(([name, schema]) => [ | ||
@@ -108,3 +111,96 @@ name, | ||
} | ||
makeSamplingOptions(overrides) { | ||
const sampling = Object.assign(Object.assign(Object.assign({}, exports.KurtSamplingOptionsDefault), this.options.sampling), overrides); | ||
// Round integers. | ||
sampling.maxOutputTokens = Math.round(sampling.maxOutputTokens); | ||
// Enforce "hard" limits. | ||
if (sampling.maxOutputTokens < 1) | ||
throw new Error(`maxOutputTokens must be at least 1 (got: ${sampling.maxOutputTokens})`); | ||
if (sampling.temperature < 0) | ||
throw new Error(`temperature must be no less than 0 (got: ${sampling.temperature})`); | ||
if (sampling.topP < 0) | ||
throw new Error(`topP must be no less than 0 (got: ${sampling.topP})`); | ||
if (sampling.topP > 1) | ||
throw new Error(`topP must be no greater than 1 (got: ${sampling.topP})`); | ||
// Enforce "soft" limits. | ||
if (sampling.temperature === 0) | ||
sampling.temperature = Number.MIN_VALUE; | ||
if (sampling.topP === 0) | ||
sampling.topP = Number.MIN_VALUE; | ||
return sampling; | ||
} | ||
} | ||
exports.Kurt = Kurt; | ||
/**
 * The default values to use for `KurtSamplingOptions` when the application
 * doesn't specify any explicit values to override them.
 *
 * These values are loosely based on the defaults for major LLM providers,
 * erring on the side of more conservative choices where there is variance.
 *
 * Kurt has uniform defaults no matter which LLM you select, rather than
 * using defaults which vary from one LLM provider to another, to make it
 * easier to "compare apples to apples" when using different LLMs with Kurt.
 */
exports.KurtSamplingOptionsDefault = {
    /**
     * Maximum number of output tokens to sample from the model.
     *
     * This is meant to be a cost control measure, to protect against scenarios
     * where the model might get "stuck" and generate excessive output.
     *
     * When the model hits the output limit, whatever it has generated will
     * be cut off abruptly - the model has no awareness of this limit or how
     * concise its output needs to be, so if you need more concise output,
     * you'll need to include that in the (user or system) prompt instructions,
     * rather than relying on this parameter alone.
     */
    maxOutputTokens: 4096,
    /**
     * A factor to increase the amount of randomness in final token sampling.
     *
     * Along with `topP`, this parameter can control the amount of
     * variation, "creativity", and topic drift of the generated output.
     * Higher values for each of these parameters will increase the variation,
     * but most LLM vendors recommend only adjusting one and not the other.
     * If you know what you're doing, then adjusting both may be helpful.
     *
     * Using a temperature value near 0 will cause sampling to almost always
     * choose the "most likely" next token from the "top tokens" set,
     * while increasing toward a value near 1 will make sampling more random.
     *
     * In all cases, the sampling occurs within the set of "top tokens" that
     * were above the cutoff thresholds introduced by `topK` and `topP`, so
     * even high temperatures will be constrained by those thresholds.
     * NOTE(review): `topK` is referenced here but is not defined among these
     * default options - confirm whether it is configurable elsewhere.
     *
     * Some models allow for values higher than 1, and the precise meaning
     * of this parameter is not consistently defined for all models,
     * so as much as Kurt would like to make the behavior uniform across
     * all supported LLMs, in practice you may need to tune this parameter
     * differently for different models used by your application.
     */
    temperature: 0.5,
    /**
     * The width of the "probability"-based filter for initial token sampling.
     *
     * Along with `temperature`, this parameter can control the amount of
     * variation, "creativity", and topic drift of the generated output.
     * Higher values for each of these parameters will increase the variation,
     * but most LLM vendors recommend only adjusting one and not the other.
     * If you know what you're doing, then adjusting both may be helpful.
     *
     * This parameter specifies the inclusiveness of the probability threshold
     * that must be met in order to consider a token for inclusion in the
     * "top tokens" set that is to be sampled from. This threshold filtering
     * happens before the `temperature` parameter is applied for sampling.
     *
     * Therefore, narrowing or widening this value will narrow/widen the set of
     * tokens that are considered for sampling, and decreasing or increasing
     * the temperature will modify how the selection happens within that set.
     *
     * Valid values are greater than 0 and less than or equal to 1.
     * A value of 1 means that all tokens are considered for sampling,
     * without any "top tokens" filtering being applied before sampling.
     */
    topP: 0.95,
};
@@ -1,2 +0,2 @@ | ||
import type { KurtMessage } from "./Kurt"; | ||
import type { KurtMessage, KurtSamplingOptions } from "./Kurt"; | ||
import type { KurtStreamEvent } from "./KurtStream"; | ||
@@ -27,2 +27,3 @@ import type { KurtSchema, KurtSchemaInner, KurtSchemaInnerMap, KurtSchemaMap, KurtSchemaMapSingleResult, KurtSchemaResult } from "./KurtSchema"; | ||
messages: A["rawMessage"][]; | ||
sampling: Required<KurtSamplingOptions>; | ||
tools: { | ||
@@ -29,0 +30,0 @@ [key: string]: A["rawTool"]; |
@@ -5,3 +5,3 @@ { | ||
"license": "MIT", | ||
"version": "1.0.0", | ||
"version": "1.1.0", | ||
"main": "dist/index.js", | ||
@@ -12,2 +12,17 @@ "types": "dist/index.d.ts", | ||
], | ||
"scripts": { | ||
"test": "jest", | ||
"build": "tsc --build", | ||
"prepack": "pnpm run build", | ||
"format": "pnpm biome format --write .", | ||
"lint": "pnpm biome lint --apply .", | ||
"check": "pnpm biome check .", | ||
"release": "pnpm exec semantic-release" | ||
}, | ||
"release": { | ||
"branches": [ | ||
"main" | ||
], | ||
"extends": "semantic-release-monorepo" | ||
}, | ||
"devDependencies": { | ||
@@ -18,2 +33,4 @@ "@jest/globals": "^29.7.0", | ||
"jest": "^29.7.0", | ||
"semantic-release": "^23.0.8", | ||
"semantic-release-monorepo": "^8.0.2", | ||
"ts-jest": "^29.1.2", | ||
@@ -23,10 +40,3 @@ "type-fest": "^4.18.1", | ||
"zod": "^3.23.5" | ||
}, | ||
"scripts": { | ||
"test": "jest", | ||
"build": "tsc --build", | ||
"format": "pnpm biome format --write .", | ||
"lint": "pnpm biome lint --apply .", | ||
"check": "pnpm biome check ." | ||
} | ||
} | ||
} |
No README
QualityPackage does not have a README. This may indicate a failed publish or a low quality package.
Found 1 instance in 1 package
51088
944
1
146
10