@huggingface/inference
Comparing version 2.6.6 to 2.6.7
@@ -34,3 +34,3 @@ | ||
/** | ||
* (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. | ||
* Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers). | ||
*/ | ||
@@ -707,2 +707,197 @@ includeCredentials?: string | boolean; | ||
/** | ||
* The reason why the generation was stopped. | ||
* | ||
* length: The generated sequence reached the maximum allowed length | ||
* | ||
* eos_token: The model generated an end-of-sentence (EOS) token | ||
* | ||
* stop_sequence: One of the sequences in stop_sequences was generated | ||
*/ | ||
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; | ||
/** | ||
* Inputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationInput { | ||
/** | ||
* The text to initialize generation with | ||
*/ | ||
inputs: string; | ||
/** | ||
* Additional inference parameters | ||
*/ | ||
parameters?: TextGenerationParameters; | ||
/** | ||
* Whether to stream output tokens | ||
*/ | ||
stream?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Additional inference parameters | ||
* | ||
* Additional inference parameters for Text Generation | ||
*/ | ||
export interface TextGenerationParameters { | ||
/** | ||
* The number of sampling queries to run. Only the best one (in terms of total logprob) will | ||
* be returned. | ||
*/ | ||
best_of?: number; | ||
/** | ||
* Whether or not to output decoder input details | ||
*/ | ||
decoder_input_details?: boolean; | ||
/** | ||
* Whether or not to output details | ||
*/ | ||
details?: boolean; | ||
/** | ||
* Whether to use logits sampling instead of greedy decoding when generating new tokens. | ||
*/ | ||
do_sample?: boolean; | ||
/** | ||
* The maximum number of tokens to generate. | ||
*/ | ||
max_new_tokens?: number; | ||
/** | ||
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this | ||
* paper](https://hf.co/papers/1909.05858) for more details. | ||
*/ | ||
repetition_penalty?: number; | ||
/** | ||
* Whether to prepend the prompt to the generated text. | ||
*/ | ||
return_full_text?: boolean; | ||
/** | ||
* The random sampling seed. | ||
*/ | ||
seed?: number; | ||
/** | ||
* Stop generating tokens if a member of `stop_sequences` is generated. | ||
*/ | ||
stop_sequences?: string[]; | ||
/** | ||
* The value used to modulate the logits distribution. | ||
*/ | ||
temperature?: number; | ||
/** | ||
* The number of highest probability vocabulary tokens to keep for top-k-filtering. | ||
*/ | ||
top_k?: number; | ||
/** | ||
* If set to < 1, only the smallest set of most probable tokens with probabilities that add | ||
* up to `top_p` or higher are kept for generation. | ||
*/ | ||
top_p?: number; | ||
/** | ||
* Truncate input tokens to the given size. | ||
*/ | ||
truncate?: number; | ||
/** | ||
* Typical Decoding mass. See [Typical Decoding for Natural Language | ||
* Generation](https://hf.co/papers/2202.00666) for more information | ||
*/ | ||
typical_p?: number; | ||
/** | ||
* Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) | ||
*/ | ||
watermark?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Outputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationOutput { | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
details?: TextGenerationOutputDetails; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
export interface TextGenerationOutputDetails { | ||
/** | ||
* Details about additional sequences when best_of is provided | ||
*/ | ||
best_of_sequences?: TextGenerationOutputSequenceDetails[]; | ||
/** | ||
* The reason why the generation was stopped. | ||
*/ | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationOutputSequenceDetails { | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationPrefillToken { | ||
id: number; | ||
logprob: number; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Generated token. | ||
*/ | ||
export interface TextGenerationOutputToken { | ||
id: number; | ||
logprob?: number; | ||
/** | ||
* Whether or not that token is a special one | ||
*/ | ||
special: boolean; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). | ||
@@ -709,0 +904,0 @@ */ |
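Taken together, the declarations added above describe the full request/response contract for text generation. As a minimal usage sketch (the access token and model name are placeholders, not part of this diff; `hf.textGeneration` is the task method that consumes these types):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// `inputs` and `parameters` follow TextGenerationInput / TextGenerationParameters above;
// the resolved value is a TextGenerationOutput.
const output = await hf.textGeneration({
  model: "gpt2",
  inputs: "The answer to the universe is",
  parameters: {
    max_new_tokens: 20,
    temperature: 0.7,
    return_full_text: false,
  },
});

console.log(output.generated_text);
```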
@@ -137,6 +137,4 @@ /// <reference path="./index.d.ts" /> | ||
credentials = includeCredentials; | ||
} else if (typeof includeCredentials === "boolean") { | ||
credentials = includeCredentials ? "include" : void 0; | ||
} else if (includeCredentials === void 0) { | ||
credentials = "same-origin"; | ||
} else if (includeCredentials === true) { | ||
credentials = "include"; | ||
} | ||
@@ -150,3 +148,3 @@ const info = { | ||
}), | ||
credentials, | ||
...credentials && { credentials }, | ||
signal: options?.signal | ||
@@ -153,0 +151,0 @@ }; |
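The behavioural change in the compiled output is the switch from always passing `credentials` to spreading it conditionally, so the key is omitted from the fetch options whenever it is undefined. A standalone sketch of that pattern (the `buildInit` helper is illustrative, not taken from the bundle):

```ts
// When `credentials` is undefined the spread contributes nothing, so the key
// never appears in the RequestInit handed to fetch; when set, it is forwarded as-is.
function buildInit(credentials?: RequestCredentials): RequestInit {
  return {
    method: "POST",
    ...(credentials && { credentials }),
  };
}

console.log("credentials" in buildInit());     // false: key omitted entirely
console.log(buildInit("include").credentials); // "include"
```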
{ | ||
"name": "@huggingface/inference", | ||
"version": "2.6.6", | ||
"version": "2.6.7", | ||
"packageManager": "pnpm@8.10.5", | ||
@@ -43,3 +43,3 @@ "license": "MIT", | ||
"@types/node": "18.13.0", | ||
"@huggingface/tasks": "^0.6.0" | ||
"@huggingface/tasks": "^0.8.0" | ||
}, | ||
@@ -46,0 +46,0 @@ "resolutions": {}, |
@@ -5,2 +5,5 @@ import * as tasks from "./tasks"; | ||
/* eslint-disable @typescript-eslint/no-empty-interface */ | ||
/* eslint-disable @typescript-eslint/no-unsafe-declaration-merging */ | ||
type Task = typeof tasks; | ||
@@ -64,6 +67,4 @@ | ||
// eslint-disable-next-line @typescript-eslint/no-empty-interface | ||
export interface HfInference extends TaskWithNoAccessToken {} | ||
// eslint-disable-next-line @typescript-eslint/no-empty-interface | ||
export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoModel {} |
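The two new `eslint-disable` directives cover the declaration-merging trick that lets `HfInference` and `HfInferenceEndpoint` expose one typed method per task: an interface provides the signatures and is merged with the class that assigns the implementations. A generic sketch of the pattern (the `Greeter` name is illustrative, not the library's internals):

```ts
// Interface and class of the same name are merged: the interface supplies the
// method signature, the constructor supplies the implementation at runtime.
interface Greeter {
  hello(name: string): string;
}

// eslint-disable-next-line @typescript-eslint/no-unsafe-declaration-merging
class Greeter {
  constructor(greeting = "Hello") {
    this.hello = (name) => `${greeting}, ${name}!`;
  }
}

console.log(new Greeter().hello("world")); // "Hello, world!"
```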
@@ -92,15 +92,10 @@ import type { InferenceTask, Options, RequestArgs } from "../types"; | ||
// Let users configure credentials, or disable them all together (or keep default behavior). | ||
// --- | ||
// This used to be an internal property only and never exposed to users. This means that most usages will never define this value | ||
// So in order to make this backwards compatible, if it's undefined we go to "same-origin" (default behaviour before). | ||
// If it's a boolean and set to true then set to "include". If false, don't define credentials at all (useful for edge runtimes) | ||
// Then finally, if it's a string, use it as-is. | ||
/** | ||
* For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error | ||
*/ | ||
let credentials: RequestCredentials | undefined; | ||
if (typeof includeCredentials === "string") { | ||
credentials = includeCredentials as RequestCredentials; | ||
} else if (typeof includeCredentials === "boolean") { | ||
credentials = includeCredentials ? "include" : undefined; | ||
} else if (includeCredentials === undefined) { | ||
credentials = "same-origin"; | ||
} else if (includeCredentials === true) { | ||
credentials = "include"; | ||
} | ||
@@ -117,3 +112,3 @@ | ||
}), | ||
credentials, | ||
...(credentials && { credentials }), | ||
signal: options?.signal, | ||
@@ -120,0 +115,0 @@ }; |
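The user-facing contract for `includeCredentials` (reworded in the `src/types.ts` hunk further down) is: a string is passed straight through, `true` becomes `"include"`, and `false` sends no credentials field at all, in which case browsers fall back to their `"same-origin"` default. A sketch of that mapping (the `resolveCredentials` helper is illustrative, not a copy of the shipped code; treating an unset value like `false` is an assumption here):

```ts
function resolveCredentials(includeCredentials?: string | boolean): RequestCredentials | undefined {
  if (typeof includeCredentials === "string") {
    return includeCredentials as RequestCredentials; // e.g. "same-origin", "include", "omit"
  }
  if (includeCredentials === true) {
    return "include";
  }
  return undefined; // false or unset (assumed): omit the field and let the runtime decide
}
```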
@@ -1,2 +0,1 @@ | ||
import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks/src/tasks/text-generation/inference"; | ||
import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
@@ -7,2 +6,205 @@ import type { BaseArgs, Options } from "../../types"; | ||
/** | ||
* Inputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationInput { | ||
/** | ||
* The text to initialize generation with | ||
*/ | ||
inputs: string; | ||
/** | ||
* Additional inference parameters | ||
*/ | ||
parameters?: TextGenerationParameters; | ||
/** | ||
* Whether to stream output tokens | ||
*/ | ||
stream?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Additional inference parameters | ||
* | ||
* Additional inference parameters for Text Generation | ||
*/ | ||
export interface TextGenerationParameters { | ||
/** | ||
* The number of sampling queries to run. Only the best one (in terms of total logprob) will | ||
* be returned. | ||
*/ | ||
best_of?: number; | ||
/** | ||
* Whether or not to output decoder input details | ||
*/ | ||
decoder_input_details?: boolean; | ||
/** | ||
* Whether or not to output details | ||
*/ | ||
details?: boolean; | ||
/** | ||
* Whether to use logits sampling instead of greedy decoding when generating new tokens. | ||
*/ | ||
do_sample?: boolean; | ||
/** | ||
* The maximum number of tokens to generate. | ||
*/ | ||
max_new_tokens?: number; | ||
/** | ||
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this | ||
* paper](https://hf.co/papers/1909.05858) for more details. | ||
*/ | ||
repetition_penalty?: number; | ||
/** | ||
* Whether to prepend the prompt to the generated text. | ||
*/ | ||
return_full_text?: boolean; | ||
/** | ||
* The random sampling seed. | ||
*/ | ||
seed?: number; | ||
/** | ||
* Stop generating tokens if a member of `stop_sequences` is generated. | ||
*/ | ||
stop_sequences?: string[]; | ||
/** | ||
* The value used to modulate the logits distribution. | ||
*/ | ||
temperature?: number; | ||
/** | ||
* The number of highest probability vocabulary tokens to keep for top-k-filtering. | ||
*/ | ||
top_k?: number; | ||
/** | ||
* If set to < 1, only the smallest set of most probable tokens with probabilities that add | ||
* up to `top_p` or higher are kept for generation. | ||
*/ | ||
top_p?: number; | ||
/** | ||
* Truncate input tokens to the given size. | ||
*/ | ||
truncate?: number; | ||
/** | ||
* Typical Decoding mass. See [Typical Decoding for Natural Language | ||
* Generation](https://hf.co/papers/2202.00666) for more information | ||
*/ | ||
typical_p?: number; | ||
/** | ||
* Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) | ||
*/ | ||
watermark?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Outputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationOutput { | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
details?: TextGenerationOutputDetails; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
export interface TextGenerationOutputDetails { | ||
/** | ||
* Details about additional sequences when best_of is provided | ||
*/ | ||
best_of_sequences?: TextGenerationOutputSequenceDetails[]; | ||
/** | ||
* The reason why the generation was stopped. | ||
*/ | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationOutputSequenceDetails { | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationPrefillToken { | ||
id: number; | ||
logprob: number; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Generated token. | ||
*/ | ||
export interface TextGenerationOutputToken { | ||
id: number; | ||
logprob?: number; | ||
/** | ||
* Whether or not that token is a special one | ||
*/ | ||
special: boolean; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* The reason why the generation was stopped. | ||
* | ||
* length: The generated sequence reached the maximum allowed length | ||
* | ||
* eos_token: The model generated an end-of-sentence (EOS) token | ||
* | ||
* stop_sequence: One of the sequences in stop_sequences was generated | ||
*/ | ||
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; | ||
/** | ||
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). | ||
@@ -9,0 +211,0 @@ */ |
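Since the same interfaces are now declared directly in `textGeneration.ts`, the optional `details` block can be illustrated with a second sketch (placeholder token and model again; `details: true` in the parameters is what makes the optional fields appear):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

const output = await hf.textGeneration({
  model: "gpt2",
  inputs: "Once upon a time",
  parameters: { max_new_tokens: 30, do_sample: true, seed: 42, details: true },
});

// `details` is only populated when requested via the parameter above.
if (output.details) {
  console.log(output.details.finish_reason);    // "length" | "eos_token" | "stop_sequence"
  console.log(output.details.generated_tokens); // number of tokens produced
}
```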
import type { BaseArgs, Options } from "../../types"; | ||
import { streamingRequest } from "../custom/streamingRequest"; | ||
import type { TextGenerationInput } from "./textGeneration"; | ||
import type { TextGenerationInput } from "@huggingface/tasks/src/tasks/text-generation/inference"; | ||
export interface TextGenerationStreamToken { | ||
@@ -7,0 +6,0 @@ /** Token ID from the model tokenizer */ |
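`textGenerationStream` keeps accepting the same `TextGenerationInput`; only the import location of the type changes. A streaming sketch (placeholder token and model; Node-style stdout for the incremental tokens):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// Same input shape as textGeneration, but tokens arrive incrementally.
for await (const event of hf.textGenerationStream({
  model: "gpt2",
  inputs: "The sky is",
  parameters: { max_new_tokens: 10 },
})) {
  process.stdout.write(event.token.text);
}
```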
@@ -35,3 +35,3 @@ import type { PipelineType } from "@huggingface/tasks"; | ||
/** | ||
* (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. | ||
* Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers). | ||
*/ | ||
@@ -38,0 +38,0 @@ includeCredentials?: string | boolean; |
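The reworded comment above is the whole user-facing contract for the option. As a usage sketch (placeholder token; `includeCredentials` can be supplied as a client-wide default or per call through the options argument):

```ts
import { HfInference } from "@huggingface/inference";

// Disable credentials entirely, e.g. for edge runtimes whose fetch rejects the field.
const hf = new HfInference("hf_xxx", { includeCredentials: false });

// ...or override it for a single request via the second (options) argument.
await hf.textGeneration(
  { model: "gpt2", inputs: "Hello" },
  { includeCredentials: "same-origin" },
);
```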