@huggingface/inference
Comparing version 2.6.7 to 2.7.0
@@ -1,2 +0,1 @@ | ||
/// <reference path="./index.d.ts" /> | ||
var __defProp = Object.defineProperty; | ||
@@ -14,2 +13,4 @@ var __export = (target, all) => { | ||
automaticSpeechRecognition: () => automaticSpeechRecognition, | ||
chatCompletion: () => chatCompletion, | ||
chatCompletionStream: () => chatCompletionStream, | ||
documentQuestionAnswering: () => documentQuestionAnswering, | ||
@@ -43,2 +44,26 @@ featureExtraction: () => featureExtraction, | ||
// src/utils/pick.ts | ||
function pick(o, props) { | ||
return Object.assign( | ||
{}, | ||
...props.map((prop) => { | ||
if (o[prop] !== void 0) { | ||
return { [prop]: o[prop] }; | ||
} | ||
}) | ||
); | ||
} | ||
// src/utils/typedInclude.ts | ||
function typedInclude(arr, v) { | ||
return arr.includes(v); | ||
} | ||
// src/utils/omit.ts | ||
function omit(o, props) { | ||
const propsArr = Array.isArray(props) ? props : [props]; | ||
const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop)); | ||
return pick(o, letsKeep); | ||
} | ||
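// Illustrative behavior of the new helpers (hypothetical values, not part of the bundle):
//   pick({ a: 1, b: 2 }, ["a"])   -> { a: 1 }
//   omit({ a: 1, b: 2 }, "a")     -> { b: 2 }
//   typedInclude(["a", "b"], "b") -> true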
// src/lib/isUrl.ts | ||
@@ -84,3 +109,3 @@ function isUrl(modelOrUrl) { | ||
async function makeRequestOptions(args, options) { | ||
const { accessToken, model: _model, ...otherArgs } = args; | ||
const { accessToken, endpointUrl, ...otherArgs } = args; | ||
let { model } = args; | ||
@@ -94,3 +119,3 @@ const { | ||
dont_load_model, | ||
...otherOptions | ||
chatCompletion: chatCompletion2 | ||
} = options ?? {}; | ||
@@ -129,6 +154,13 @@ const headers = {}; | ||
} | ||
const url = (() => { | ||
let url = (() => { | ||
if (endpointUrl && isUrl(model)) { | ||
throw new TypeError("Both model and endpointUrl cannot be URLs"); | ||
} | ||
if (isUrl(model)) { | ||
console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead"); | ||
return model; | ||
} | ||
if (endpointUrl) { | ||
return endpointUrl; | ||
} | ||
if (task) { | ||
@@ -139,2 +171,5 @@ return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`; | ||
})(); | ||
if (chatCompletion2 && !url.endsWith("/chat/completions")) { | ||
url += "/v1/chat/completions"; | ||
} | ||
let credentials; | ||
@@ -150,4 +185,3 @@ if (typeof includeCredentials === "string") { | ||
body: binary ? args.data : JSON.stringify({ | ||
...otherArgs, | ||
options: options && otherOptions | ||
...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs | ||
}), | ||
@@ -173,2 +207,5 @@ ...credentials && { credentials }, | ||
const output = await response.json(); | ||
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { | ||
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`); | ||
} | ||
if (output.error) { | ||
@@ -298,2 +335,5 @@ throw new Error(output.error); | ||
const output = await response.json(); | ||
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { | ||
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`); | ||
} | ||
if (output.error) { | ||
@@ -335,2 +375,5 @@ throw new Error(output.error); | ||
if (event.data.length > 0) { | ||
if (event.data === "[DONE]") { | ||
return; | ||
} | ||
const data = JSON.parse(event.data); | ||
@@ -482,3 +525,3 @@ if (typeof data === "object" && data !== null && "error" in data) { | ||
// ../shared/src/base64FromBytes.ts | ||
// src/utils/base64FromBytes.ts | ||
function base64FromBytes(arr) { | ||
@@ -496,6 +539,2 @@ if (globalThis.Buffer) { | ||
// ../shared/src/isBackend.ts | ||
var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined"; | ||
var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope"; | ||
// src/tasks/cv/imageToImage.ts | ||
@@ -741,2 +780,25 @@ async function imageToImage(args, options) { | ||
// src/tasks/nlp/chatCompletion.ts | ||
async function chatCompletion(args, options) { | ||
const res = await request(args, { | ||
...options, | ||
taskHint: "text-generation", | ||
chatCompletion: true | ||
}); | ||
const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object"; | ||
if (!isValidOutput) { | ||
throw new InferenceOutputError("Expected ChatCompletionOutput"); | ||
} | ||
return res; | ||
} | ||
// src/tasks/nlp/chatCompletionStream.ts | ||
async function* chatCompletionStream(args, options) { | ||
yield* streamingRequest(args, { | ||
...options, | ||
taskHint: "text-generation", | ||
chatCompletion: true | ||
}); | ||
} | ||
// src/tasks/multimodal/documentQuestionAnswering.ts | ||
@@ -853,3 +915,3 @@ async function documentQuestionAnswering(args, options) { | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options }) | ||
fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options }) | ||
) | ||
@@ -867,2 +929,4 @@ }); | ||
automaticSpeechRecognition, | ||
chatCompletion, | ||
chatCompletionStream, | ||
documentQuestionAnswering, | ||
@@ -869,0 +933,0 @@ featureExtraction, |
{ | ||
"name": "@huggingface/inference", | ||
"version": "2.6.7", | ||
"version": "2.7.0", | ||
"packageManager": "pnpm@8.10.5", | ||
@@ -32,7 +32,7 @@ "license": "MIT", | ||
"source": "src/index.ts", | ||
"types": "./dist/index.d.ts", | ||
"types": "./dist/src/index.d.ts", | ||
"main": "./dist/index.cjs", | ||
"module": "./dist/index.js", | ||
"exports": { | ||
"types": "./dist/index.d.ts", | ||
"types": "./dist/src/index.d.ts", | ||
"require": "./dist/index.cjs", | ||
@@ -42,10 +42,12 @@ "import": "./dist/index.js" | ||
"type": "module", | ||
"dependencies": { | ||
"@huggingface/tasks": "^0.10.0" | ||
}, | ||
"devDependencies": { | ||
"@types/node": "18.13.0", | ||
"@huggingface/tasks": "^0.8.0" | ||
"@types/node": "18.13.0" | ||
}, | ||
"resolutions": {}, | ||
"scripts": { | ||
"build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts", | ||
"dts": "tsx scripts/generate-dts.ts", | ||
"build": "tsup src/index.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration", | ||
"dts": "tsx scripts/generate-dts.ts && tsc --noEmit dist/index.d.ts", | ||
"lint": "eslint --quiet --fix --ext .cjs,.ts .", | ||
@@ -52,0 +54,0 @@ "lint:check": "eslint --ext .cjs,.ts .", |
README.md
@@ -8,3 +8,3 @@ # 🤗 Hugging Face Inference Endpoints | ||
You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523). | ||
You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523). | ||
@@ -34,3 +34,2 @@ ## Getting Started | ||
### Initialize | ||
@@ -48,3 +47,2 @@ | ||
#### Tree-shaking | ||
@@ -69,2 +67,81 @@ | ||
### Text Generation | ||
Generates text from an input prompt. | ||
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation) | ||
```typescript | ||
await hf.textGeneration({ | ||
model: 'gpt2', | ||
inputs: 'The answer to the universe is' | ||
}) | ||
for await (const output of hf.textGenerationStream({ | ||
model: "google/flan-t5-xxl", | ||
inputs: 'repeat "one two three four"', | ||
parameters: { max_new_tokens: 250 } | ||
})) { | ||
console.log(output.token.text, output.generated_text); | ||
} | ||
``` | ||
### Text Generation (Chat Completion API Compatible) | ||
Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support the Messages API. | ||
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion) | ||
```typescript | ||
// Non-streaming API | ||
const out = await hf.chatCompletion({ | ||
model: "mistralai/Mistral-7B-Instruct-v0.2", | ||
messages: [{ role: "user", content: "Complete this sentence with words one plus one is equal " }], | ||
max_tokens: 500, | ||
temperature: 0.1, | ||
seed: 0, | ||
}); | ||
// Streaming API | ||
let out = ""; | ||
for await (const chunk of hf.chatCompletionStream({ | ||
model: "mistralai/Mistral-7B-Instruct-v0.2", | ||
messages: [ | ||
{ role: "user", content: "Complete the equation 1+1= ,just the answer" }, | ||
], | ||
max_tokens: 500, | ||
temperature: 0.1, | ||
seed: 0, | ||
})) { | ||
if (chunk.choices && chunk.choices.length > 0) { | ||
out += chunk.choices[0].delta.content; | ||
} | ||
} | ||
``` | ||
It's also possible to call Mistral or OpenAI endpoints directly: | ||
```typescript | ||
const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com"); | ||
let out = ""; | ||
for await (const chunk of openai.chatCompletionStream({ | ||
model: "gpt-3.5-turbo", | ||
messages: [ | ||
{ role: "user", content: "Complete the equation 1+1= ,just the answer" }, | ||
], | ||
max_tokens: 500, | ||
temperature: 0.1, | ||
seed: 0, | ||
})) { | ||
if (chunk.choices && chunk.choices.length > 0) { | ||
out += chunk.choices[0].delta.content; | ||
} | ||
} | ||
// For mistral AI: | ||
// endpointUrl: "https://api.mistral.ai" | ||
// model: "mistral-tiny" | ||
``` | ||
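In 2.7.0 the same thing can also be expressed with the new `endpointUrl` argument instead of wrapping the client with `.endpoint()`. A minimal sketch, assuming an `HfInference` instance created with the provider's API key (the URL and model name mirror the comment above; `MISTRAL_TOKEN` is a hypothetical variable):

```typescript
const mistral = new HfInference(MISTRAL_TOKEN);
const out = await mistral.chatCompletion({
  // Base URL of the OpenAI-compatible provider; the /v1/chat/completions suffix is appended by the library
  endpointUrl: "https://api.mistral.ai",
  model: "mistral-tiny",
  messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],
  max_tokens: 500,
});
```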
### Fill Mask | ||
@@ -138,23 +215,2 @@ | ||
### Text Generation | ||
Generates text from an input prompt. | ||
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation) | ||
```typescript | ||
await hf.textGeneration({ | ||
model: 'gpt2', | ||
inputs: 'The answer to the universe is' | ||
}) | ||
for await (const output of hf.textGenerationStream({ | ||
model: "google/flan-t5-xxl", | ||
inputs: 'repeat "one two three four"', | ||
parameters: { max_new_tokens: 250 } | ||
})) { | ||
console.log(output.token.text, output.generated_text); | ||
} | ||
``` | ||
### Token Classification | ||
@@ -185,5 +241,5 @@ | ||
parameters: { | ||
"src_lang": "en_XX", | ||
"tgt_lang": "fr_XX" | ||
} | ||
"src_lang": "en_XX", | ||
"tgt_lang": "fr_XX" | ||
} | ||
}) | ||
@@ -506,2 +562,22 @@ ``` | ||
You can use any Chat Completion API-compatible provider with the `chatCompletion` method. | ||
```typescript | ||
// Chat Completion Example | ||
const MISTRAL_KEY = process.env.MISTRAL_KEY; | ||
const hf = new HfInference(MISTRAL_KEY); | ||
const ep = hf.endpoint("https://api.mistral.ai"); | ||
const stream = ep.chatCompletionStream({ | ||
model: "mistral-tiny", | ||
messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }], | ||
}); | ||
let out = ""; | ||
for await (const chunk of stream) { | ||
if (chunk.choices && chunk.choices.length > 0) { | ||
out += chunk.choices[0].delta.content; | ||
console.log(out); | ||
} | ||
} | ||
``` | ||
## Custom Inference Endpoints | ||
@@ -514,2 +590,21 @@ | ||
const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'}); | ||
// Chat Completion Example | ||
const ep = hf.endpoint( | ||
"https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2" | ||
); | ||
const stream = ep.chatCompletionStream({ | ||
model: "tgi", | ||
messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], | ||
max_tokens: 500, | ||
temperature: 0.1, | ||
seed: 0, | ||
}); | ||
let out = ""; | ||
for await (const chunk of stream) { | ||
if (chunk.choices && chunk.choices.length > 0) { | ||
out += chunk.choices[0].delta.content; | ||
console.log(out); | ||
} | ||
} | ||
``` | ||
@@ -543,1 +638,5 @@ | ||
It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things. | ||
## Dependencies | ||
- `@huggingface/tasks`: Typings only |
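With 2.7.0 these typings are consumed directly; for example, the text-generation and chat-completion input/output types are now imported from the package rather than duplicated locally:

```typescript
import type { ChatCompletionInput, TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";
```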
@@ -17,5 +17,5 @@ import * as tasks from "./tasks"; | ||
type TaskWithNoAccessTokenNoModel = { | ||
type TaskWithNoAccessTokenNoEndpointUrl = { | ||
[key in keyof Task]: ( | ||
args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken" | "model">, | ||
args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken" | "endpointUrl">, | ||
options?: Parameters<Task[key]>[1] | ||
@@ -61,3 +61,3 @@ ) => ReturnType<Task[key]>; | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
fn({ ...params, accessToken, model: endpointUrl } as any, { ...defaultOptions, ...options }), | ||
fn({ ...params, accessToken, endpointUrl } as any, { ...defaultOptions, ...options }), | ||
}); | ||
@@ -70,2 +70,2 @@ } | ||
export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoModel {} | ||
export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {} |
import type { InferenceTask, Options, RequestArgs } from "../types"; | ||
import { omit } from "../utils/omit"; | ||
import { HF_HUB_URL } from "./getDefaultTask"; | ||
@@ -25,6 +26,6 @@ import { isUrl } from "./isUrl"; | ||
taskHint?: InferenceTask; | ||
chatCompletion?: boolean; | ||
} | ||
): Promise<{ url: string; info: RequestInit }> { | ||
// eslint-disable-next-line @typescript-eslint/no-unused-vars | ||
const { accessToken, model: _model, ...otherArgs } = args; | ||
const { accessToken, endpointUrl, ...otherArgs } = args; | ||
let { model } = args; | ||
@@ -38,3 +39,3 @@ const { | ||
dont_load_model, | ||
...otherOptions | ||
chatCompletion, | ||
} = options ?? {}; | ||
@@ -82,7 +83,13 @@ | ||
const url = (() => { | ||
let url = (() => { | ||
if (endpointUrl && isUrl(model)) { | ||
throw new TypeError("Both model and endpointUrl cannot be URLs"); | ||
} | ||
if (isUrl(model)) { | ||
console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead"); | ||
return model; | ||
} | ||
if (endpointUrl) { | ||
return endpointUrl; | ||
} | ||
if (task) { | ||
@@ -95,2 +102,6 @@ return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`; | ||
if (chatCompletion && !url.endsWith("/chat/completions")) { | ||
url += "/v1/chat/completions"; | ||
} | ||
/** | ||
@@ -112,4 +123,3 @@ * For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error | ||
: JSON.stringify({ | ||
...otherArgs, | ||
options: options && otherOptions, | ||
...(otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs), | ||
}), | ||
@@ -116,0 +126,0 @@ ...(credentials && { credentials }), |
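// Illustrative URL resolution under the new logic (the endpoint URL below is a hypothetical example):
//   task resolved                  -> `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`
//   endpointUrl only               -> "https://my-endpoint.example"
//   endpointUrl + chatCompletion   -> "https://my-endpoint.example/v1/chat/completions"
//   model passed as a URL          -> still used, but logs a deprecation warning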
@@ -14,2 +14,4 @@ import type { InferenceTask, Options, RequestArgs } from "../../types"; | ||
taskHint?: InferenceTask; | ||
/** Is chat completion compatible */ | ||
chatCompletion?: boolean; | ||
} | ||
@@ -30,2 +32,5 @@ ): Promise<T> { | ||
const output = await response.json(); | ||
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { | ||
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`); | ||
} | ||
if (output.error) { | ||
@@ -32,0 +37,0 @@ throw new Error(output.error); |
@@ -16,2 +16,4 @@ import type { InferenceTask, Options, RequestArgs } from "../../types"; | ||
taskHint?: InferenceTask; | ||
/** Is chat completion compatible */ | ||
chatCompletion?: boolean; | ||
} | ||
@@ -31,2 +33,5 @@ ): AsyncGenerator<T> { | ||
const output = await response.json(); | ||
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { | ||
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`); | ||
} | ||
if (output.error) { | ||
@@ -72,2 +77,5 @@ throw new Error(output.error); | ||
if (event.data.length > 0) { | ||
if (event.data === "[DONE]") { | ||
return; | ||
} | ||
const data = JSON.parse(event.data); | ||
@@ -74,0 +82,0 @@ if (typeof data === "object" && data !== null && "error" in data) { |
import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
import type { BaseArgs, Options, RequestArgs } from "../../types"; | ||
import { base64FromBytes } from "../../utils/base64FromBytes"; | ||
import { request } from "../custom/request"; | ||
import { base64FromBytes } from "../../../../shared"; | ||
@@ -6,0 +6,0 @@ export type ImageToImageArgs = BaseArgs & { |
@@ -5,3 +5,3 @@ import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
import type { RequestArgs } from "../../types"; | ||
import { base64FromBytes } from "../../../../shared"; | ||
import { base64FromBytes } from "../../utils/base64FromBytes"; | ||
@@ -8,0 +8,0 @@ export type ZeroShotImageClassificationArgs = BaseArgs & { |
@@ -33,2 +33,4 @@ // Custom tasks with arbitrary inputs and outputs | ||
export * from "./nlp/zeroShotClassification"; | ||
export * from "./nlp/chatCompletion"; | ||
export * from "./nlp/chatCompletionStream"; | ||
@@ -35,0 +37,0 @@ // Multimodal tasks |
@@ -5,4 +5,4 @@ import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
import type { RequestArgs } from "../../types"; | ||
import { base64FromBytes } from "../../../../shared"; | ||
import { toArray } from "../../utils/toArray"; | ||
import { base64FromBytes } from "../../utils/base64FromBytes"; | ||
@@ -9,0 +9,0 @@ export type DocumentQuestionAnsweringArgs = BaseArgs & { |
import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
import type { BaseArgs, Options, RequestArgs } from "../../types"; | ||
import { base64FromBytes } from "../../utils/base64FromBytes"; | ||
import { request } from "../custom/request"; | ||
import { base64FromBytes } from "../../../../shared"; | ||
@@ -6,0 +6,0 @@ export type VisualQuestionAnsweringArgs = BaseArgs & { |
@@ -0,1 +1,2 @@ | ||
import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks"; | ||
import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
@@ -5,206 +6,5 @@ import type { BaseArgs, Options } from "../../types"; | ||
/** | ||
* Inputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationInput { | ||
/** | ||
* The text to initialize generation with | ||
*/ | ||
inputs: string; | ||
/** | ||
* Additional inference parameters | ||
*/ | ||
parameters?: TextGenerationParameters; | ||
/** | ||
* Whether to stream output tokens | ||
*/ | ||
stream?: boolean; | ||
[property: string]: unknown; | ||
} | ||
export type { TextGenerationInput, TextGenerationOutput }; | ||
/** | ||
* Additional inference parameters | ||
* | ||
* Additional inference parameters for Text Generation | ||
*/ | ||
export interface TextGenerationParameters { | ||
/** | ||
* The number of sampling queries to run. Only the best one (in terms of total logprob) will | ||
* be returned. | ||
*/ | ||
best_of?: number; | ||
/** | ||
* Whether or not to output decoder input details | ||
*/ | ||
decoder_input_details?: boolean; | ||
/** | ||
* Whether or not to output details | ||
*/ | ||
details?: boolean; | ||
/** | ||
* Whether to use logits sampling instead of greedy decoding when generating new tokens. | ||
*/ | ||
do_sample?: boolean; | ||
/** | ||
* The maximum number of tokens to generate. | ||
*/ | ||
max_new_tokens?: number; | ||
/** | ||
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this | ||
* paper](https://hf.co/papers/1909.05858) for more details. | ||
*/ | ||
repetition_penalty?: number; | ||
/** | ||
* Whether to prepend the prompt to the generated text. | ||
*/ | ||
return_full_text?: boolean; | ||
/** | ||
* The random sampling seed. | ||
*/ | ||
seed?: number; | ||
/** | ||
* Stop generating tokens if a member of `stop_sequences` is generated. | ||
*/ | ||
stop_sequences?: string[]; | ||
/** | ||
* The value used to modulate the logits distribution. | ||
*/ | ||
temperature?: number; | ||
/** | ||
* The number of highest probability vocabulary tokens to keep for top-k-filtering. | ||
*/ | ||
top_k?: number; | ||
/** | ||
* If set to < 1, only the smallest set of most probable tokens with probabilities that add | ||
* up to `top_p` or higher are kept for generation. | ||
*/ | ||
top_p?: number; | ||
/** | ||
* Truncate input tokens to the given size. | ||
*/ | ||
truncate?: number; | ||
/** | ||
* Typical Decoding mass. See [Typical Decoding for Natural Language | ||
* Generation](https://hf.co/papers/2202.00666) for more information | ||
*/ | ||
typical_p?: number; | ||
/** | ||
* Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) | ||
*/ | ||
watermark?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Outputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationOutput { | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
details?: TextGenerationOutputDetails; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
export interface TextGenerationOutputDetails { | ||
/** | ||
* Details about additional sequences when best_of is provided | ||
*/ | ||
best_of_sequences?: TextGenerationOutputSequenceDetails[]; | ||
/** | ||
* The reason why the generation was stopped. | ||
*/ | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationOutputSequenceDetails { | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationPrefillToken { | ||
id: number; | ||
logprob: number; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Generated token. | ||
*/ | ||
export interface TextGenerationOutputToken { | ||
id: number; | ||
logprob?: number; | ||
/** | ||
* Whether or not that token is a special one | ||
*/ | ||
special: boolean; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* The reason why the generation was stopped. | ||
* | ||
* length: The generated sequence reached the maximum allowed length | ||
* | ||
* eos_token: The model generated an end-of-sentence (EOS) token | ||
* | ||
* stop_sequence: One of the sequence in stop_sequences was generated | ||
*/ | ||
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; | ||
/** | ||
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). | ||
@@ -211,0 +11,0 @@ */ |
@@ -0,4 +1,4 @@ | ||
import type { TextGenerationInput } from "@huggingface/tasks"; | ||
import type { BaseArgs, Options } from "../../types"; | ||
import { streamingRequest } from "../custom/streamingRequest"; | ||
import type { TextGenerationInput } from "./textGeneration"; | ||
@@ -70,2 +70,3 @@ export interface TextGenerationStreamToken { | ||
export interface TextGenerationStreamOutput { | ||
index?: number; | ||
/** Generated token, one at a time */ | ||
@@ -72,0 +73,0 @@ token: TextGenerationStreamToken; |
import type { PipelineType } from "@huggingface/tasks"; | ||
import type { ChatCompletionInput } from "@huggingface/tasks"; | ||
@@ -35,3 +36,3 @@ export interface Options { | ||
/** | ||
* Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers). | ||
* (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. | ||
*/ | ||
@@ -51,13 +52,23 @@ includeCredentials?: string | boolean; | ||
/** | ||
* The model to use. Can be a full URL for a dedicated inference endpoint. | ||
* The model to use. | ||
* | ||
* If not specified, will call huggingface.co/api/tasks to get the default model for the task. | ||
* | ||
* /!\ Legacy behavior allows this to be a URL, but this is deprecated and will be removed in the future. | ||
* Use the `endpointUrl` parameter instead. | ||
*/ | ||
model?: string; | ||
/** | ||
* The URL of the endpoint to use. If not specified, will call huggingface.co/api/tasks to get the default endpoint for the task. | ||
* | ||
* If specified, will use this URL instead of the default one. | ||
*/ | ||
endpointUrl?: string; | ||
} | ||
export type RequestArgs = BaseArgs & | ||
({ data: Blob | ArrayBuffer } | { inputs: unknown }) & { | ||
({ data: Blob | ArrayBuffer } | { inputs: unknown } | ChatCompletionInput) & { | ||
parameters?: Record<string, unknown>; | ||
accessToken?: string; | ||
}; |
+ Added @huggingface/tasks@^0.10.0
+ Added @huggingface/tasks@0.10.22 (transitive)