@huggingface/inference
Comparing version 1.6.3 to 1.7.0
@@ -209,2 +209,76 @@ interface Options { | ||
} | ||
interface TextGenerationStreamToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** Logprob */ | ||
logprob: number; | ||
/** | ||
* Is the token a special token | ||
* Can be used to ignore tokens when concatenating | ||
*/ | ||
special: boolean; | ||
} | ||
interface TextGenerationStreamPrefillToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** | ||
* Logprob | ||
* Optional since the logprob of the first token cannot be computed | ||
*/ | ||
logprob?: number; | ||
} | ||
interface TextGenerationStreamBestOfSequence { | ||
/** Generated text */ | ||
generated_text: string; | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
} | ||
declare enum TextGenerationStreamFinishReason { | ||
/** number of generated tokens == `max_new_tokens` */ | ||
Length = "length", | ||
/** the model generated its end of sequence token */ | ||
EndOfSequenceToken = "eos_token", | ||
/** the model generated a text included in `stop_sequences` */ | ||
StopSequence = "stop_sequence" | ||
} | ||
interface TextGenerationStreamDetails { | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
/** Additional sequences when using the `best_of` parameter */ | ||
best_of_sequences?: TextGenerationStreamBestOfSequence[]; | ||
} | ||
interface TextGenerationStreamReturn { | ||
/** Generated token, one at a time */ | ||
token: TextGenerationStreamToken; | ||
/** | ||
* Complete generated text | ||
* Only available when the generation is finished | ||
*/ | ||
generated_text?: string; | ||
/** | ||
* Generation details | ||
* Only available when the generation is finished | ||
*/ | ||
details?: TextGenerationStreamDetails; | ||
} | ||
type TokenClassificationArgs = Args & { | ||
@@ -491,2 +565,6 @@ /** | ||
/** | ||
* Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time | ||
*/ | ||
textGenerationStream(args: TextGenerationArgs, options?: Options): AsyncGenerator<TextGenerationStreamReturn>; | ||
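A minimal consumer of this new method, assuming an `HfInference` instance `hf` already constructed with an access token (the model name and prompt are illustrative, not part of this diff), might look like the following sketch:

// Sketch only: each yielded chunk carries one token; `generated_text` and
// `details` are only populated on the final chunk.
for await (const chunk of hf.textGenerationStream({
  model: "google/flan-t5-xxl", // illustrative model
  inputs: "Write one sentence about the sea.",
})) {
  process.stdout.write(chunk.token.text);
  if (chunk.details) {
    // Set once generation has finished
    console.log("\n", chunk.details.finish_reason, chunk.details.generated_tokens);
  }
}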
/** | ||
* Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english | ||
@@ -542,2 +620,6 @@ */ | ||
textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>; | ||
/** | ||
* Helper that prepares request arguments | ||
*/ | ||
private makeRequestOptions; | ||
request<T>(args: Args & { | ||
@@ -551,4 +633,15 @@ data?: Blob | ArrayBuffer; | ||
}): Promise<T>; | ||
/** | ||
* Make request that uses server-sent events and returns response as a generator | ||
*/ | ||
streamingRequest<T>(args: Args & { | ||
data?: Blob | ArrayBuffer; | ||
}, options?: Options & { | ||
binary?: boolean; | ||
blob?: boolean; | ||
/** For internal HF use, which is why it's not exposed in {@link Options} */ | ||
includeCredentials?: boolean; | ||
}): AsyncGenerator<T>; | ||
} | ||
export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue }; | ||
export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue }; |
@@ -22,3 +22,4 @@ var __defProp = Object.defineProperty; | ||
__export(src_exports, { | ||
HfInference: () => HfInference | ||
HfInference: () => HfInference, | ||
TextGenerationStreamFinishReason: () => TextGenerationStreamFinishReason | ||
}); | ||
@@ -35,3 +36,109 @@ module.exports = __toCommonJS(src_exports); | ||
// src/vendor/fetch-event-source/parse.ts | ||
function getLines(onLine) { | ||
let buffer; | ||
let position; | ||
let fieldLength; | ||
let discardTrailingNewline = false; | ||
return function onChunk(arr) { | ||
if (buffer === void 0) { | ||
buffer = arr; | ||
position = 0; | ||
fieldLength = -1; | ||
} else { | ||
buffer = concat(buffer, arr); | ||
} | ||
const bufLength = buffer.length; | ||
let lineStart = 0; | ||
while (position < bufLength) { | ||
if (discardTrailingNewline) { | ||
if (buffer[position] === 10 /* NewLine */) { | ||
lineStart = ++position; | ||
} | ||
discardTrailingNewline = false; | ||
} | ||
let lineEnd = -1; | ||
for (; position < bufLength && lineEnd === -1; ++position) { | ||
switch (buffer[position]) { | ||
case 58 /* Colon */: | ||
if (fieldLength === -1) { | ||
fieldLength = position - lineStart; | ||
} | ||
break; | ||
case 13 /* CarriageReturn */: | ||
discardTrailingNewline = true; | ||
case 10 /* NewLine */: | ||
lineEnd = position; | ||
break; | ||
} | ||
} | ||
if (lineEnd === -1) { | ||
break; | ||
} | ||
onLine(buffer.subarray(lineStart, lineEnd), fieldLength); | ||
lineStart = position; | ||
fieldLength = -1; | ||
} | ||
if (lineStart === bufLength) { | ||
buffer = void 0; | ||
} else if (lineStart !== 0) { | ||
buffer = buffer.subarray(lineStart); | ||
position -= lineStart; | ||
} | ||
}; | ||
} | ||
function getMessages(onId, onRetry, onMessage) { | ||
let message = newMessage(); | ||
const decoder = new TextDecoder(); | ||
return function onLine(line, fieldLength) { | ||
if (line.length === 0) { | ||
onMessage?.(message); | ||
message = newMessage(); | ||
} else if (fieldLength > 0) { | ||
const field = decoder.decode(line.subarray(0, fieldLength)); | ||
const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1); | ||
const value = decoder.decode(line.subarray(valueOffset)); | ||
switch (field) { | ||
case "data": | ||
message.data = message.data ? message.data + "\n" + value : value; | ||
break; | ||
case "event": | ||
message.event = value; | ||
break; | ||
case "id": | ||
onId(message.id = value); | ||
break; | ||
case "retry": | ||
const retry = parseInt(value, 10); | ||
if (!isNaN(retry)) { | ||
onRetry(message.retry = retry); | ||
} | ||
break; | ||
} | ||
} | ||
}; | ||
} | ||
function concat(a, b) { | ||
const res = new Uint8Array(a.length + b.length); | ||
res.set(a); | ||
res.set(b, a.length); | ||
return res; | ||
} | ||
function newMessage() { | ||
return { | ||
data: "", | ||
event: "", | ||
id: "", | ||
retry: void 0 | ||
}; | ||
} | ||
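For orientation, these two vendored helpers compose as getLines(getMessages(...)): raw response bytes go in, complete server-sent-event messages come out. A hedged illustration with a made-up payload (not part of the package's tests):

// Illustration only: parse one SSE chunk into messages.
const received = [];
const onChunk = getLines(
  getMessages(
    () => {},                           // onId: "id:" fields ignored here
    () => {},                           // onRetry: "retry:" fields ignored here
    (message) => received.push(message) // onMessage: a complete SSE message
  )
);
onChunk(new TextEncoder().encode('data: {"token":{"text":"Hello"}}\n\n'));
// received[0].data === '{"token":{"text":"Hello"}}'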
// src/HfInference.ts | ||
var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/"; | ||
var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => { | ||
TextGenerationStreamFinishReason2["Length"] = "length"; | ||
TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token"; | ||
TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence"; | ||
return TextGenerationStreamFinishReason2; | ||
})(TextGenerationStreamFinishReason || {}); | ||
var HfInference = class { | ||
@@ -119,2 +226,8 @@ apiKey; | ||
/** | ||
* Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time | ||
*/ | ||
async *textGenerationStream(args, options) { | ||
yield* this.streamingRequest(args, options); | ||
} | ||
/** | ||
* Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english | ||
@@ -279,3 +392,6 @@ */ | ||
} | ||
async request(args, options) { | ||
/** | ||
* Helper that prepares request arguments | ||
*/ | ||
makeRequestOptions(args, options) { | ||
const mergedOptions = { ...this.defaultOptions, ...options }; | ||
@@ -301,3 +417,4 @@ const { model, ...otherArgs } = args; | ||
} | ||
const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, { | ||
const url = `${HF_INFERENCE_API_BASE_URL}${model}`; | ||
const info = { | ||
headers, | ||
@@ -310,3 +427,8 @@ method: "POST", | ||
credentials: options?.includeCredentials ? "include" : "same-origin" | ||
}); | ||
}; | ||
return { url, info, mergedOptions }; | ||
} | ||
async request(args, options) { | ||
const { url, info, mergedOptions } = this.makeRequestOptions(args, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
@@ -330,6 +452,56 @@ return this.request(args, { | ||
} | ||
/** | ||
* Make request that uses server-sent events and returns response as a generator | ||
*/ | ||
async *streamingRequest(args, options) { | ||
const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
return this.streamingRequest(args, { | ||
...mergedOptions, | ||
wait_for_model: true | ||
}); | ||
} | ||
if (!response.ok) { | ||
throw new Error(`Server response contains error: ${response.status}`); | ||
} | ||
if (response.headers.get("content-type") !== "text/event-stream") { | ||
throw new Error(`Server does not support event stream content type`); | ||
} | ||
const reader = response.body.getReader(); | ||
const events = []; | ||
const onEvent = (event) => { | ||
events.push(event); | ||
}; | ||
const onChunk = getLines( | ||
getMessages( | ||
() => { | ||
}, | ||
() => { | ||
}, | ||
onEvent | ||
) | ||
); | ||
try { | ||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) | ||
return; | ||
onChunk(value); | ||
while (events.length > 0) { | ||
const event = events.shift(); | ||
if (event.data.length > 0) { | ||
yield JSON.parse(event.data); | ||
} | ||
} | ||
} | ||
} finally { | ||
reader.releaseLock(); | ||
} | ||
} | ||
}; | ||
// Annotate the CommonJS export names for ESM import in node: | ||
0 && (module.exports = { | ||
HfInference | ||
HfInference, | ||
TextGenerationStreamFinishReason | ||
}); |
{ | ||
"name": "@huggingface/inference", | ||
"version": "1.6.3", | ||
"version": "1.7.0", | ||
"license": "MIT", | ||
@@ -5,0 +5,0 @@ "author": "Tim Mikeladze <tim.mikeladze@gmail.com>", |
@@ -79,2 +79,9 @@ # 🤗 Hugging Face Inference API | ||
for await (const output of hf.textGenerationStream({ | ||
model: "google/flan-t5-xxl", | ||
inputs: 'repeat "one two three four"' | ||
})) { | ||
console.log(output.token.text, output.generated_text); | ||
} | ||
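A possible follow-up to this example (not in the README itself) is to use the `special` flag on each streamed token to skip control tokens while assembling the final text:

let story = '';
for await (const output of hf.textGenerationStream({
  model: 'google/flan-t5-xxl',
  inputs: 'repeat "one two three four"'
})) {
  if (!output.token.special) { // e.g. skip the end-of-sequence token
    story += output.token.text;
  }
}
console.log(story);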
await hf.tokenClassification({ | ||
@@ -81,0 +88,0 @@ model: 'dbmdz/bert-large-cased-finetuned-conll03-english', |
import { toArray } from "./utils/to-array"; | ||
import type { EventSourceMessage } from "./vendor/fetch-event-source/parse"; | ||
import { getLines, getMessages } from "./vendor/fetch-event-source/parse"; | ||
const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/"; | ||
export interface Options { | ||
@@ -226,2 +230,82 @@ /** | ||
export interface TextGenerationStreamToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** Logprob */ | ||
logprob: number; | ||
/** | ||
* Is the token a special token | ||
* Can be used to ignore tokens when concatenating | ||
*/ | ||
special: boolean; | ||
} | ||
export interface TextGenerationStreamPrefillToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** | ||
* Logprob | ||
* Optional since the logprob of the first token cannot be computed | ||
*/ | ||
logprob?: number; | ||
} | ||
export interface TextGenerationStreamBestOfSequence { | ||
/** Generated text */ | ||
generated_text: string; | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
} | ||
export enum TextGenerationStreamFinishReason { | ||
/** number of generated tokens == `max_new_tokens` */ | ||
Length = "length", | ||
/** the model generated its end of sequence token */ | ||
EndOfSequenceToken = "eos_token", | ||
/** the model generated a text included in `stop_sequences` */ | ||
StopSequence = "stop_sequence", | ||
} | ||
export interface TextGenerationStreamDetails { | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
/** Additional sequences when using the `best_of` parameter */ | ||
best_of_sequences?: TextGenerationStreamBestOfSequence[]; | ||
} | ||
export interface TextGenerationStreamReturn { | ||
/** Generated token, one at a time */ | ||
token: TextGenerationStreamToken; | ||
/** | ||
* Complete generated text | ||
* Only available when the generation is finished | ||
*/ | ||
generated_text?: string; | ||
/** | ||
* Generation details | ||
* Only available when the generation is finished | ||
*/ | ||
details?: TextGenerationStreamDetails; | ||
} | ||
export type TokenClassificationArgs = Args & { | ||
@@ -620,2 +704,12 @@ /** | ||
/** | ||
* Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time | ||
*/ | ||
public async *textGenerationStream( | ||
args: TextGenerationArgs, | ||
options?: Options | ||
): AsyncGenerator<TextGenerationStreamReturn> { | ||
yield* this.streamingRequest<TextGenerationStreamReturn>(args, options); | ||
} | ||
/** | ||
* Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english | ||
@@ -839,4 +933,10 @@ */ | ||
public async request<T>( | ||
args: Args & { data?: Blob | ArrayBuffer }, | ||
/** | ||
* Helper that prepares request arguments | ||
*/ | ||
private makeRequestOptions( | ||
args: Args & { | ||
data?: Blob | ArrayBuffer; | ||
stream?: boolean; | ||
}, | ||
options?: Options & { | ||
@@ -848,3 +948,3 @@ binary?: boolean; | ||
} | ||
): Promise<T> { | ||
) { | ||
const mergedOptions = { ...this.defaultOptions, ...options }; | ||
@@ -874,3 +974,4 @@ const { model, ...otherArgs } = args; | ||
const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, { | ||
const url = `${HF_INFERENCE_API_BASE_URL}${model}`; | ||
const info: RequestInit = { | ||
headers, | ||
@@ -885,4 +986,19 @@ method: "POST", | ||
credentials: options?.includeCredentials ? "include" : "same-origin", | ||
}); | ||
}; | ||
return { url, info, mergedOptions }; | ||
} | ||
public async request<T>( | ||
args: Args & { data?: Blob | ArrayBuffer }, | ||
options?: Options & { | ||
binary?: boolean; | ||
blob?: boolean; | ||
/** For internal HF use, which is why it's not exposed in {@link Options} */ | ||
includeCredentials?: boolean; | ||
} | ||
): Promise<T> { | ||
const { url, info, mergedOptions } = this.makeRequestOptions(args, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
@@ -908,2 +1024,63 @@ return this.request(args, { | ||
} | ||
/** | ||
* Make request that uses server-sent events and returns response as a generator | ||
*/ | ||
public async *streamingRequest<T>( | ||
args: Args & { data?: Blob | ArrayBuffer }, | ||
options?: Options & { | ||
binary?: boolean; | ||
blob?: boolean; | ||
/** For internal HF use, which is why it's not exposed in {@link Options} */ | ||
includeCredentials?: boolean; | ||
} | ||
): AsyncGenerator<T> { | ||
const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
return this.streamingRequest(args, { | ||
...mergedOptions, | ||
wait_for_model: true, | ||
}); | ||
} | ||
if (!response.ok) { | ||
throw new Error(`Server response contains error: ${response.status}`); | ||
} | ||
if (response.headers.get("content-type") !== "text/event-stream") { | ||
throw new Error(`Server does not support event stream content type`); | ||
} | ||
const reader = response.body.getReader(); | ||
const events: EventSourceMessage[] = []; | ||
const onEvent = (event: EventSourceMessage) => { | ||
// accumulate events in array | ||
events.push(event); | ||
}; | ||
const onChunk = getLines( | ||
getMessages( | ||
() => {}, | ||
() => {}, | ||
onEvent | ||
) | ||
); | ||
try { | ||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) return; | ||
onChunk(value); | ||
while (events.length > 0) { | ||
const event = events.shift(); | ||
if (event.data.length > 0) { | ||
yield JSON.parse(event.data) as T; | ||
} | ||
} | ||
} | ||
} finally { | ||
reader.releaseLock(); | ||
} | ||
} | ||
} |
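Because `streamingRequest` is public, it can in principle also be called directly with a caller-supplied chunk type; `textGenerationStream` above is simply this call specialized to `TextGenerationStreamReturn`. The sketch below is an assumption about such direct use (the `MyChunk` shape is hypothetical, and `hf` is an already-constructed `HfInference` instance):

interface MyChunk {
  token: { text: string }; // hypothetical chunk shape
}
const args = { model: "google/flan-t5-xxl", inputs: "Hello" };
for await (const chunk of hf.streamingRequest<MyChunk>(args)) {
  process.stdout.write(chunk.token.text);
}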