@huggingface/inference
Comparing version 1.6.3 to 1.7.0
@@ -209,2 +209,76 @@ interface Options { | ||
} | ||
interface TextGenerationStreamToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** Logprob */ | ||
logprob: number; | ||
/** | ||
* Is the token a special token | ||
* Can be used to ignore tokens when concatenating | ||
*/ | ||
special: boolean; | ||
} | ||
interface TextGenerationStreamPrefillToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** | ||
* Logprob | ||
* Optional since the logprob of the first token cannot be computed | ||
*/ | ||
logprob?: number; | ||
} | ||
interface TextGenerationStreamBestOfSequence { | ||
/** Generated text */ | ||
generated_text: string; | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
} | ||
declare enum TextGenerationStreamFinishReason { | ||
/** number of generated tokens == `max_new_tokens` */ | ||
Length = "length", | ||
/** the model generated its end of sequence token */ | ||
EndOfSequenceToken = "eos_token", | ||
/** the model generated a text included in `stop_sequences` */ | ||
StopSequence = "stop_sequence" | ||
} | ||
interface TextGenerationStreamDetails { | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
/** Additional sequences when using the `best_of` parameter */ | ||
best_of_sequences?: TextGenerationStreamBestOfSequence[]; | ||
} | ||
interface TextGenerationStreamReturn { | ||
/** Generated token, one at a time */ | ||
token: TextGenerationStreamToken; | ||
/** | ||
* Complete generated text | ||
* Only available when the generation is finished | ||
*/ | ||
generated_text?: string; | ||
/** | ||
* Generation details | ||
* Only available when the generation is finished | ||
*/ | ||
details?: TextGenerationStreamDetails; | ||
} | ||
type TokenClassificationArgs = Args & { | ||
@@ -491,2 +565,6 @@ /** | ||
/** | ||
* Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time | ||
*/ | ||
textGenerationStream(args: TextGenerationArgs, options?: Options): AsyncGenerator<TextGenerationStreamReturn>; | ||
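A minimal consumer of this new method, assuming an `HfInference` instance `hf` already constructed with an access token (the model name and prompt are illustrative, not part of this diff), might look like the following sketch:

// Sketch only: each yielded chunk carries one token; `generated_text` and
// `details` are only populated on the final chunk.
for await (const chunk of hf.textGenerationStream({
  model: "google/flan-t5-xxl", // illustrative model
  inputs: "Write one sentence about the sea.",
})) {
  process.stdout.write(chunk.token.text);
  if (chunk.details) {
    // Set once generation has finished
    console.log("\n", chunk.details.finish_reason, chunk.details.generated_tokens);
  }
}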
/** | ||
* Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english | ||
@@ -542,2 +620,6 @@ */ | ||
textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>; | ||
/** | ||
* Helper that prepares request arguments | ||
*/ | ||
private makeRequestOptions; | ||
request<T>(args: Args & { | ||
@@ -551,4 +633,15 @@ data?: Blob | ArrayBuffer; | ||
}): Promise<T>; | ||
/** | ||
* Make request that uses server-sent events and returns response as a generator | ||
*/ | ||
streamingRequest<T>(args: Args & { | ||
data?: Blob | ArrayBuffer; | ||
}, options?: Options & { | ||
binary?: boolean; | ||
blob?: boolean; | ||
/** For internal HF use, which is why it's not exposed in {@link Options} */ | ||
includeCredentials?: boolean; | ||
}): AsyncGenerator<T>; | ||
} | ||
export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue }; | ||
export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue }; |
@@ -22,3 +22,4 @@ var __defProp = Object.defineProperty; | ||
__export(src_exports, { | ||
HfInference: () => HfInference | ||
HfInference: () => HfInference, | ||
TextGenerationStreamFinishReason: () => TextGenerationStreamFinishReason | ||
}); | ||
@@ -35,3 +36,109 @@ module.exports = __toCommonJS(src_exports); | ||
// src/vendor/fetch-event-source/parse.ts | ||
function getLines(onLine) { | ||
let buffer; | ||
let position; | ||
let fieldLength; | ||
let discardTrailingNewline = false; | ||
return function onChunk(arr) { | ||
if (buffer === void 0) { | ||
buffer = arr; | ||
position = 0; | ||
fieldLength = -1; | ||
} else { | ||
buffer = concat(buffer, arr); | ||
} | ||
const bufLength = buffer.length; | ||
let lineStart = 0; | ||
while (position < bufLength) { | ||
if (discardTrailingNewline) { | ||
if (buffer[position] === 10 /* NewLine */) { | ||
lineStart = ++position; | ||
} | ||
discardTrailingNewline = false; | ||
} | ||
let lineEnd = -1; | ||
for (; position < bufLength && lineEnd === -1; ++position) { | ||
switch (buffer[position]) { | ||
case 58 /* Colon */: | ||
if (fieldLength === -1) { | ||
fieldLength = position - lineStart; | ||
} | ||
break; | ||
case 13 /* CarriageReturn */: | ||
discardTrailingNewline = true; | ||
case 10 /* NewLine */: | ||
lineEnd = position; | ||
break; | ||
} | ||
} | ||
if (lineEnd === -1) { | ||
break; | ||
} | ||
onLine(buffer.subarray(lineStart, lineEnd), fieldLength); | ||
lineStart = position; | ||
fieldLength = -1; | ||
} | ||
if (lineStart === bufLength) { | ||
buffer = void 0; | ||
} else if (lineStart !== 0) { | ||
buffer = buffer.subarray(lineStart); | ||
position -= lineStart; | ||
} | ||
}; | ||
} | ||
function getMessages(onId, onRetry, onMessage) { | ||
let message = newMessage(); | ||
const decoder = new TextDecoder(); | ||
return function onLine(line, fieldLength) { | ||
if (line.length === 0) { | ||
onMessage?.(message); | ||
message = newMessage(); | ||
} else if (fieldLength > 0) { | ||
const field = decoder.decode(line.subarray(0, fieldLength)); | ||
const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1); | ||
const value = decoder.decode(line.subarray(valueOffset)); | ||
switch (field) { | ||
case "data": | ||
message.data = message.data ? message.data + "\n" + value : value; | ||
break; | ||
case "event": | ||
message.event = value; | ||
break; | ||
case "id": | ||
onId(message.id = value); | ||
break; | ||
case "retry": | ||
const retry = parseInt(value, 10); | ||
if (!isNaN(retry)) { | ||
onRetry(message.retry = retry); | ||
} | ||
break; | ||
} | ||
} | ||
}; | ||
} | ||
function concat(a, b) { | ||
const res = new Uint8Array(a.length + b.length); | ||
res.set(a); | ||
res.set(b, a.length); | ||
return res; | ||
} | ||
function newMessage() { | ||
return { | ||
data: "", | ||
event: "", | ||
id: "", | ||
retry: void 0 | ||
}; | ||
} | ||
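For orientation, these two vendored helpers compose as getLines(getMessages(...)): raw response bytes go in, complete server-sent-event messages come out. A hedged illustration with a made-up payload (not part of the package's tests):

// Illustration only: parse one SSE chunk into messages.
const received = [];
const onChunk = getLines(
  getMessages(
    () => {},                           // onId: "id:" fields ignored here
    () => {},                           // onRetry: "retry:" fields ignored here
    (message) => received.push(message) // onMessage: a complete SSE message
  )
);
onChunk(new TextEncoder().encode('data: {"token":{"text":"Hello"}}\n\n'));
// received[0].data === '{"token":{"text":"Hello"}}'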
// src/HfInference.ts | ||
var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/"; | ||
var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => { | ||
TextGenerationStreamFinishReason2["Length"] = "length"; | ||
TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token"; | ||
TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence"; | ||
return TextGenerationStreamFinishReason2; | ||
})(TextGenerationStreamFinishReason || {}); | ||
var HfInference = class { | ||
@@ -119,2 +226,8 @@ apiKey; | ||
/** | ||
* Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time | ||
*/ | ||
async *textGenerationStream(args, options) { | ||
yield* this.streamingRequest(args, options); | ||
} | ||
/** | ||
* Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english | ||
@@ -279,3 +392,6 @@ */ | ||
} | ||
async request(args, options) { | ||
/** | ||
* Helper that prepares request arguments | ||
*/ | ||
makeRequestOptions(args, options) { | ||
const mergedOptions = { ...this.defaultOptions, ...options }; | ||
@@ -301,3 +417,4 @@ const { model, ...otherArgs } = args; | ||
} | ||
const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, { | ||
const url = `${HF_INFERENCE_API_BASE_URL}${model}`; | ||
const info = { | ||
headers, | ||
@@ -310,3 +427,8 @@ method: "POST", | ||
credentials: options?.includeCredentials ? "include" : "same-origin" | ||
}); | ||
}; | ||
return { url, info, mergedOptions }; | ||
} | ||
async request(args, options) { | ||
const { url, info, mergedOptions } = this.makeRequestOptions(args, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
@@ -330,6 +452,56 @@ return this.request(args, { | ||
} | ||
/** | ||
* Make request that uses server-sent events and returns response as a generator | ||
*/ | ||
async *streamingRequest(args, options) { | ||
const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
return this.streamingRequest(args, { | ||
...mergedOptions, | ||
wait_for_model: true | ||
}); | ||
} | ||
if (!response.ok) { | ||
throw new Error(`Server response contains error: ${response.status}`); | ||
} | ||
if (response.headers.get("content-type") !== "text/event-stream") { | ||
throw new Error(`Server does not support event stream content type`); | ||
} | ||
const reader = response.body.getReader(); | ||
const events = []; | ||
const onEvent = (event) => { | ||
events.push(event); | ||
}; | ||
const onChunk = getLines( | ||
getMessages( | ||
() => { | ||
}, | ||
() => { | ||
}, | ||
onEvent | ||
) | ||
); | ||
try { | ||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) | ||
return; | ||
onChunk(value); | ||
while (events.length > 0) { | ||
const event = events.shift(); | ||
if (event.data.length > 0) { | ||
yield JSON.parse(event.data); | ||
} | ||
} | ||
} | ||
} finally { | ||
reader.releaseLock(); | ||
} | ||
} | ||
}; | ||
// Annotate the CommonJS export names for ESM import in node: | ||
0 && (module.exports = { | ||
HfInference | ||
HfInference, | ||
TextGenerationStreamFinishReason | ||
}); |
{ | ||
"name": "@huggingface/inference", | ||
"version": "1.6.3", | ||
"version": "1.7.0", | ||
"license": "MIT", | ||
@@ -5,0 +5,0 @@ "author": "Tim Mikeladze <tim.mikeladze@gmail.com>", |
@@ -79,2 +79,9 @@ # 🤗 Hugging Face Inference API | ||
for await (const output of hf.textGenerationStream({ | ||
model: "google/flan-t5-xxl", | ||
inputs: 'repeat "one two three four"' | ||
})) { | ||
console.log(output.token.text, output.generated_text); | ||
} | ||
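A possible follow-up to this example (not in the README itself) is to use the `special` flag on each streamed token to skip control tokens while assembling the final text:

let story = '';
for await (const output of hf.textGenerationStream({
  model: 'google/flan-t5-xxl',
  inputs: 'repeat "one two three four"'
})) {
  if (!output.token.special) { // e.g. skip the end-of-sequence token
    story += output.token.text;
  }
}
console.log(story);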
await hf.tokenClassification({ | ||
@@ -81,0 +88,0 @@ model: 'dbmdz/bert-large-cased-finetuned-conll03-english', |
import { toArray } from "./utils/to-array"; | ||
import type { EventSourceMessage } from "./vendor/fetch-event-source/parse"; | ||
import { getLines, getMessages } from "./vendor/fetch-event-source/parse"; | ||
const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/"; | ||
export interface Options { | ||
@@ -226,2 +230,82 @@ /** | ||
export interface TextGenerationStreamToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** Logprob */ | ||
logprob: number; | ||
/** | ||
* Is the token a special token | ||
* Can be used to ignore tokens when concatenating | ||
*/ | ||
special: boolean; | ||
} | ||
export interface TextGenerationStreamPrefillToken { | ||
/** Token ID from the model tokenizer */ | ||
id: number; | ||
/** Token text */ | ||
text: string; | ||
/** | ||
* Logprob | ||
* Optional since the logprob of the first token cannot be computed | ||
*/ | ||
logprob?: number; | ||
} | ||
export interface TextGenerationStreamBestOfSequence { | ||
/** Generated text */ | ||
generated_text: string; | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
} | ||
export enum TextGenerationStreamFinishReason { | ||
/** number of generated tokens == `max_new_tokens` */ | ||
Length = "length", | ||
/** the model generated its end of sequence token */ | ||
EndOfSequenceToken = "eos_token", | ||
/** the model generated a text included in `stop_sequences` */ | ||
StopSequence = "stop_sequence", | ||
} | ||
export interface TextGenerationStreamDetails { | ||
/** Generation finish reason */ | ||
finish_reason: TextGenerationStreamFinishReason; | ||
/** Number of generated tokens */ | ||
generated_tokens: number; | ||
/** Sampling seed if sampling was activated */ | ||
seed?: number; | ||
/** Prompt tokens */ | ||
prefill: TextGenerationStreamPrefillToken[]; | ||
/** Generated tokens */ | ||
tokens: TextGenerationStreamToken[]; | ||
/** Additional sequences when using the `best_of` parameter */ | ||
best_of_sequences?: TextGenerationStreamBestOfSequence[]; | ||
} | ||
export interface TextGenerationStreamReturn { | ||
/** Generated token, one at a time */ | ||
token: TextGenerationStreamToken; | ||
/** | ||
* Complete generated text | ||
* Only available when the generation is finished | ||
*/ | ||
generated_text?: string; | ||
/** | ||
* Generation details | ||
* Only available when the generation is finished | ||
*/ | ||
details?: TextGenerationStreamDetails; | ||
} | ||
export type TokenClassificationArgs = Args & { | ||
@@ -620,2 +704,12 @@ /** | ||
/** | ||
* Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time | ||
*/ | ||
public async *textGenerationStream( | ||
args: TextGenerationArgs, | ||
options?: Options | ||
): AsyncGenerator<TextGenerationStreamReturn> { | ||
yield* this.streamingRequest<TextGenerationStreamReturn>(args, options); | ||
} | ||
/** | ||
* Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english | ||
@@ -839,4 +933,10 @@ */ | ||
public async request<T>( | ||
args: Args & { data?: Blob | ArrayBuffer }, | ||
/** | ||
* Helper that prepares request arguments | ||
*/ | ||
private makeRequestOptions( | ||
args: Args & { | ||
data?: Blob | ArrayBuffer; | ||
stream?: boolean; | ||
}, | ||
options?: Options & { | ||
@@ -848,3 +948,3 @@ binary?: boolean; | ||
} | ||
): Promise<T> { | ||
) { | ||
const mergedOptions = { ...this.defaultOptions, ...options }; | ||
@@ -874,3 +974,4 @@ const { model, ...otherArgs } = args; | ||
const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, { | ||
const url = `${HF_INFERENCE_API_BASE_URL}${model}`; | ||
const info: RequestInit = { | ||
headers, | ||
@@ -885,4 +986,19 @@ method: "POST", | ||
credentials: options?.includeCredentials ? "include" : "same-origin", | ||
}); | ||
}; | ||
return { url, info, mergedOptions }; | ||
} | ||
public async request<T>( | ||
args: Args & { data?: Blob | ArrayBuffer }, | ||
options?: Options & { | ||
binary?: boolean; | ||
blob?: boolean; | ||
/** For internal HF use, which is why it's not exposed in {@link Options} */ | ||
includeCredentials?: boolean; | ||
} | ||
): Promise<T> { | ||
const { url, info, mergedOptions } = this.makeRequestOptions(args, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
@@ -908,2 +1024,63 @@ return this.request(args, { | ||
} | ||
/** | ||
* Make request that uses server-sent events and returns response as a generator | ||
*/ | ||
public async *streamingRequest<T>( | ||
args: Args & { data?: Blob | ArrayBuffer }, | ||
options?: Options & { | ||
binary?: boolean; | ||
blob?: boolean; | ||
/** For internal HF use, which is why it's not exposed in {@link Options} */ | ||
includeCredentials?: boolean; | ||
} | ||
): AsyncGenerator<T> { | ||
const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options); | ||
const response = await fetch(url, info); | ||
if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) { | ||
return this.streamingRequest(args, { | ||
...mergedOptions, | ||
wait_for_model: true, | ||
}); | ||
} | ||
if (!response.ok) { | ||
throw new Error(`Server response contains error: ${response.status}`); | ||
} | ||
if (response.headers.get("content-type") !== "text/event-stream") { | ||
throw new Error(`Server does not support event stream content type`); | ||
} | ||
const reader = response.body.getReader(); | ||
const events: EventSourceMessage[] = []; | ||
const onEvent = (event: EventSourceMessage) => { | ||
// accumulate events in array | ||
events.push(event); | ||
}; | ||
const onChunk = getLines( | ||
getMessages( | ||
() => {}, | ||
() => {}, | ||
onEvent | ||
) | ||
); | ||
try { | ||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) return; | ||
onChunk(value); | ||
while (events.length > 0) { | ||
const event = events.shift(); | ||
if (event.data.length > 0) { | ||
yield JSON.parse(event.data) as T; | ||
} | ||
} | ||
} | ||
} finally { | ||
reader.releaseLock(); | ||
} | ||
} | ||
} |
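Because `streamingRequest` is public, it can in principle also be called directly with a caller-supplied chunk type; `textGenerationStream` above is simply this call specialized to `TextGenerationStreamReturn`. The sketch below is an assumption about such direct use (the `MyChunk` shape is hypothetical, and `hf` is an already-constructed `HfInference` instance):

interface MyChunk {
  token: { text: string }; // hypothetical chunk shape
}
const args = { model: "google/flan-t5-xxl", inputs: "Hello" };
for await (const chunk of hf.streamingRequest<MyChunk>(args)) {
  process.stdout.write(chunk.token.text);
}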