@huggingface/inference
Comparing version 2.6.6 to 2.6.7
@@ -34,3 +34,3 @@ | ||
/** | ||
* (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. | ||
* Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers). | ||
*/ | ||
@@ -707,2 +707,197 @@ includeCredentials?: string | boolean; | ||
/** | ||
* The reason why the generation was stopped. | ||
* | ||
* length: The generated sequence reached the maximum allowed length | ||
* | ||
* eos_token: The model generated an end-of-sentence (EOS) token | ||
* | ||
* stop_sequence: One of the sequences in stop_sequences was generated | ||
*/ | ||
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; | ||
/** | ||
* Inputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationInput { | ||
/** | ||
* The text to initialize generation with | ||
*/ | ||
inputs: string; | ||
/** | ||
* Additional inference parameters | ||
*/ | ||
parameters?: TextGenerationParameters; | ||
/** | ||
* Whether to stream output tokens | ||
*/ | ||
stream?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Additional inference parameters | ||
* | ||
* Additional inference parameters for Text Generation | ||
*/ | ||
export interface TextGenerationParameters { | ||
/** | ||
* The number of sampling queries to run. Only the best one (in terms of total logprob) will | ||
* be returned. | ||
*/ | ||
best_of?: number; | ||
/** | ||
* Whether or not to output decoder input details | ||
*/ | ||
decoder_input_details?: boolean; | ||
/** | ||
* Whether or not to output details | ||
*/ | ||
details?: boolean; | ||
/** | ||
* Whether to use logits sampling instead of greedy decoding when generating new tokens. | ||
*/ | ||
do_sample?: boolean; | ||
/** | ||
* The maximum number of tokens to generate. | ||
*/ | ||
max_new_tokens?: number; | ||
/** | ||
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this | ||
* paper](https://hf.co/papers/1909.05858) for more details. | ||
*/ | ||
repetition_penalty?: number; | ||
/** | ||
* Whether to prepend the prompt to the generated text. | ||
*/ | ||
return_full_text?: boolean; | ||
/** | ||
* The random sampling seed. | ||
*/ | ||
seed?: number; | ||
/** | ||
* Stop generating tokens if a member of `stop_sequences` is generated. | ||
*/ | ||
stop_sequences?: string[]; | ||
/** | ||
* The value used to modulate the logits distribution. | ||
*/ | ||
temperature?: number; | ||
/** | ||
* The number of highest probability vocabulary tokens to keep for top-k-filtering. | ||
*/ | ||
top_k?: number; | ||
/** | ||
* If set to < 1, only the smallest set of most probable tokens with probabilities that add | ||
* up to `top_p` or higher are kept for generation. | ||
*/ | ||
top_p?: number; | ||
/** | ||
* Truncate input tokens to the given size. | ||
*/ | ||
truncate?: number; | ||
/** | ||
* Typical Decoding mass. See [Typical Decoding for Natural Language | ||
* Generation](https://hf.co/papers/2202.00666) for more information | ||
*/ | ||
typical_p?: number; | ||
/** | ||
* Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) | ||
*/ | ||
watermark?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Outputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationOutput { | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
details?: TextGenerationOutputDetails; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
export interface TextGenerationOutputDetails { | ||
/** | ||
* Details about additional sequences when best_of is provided | ||
*/ | ||
best_of_sequences?: TextGenerationOutputSequenceDetails[]; | ||
/** | ||
* The reason why the generation was stopped. | ||
*/ | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationOutputSequenceDetails { | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationPrefillToken { | ||
id: number; | ||
logprob: number; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Generated token. | ||
*/ | ||
export interface TextGenerationOutputToken { | ||
id: number; | ||
logprob?: number; | ||
/** | ||
* Whether or not that token is a special one | ||
*/ | ||
special: boolean; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). | ||
@@ -709,0 +904,0 @@ */ |
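Taken together, the declarations added above describe the full request/response contract for text generation. As a minimal usage sketch (the access token and model name are placeholders, not part of this diff; `hf.textGeneration` is the task method that consumes these types):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// `inputs` and `parameters` follow TextGenerationInput / TextGenerationParameters above;
// the resolved value is a TextGenerationOutput.
const output = await hf.textGeneration({
  model: "gpt2",
  inputs: "The answer to the universe is",
  parameters: {
    max_new_tokens: 20,
    temperature: 0.7,
    return_full_text: false,
  },
});

console.log(output.generated_text);
```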
@@ -137,6 +137,4 @@ /// <reference path="./index.d.ts" /> | ||
credentials = includeCredentials; | ||
} else if (typeof includeCredentials === "boolean") { | ||
credentials = includeCredentials ? "include" : void 0; | ||
} else if (includeCredentials === void 0) { | ||
credentials = "same-origin"; | ||
} else if (includeCredentials === true) { | ||
credentials = "include"; | ||
} | ||
@@ -150,3 +148,3 @@ const info = { | ||
}), | ||
credentials, | ||
...credentials && { credentials }, | ||
signal: options?.signal | ||
@@ -153,0 +151,0 @@ }; |
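The behavioural change in the compiled output is the switch from always passing `credentials` to spreading it conditionally, so the key is omitted from the fetch options whenever it is undefined. A standalone sketch of that pattern (the `buildInit` helper is illustrative, not taken from the bundle):

```ts
// When `credentials` is undefined the spread contributes nothing, so the key
// never appears in the RequestInit handed to fetch; when set, it is forwarded as-is.
function buildInit(credentials?: RequestCredentials): RequestInit {
  return {
    method: "POST",
    ...(credentials && { credentials }),
  };
}

console.log("credentials" in buildInit());     // false: key omitted entirely
console.log(buildInit("include").credentials); // "include"
```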
{ | ||
"name": "@huggingface/inference", | ||
"version": "2.6.6", | ||
"version": "2.6.7", | ||
"packageManager": "pnpm@8.10.5", | ||
@@ -43,3 +43,3 @@ "license": "MIT", | ||
"@types/node": "18.13.0", | ||
"@huggingface/tasks": "^0.6.0" | ||
"@huggingface/tasks": "^0.8.0" | ||
}, | ||
@@ -46,0 +46,0 @@ "resolutions": {}, |
@@ -5,2 +5,5 @@ import * as tasks from "./tasks"; | ||
/* eslint-disable @typescript-eslint/no-empty-interface */ | ||
/* eslint-disable @typescript-eslint/no-unsafe-declaration-merging */ | ||
type Task = typeof tasks; | ||
@@ -64,6 +67,4 @@ | ||
// eslint-disable-next-line @typescript-eslint/no-empty-interface | ||
export interface HfInference extends TaskWithNoAccessToken {} | ||
// eslint-disable-next-line @typescript-eslint/no-empty-interface | ||
export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoModel {} |
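The two new `eslint-disable` directives cover the declaration-merging trick that lets `HfInference` and `HfInferenceEndpoint` expose one typed method per task: an interface provides the signatures and is merged with the class that assigns the implementations. A generic sketch of the pattern (the `Greeter` name is illustrative, not the library's internals):

```ts
// Interface and class of the same name are merged: the interface supplies the
// method signature, the constructor supplies the implementation at runtime.
interface Greeter {
  hello(name: string): string;
}

// eslint-disable-next-line @typescript-eslint/no-unsafe-declaration-merging
class Greeter {
  constructor(greeting = "Hello") {
    this.hello = (name) => `${greeting}, ${name}!`;
  }
}

console.log(new Greeter().hello("world")); // "Hello, world!"
```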
@@ -92,15 +92,10 @@ import type { InferenceTask, Options, RequestArgs } from "../types"; | ||
// Let users configure credentials, or disable them all together (or keep default behavior). | ||
// --- | ||
// This used to be an internal property only and never exposed to users. This means that most usages will never define this value | ||
// So in order to make this backwards compatible, if it's undefined we go to "same-origin" (default behaviour before). | ||
// If it's a boolean and set to true then set to "include". If false, don't define credentials at all (useful for edge runtimes) | ||
// Then finally, if it's a string, use it as-is. | ||
/** | ||
* For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error | ||
*/ | ||
let credentials: RequestCredentials | undefined; | ||
if (typeof includeCredentials === "string") { | ||
credentials = includeCredentials as RequestCredentials; | ||
} else if (typeof includeCredentials === "boolean") { | ||
credentials = includeCredentials ? "include" : undefined; | ||
} else if (includeCredentials === undefined) { | ||
credentials = "same-origin"; | ||
} else if (includeCredentials === true) { | ||
credentials = "include"; | ||
} | ||
@@ -117,3 +112,3 @@ | ||
}), | ||
credentials, | ||
...(credentials && { credentials }), | ||
signal: options?.signal, | ||
@@ -120,0 +115,0 @@ }; |
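The user-facing contract for `includeCredentials` (reworded in the `src/types.ts` hunk further down) is: a string is passed straight through, `true` becomes `"include"`, and `false` sends no credentials field at all, in which case browsers fall back to their `"same-origin"` default. A sketch of that mapping (the `resolveCredentials` helper is illustrative, not a copy of the shipped code; treating an unset value like `false` is an assumption here):

```ts
function resolveCredentials(includeCredentials?: string | boolean): RequestCredentials | undefined {
  if (typeof includeCredentials === "string") {
    return includeCredentials as RequestCredentials; // e.g. "same-origin", "include", "omit"
  }
  if (includeCredentials === true) {
    return "include";
  }
  return undefined; // false or unset (assumed): omit the field and let the runtime decide
}
```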
@@ -1,2 +0,1 @@ | ||
import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks/src/tasks/text-generation/inference"; | ||
import { InferenceOutputError } from "../../lib/InferenceOutputError"; | ||
@@ -7,2 +6,205 @@ import type { BaseArgs, Options } from "../../types"; | ||
/** | ||
* Inputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationInput { | ||
/** | ||
* The text to initialize generation with | ||
*/ | ||
inputs: string; | ||
/** | ||
* Additional inference parameters | ||
*/ | ||
parameters?: TextGenerationParameters; | ||
/** | ||
* Whether to stream output tokens | ||
*/ | ||
stream?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Additional inference parameters | ||
* | ||
* Additional inference parameters for Text Generation | ||
*/ | ||
export interface TextGenerationParameters { | ||
/** | ||
* The number of sampling queries to run. Only the best one (in terms of total logprob) will | ||
* be returned. | ||
*/ | ||
best_of?: number; | ||
/** | ||
* Whether or not to output decoder input details | ||
*/ | ||
decoder_input_details?: boolean; | ||
/** | ||
* Whether or not to output details | ||
*/ | ||
details?: boolean; | ||
/** | ||
* Whether to use logits sampling instead of greedy decoding when generating new tokens. | ||
*/ | ||
do_sample?: boolean; | ||
/** | ||
* The maximum number of tokens to generate. | ||
*/ | ||
max_new_tokens?: number; | ||
/** | ||
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this | ||
* paper](https://hf.co/papers/1909.05858) for more details. | ||
*/ | ||
repetition_penalty?: number; | ||
/** | ||
* Whether to prepend the prompt to the generated text. | ||
*/ | ||
return_full_text?: boolean; | ||
/** | ||
* The random sampling seed. | ||
*/ | ||
seed?: number; | ||
/** | ||
* Stop generating tokens if a member of `stop_sequences` is generated. | ||
*/ | ||
stop_sequences?: string[]; | ||
/** | ||
* The value used to modulate the logits distribution. | ||
*/ | ||
temperature?: number; | ||
/** | ||
* The number of highest probability vocabulary tokens to keep for top-k-filtering. | ||
*/ | ||
top_k?: number; | ||
/** | ||
* If set to < 1, only the smallest set of most probable tokens with probabilities that add | ||
* up to `top_p` or higher are kept for generation. | ||
*/ | ||
top_p?: number; | ||
/** | ||
* Truncate input tokens to the given size. | ||
*/ | ||
truncate?: number; | ||
/** | ||
* Typical Decoding mass. See [Typical Decoding for Natural Language | ||
* Generation](https://hf.co/papers/2202.00666) for more information | ||
*/ | ||
typical_p?: number; | ||
/** | ||
* Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) | ||
*/ | ||
watermark?: boolean; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Outputs for Text Generation inference | ||
*/ | ||
export interface TextGenerationOutput { | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
details?: TextGenerationOutputDetails; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* When enabled, details about the generation | ||
*/ | ||
export interface TextGenerationOutputDetails { | ||
/** | ||
* Details about additional sequences when best_of is provided | ||
*/ | ||
best_of_sequences?: TextGenerationOutputSequenceDetails[]; | ||
/** | ||
* The reason why the generation was stopped. | ||
*/ | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationOutputSequenceDetails { | ||
finish_reason: TextGenerationFinishReason; | ||
/** | ||
* The generated text | ||
*/ | ||
generated_text: string; | ||
/** | ||
* The number of generated tokens | ||
*/ | ||
generated_tokens: number; | ||
prefill: TextGenerationPrefillToken[]; | ||
/** | ||
* The random seed used for generation | ||
*/ | ||
seed?: number; | ||
/** | ||
* The generated tokens and associated details | ||
*/ | ||
tokens: TextGenerationOutputToken[]; | ||
/** | ||
* Most likely tokens | ||
*/ | ||
top_tokens?: Array<TextGenerationOutputToken[]>; | ||
[property: string]: unknown; | ||
} | ||
export interface TextGenerationPrefillToken { | ||
id: number; | ||
logprob: number; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* Generated token. | ||
*/ | ||
export interface TextGenerationOutputToken { | ||
id: number; | ||
logprob?: number; | ||
/** | ||
* Whether or not that token is a special one | ||
*/ | ||
special: boolean; | ||
/** | ||
* The text associated with that token | ||
*/ | ||
text: string; | ||
[property: string]: unknown; | ||
} | ||
/** | ||
* The reason why the generation was stopped. | ||
* | ||
* length: The generated sequence reached the maximum allowed length | ||
* | ||
* eos_token: The model generated an end-of-sentence (EOS) token | ||
* | ||
* stop_sequence: One of the sequences in stop_sequences was generated | ||
*/ | ||
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; | ||
/** | ||
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). | ||
@@ -9,0 +211,0 @@ */ |
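Since the same interfaces are now declared directly in `textGeneration.ts`, the optional `details` block can be illustrated with a second sketch (placeholder token and model again; `details: true` in the parameters is what makes the optional fields appear):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

const output = await hf.textGeneration({
  model: "gpt2",
  inputs: "Once upon a time",
  parameters: { max_new_tokens: 30, do_sample: true, seed: 42, details: true },
});

// `details` is only populated when requested via the parameter above.
if (output.details) {
  console.log(output.details.finish_reason);    // "length" | "eos_token" | "stop_sequence"
  console.log(output.details.generated_tokens); // number of tokens produced
}
```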
import type { BaseArgs, Options } from "../../types"; | ||
import { streamingRequest } from "../custom/streamingRequest"; | ||
import type { TextGenerationInput } from "./textGeneration"; | ||
import type { TextGenerationInput } from "@huggingface/tasks/src/tasks/text-generation/inference"; | ||
export interface TextGenerationStreamToken { | ||
@@ -7,0 +6,0 @@ /** Token ID from the model tokenizer */ |
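`textGenerationStream` keeps accepting the same `TextGenerationInput`; only the import location of the type changes. A streaming sketch (placeholder token and model; Node-style stdout for the incremental tokens):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// Same input shape as textGeneration, but tokens arrive incrementally.
for await (const event of hf.textGenerationStream({
  model: "gpt2",
  inputs: "The sky is",
  parameters: { max_new_tokens: 10 },
})) {
  process.stdout.write(event.token.text);
}
```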
@@ -35,3 +35,3 @@ import type { PipelineType } from "@huggingface/tasks"; | ||
/** | ||
* (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. | ||
* Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers). | ||
*/ | ||
@@ -38,0 +38,0 @@ includeCredentials?: string | boolean; |
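The reworded comment above is the whole user-facing contract for the option. As a usage sketch (placeholder token; `includeCredentials` can be supplied as a client-wide default or per call through the options argument):

```ts
import { HfInference } from "@huggingface/inference";

// Disable credentials entirely, e.g. for edge runtimes whose fetch rejects the field.
const hf = new HfInference("hf_xxx", { includeCredentials: false });

// ...or override it for a single request via the second (options) argument.
await hf.textGeneration(
  { model: "gpt2", inputs: "Hello" },
  { includeCredentials: "same-origin" },
);
```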