tgi
Advanced tools
+32
-4
| Metadata-Version: 2.1 | ||
| Name: tgi | ||
| Version: 1.4.3 | ||
| Version: 1.4.4 | ||
| Summary: Nightly release of Hugging Face Text Generation Python Client | ||
@@ -22,3 +22,3 @@ Home-page: https://github.com/huggingface/text-generation-inference | ||
| Requires-Dist: huggingface-hub (>=0.12,<1.0) | ||
| Requires-Dist: pydantic (>1.10,<3) | ||
| Requires-Dist: pydantic (>2,<3) | ||
| Project-URL: Repository, https://github.com/huggingface/text-generation-inference | ||
@@ -136,3 +136,15 @@ Description-Content-Type: text/markdown | ||
| ```python | ||
| # Request Parameters | ||
| # enum for grammar type | ||
| class GrammarType(Enum): | ||
| Json = "json" | ||
| Regex = "regex" | ||
| # Grammar type and value | ||
| class Grammar: | ||
| # Grammar type | ||
| type: GrammarType | ||
| # Grammar value | ||
| value: Union[str, dict] | ||
| class Parameters: | ||
@@ -146,2 +158,6 @@ # Activate logits sampling | ||
| repetition_penalty: Optional[float] | ||
| # The parameter for frequency penalty. 0.0 means no penalty | ||
| # Penalize new tokens based on their existing frequency in the text so far, | ||
| # decreasing the model's likelihood to repeat the same line verbatim. | ||
| frequency_penalty: Optional[float] | ||
| # Whether to prepend the prompt to the generated text | ||
@@ -169,2 +185,4 @@ return_full_text: bool | ||
| watermark: bool | ||
| # Get generation details | ||
| details: bool | ||
| # Get decoder input token logprobs and ids | ||
@@ -174,3 +192,13 @@ decoder_input_details: bool | ||
| top_n_tokens: Optional[int] | ||
| # grammar to use for generation | ||
| grammar: Optional[Grammar] | ||
| class Request: | ||
| # Prompt | ||
| inputs: str | ||
| # Generation parameters | ||
| parameters: Optional[Parameters] | ||
| # Whether to stream output tokens | ||
| stream: bool | ||
| # Decoder input tokens | ||
@@ -194,3 +222,3 @@ class InputToken: | ||
| # Logprob | ||
| logprob: float | ||
| logprob: Optional[float] | ||
| # Is the token a special token | ||
@@ -197,0 +225,0 @@ # Can be used to ignore tokens when concatenating |
+2
-2
| [tool.poetry] | ||
| name = "tgi" | ||
| version = "1.4.3" | ||
| version = "1.4.4" | ||
| description = "Nightly release of Hugging Face Text Generation Python Client" | ||
@@ -15,3 +15,3 @@ license = "Apache-2.0" | ||
| python = "^3.7" | ||
| pydantic = "> 1.10, < 3" | ||
| pydantic = "> 2, < 3" | ||
| aiohttp = "^3.8" | ||
@@ -18,0 +18,0 @@ huggingface-hub = ">= 0.12, < 1.0" |
+30
-2
@@ -110,3 +110,15 @@ # Text Generation | ||
| ```python | ||
| # Request Parameters | ||
| # enum for grammar type | ||
| class GrammarType(Enum): | ||
| Json = "json" | ||
| Regex = "regex" | ||
| # Grammar type and value | ||
| class Grammar: | ||
| # Grammar type | ||
| type: GrammarType | ||
| # Grammar value | ||
| value: Union[str, dict] | ||
| class Parameters: | ||
@@ -120,2 +132,6 @@ # Activate logits sampling | ||
| repetition_penalty: Optional[float] | ||
| # The parameter for frequency penalty. 0.0 means no penalty | ||
| # Penalize new tokens based on their existing frequency in the text so far, | ||
| # decreasing the model's likelihood to repeat the same line verbatim. | ||
| frequency_penalty: Optional[float] | ||
| # Whether to prepend the prompt to the generated text | ||
@@ -143,2 +159,4 @@ return_full_text: bool | ||
| watermark: bool | ||
| # Get generation details | ||
| details: bool | ||
| # Get decoder input token logprobs and ids | ||
@@ -148,3 +166,13 @@ decoder_input_details: bool | ||
| top_n_tokens: Optional[int] | ||
| # grammar to use for generation | ||
| grammar: Optional[Grammar] | ||
| class Request: | ||
| # Prompt | ||
| inputs: str | ||
| # Generation parameters | ||
| parameters: Optional[Parameters] | ||
| # Whether to stream output tokens | ||
| stream: bool | ||
| # Decoder input tokens | ||
@@ -168,3 +196,3 @@ class InputToken: | ||
| # Logprob | ||
| logprob: float | ||
| logprob: Optional[float] | ||
| # Is the token a special token | ||
@@ -171,0 +199,0 @@ # Can be used to ignore tokens when concatenating |
+39
-3
@@ -70,2 +70,3 @@ import json | ||
| messages: List[Message], | ||
| repetition_penalty: Optional[float] = None, | ||
| frequency_penalty: Optional[float] = None, | ||
@@ -91,5 +92,9 @@ logit_bias: Optional[List[float]] = None, | ||
| List of messages | ||
| repetition_penalty (`float`): | ||
| The parameter for repetition penalty. 1.0 means no penalty. See [this | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| frequency_penalty (`float`): | ||
| The parameter for frequency penalty. 0.0 means no penalty. See [this | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| The parameter for frequency penalty. 0.0 means no penalty | ||
| Penalize new tokens based on their existing frequency in the text so far, | ||
| decreasing the model's likelihood to repeat the same line verbatim. | ||
| logit_bias (`List[float]`): | ||
@@ -126,2 +131,3 @@ Adjust the likelihood of specified tokens | ||
| messages=messages, | ||
| repetition_penalty=repetition_penalty, | ||
| frequency_penalty=frequency_penalty, | ||
@@ -185,2 +191,3 @@ logit_bias=logit_bias, | ||
| repetition_penalty: Optional[float] = None, | ||
| frequency_penalty: Optional[float] = None, | ||
| return_full_text: bool = False, | ||
@@ -214,2 +221,6 @@ seed: Optional[int] = None, | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| frequency_penalty (`float`): | ||
| The parameter for frequency penalty. 0.0 means no penalty | ||
| Penalize new tokens based on their existing frequency in the text so far, | ||
| decreasing the model's likelihood to repeat the same line verbatim. | ||
| return_full_text (`bool`): | ||
@@ -253,2 +264,3 @@ Whether to prepend the prompt to the generated text | ||
| repetition_penalty=repetition_penalty, | ||
| frequency_penalty=frequency_penalty, | ||
| return_full_text=return_full_text, | ||
@@ -287,2 +299,3 @@ seed=seed, | ||
| repetition_penalty: Optional[float] = None, | ||
| frequency_penalty: Optional[float] = None, | ||
| return_full_text: bool = False, | ||
@@ -313,2 +326,6 @@ seed: Optional[int] = None, | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| frequency_penalty (`float`): | ||
| The parameter for frequency penalty. 0.0 means no penalty | ||
| Penalize new tokens based on their existing frequency in the text so far, | ||
| decreasing the model's likelihood to repeat the same line verbatim. | ||
| return_full_text (`bool`): | ||
@@ -351,2 +368,3 @@ Whether to prepend the prompt to the generated text | ||
| repetition_penalty=repetition_penalty, | ||
| frequency_penalty=frequency_penalty, | ||
| return_full_text=return_full_text, | ||
@@ -447,2 +465,3 @@ seed=seed, | ||
| messages: List[Message], | ||
| repetition_penalty: Optional[float] = None, | ||
| frequency_penalty: Optional[float] = None, | ||
@@ -468,5 +487,9 @@ logit_bias: Optional[List[float]] = None, | ||
| List of messages | ||
| frequency_penalty (`float`): | ||
| repetition_penalty (`float`): | ||
| The parameter for repetition penalty. 1.0 means no penalty. See [this | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| frequency_penalty (`float`): | ||
| The parameter for frequency penalty. 0.0 means no penalty | ||
| Penalize new tokens based on their existing frequency in the text so far, | ||
| decreasing the model's likelihood to repeat the same line verbatim. | ||
| logit_bias (`List[float]`): | ||
@@ -503,2 +526,3 @@ Adjust the likelihood of specified tokens | ||
| messages=messages, | ||
| repetition_penalty=repetition_penalty, | ||
| frequency_penalty=frequency_penalty, | ||
@@ -561,2 +585,3 @@ logit_bias=logit_bias, | ||
| repetition_penalty: Optional[float] = None, | ||
| frequency_penalty: Optional[float] = None, | ||
| return_full_text: bool = False, | ||
@@ -590,2 +615,6 @@ seed: Optional[int] = None, | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| frequency_penalty (`float`): | ||
| The parameter for frequency penalty. 0.0 means no penalty | ||
| Penalize new tokens based on their existing frequency in the text so far, | ||
| decreasing the model's likelihood to repeat the same line verbatim. | ||
| return_full_text (`bool`): | ||
@@ -631,2 +660,3 @@ Whether to prepend the prompt to the generated text | ||
| repetition_penalty=repetition_penalty, | ||
| frequency_penalty=frequency_penalty, | ||
| return_full_text=return_full_text, | ||
@@ -662,2 +692,3 @@ seed=seed, | ||
| repetition_penalty: Optional[float] = None, | ||
| frequency_penalty: Optional[float] = None, | ||
| return_full_text: bool = False, | ||
@@ -688,2 +719,6 @@ seed: Optional[int] = None, | ||
| paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| frequency_penalty (`float`): | ||
| The parameter for frequency penalty. 0.0 means no penalty | ||
| Penalize new tokens based on their existing frequency in the text so far, | ||
| decreasing the model's likelihood to repeat the same line verbatim. | ||
| return_full_text (`bool`): | ||
@@ -726,2 +761,3 @@ Whether to prepend the prompt to the generated text | ||
| repetition_penalty=repetition_penalty, | ||
| frequency_penalty=frequency_penalty, | ||
| return_full_text=return_full_text, | ||
@@ -728,0 +764,0 @@ seed=seed, |
+39
-24
| from enum import Enum | ||
| from pydantic import BaseModel, validator | ||
| from pydantic import BaseModel, field_validator | ||
| from typing import Optional, List, Union, Any | ||
@@ -35,3 +35,3 @@ | ||
| # Content of the message | ||
| content: Optional[str] | ||
| content: Optional[str] = None | ||
| # Optional name of the message sender | ||
@@ -60,3 +60,3 @@ name: Optional[str] = None | ||
| # Usage details of the chat completion | ||
| usage: Any | ||
| usage: Optional[Any] = None | ||
@@ -78,3 +78,3 @@ | ||
| role: str | ||
| content: Optional[str] | ||
| content: Optional[str] = None | ||
| tool_calls: Optional[ChoiceDeltaToolCall] | ||
@@ -115,3 +115,8 @@ | ||
| messages: List[Message] | ||
| # Penalty for frequency of new tokens | ||
| # The parameter for repetition penalty. 1.0 means no penalty. | ||
| # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. | ||
| repetition_penalty: Optional[float] = None | ||
| # The parameter for frequency penalty. 0.0 means no penalty | ||
| # Penalize new tokens based on their existing frequency in the text so far, | ||
| # decreasing the model's likelihood to repeat the same line verbatim. | ||
| frequency_penalty: Optional[float] = None | ||
@@ -152,2 +157,6 @@ # Bias values for token selection | ||
| repetition_penalty: Optional[float] = None | ||
| # The parameter for frequency penalty. 0.0 means no penalty | ||
| # Penalize new tokens based on their existing frequency in the text so far, | ||
| # decreasing the model's likelihood to repeat the same line verbatim. | ||
| frequency_penalty: Optional[float] = None | ||
| # Whether to prepend the prompt to the generated text | ||
@@ -184,3 +193,3 @@ return_full_text: bool = False | ||
| @validator("best_of") | ||
| @field_validator("best_of") | ||
| def valid_best_of(cls, field_value, values): | ||
@@ -190,10 +199,10 @@ if field_value is not None: | ||
| raise ValidationError("`best_of` must be strictly positive") | ||
| if field_value > 1 and values["seed"] is not None: | ||
| if field_value > 1 and values.data["seed"] is not None: | ||
| raise ValidationError("`seed` must not be set when `best_of` is > 1") | ||
| sampling = ( | ||
| values["do_sample"] | ||
| | (values["temperature"] is not None) | ||
| | (values["top_k"] is not None) | ||
| | (values["top_p"] is not None) | ||
| | (values["typical_p"] is not None) | ||
| values.data["do_sample"] | ||
| | (values.data["temperature"] is not None) | ||
| | (values.data["top_k"] is not None) | ||
| | (values.data["top_p"] is not None) | ||
| | (values.data["typical_p"] is not None) | ||
| ) | ||
@@ -205,3 +214,3 @@ if field_value > 1 and not sampling: | ||
| @validator("repetition_penalty") | ||
| @field_validator("repetition_penalty") | ||
| def valid_repetition_penalty(cls, v): | ||
@@ -212,3 +221,9 @@ if v is not None and v <= 0: | ||
| @validator("seed") | ||
| @field_validator("frequency_penalty") | ||
| def valid_frequency_penalty(cls, v): | ||
| if v is not None and v <= 0: | ||
| raise ValidationError("`frequency_penalty` must be strictly positive") | ||
| return v | ||
| @field_validator("seed") | ||
| def valid_seed(cls, v): | ||
@@ -219,3 +234,3 @@ if v is not None and v < 0: | ||
| @validator("temperature") | ||
| @field_validator("temperature") | ||
| def valid_temp(cls, v): | ||
@@ -226,3 +241,3 @@ if v is not None and v <= 0: | ||
| @validator("top_k") | ||
| @field_validator("top_k") | ||
| def valid_top_k(cls, v): | ||
@@ -233,3 +248,3 @@ if v is not None and v <= 0: | ||
| @validator("top_p") | ||
| @field_validator("top_p") | ||
| def valid_top_p(cls, v): | ||
@@ -240,3 +255,3 @@ if v is not None and (v <= 0 or v >= 1.0): | ||
| @validator("truncate") | ||
| @field_validator("truncate") | ||
| def valid_truncate(cls, v): | ||
@@ -247,3 +262,3 @@ if v is not None and v <= 0: | ||
| @validator("typical_p") | ||
| @field_validator("typical_p") | ||
| def valid_typical_p(cls, v): | ||
@@ -254,3 +269,3 @@ if v is not None and (v <= 0 or v >= 1.0): | ||
| @validator("top_n_tokens") | ||
| @field_validator("top_n_tokens") | ||
| def valid_top_n_tokens(cls, v): | ||
@@ -261,3 +276,3 @@ if v is not None and v <= 0: | ||
| @validator("grammar") | ||
| @field_validator("grammar") | ||
| def valid_grammar(cls, v): | ||
@@ -280,3 +295,3 @@ if v is not None: | ||
| @validator("inputs") | ||
| @field_validator("inputs") | ||
| def valid_input(cls, v): | ||
@@ -287,5 +302,5 @@ if not v: | ||
| @validator("stream") | ||
| @field_validator("stream") | ||
| def valid_best_of_stream(cls, field_value, values): | ||
| parameters = values["parameters"] | ||
| parameters = values.data["parameters"] | ||
| if ( | ||
@@ -292,0 +307,0 @@ parameters is not None |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
70285
7.37%1299
4%