@huggingface/tasks
Comparing version 0.0.7 to 0.0.8
@@ -279,2 +279,6 @@ /**
     name: string;
+    subtasks: {
+      type: string;
+      name: string;
+    }[];
     modality: "cv";
@@ -420,2 +424,12 @@ color: "indigo";
   };
+  "text-to-3d": {
+    name: string;
+    modality: "multimodal";
+    color: "yellow";
+  };
+  "image-to-3d": {
+    name: string;
+    modality: "multimodal";
+    color: "green";
+  };
   other: {
@@ -430,5 +444,5 @@ name: string;
 type PipelineType = keyof typeof PIPELINE_DATA;
-declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection")[];
+declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d")[];
 declare const SUBTASK_TYPES: string[];
-declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection">;
+declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d">;
@@ -764,6 +778,9 @@ /**
 declare const snippetZeroShotClassification$1: (model: ModelData) => string;
+declare const snippetZeroShotImageClassification: (model: ModelData) => string;
 declare const snippetBasic$1: (model: ModelData) => string;
 declare const snippetFile$1: (model: ModelData) => string;
 declare const snippetTextToImage$1: (model: ModelData) => string;
+declare const snippetTabular: (model: ModelData) => string;
 declare const snippetTextToAudio$1: (model: ModelData) => string;
+declare const snippetDocumentQuestionAnswering: (model: ModelData) => string;
 declare const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) => string>>;
@@ -776,2 +793,5 @@ declare function getPythonInferenceSnippet(model: ModelData, accessToken: string): string;
 declare const python_pythonSnippets: typeof pythonSnippets;
+declare const python_snippetDocumentQuestionAnswering: typeof snippetDocumentQuestionAnswering;
+declare const python_snippetTabular: typeof snippetTabular;
+declare const python_snippetZeroShotImageClassification: typeof snippetZeroShotImageClassification;
 declare namespace python {
@@ -783,6 +803,9 @@ export {
   snippetBasic$1 as snippetBasic,
+  python_snippetDocumentQuestionAnswering as snippetDocumentQuestionAnswering,
   snippetFile$1 as snippetFile,
+  python_snippetTabular as snippetTabular,
   snippetTextToAudio$1 as snippetTextToAudio,
   snippetTextToImage$1 as snippetTextToImage,
   snippetZeroShotClassification$1 as snippetZeroShotClassification,
+  python_snippetZeroShotImageClassification as snippetZeroShotImageClassification,
 };
@@ -789,0 +812,0 @@ }
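For orientation, here is a minimal sketch of how these declarations might be consumed, assuming `PIPELINE_TYPES_SET`, `getPythonInferenceSnippet`, and `ModelData` are importable from the package root (the import path and the model fields below are assumptions, not confirmed exports):

```ts
import { PIPELINE_TYPES_SET, getPythonInferenceSnippet, type ModelData } from "@huggingface/tasks";

// Hypothetical model metadata; the real ModelData type carries more fields.
const model: ModelData = {
  id: "impira/layoutlm-document-qa",
  pipeline_tag: "document-question-answering",
};

// Guard against unknown pipeline tags before generating a Python snippet.
if (model.pipeline_tag && PIPELINE_TYPES_SET.has(model.pipeline_tag)) {
  console.log(getPythonInferenceSnippet(model, "hf_xxx"));
}
```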
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@8.10.5",
-  "version": "0.0.7",
+  "version": "0.0.8",
   "description": "List of ML tasks for huggingface.co/tasks",
@@ -33,5 +33,3 @@ "repository": "https://github.com/huggingface/huggingface.js.git",
   "license": "MIT",
-  "devDependencies": {
-    "typescript": "^5.0.4"
-  },
+  "devDependencies": {},
   "scripts": {
@@ -38,0 +36,0 @@ "lint": "eslint --quiet --fix --ext .cjs,.ts .",
@@ -438,2 +438,16 @@ export const MODALITIES = ["cv", "nlp", "audio", "tabular", "multimodal", "rl", "other"] as const;
   name: "Image-to-Image",
+  subtasks: [
+    {
+      type: "image-inpainting",
+      name: "Image Inpainting",
+    },
+    {
+      type: "image-colorization",
+      name: "Image Colorization",
+    },
+    {
+      type: "super-resolution",
+      name: "Super Resolution",
+    },
+  ],
   modality: "cv",
@@ -625,2 +639,12 @@ color: "indigo",
   },
+  "text-to-3d": {
+    name: "Text-to-3D",
+    modality: "multimodal",
+    color: "yellow",
+  },
+  "image-to-3d": {
+    name: "Image-to-3D",
+    modality: "multimodal",
+    color: "green",
+  },
   other: {
@@ -627,0 +651,0 @@ name: "Other",
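A quick sketch of what the new `subtasks` metadata looks like at runtime, assuming `PIPELINE_DATA` is exported from the package root (the import path is an assumption):

```ts
import { PIPELINE_DATA } from "@huggingface/tasks";

// Enumerate the subtasks registered for image-to-image in this release.
for (const subtask of PIPELINE_DATA["image-to-image"].subtasks ?? []) {
  console.log(`${subtask.type}: ${subtask.name}`);
}
// image-inpainting: Image Inpainting
// image-colorization: Image Colorization
// super-resolution: Super Resolution
```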
@@ -34,2 +34,8 @@ import type { ModelData } from "../model-data";
+const inputsVisualQuestionAnswering = () =>
+  `{
+    "image": "cat.png",
+    "question": "What is in this image?"
+}`;
+
 const inputsQuestionAnswering = () =>
@@ -83,2 +89,7 @@ `{
+const inputsTabularPrediction = () =>
+  `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+
+const inputsZeroShotImageClassification = () => `"cats.jpg"`;
+
 const modelInputSnippets: {
@@ -91,2 +102,3 @@ [key in PipelineType]?: (model: ModelData) => string;
   conversational: inputsConversational,
+  "document-question-answering": inputsVisualQuestionAnswering,
   "feature-extraction": inputsFeatureExtraction,
@@ -102,2 +114,4 @@ "fill-mask": inputsFillMask,
   "table-question-answering": inputsTableQuestionAnswering,
+  "tabular-regression": inputsTabularPrediction,
+  "tabular-classification": inputsTabularPrediction,
   "text-classification": inputsTextClassification,
@@ -112,2 +126,3 @@ "text-generation": inputsTextGeneration,
   "zero-shot-classification": inputsZeroShotClassification,
+  "zero-shot-image-classification": inputsZeroShotImageClassification,
 };
@@ -114,0 +129,0 @@
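These placeholder inputs are keyed by pipeline tag; the Python snippet generators below read them through a `getModelInputSnippet` helper. A plausible sketch of that lookup is shown here; the actual implementation in the package may differ:

```ts
import type { ModelData } from "../model-data";

// Plausible shape of the helper: return the placeholder input for the
// model's pipeline tag, with a generic fallback when none is registered.
export function getModelInputSnippet(model: ModelData): string {
  const getInputs = model.pipeline_tag ? modelInputSnippets[model.pipeline_tag] : undefined;
  return getInputs ? getInputs(model) : '"No input example has been defined for this model task."';
}
```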
@@ -15,2 +15,18 @@ import type { ModelData } from "../model-data.js";
+export const snippetZeroShotImageClassification = (model: ModelData): string =>
+  `def query(data):
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload={
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "image_path": ${getModelInputSnippet(model)},
+    "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
+
 export const snippetBasic = (model: ModelData): string =>
@@ -46,2 +62,10 @@ `def query(payload):
+export const snippetTabular = (model: ModelData): string =>
+  `def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+
+response = query({
+    "inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
 export const snippetTextToAudio = (model: ModelData): string => {
@@ -75,4 +99,17 @@ // Transformers TTS pipeline and api-inference-community (AIC) pipeline outputs are diverged
 };
+
+export const snippetDocumentQuestionAnswering = (model: ModelData): string =>
+  `def query(payload):
+    with open(payload["image"], "rb") as f:
+        img = f.read()
+    payload["image"] = base64.b64encode(img).decode("utf-8")
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": ${getModelInputSnippet(model)},
+})`;
+
 export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) => string>> = {
-  // Same order as in js/src/lib/interfaces/Types.ts
+  // Same order as in tasks/src/pipelines.ts
   "text-classification": snippetBasic,
@@ -98,5 +135,9 @@ "token-classification": snippetBasic,
   "image-classification": snippetFile,
-  "image-to-text": snippetFile,
+  "tabular-regression": snippetTabular,
+  "tabular-classification": snippetTabular,
   "object-detection": snippetFile,
   "image-segmentation": snippetFile,
+  "document-question-answering": snippetDocumentQuestionAnswering,
+  "image-to-text": snippetFile,
+  "zero-shot-image-classification": snippetZeroShotImageClassification,
 };
@@ -103,0 +144,0 @@
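A short sketch of how the snippet map can be rendered for a concrete model; the model id and fields below are placeholders, not a real entry:

```ts
// Hypothetical model; only the fields the generators read are set here.
const model = {
  id: "user/tabular-demo",
  pipeline_tag: "tabular-classification",
} as ModelData;

// Look up the generator for the model's task; undefined if none is registered.
const makeSnippet = model.pipeline_tag ? pythonSnippets[model.pipeline_tag] : undefined;
if (makeSnippet) {
  console.log(makeSnippet(model)); // prints the tabular Python example above
}
```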
@@ -43,8 +43,8 @@ import { type PipelineType, PIPELINE_DATA } from "../pipelines";
 export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
-  "audio-classification": ["speechbrain", "transformers"],
+  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
   "audio-to-audio": ["asteroid", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   conversational: ["transformers"],
-  "depth-estimation": ["transformers"],
-  "document-question-answering": ["transformers"],
+  "depth-estimation": ["transformers", "transformers.js"],
+  "document-question-answering": ["transformers", "transformers.js"],
   "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
@@ -55,3 +55,3 @@ "fill-mask": ["transformers", "transformers.js"],
   "image-segmentation": ["transformers", "transformers.js"],
-  "image-to-image": ["diffusers"],
+  "image-to-image": ["diffusers", "transformers.js"],
   "image-to-text": ["transformers.js"],
@@ -78,4 +78,4 @@ "image-to-video": ["diffusers"],
   "text-to-image": ["diffusers"],
-  "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-  "text-to-audio": ["transformers"],
+  "text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+  "text-to-audio": ["transformers", "transformers.js"],
   "text-to-video": ["diffusers"],
@@ -95,7 +95,9 @@ "text2text-generation": ["transformers", "transformers.js"],
   "unconditional-image-generation": ["diffusers"],
-  "visual-question-answering": ["transformers"],
+  "visual-question-answering": ["transformers", "transformers.js"],
   "voice-activity-detection": [],
   "zero-shot-classification": ["transformers", "transformers.js"],
   "zero-shot-image-classification": ["transformers", "transformers.js"],
-  "zero-shot-object-detection": ["transformers"],
+  "zero-shot-object-detection": ["transformers", "transformers.js"],
+  "text-to-3d": [],
+  "image-to-3d": [],
 };
@@ -168,2 +170,4 @@
   "zero-shot-object-detection": getData("zero-shot-object-detection", placeholder),
+  "text-to-3d": getData("text-to-3d", placeholder),
+  "image-to-3d": getData("image-to-3d", placeholder),
 } as const;
@@ -170,0 +174,0 @@
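Several entries gained `transformers.js` support in this release; a small sketch to list them from the updated table (the import path is an assumption):

```ts
import { TASKS_MODEL_LIBRARIES } from "@huggingface/tasks";

// Collect every task that transformers.js can serve as of 0.0.8.
const tjsTasks = Object.entries(TASKS_MODEL_LIBRARIES)
  .filter(([, libraries]) => libraries.includes("transformers.js"))
  .map(([task]) => task);
console.log(tjsTasks); // now also contains "depth-estimation", "image-to-image", "text-to-speech", ...
```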
@@ -35,2 +35,12 @@ This task covers guides on both [text-generation](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) and [text-to-text generation](https://huggingface.co/models?pipeline_tag=text2text-generation&sort=downloads) models. Popular large language models that are used for chats or following instructions are also covered in this task. You can find the list of selected open-source large language models [here](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard), ranked by their performance scores.
+## Language Model Variants
+
+When it comes to text generation, the underlying language model can come in several types:
+
+- **Base models:** plain language models like [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) and [Llama-2-70b](https://huggingface.co/meta-llama/Llama-2-70b-hf). These models are well suited to fine-tuning and few-shot prompting.
+- **Instruction-trained models:** models trained in a multi-task manner to follow a broad range of instructions like "Write me a recipe for chocolate cake". Models like [Flan-T5](https://huggingface.co/google/flan-t5-xl), [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), and [falcon-40b-instruct](https://huggingface.co/tiiuae/falcon-40b-instruct) are examples. In general, instruction-trained models produce better responses to instructions than base models.
+- **Human feedback models:** models that extend base and instruction-trained models by incorporating human feedback that rates the quality of the generated text against criteria like [helpfulness, honesty, and harmlessness](https://arxiv.org/abs/2112.00861). The feedback is then combined with an optimization technique such as reinforcement learning to align the model more closely with human preferences, a methodology known as [Reinforcement Learning from Human Feedback](https://huggingface.co/blog/rlhf), or RLHF for short. [Llama2-Chat](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) is an open-source model aligned through human feedback.
+
 ## Inference
@@ -37,0 +47,0 @@
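To make the new README section concrete, here is a hedged example of querying an instruction-trained model through the Inference API using the sibling `@huggingface/inference` package (the model choice and token are placeholders):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // your access token

// Ask an instruction-trained model to follow a plain-language request.
const output = await hf.textGeneration({
  model: "mistralai/Mistral-7B-Instruct-v0.1",
  inputs: "Write me a recipe for chocolate cake.",
  parameters: { max_new_tokens: 250 },
});
console.log(output.generated_text);
```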
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet