inferless-cli
Advanced tools
| # __init__.py | ||
| __version__ = "2.0.19" | ||
| __version__ = "2.0.20" |
@@ -37,2 +37,3 @@ import os | ||
| create_presigned_upload_url_hf_files_upload, | ||
| get_default_templates_list, | ||
| get_machines, | ||
@@ -80,2 +81,3 @@ get_model_import_details, | ||
| volume_mount_path, | ||
| runtime_type, | ||
| ): | ||
@@ -114,2 +116,3 @@ region, new_beta = validate_machine(gpu, region, fractional, beta) | ||
| volume_mount_path, | ||
| runtime_type, | ||
| ) | ||
@@ -161,2 +164,3 @@ validate_yaml_data(yaml_data) | ||
| volume_mount_path, | ||
| runtime_type, | ||
| ): | ||
@@ -243,2 +247,46 @@ _, _, _, workspace_id, _ = decrypt_tokens() | ||
| config.update_config("configuration.custom_runtime_id", res["id"]) | ||
| else: | ||
| templates = get_default_templates_list() | ||
| if runtime_type == "fastapi": | ||
| fastapi_templates = [ | ||
| t | ||
| for t in templates | ||
| if t["name"] == "Inferless Default (FastAPI)" and t["is_latest_version"] | ||
| ] | ||
| if fastapi_templates: | ||
| config.update_config( | ||
| "configuration.default_runtime_id", fastapi_templates[0]["id"] | ||
| ) | ||
| else: | ||
| # Fallback to any FastAPI template if latest not found | ||
| fastapi_templates = [ | ||
| t for t in templates if t["name"] == "Inferless Default (FastAPI)" | ||
| ] | ||
| if fastapi_templates: | ||
| config.update_config( | ||
| "configuration.default_runtime_id", fastapi_templates[0]["id"] | ||
| ) | ||
| else: | ||
| raise InferlessCLIError("No FastAPI runtime template found") | ||
| else: | ||
| default_templates = [ | ||
| t | ||
| for t in templates | ||
| if t["name"] == "Inferless Default" and t["is_latest_version"] | ||
| ] | ||
| if default_templates: | ||
| config.update_config( | ||
| "configuration.default_runtime_id", default_templates[0]["id"] | ||
| ) | ||
| else: | ||
| # Fallback to any default template if latest not found | ||
| default_templates = [ | ||
| t for t in templates if t["name"] == "Inferless Default" | ||
| ] | ||
| if default_templates: | ||
| config.update_config( | ||
| "configuration.default_runtime_id", default_templates[0]["id"] | ||
| ) | ||
| else: | ||
| raise InferlessCLIError("No default runtime template found") | ||
@@ -885,2 +933,7 @@ if volume: | ||
| if config.get_value("configuration.default_runtime_id"): | ||
| config_payload["configuration"]["default_docker_template"] = ( | ||
| config.get_value("configuration.default_runtime_id") | ||
| ) | ||
| if config.get_value("configuration.custom_runtime_id"): | ||
@@ -887,0 +940,0 @@ runtimes = get_templates_list(workspace_id) |
@@ -181,3 +181,3 @@ from typing import List | ||
| raise InferlessCLIError("Runtime version not found for this model") | ||
| configuration = None | ||
| configuration = {} | ||
| if runtime_path: | ||
@@ -184,0 +184,0 @@ runtime_id = details["models"]["configuration"]["custom_docker_template"] |
@@ -38,2 +38,3 @@ import base64 | ||
| get_cli_files, | ||
| get_default_templates_list, | ||
| get_runtime_by_name, | ||
@@ -69,2 +70,3 @@ get_volume_by_name, | ||
| runtime_version, | ||
| runtime_type, | ||
| ): | ||
@@ -97,2 +99,45 @@ _, _, _, workspace_id, _ = decrypt_tokens() | ||
# Resolve the built-in runtime template only when the caller did not supply
# a custom runtime; otherwise both values stay None.
default_runtime_id = None
default_runtime_url = None
if not runtime:
    templates = get_default_templates_list()

    # Each runtime type maps to its own default template name. The non-FastAPI
    # branch previously filtered on the FastAPI name as well, so a Triton run
    # picked up the FastAPI template.
    if runtime_type == "fastapi":
        template_name = "Inferless Default (FastAPI)"
        missing_message = "No FastAPI runtime template found"
    else:
        template_name = "Inferless Default"
        missing_message = "No runtime template found"

    # Prefer the entry flagged as the latest version; fall back to any entry
    # with the right name if none is flagged.
    named_templates = [t for t in templates if t["name"] == template_name]
    latest_templates = [t for t in named_templates if t["is_latest_version"]]
    candidates = latest_templates or named_templates
    if not candidates:
        raise InferlessCLIError(missing_message)

    default_runtime_id = candidates[0]["id"]
    default_runtime_url = candidates[0]["template_url"]
| custom_volume_id = None | ||
@@ -117,2 +162,4 @@ custom_volume_name = None | ||
| "custom_volume_name": custom_volume_name, | ||
| "default_runtime_id": default_runtime_id, | ||
| "default_runtime_url": default_runtime_url, | ||
| }, | ||
@@ -475,10 +522,34 @@ } | ||
| run_commands_string = "" | ||
| triton_version_string = "23.06-py3" | ||
| if "cuda_version" in yaml_dict["build"]: | ||
| if yaml_dict["build"]["cuda_version"] == "12.4.1": | ||
| triton_version_string = "24.05-py3" | ||
| if yaml_dict["build"]["cuda_version"] == "12.1.1": | ||
| triton_version_string = "23.06-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "11.8.0": | ||
| triton_version_string = "22.11-py3" | ||
| base_image_string = "" | ||
| version_tag_string = "" | ||
| base_image = "triton" | ||
| if "base_image" in yaml_dict["build"]: | ||
| base_image = yaml_dict["build"]["base_image"] | ||
| if base_image == "triton": | ||
| base_image_string = f"nvcr.io/nvidia/tritonserver" | ||
| version_tag_string = "23.06-py3" | ||
| if "cuda_version" in yaml_dict["build"]: | ||
| if yaml_dict["build"]["cuda_version"] == "12.9.0": | ||
| version_tag_string = "25.04-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "12.4.1": | ||
| version_tag_string = "24.05-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "12.1.1": | ||
| version_tag_string = "23.06-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "11.8.0": | ||
| version_tag_string = "22.11-py3" | ||
| elif base_image == "fastapi": | ||
| base_image_string = f"inferless/fastapi" | ||
| version_tag_string = "12.1.1-py3" | ||
| if "cuda_version" in yaml_dict["build"]: | ||
| if yaml_dict["build"]["cuda_version"] == "12.9.0": | ||
| version_tag_string = "12.9.0-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "12.4.1": | ||
| version_tag_string = "12.4.1-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "12.1.1": | ||
| version_tag_string = "12.1.1-py3" | ||
| elif yaml_dict["build"]["cuda_version"] == "11.8.0": | ||
| version_tag_string = "11.8.0-py3" | ||
| if ( | ||
@@ -500,8 +571,5 @@ "system_packages" in yaml_dict["build"] | ||
| if "run" in yaml_dict["build"] and yaml_dict["build"]["run"] is not None: | ||
| run_commands_string = "RUN " | ||
| run_commands_string = "" | ||
| for index, each in enumerate(yaml_dict["build"]["run"]): | ||
| if index == len(yaml_dict["build"]["run"]) - 1: | ||
| run_commands_string += each | ||
| else: | ||
| run_commands_string += each + " && " | ||
| run_commands_string += "RUN " + each + " \n" | ||
@@ -518,8 +586,13 @@ api_text_template_import = api_text_template_import.replace( | ||
| api_text_template_import = api_text_template_import.replace( | ||
| "##tritonversion##", triton_version_string | ||
| "##base_image##", base_image_string | ||
| ) | ||
| api_text_template_import = api_text_template_import.replace( | ||
| "##version_tag##", version_tag_string | ||
| ) | ||
| return api_text_template_import | ||
| def build_docker_image(dockerfile_content, context_path=".", docker_base_url=None): | ||
| def build_docker_image( | ||
| dockerfile_content, context_path=".", docker_base_url=None | ||
| ): | ||
| log_dir = os.path.join(os.getcwd(), ".inferless-logs") | ||
@@ -526,0 +599,0 @@ os.makedirs(log_dir, exist_ok=True) |
@@ -11,3 +11,2 @@ import base64 | ||
| build_docker_image, | ||
| check_and_convert_runtime_file, | ||
| get_inferless_config, | ||
@@ -43,2 +42,3 @@ get_inputs_from_input_json, | ||
| log_exception, | ||
| yaml, | ||
| ) | ||
@@ -49,2 +49,3 @@ from inferless_cli.utils.inferless_config_handler import InferlessConfigHandler | ||
| get_cli_files, | ||
| get_default_templates_list, | ||
| get_file_download, | ||
@@ -90,6 +91,7 @@ get_templates_list, | ||
| runtime_version, | ||
| runtime_type, | ||
| ) | ||
| config.set_loaded_config(yaml_data) | ||
| if is_local_runtime: | ||
| check_and_convert_runtime_file(runtime, runtime_type) | ||
| # if is_local_runtime: | ||
| # check_and_convert_runtime_file(runtime, runtime_type) | ||
@@ -110,2 +112,3 @@ volume_path = get_volume_path(config, progress, task_id) | ||
| docker_base_url, | ||
| runtime_type, | ||
| ) | ||
@@ -149,2 +152,3 @@ except ConfigurationError as error: | ||
| docker_base_url, | ||
| runtime_type, | ||
| ): | ||
@@ -169,3 +173,3 @@ if config.get_value("source_framework_type") == "PYTORCH": | ||
| runtime_dockerfile = custom_runtime_file( | ||
| config, runtime_file_path, model_name, progress, task_id | ||
| config, runtime_file_path, model_name, progress, task_id, runtime_type | ||
| ) | ||
@@ -245,4 +249,27 @@ progress.update( | ||
| def custom_runtime_file(config, runtime_file_path, model_name, progress, task_id): | ||
def check_runtime_type(runtime_type, runtime_file_path, runtime_url):
    """Resolve the runtime type, letting a runtime YAML override the given value.

    The local runtime file is consulted first, then the remote runtime YAML.
    The first one that declares ``build.base_image`` decides the result:
    ``"fastapi"`` when the value is exactly ``"fastapi"``, ``"triton"`` for any
    other value. If neither source declares it, ``runtime_type`` is returned
    unchanged.

    Args:
        runtime_type: Fallback runtime type supplied by the caller.
        runtime_file_path: Path to a local runtime YAML, or None.
        runtime_url: URL of a remote runtime YAML, or None.

    Returns:
        ``"fastapi"``, ``"triton"``, or the original ``runtime_type``.
    """
    # NOTE(review): `yaml` is provided elsewhere in this file; confirm it is a
    # safe loader, since the remote branch parses downloaded content.
    if runtime_file_path is not None and os.path.exists(runtime_file_path):
        with open(runtime_file_path, "r") as yaml_file:
            detected = _runtime_type_from_yaml(yaml.load(yaml_file))
        if detected is not None:
            return detected

    if runtime_url is not None:
        yaml_file = get_remote_runtime_docker_yaml(runtime_url)
        if yaml_file is not None:
            detected = _runtime_type_from_yaml(yaml.load(yaml_file))
            if detected is not None:
                return detected

    return runtime_type


def _runtime_type_from_yaml(yaml_dict):
    """Return the runtime type implied by ``build.base_image``, or None if absent."""
    try:
        build = yaml_dict["build"]
        if "base_image" not in build:
            return None
        base_image = build["base_image"]
    except (KeyError, TypeError):
        # Empty document, missing `build` key, or an empty `build:` section:
        # treat as "not declared" so the caller can try the next source.
        return None
    return "fastapi" if base_image == "fastapi" else "triton"
| def custom_runtime_file( | ||
| config, runtime_file_path, model_name, progress, task_id, runtime_type | ||
| ): | ||
| progress.update( | ||
@@ -252,2 +279,4 @@ task_id, | ||
| ) | ||
| runtime_url = check_remote_runtime(config) | ||
| runtime_type = check_runtime_type(runtime_type, runtime_file_path, runtime_url) | ||
| docker_file_contents = get_cli_files("default_template_dockerfile") | ||
@@ -258,3 +287,2 @@ default_template_dockerfile = base64.b64decode(docker_file_contents).decode("utf-8") | ||
| ) | ||
| runtime_url = check_remote_runtime(config) | ||
| if config.get_value("source_framework_type") == "PYTORCH": | ||
@@ -281,31 +309,50 @@ default_template_dockerfile = default_template_dockerfile.replace( | ||
| ) | ||
| runtime_dockerfile = get_default_runtime_docker_file(default_template_dockerfile) | ||
| runtime_dockerfile = get_default_runtime_docker_file(default_template_dockerfile,runtime_type) | ||
| return runtime_dockerfile | ||
| def get_default_runtime_docker_file(default_template_dockerfile): | ||
| requirements_text_contents = get_cli_files("requirements.txt") | ||
| requirements_text = base64.b64decode(requirements_text_contents).decode("utf-8") | ||
| requirements_lines = requirements_text.strip().split("\n") | ||
| requirements_lines = [ | ||
| line for line in requirements_lines if not line.strip().startswith("#") | ||
| ] | ||
| pip_install_commands = "\n".join( | ||
| f"RUN pip install --no-cache-dir {line}" | ||
| for line in requirements_lines | ||
| if line.strip() | ||
| ) | ||
| default_dockerfile = default_template_dockerfile.replace( | ||
| "##piplibraries##", pip_install_commands | ||
| ) | ||
| triton_version_string = "23.06-py3" | ||
| def get_default_runtime_docker_file(default_template_dockerfile, runtime_type): | ||
| templates = get_default_templates_list() | ||
| runtime_url = None | ||
| if runtime_type == "fastapi": | ||
| fastapi_templates = [ | ||
| t | ||
| for t in templates | ||
| if t["name"] == "Inferless Default (FastAPI)" and t["is_latest_version"] | ||
| ] | ||
| if fastapi_templates: | ||
| runtime_url = fastapi_templates[0]["template_url"] | ||
| else: | ||
| # Fallback to any FastAPI template if latest not found | ||
| fastapi_templates = [ | ||
| t for t in templates if t["name"] == "Inferless Default (FastAPI)" | ||
| ] | ||
| if fastapi_templates: | ||
| runtime_url = fastapi_templates[0]["template_url"] | ||
| else: | ||
| raise InferlessCLIError("No FastAPI runtime template found") | ||
| else: | ||
| default_templates = [ | ||
| t | ||
| for t in templates | ||
| if t["name"] == "Inferless Default" and t["is_latest_version"] | ||
| ] | ||
| if default_templates: | ||
| runtime_url = default_templates[0]["template_url"] | ||
| else: | ||
| # Fallback to any default template if latest not found | ||
| default_templates = [ | ||
| t for t in templates if t["name"] == "Inferless Default" | ||
| ] | ||
| if default_templates: | ||
| runtime_url = default_templates[0]["template_url"] | ||
| else: | ||
| raise InferlessCLIError("No default runtime template found") | ||
| default_dockerfile = get_remote_runtime_docker_file(runtime_url, default_template_dockerfile) | ||
| default_dockerfile = default_dockerfile.replace( | ||
| "##tritonversion##", triton_version_string | ||
| ) | ||
| return default_dockerfile | ||
| def get_remote_runtime_docker_file(runtime_url, default_template_dockerfile): | ||
| def get_remote_runtime_docker_yaml(runtime_url): | ||
| runtime_url = runtime_url.split("/") | ||
@@ -321,2 +368,9 @@ filename = runtime_url[len(runtime_url) - 2] + "/" + runtime_url[-1] | ||
| yaml_file = response.content | ||
| return yaml_file | ||
| return None | ||
| def get_remote_runtime_docker_file(runtime_url, default_template_dockerfile): | ||
| yaml_file = get_remote_runtime_docker_yaml(runtime_url) | ||
| if yaml_file is not None: | ||
| default_template_dockerfile = load_yaml_file( | ||
@@ -323,0 +377,0 @@ yaml_file, default_template_dockerfile |
+17
-10
@@ -259,2 +259,8 @@ # EXTERNAL PACKAGES | ||
| ), | ||
| runtime_type: str = typer.Option( | ||
| "triton", | ||
| "--runtime-type", | ||
| "-t", | ||
| help="Type of runtime to deploy [fastapi, triton]. Defaults to triton.", | ||
| ), | ||
| ): | ||
@@ -267,2 +273,8 @@ callback_with_auth_validation() | ||
| if runtime_type is not None and runtime_type not in PROVIDER_CHOICES: | ||
| rich.print( | ||
| f"Error: '--runtime-type' must be one of {PROVIDER_CHOICES}, got '{runtime_type}' instead." | ||
| ) | ||
| raise typer.Exit() | ||
| if not os.path.isfile(config_file_name): | ||
@@ -311,2 +323,3 @@ raise typer.BadParameter("Config file not found.") | ||
| volume_mount_path=volume_mount_path, | ||
| runtime_type=runtime_type, | ||
| ) | ||
@@ -324,6 +337,6 @@ | ||
| runtime_type: str = typer.Option( | ||
| None, | ||
| "--type", | ||
| "triton", | ||
| "--runtime-type", | ||
| "-t", | ||
| help="Type of runtime to run [inferless, replicate]", | ||
| help="Type of runtime to deploy [fastapi, triton]. Defaults to triton.", | ||
| ), | ||
@@ -388,15 +401,9 @@ name: str = typer.Option( | ||
| is_local_runtime = True | ||
| else: | ||
| typer.echo("No runtime specified; using default Inferless runtime.") | ||
| if runtime_type is not None and runtime_type not in PROVIDER_CHOICES: | ||
| rich.print( | ||
| f"Error: '--type' must be one of {PROVIDER_CHOICES}, got '{runtime_type}' instead." | ||
| f"Error: '--runtime-type' must be one of {PROVIDER_CHOICES}, got '{runtime_type}' instead." | ||
| ) | ||
| raise typer.Exit() | ||
| if runtime_type is None and runtime_path is not None: | ||
| rich.print("[yellow]Type not given. Assuming type as Inferless.[/yellow]") | ||
| runtime_type = "inferless" | ||
| env_dict = {} | ||
@@ -403,0 +410,0 @@ if env_file: |
@@ -57,3 +57,2 @@ import json | ||
| response.raise_for_status() | ||
| return response | ||
@@ -60,0 +59,0 @@ |
@@ -40,2 +40,3 @@ from inferless_cli.utils.credentials import select_url | ||
| GET_TEMPLATES_LIST_URL = f"{BASE_URL}/workspace/models/templates/list/" | ||
| GET_DEFAULT_TEMPLATES_LIST_URL = f"{BASE_URL}/workspace/models/templates/default/list/" | ||
| GET_WORKSPACE_MODELS_URL = f"{BASE_URL}/workspace/models/list/" | ||
@@ -342,3 +343,3 @@ DELETE_MODEL_URL = f"{BASE_URL}/workspace/models/delete/" | ||
| PROVIDER_CHOICES = ["replicate", "inferless"] | ||
| PROVIDER_CHOICES = ["fastapi", "triton"] | ||
| PROVIDER_EXPORT_CHOICES = list(set(PROVIDER_CHOICES) - {"inferless"}) | ||
@@ -345,0 +346,0 @@ |
@@ -15,2 +15,3 @@ import os | ||
| GET_CONNECTED_ACCOUNTS_URL, | ||
| GET_DEFAULT_TEMPLATES_LIST_URL, | ||
| GET_EXPLORE_MODELS_LIST, | ||
@@ -218,2 +219,19 @@ GET_HF_DEFAULT_FILES_DATA, | ||
def get_default_templates_list():
    """Fetch the default runtime templates.

    Sends an authenticated POST with an empty payload to
    ``GET_DEFAULT_TEMPLATES_LIST_URL`` and returns the ``"details"`` field of
    the JSON response.

    Raises:
        ServerError: If the request fails with an ``HTTPError``.
        Exception: For any other failure; the original error is logged and
            kept as the cause.
    """
    try:
        payload = {}
        response = make_request(
            GET_DEFAULT_TEMPLATES_LIST_URL, method="POST", auth=True, data=payload
        )
        response_json = response.json()
        return response_json["details"]
    except HTTPError as http_err:
        # Chain the cause so the underlying HTTP failure stays in the traceback.
        raise ServerError(http_err) from http_err
    except Exception as e:
        log_exception(e)
        raise Exception("Failed to get default templates list.") from e
| def create_presigned_url( | ||
@@ -220,0 +238,0 @@ payload, |
+3
-2
| Metadata-Version: 2.1 | ||
| Name: inferless-cli | ||
| Version: 2.0.19 | ||
| Version: 2.0.20 | ||
| Summary: Inferless - Deploy Machine Learning Models in Minutes. | ||
@@ -98,2 +98,3 @@ Author: Naveen | ||
| * `--min-replica INTEGER`: Minimum number of replicas. [default: 0] | ||
| * `-t, --runtime-type TEXT`: Type of runtime to deploy [fastapi, triton]. Defaults to triton. [default: triton] | ||
| * `-c, --config TEXT`: Inferless config file path to override from inferless.yaml [default: inferless.yaml] | ||
@@ -680,3 +681,3 @@ * `--help`: Show this message and exit. | ||
| * `-r, --runtime TEXT`: custom runtime name or file location. if not provided default Inferless runtime will be used. | ||
| * `-t, --type TEXT`: Type of runtime to run [inferless, replicate] | ||
| * `-t, --runtime-type TEXT`: Type of runtime to deploy [fastapi, triton]. Defaults to triton. [default: triton] | ||
| * `-n, --name TEXT`: Name of the model to deploy on inferless [default: inferless-model] | ||
@@ -683,0 +684,0 @@ * `-f, --env-file TEXT`: Path to an env file containing environment variables (one per line in KEY=VALUE format) |
+1
-1
| [tool.poetry] | ||
| name = "inferless-cli" | ||
| version = "2.0.19" | ||
| version = "2.0.20" | ||
| description = "Inferless - Deploy Machine Learning Models in Minutes." | ||
@@ -5,0 +5,0 @@ authors = ["Naveen <naveen@inferless.com>"] |
+2
-1
@@ -68,2 +68,3 @@ # `inferless` | ||
| * `--min-replica INTEGER`: Minimum number of replicas. [default: 0] | ||
| * `-t, --runtime-type TEXT`: Type of runtime to deploy [fastapi, triton]. Defaults to triton. [default: triton] | ||
| * `-c, --config TEXT`: Inferless config file path to override from inferless.yaml [default: inferless.yaml] | ||
@@ -650,3 +651,3 @@ * `--help`: Show this message and exit. | ||
| * `-r, --runtime TEXT`: custom runtime name or file location. if not provided default Inferless runtime will be used. | ||
| * `-t, --type TEXT`: Type of runtime to run [inferless, replicate] | ||
| * `-t, --runtime-type TEXT`: Type of runtime to deploy [fastapi, triton]. Defaults to triton. [default: triton] | ||
| * `-n, --name TEXT`: Name of the model to deploy on inferless [default: inferless-model] | ||
@@ -653,0 +654,0 @@ * `-f, --env-file TEXT`: Path to an env file containing environment variables (one per line in KEY=VALUE format) |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
402656
2.17%9178
2.15%