"""Gradio UI for creating and deleting Hugging Face Inference Endpoints.

The script queries the Inference Endpoints API for the available cloud
providers, regions and compute options, lets the user fill in a deployment
form, and submits create/delete requests on the user's behalf.
"""
import json
from urllib.parse import quote

import gradio as gr
import requests

API_BASE_URL = "https://api.endpoints.huggingface.cloud"

# `requests` waits forever unless a timeout is given; a stalled API would
# otherwise freeze app start-up or a UI callback indefinitely.
REQUEST_TIMEOUT = 30

JSON_HEADERS = {
    "Content-Type": "application/json",
}


def _auth_headers(token):
    """Return JSON request headers carrying *token* as a bearer token."""
    return {
        "Authorization": f"Bearer {(token or '').strip()}",
        "Content-Type": "application/json",
    }


def _fetch_available_items(url):
    """GET *url* and return the entries of its ``items`` list marked available.

    Any network failure, HTTP error status, or unexpected response body is
    logged with ``print`` and results in an empty list, so that a UI callback
    degrades to an empty dropdown instead of raising.
    """
    try:
        response = requests.get(
            url, headers=JSON_HEADERS, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        items = response.json()["items"]
    except (requests.RequestException, ValueError, KeyError) as err:
        print(f"Failed to fetch {url}: {err}")
        return []
    return [item for item in items if item.get("status") == "available"]


def _dropdown_update(choices):
    """Build a dropdown update that preselects the first choice, if any."""
    return gr.Dropdown.update(
        choices=choices,
        value=choices[0] if choices else None,
    )


def _parse_compute_option(option):
    """Split a compute dropdown label into its payload fields.

    Labels are produced by ``update_compute_options`` in the form
    ``"<ACCEL> [<size>] · <description> · <instanceType>"``.

    Returns:
        A ``(accelerator, instance_size, instance_type)`` tuple; the
        accelerator is lower-cased.

    Raises:
        ValueError: if the label has no ``[size]`` section.
    """
    parts = option.split("·")
    accelerator, opening, rest = parts[0].partition("[")
    size, closing, _ = rest.partition("]")
    if not opening or not closing:
        raise ValueError(f"unrecognised compute option: {option!r}")
    # The instance type is always the last segment, whatever the description
    # in the middle contains.
    return accelerator.strip().lower(), size.strip(), parts[-1].strip()


def avaliable_providers():
    """Return the vendor names of all providers the API reports as available."""
    endpoint_url = f"{API_BASE_URL}/provider"
    return [
        provider["vendor"]
        for provider in _fetch_available_items(endpoint_url)
    ]


def update_regions(provider):
    """Return a dropdown update listing the available regions of *provider*.

    Each choice is formatted as ``"<region>/<label>"``. An empty update is
    returned when no provider is selected.
    """
    if not provider:
        return _dropdown_update([])

    endpoint_url = f"{API_BASE_URL}/provider/{provider}/region"
    available_regions = [
        f"{region['region']}/{region['label']}"
        for region in _fetch_available_items(endpoint_url)
    ]
    return _dropdown_update(available_regions)


def update_compute_options(provider, region):
    """Return a dropdown update listing compute options for a provider/region.

    *region* is a ``"<region>/<label>"`` choice as produced by
    ``update_regions``; only the part before the slash is sent to the API.
    """
    # The region dropdown is reset to None when a provider has no regions,
    # which fires this callback with nothing to look up.
    if not provider or not region:
        return _dropdown_update([])

    region = region.split("/")[0]
    endpoint_url = (
        f"{API_BASE_URL}/provider/{provider}/region/{region}/compute"
    )
    print(endpoint_url)

    available_compute_options = []
    for compute in _fetch_available_items(endpoint_url):
        accelerator = compute['accelerator']
        num_accelerators = compute['numAccelerators']
        memory = compute['memoryGb'].replace("Gi", "GB")
        architecture = compute['architecture']
        instance_type = compute['instanceType']

        if accelerator == "cpu":
            description = f"{num_accelerators}vCPU {memory} · {architecture}"
        else:
            description = f"{num_accelerators}x {architecture}"

        available_compute_options.append(
            f"{accelerator.upper()} [{compute['instanceSize']}] · {description} · {instance_type}"
        )

    return _dropdown_update(available_compute_options)


def submit(
    hf_token_input,
    endpoint_name_input,
    provider_selector,
    region_selector,
    repository_selector,
    revision_selector,
    task_selector,
    framework_selector,
    compute_selector,
    min_node_selector,
    max_node_selector,
    security_selector
):
    """Create an endpoint from the form values and return a status message.

    The form values are assembled into the JSON payload, which is POSTed to
    the ``/endpoint`` route. Incomplete form input and request failures are
    reported through the returned string rather than raised.
    """
    if not provider_selector or not region_selector:
        return "Error: please select a cloud provider and region."
    if not compute_selector:
        return "Error: please select a compute instance type."

    try:
        accelerator, size, instance_type = _parse_compute_option(
            compute_selector
        )
    except ValueError as err:
        return f"Error: {err}"

    try:
        min_replica = int(min_node_selector)
        max_replica = int(max_node_selector)
    except (TypeError, ValueError):
        return "Error: min/max number of nodes must be whole numbers."

    # Honour the Framework field; an empty value falls back to "custom".
    framework = (framework_selector or "").strip().lower() or "custom"

    payload = {
        "accountId": repository_selector.split("/")[0],
        "compute": {
            "accelerator": accelerator,
            "instanceSize": size,
            "instanceType": instance_type,
            "scaling": {
                "maxReplica": max_replica,
                "minReplica": min_replica
            }
        },
        "model": {
            "framework": framework,
            "image": {
                "huggingface": {}
            },
            "repository": repository_selector.lower(),
            "revision": revision_selector,
            "task": task_selector.lower()
        },
        "name": endpoint_name_input.strip(),
        "provider": {
            "region": region_selector.split("/")[0].lower(),
            "vendor": provider_selector.lower()
        },
        "type": security_selector.lower()
    }

    payload = json.dumps(payload)
    print(f"Payload: {payload}")

    endpoint_url = f"{API_BASE_URL}/endpoint"
    print(f"Endpoint: {endpoint_url}")

    try:
        response = requests.post(
            endpoint_url,
            headers=_auth_headers(hf_token_input),
            data=payload,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as err:
        return f"something went wrong: {err}"

    if response.status_code == 400:
        return f"{response.text}. Malformed data in {payload}"
    elif response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 409:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} created successfully on {provider_selector.lower()} using {repository_selector.lower()}@{revision_selector}. \n Please check out the progress at https://ui.endpoints.huggingface.co/endpoints."
    else:
        return f"something went wrong {response.status_code} = {response.text}"


def delete_endpoint(
    hf_token_input,
    endpoint_name_input
):
    """Delete the named endpoint and return a status message."""
    endpoint_name = (endpoint_name_input or "").strip()
    if not endpoint_name:
        # An empty name would send DELETE to the collection URL itself.
        return "Error: endpoint name is required."

    try:
        response = requests.delete(
            # Escape the name so it cannot alter the request path.
            f"{API_BASE_URL}/endpoint/{quote(endpoint_name, safe='')}",
            headers=_auth_headers(hf_token_input),
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as err:
        return f"something went wrong: {err}"

    if response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 404:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name} deleted successfully."
    else:
        return f"something went wrong {response.status_code} = {response.text}"


with gr.Blocks() as demo2:
    gr.Markdown(""" #### Your 🤗 Access Token (Required) """)
    hf_token_input = gr.Textbox(
        show_label=False,
        type="password"
    )

    gr.Markdown(
        """ ###
------------------ (Deploy Your Model on 🤗 Endpoint) ------------------
""")

    gr.Markdown(""" #### Endpoint Name """)
    endpoint_name_input = gr.Textbox(
        show_label=False
    )

    # Queried once at start-up; the list is empty if the API is unreachable.
    providers = avaliable_providers()

    with gr.Row():
        gr.Markdown(""" #### Cloud Provider """)
        gr.Markdown(""" #### Cloud Region """)

    with gr.Row():
        provider_selector = gr.Dropdown(
            choices=providers,
            interactive=True,
            show_label=False,
        )
        region_selector = gr.Dropdown(
            [],
            value="",
            interactive=True,
            show_label=False,
        )

    # Choosing a provider repopulates the region dropdown.
    provider_selector.change(update_regions, inputs=provider_selector, outputs=region_selector)

    with gr.Row():
        gr.Markdown(""" #### Target Model """)
        gr.Markdown(""" #### Target Model Version(branch commit hash) """)

    with gr.Row():
        repository_selector = gr.Textbox(
            value="",
            interactive=True,
            show_label=False,
        )
        revision_selector = gr.Textbox(
            value="",
            interactive=True,
            show_label=False,
        )

    with gr.Row():
        gr.Markdown(""" #### Task """)
        gr.Markdown(""" #### Framework """)

    with gr.Row():
        task_selector = gr.Textbox(
            value="Custom",
            interactive=True,
            show_label=False,
        )
        framework_selector = gr.Textbox(
            value="Custom",
            interactive=True,
            show_label=False,
        )

    gr.Markdown(""" #### Select Compute Instance Type """)
    compute_selector = gr.Dropdown(
        [],
        value="",
        interactive=True,
        show_label=False,
    )

    # Choosing a region repopulates the compute dropdown.
    region_selector.change(update_compute_options, inputs=[provider_selector, region_selector], outputs=compute_selector)

    with gr.Row():
        gr.Markdown(""" #### Min Number of Nodes """)
        gr.Markdown(""" #### Max Number of Nodes """)
        gr.Markdown(""" #### Security Level """)

    with gr.Row():
        min_node_selector = gr.Number(
            value=1,
            interactive=True,
            show_label=False,
        )
        max_node_selector = gr.Number(
            value=1,
            interactive=True,
            show_label=False,
        )
        security_selector = gr.Radio(
            choices=["Protected", "Public", "Private"],
            value="Public",
            interactive=True,
            show_label=False,
        )

    submit_button = gr.Button(
        value="Submit",
    )
    status_txt = gr.Textbox(
        value="status",
        interactive=False
    )

    submit_button.click(
        submit,
        inputs=[
            hf_token_input,
            endpoint_name_input,
            provider_selector,
            region_selector,
            repository_selector,
            revision_selector,
            task_selector,
            framework_selector,
            compute_selector,
            min_node_selector,
            max_node_selector,
            security_selector],
        outputs=status_txt)

    # Delete Endpoint
    gr.Markdown(""" ###
------------------ (Delete Endpoint) ------------------
""")

    gr.Markdown(""" #### Endpoint Name """)
    delete_endpoint_name_input = gr.Textbox(
        show_label=False
    )
    delete_button = gr.Button(
        value="Delete",
    )
    delete_status_txt = gr.Textbox(
        value="status",
        interactive=False
    )

    delete_button.click(
        delete_endpoint,
        inputs=[
            hf_token_input,
            delete_endpoint_name_input
        ],
        outputs=delete_status_txt
    )

    gr.Markdown(""" ###
------------------ (Instance Pricing Table) ------------------
#### Pricing Table(CPU) - 2023/2/22 """)

    gr.Dataframe(
        headers=["provider", "size", "$/h", "vCPUs", "Memory", "Architecture"],
        datatype=["str", "str", "str", "number", "str", "str"],
        row_count=8,
        col_count=(6, "fixed"),
        value=[
            ["aws", "small", "$0.06", 1, "2GB", "Intel Xeon - Ice Lake"],
            ["aws", "medium", "$0.12", 2, "4GB", "Intel Xeon - Ice Lake"],
            ["aws", "large", "$0.24", 4, "8GB", "Intel Xeon - Ice Lake"],
            ["aws", "xlarge", "$0.48", 8, "16GB", "Intel Xeon - Ice Lake"],
            ["azure", "small", "$0.06", 1, "2GB", "Intel Xeon"],
            ["azure", "medium", "$0.12", 2, "4GB", "Intel Xeon"],
            ["azure", "large", "$0.24", 4, "8GB", "Intel Xeon"],
            ["azure", "xlarge", "$0.48", 8, "16GB", "Intel Xeon"],
        ]
    )

    gr.Markdown(""" #### Pricing Table(GPU) - 2023/2/22 """)

    gr.Dataframe(
        headers=["provider", "size", "$/h", "GPUs", "Memory", "Architecture"],
        datatype=["str", "str", "str", "number", "str", "str"],
        row_count=6,
        col_count=(6, "fixed"),
        value=[
            ["aws", "small", "$0.60", 1, "14GB", "NVIDIA T4"],
            ["aws", "medium", "$1.30", 1, "24GB", "NVIDIA A10G"],
            ["aws", "large", "$4.50", 4, "56GB", "NVIDIA T4"],
            ["aws", "xlarge", "$6.50", 1, "80GB", "NVIDIA A100"],
            ["aws", "xxlarge", "$7.00", 4, "96GB", "NVIDIA A10G"],
            ["aws", "xxxlarge", "$45.0", 8, "640GB", "NVIDIA A100"],
        ]
    )

gr.TabbedInterface(
    [demo2], ["HF Endpoint Tool"]
).launch(enable_queue=True)