llm-perf-leaderboard

Sleeping

App Files Files Community

BenchmarkBot committed on Jun 26, 2023

Commit

d262fb3

1 Parent(s): d8b9ce2

made models clickable

Browse files

Files changed (3) hide show

app.py +28 -45
src/assets/text_content.py +2 -0
src/utils.py +62 -0

app.py CHANGED Viewed

@@ -1,69 +1,48 @@
 import os
 import gradio as gr
 import pandas as pd
-from huggingface_hub import HfApi, Repository
 from apscheduler.schedulers.background import BackgroundScheduler
 from src.assets.text_content import TITLE, INTRODUCTION_TEXT
 from src.assets.css_html_js import custom_css, get_window_url_params
-OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
-LLM_PERF_DATASET_REPO = "optimum/llm-perf"
-def restart_space():
-    HfApi().restart_space(
-        repo_id=LLM_PERF_LEADERBOARD_REPO, token=OPTIMUM_TOKEN
-    )
-def load_dataset_repo():
-    llm_perf_repo = None
-    if OPTIMUM_TOKEN:
-        print("Loading LLM-Perf-Dataset from Hub...")
-        llm_perf_repo = Repository(
-            local_dir="./llm-perf/",
-            clone_from=LLM_PERF_DATASET_REPO,
-            token=OPTIMUM_TOKEN,
-            repo_type="dataset",
-        )
-        llm_perf_repo.git_pull()
-    return llm_perf_repo
-def get_leaderboard_df():
-    if llm_perf_repo:
-        llm_perf_repo.git_pull()
-    df = pd.read_csv("./llm-perf/reports/cuda_1_100/inference_report.csv")
     df = df[["model", "backend.name", "backend.torch_dtype", "backend.quantization",
              "generate.latency(s)", "generate.throughput(tokens/s)"]]
     df.rename(columns={
         "model": "Model",
-        "backend.name": "Backend",
-        "backend.torch_dtype": "Torch dtype",
-        "backend.quantization": "Quantization",
-        "generate.latency(s)": "Latency (s)",
-        "generate.throughput(tokens/s)": "Throughput (tokens/s)"
     }, inplace=True)
-    df.sort_values(by=["Throughput (tokens/s)"], ascending=False, inplace=True)
     return df
-def refresh():
-    leaderboard_df = get_leaderboard_df()
-    return leaderboard_df
-llm_perf_repo = load_dataset_repo()
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
@@ -71,15 +50,19 @@ with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("Vanilla Benchmark", elem_id="vanilla-benchmark", id=0):
-            leaderboard_df = get_leaderboard_df()
             leaderboard_table_lite = gr.components.Dataframe(
-                value=leaderboard_df,
-                headers=leaderboard_df.columns.tolist(),
-                max_rows=None,
-                elem_id="leaderboard-table-lite",
             )
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
 demo.queue(concurrency_count=40).launch()

 import os
 import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from src.assets.text_content import TITLE, INTRODUCTION_TEXT
 from src.assets.css_html_js import custom_css, get_window_url_params
+from src.utils import restart_space, load_dataset_repo, make_clickable_model
 LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN")

# Handle on the local clone of the dataset repository. ``load_dataset_repo``
# returns ``None`` when no token is configured, in which case the report CSV
# must already be present on disk.
llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)


def get_vanilla_benchmark_df():
    """Build the table shown in the "Vanilla Benchmark" tab.

    Pulls the latest state of the dataset clone (when one is available),
    reads the ``cuda_1_100`` inference report, keeps the columns that are
    displayed, turns each model name into an HTML link and sorts the rows by
    throughput, highest first.

    Returns:
        pandas.DataFrame: The display-ready leaderboard with renamed columns.
    """
    if llm_perf_dataset_repo:
        llm_perf_dataset_repo.git_pull()

    report = pd.read_csv(
        "./llm-perf-dataset/reports/cuda_1_100/inference_report.csv")

    # Work on an explicit copy of the column subset: assigning into (or
    # renaming/sorting in place) a slice of another frame is what triggers
    # pandas' SettingWithCopyWarning and is not guaranteed to behave the same
    # across pandas versions.
    df = report[["model", "backend.name", "backend.torch_dtype", "backend.quantization",
                 "generate.latency(s)", "generate.throughput(tokens/s)"]].copy()

    df["model"] = df["model"].apply(make_clickable_model)

    df = df.rename(columns={
        "model": "Model",
        "backend.name": "Backend 🏭",
        "backend.torch_dtype": "Load dtype",
        "backend.quantization": "Quantization 🗜️",
        "generate.latency(s)": "Latency (s) ⬇️",
        "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
    })
    df = df.sort_values(by=["Throughput (tokens/s) ⬆️"], ascending=False)
    return df
+# Define demo interface
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("Vanilla Benchmark", elem_id="vanilla-benchmark", id=0):
+            vanilla_benchmark_df = get_vanilla_benchmark_df()
             leaderboard_table_lite = gr.components.Dataframe(
+                value=vanilla_benchmark_df,
+                headers=vanilla_benchmark_df.columns.tolist(),
+                elem_id="vanilla-benchmark",
             )
# Restart space every hour.
# ``restart_space`` takes the Space repo id and the access token as positional
# parameters, so they have to be handed to the scheduler explicitly; without
# ``args`` the job cannot be invoked.
scheduler = BackgroundScheduler()
scheduler.add_job(
    restart_space,
    "interval",
    seconds=3600,
    args=[LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN],
)
scheduler.start()

# Launch demo
demo.queue(concurrency_count=40).launch()

src/assets/text_content.py CHANGED Viewed

@@ -2,4 +2,6 @@ TITLE = """<h1 align="center" id="space-title">🤗 Open LLM-Perf Leaderboard</h
 INTRODUCTION_TEXT = f"""
 The 🤗 Open LLM-Perf Leaderboard aims to benchmark the performance (latency & throughput) of Large Language Models (LLMs) on different backends and hardwares using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark)
 """

 INTRODUCTION_TEXT = f"""
 The 🤗 Open LLM-Perf Leaderboard aims to benchmark the performance (latency & throughput) of Large Language Models (LLMs) on different backends and hardwares using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark)
+🤗 Anyone from the community can submit a model for automated benchmarking on the 🤗 GPU cluster, as long as it is a 🤗 Transformers model with weights on the Hub. We also support benchmarks of models with delta-weights for non-commercial licensed models, such as LLaMa.
 """

src/utils.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from huggingface_hub import HfApi, Repository
def restart_space(LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN):
    """Ask the Hub to restart a Space.

    Args:
        LLM_PERF_LEADERBOARD_REPO: Repo id of the Space to restart.
        OPTIMUM_TOKEN: Access token forwarded to the Hub API call.
    """
    hub_api = HfApi()
    hub_api.restart_space(
        token=OPTIMUM_TOKEN,
        repo_id=LLM_PERF_LEADERBOARD_REPO,
    )
def load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN, local_dir=None):
    """Clone the LLM-Perf dataset repository locally and pull its latest state.

    Args:
        LLM_PERF_DATASET_REPO: Hub id of the dataset repository, e.g.
            ``"optimum/llm-perf-dataset"``.
        OPTIMUM_TOKEN: Hub access token. When it is falsy nothing is cloned.
        local_dir: Directory to clone into. Defaults to ``./<repo name>/``
            (``"optimum/llm-perf-dataset"`` -> ``"./llm-perf-dataset/"``), so
            the clone location follows the repository name instead of a
            hard-coded folder that can drift from the path the app reads.

    Returns:
        The ``Repository`` handle for the local clone, or ``None`` when no
        token was provided.
    """
    if not OPTIMUM_TOKEN:
        return None

    if local_dir is None:
        repo_name = LLM_PERF_DATASET_REPO.rstrip("/").split("/")[-1]
        local_dir = f"./{repo_name}/"

    print("Loading LLM-Perf-Dataset from Hub...")
    llm_perf_repo = Repository(
        local_dir=local_dir,
        clone_from=LLM_PERF_DATASET_REPO,
        token=OPTIMUM_TOKEN,
        repo_type="dataset",
    )
    llm_perf_repo.git_pull()
    return llm_perf_repo
LLAMAS = ["huggingface/llama-7b", "huggingface/llama-13b",
          "huggingface/llama-30b", "huggingface/llama-65b"]

# Pages to link to for models whose leaderboard name is not a Hub repo id.
KOALA_LINK = "https://huggingface.co/TheBloke/koala-13B-HF"
VICUNA_LINK = "https://huggingface.co/lmsys/vicuna-13b-delta-v1.1"
OASST_LINK = "https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
DOLLY_LINK = "https://huggingface.co/databricks/dolly-v2-12b"
MODEL_PAGE = "https://huggingface.co/models"
LLAMA_LINK = "https://ai.facebook.com/blog/large-language-model-llama-meta-ai/"
# Kept under its own name: it used to be assigned to VICUNA_LINK a second
# time, which silently replaced the plain vicuna link defined above.
STABLE_VICUNA_LINK = "https://huggingface.co/CarperAI/stable-vicuna-13b-delta"
ALPACA_LINK = "https://crfm.stanford.edu/2023/03/13/alpaca.html"


def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens ``link`` in a new tab.

    Both values are HTML-escaped before being interpolated so that a model
    name or URL containing markup characters cannot break out of the tag.

    Args:
        link: Target URL of the anchor.
        model_name: Text displayed for the anchor.

    Returns:
        str: The ``<a>`` element as a string.
    """
    import html

    safe_link = html.escape(str(link), quote=True)
    safe_name = html.escape(str(model_name), quote=True)
    return f'<a target="_blank" href="{safe_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{safe_name}</a>'


def make_clickable_model(model_name):
    """Turn a model name into an HTML link to the model's page.

    By default the link points at ``https://huggingface.co/<model_name>``.
    A few names are special-cased to a dedicated page and, for some of them,
    a shorter display name.

    Args:
        model_name: Model identifier as it appears in the benchmark report.

    Returns:
        str: HTML anchor for the model.
    """
    link = f"https://huggingface.co/{model_name}"

    # Full repo ids that get a custom page and a shortened display name.
    if model_name in LLAMAS:
        link = LLAMA_LINK
        model_name = model_name.split("/")[1]
    elif model_name == "HuggingFaceH4/stable-vicuna-13b-2904":
        link = STABLE_VICUNA_LINK
        model_name = "stable-vicuna-13b"
    elif model_name == "HuggingFaceH4/llama-7b-ift-alpaca":
        link = ALPACA_LINK
        # NOTE(review): the repo id says 7b but the display name says 13b;
        # kept as-is, confirm which one is intended.
        model_name = "alpaca-13b"

    # Short names that are not Hub repo ids; evaluated after the renames above.
    short_name_links = {
        "dolly-12b": DOLLY_LINK,
        "vicuna-13b": VICUNA_LINK,
        "koala-13b": KOALA_LINK,
        "oasst-12b": OASST_LINK,
    }
    link = short_name_links.get(model_name, link)

    return model_hyperlink(link, model_name)