Commit 
							
							·
						
						134a499
	
1
								Parent(s):
							
							04a3faa
								
updated the llm-perf
Browse files- .gitignore +2 -0
 - app.py +131 -124
 - requirements.txt +4 -5
 - src/utils.py +5 -24
 
    	
        .gitignore
    CHANGED
    
    | 
         @@ -3,3 +3,5 @@ __pycache__/ 
     | 
|
| 3 | 
         
             
            .ipynb_checkpoints
         
     | 
| 4 | 
         
             
            *ipynb
         
     | 
| 5 | 
         
             
            .vscode/
         
     | 
| 
         | 
|
| 
         | 
| 
         | 
|
| 3 | 
         
             
            .ipynb_checkpoints
         
     | 
| 4 | 
         
             
            *ipynb
         
     | 
| 5 | 
         
             
            .vscode/
         
     | 
| 6 | 
         
            +
             
     | 
| 7 | 
         
            +
            dataset/
         
     | 
    	
        app.py
    CHANGED
    
    | 
         @@ -1,88 +1,98 @@ 
     | 
|
| 1 | 
         
             
            import os
         
     | 
| 
         | 
|
| 2 | 
         
             
            import gradio as gr
         
     | 
| 3 | 
         
             
            import pandas as pd
         
     | 
| 4 | 
         
             
            import plotly.express as px
         
     | 
| 5 | 
         
            -
            from  
     | 
| 
         | 
|
| 6 | 
         | 
| 
         | 
|
| 7 | 
         
             
            from src.assets.css_html_js import custom_css
         
     | 
| 8 | 
         
             
            from src.assets.text_content import (
         
     | 
| 9 | 
         
             
                TITLE,
         
     | 
| 10 | 
         
            -
                INTRODUCTION_TEXT,
         
     | 
| 11 | 
         
             
                ABOUT_TEXT,
         
     | 
| 
         | 
|
| 12 | 
         
             
                EXAMPLE_CONFIG_TEXT,
         
     | 
| 13 | 
         
             
                CITATION_BUTTON_LABEL,
         
     | 
| 14 | 
         
             
                CITATION_BUTTON_TEXT,
         
     | 
| 15 | 
         
             
            )
         
     | 
| 16 | 
         
            -
            from src.utils import (
         
     | 
| 17 | 
         
            -
                restart_space,
         
     | 
| 18 | 
         
            -
                load_dataset_repo,
         
     | 
| 19 | 
         
            -
                process_model_name,
         
     | 
| 20 | 
         
            -
                process_model_type,
         
     | 
| 21 | 
         
            -
            )
         
     | 
| 22 | 
         | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
            -
            HARDWARES_EMOJIS = ["🖥️", "💻"]
         
     | 
| 25 | 
         
            -
            LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
         
     | 
| 26 | 
         
             
            LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
         
     | 
| 27 | 
         
            -
             
     | 
| 28 | 
         
            -
             
     | 
| 29 | 
         
             
            ALL_COLUMNS_MAPPING = {
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 30 | 
         
             
                "backend.name": "Backend π",
         
     | 
| 31 | 
         
             
                "backend.torch_dtype": "Dtype π₯",
         
     | 
| 32 | 
         
             
                "optimizations": "Optimizations 🛠️",
         
     | 
| 33 | 
         
             
                "quantization": "Quantization ποΈ",
         
     | 
| 34 | 
         
            -
                #
         
     | 
| 35 | 
         
            -
                " 
     | 
| 36 | 
         
            -
                " 
     | 
| 37 | 
         
            -
                #
         
     | 
| 38 | 
         
            -
                " 
     | 
| 39 | 
         
            -
                "generate. 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 40 | 
         
             
                "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) β¬οΈ",
         
     | 
| 41 | 
         
            -
                 
     | 
| 42 | 
         
            -
                 
     | 
| 43 | 
         
            -
                "best_scored_model": "Best Scored LLM π",
         
     | 
| 44 | 
         
             
            }
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 45 | 
         
             
            ALL_COLUMNS_DATATYPES = [
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 46 | 
         
             
                "str",
         
     | 
| 47 | 
         
             
                "str",
         
     | 
| 48 | 
         
             
                "str",
         
     | 
| 49 | 
         
             
                "str",
         
     | 
| 50 | 
         
            -
                #
         
     | 
| 51 | 
         
            -
                " 
     | 
| 52 | 
         
            -
                " 
     | 
| 53 | 
         
            -
                 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 54 | 
         
             
                "number",
         
     | 
| 55 | 
         
             
                "number",
         
     | 
| 56 | 
         
             
                "number",
         
     | 
| 57 | 
         
            -
                "str",
         
     | 
| 58 | 
         
            -
                #
         
     | 
| 59 | 
         
            -
                "markdown",
         
     | 
| 60 | 
         
            -
            ]
         
     | 
| 61 | 
         
            -
            NO_DUPLICATES_COLUMNS = [
         
     | 
| 62 | 
         
            -
                "backend.name",
         
     | 
| 63 | 
         
            -
                "backend.torch_dtype",
         
     | 
| 64 | 
         
            -
                "optimizations",
         
     | 
| 65 | 
         
            -
                "quantization",
         
     | 
| 66 | 
         
            -
                #
         
     | 
| 67 | 
         
            -
                "weight_class",
         
     | 
| 68 | 
         
            -
                "model_type",
         
     | 
| 69 | 
         
             
            ]
         
     | 
| 70 | 
         
            -
            SORTING_COLUMN = ["best_score", "generate.latency(s)", "generate.peak_memory(MB)"]
         
     | 
| 71 | 
         
            -
            SORTING_ASCENDING = [False, True, True]
         
     | 
| 72 | 
         | 
| 73 | 
         
            -
            llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
         
     | 
| 74 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 75 | 
         | 
| 76 | 
         
            -
            def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
         
     | 
| 77 | 
         
            -
                if llm_perf_dataset_repo:
         
     | 
| 78 | 
         
            -
                    llm_perf_dataset_repo.git_pull()
         
     | 
| 79 | 
         
            -
                # load data
         
     | 
| 80 | 
         
            -
                benchmark_df = pd.read_csv(f"./llm-perf-dataset/reports/{benchmark}.csv")
         
     | 
| 81 | 
         
            -
                clusters_df = pd.read_csv("./llm-perf-dataset/Clustered-Open-LLM-Leaderboard.csv")
         
     | 
| 82 | 
         
             
                # merge on model
         
     | 
| 83 | 
         
            -
                merged_df =  
     | 
| 84 | 
         
            -
                    clusters_df, left_on="model", right_on="best_scored_model"
         
     | 
| 85 | 
         
            -
                )
         
     | 
| 86 | 
         
             
                # transpose energy consumption
         
     | 
| 87 | 
         
             
                merged_df["generate.energy_consumption(tokens/kWh)"] = (
         
     | 
| 88 | 
         
             
                    1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
         
     | 
| 
         @@ -91,38 +101,44 @@ def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"): 
     | 
|
| 91 | 
         
             
                merged_df.loc[
         
     | 
| 92 | 
         
             
                    merged_df["generate.energy_consumption(tokens/kWh)"] == 1,
         
     | 
| 93 | 
         
             
                    "generate.energy_consumption(tokens/kWh)",
         
     | 
| 94 | 
         
            -
                ] =  
     | 
| 95 | 
         
            -
                # add optimizations
         
     | 
| 96 | 
         
            -
                merged_df["optimizations"] = merged_df[ 
     | 
| 97 | 
         
            -
                     
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 98 | 
         
             
                )
         
     | 
| 99 | 
         
             
                # add quantization scheme
         
     | 
| 100 | 
         
            -
                merged_df["quantization"] = merged_df["backend. 
     | 
| 101 | 
         
             
                    lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
         
     | 
| 102 | 
         
             
                )
         
     | 
| 103 | 
         
            -
                #  
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 104 | 
         
             
                merged_df.sort_values(by=SORTING_COLUMN, ascending=SORTING_ASCENDING, inplace=True)
         
     | 
| 105 | 
         
            -
                #  
     | 
| 106 | 
         
            -
                merged_df. 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 107 | 
         
             
                return merged_df
         
     | 
| 108 | 
         | 
| 109 | 
         | 
| 110 | 
         
             
            def get_benchmark_table(bench_df):
         
     | 
| 111 | 
         
             
                copy_df = bench_df.copy()
         
     | 
| 112 | 
         
            -
                # filter
         
     | 
| 113 | 
         
            -
                copy_df = copy_df[list(ALL_COLUMNS_MAPPING.keys())]
         
     | 
| 114 | 
         
            -
                # rename
         
     | 
| 115 | 
         
            -
                copy_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
         
     | 
| 116 | 
         
             
                # transform
         
     | 
| 117 | 
         
            -
                copy_df[" 
     | 
| 118 | 
         
            -
                copy_df[" 
     | 
| 119 | 
         
            -
                    process_model_name
         
     | 
| 120 | 
         
            -
                )
         
     | 
| 121 | 
         
             
                # process quantization
         
     | 
| 122 | 
         
            -
                copy_df[" 
     | 
| 123 | 
         
            -
                    lambda x: f"{x[' 
     | 
| 124 | 
         
             
                    if x["Quantization ποΈ"] in ["BnB.4bit", "GPTQ.4bit"]
         
     | 
| 125 | 
         
            -
                    else x[" 
     | 
| 126 | 
         
             
                    axis=1,
         
     | 
| 127 | 
         
             
                )
         
     | 
| 128 | 
         
             
                return copy_df
         
     | 
| 
         @@ -130,17 +146,18 @@ def get_benchmark_table(bench_df): 
     | 
|
| 130 | 
         | 
| 131 | 
         
             
            def get_benchmark_chart(bench_df):
         
     | 
| 132 | 
         
             
                copy_df = bench_df.copy()
         
     | 
| 
         | 
|
| 
         | 
|
| 133 | 
         
             
                # filter latency bigger than 100s
         
     | 
| 134 | 
         
            -
                copy_df = copy_df[copy_df[" 
     | 
| 135 | 
         
            -
             
     | 
| 136 | 
         
            -
                copy_df["model_type"] = copy_df["model_type"].apply(process_model_type)
         
     | 
| 137 | 
         
             
                fig = px.scatter(
         
     | 
| 138 | 
         
             
                    copy_df,
         
     | 
| 139 | 
         
            -
                    y=" 
     | 
| 140 | 
         
            -
                    x=" 
     | 
| 141 | 
         
            -
                    size=" 
     | 
| 142 | 
         
            -
                    color=" 
     | 
| 143 | 
         
            -
                    custom_data=list(ALL_COLUMNS_MAPPING. 
     | 
| 144 | 
         
             
                    color_discrete_sequence=px.colors.qualitative.Light24,
         
     | 
| 145 | 
         
             
                )
         
     | 
| 146 | 
         
             
                fig.update_layout(
         
     | 
| 
         @@ -151,17 +168,17 @@ def get_benchmark_chart(bench_df): 
     | 
|
| 151 | 
         
             
                        "xanchor": "center",
         
     | 
| 152 | 
         
             
                        "yanchor": "top",
         
     | 
| 153 | 
         
             
                    },
         
     | 
| 154 | 
         
            -
                    xaxis_title="Per 1000  
     | 
| 155 | 
         
            -
                    yaxis_title="Open LLM Score (%)",
         
     | 
| 156 | 
         
            -
                    legend_title="LLM  
     | 
| 157 | 
         
             
                    width=1200,
         
     | 
| 158 | 
         
             
                    height=600,
         
     | 
| 159 | 
         
             
                )
         
     | 
| 160 | 
         
             
                fig.update_traces(
         
     | 
| 161 | 
         
             
                    hovertemplate="<br>".join(
         
     | 
| 162 | 
         
             
                        [
         
     | 
| 163 | 
         
            -
                            f"<b>{ 
     | 
| 164 | 
         
            -
                            for i,  
     | 
| 165 | 
         
             
                        ]
         
     | 
| 166 | 
         
             
                    )
         
     | 
| 167 | 
         
             
                )
         
     | 
| 
         @@ -176,17 +193,17 @@ def filter_query( 
     | 
|
| 176 | 
         
             
                quantization_scheme,
         
     | 
| 177 | 
         
             
                score,
         
     | 
| 178 | 
         
             
                memory,
         
     | 
| 179 | 
         
            -
                 
     | 
| 180 | 
         
             
            ):
         
     | 
| 181 | 
         
            -
                raw_df = get_benchmark_df( 
     | 
| 182 | 
         
             
                filtered_df = raw_df[
         
     | 
| 183 | 
         
            -
                    raw_df[" 
     | 
| 184 | 
         
            -
                    & raw_df[" 
     | 
| 185 | 
         
            -
                    & raw_df[" 
     | 
| 186 | 
         
             
                    & (
         
     | 
| 187 | 
         
             
                        pd.concat(
         
     | 
| 188 | 
         
             
                            [
         
     | 
| 189 | 
         
            -
                                raw_df[" 
     | 
| 190 | 
         
             
                                for optimization in optimizations
         
     | 
| 191 | 
         
             
                            ],
         
     | 
| 192 | 
         
             
                            axis=1,
         
     | 
| 
         @@ -197,7 +214,7 @@ def filter_query( 
     | 
|
| 197 | 
         
             
                    & (
         
     | 
| 198 | 
         
             
                        pd.concat(
         
     | 
| 199 | 
         
             
                            [
         
     | 
| 200 | 
         
            -
                                raw_df[" 
     | 
| 201 | 
         
             
                                for quantization in quantization_scheme
         
     | 
| 202 | 
         
             
                            ],
         
     | 
| 203 | 
         
             
                            axis=1,
         
     | 
| 
         @@ -205,8 +222,8 @@ def filter_query( 
     | 
|
| 205 | 
         
             
                        if len(quantization_scheme) > 0
         
     | 
| 206 | 
         
             
                        else True
         
     | 
| 207 | 
         
             
                    )
         
     | 
| 208 | 
         
            -
                    & (raw_df[" 
     | 
| 209 | 
         
            -
                    & (raw_df[" 
     | 
| 210 | 
         
             
                ]
         
     | 
| 211 | 
         
             
                filtered_table = get_benchmark_table(filtered_df)
         
     | 
| 212 | 
         
             
                filtered_chart = get_benchmark_chart(filtered_df)
         
     | 
| 
         @@ -222,29 +239,29 @@ with demo: 
     | 
|
| 222 | 
         
             
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="descriptive-text")
         
     | 
| 223 | 
         | 
| 224 | 
         
             
                with gr.Tabs(elem_classes="leaderboard-tabs"):
         
     | 
| 225 | 
         
            -
                     
     | 
| 226 | 
         
            -
                     
     | 
| 227 | 
         
            -
                     
     | 
| 228 | 
         
             
                    ####################### HARDWARE TABS #######################
         
     | 
| 229 | 
         
            -
                    for i, ( 
     | 
| 230 | 
         
            -
                        # dummy placeholder of the  
     | 
| 231 | 
         
            -
                         
     | 
| 232 | 
         
            -
             
     | 
| 233 | 
         
            -
             
     | 
| 
         | 
|
| 234 | 
         
             
                                # placeholder for full dataframe
         
     | 
| 235 | 
         
            -
                                 
     | 
| 236 | 
         
             
                                with gr.TabItem("Leaderboard 🏅", id=0):
         
     | 
| 237 | 
         
             
                                    gr.HTML(
         
     | 
| 238 | 
         
             
                                        "π Scroll to the right π for additional columns.",
         
     | 
| 239 | 
         
             
                                        elem_id="descriptive-text",
         
     | 
| 240 | 
         
             
                                    )
         
     | 
| 241 | 
         
             
                                    # Original leaderboard table
         
     | 
| 242 | 
         
            -
                                     
     | 
| 243 | 
         
            -
                                        value=get_benchmark_table( 
     | 
| 244 | 
         
             
                                        headers=list(ALL_COLUMNS_MAPPING.values()),
         
     | 
| 245 | 
         
             
                                        datatype=ALL_COLUMNS_DATATYPES,
         
     | 
| 246 | 
         
            -
                                        elem_id=" 
     | 
| 247 | 
         
            -
                                        # show_label=False,
         
     | 
| 248 | 
         
             
                                    )
         
     | 
| 249 | 
         
             
                                with gr.TabItem("Plot π", id=1):
         
     | 
| 250 | 
         
             
                                    gr.HTML(
         
     | 
| 
         @@ -252,13 +269,13 @@ with demo: 
     | 
|
| 252 | 
         
             
                                        elem_id="descriptive-text",
         
     | 
| 253 | 
         
             
                                    )
         
     | 
| 254 | 
         
             
                                    # Original leaderboard plot
         
     | 
| 255 | 
         
            -
                                     
     | 
| 256 | 
         
            -
                                        value=get_benchmark_chart( 
     | 
| 257 | 
         
            -
                                        elem_id=" 
     | 
| 258 | 
         
             
                                        show_label=False,
         
     | 
| 259 | 
         
             
                                    )
         
     | 
| 260 | 
         | 
| 261 | 
         
            -
                     
     | 
| 262 | 
         
             
                    with gr.TabItem("Control Panel ποΈ", id=2):
         
     | 
| 263 | 
         
             
                        gr.HTML(
         
     | 
| 264 | 
         
             
                            "Use this control panel to filter the leaderboard's table and plot.",  # noqa: E501
         
     | 
| 
         @@ -328,7 +345,7 @@ with demo: 
     | 
|
| 328 | 
         
             
                                value="Filter π",
         
     | 
| 329 | 
         
             
                                elem_id="filter-button",
         
     | 
| 330 | 
         
             
                            )
         
     | 
| 331 | 
         
            -
                        for  
     | 
| 332 | 
         
             
                            filter_button.click(
         
     | 
| 333 | 
         
             
                                filter_query,
         
     | 
| 334 | 
         
             
                                [
         
     | 
| 
         @@ -339,9 +356,9 @@ with demo: 
     | 
|
| 339 | 
         
             
                                    quantization_checkboxes,
         
     | 
| 340 | 
         
             
                                    score_slider,
         
     | 
| 341 | 
         
             
                                    memory_slider,
         
     | 
| 342 | 
         
            -
                                     
     | 
| 343 | 
         
             
                                ],
         
     | 
| 344 | 
         
            -
                                [ 
     | 
| 345 | 
         
             
                            )
         
     | 
| 346 | 
         | 
| 347 | 
         
             
                    ####################### ABOUT TAB #######################
         
     | 
| 
         @@ -356,18 +373,8 @@ with demo: 
     | 
|
| 356 | 
         
             
                            value=CITATION_BUTTON_TEXT,
         
     | 
| 357 | 
         
             
                            label=CITATION_BUTTON_LABEL,
         
     | 
| 358 | 
         
             
                            elem_id="citation-button",
         
     | 
| 359 | 
         
            -
             
     | 
| 360 | 
         
            -
             
     | 
| 361 | 
         
            -
             
     | 
| 362 | 
         
            -
            # Restart space every hour
         
     | 
| 363 | 
         
            -
            scheduler = BackgroundScheduler()
         
     | 
| 364 | 
         
            -
            scheduler.add_job(
         
     | 
| 365 | 
         
            -
                restart_space,
         
     | 
| 366 | 
         
            -
                "interval",
         
     | 
| 367 | 
         
            -
                seconds=3600,
         
     | 
| 368 | 
         
            -
                args=[LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN],
         
     | 
| 369 | 
         
            -
            )
         
     | 
| 370 | 
         
            -
            scheduler.start()
         
     | 
| 371 | 
         | 
| 372 | 
         
             
            # Launch demo
         
     | 
| 373 | 
         
            -
            demo. 
     | 
| 
         | 
|
| 1 | 
         
             
            import os
         
     | 
| 2 | 
         
            +
             
     | 
| 3 | 
         
             
            import gradio as gr
         
     | 
| 4 | 
         
             
            import pandas as pd
         
     | 
| 5 | 
         
             
            import plotly.express as px
         
     | 
| 6 | 
         
            +
            from huggingface_hub.file_download import hf_hub_download
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         | 
| 9 | 
         
            +
            from src.utils import process_model_name, process_model_arch
         
     | 
| 10 | 
         
             
            from src.assets.css_html_js import custom_css
         
     | 
| 11 | 
         
             
            from src.assets.text_content import (
         
     | 
| 12 | 
         
             
                TITLE,
         
     | 
| 
         | 
|
| 13 | 
         
             
                ABOUT_TEXT,
         
     | 
| 14 | 
         
            +
                INTRODUCTION_TEXT,
         
     | 
| 15 | 
         
             
                EXAMPLE_CONFIG_TEXT,
         
     | 
| 16 | 
         
             
                CITATION_BUTTON_LABEL,
         
     | 
| 17 | 
         
             
                CITATION_BUTTON_TEXT,
         
     | 
| 18 | 
         
             
            )
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 19 | 
         | 
| 20 | 
         
            +
            HF_TOKEN = os.environ.get("HF_TOKEN", None)
         
     | 
| 
         | 
|
| 
         | 
|
| 21 | 
         
             
            LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
         
     | 
| 22 | 
         
            +
            MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
         
     | 
| 
         | 
|
| 23 | 
         
             
            ALL_COLUMNS_MAPPING = {
         
     | 
| 24 | 
         
            +
                # model
         
     | 
| 25 | 
         
            +
                "Model": "Model 🤗",
         
     | 
| 26 | 
         
            +
                "Arch": "Arch ποΈ",
         
     | 
| 27 | 
         
            +
                "Size": "Size ποΈ",
         
     | 
| 28 | 
         
            +
                # deployment settings
         
     | 
| 29 | 
         
             
                "backend.name": "Backend π",
         
     | 
| 30 | 
         
             
                "backend.torch_dtype": "Dtype π₯",
         
     | 
| 31 | 
         
             
                "optimizations": "Optimizations 🛠️",
         
     | 
| 32 | 
         
             
                "quantization": "Quantization ποΈ",
         
     | 
| 33 | 
         
            +
                # throughput measurements
         
     | 
| 34 | 
         
            +
                "decode.throughput(tokens/s)": "Decode Throughput (tokens/s) β¬οΈ",
         
     | 
| 35 | 
         
            +
                "generate.throughput(tokens/s)": "E2E Throughput (tokens/s) β¬οΈ",
         
     | 
| 36 | 
         
            +
                # latency measurements
         
     | 
| 37 | 
         
            +
                "forward.latency(s)": "Prefill Latency (s) β¬οΈ",
         
     | 
| 38 | 
         
            +
                "generate.latency(s)": "E2E Latency (s) β¬οΈ",
         
     | 
| 39 | 
         
            +
                # memory measurements
         
     | 
| 40 | 
         
            +
                "generate.max_memory_allocated(MB)": "Allocated Memory (MB) β¬οΈ",
         
     | 
| 41 | 
         
            +
                "generate.max_memory_reserved(MB)": "Reserved Memory (MB) β¬οΈ",
         
     | 
| 42 | 
         
            +
                "generate.max_memory_used(MB)": "Used Memory (MB) β¬οΈ",
         
     | 
| 43 | 
         
            +
                # energy measurements
         
     | 
| 44 | 
         
             
                "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) β¬οΈ",
         
     | 
| 45 | 
         
            +
                # quality measurements
         
     | 
| 46 | 
         
            +
                "Score": "Avg Score (%) β¬οΈ",
         
     | 
| 
         | 
|
| 47 | 
         
             
            }
         
     | 
| 48 | 
         
            +
            SORTING_COLUMN = ["Score", "generate.throughput(tokens/s)"]
         
     | 
| 49 | 
         
            +
            SORTING_ASCENDING = [False, True]
         
     | 
| 50 | 
         
            +
             
     | 
| 51 | 
         
             
            # Per-column datatypes handed to the leaderboard ``gr.components.Dataframe``
            # via ``datatype=``, next to ``headers=list(ALL_COLUMNS_MAPPING.values())``.
            # Entries are positional; presumably they follow the order of
            # ALL_COLUMNS_MAPPING -- confirm when adding or removing a column.
            ALL_COLUMNS_DATATYPES = (
                # open llm
                ["markdown", "markdown", "number"]
                # deployment settings
                + ["str"] * 4
                # measurements
                + ["number"] * 10
            )
         
     | 
| 
         | 
|
| 
         | 
|
| 73 | 
         | 
| 
         | 
|
| 74 | 
         | 
| 75 | 
         
            +
            def get_benchmark_df(machine="hf-dgx-01"):
         
     | 
| 76 | 
         
            +
                # download data
         
     | 
| 77 | 
         
            +
                hf_hub_download(
         
     | 
| 78 | 
         
            +
                    repo_id="optimum/llm-perf-dataset",
         
     | 
| 79 | 
         
            +
                    filename="open-llm.csv",
         
     | 
| 80 | 
         
            +
                    local_dir="dataset",
         
     | 
| 81 | 
         
            +
                    repo_type="dataset",
         
     | 
| 82 | 
         
            +
                    token=HF_TOKEN,
         
     | 
| 83 | 
         
            +
                )
         
     | 
| 84 | 
         
            +
                hf_hub_download(
         
     | 
| 85 | 
         
            +
                    repo_id="optimum/llm-perf-dataset",
         
     | 
| 86 | 
         
            +
                    filename=f"{machine}/full-report.csv",
         
     | 
| 87 | 
         
            +
                    local_dir="dataset",
         
     | 
| 88 | 
         
            +
                    repo_type="dataset",
         
     | 
| 89 | 
         
            +
                    token=HF_TOKEN,
         
     | 
| 90 | 
         
            +
                )
         
     | 
| 91 | 
         
            +
                open_llm = pd.read_csv("dataset/open-llm.csv")
         
     | 
| 92 | 
         
            +
                full_report = pd.read_csv(f"dataset/{machine}/full-report.csv")
         
     | 
| 93 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 94 | 
         
             
                # merge on model
         
     | 
| 95 | 
         
            +
                merged_df = open_llm.merge(full_report, left_on="Model", right_on="model")
         
     | 
| 
         | 
|
| 
         | 
|
| 96 | 
         
             
                # transpose energy consumption
         
     | 
| 97 | 
         
             
                merged_df["generate.energy_consumption(tokens/kWh)"] = (
         
     | 
| 98 | 
         
             
                    1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
         
     | 
| 
         | 
|
| 101 | 
         
             
                merged_df.loc[
         
     | 
| 102 | 
         
             
                    merged_df["generate.energy_consumption(tokens/kWh)"] == 1,
         
     | 
| 103 | 
         
             
                    "generate.energy_consumption(tokens/kWh)",
         
     | 
| 104 | 
         
            +
                ] = pd.NA
         
     | 
| 105 | 
         
            +
                # add optimizations column
         
     | 
| 106 | 
         
            +
                merged_df["optimizations"] = merged_df[
         
     | 
| 107 | 
         
            +
                    ["backend.to_bettertransformer", "backend.use_flash_attention_2"]
         
     | 
| 108 | 
         
            +
                ].apply(
         
     | 
| 109 | 
         
            +
                    lambda x: "BetterTransformer"
         
     | 
| 110 | 
         
            +
                    if x["backend.to_bettertransformer"]
         
     | 
| 111 | 
         
            +
                    else ("FlashAttentionV2" if x["backend.use_flash_attention_2"] else "None"),
         
     | 
| 112 | 
         
            +
                    axis=1,
         
     | 
| 113 | 
         
             
                )
         
     | 
| 114 | 
         
             
                # add quantization scheme
         
     | 
| 115 | 
         
            +
                merged_df["quantization"] = merged_df["backend.quantization_scheme"].apply(
         
     | 
| 116 | 
         
             
                    lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
         
     | 
| 117 | 
         
             
                )
         
     | 
| 118 | 
         
            +
                # add decode throughput
         
     | 
| 119 | 
         
            +
                merged_df["decode.throughput(tokens/s)"] = (
         
     | 
| 120 | 
         
            +
                    1000 / (merged_df["generate.latency(s)"] - merged_df["forward.latency(s)"])
         
     | 
| 121 | 
         
            +
                ).round(2)
         
     | 
| 122 | 
         
            +
                # sort by metric
         
     | 
| 123 | 
         
             
                merged_df.sort_values(by=SORTING_COLUMN, ascending=SORTING_ASCENDING, inplace=True)
         
     | 
| 124 | 
         
            +
                # filter columns
         
     | 
| 125 | 
         
            +
                merged_df = merged_df[list(ALL_COLUMNS_MAPPING.keys())]
         
     | 
| 126 | 
         
            +
                # rename columns
         
     | 
| 127 | 
         
            +
                merged_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
         
     | 
| 128 | 
         
            +
             
     | 
| 129 | 
         
             
                return merged_df
         
     | 
| 130 | 
         | 
| 131 | 
         | 
| 132 | 
         
             
            def get_benchmark_table(bench_df):
         
     | 
| 133 | 
         
             
                copy_df = bench_df.copy()
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 134 | 
         
             
                # transform
         
     | 
| 135 | 
         
            +
                copy_df["Model π€"] = copy_df["Model π€"].apply(process_model_name)
         
     | 
| 136 | 
         
            +
                copy_df["Arch ποΈ"] = copy_df["Arch ποΈ"].apply(process_model_arch)
         
     | 
| 
         | 
|
| 
         | 
|
| 137 | 
         
             
                # process quantization
         
     | 
| 138 | 
         
            +
                copy_df["Avg Score (%) β¬οΈ"] = copy_df.apply(
         
     | 
| 139 | 
         
            +
                    lambda x: f"{x['Avg Score (%) β¬οΈ']}**"
         
     | 
| 140 | 
         
             
                    if x["Quantization ποΈ"] in ["BnB.4bit", "GPTQ.4bit"]
         
     | 
| 141 | 
         
            +
                    else x["Avg Score (%) β¬οΈ"],
         
     | 
| 142 | 
         
             
                    axis=1,
         
     | 
| 143 | 
         
             
                )
         
     | 
| 144 | 
         
             
                return copy_df
         
     | 
| 
         | 
|
| 146 | 
         | 
| 147 | 
         
             
            def get_benchmark_chart(bench_df):
         
     | 
| 148 | 
         
             
                copy_df = bench_df.copy()
         
     | 
| 149 | 
         
            +
                # transform
         
     | 
| 150 | 
         
            +
                copy_df["Arch ποΈ"] = copy_df["Arch ποΈ"].apply(process_model_arch)
         
     | 
| 151 | 
         
             
                # filter latency bigger than 100s
         
     | 
| 152 | 
         
            +
                # copy_df = copy_df[copy_df["E2E Latency (s) β¬οΈ"] <= 100]
         
     | 
| 153 | 
         
            +
             
     | 
| 
         | 
|
| 154 | 
         
             
                fig = px.scatter(
         
     | 
| 155 | 
         
             
                    copy_df,
         
     | 
| 156 | 
         
            +
                    y="Avg Score (%) β¬οΈ",
         
     | 
| 157 | 
         
            +
                    x="E2E Latency (s) β¬οΈ",
         
     | 
| 158 | 
         
            +
                    size="Allocated Memory (MB) β¬οΈ",
         
     | 
| 159 | 
         
            +
                    color="Arch ποΈ",
         
     | 
| 160 | 
         
            +
                    custom_data=list(ALL_COLUMNS_MAPPING.values()),
         
     | 
| 161 | 
         
             
                    color_discrete_sequence=px.colors.qualitative.Light24,
         
     | 
| 162 | 
         
             
                )
         
     | 
| 163 | 
         
             
                fig.update_layout(
         
     | 
| 
         | 
|
| 168 | 
         
             
                        "xanchor": "center",
         
     | 
| 169 | 
         
             
                        "yanchor": "top",
         
     | 
| 170 | 
         
             
                    },
         
     | 
| 171 | 
         
            +
                    xaxis_title="Per 1000 Tokens Latency (s)",
         
     | 
| 172 | 
         
            +
                    yaxis_title="Avg Open LLM Score (%)",
         
     | 
| 173 | 
         
            +
                    legend_title="LLM Architecture",
         
     | 
| 174 | 
         
             
                    width=1200,
         
     | 
| 175 | 
         
             
                    height=600,
         
     | 
| 176 | 
         
             
                )
         
     | 
| 177 | 
         
             
                fig.update_traces(
         
     | 
| 178 | 
         
             
                    hovertemplate="<br>".join(
         
     | 
| 179 | 
         
             
                        [
         
     | 
| 180 | 
         
            +
                            f"<b>{column}:</b> %{{customdata[{i}]}}"
         
     | 
| 181 | 
         
            +
                            for i, column in enumerate(ALL_COLUMNS_MAPPING.values())
         
     | 
| 182 | 
         
             
                        ]
         
     | 
| 183 | 
         
             
                    )
         
     | 
| 184 | 
         
             
                )
         
     | 
| 
         | 
|
| 193 | 
         
             
                quantization_scheme,
         
     | 
| 194 | 
         
             
                score,
         
     | 
| 195 | 
         
             
                memory,
         
     | 
| 196 | 
         
            +
                machine,
         
     | 
| 197 | 
         
             
            ):
         
     | 
| 198 | 
         
            +
                raw_df = get_benchmark_df(machine=machine)
         
     | 
| 199 | 
         
             
                filtered_df = raw_df[
         
     | 
| 200 | 
         
            +
                    raw_df["Model π€"].str.contains(text, case=False)
         
     | 
| 201 | 
         
            +
                    & raw_df["Backend π"].isin(backends)
         
     | 
| 202 | 
         
            +
                    & raw_df["Dtype π₯"].isin(datatypes)
         
     | 
| 203 | 
         
             
                    & (
         
     | 
| 204 | 
         
             
                        pd.concat(
         
     | 
| 205 | 
         
             
                            [
         
     | 
| 206 | 
         
            +
                                raw_df["Optimizations π οΈ"].str.contains(optimization, case=False)
         
     | 
| 207 | 
         
             
                                for optimization in optimizations
         
     | 
| 208 | 
         
             
                            ],
         
     | 
| 209 | 
         
             
                            axis=1,
         
     | 
| 
         | 
|
| 214 | 
         
             
                    & (
         
     | 
| 215 | 
         
             
                        pd.concat(
         
     | 
| 216 | 
         
             
                            [
         
     | 
| 217 | 
         
            +
                                raw_df["Quantization ποΈ"].str.contains(quantization, case=False)
         
     | 
| 218 | 
         
             
                                for quantization in quantization_scheme
         
     | 
| 219 | 
         
             
                            ],
         
     | 
| 220 | 
         
             
                            axis=1,
         
     | 
| 
         | 
|
| 222 | 
         
             
                        if len(quantization_scheme) > 0
         
     | 
| 223 | 
         
             
                        else True
         
     | 
| 224 | 
         
             
                    )
         
     | 
| 225 | 
         
            +
                    & (raw_df["Avg Score (%) β¬οΈ"] >= score)
         
     | 
| 226 | 
         
            +
                    & (raw_df["Allocated Memory (MB) β¬οΈ"] <= memory)
         
     | 
| 227 | 
         
             
                ]
         
     | 
| 228 | 
         
             
                filtered_table = get_benchmark_table(filtered_df)
         
     | 
| 229 | 
         
             
                filtered_chart = get_benchmark_chart(filtered_df)
         
     | 
| 
         | 
|
| 239 | 
         
             
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="descriptive-text")
         
     | 
| 240 | 
         | 
| 241 | 
         
             
                with gr.Tabs(elem_classes="leaderboard-tabs"):
         
     | 
| 242 | 
         
            +
                    machine_placeholders = {}
         
     | 
| 243 | 
         
            +
                    machine_tables = {}
         
     | 
| 244 | 
         
            +
                    machine_plots = {}
         
     | 
| 245 | 
         
             
                    ####################### HARDWARE TABS #######################
         
     | 
| 246 | 
         
            +
                    for i, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
         
     | 
| 247 | 
         
            +
                        # dummy placeholder of the machine name
         
     | 
| 248 | 
         
            +
                        machine_placeholders[machine] = gr.Textbox(value=machine, visible=False)
         
     | 
| 249 | 
         
            +
             
     | 
| 250 | 
         
            +
                        with gr.TabItem(hardware, id=i):
         
     | 
| 251 | 
         
            +
                            with gr.Tabs(elem_classes="machine-tabs"):
         
     | 
| 252 | 
         
             
                                # placeholder for full dataframe
         
     | 
| 253 | 
         
            +
                                machine_df = get_benchmark_df(machine=machine)
         
     | 
| 254 | 
         
             
                                with gr.TabItem("Leaderboard π
", id=0):
         
     | 
| 255 | 
         
             
                                    gr.HTML(
         
     | 
| 256 | 
         
             
                                        "π Scroll to the right π for additional columns.",
         
     | 
| 257 | 
         
             
                                        elem_id="descriptive-text",
         
     | 
| 258 | 
         
             
                                    )
         
     | 
| 259 | 
         
             
                                    # Original leaderboard table
         
     | 
| 260 | 
         
            +
                                    machine_tables[machine] = gr.components.Dataframe(
         
     | 
| 261 | 
         
            +
                                        value=get_benchmark_table(machine_df),
         
     | 
| 262 | 
         
             
                                        headers=list(ALL_COLUMNS_MAPPING.values()),
         
     | 
| 263 | 
         
             
                                        datatype=ALL_COLUMNS_DATATYPES,
         
     | 
| 264 | 
         
            +
                                        elem_id="machine-table",
         
     | 
| 
         | 
|
| 265 | 
         
             
                                    )
         
     | 
| 266 | 
         
             
                                with gr.TabItem("Plot π", id=1):
         
     | 
| 267 | 
         
             
                                    gr.HTML(
         
     | 
| 
         | 
|
| 269 | 
         
             
                                        elem_id="descriptive-text",
         
     | 
| 270 | 
         
             
                                    )
         
     | 
| 271 | 
         
             
                                    # Original leaderboard plot
         
     | 
| 272 | 
         
            +
                                    machine_plots[machine] = gr.components.Plot(
         
     | 
| 273 | 
         
            +
                                        value=get_benchmark_chart(machine_df),
         
     | 
| 274 | 
         
            +
                                        elem_id="machine-plot",
         
     | 
| 275 | 
         
             
                                        show_label=False,
         
     | 
| 276 | 
         
             
                                    )
         
     | 
| 277 | 
         | 
| 278 | 
         
            +
                    ###################### CONTROL PANEL #######################
         
     | 
| 279 | 
         
             
                    with gr.TabItem("Control Panel ποΈ", id=2):
         
     | 
| 280 | 
         
             
                        gr.HTML(
         
     | 
| 281 | 
         
             
                            "Use this control panel to filter the leaderboard's table and plot.",  # noqa: E501
         
     | 
| 
         | 
|
| 345 | 
         
             
                                value="Filter π",
         
     | 
| 346 | 
         
             
                                elem_id="filter-button",
         
     | 
| 347 | 
         
             
                            )
         
     | 
| 348 | 
         
            +
                        for machine in MACHINE_TO_HARDWARE:
         
     | 
| 349 | 
         
             
                            filter_button.click(
         
     | 
| 350 | 
         
             
                                filter_query,
         
     | 
| 351 | 
         
             
                                [
         
     | 
| 
         | 
|
| 356 | 
         
             
                                    quantization_checkboxes,
         
     | 
| 357 | 
         
             
                                    score_slider,
         
     | 
| 358 | 
         
             
                                    memory_slider,
         
     | 
| 359 | 
         
            +
                                    machine_placeholders[machine],
         
     | 
| 360 | 
         
             
                                ],
         
     | 
| 361 | 
         
            +
                                [machine_tables[machine], machine_plots[machine]],
         
     | 
| 362 | 
         
             
                            )
         
     | 
| 363 | 
         | 
| 364 | 
         
             
                    ####################### ABOUT TAB #######################
         
     | 
| 
         | 
|
| 373 | 
         
             
                            value=CITATION_BUTTON_TEXT,
         
     | 
| 374 | 
         
             
                            label=CITATION_BUTTON_LABEL,
         
     | 
| 375 | 
         
             
                            elem_id="citation-button",
         
     | 
| 376 | 
         
            +
                            show_copy_button=True,
         
     | 
| 377 | 
         
            +
                        )
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 378 | 
         | 
| 379 | 
         
             
            # Launch demo
         
     | 
| 380 | 
         
            +
            demo.launch(show_api=False)
         
     | 
    	
        requirements.txt
    CHANGED
    
    | 
         @@ -1,5 +1,4 @@ 
     | 
|
| 1 | 
         
            -
             
     | 
| 2 | 
         
            -
             
     | 
| 3 | 
         
            -
             
     | 
| 4 | 
         
            -
             
     | 
| 5 | 
         
            -
            pandas==2.1.0
         
     | 
| 
         | 
|
| 1 | 
         
            +
            huggingface_hub
         
     | 
| 2 | 
         
            +
            gradio
         
     | 
| 3 | 
         
            +
            plotly
         
     | 
| 4 | 
         
            +
            pandas
         
     | 
| 
         | 
    	
        src/utils.py
    CHANGED
    
    | 
         @@ -17,26 +17,7 @@ def change_tab(query_param): 
     | 
|
| 17 | 
         
             
                    return gr.Tabs.update(selected=0)
         
     | 
| 18 | 
         | 
| 19 | 
         | 
| 20 | 
         
            -
             
     | 
| 21 | 
         
            -
                HfApi().restart_space(repo_id=LLM_PERF_LEADERBOARD_REPO, token=OPTIMUM_TOKEN)
         
     | 
| 22 | 
         
            -
             
     | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
            -
            def load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN):
         
     | 
| 25 | 
         
            -
                llm_perf_dataset_repo = None
         
     | 
| 26 | 
         
            -
                if OPTIMUM_TOKEN:
         
     | 
| 27 | 
         
            -
                    print("Loading LLM-Perf-Dataset from Hub...")
         
     | 
| 28 | 
         
            -
                    llm_perf_dataset_repo = Repository(
         
     | 
| 29 | 
         
            -
                        local_dir="./llm-perf-dataset",
         
     | 
| 30 | 
         
            -
                        clone_from=LLM_PERF_DATASET_REPO,
         
     | 
| 31 | 
         
            -
                        token=OPTIMUM_TOKEN,
         
     | 
| 32 | 
         
            -
                        repo_type="dataset",
         
     | 
| 33 | 
         
            -
                    )
         
     | 
| 34 | 
         
            -
                    llm_perf_dataset_repo.git_pull()
         
     | 
| 35 | 
         
            -
             
     | 
| 36 | 
         
            -
                return llm_perf_dataset_repo
         
     | 
| 37 | 
         
            -
             
     | 
| 38 | 
         
            -
             
     | 
| 39 | 
         
            -
            LLM_MODEL_TYPES = {
         
     | 
| 40 | 
         
             
                # branded ?
         
     | 
| 41 | 
         
             
                "gpt_bigcode": "GPT-BigCode πΈ",
         
     | 
| 42 | 
         
             
                "RefinedWebModel": "Falcon π¦
",
         
     | 
| 
         @@ -69,8 +50,8 @@ def process_model_name(model_name): 
     | 
|
| 69 | 
         
             
                return model_hyperlink(link, model_name)
         
     | 
| 70 | 
         | 
| 71 | 
         | 
| 72 | 
         
            -
            def  
     | 
| 73 | 
         
            -
                if  
     | 
| 74 | 
         
            -
                    return  
     | 
| 75 | 
         
             
                else:
         
     | 
| 76 | 
         
            -
                    return  
     | 
| 
         | 
|
| 17 | 
         
             
                    return gr.Tabs.update(selected=0)
         
     | 
| 18 | 
         | 
| 19 | 
         | 
| 20 | 
         
            +
            LLM_MODEL_ARCHS = {
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 21 | 
         
             
                # branded ?
         
     | 
| 22 | 
         
             
                "gpt_bigcode": "GPT-BigCode πΈ",
         
     | 
| 23 | 
         
             
                "RefinedWebModel": "Falcon π¦
",
         
     | 
| 
         | 
|
| 50 | 
         
             
                return model_hyperlink(link, model_name)
         
     | 
| 51 | 
         | 
| 52 | 
         | 
| 53 | 
         
            +
            def process_model_arch(model_arch):
                """Return the display label for ``model_arch``.

                The value is looked up in ``LLM_MODEL_ARCHS``; an architecture with no
                entry there is returned unchanged.
                """
                return LLM_MODEL_ARCHS.get(model_arch, model_arch)
         
     |