import gradio as gr
import pandas as pd
import requests
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
from src.about import (
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from huggingface_hub import HfApi
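# Hub client used to pause/restart this Space; last_updated holds the HTML
# "last updated" banner rendered under the leaderboard table.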
api = HfApi()
last_updated = "---"


def set_last_update():
    # Rebuild the right-aligned "last updated" banner with the current time.
    global last_updated
    last_updated = "<p style='text-align: right; padding-right: 5px;'>" + datetime.now().strftime(
        'Last updated at %Y-%m-%d %H:%M:%S') + "</p>"


def get_evaluation():
    # Fetch the latest scores from the evaluation API and shape them into a
    # display-ready DataFrame; fall back to a placeholder table if the request
    # fails or the JSON payload cannot be processed.
    try:
        response = requests.get("http://aim100.qinference.com/api/leaderboard/list", timeout=10)
        data_json = response.json()
        df = pd.DataFrame(data_json)
        # Render missing (zero) scores as '-'
        for col in df.columns:
            df.loc[df[col] == 0, col] = '-'
        # Prepend a 1-based rank column
        df.insert(0, 'No', df.reset_index().index + 1)
        ret = df.drop(columns='nodeSeq').rename(columns={'modelName': 'Model'})
        ret.columns = [x.capitalize() for x in ret.columns]
    except (requests.RequestException, ValueError, KeyError):
        ret = default_evaluation()

    set_last_update()
    return ret


def default_evaluation():
    # Placeholder table shown when the leaderboard API is unreachable.

    default_data = [{
        "No": "-",
        "Model": "-",
        "Total": "-",
        "Inference": "-",
        "Grammar": "-",
        "Understanding": "-",
        "Coding": "-",
        "Math": "-",
        "Writing": "-",
        "Etc": "-"
    }]
    df = pd.DataFrame(default_data)
    set_last_update()
    return df


# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    query: str,
):
    # Case-insensitive substring match on the model name; the query is treated
    # as a literal string so regex metacharacters cannot break the search.
    filtered_df = hidden_df[hidden_df["Model"].str.contains(query, case=False, regex=False)]
    return filtered_df


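# Load the leaderboard once at startup; the copy is what the visible table shows.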
original_df = get_evaluation()
leaderboard_df = original_df.copy()


def restart_leaderboard():
    # Rather than refreshing the DataFrames in place, pause and restart the
    # Space so the whole app is rebuilt with fresh data from the API.
    repo_id = "qinference/AIM100Leaderboard"
    api.pause_space(repo_id=repo_id)
    api.restart_space(repo_id=repo_id)
    set_last_update()

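# Build the Gradio UI.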
leaderboard = gr.Blocks(css=custom_css)
with leaderboard:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder=" πŸ” Search for your model type and press ENTER...",
                    show_label=False,
                    elem_id="search-bar",
                )
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard_df,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            # Hidden, unfiltered copy of the leaderboard so every search filters the
            # full data (e.g. after the user deletes characters from the query)
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=original_df,
                visible=False,
            )
            search_bar.submit(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
                    search_bar
                ],
                leaderboard_table,
            )
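            # Restart the Space every 30 minutes so a fresh leaderboard is fetched.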
            scheduler = BackgroundScheduler()
            scheduler.add_job(restart_leaderboard, "interval", seconds=1800)
            scheduler.start()
            with gr.Row():
                gr.HTML(last_updated)
        with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

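# Queue requests (up to 40 concurrent) and start the app.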
leaderboard.queue(default_concurrency_limit=40).launch()