import gradio as gr
import pandas as pd
import requests
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
from src.about import (
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from huggingface_hub import HfApi
api = HfApi()
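# Seed the "last updated" banner; get_evaluation() refreshes it on every data fetch.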
last_updated = datetime.now().strftime('Last updated at %Y-%m-%d %H:%M:%S')


def get_evaluation():
    """Fetch the latest leaderboard scores from the API and format them for display."""
    global last_updated
    try:
        response = requests.get("http://aim100.qinference.com/api/leaderboard/list", timeout=10)
        response.raise_for_status()
        data_json = response.json()
        df = pd.DataFrame(data_json)
        # Show zero scores as dashes so missing results read as "not evaluated".
        for col in df.columns:
            df.loc[df[col] == 0, col] = '-'
        # Add a 1-based rank column and tidy up the column names.
        df.insert(0, 'No', range(1, len(df) + 1))
        ret = df.drop(columns='nodeSeq').rename(columns={'modelName': 'Model'})
        ret.columns = [x.capitalize() for x in ret.columns]
    except (requests.RequestException, ValueError, KeyError):
        # Fall back to the placeholder table if the API is unreachable or returns unexpected data.
        ret = default_evaluation()

    last_updated = (
        "<p style='text-align: right; padding-right: 5px;'>"
        + datetime.now().strftime('Last updated at %Y-%m-%d %H:%M:%S')
        + "</p>"
    )
    return ret


def default_evaluation():
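    """Return a placeholder table filled with dashes when live data is unavailable."""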
    global last_updated

    default_data = [{
        "No": "-",
        "Model": "-",
        "Total": "-",
        "Inference": "-",
        "Grammar": "-",
        "Understanding": "-",
        "Coding": "-",
        "Math": "-",
        "Writing": "-",
        "Etc": "-"
    }]
    df = pd.DataFrame(default_data)
    last_updated = datetime.now().strftime('Last updated at %Y-%m-%d %H:%M:%S')
    return df


# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    query: str,
):
    filtered_df = hidden_df[hidden_df["Model"].str.contains(query, case=False, regex=False)]
    return filtered_df


original_df = get_evaluation()
leaderboard_df = original_df.copy()


def restart_leaderboard():
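    """Refresh the cached dataframes and restart the Space so the UI picks up the new data."""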
    global original_df, leaderboard_df
    original_df = get_evaluation()
    leaderboard_df = original_df.copy()
    repo_id = "qinference/AIM100Leaderboard"
    # pause_space/restart_space operate on Spaces only, so no repo_type argument is needed.
    api.pause_space(repo_id=repo_id)
    api.restart_space(repo_id=repo_id)


leaderboard = gr.Blocks(css=custom_css)
with leaderboard:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder=" πŸ” Search for a model and press ENTER...",
                    show_label=False,
                    elem_id="search-bar",
                )
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard_df,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            # Hidden copy of the full table; each search filters this copy, so deleting
            # characters (backspace) still matches against all rows.
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=original_df,
                visible=False,
            )
            search_bar.submit(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
                    search_bar
                ],
                leaderboard_table,
            )
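            # Periodically restart the Space so the leaderboard data is refetched.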
            scheduler = BackgroundScheduler()
            scheduler.add_job(restart_leaderboard, "interval", seconds=60)
            scheduler.start()
            # Show the last-updated timestamp beneath the leaderboard table.
            with gr.Row():
                gr.HTML(last_updated)
        with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

leaderboard.queue(default_concurrency_limit=40).launch()