File size: 3,656 Bytes
85d3bc8
 
60ba391
19112df
 
85d3bc8
 
 
 
 
 
19112df
 
 
 
 
 
 
 
 
85d3bc8
 
60ba391
19112df
 
 
 
 
 
 
 
 
 
 
 
 
 
61f4e5e
85d3bc8
 
19112df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b638ad
 
85d3bc8
 
 
 
 
19112df
 
 
 
 
 
85d3bc8
19112df
85d3bc8
 
 
 
19112df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85d3bc8
 
 
7b638ad
19112df
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
import pandas as pd
import requests
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
from src.about import (
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from huggingface_hub import HfApi
# Identifier of the Hugging Face Space that restart_leaderboard() pauses and restarts.
repo_id = "qinference/AIM100Leaderboard"
# Hub API client used for the pause/restart calls.
api = HfApi()
# Initial timestamp text; get_evaluation() later rebinds this name.
# %M is minutes -- the lowercase %m directive would print the month instead.
last_updated = datetime.now().strftime('Last updated at %Y-%m-%d %H:%M:%S')


def restart_leaderboard():
    """Pause the Space named by ``repo_id`` and then restart it through the Hub API.

    NOTE(review): presumably run periodically so the app reloads fresh
    leaderboard data on start-up -- confirm that the pause step is required
    before the restart.
    """
    space = repo_id
    api.pause_space(repo_id=space)
    api.restart_space(repo_id=space)


def get_evaluation():
    """Fetch the leaderboard from the AIM100 API and shape it for display.

    Cells equal to 0 are shown as ``'-'``, a 1-based ``No`` column is
    prepended, the ``nodeSeq`` column is dropped, ``modelName`` is renamed to
    ``Model`` and every column label is capitalized. If the request fails or
    the payload cannot be turned into that table, the placeholder table from
    ``default_evaluation()`` is returned instead.

    Side effect: rebinds the module-level ``last_updated`` to a right-aligned
    HTML paragraph carrying the current local time.

    Returns:
        pandas.DataFrame: the leaderboard table, or the placeholder table.
    """
    global last_updated
    try:
        # A timeout keeps an unresponsive API from blocking start-up forever.
        response = requests.get(
            "http://aim100.qinference.com/api/leaderboard/list",
            timeout=10,
        )
        response.raise_for_status()
        df = pd.DataFrame(response.json())
        for col in df.columns:
            is_zero = df[col] == 0
            if is_zero.any():
                # Cast first: writing a string into a numeric column is
                # deprecated in pandas and slated to become an error.
                df[col] = df[col].astype(object)
                df.loc[is_zero, col] = '-'
        df.insert(0, 'No', range(1, len(df) + 1))
        ret = df.drop(columns='nodeSeq').rename(columns={'modelName': 'Model'})
        ret.columns = [x.capitalize() for x in ret.columns]
    except (requests.RequestException, ValueError, KeyError):
        # Network/HTTP errors, undecodable JSON, or a payload lacking the
        # expected columns (e.g. an empty list) all fall back to the placeholder.
        ret = default_evaluation()

    # %M is minutes; the lowercase %m directive would print the month.
    stamp = datetime.now().strftime('Last updated at %Y-%m-%d %H:%M:%S')
    last_updated = "<p style='text-align: right; padding-right: 5px;'>" + stamp + "</p>"
    return ret


def default_evaluation():
    """Build the single-row placeholder table used when no data is available.

    Every cell holds ``'-'``.

    Side effect: rebinds the module-level ``last_updated`` to a plain-text
    timestamp of the current local time.

    Returns:
        pandas.DataFrame: one row with the columns No, Model, Total,
        Inference, Grammar, Understanding, Coding, Math, Writing and Etc.
    """
    global last_updated

    columns = (
        "No",
        "Model",
        "Total",
        "Inference",
        "Grammar",
        "Understanding",
        "Coding",
        "Math",
        "Writing",
        "Etc",
    )
    df = pd.DataFrame([dict.fromkeys(columns, "-")])
    # %M is minutes; the lowercase %m directive would print the month.
    last_updated = datetime.now().strftime('Last updated at %Y-%m-%d %H:%M:%S')
    return df


# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    query: str,
):
    """Return the rows of ``hidden_df`` whose ``Model`` matches the search query.

    ``query`` may hold several terms separated by ``;``. A row is kept when
    its ``Model`` value contains any term, compared case-insensitively as a
    literal substring. A blank query returns the table unfiltered.

    Args:
        hidden_df: Full leaderboard table; must have a ``Model`` column.
        query: Text typed into the search box.

    Returns:
        pandas.DataFrame: the matching rows, in their original order.
    """
    terms = [part.strip() for part in (query or "").split(";")]
    terms = [term for term in terms if term]
    if not terms:
        return hidden_df

    names = hidden_df["Model"]
    keep = pd.Series(False, index=hidden_df.index)
    for term in terms:
        # regex=False: user input such as "(" must not be parsed as a pattern.
        # na=False: rows without a model name simply never match.
        keep |= names.str.contains(term, case=False, regex=False, na=False)
    return hidden_df[keep]


# Load the leaderboard once at import time. The visible table starts as a copy
# so the unfiltered data stays available for searching.
original_df = get_evaluation()
leaderboard_df = original_df.copy()


# Page layout: title and introduction, then a benchmark tab (search box plus
# table) and an about tab.
leaderboard = gr.Blocks(css=custom_css)
with leaderboard:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                    elem_id="search-bar",
                )
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard_df,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            # Dummy leaderboard for handling the case when the user uses backspace key
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=original_df,
                visible=False,
            )
            # Pressing ENTER filters the hidden full table and writes the
            # result into the visible one.
            search_bar.submit(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
                    search_bar
                ],
                leaderboard_table,
            )
            # Background job that calls restart_leaderboard() every 60 seconds.
            scheduler = BackgroundScheduler()
            scheduler.add_job(restart_leaderboard, "interval", seconds=60)
            scheduler.start()
        with gr.Row():
            # last_updated is read once here, after get_evaluation() has set it.
            gr.HTML(last_updated)
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

leaderboard.queue(default_concurrency_limit=40).launch()