# Hugging Face Spaces page header captured with this file: "Spaces: Running".
"""
MLE‑Dojo Benchmark Leaderboard — Dark Elegance v3
=================================================
* Fix: removed unsupported `height` param for `gr.Dataframe`.
* Font tweak: leaderboard cells slightly smaller.
"""
import html

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
# ---------------------------------------------------------------------------
# Import copy or fallback placeholders
# ---------------------------------------------------------------------------
# The project-local `src` package supplies the page copy, CSS, Space id and
# submission handler. If any of those imports fail, the placeholders below
# are used instead so the module can still be imported and launched.
try:
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,
    )
    from src.display.css_html_js import custom_css
    from src.envs import REPO_ID
    from src.submission.submit import add_new_eval
except ImportError:
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark…"
    INTRODUCTION_TEXT = (
        "Welcome to the **MLE‑Dojo Benchmark Leaderboard** — "
        "compare LLM agents across realistic ML‑engineering tasks."
    )
    LLM_BENCHMARKS_TEXT = "Further details about tasks, metrics, and evaluation pipelines."
    TITLE = (
        "<h1 class='hero-title gradient-text'>🏆 MLE‑Dojo Benchmark Leaderboard</h1>"
        "<p class='subtitle'>Interactive, reproducible & community‑driven ML‑agent benchmarking</p>"
    )
    custom_css = ""
    # Sentinel value: the entry point compares REPO_ID against this exact
    # string to decide whether to start the periodic scheduler.
    REPO_ID = "your/space-id"

    def add_new_eval(*_, **__):
        """Stand-in submission handler; ignores all arguments.

        Accepts any positional or keyword arguments so it can replace the
        real handler regardless of how it is called.

        Returns:
            The fixed placeholder message.
        """
        return "Submission placeholder."
# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
# One record per model: display name, reference URL, organizer, license,
# one Elo score per benchmark category, and the "Overall" score.
data = [
    {
        "model_name": "gpt-4o-mini",
        "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/",
        "organizer": "OpenAI",
        "license": "Proprietary",
        "MLE-Lite_Elo": 753,
        "Tabular_Elo": 839,
        "NLP_Elo": 758,
        "CV_Elo": 754,
        "Overall": 778,
    },
    {
        "model_name": "gpt-4o",
        "url": "https://openai.com/index/hello-gpt-4o/",
        "organizer": "OpenAI",
        "license": "Proprietary",
        "MLE-Lite_Elo": 830,
        "Tabular_Elo": 861,
        "NLP_Elo": 903,
        "CV_Elo": 761,
        "Overall": 841,
    },
    {
        "model_name": "o3-mini",
        "url": "https://openai.com/index/openai-o3-mini/",
        "organizer": "OpenAI",
        "license": "Proprietary",
        "MLE-Lite_Elo": 1108,
        "Tabular_Elo": 1019,
        "NLP_Elo": 1056,
        "CV_Elo": 1207,
        "Overall": 1096,
    },
    {
        "model_name": "deepseek-v3",
        "url": "https://api-docs.deepseek.com/news/news1226",
        "organizer": "DeepSeek",
        "license": "DeepSeek",
        "MLE-Lite_Elo": 1004,
        "Tabular_Elo": 1015,
        "NLP_Elo": 1028,
        "CV_Elo": 1067,
        "Overall": 1023,
    },
    {
        "model_name": "deepseek-r1",
        "url": "https://api-docs.deepseek.com/news/news250120",
        "organizer": "DeepSeek",
        "license": "DeepSeek",
        "MLE-Lite_Elo": 1137,
        "Tabular_Elo": 1053,
        "NLP_Elo": 1103,
        "CV_Elo": 1083,
        "Overall": 1100,
    },
    {
        "model_name": "gemini-2.0-flash",
        "url": "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash",
        "organizer": "Google",
        "license": "Proprietary",
        "MLE-Lite_Elo": 847,
        "Tabular_Elo": 923,
        "NLP_Elo": 860,
        "CV_Elo": 978,
        "Overall": 895,
    },
    {
        "model_name": "gemini-2.0-pro",
        "url": "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/",
        "organizer": "Google",
        "license": "Proprietary",
        "MLE-Lite_Elo": 1064,
        "Tabular_Elo": 1139,
        "NLP_Elo": 1028,
        "CV_Elo": 973,
        "Overall": 1054,
    },
    {
        "model_name": "gemini-2.5-pro",
        "url": "https://deepmind.google/technologies/gemini/pro/",
        "organizer": "Google",
        "license": "Proprietary",
        "MLE-Lite_Elo": 1257,
        "Tabular_Elo": 1150,
        "NLP_Elo": 1266,
        "CV_Elo": 1177,
        "Overall": 1214,
    },
]

# Tabular form of `data`; column order follows the key order of the records.
master_df = pd.DataFrame(data)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Maps each category label shown in the UI to the data column holding its score.
CATEGORY_MAP = {
    "Overall": "Overall",
    "MLE-Lite": "MLE-Lite_Elo",
    "Tabular": "Tabular_Elo",
    "NLP": "NLP_Elo",
    "CV": "CV_Elo",
}
# Derived from the mapping so the selectable labels and the lookup table
# cannot drift apart; dict insertion order gives the display order.
CATEGORIES = list(CATEGORY_MAP)
DEFAULT_CATEGORY = "Overall"
# Rank positions that are displayed as a medal instead of a number.
MEDALS = {1: "🥇", 2: "🥈", 3: "🥉"}
def update_leaderboard(category: str, ascending: bool) -> pd.DataFrame:
    """Build the leaderboard table for one category.

    Args:
        category: A key of ``CATEGORY_MAP``; any other value falls back to
            ``DEFAULT_CATEGORY``.
        ascending: If true, rows are displayed lowest score first.

    Returns:
        A DataFrame with columns ``Rank``, ``Model``, ``Organizer``,
        ``License`` and ``Elo Score``. ``Rank`` is a string: a medal for
        positions 1-3, the position number otherwise. ``Model`` is an HTML
        anchor linking the model name to its URL.
    """
    col = CATEGORY_MAP.get(category, CATEGORY_MAP[DEFAULT_CATEGORY])

    # Rank is always computed from the highest score downwards, independent of
    # the requested display order; otherwise an ascending view would hand the
    # gold medal to the lowest-scoring model. A stable sort keeps tied scores
    # in a deterministic order (ties still receive consecutive ranks).
    ranked = (
        master_df[["model_name", "url", "organizer", "license", col]]
        .sort_values(by=col, ascending=False, kind="mergesort")
        .reset_index(drop=True)
    )
    ranked.insert(
        0,
        "Rank",
        [MEDALS.get(pos, str(pos)) for pos in range(1, len(ranked) + 1)],
    )

    # Escape both values so characters such as & < > ' in a URL or name cannot
    # break the single-quoted href attribute or the surrounding markup.
    # Building the column from a list also works when the frame has no rows.
    ranked["Model"] = [
        f"<a href='{html.escape(str(url))}' target='_blank'>{html.escape(str(name))}</a>"
        for url, name in zip(ranked["url"], ranked["model_name"])
    ]

    if ascending:
        # Flip only the display order; each row keeps the rank it earned above.
        ranked = ranked.iloc[::-1].reset_index(drop=True)

    ranked = ranked.rename(
        columns={"organizer": "Organizer", "license": "License", col: "Elo Score"}
    )
    return ranked[["Rank", "Model", "Organizer", "License", "Elo Score"]]
# ---------------------------------------------------------------------------
# CSS (dark + slightly smaller table font)
# ---------------------------------------------------------------------------
# Appended to whatever stylesheet was imported (or to the empty fallback).
# The selector targets cells of the component whose elem_id is
# "leaderboard-table".
custom_css += """
#leaderboard-table td{padding:.7em;font-size:1.05rem;border-top:1px solid #334155;}
"""
# ---------------------------------------------------------------------------
# Gradio App
# ---------------------------------------------------------------------------
# NOTE(review): the nesting below is reconstructed from statement order. The
# Citation accordion is placed after the tabs at the top level of the page;
# confirm that this matches the intended layout.
app = gr.Blocks(
    css=custom_css,
    theme=gr.themes.Soft(primary_hue="sky", neutral_hue="slate", font=["Inter"]),
)

with app:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs():
        with gr.TabItem("🏅 Leaderboard"):
            gr.HTML("<h3 class='section-title'><span class='icon'>📊</span>Model Elo Rankings by Category</h3>")

            # Controls: which score column to show and the sort direction.
            with gr.Row():
                category_radio = gr.Radio(CATEGORIES, value=DEFAULT_CATEGORY, label="Category")
                asc_check = gr.Checkbox(label="⬆️ Asc. order", value=False)

            # Read-only table, initially showing the default category with the
            # highest score first.
            board = gr.Dataframe(
                value=update_leaderboard(DEFAULT_CATEGORY, False),
                headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
                datatype=["html", "html", "str", "str", "number"],
                row_count=(len(master_df), "fixed"),
                col_count=(5, "fixed"),
                interactive=False,
                elem_id="leaderboard-table",
            )

            # Either control changing rebuilds the table from both current values.
            category_radio.change(update_leaderboard, [category_radio, asc_check], board)
            asc_check.change(update_leaderboard, [category_radio, asc_check], board)

        with gr.TabItem("ℹ️ About"):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    with gr.Accordion("📖 Citation", open=False):
        gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=10, show_copy_button=True)
# ---------------------------------------------------------------------------
# Optional scheduler
# ---------------------------------------------------------------------------
def restart_space() -> None:
    """Print a restart notice naming ``REPO_ID``.

    NOTE(review): despite its name, this function only prints; it makes no
    call that would actually restart the Space. Confirm whether a real
    restart is expected here.
    """
    print(f"Restarting space → {REPO_ID}")
if __name__ == "__main__":
    # Start the periodic job only when REPO_ID is not the placeholder value,
    # i.e. when a real Space id was provided.
    if REPO_ID != "your/space-id":
        scheduler = BackgroundScheduler()
        scheduler.add_job(restart_space, "interval", seconds=1800)  # every 30 minutes
        scheduler.start()

    # NOTE(review): nesting reconstructed from statement order. The launch is
    # placed outside the REPO_ID check so the app starts in both cases.
    print("Launching app…")
    app.launch()