Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
from apscheduler.schedulers.background import BackgroundScheduler | |
""" | |
MLE-Dojo Leaderboard – polished Gradio app (v2.4)
-------------------------------------------------
Displays Elo scores of mainstream LLMs across MLE-Dojo benchmark categories
with a clean, modern aesthetic.
""" | |
# ─────────────────────────────────────────────────────────────────────────────
# 0️⃣ Safe imports (fall back to placeholders when src.* is unavailable)
# ─────────────────────────────────────────────────────────────────────────────
# Prefer the project's own definitions; when the src.* package cannot be
# imported, bind inert placeholders to the same names so the app still starts.
try:
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE as RAW_TITLE,
    )
    from src.display.css_html_js import custom_css as EXTRA_CSS
    from src.envs import REPO_ID
    from src.submission.submit import add_new_eval
except ImportError:
    print("⚠️ src.* imports failed – using internal placeholders.")
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark …"
    INTRODUCTION_TEXT = "Welcome to the **MLE-Dojo** benchmark leaderboard."
    LLM_BENCHMARKS_TEXT = "Further details about our evaluation suite."
    RAW_TITLE = None  # falsy, so consumers can test it with `RAW_TITLE or ...`
    EXTRA_CSS = ""
    REPO_ID = ""

    def add_new_eval(*_args, **_kwargs):
        """Stand-in submission handler used when src.submission is missing.

        Accepts and ignores any positional or keyword arguments.

        Returns:
            The fixed string "Submission placeholder.".
        """
        return "Submission placeholder."
else:
    # Only reached when every import above succeeded.
    print("✅ Imported project-local definitions from src.*")
# ─────────────────────────────────────────────────────────────────────────────
# 1️⃣ Static data (replace with a live data source if desired)
# ─────────────────────────────────────────────────────────────────────────────
# One record per model: identity/metadata plus an Elo score for each benchmark
# category and an "Overall" score.
MODELS = [
    {
        "model_name": "gpt-4o-mini",
        "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/",
        "organizer": "OpenAI",
        "license": "Proprietary",
        "MLE-Lite_Elo": 753,
        "Tabular_Elo": 839,
        "NLP_Elo": 758,
        "CV_Elo": 754,
        "Overall": 778,
    },
    {
        "model_name": "gpt-4o",
        "url": "https://openai.com/index/hello-gpt-4o/",
        "organizer": "OpenAI",
        "license": "Proprietary",
        "MLE-Lite_Elo": 830,
        "Tabular_Elo": 861,
        "NLP_Elo": 903,
        "CV_Elo": 761,
        "Overall": 841,
    },
    {
        "model_name": "o3-mini",
        "url": "https://openai.com/index/openai-o3-mini/",
        "organizer": "OpenAI",
        "license": "Proprietary",
        "MLE-Lite_Elo": 1108,
        "Tabular_Elo": 1019,
        "NLP_Elo": 1056,
        "CV_Elo": 1207,
        "Overall": 1096,
    },
    {
        "model_name": "deepseek-v3",
        "url": "https://api-docs.deepseek.com/news/news1226",
        "organizer": "DeepSeek",
        "license": "DeepSeek",
        "MLE-Lite_Elo": 1004,
        "Tabular_Elo": 1015,
        "NLP_Elo": 1028,
        "CV_Elo": 1067,
        "Overall": 1023,
    },
    {
        "model_name": "deepseek-r1",
        "url": "https://api-docs.deepseek.com/news/news250120",
        "organizer": "DeepSeek",
        "license": "DeepSeek",
        "MLE-Lite_Elo": 1137,
        "Tabular_Elo": 1053,
        "NLP_Elo": 1103,
        "CV_Elo": 1083,
        "Overall": 1100,
    },
    {
        "model_name": "gemini-2.0-flash",
        "url": "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash",
        "organizer": "Google",
        "license": "Proprietary",
        "MLE-Lite_Elo": 847,
        "Tabular_Elo": 923,
        "NLP_Elo": 860,
        "CV_Elo": 978,
        "Overall": 895,
    },
    {
        "model_name": "gemini-2.0-pro",
        "url": "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/",
        "organizer": "Google",
        "license": "Proprietary",
        "MLE-Lite_Elo": 1064,
        "Tabular_Elo": 1139,
        "NLP_Elo": 1028,
        "CV_Elo": 973,
        "Overall": 1054,
    },
    {
        "model_name": "gemini-2.5-pro",
        "url": "https://deepmind.google/technologies/gemini/pro/",
        "organizer": "Google",
        "license": "Proprietary",
        "MLE-Lite_Elo": 1257,
        "Tabular_Elo": 1150,
        "NLP_Elo": 1266,
        "CV_Elo": 1177,
        "Overall": 1214,
    },
]

# Tabular form of MODELS: one row per model, one column per record key.
master_df = pd.DataFrame(MODELS)

# Category label shown to the user -> score column in master_df.
COL_MAP = {
    "Overall": "Overall",
    "MLE-Lite": "MLE-Lite_Elo",
    "Tabular": "Tabular_Elo",
    "NLP": "NLP_Elo",
    "CV": "CV_Elo",
}

# Derived from COL_MAP (insertion order is preserved) so the list of category
# labels can never drift out of sync with the label -> column mapping.
CATEGORIES = list(COL_MAP)
# ─────────────────────────────────────────────────────────────────────────────
# 2️⃣ Helper to produce the leaderboard DataFrame
# ─────────────────────────────────────────────────────────────────────────────
def make_leaderboard(category: str = "Overall") -> pd.DataFrame:
    """Build the ranked leaderboard table for one benchmark category.

    Args:
        category: A key of ``COL_MAP``. Any other value falls back to the
            "Overall" score column.

    Returns:
        A new DataFrame with the columns Rank, Model, Organizer, License and
        Elo, ordered by Elo descending with a fresh 0-based index. Rank runs
        from 1 upward; Model is an HTML anchor that opens the model's URL in
        a new tab.
    """
    # Imported here so the helper does not depend on a file-level import.
    from html import escape

    score_col = COL_MAP.get(category, "Overall")
    ranked = (
        master_df.loc[:, ["model_name", "url", "organizer", "license", score_col]]
        .rename(
            columns={
                "model_name": "Model",
                score_col: "Elo",
                "organizer": "Organizer",
                "license": "License",
            }
        )
        # mergesort is stable: models with equal scores keep their master_df
        # order, so tied ranks are deterministic.
        .sort_values("Elo", ascending=False, kind="mergesort")
        .reset_index(drop=True)
    )
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))
    # Escape both values: the href is single-quoted, so a stray quote, '&' or
    # '<' in the data would otherwise break the generated markup. Built with a
    # comprehension so an empty table works as well.
    ranked["Model"] = [
        f"<a href='{escape(str(link))}' target='_blank'>{escape(str(label))}</a>"
        for link, label in zip(ranked["url"], ranked["Model"])
    ]
    return ranked.loc[:, ["Rank", "Model", "Organizer", "License", "Elo"]]
# ─────────────────────────────────────────────────────────────────────────────
# 3️⃣ Theme + CSS
# ─────────────────────────────────────────────────────────────────────────────
# Gradio theme: Soft base with cyan/slate hues and an Inter-first font stack.
THEME = gr.themes.Soft(
    primary_hue="cyan",
    neutral_hue="slate",
    font=["Inter", "Helvetica", "Arial", "sans-serif"],
)

# The web-font import is kept apart from the style rules: browsers ignore an
# @import that appears after any ordinary rule, so it must lead the stylesheet.
_FONT_IMPORT_CSS = r"""
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
"""

_BASE_RULES_CSS = r"""html { scroll-behavior:smooth; }
body { font-family:'Inter',system-ui,Arial,sans-serif; background:#f9fafb; color:#1f2937; }
.hero-title { font-size:3rem; font-weight:700; line-height:1.1; margin:.2em 0 .1em; background:linear-gradient(90deg,#06b6d4,#6366f1); -webkit-background-clip:text; color:transparent; }
.subtitle { font-size:1.25rem; color:#4b5563; margin-bottom:1.5rem; }
.gr-radio { flex-direction:row !important; gap:.5rem; }
.gr-radio label { padding:.45em 1.2em; border:1px solid #e5e7eb; border-radius:9999px; cursor:pointer; transition:all .15s ease; font-size:.95rem; }
.gr-radio input[type=radio]:checked+span { background:#06b6d4; color:white; border-color:#06b6d4; }
.gr-radio label:hover { box-shadow:0 0 0 2px #bae6fd inset; }
#leaderboard-table { background:white; border-radius:.75rem; box-shadow:0 4px 14px rgba(0,0,0,.05); overflow:hidden; }
#leaderboard-table table { width:100%; border-collapse:collapse; }
#leaderboard-table th, #leaderboard-table td { padding:.75em 1em; text-align:left; border-bottom:1px solid #f1f5f9; }
#leaderboard-table th { background:#f8fafc; font-weight:600; color:#475569; }
#leaderboard-table tr:nth-child(even) { background:#f9fafb; }
#leaderboard-table tr:hover { background:#ecfeff; }
#leaderboard-table td a { color:#2563eb; text-decoration:none; }
#leaderboard-table td a:hover { text-decoration:underline; }
.gr-accordion label { font-weight:600; }
.gr-button { font-weight:500; padding:.55em 1.4em; }
@media (max-width:480px){ .hero-title{font-size:2.4rem;} .subtitle{font-size:1.05rem;} }
"""

# Stand-alone base stylesheet (font import followed by the rules).
BASE_CSS = _FONT_IMPORT_CSS + _BASE_RULES_CSS

# Final stylesheet: font import first, then the project's extra CSS, then the
# base rules. The base rules stay last, so they still win over EXTRA_CSS on
# equal specificity, while the @import remains valid even when EXTRA_CSS is
# non-empty.
custom_css = _FONT_IMPORT_CSS + EXTRA_CSS + "\n" + _BASE_RULES_CSS
# ─────────────────────────────────────────────────────────────────────────────
# 4️⃣ Build Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
demo = gr.Blocks(css=custom_css, theme=THEME) | |
with demo: | |
# Hero section | |
gr.HTML(RAW_TITLE or ( | |
""" | |
<h1 class='hero | |