Leaderboard / app.py
Jerrycool's picture
Update app.py
bd2286c verified
# -*- coding: utf-8 -*-
"""Gradio frontend for the MLE‑Dojo leaderboard.
Changes made in this version
----------------------------
1. **Fixed CSS syntax errors** (missing semicolons, misplaced `!important`).
2. **Introduced a single, clean rule‑set** for the introduction block so the
font size, family and alignment are now reliably applied.
3. All new comments are in English for clarity.
4. Adjusted font sizes for Radio button labels per user request.
5. **Added dark mode support** using `@media (prefers-color-scheme: dark)`
to override conflicting styles and ensure readability in dark themes.
"""
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
# ---------------------------------------------------------------------------
# Optional imports from the project package. If they fail we fall back to
# placeholders so the app still launches locally.
# ---------------------------------------------------------------------------
try:
    # Texts and titles maintained in the project package.
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT,  # only used by the commented-out submit tab
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,  # contains <h1 id="main-leaderboard-title">
    )

    # The extra stylesheet is optional on its own: a missing module only
    # costs the project CSS, not the rest of the real configuration.
    try:
        from src.display.css_html_js import custom_css
    except ImportError:
        print("Warning: src.display.css_html_js not found. Using empty CSS.")
        custom_css = ""

    from src.envs import REPO_ID
    from src.submission.submit import add_new_eval
except ImportError:
    # Any other failed project import lands here and swaps in the complete
    # placeholder set, so the Space can still start.
    print("Warning: using placeholder values because src module imports failed.")

    REPO_ID = "your/space-id"
    custom_css = ""

    TITLE = (
        '<h1 id="main-leaderboard-title" align="center">🏆 MLE-Dojo '
        'Benchmark Leaderboard (Placeholder)</h1>'
    )
    INTRODUCTION_TEXT = (
        '<div class="introduction-section">'
        '<p>Welcome to the MLE‑Dojo Benchmark Leaderboard (placeholder).</p>'
        '<p>Edit <code>src/about.py</code> to change this text.</p>'
        '</div>'
    )
    LLM_BENCHMARKS_TEXT = (
        "## About Section (placeholder)\nInformation about the benchmarks goes here."
    )
    EVALUATION_QUEUE_TEXT = "Current evaluation queue:"

    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = """@misc{qiang2025mledojointeractiveenvironmentsempowering,
title={MLE-Dojo: Interactive Environments for Empowering LLM Agents in Machine Learning Engineering},
author={Rushi Qiang and Yuchen Zhuang and Yinghao Li and Dingu Sagar V K and Rongzhi Zhang and Changhao Li and Ian Shu-Hei Wong and Sherry Yang and Percy Liang and Chao Zhang and Bo Dai},
year={2025},
eprint={2505.07782},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2505.07782},
}"""

    def add_new_eval(*_ignored):
        """Stand-in for the real submit callback; accepts anything, submits nothing."""
        return "Submission placeholder."
# icons_html = """
# <p align="center" style="font-family:'Segoe UI', Roboto, sans-serif; font-weight:bold; text-transform:uppercase;">
# <a href="https://arxiv.org/abs/1706.03762">
# <img src="https://img.shields.io/badge/Arxiv-1706.03762-000000.svg?style=flat-square&logo=arxiv&logoColor=%23FFD700&labelColor=000000" height="28">
# </a>
# &nbsp;&nbsp;&nbsp;
# <a href="https://mle-dojo.github.io/MLE-Dojo-page/">
# <img src="https://img.shields.io/badge/Project%20Website-%20-000000.svg?style=flat-square&logo=Google-Chrome&logoColor=%23FFD700&labelColor=000000" height="28">
# </a>
# &nbsp;&nbsp;&nbsp;
# <a href="https://huggingface.co/spaces/MLE-Dojo/Leaderboard">
# <img src="https://img.shields.io/badge/Leaderboard-000000.svg?style=flat-square&logo=github&logoColor=%23FFD700&labelColor=000000" height="28">
# </a>
# </p>
# """
# (Disabled) Append the icons HTML to INTRODUCTION_TEXT.
# INTRODUCTION_TEXT must be a string for the append below to work.
# If the value imported from src.about could be a non-string, add a type check before re-enabling.
# INTRODUCTION_TEXT += icons_html
# ---------------------------------------------------------------------------
# Leaderboard data (static demo data for now)
# ---------------------------------------------------------------------------
# Field order shared by every leaderboard record; it also fixes the column
# order of master_df.
_RECORD_FIELDS = (
    "model_name",
    "url",
    "organizer",
    "license",
    "MLE-Lite_Elo",
    "Tabular_Elo",
    "NLP_Elo",
    "CV_Elo",
    "Overall",
)

# One tuple per model, values listed in _RECORD_FIELDS order.
_RECORD_VALUES = (
    (
        "gpt-4o-mini (24-07-18)",
        "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/",
        "OpenAI",
        "Proprietary",
        753, 839, 758, 754, 778,
    ),
    (
        "gpt-4o (24-11-20)",
        "https://openai.com/index/hello-gpt-4o/",
        "OpenAI",
        "Proprietary",
        830, 861, 903, 761, 841,
    ),
    (
        "o3-mini (25-01-31)",
        "https://openai.com/index/openai-o3-mini/",
        "OpenAI",
        "Proprietary",
        1108, 1019, 1056, 1207, 1096,
    ),
    (
        "deepseek-v3 (25-03-24)",
        "https://api-docs.deepseek.com/news/news1226",
        "DeepSeek",
        "DeepSeek",
        1004, 1015, 1028, 1067, 1023,
    ),
    (
        "deepseek-r1",
        "https://api-docs.deepseek.com/news/news250120",
        "DeepSeek",
        "DeepSeek",
        1137, 1053, 1103, 1083, 1100,
    ),
    (
        "gemini-2.0-flash",
        "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash",
        "Google",
        "Proprietary",
        847, 923, 860, 978, 895,
    ),
    (
        "gemini-2.0-pro (exp)",
        "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/",
        "Google",
        "Proprietary",
        1064, 1139, 1028, 973, 1054,
    ),
    (
        "gemini-2.5-pro (exp-3-25)",
        "https://deepmind.google/technologies/gemini/pro/",
        "Google",
        "Proprietary",
        1257, 1150, 1266, 1177, 1214,
    ),
)

# Static demo records: one dict per model, keyed by _RECORD_FIELDS.
data = [dict(zip(_RECORD_FIELDS, values)) for values in _RECORD_VALUES]

# Single source table that every category view is sliced from.
master_df = pd.DataFrame(data)
# Category preselected in the radio group and used when a lookup misses.
DEFAULT_CATEGORY = "Overall"

# Radio choices, in display order.
CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]

# Radio label -> name of the master_df column that holds that category's score.
category_to_column = dict(
    (
        ("MLE-Lite", "MLE-Lite_Elo"),
        ("Tabular", "Tabular_Elo"),
        ("NLP", "NLP_Elo"),
        ("CV", "CV_Elo"),
        ("Overall", "Overall"),
    )
)
# ---------------------------------------------------------------------------
# Helper to slice & rank the DataFrame when category radio changes
# ---------------------------------------------------------------------------
def update_leaderboard(category: str) -> pd.DataFrame:
    """Build the ranked leaderboard view for one category.

    Args:
        category: Label chosen in the category radio group. Labels missing
            from ``category_to_column`` fall back to ``DEFAULT_CATEGORY``.

    Returns:
        A new DataFrame with the columns ``Rank``, ``Model``, ``Organizer``,
        ``License`` and ``Elo Score``, sorted by score in descending order.
        ``Rank`` starts at 1 and ``Model`` holds an HTML anchor that links to
        the model's URL (``#`` when the URL is missing).
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    score_column = category_to_column.get(
        category, category_to_column[DEFAULT_CATEGORY]
    )
    selected = master_df[["model_name", "url", "organizer", "license", score_column]]

    # sort_values returns a new frame, so master_df is never mutated.
    # kind="stable" keeps equally scored models in their master_df order.
    ranked = selected.sort_values(
        score_column, ascending=False, kind="stable"
    ).reset_index(drop=True)
    ranked.insert(0, "Rank", ranked.index + 1)

    # Escape both the URL and the name: the cell is rendered as HTML, so a
    # stray quote, "&" or "<" would otherwise break the anchor markup.
    # A list comprehension (rather than a row-wise apply) also works when the
    # frame has no rows.
    ranked["Model"] = [
        "<a href='{href}' target='_blank' class='model-link'>{label}</a>".format(
            href=escape(str(url), quote=True) if pd.notna(url) else "#",
            label=escape(str(name)),
        )
        for name, url in zip(ranked["model_name"], ranked["url"])
    ]

    ranked = ranked.rename(
        columns={
            score_column: "Elo Score",
            "organizer": "Organizer",
            "license": "License",
        }
    )
    return ranked[["Rank", "Model", "Organizer", "License", "Elo Score"]]
# ---------------------------------------------------------------------------
# Basic placeholder DataFrames for the (currently disabled) evaluation queue
# ---------------------------------------------------------------------------
# Queue fetching is not wired up, so say so once at import time.
print("Warning: evaluation queue fetching is disabled/mocked.")

# Header-only frame standing in for the evaluation queue table.
_QUEUE_COLUMNS = ["Model", "Status", "Requested", "Started"]
empty_queue_df = pd.DataFrame(columns=_QUEUE_COLUMNS)
# ---------------------------------------------------------------------------
# Helper for HF Spaces restart (optional)
# ---------------------------------------------------------------------------
def restart_space():
    """Log a restart attempt for the Space identified by ``REPO_ID``.

    Only the log line is emitted; no restart request is actually issued.
    """
    notice = f"Attempting to restart space: {REPO_ID}"
    print(notice)
    # Insert actual restart logic if needed.
# ---------------------------------------------------------------------------
# CSS: project CSS (custom_css) + enhanced overrides + DARK MODE SUPPORT
# ---------------------------------------------------------------------------
# --- Introduction typography override ---
# Styles two classes alike: `.introduction-wrapper` (the elem_classes value
# given to the introduction Markdown component) and `.introduction-section`
# (the <div> class used inside the placeholder INTRODUCTION_TEXT).
# The 1.3rem font size carries !important on the container, on its <p>
# children and again inside the <=768px media query, so it stays the same on
# narrow screens.
intro_css = """
/* --------------------------------------------------
INTRODUCTION BLOCK (font, size, alignment)
-------------------------------------------------- */
.introduction-wrapper, .introduction-section {
font-family: Arial, sans-serif; /* Added sans-serif fallback */
font-size: 1.3rem !important; /* ≈22–23 px */
line-height: 1.75;
/* color: #344054; */ /* Rely on theme for base text color */
text-align: center;
max-width: 1000px;
margin: 0 auto 1rem auto;
}
.introduction-wrapper p, .introduction-section p {
font-size: 1.3rem !important; /* ≈22–23 px */
margin-bottom: 0.5rem;
/* color: inherit; */ /* Inherit color from parent/theme */
}
@media (max-width: 768px) {
.introduction-wrapper, .introduction-section {
font-size: 1.3rem !important; /* Maintain size on mobile */
}
}
"""
# --- Base layout, title and leaderboard table ---
# Hard-coded light background/text colours are commented out inside the CSS so
# the Gradio theme supplies them; only structural styling remains.
# Selectors and where they come from in this file:
#   #main-leaderboard-title -> id of the <h1> in the placeholder TITLE
#   #leaderboard-table      -> elem_id of the leaderboard gr.Dataframe
#   .model-link             -> class on the anchors built by update_leaderboard
base_css = """
/* Base & layout overrides (truncated for brevity) */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen,
Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
font-size: 1.3em; /* Slightly increased base size */
line-height: 1.6;
/* background-color: #f8f9fa; REMOVED - Rely on theme */
/* color: #343a40; REMOVED - Rely on theme */
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
padding: 2rem !important;
}
#main-leaderboard-title {
font-size: 3.2em;
font-weight: 700;
/* color: #212529; REMOVED - Rely on theme */
text-align: center;
margin-bottom: 1.5rem;
padding-bottom: 0.5rem;
border-bottom: 2px solid var(--border-color-primary); /* Use Gradio variable */
}
/* Leaderboard table */
#leaderboard-table th {
/* background-color: #e9ecef; REMOVED - Rely on theme header bg */
font-size: 1.3em;
font-weight: 500;
/* color: var(--table-header-text-color, inherit); */ /* Use theme variable or inherit */
}
#leaderboard-table td {
font-size: 1.1em;
/* color: var(--table-body-text-color, inherit); */ /* Use theme variable or inherit */
}
#leaderboard-table .model-link {
color: var(--link-text-color, #0056b3); /* Use theme link color or fallback */
font-weight: 500;
text-decoration: none;
}
#leaderboard-table .model-link:hover {
text-decoration: underline;
}
#leaderboard-table {
border-radius: 0.75rem;
overflow: hidden;
border: 1px solid var(--border-color-primary); /* Use Gradio variable */
}
#leaderboard-table th,
#leaderboard-table td {
padding: 0.9rem 1.2rem;
line-height: 1.4;
}
"""
# --- About tab Markdown typography ---
# `.markdown-text` is the elem_classes value on the About tab's gr.Markdown.
# The code-block colours in this string are fixed light-mode values; their
# dark counterparts are defined in dark_mode_css.
markdown_css = """
/* ===============================
About Tab – Markdown Typography (Updated)
=============================== */
.markdown-text, .markdown-text p {
font-family: "Segoe UI", "Helvetica Neue", sans-serif;
font-size: 1.2rem;
line-height: 1.8;
margin-bottom: 1rem;
}
.markdown-text h1,
.markdown-text h2,
.markdown-text h3 {
font-weight: 600;
line-height: 1.3;
margin: 1.5rem 0 0.9rem 0;
}
.markdown-text h1 { font-size: 1.9rem; }
.markdown-text h2 { font-size: 1.6rem; }
.markdown-text h3 { font-size: 1.4rem; }
.markdown-text ul,
.markdown-text ol {
padding-left: 1.8rem;
margin-bottom: 1.2rem;
}
/* Code blocks - light mode */
.markdown-text pre, .markdown-text code {
font-family: "Source Code Pro", "Menlo", monospace;
background-color: #f1f3f5;
color: #333;
border-radius: 0.4rem;
border: 1px solid #dee2e6;
}
.markdown-text pre {
padding: 1rem 1.2rem;
overflow-x: auto;
}
.markdown-text code {
padding: 0.2rem 0.45rem;
font-size: 1rem;
}
@media (max-width: 768px) {
.markdown-text, .markdown-text p { font-size: 1.15rem; }
.markdown-text h1 { font-size: 1.75rem; }
.markdown-text h2 { font-size: 1.5rem; }
.markdown-text h3 { font-size: 1.35rem; }
}
"""
# --- Tab buttons ---
# Enlarges every tab button and emphasises the selected one.
# NOTE(review): `.tabs .tab-container > button` and `.selected` are not set
# anywhere in this file, so they presumably come from Gradio's generated
# markup; confirm they match the installed Gradio version.
tab_css = """
/* Tabs ▸ target ALL tab buttons using the actual structure */
.tabs .tab-container > button {
font-size: 1.6rem !important;
font-weight: 500;
font-style: normal;
/* color: #333; REMOVED - Rely on theme */
/* Using theme variables where possible might be better, but direct styling is okay */
}
/* Optional: Style for the selected tab button if needed */
.tabs .tab-container > button.selected {
font-weight: 700;
/* color: #0056b3; */ /* Use theme's primary color if possible */
color: var(--primary-500, #0056b3); /* Example using a common theme variable name */
}
"""
# --- Category radio group ---
# `.gradio-radio` is the elem_classes value on the category gr.Radio.
# Three rules: the group label (1.6rem), each option label (1.0rem, Verdana)
# and the selected option (bold, primary colour).
# NOTE(review): `.wrap` and `label.selected` are not set in this file, so they
# presumably come from Gradio's generated markup; verify against the installed
# version.
radio_css = """
/* --- Radio Button Styling --- */
/* Style for the main label ("Select Category:") */
.gradio-radio > label span { /* Targets the main label text */
font-size: 1.6rem !important;
font-weight: 600;
color: var(--primary-500, #2a6099); /* Use theme primary or fallback */
padding-bottom: 10px;
display: inline-block;
}
/* Style for the individual option labels (Overall, MLE-Lite, etc.) */
.gradio-radio .wrap > label > span { /* Targets the individual option text */
font-size: 1.0rem !important;
font-family: Verdana, Geneva, sans-serif;
/* color: #444; REMOVED - Rely on theme for base option color */
font-weight: normal;
font-style: normal;
padding-left: 6px;
}
/* Style for the selected option's text */
.gradio-radio .wrap > label.selected > span { /* Style for selected option */
font-weight: bold; /* Make selected bold for emphasis */
color: var(--primary-500, #0056b3); /* Use theme's primary color */
/* color: #87CEEB; Original light blue - might lack contrast in light mode */
}
"""
# --- Citation accordion and textbox ---
# `.gradio-accordion` is the elem_classes value on the citation gr.Accordion
# and `#citation-button` is the elem_id of the gr.Textbox inside it.
# NOTE(review): the `svelte-1w6vloh` / `svelte-1gfkn6j` class names look
# build-generated; confirm they still exist in the installed Gradio version,
# because those two rules silently stop matching if they change.
citation_css = """
/* Accordion Header Text ("📙 Citation") */
.gradio-accordion button.label-wrap > span.svelte-1w6vloh {
font-size: 1.4rem !important;
font-weight: 400;
color: var(--neutral-800, #8B4513); /* Use theme neutral or fallback brown */
font-family: Georgia, serif;
}
/* Citation Textbox Label ("Copy the following snippet...") */
#citation-button span.svelte-1gfkn6j {
font-size: 0.95rem !important;
/* color: #666; REMOVED - Rely on theme secondary text color */
color: var(--neutral-500, #666);
font-style: normal;
display: block;
margin-bottom: 5px;
}
/* Citation Textbox Content (The actual text) - Light mode */
#citation-button textarea {
font-size: 1.0rem !important;
font-family: monospace;
/* color: #222; REMOVED - Rely on theme */
line-height: 1.6;
/* background-color: #fdfdfd; REMOVED - Rely on theme input bg */
/* border: 1px solid #ccc; REMOVED - Rely on theme border */
border: 1px solid var(--border-color-accent);
background-color: var(--input-background-fill);
color: var(--input-text-color);
}
"""
# --- Images (intro logo + About overview) ---
# `#intro-image` and `#about-image` are the elem_id values of the two
# gr.Image components. The container rules strip background, padding, border
# and shadow; the <img> rules centre the picture, round its corners and cap
# its width (500px for the intro image, 1000px for the About image, 100% at
# <=768px).
image_css = """
/* --- CSS for Image containers (Intro/About) --- */
#intro-image, #about-image {
background-color: transparent !important;
padding: 0 !important;
border: none !important;
box-shadow: none !important; /* Remove container shadow if any */
}
/* --- CSS for Image tags themselves (Intro/About) --- */
#intro-image img, #about-image img { /* Target the actual <img> tag */
display: block;
height: auto;
margin: 0rem auto 1rem auto; /* Center and add bottom margin */
border-radius: 8px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); /* Default shadow for light mode */
}
#intro-image img {
max-width: 500px;
width: 90%;
}
#about-image img {
max-width: 1000px;
width: 90%;
}
/* Optional: Different styles for smaller screens */
@media (max-width: 768px) {
#intro-image img, #about-image img {
max-width: 100%;
width: 95%;
margin-top: 1.5rem;
}
}
"""
# --- Dark mode overrides ---
# Everything in this string sits inside one
# `@media (prefers-color-scheme: dark)` block, so it only applies when the
# browser/OS reports a dark colour-scheme preference.
# Effective overrides: title and table border colours, Markdown code blocks,
# the selected radio option, the citation header/label/textarea and the image
# shadow. The `body`, `.model-link`, `th` and `td` rules contain only
# commented-out declarations and currently change nothing.
# NOTE(review): confirm that keying off the OS preference matches how the
# Gradio theme itself switches to dark; if the two can disagree, these
# overrides and the theme will be out of sync.
dark_mode_css = """
@media (prefers-color-scheme: dark) {
body {
/* Ensure theme handles dark bg/text */
}
#main-leaderboard-title {
/* color: var(--body-text-color); */ /* Ensure title color matches theme */
border-bottom-color: var(--border-color-primary); /* Ensure border matches theme */
}
/* Make links slightly brighter if needed */
#leaderboard-table .model-link {
/* color: var(--link-text-color-dark, #58a6ff); */ /* Optional: specific dark link color */
}
#leaderboard-table {
border-color: var(--border-color-primary); /* Ensure border matches theme */
}
/* Ensure table header/cell text contrasts with dark theme background */
#leaderboard-table th {
/* color: var(--table-header-text-color); */
}
#leaderboard-table td {
/* color: var(--table-body-text-color); */
}
/* Dark mode for Markdown Code blocks */
.markdown-text pre, .markdown-text code {
background-color: #2d333b; /* Dark background */
color: #c9d1d9; /* Light text */
border: 1px solid #444c56; /* Darker border */
}
/* Dark mode for Radio Button selected option */
.gradio-radio .wrap > label.selected > span {
color: var(--primary-300, #add8e6); /* Lighter blue for dark background */
font-weight: bold;
}
/* --- START: Citation Dark Mode Fixes --- */
/* Dark mode for Citation Accordion Header ("📙 Citation") */
.gradio-accordion button.label-wrap > span.svelte-1w6vloh {
/* Use a lighter, more visible color for dark backgrounds */
color: var(--neutral-300, #D2B48C) !important; /* Lighter brown/tan - uncommented and made important */
}
/* Dark mode for Citation Textbox Label ("Copy the following snippet...") */
#citation-button span.svelte-1gfkn6j {
/* Use a lighter grey for better visibility */
color: var(--neutral-400, #b0b0b0) !important; /* Lighter grey - uncommented, slightly lighter, and made important */
}
/* Dark mode for Citation Textbox Content (The actual text) */
#citation-button textarea {
/* Rely on theme variables first, but provide explicit fallbacks if needed */
background-color: var(--input-background-fill, #22272e); /* Rely on theme dark vars or fallback dark bg */
color: var(--input-text-color, #adbac7); /* Rely on theme dark vars or fallback light text */
border: 1px solid var(--border-color-accent, #444c56); /* Rely on theme dark vars or fallback dark border */
}
/* --- END: Citation Dark Mode Fixes --- */
/* Dark mode for Image shadows */
#intro-image img, #about-image img {
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4); /* Darker shadow for dark mode */
/* Or use a light shadow: */
/* box-shadow: 0 4px 8px rgba(255, 255, 255, 0.1); */
}
}
"""
# Stylesheet handed to gr.Blocks: every CSS fragment, newline-separated.
# The project CSS comes first and the dark-mode overrides last, so later
# fragments can override earlier ones.
_css_layers = (
    custom_css,
    base_css,
    intro_css,
    markdown_css,
    tab_css,
    radio_css,
    citation_css,
    image_css,
    dark_mode_css,
)
final_css = "\n".join(f"{layer}" for layer in _css_layers)
# ---------------------------------------------------------------------------
# Build the Gradio UI
# ---------------------------------------------------------------------------
# Root Blocks container: the Soft theme plus the combined stylesheet.
# NOTE(review): the nesting of the `with` blocks below is reconstructed from
# the component order and the surrounding comments; confirm it against the
# deployed layout.
demo = gr.Blocks(css=final_css, theme=gr.themes.Soft())  # Soft theme is generally good
with demo:
    # Logo image, created before the title so it renders above it.
    with gr.Row():
        gr.Image(
            value="icon.jpg",
            show_label=False,
            elem_id="intro-image",  # id targeted by image_css
            # container=False  # Optional: removes Gradio's container styling if needed
        )
    # Title
    gr.HTML(TITLE)
    # Introduction; the extra class lets intro_css target .introduction-wrapper
    with gr.Row():
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="introduction-wrapper")
    with gr.Tabs(elem_classes="tab-buttons"):
        # ------------------ Leaderboard tab ------------------
        with gr.TabItem("🏅 MLE-Dojo Benchmark", id=0):
            with gr.Column():
                # gr.Markdown("## Model Elo Rankings by Category", elem_classes="markdown-text")  # Optional sub-header
                category_selector = gr.Radio(
                    choices=CATEGORIES,
                    label="Select Category:",  # label text enlarged by radio_css
                    value=DEFAULT_CATEGORY,
                    interactive=True,
                    elem_classes="gradio-radio",  # class targeted by radio_css
                )
                # Read-only table, initially filled with the default category.
                # The Model column is declared as "html" because
                # update_leaderboard puts anchor tags in it.
                leaderboard_df_component = gr.Dataframe(
                    value=update_leaderboard(DEFAULT_CATEGORY),
                    headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
                    datatype=["number", "html", "str", "str", "number"],
                    interactive=False,
                    row_count=(len(master_df), "fixed"),
                    col_count=(5, "fixed"),
                    wrap=True,
                    elem_id="leaderboard-table",
                )
            # Rebuild the table whenever a different category is selected.
            category_selector.change(update_leaderboard, category_selector, leaderboard_df_component)
        # ------------------ About tab ------------------
        with gr.TabItem("📝 About", id=1):
            # Benchmark description wrapped in a column
            with gr.Column():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            # Overview image, created after the text in the About tab
            with gr.Row():
                gr.Image(
                    value="overview.jpg",
                    show_label=False,
                    elem_id="about-image",  # id targeted by image_css
                    # container=False  # Optional: removes Gradio's container styling
                )
    # Citation accordion (bottom of page), collapsed by default
    with gr.Accordion("📙 Citation", open=False, elem_classes="gradio-accordion"):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=8,
            elem_id="citation-button",
            show_copy_button=True,
        )
# ---------------------------------------------------------------------------
# Scheduler (optional) & launch
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Scheduler setup is best-effort: a failure is reported and the app still
    # launches.
    try:
        scheduler_disabled = not callable(restart_space) or REPO_ID == "your/space-id"
        if scheduler_disabled:
            print("Space restart scheduler not started (no REPO_ID or restart function).")
        else:
            scheduler = BackgroundScheduler()
            # 1800 seconds = one restart_space call every 30 minutes
            scheduler.add_job(restart_space, "interval", seconds=1800)
            scheduler.start()
            print("Scheduler started for space restart.")
    except Exception as exc:
        print(f"Scheduler init failed: {exc}")

    print("Launching Gradio app…")
    demo.launch()