Leaderboard / app.py
Jerrycool's picture
Update app.py
ea96be1 verified
raw
history blame
36.9 kB
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
# --- Make sure these imports work relative to your file structure ---
# Option 1: If src is a directory in the same folder as your script:
try:
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT,
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,
    )
    from src.display.css_html_js import custom_css  # Assuming this might exist
    from src.envs import REPO_ID
    from src.submission.submit import add_new_eval
    print("Successfully imported from src module.")
# Option 2: If you don't have these files, define placeholders.
# Any failing import above lands here, and every name is then replaced by its
# placeholder so the module-level names are always defined together.
except ImportError:
    print("Warning: Using placeholder values because src module imports failed.")
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark...\n@misc{mledojo2025benchmark,\n title={MLE-Dojo Benchmark},\n author={MLE-Dojo Team},\n year={2025},\n howpublished={\\url{https://your-benchmark-url.example.com}},\n}"  # Added example citation text
    EVALUATION_QUEUE_TEXT = "### Current evaluation queue:"  # Use Markdown heading
    INTRODUCTION_TEXT = """
## Welcome to the MLE-Dojo Benchmark Leaderboard
This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
### How to read this leaderboard
- Select a domain category using the radio buttons below to view specialized rankings.
- Higher ELO scores indicate better performance within that category.
- Click on a model name to visit its page (if available).
"""
    LLM_BENCHMARKS_TEXT = """
## About the MLE-Dojo Benchmark
### Evaluation Methodology
The MLE-Dojo benchmark evaluates models across various domains including:
- **MLE-Lite**: Basic machine learning engineering tasks (data preprocessing, feature engineering, model selection).
- **Tabular**: Data manipulation, analysis, and modeling with structured data.
- **NLP**: Natural language processing tasks (classification, generation, understanding).
- **CV**: Computer vision tasks (image classification, object detection, generation).
Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other based on competitions within each domain.
### Contact
For more information or to submit your model, please contact us at `[email protected]` (replace with actual contact).
"""
    # The emoji here was mis-encoded; restored to the intended trophy glyph.
    TITLE = "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"  # Keep title simple for header
    custom_css = ""  # Will be populated by enhanced_css later
    REPO_ID = "your/space-id"  # Replace with your actual Hugging Face Space ID if restarting

    def add_new_eval(*args, **kwargs):
        """Stand-in for the real submission handler.

        Accepts any positional or keyword arguments so that callers written
        against the real handler do not fail with a TypeError, and always
        returns the same placeholder message.
        """
        return "Submission placeholder."
# --- Elo Leaderboard Configuration ---
# Static leaderboard records, one dict per model.
# Each record carries the display name, a link for the model, the organizer,
# the license label, one Elo score per domain (the '*_Elo' keys) and an
# 'Overall' score. Ranks are not stored here; they are computed at display
# time from whichever score column is selected.
data = [
    {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
    {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
    {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
    {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
    {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
    {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
    {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
    {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
]
# Organization icons shown next to model names (visual enhancement).
# Keys match the 'organizer' field of the leaderboard records; 'Default' is
# the fallback for organizers without an entry. The emoji literals were
# previously mis-encoded (UTF-8 bytes decoded with a legacy code page) and are
# restored here to the intended glyphs.
org_logos = {
    'OpenAI': '📱',  # Replace with actual icon URLs or keep emojis
    'DeepSeek': '🔍',
    'Google': '🌐',
    'Default': '🤖',
}
# Create a master DataFrame: one row per record in `data`, one column per key.
# The display code selects and sorts columns from this frame.
master_df = pd.DataFrame(data)
# Add last updated timestamp (captured once, when the module is loaded).
# datetime.now() returns a naive datetime, for which %Z formats as an empty
# string and leaves a dangling trailing space. astimezone() attaches the
# system's local timezone so the zone name is actually rendered.
last_updated = datetime.now().astimezone().strftime("%B %d, %Y at %H:%M:%S %Z")
# Leaderboard categories as (label shown in the UI, internal value) pairs.
# The emoji in the labels were previously mis-encoded (UTF-8 bytes decoded
# with a legacy code page) and are restored here to the intended glyphs.
CATEGORIES = [
    ("🏆 Overall", "Overall"),
    ("💡 MLE-Lite", "MLE-Lite"),
    ("📊 Tabular", "Tabular"),
    ("📝 NLP", "NLP"),
    ("👁️ CV", "CV"),
]
# The first category is the default. Deriving both names from CATEGORIES keeps
# the default label identical to the label offered as a choice.
DEFAULT_CATEGORY_LABEL, DEFAULT_CATEGORY_VALUE = CATEGORIES[0]
# Lookup from a category *value* to the DataFrame column holding its score.
# "Overall" maps to itself; every domain maps to "<domain>_Elo".
category_to_column = {"Overall": "Overall"}
category_to_column.update(
    {domain: f"{domain}_Elo" for domain in ("MLE-Lite", "Tabular", "NLP", "CV")}
)
# --- Helper functions to update leaderboard ---
def _elo_color(score):
    """Return the hex color used for an Elo score's text and progress bar."""
    if score >= 1000:
        return '#1a5fb4'
    if score >= 900:
        return '#2ec27e'
    if score >= 800:
        return '#e5a50a'
    return '#ff7800'


def _model_cell(row):
    """Build the HTML for the 'Model' cell: organization icon plus a linked name."""
    from html import escape  # local import keeps this block self-contained

    logo = org_logos.get(row['organizer'], org_logos['Default'])
    # Escape values placed into markup so a quote, '<' or '&' in a model name
    # or URL cannot break the attribute or the surrounding HTML.
    href = escape(str(row['url']), quote=True) if pd.notna(row['url']) else '#'
    name = escape(str(row['model_name']))
    return f"""<div style="display: flex; align-items: center;">
<span style="font-size: 1.5em; margin-right: 10px;">{logo}</span>
<a href='{href}' target='_blank'
style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
{name}
</a>
</div>"""


def _elo_cell(score):
    """Build the HTML for the score cell: colored number plus a proportional bar."""
    color = _elo_color(score)
    # Bar fill in percent: grows by 1% per 7 Elo points above 700, clamped to
    # the range 5..100 so low scores still show a visible sliver.
    bar_width = min(100, max(5, (score - 700) / 7))
    return f"""<div style="display: flex; align-items: center; justify-content: flex-start;">
<span style="font-weight: bold; min-width: 40px; text-align: right; color: {color}">
{score}
</span>
<div style="margin-left: 10px; height: 12px; width: 80px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
<div style="height: 100%; width: {bar_width}%; background-color: {color};"></div>
</div>
</div>"""


def update_leaderboard(category_label):
    """Build the display table for the category selected in the UI.

    Args:
        category_label: A label from CATEGORIES (the first element of each
            pair). An unknown label falls back to the default category.

    Returns:
        A DataFrame with the columns "Rank", "Model", "Organization",
        "License" and "Elo Score (<category value>)", sorted by the selected
        score in descending order with ranks starting at 1. "Model" and the
        score column hold HTML strings. If even the default score column is
        missing from master_df, an empty frame with those columns is returned.
    """
    # Translate the UI label into its category value (first match wins).
    category_value = next(
        (value for label, value in CATEGORIES if label == category_label),
        DEFAULT_CATEGORY_VALUE,
    )
    score_column = category_to_column.get(category_value)
    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category value '{category_value}' or column '{score_column}'. Falling back to default.")
        score_column = category_to_column[DEFAULT_CATEGORY_VALUE]  # Fallback to default value
        category_value = DEFAULT_CATEGORY_VALUE  # Keep the header consistent with the fallback
        if score_column not in master_df.columns:
            print(f"Error: Default column '{score_column}' also not found.")
            # Return an empty DataFrame with the correct structure
            return pd.DataFrame({
                "Rank": [], "Model": [], "Organization": [], "License": [], f"Elo Score ({category_value})": []
            })

    # Work on a sorted copy so master_df itself is never modified.
    df = (
        master_df[['model_name', 'url', 'organizer', 'license', score_column]]
        .sort_values(by=score_column, ascending=False)
        .reset_index(drop=True)
    )
    # After the index reset, position + 1 is the numerical rank.
    df.insert(0, 'Rank', df.index + 1)
    df['Model'] = df.apply(_model_cell, axis=1)
    df['Elo Display'] = df[score_column].apply(_elo_cell)
    df = df.rename(columns={'organizer': 'Organization', 'license': 'License'})

    # Select the display columns and give the score column its dynamic header.
    return df[["Rank", "Model", "Organization", "License", "Elo Display"]].rename(
        columns={"Elo Display": f"Elo Score ({category_value})"}
    )
# --- Mock/Placeholder functions/data for other tabs ---
print("Warning: Evaluation queue data fetching is disabled/mocked.")
# Column names and their display types for the (currently empty) queue tables.
EVAL_COLS = ["Model", "Status", "Requested", "Started"]
EVAL_TYPES = ["str"] * len(EVAL_COLS)
# Three independent, empty frames that share the same column layout.
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = (
    pd.DataFrame(columns=EVAL_COLS) for _ in range(3)
)
# --- Keep restart function if relevant ---
def restart_space():
    """Log a restart attempt for the configured Space; performs no restart.

    This is a stub: it only prints a message naming REPO_ID. A real
    implementation would have to go through an external mechanism (hub API
    client, webhook, or the platform's own auto-restart), since the app is
    unlikely to be able to restart its own Space from within.
    """
    message = f"Attempting to trigger restart for space: {REPO_ID} (placeholder action)"
    print(message)
# --- Enhanced CSS for beauty and readability ---
# (This stylesheet is later appended to any custom_css imported from src, or used on its own when that is empty.)
enhanced_css = """
/* Base styling */
:root {
--primary-color: #1a5fb4;
--secondary-color: #2ec27e;
--accent-color: #e5a50a;
--warning-color: #ff7800;
--text-color: #333333; /* Darker text for better contrast */
--background-color: #f4f6f8; /* Light grey background */
--card-background: #ffffff; /* White background for cards/tables */
--border-color: #e0e0e0;
--shadow-color: rgba(0, 0, 0, 0.08);
}
/* Typography */
body, .gradio-container {
font-family: 'Inter', 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, system-ui, sans-serif !important;
font-size: 16px !important;
line-height: 1.6 !important;
color: var(--text-color) !important;
background-color: var(--background-color) !important; /* Ensure body background is set */
}
/* Headings */
h1 {
font-size: 2.5rem !important;
font-weight: 700 !important;
margin-bottom: 1.5rem !important;
color: var(--primary-color) !important;
text-align: center !important;
letter-spacing: -0.02em !important;
line-height: 1.2 !important;
}
h2 {
font-size: 1.8rem !important;
font-weight: 600 !important;
margin-top: 1.5rem !important;
margin-bottom: 1rem !important;
color: var(--primary-color) !important;
letter-spacing: -0.01em !important;
}
h3 {
font-size: 1.4rem !important;
font-weight: 600 !important;
margin-top: 1.2rem !important;
margin-bottom: 0.8rem !important;
color: var(--text-color) !important;
}
/* Tabs styling */
.tabs {
margin-top: 1rem !important;
border-radius: 12px !important;
overflow: hidden !important;
box-shadow: 0 4px 12px var(--shadow-color) !important;
background-color: var(--card-background); /* White background for tabs container */
}
.tab-nav button {
font-size: 1.1rem !important;
font-weight: 500 !important;
padding: 0.8rem 1.5rem !important;
border-radius: 0 !important;
transition: all 0.2s ease !important;
border-bottom: 2px solid transparent !important;
background-color: transparent !important; /* Ensure buttons are transparent */
color: var(--text-color) !important;
}
.tab-nav button.selected {
background-color: transparent !important; /* Keep transparent */
color: var(--primary-color) !important;
font-weight: 600 !important;
border-bottom: 2px solid var(--primary-color) !important;
}
/* Card styling */
.gradio-container .gr-block { /* Target blocks for card styling */
border-radius: 12px !important;
border: 1px solid var(--border-color) !important;
box-shadow: 0 4px 12px var(--shadow-color) !important;
overflow: hidden !important;
background-color: var(--card-background) !important; /* White background */
padding: 1.5rem !important; /* Add padding to cards */
margin-bottom: 1.5rem !important; /* Add space between cards */
}
/* Ensure panels also get card styling */
.gradio-container .gr-panel {
border-radius: 12px !important;
border: 1px solid var(--border-color) !important;
box-shadow: 0 4px 12px var(--shadow-color) !important;
overflow: hidden !important;
background-color: var(--card-background) !important;
padding: 1.5rem !important;
margin-bottom: 1.5rem !important;
}
/* Table styling */
.gr-dataframe { /* Target the dataframe component specifically */
border-radius: 8px !important;
overflow: hidden !important; /* Needed for border-radius on table */
box-shadow: 0 4px 12px var(--shadow-color) !important;
border: 1px solid var(--border-color) !important; /* Add border around table */
margin: 1.5rem 0 !important;
}
table {
width: 100% !important;
border-collapse: separate !important; /* Needed for spacing and rounded corners */
border-spacing: 0 !important;
background-color: var(--card-background); /* White background for table */
}
th {
background-color: #f0f5ff !important; /* Lighter blue for header */
color: var(--primary-color) !important;
font-weight: 600 !important;
padding: 1rem 1.2rem !important; /* Adjust padding */
font-size: 1.05rem !important; /* Slightly smaller header font */
text-align: left !important;
border-bottom: 2px solid var(--primary-color) !important;
position: sticky !important; /* Make header sticky */
top: 0 !important; /* Stick to the top */
z-index: 1 !important; /* Ensure header is above scrolling content */
}
td {
padding: 0.9rem 1.2rem !important; /* Adjust padding */
border-bottom: 1px solid var(--border-color) !important;
font-size: 1rem !important;
vertical-align: middle !important;
background-color: var(--card-background); /* Ensure cell background is white */
color: var(--text-color); /* Ensure text color is applied */
}
tr:last-child td {
border-bottom: none !important;
}
tr:nth-child(even) td {
background-color: #f8fafd !important; /* Very light blue for alternating rows */
}
tr:hover td {
background-color: #edf2fb !important; /* Slightly darker blue on hover */
}
/* Button styling */
button.primary, .gr-button.primary {
background-color: var(--primary-color) !important;
color: white !important;
font-weight: 500 !important;
padding: 0.8rem 1.5rem !important;
border-radius: 8px !important;
border: none !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
}
button.primary:hover, .gr-button.primary:hover {
background-color: #0b4a9e !important;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
transform: translateY(-1px) !important;
}
/* Radio buttons */
.gr-radio {
display: flex !important;
flex-wrap: wrap !important;
gap: 10px !important;
margin: 1rem 0 !important;
background-color: transparent !important; /* Ensure container is transparent */
border: none !important; /* Remove default border */
box-shadow: none !important; /* Remove default shadow */
padding: 0 !important; /* Remove default padding */
}
.gr-radio > label { /* Target the label inside gr-radio */
background-color: #f5f7fa !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
padding: 0.7rem 1.2rem !important;
font-size: 1rem !important;
font-weight: 500 !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
display: flex !important;
align-items: center !important;
gap: 8px !important;
color: var(--text-color) !important;
box-shadow: none !important; /* Override potential inner shadows */
}
/* Remove inner block styling if gradio adds extra divs */
.gr-radio > div {
background: none !important;
border: none !important;
padding: 0 !important;
margin: 0 !important;
box-shadow: none !important;
}
.gr-radio label:hover {
background-color: #eaeef3 !important;
border-color: #c0c9d6 !important;
}
.gr-radio label.selected {
background-color: #e0e9f7 !important;
border-color: var(--primary-color) !important;
color: var(--primary-color) !important;
font-weight: 600 !important;
}
/* Input fields */
input[type="text"], textarea, select { /* Be more specific */
font-size: 1rem !important;
padding: 0.8rem !important;
border-radius: 8px !important;
border: 1px solid var(--border-color) !important;
transition: all 0.2s ease !important;
background-color: #ffffff !important;
color: var(--text-color) !important;
width: 100%; /* Make inputs take full width */
box-sizing: border-box; /* Include padding in width */
}
input[type="text"]:focus, textarea:focus, select:focus {
border-color: var(--primary-color) !important;
box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
outline: none !important;
}
/* Accordion styling */
.gr-accordion {
border-radius: 8px !important;
overflow: hidden !important;
margin: 1rem 0 !important;
border: 1px solid var(--border-color) !important;
background-color: var(--card-background) !important; /* White background */
box-shadow: 0 2px 6px var(--shadow-color) !important; /* Lighter shadow for accordion */
}
.gr-accordion > .gr-block { /* Target inner block of accordion */
border: none !important;
box-shadow: none !important;
padding: 0 !important; /* Remove padding from inner block */
margin: 0 !important;
}
.gr-accordion-header { /* Check Gradio structure for header class */
padding: 1rem 1.2rem !important; /* Adjust padding */
background-color: #f5f7fa !important;
font-weight: 600 !important;
font-size: 1.1rem !important;
color: var(--text-color) !important;
border-bottom: 1px solid var(--border-color) !important;
cursor: pointer; /* Indicate clickable */
}
/* Style for open accordion header */
.gr-accordion[open] > .gr-accordion-header { /* Might need adjustment based on Gradio version */
border-bottom: 1px solid var(--border-color) !important;
}
/* Style for accordion content (might be nested) */
.gr-accordion .gr-panel, .gr-accordion .gr-box { /* Check which element holds content */
padding: 1.2rem !important; /* Add padding to content */
background-color: var(--card-background) !important; /* White background */
border: none !important; /* Remove borders inside accordion */
box-shadow: none !important;
border-radius: 0 0 8px 8px !important; /* Round bottom corners */
}
/* Markdown text improvements */
.markdown-text { /* Might need a more specific selector like .gr-markdown */
font-size: 1.05rem !important;
line-height: 1.7 !important;
color: var(--text-color) !important;
background-color: transparent !important; /* Ensure markdown bg is transparent */
}
.markdown-text p {
margin-bottom: 1rem !important;
}
.markdown-text ul, .markdown-text ol {
margin-left: 1.5rem !important;
margin-bottom: 1rem !important;
}
.markdown-text li {
margin-bottom: 0.5rem !important;
}
.markdown-text strong, .markdown-text b {
font-weight: 600 !important;
color: #111 !important; /* Slightly darker for emphasis */
}
.markdown-text code { /* Style inline code */
background-color: #eef0f2;
padding: 0.2em 0.4em;
border-radius: 4px;
font-size: 0.9em;
color: #3a4a5b;
}
.markdown-text a { /* Style links */
color: var(--primary-color);
text-decoration: none;
}
.markdown-text a:hover {
text-decoration: underline;
}
/* Status indicators (if used in submission tab) */
.status-badge {
display: inline-block;
padding: 0.3rem 0.7rem;
border-radius: 99px;
font-size: 0.85rem;
font-weight: 500;
text-align: center;
}
/* Add specific styles for statuses if needed */
/* Footer */
.footer {
margin-top: 2rem;
padding: 1.5rem 1rem;
text-align: center;
font-size: 0.9rem;
color: #555;
border-top: 1px solid var(--border-color);
background-color: #e9edf1; /* Light background for footer */
}
/* Enhanced leaderboard title area */
.leaderboard-header {
display: flex;
flex-direction: column; /* Stack elements vertically on small screens */
align-items: center; /* Center items */
justify-content: space-between;
margin-bottom: 1.5rem;
padding: 1.5rem;
background-color: var(--card-background); /* White background */
border-radius: 12px;
border: 1px solid var(--border-color);
box-shadow: 0 4px 12px var(--shadow-color);
text-align: center; /* Center text */
}
@media (min-width: 768px) { /* Apply side-by-side layout on larger screens */
.leaderboard-header {
flex-direction: row;
text-align: left;
}
}
.leaderboard-title {
font-size: 2.0rem; /* Adjusted size */
font-weight: 700;
color: var(--primary-color);
margin: 0 0 0.5rem 0; /* Add bottom margin */
display: flex;
align-items: center;
gap: 0.7rem; /* Increase gap */
justify-content: center; /* Center on small screens */
}
@media (min-width: 768px) {
.leaderboard-title {
justify-content: flex-start; /* Align left on large screens */
font-size: 2.2rem; /* Restore size */
}
}
.leaderboard-subtitle {
font-size: 1.1rem;
color: #666;
margin: 0 0 1rem 0; /* Add bottom margin */
}
@media (min-width: 768px) {
.leaderboard-subtitle { margin-bottom: 0; } /* Remove bottom margin on large screens */
}
.timestamp {
font-size: 0.85rem;
color: #666;
font-style: italic;
background-color: #f5f7fa;
padding: 5px 10px;
border-radius: 6px;
margin-top: 0.5rem; /* Add space above timestamp */
}
@media (min-width: 768px) {
.timestamp { margin-top: 0; } /* Remove top margin on large screens */
}
/* Category selector buttons (Already styled via .gr-radio) */
/* Logo and brand styling */
.logo {
font-size: 2.5em; /* Keep logo size */
margin-right: 0.5rem;
}
/* Style for About section cards */
.about-card {
background-color: #f5f7fa; /* Lighter background for these cards */
padding: 20px;
border-radius: 12px;
height: 100%; /* Make cards in a row equal height */
border: 1px solid var(--border-color);
display: flex; /* Use flexbox for alignment */
flex-direction: column; /* Stack content vertically */
text-align: center; /* Center text */
box-shadow: 0 2px 6px var(--shadow-color); /* Add subtle shadow */
}
.about-card h3 {
text-align: center;
margin-top: 0;
margin-bottom: 10px; /* Space below heading */
color: var(--primary-color);
}
.about-card p {
color: var(--text-color);
font-size: 0.95rem;
line-height: 1.6;
flex-grow: 1; /* Allow paragraph to take up space */
}
.about-card-icon {
font-size: 2.5em;
text-align: center;
margin-bottom: 15px;
display: block;
color: var(--secondary-color); /* Use secondary color for icons */
}
/* Ensure citation textbox has good contrast */
#citation-button textarea {
background-color: #f5f7fa !important;
color: var(--text-color) !important;
border: 1px solid var(--border-color) !important;
font-family: monospace !important; /* Use monospace for citation */
font-size: 0.95rem !important;
}
"""
# Merge stylesheets: when custom_css already has content (loaded from src),
# enhanced_css is appended after a newline; when it is empty, enhanced_css is
# used on its own.
custom_css = f"{custom_css}\n{enhanced_css}" if custom_css else enhanced_css
# --- Gradio App Definition ---
# No theme is passed (gr.themes.Soft() was removed) so the custom CSS dominates.
# The emoji and the copyright sign in the user-facing strings below were
# previously mis-encoded and are restored to the intended characters.
# NOTE(review): several sections are wrapped in nested
# gr.Blocks(elem_classes="gr-block"); confirm that a nested Blocks actually
# renders a styled container in the Gradio version in use (a layout component
# such as gr.Column/gr.Group may be what is intended).
demo = gr.Blocks(css=custom_css)
with demo:
    # Enhanced header with timestamp
    gr.HTML(f"""
<div class="leaderboard-header">
<div>
<div class="leaderboard-title">
<span class="logo">🏆</span> MLE-Dojo Benchmark Leaderboard
</div>
<div class="leaderboard-subtitle">
Comprehensive evaluation of AI models across multiple domains
</div>
</div>
<div class="timestamp">
Last updated: {last_updated}
</div>
</div>
""")
    # Introduction moved outside Tabs for permanent visibility
    with gr.Blocks(elem_classes="gr-block"):  # Wrap intro in a styled block
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Column():
                with gr.Blocks(elem_classes="gr-block"):  # Wrap leaderboard controls/table
                    gr.HTML("""
<h2 style="display: flex; align-items: center; gap: 10px; margin-bottom: 0.5rem;">
<span style="font-size: 1.3em;">📈</span> Model Performance Rankings
</h2>
<p class="leaderboard-subtitle" style="margin-top: 0;">Select a category to view specialized performance metrics</p>
""")
                    # Enhanced category selector
                    category_selector = gr.Radio(
                        # Use labels from CATEGORIES
                        choices=[label for label, value in CATEGORIES],
                        label="Select Performance Domain:",
                        value=DEFAULT_CATEGORY_LABEL,  # Default to the label
                        interactive=True,
                        elem_classes="gr-radio"  # Apply custom radio styling
                    )
                    # Visual separator (Optional)
                    # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
                    # Enhanced leaderboard table with scrolling
                    leaderboard_df_component = gr.Dataframe(
                        # Initialize with the default category's table
                        value=update_leaderboard(DEFAULT_CATEGORY_LABEL),
                        # Headers will be set dynamically by the update function's output DataFrame
                        # Set datatypes for correct rendering (Rank is now number)
                        datatype=["number", "html", "str", "str", "html"],
                        interactive=False,  # IMPORTANT: Keep False to disable UI sorting/editing
                        # row_count=(10, "dynamic"), # Alternative: show 10 rows, scroll others
                        col_count=(5, "fixed"),  # We have 5 columns
                        wrap=True,  # Allow text wrapping in cells
                        elem_id="leaderboard-table",  # ID for potential CSS targeting
                    )
                # Stats cards (visual enhancement) - Placed after leaderboard table
                with gr.Blocks(elem_classes="gr-block"):  # Wrap stats in a styled block
                    gr.HTML("<h2>Benchmark Statistics</h2>")  # Add title for stats section
                    with gr.Row(equal_height=True):
                        with gr.Column(scale=1):
                            gr.HTML(f"""
<div class="about-card" style="text-align: center;">
<div class="about-card-icon">🔍</div>
<div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
<div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
</div>
""")
                        with gr.Column(scale=1):
                            gr.HTML(f"""
<div class="about-card" style="text-align: center;">
<div class="about-card-icon">🌐</div>
<div style="font-size: 2em; font-weight: bold; color: #00875a;">{50}</div>
<div style="font-size: 1.1em; color: #666;">Competitions (Example)</div>
</div>
""")
                        with gr.Column(scale=1):
                            # "Overall" is an aggregate, not a domain, hence the -1.
                            gr.HTML(f"""
<div class="about-card" style="text-align: center;">
<div class="about-card-icon">🏅</div>
<div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)-1}</div>
<div style="font-size: 1.1em; color: #666;">Performance Domains</div>
</div>
""")
                # Link the radio button change to the update function
                category_selector.change(
                    fn=update_leaderboard,
                    inputs=category_selector,
                    outputs=leaderboard_df_component
                )
        with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
            # Wrap content in a styled block for consistent padding/background
            with gr.Blocks(elem_classes="gr-block"):
                # Enhanced about section header
                gr.HTML("""
<div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
<div style="font-size: 4em;">🧪</div>
<div>
<h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
<p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
</div>
</div>
""")
                # Use the LLM_BENCHMARKS_TEXT variable
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                # Add methodology cards for visual enhancement
                gr.HTML("<h2 style='margin-top: 2rem;'>Evaluation Domains</h2>")  # Title for cards
                with gr.Row(equal_height=True):
                    with gr.Column():
                        gr.HTML("""
<div class="about-card">
<div class="about-card-icon">💡</div>
<h3>MLE-Lite</h3>
<p>Evaluates basic ML engineering tasks: data preprocessing, feature engineering, model selection, and basic deployment concepts.</p>
</div>
""")
                    with gr.Column():
                        gr.HTML("""
<div class="about-card">
<div class="about-card-icon">📊</div>
<h3>Tabular</h3>
<p>Tests processing, analysis, and modeling of structured data, including statistical analysis, predictive modeling, and visualization.</p>
</div>
""")
                with gr.Row(equal_height=True):
                    with gr.Column():
                        gr.HTML("""
<div class="about-card">
<div class="about-card-icon">📝</div>
<h3>NLP</h3>
<p>Evaluates natural language processing: text classification, sentiment analysis, entity recognition, text generation, and understanding.</p>
</div>
""")
                    with gr.Column():
                        gr.HTML("""
<div class="about-card">
<div class="about-card-icon">👁️</div>
<h3>CV</h3>
<p>Tests computer vision capabilities: image classification, object detection, image generation, and visual understanding tasks.</p>
</div>
""")
        # Optional: Uncomment if you want to re-enable the Submit tab
        # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
        #     with gr.Blocks(elem_classes="gr-block"): # Wrap in styled block
        #         # ... (Your submission form code here) ...
        #         gr.Markdown("Submit tab content goes here.") # Placeholder
    # Enhanced citation section (outside tabs, inside main demo block)
    with gr.Blocks(elem_classes="gr-block"):  # Wrap in styled block
        with gr.Accordion("📄 Citation", open=False):  # Accordion itself doesn't need gr-block
            gr.HTML("""
<div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
<div style="font-size: 2.5em;">📄</div>
<div>
<h3 style="margin: 0;">How to Cite This Benchmark</h3>
<p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research.</p>
</div>
</div>
""")
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=5,  # Adjust lines as needed
                elem_id="citation-button",  # Use this ID for CSS
                show_copy_button=True,
                interactive=False  # Textbox is not meant for user input here
            )
    # Footer (outside tabs, inside main demo block)
    gr.HTML("""
<div class="footer">
<p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
<p style="margin-top: 5px; display: flex; justify-content: center; flex-wrap: wrap; gap: 20px;">
<a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
<a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
<a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
</p>
</div>
""")
# --- Keep scheduler if relevant ---
# Note: Scheduler might not work reliably in all Gradio deployment environments (like Spaces free tier)
if __name__ == "__main__":
    try:
        scheduler = BackgroundScheduler()
        if not callable(restart_space):
            print("Warning: restart_space function not available; space restart job not scheduled.")
        elif REPO_ID and REPO_ID != "your/space-id":
            # A real Space ID is configured: run restart_space every 1800 s.
            print(f"Scheduling space restart job for {REPO_ID} every 30 minutes.")
            # Restart interval might need adjustment based on environment limits
            scheduler.add_job(restart_space, "interval", seconds=1800)
            scheduler.start()
        else:
            # REPO_ID is empty or still the placeholder value.
            print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
    except Exception as e:
        # Scheduling is best-effort; a failure here must not stop the app.
        print(f"Failed to initialize or start scheduler: {e}")
# --- Launch the app ---
# Runs only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    print("Launching Enhanced Gradio App...")
    # share=True is needed for public access if running locally and want others to see it
    # demo.launch(share=True)
    demo.launch()  # Launches locally by default