Spaces:

MLE-Dojo
/

Leaderboard

Running

App Files Community

Jerrycool committed on Apr 26

Commit

ffb569a

verified ·

1 Parent(s): a84f158

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -830

app.py CHANGED Viewed

@@ -1,10 +1,8 @@
 import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-from datetime import datetime
-# --- Make sure these imports work relative to your file structure ---
-# Option 1: If src is a directory in the same folder as your script:
 try:
     from src.about import (
         CITATION_BUTTON_LABEL,
@@ -12,903 +10,211 @@ try:
         EVALUATION_QUEUE_TEXT,
         INTRODUCTION_TEXT,
         LLM_BENCHMARKS_TEXT,
-        TITLE,
     )
     from src.display.css_html_js import custom_css
     from src.envs import REPO_ID
     from src.submission.submit import add_new_eval
-    print("Successfully imported from src module.")
-# Option 2: If you don't have these files, define placeholders
 except ImportError:
-    print("Warning: Using placeholder values because src module imports failed.")
     CITATION_BUTTON_LABEL = "Citation"
     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
-    INTRODUCTION_TEXT = """
-    # Welcome to the MLE-Dojo Benchmark Leaderboard
-    This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
-    Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
-    ## How to read this leaderboard
-    - Select a domain category to view specialized rankings
-    - Higher ELO scores indicate better performance
-    - Click on any model name to learn more about it
-    """
-    LLM_BENCHMARKS_TEXT = """
-    # About the MLE-Dojo Benchmark
-    ## Evaluation Methodology
-    The MLE-Dojo benchmark evaluates models across various domains including:
-    - **MLE-Lite**: Basic machine learning engineering tasks
-    - **Tabular**: Data manipulation, analysis, and modeling with structured data
-    - **NLP**: Natural language processing tasks including classification, generation, and understanding
-    - **CV**: Computer vision tasks including image classification, object detection, and generation
-    Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other.
-    ## Contact
-    For more information or to submit your model, please contact us at [email protected]
-    """
-    TITLE = "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
     custom_css = ""
     REPO_ID = "your/space-id"
     def add_new_eval(*args): return "Submission placeholder."
-# --- Elo Leaderboard Configuration ---
-# Enhanced data with Rank (placeholder), Organizer, License, and URL
 data = [
-    {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
-    {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
-    {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
-    {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
-    {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
-    {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
-    {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
-    {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 ]
-# Add organization logos (for visual enhancement)
-org_logos = {
-    'OpenAI': '📱',  # You can replace these with actual icon URLs in production
-    'DeepSeek': '🔍',
-    'Google': '🌐',
-    'Default': '🤖'
-}
-# Create a master DataFrame
 master_df = pd.DataFrame(data)
-# Add last updated timestamp
-last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S")
-# Define categories with fancy icons
-CATEGORIES = [
-    ("🏆 Overall", "Overall"),
-    ("💡 MLE-Lite", "MLE-Lite"),
-    ("📊 Tabular", "Tabular"),
-    ("📝 NLP", "NLP"),
-    ("👁️ CV", "CV")
-]
 DEFAULT_CATEGORY = "Overall"
-# Map user-facing categories to DataFrame column names
 category_to_column = {
     "MLE-Lite": "MLE-Lite_Elo",
     "Tabular": "Tabular_Elo",
     "NLP": "NLP_Elo",
     "CV": "CV_Elo",
-    "Overall": "Overall"
 }
-# --- Helper function to update leaderboard ---
-def update_leaderboard(category_label):
-    """
-    Enhanced function to update the leaderboard with visual improvements
-    """
-    # Extract the category value from the label if it's a tuple (icon, value)
-    if isinstance(category_label, tuple):
-        category = category_label[1]
-    else:
-        # For backward compatibility or direct values
-        category = category_label.split(" ")[-1] if " " in category_label else category_label
-    score_column = category_to_column.get(category)
-    if score_column is None or score_column not in master_df.columns:
-        print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
-        score_column = category_to_column[DEFAULT_CATEGORY]
-        if score_column not in master_df.columns:
-            print(f"Error: Default column '{score_column}' also not found.")
-            return pd.DataFrame({
-                "Rank": [],
-                "Model": [],
-                "Organizer": [],
-                "License": [],
-                "Elo Score": []
-            })
-    # Select base columns + the score column for sorting
-    cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
-    df = master_df[cols_to_select].copy()
-    # Sort by the selected 'Elo Score' descending
-    df.sort_values(by=score_column, ascending=False, inplace=True)
-    # Add Rank with just numbers (no medals)
     df.reset_index(drop=True, inplace=True)
-    df.insert(0, 'Rank', df.index.map(lambda idx: f"{idx + 1}"))
-    # Add organization icons to model names
     df['Model'] = df.apply(
-        lambda row: f"""<div style="display: flex; align-items: center;">
-            <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
-            <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
-               style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
-               {row['model_name']}
-            </a>
-        </div>""",
         axis=1
     )
-    # Format Elo scores with visual indicators
-    df['Elo Display'] = df[score_column].apply(
-        lambda score: f"""<div style="display: flex; align-items: center;">
-            <span style="font-weight: bold; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
-                {score}
-            </span>
-            <div style="margin-left: 10px; height: 12px; width: 60px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
-                <div style="height: 100%; width: {min(100, max(5, (score-700)/7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
-            </div>
-        </div>"""
-    )
-    # Rename columns for display
-    df.rename(columns={score_column: 'Elo Score'}, inplace=True)
-    df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
-    # Select and reorder columns for final display
-    final_columns = ["Rank", "Model", "Organizer", "License", "Elo Display"]
-    df = df[final_columns]
-    # Rename for display
-    df.columns = ["Rank", "Model", "Organization", "License", f"Elo Score ({category})"]
-    return df
-# --- Mock/Placeholder functions/data for other tabs ---
-print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
-finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
-running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
-pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
-EVAL_COLS = ["Model", "Status", "Requested", "Started"]
-EVAL_TYPES = ["str", "str", "str", "str"]
-# --- Keep restart function if relevant ---
-def restart_space():
-    print(f"Attempting to restart space: {REPO_ID}")
-    # Replace with your actual space restart mechanism if needed
-# --- Enhanced CSS for beauty and readability ---
-# FIXED CSS with better contrast, improved scrolling for tables, and other fixes
-enhanced_css = """
-/* Base styling */
-:root {
-    --primary-color: #1a5fb4;
-    --secondary-color: #2ec27e;
-    --accent-color: #e5a50a;
-    --warning-color: #ff7800;
-    --text-color: #333333;
-    --background-color: #e9edf1;  /* Lightened background */
-    --card-background: #ffffff;
-    --border-color: #c0c9d6;  /* Darkened border */
-    --shadow-color: rgba(0, 0, 0, 0.12);  /* Increased shadow opacity */
 }
-/* Typography */
-body, .gradio-container {
-    font-family: 'Inter', 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, system-ui, sans-serif !important;
-    font-size: 16px !important;
-    line-height: 1.6 !important;
-    color: var(--text-color) !important;
-    background-color: var(--background-color) !important;
 }
-/* Headings */
-h1 {
     font-size: 2.5rem !important;
     font-weight: 700 !important;
-    margin-bottom: 1.5rem !important;
-    color: var(--primary-color) !important;
-    text-align: center !important;
-    letter-spacing: -0.02em !important;
-    line-height: 1.2 !important;
 }
-h2 {
-    font-size: 1.8rem !important;
-    font-weight: 600 !important;
-    margin-top: 1.5rem !important;
-    margin-bottom: 1rem !important;
-    color: var(--primary-color) !important;
-    letter-spacing: -0.01em !important;
-}
-h3 {
-    font-size: 1.4rem !important;
-    font-weight: 600 !important;
-    margin-top: 1.2rem !important;
-    margin-bottom: 0.8rem !important;
-    color: var(--text-color) !important;
-}
-/* Tabs styling */
-.tabs {
-    margin-top: 1rem !important;
-    border-radius: 12px !important;
-    overflow: hidden !important;
-    box-shadow: 0 4px 12px var(--shadow-color) !important;
-    background-color: var(--card-background);
-}
-.tab-nav button {
-    font-size: 1.1rem !important;
-    font-weight: 500 !important;
-    padding: 0.8rem 1.5rem !important;
-    border-radius: 0 !important;
-    transition: all 0.2s ease !important;
-    border-bottom: 2px solid transparent !important;
-    background-color: transparent !important;
-    color: var(--text-color) !important;
-}
-.tab-nav button.selected {
-    background-color: transparent !important;
-    color: var(--primary-color) !important;
-    font-weight: 600 !important;
-    border-bottom: 2px solid var(--primary-color) !important;
-}
-/* Card styling */
-.gradio-container .gr-box, .gradio-container .gr-panel {
-    border-radius: 12px !important;
-    border: 1px solid var(--border-color) !important;
-    box-shadow: 0 4px 12px var(--shadow-color) !important;
-    overflow: hidden !important;
-    background-color: var(--card-background) !important;
-}
-/* Table styling - FIXING SCROLLING ISSUES */
-table {
-    width: 100% !important;
-    border-collapse: separate !important;
-    border-spacing: 0 !important;
-    margin: 1.5rem 0 !important;
-    border-radius: 8px !important;
-    overflow: visible !important; /* Changed from hidden to visible */
-    box-shadow: 0 4px 12px var(--shadow-color) !important;
-    background-color: var(--card-background);
 }
-/* Data table container - ensure scrolling works */
-.gr-table-container {
-    overflow: auto !important;
-    max-height: 600px !important; /* Add max height to ensure scrolling */
-    margin-bottom: 20px !important;
-}
-th {
-    background-color: #e0ebff !important; /* Darker header background */
-    color: var(--primary-color) !important;
-    font-weight: 600 !important;
-    padding: 1rem !important;
-    font-size: 1.1rem !important;
-    text-align: left !important;
-    border-bottom: 2px solid var(--primary-color) !important;
-    position: sticky !important; /* Keep headers visible when scrolling */
-    top: 0 !important;
-    z-index: 10 !important;
-}
-td {
-    padding: 1rem !important;
-    border-bottom: 1px solid var(--border-color) !important;
-    font-size: 1rem !important;
-    vertical-align: middle !important;
-    background-color: var(--card-background);
-}
-tr:last-child td {
-    border-bottom: none !important;
-}
-tr:nth-child(even) td {
-    background-color: #f0f5ff !important; /* Increased contrast for even rows */
-}
-tr:hover td {
-    background-color: #e0ebff !important; /* Darker hover color */
-}
-/* Button styling */
-button.primary, .gr-button.primary {
-    background-color: var(--primary-color) !important;
-    color: white !important;
-    font-weight: 500 !important;
-    padding: 0.8rem 1.5rem !important;
-    border-radius: 8px !important;
     border: none !important;
-    cursor: pointer !important;
-    transition: all 0.2s ease !important;
-    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
-}
-button.primary:hover, .gr-button.primary:hover {
-    background-color: #0b4a9e !important;
-    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
-    transform: translateY(-1px) !important;
-}
-/* Radio buttons */
-.gr-radio {
-    display: flex !important;
-    flex-wrap: wrap !important;
-    gap: 10px !important;
-    margin: 1rem 0 !important;
-}
-.gr-radio label {
-    background-color: #f0f5ff !important; /* Darker radio button background */
-    border: 1px solid var(--border-color) !important;
-    border-radius: 8px !important;
-    padding: 0.7rem 1.2rem !important;
-    font-size: 1rem !important;
     font-weight: 500 !important;
-    cursor: pointer !important;
-    transition: all 0.2s ease !important;
-    display: flex !important;
-    align-items: center !important;
-    gap: 8px !important;
-    color: var(--text-color) !important;
-}
-.gr-radio label:hover {
-    background-color: #e0e9f7 !important;
-    border-color: #a0b0c0 !important; /* Darker border on hover */
-}
-.gr-radio label.selected {
-    background-color: #d0dfff !important; /* Darker selected background */
-    border-color: var(--primary-color) !important;
-    color: var(--primary-color) !important;
-    font-weight: 600 !important;
-}
-/* Input fields */
-input, textarea, select {
-    font-size: 1rem !important;
-    padding: 0.8rem !important;
-    border-radius: 8px !important;
-    border: 1px solid var(--border-color) !important;
-    transition: all 0.2s ease !important;
-    background-color: #ffffff !important;
-    color: var(--text-color) !important;
-}
-input:focus, textarea:focus, select:focus {
-    border-color: var(--primary-color) !important;
-    box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
-    outline: none !important;
 }
-/* Accordion styling */
-.gr-accordion {
-    border-radius: 8px !important;
-    overflow: hidden !important;
-    margin: 1rem 0 !important;
-    border: 1px solid var(--border-color) !important;
-    background-color: var(--card-background);
 }
-.gr-accordion-header {
-    padding: 1rem !important;
-    background-color: #f0f5ff !important; /* Darker accordion header */
-    font-weight: 600 !important;
-    font-size: 1.1rem !important;
-    color: var(--text-color) !important;
-    border-bottom: 1px solid var(--border-color) !important;
 }
-.gr-accordion-content {
-    padding: 1rem !important;
-    background-color: var(--card-background) !important;
-}
-/* Markdown text improvements */
-.markdown-text {
-    font-size: 1.05rem !important;
-    line-height: 1.7 !important;
-    color: var(--text-color) !important;
-}
-.markdown-text p {
-    margin-bottom: 1rem !important;
-}
-.markdown-text ul, .markdown-text ol {
-    margin-left: 1.5rem !important;
-    margin-bottom: 1rem !important;
-}
-.markdown-text li {
-    margin-bottom: 0.5rem !important;
-}
-.markdown-text strong {
-    font-weight: 600 !important;
-    color: #111 !important;
-}
-/* Status indicators */
-.status-badge {
     display: inline-block;
-    padding: 0.3rem 0.7rem;
-    border-radius: 99px;
-    font-size: 0.85rem;
-    font-weight: 500;
-    text-align: center;
-}
-.status-pending {
-    background-color: #fff3cc;
-    color: #b58a00;
-    border: 1px solid #ffd74d;
-}
-.status-running {
-    background-color: #ccebff;
-    color: #0066cc;
-    border: 1px solid #66b3ff;
-}
-.status-completed {
-    background-color: #d6f5e6;
-    color: #00875a;
-    border: 1px solid #57d9a3;
-}
-/* Footer */
-.footer {
-    margin-top: 2rem;
-    padding: 1.5rem 1rem;
-    text-align: center;
-    font-size: 0.9rem;
-    color: #333;
-    border-top: 1px solid var(--border-color);
-    background-color: #d9e0e8; /* Darker footer background */
-}
-/* Enhanced leaderboard title area */
-.leaderboard-header {
-    display: flex;
-    align-items: center;
-    justify-content: space-between;
-    margin-bottom: 1.5rem;
-    padding: 1.5rem;
-    background-color: var(--card-background);
-    border-radius: 12px;
-    border: 1px solid var(--border-color);
-    box-shadow: 0 4px 12px var(--shadow-color);
-}
-.leaderboard-title {
-    font-size: 2.2rem;
-    font-weight: 700;
-    color: var(--primary-color);
-    margin: 0;
-    display: flex;
-    align-items: center;
-    gap: 0.5rem;
-}
-.leaderboard-subtitle {
-    font-size: 1.1rem;
-    color: #444; /* Darker subtitle text */
-    margin-top: 0.5rem;
-}
-.timestamp {
-    font-size: 0.85rem;
-    color: #444; /* Darker timestamp text */
-    font-style: italic;
-    background-color: #f0f5ff; /* Darker timestamp background */
-    padding: 5px 10px;
-    border-radius: 6px;
-}
-/* Category selector buttons */
-.category-buttons {
-    display: flex;
-    flex-wrap: wrap;
-    gap: 10px;
-    margin-bottom: 1.5rem;
-}
-.category-button {
-    padding: 0.7rem 1.2rem;
-    background-color: #e0ebff; /* Darker button background */
-    border: 1px solid #b0d0ff;
-    border-radius: 8px;
-    font-weight: 500;
-    cursor: pointer;
-    transition: all 0.2s ease;
-    display: flex;
-    align-items: center;
-    gap: 8px;
-}
-.category-button:hover {
-    background-color: #c0d0ff; /* Darker hover state */
-    border-color: #80a0ff;
-}
-.category-button.active {
-    background-color: var(--primary-color);
-    color: white;
-    border-color: var(--primary-color);
-}
-/* Logo and brand styling */
-.logo {
-    font-size: 2.5em;
     margin-right: 0.5rem;
 }
-/* Properly display sorting arrows */
-table th.sort-asc::after {
-    content: " ↑";
-    color: var(--primary-color);
-}
-table th.sort-desc::after {
-    content: " ↓";
-    color: var(--primary-color);
 }
-/* Style for About section cards */
-.about-card {
-    background-color: #f0f5ff; /* Darker card background */
-    padding: 20px;
-    border-radius: 12px;
-    height: 100%;
-    border: 1px solid var(--border-color);
 }
-.about-card h3 {
-    text-align: center;
-    margin-top: 0;
-    color: var(--primary-color);
 }
-.about-card p {
-    color: var(--text-color);
-    font-size: 0.95rem;
-    line-height: 1.6;
 }
-.about-card-icon {
-    font-size: 2.5em;
-    text-align: center;
-    margin-bottom: 15px;
-    display: block;
 }
-/* Ensure the table container has a fixed height and scrolls properly */
-#leaderboard-table {
-    overflow: auto !important;
-    max-height: 500px !important;
 }
-/* Fix for dataframe component scrolling */
-.gradio-dataframe {
-    overflow: auto !important;
-    max-height: 500px !important;
 }
-/* Fix sorting issues */
-.sort-column {
-    cursor: pointer;
 }
 """
-# Combine with any existing CSS
-custom_css = enhanced_css
-# --- Gradio App Definition ---
-demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 with demo:
-    # Enhanced header with timestamp
-    gr.HTML(f"""
-    <div class="leaderboard-header">
-        <div>
-            <div class="leaderboard-title">
-                <span class="logo">🏆</span> MLE-Dojo Benchmark Leaderboard
-            </div>
-            <div class="leaderboard-subtitle">
-                Comprehensive evaluation of AI models across multiple domains
-            </div>
-        </div>
-        <div class="timestamp">
-            Last updated: {last_updated}
-        </div>
-    </div>
-    """)
-    # Introduction with enhanced styling
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("📊 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
-            with gr.Column():
-                gr.HTML("""
-                <h2 style="display: flex; align-items: center; gap: 10px;">
-                    <span style="font-size: 1.3em;">📈</span> Model Performance Rankings
-                </h2>
-                <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
-                """)
-                # Enhanced category selector
-                category_selector = gr.Radio(
-                    choices=[x[0] for x in CATEGORIES],
-                    label="Select Performance Domain:",
-                    value="🏆 Overall",
-                    interactive=True,
-                    elem_classes="fancy-radio"
-                )
-                # Visual separator
-                gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
-                # Enhanced leaderboard table
-                leaderboard_df_component = gr.Dataframe(
-                    value=update_leaderboard(DEFAULT_CATEGORY),
-                    headers=["Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"],
-                    datatype=["html", "html", "str", "str", "html"],
-                    interactive=False,
-                    row_count=(len(master_df), "fixed"),
-                    col_count=(5, "fixed"),
-                    wrap=True,
-                    elem_id="leaderboard-table",
-                )
-                # Stats cards (visual enhancement)
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        gr.HTML(f"""
-                        <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
-                            <div style="font-size: 2em;">🔍</div>
-                            <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
-                            <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
-                        </div>
-                        """)
-                    with gr.Column(scale=1):
-                        gr.HTML(f"""
-                        <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
-                            <div style="font-size: 2em;">🌐</div>
-                            <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
-                            <div style="font-size: 1.1em; color: #666;">Organizations</div>
-                        </div>
-                        """)
-                    with gr.Column(scale=1):
-                        gr.HTML(f"""
-                        <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
-                            <div style="font-size: 2em;">🏅</div>
-                            <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
-                            <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
-                        </div>
-                        """)
-                # Link the radio button change to the update function
-                category_selector.change(
-                    fn=update_leaderboard,
-                    inputs=category_selector,
-                    outputs=leaderboard_df_component
-                )
-        with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
-            # Enhanced about section
-            gr.HTML("""
-            <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
-                <div style="font-size: 4em;">🧪</div>
-                <div>
-                    <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
-                    <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
-                </div>
-            </div>
-            """)
-            # Use the LLM_BENCHMARKS_TEXT variable
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-            # Add methodology cards for visual enhancement
-            with gr.Row():
-                with gr.Column():
-                    gr.HTML("""
-                    <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
-                        <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">💡</div>
-                        <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
-                        <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
-                        data preprocessing, feature engineering, model selection, and basic deployment.</p>
-                    </div>
-                    """)
-                with gr.Column():
-                    gr.HTML("""
-                    <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
-                        <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📊</div>
-                        <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
-                        <p>Tests a model's ability to process, analyze and model structured data, including
-                        statistical analysis,statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
-                    </div>
-                    """)
-            with gr.Row():
-                with gr.Column():
-                    gr.HTML("""
-                    <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
-                        <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📝</div>
-                        <h3 style="text-align: center; margin-top: 0;">NLP</h3>
-                        <p>Evaluates natural language processing capabilities including text classification,
-                        sentiment analysis, entity recognition, text generation, and language understanding.</p>
-                    </div>
-                    """)
-                with gr.Column():
-                    gr.HTML("""
-                    <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
-                        <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">👁️</div>
-                        <h3 style="text-align: center; margin-top: 0;">CV</h3>
-                        <p>Tests computer vision capabilities including image classification, object detection,
-                        image generation, and visual understanding tasks across various domains.</p>
-                    </div>
-                    """)
-        # Optional: Uncomment if you want to re-enable the Submit tab
-        # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
-        #     with gr.Column():
-        #         gr.HTML("""
-        #         <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
-        #             <div style="font-size: 4em;">🚀</div>
-        #             <div>
-        #                 <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
-        #                 <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
-        #             </div>
-        #         </div>
-        #         """)
-        #
-        #         with gr.Row():
-        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-        #
-        #         with gr.Column():
-        #             with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
-        #                  finished_eval_table = gr.components.Dataframe(
-        #                      value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-        #                 )
-        #             with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
-        #                  running_eval_table = gr.components.Dataframe(
-        #                      value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-        #                 )
-        #             with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
-        #                 pending_eval_table = gr.components.Dataframe(
-        #                     value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-        #                 )
-        #
-        #         gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
-        #
-        #         gr.HTML("""
-        #         <h2 style="display: flex; align-items: center; gap: 10px;">
-        #             <span style="font-size: 1.3em;">📝</span> Model Submission Form
-        #         </h2>
-        #         """)
-        #
-        #         with gr.Row():
-        #             with gr.Column():
-        #                 model_name_textbox = gr.Textbox(
-        #                     label="Model Name (on Hugging Face Hub)",
-        #                     placeholder="Enter your model name...",
-        #                     elem_classes="enhanced-input"
-        #                 )
-        #                 revision_name_textbox = gr.Textbox(
-        #                     label="Revision / Commit Hash",
-        #                     placeholder="main",
-        #                     elem_classes="enhanced-input"
-        #                 )
-        #                 model_type = gr.Dropdown(
-        #                     choices=["Type A", "Type B", "Type C"],
-        #                     label="Model Type",
-        #                     multiselect=False,
-        #                     value=None,
-        #                     interactive=True,
-        #                     elem_classes="enhanced-dropdown"
-        #                 )
-        #             with gr.Column():
-        #                 precision = gr.Dropdown(
-        #                     choices=["float16", "bfloat16", "float32", "int8", "auto"],
-        #                     label="Precision",
-        #                     multiselect=False,
-        #                     value="auto",
-        #                     interactive=True,
-        #                     elem_classes="enhanced-dropdown"
-        #                 )
-        #                 weight_type = gr.Dropdown(
-        #                     choices=["Original", "Adapter", "Delta"],
-        #                     label="Weights Type",
-        #                     multiselect=False,
-        #                     value="Original",
-        #                     interactive=True,
-        #                     elem_classes="enhanced-dropdown"
-        #                 )
-        #                 base_model_name_textbox = gr.Textbox(
-        #                     label="Base Model (for delta or adapter weights)",
-        #                     placeholder="Only needed for adapter/delta weights",
-        #                     elem_classes="enhanced-input"
-        #                 )
-        #
-        #         submit_button = gr.Button(
-        #             "Submit for Evaluation",
-        #             elem_classes="primary-button"
-        #         )
-        #         submission_result = gr.Markdown()
-        #         submit_button.click(
-        #             add_new_eval,
-        #             [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
-        #             submission_result,
-        #         )
-    # Enhanced citation section
-    with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
-        gr.HTML("""
-        <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
-            <div style="font-size: 2.5em;">📄</div>
-            <div>
-                <h3 style="margin: 0;">How to Cite This Benchmark</h3>
-                <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
-            </div>
-        </div>
-        """)
-        citation_button = gr.Textbox(
             value=CITATION_BUTTON_TEXT,
             label=CITATION_BUTTON_LABEL,
             lines=10,
             elem_id="citation-button",
-            show_copy_button=True,
         )
-    # Footer
-    gr.HTML("""
-    <div class="footer">
-        <p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
-        <p style="margin-top: 5px; display: flex; justify-content: center; gap: 20px;">
-            <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
-            <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
-            <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
-        </p>
-    </div>
-    """)
-# --- Keep scheduler if relevant ---
-if __name__ == "__main__":
-    try:
-        scheduler = BackgroundScheduler()
-        if callable(restart_space):
-             if REPO_ID and REPO_ID != "your/space-id":
-                 scheduler.add_job(restart_space, "interval", seconds=1800)  # Restart every 30 mins
-                 scheduler.start()
-             else:
-                 print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
-        else:
-             print("Warning: restart_space function not available; space restart job not scheduled.")
-    except Exception as e:
-        print(f"Failed to initialize or start scheduler: {e}")
-# --- Launch the app ---
 if __name__ == "__main__":
-    print("Launching Enhanced Gradio App...")
-    demo.launch()

 import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+# --- Placeholder Imports / Definitions ---
+# Texts, CSS and the submission helper normally come from the project's `src`
+# package. If any of these imports raises ImportError, the except branch
+# defines stand-ins under the same names so the rest of the script still runs.
+# NOTE(review): CITATION_BUTTON_TEXT is used by the citation box and has a
+# fallback below, but it is not in the import list visible here -- confirm it
+# is imported from src.about.
 try:
     from src.about import (
         CITATION_BUTTON_LABEL,
         EVALUATION_QUEUE_TEXT,
         INTRODUCTION_TEXT,
         LLM_BENCHMARKS_TEXT,
+        TITLE,  # Will override below
     )
     from src.display.css_html_js import custom_css
     from src.envs import REPO_ID
     from src.submission.submit import add_new_eval
 except ImportError:
+    # Minimal placeholders. TITLE needs none: it is reassigned later in this file.
     CITATION_BUTTON_LABEL = "Citation"
     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
+    INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
+    LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
+    # Must be a string: more CSS is concatenated onto it later in this file.
     custom_css = ""
     REPO_ID = "your/space-id"
+    # Accepts any positional arguments and returns a fixed message.
     def add_new_eval(*args): return "Submission placeholder."
+# --- Elo Data ---
+# One record per model: display name, reference URL, organizer, license,
+# one Elo value per category column (`<Category>_Elo`) and an `Overall` value.
 data = [
+    {'model_name': 'gpt-4o-mini',    'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 753,  'Tabular_Elo': 839,  'NLP_Elo': 758,  'CV_Elo': 754,  'Overall': 778},
+    {'model_name': 'gpt-4o',         'url': 'https://openai.com/index/hello-gpt-4o/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 830,  'Tabular_Elo': 861,  'NLP_Elo': 903,  'CV_Elo': 761,  'Overall': 841},
+    {'model_name': 'o3-mini',        'url': 'https://openai.com/index/openai-o3-mini/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
+    {'model_name': 'deepseek-v3',    'url': 'https://api-docs.deepseek.com/news/news1226',                          'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
+    {'model_name': 'deepseek-r1',    'url': 'https://api-docs.deepseek.com/news/news250120',                         'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
+    {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash',        'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 847,  'Tabular_Elo': 923,  'NLP_Elo': 860,  'CV_Elo': 978,  'Overall': 895},
+    {'model_name': 'gemini-2.0-pro',  'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973,  'Overall': 1054},
+    {'model_name': 'gemini-2.5-pro',  'url': 'https://deepmind.google/technologies/gemini/pro/',               'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 ]
+# Single source table; the per-category views are derived from it on demand.
 master_df = pd.DataFrame(data)
+# Labels offered by the category selector, in display order.
+CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
+# Category shown on first load and used when an unknown category is requested.
 DEFAULT_CATEGORY = "Overall"
+# Selector label -> master_df column that holds that category's score.
 category_to_column = {
+    "Overall": "Overall",
     "MLE-Lite": "MLE-Lite_Elo",
     "Tabular": "Tabular_Elo",
     "NLP": "NLP_Elo",
     "CV": "CV_Elo",
 }
+def update_leaderboard(category):
+    """Build the ranked leaderboard table for one category.
+
+    Args:
+        category: A key of ``category_to_column``. Any other value falls
+            back to the column of ``DEFAULT_CATEGORY``.
+
+    Returns:
+        A new DataFrame with the columns ``Rank``, ``Model``, ``Organizer``,
+        ``License`` and ``Elo Score``, sorted by score in descending order.
+        ``Rank`` starts at 1 and ``Model`` is an HTML link to the model's URL.
+    """
+    import html  # local import keeps this function self-contained
+
+    col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
+    # sort_values/reset_index return new frames, so master_df is never mutated.
+    df = (
+        master_df[['model_name', 'url', 'organizer', 'license', col]]
+        .sort_values(by=col, ascending=False)
+        .reset_index(drop=True)
+    )
+    df.insert(0, 'Rank', df.index + 1)
+    # Escape both values so characters such as ', & or < cannot break the
+    # markup. A list comprehension (rather than DataFrame.apply(axis=1)) also
+    # works when the table is empty, where apply would return a DataFrame.
+    df['Model'] = [
+        f"<a href='{html.escape(str(url), quote=True)}' target='_blank'>{html.escape(str(name))}</a>"
+        for url, name in zip(df['url'], df['model_name'])
+    ]
+    df = df.rename(columns={col: 'Elo Score', 'organizer': 'Organizer', 'license': 'License'})
+    return df[['Rank', 'Model', 'Organizer', 'License', 'Elo Score']]
+# --- Dark Theme + Custom CSS ---
+# CSS only honours @import when it precedes every style rule. custom_css may
+# already hold rules (it is imported from src.display.css_html_js when that
+# module is available), so the font import is placed in front of the existing
+# content instead of being appended after it, where it would be ignored.
+custom_css = (
+    "@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');\n"
+    + custom_css
+    + """
+body {
+    font-family: 'Inter', sans-serif;
+    background-color: #1e1e2f !important;
+    color: #e0e0f0 !important;
 }
+/* Hero Section */
+.hero-section {
+    background: linear-gradient(135deg, #6c63ff, #8f94fb);
+    color: #fff;
+    padding: 2rem 1rem;
+    border-radius: .75rem;
+    margin-bottom: 1.5rem;
+    text-align: center;
+    box-shadow: 0 4px 10px rgba(0,0,0,0.3);
 }
+.hero-section h1 {
+    margin: 0;
     font-size: 2.5rem !important;
     font-weight: 700 !important;
 }
+.hero-section h2 {
+    margin: .5rem 0 0 !important;
+    font-size: 1.25rem !important;
+    font-weight: 400 !important;
+    opacity: 0.9;
 }
+/* Tab Buttons */
+.tab-buttons button {
+    border-radius: 20px !important;
+    padding: 0.5rem 1rem !important;
+    margin-right: 0.5rem !important;
+    background: #3a3a4c !important;
+    color: #e0e0f0 !important;
     border: none !important;
+    transition: background 0.3s !important;
     font-weight: 500 !important;
 }
+.tab-buttons button:hover {
+    background: #4a4a6f !important;
 }
+.tab-buttons button[aria-selected="true"] {
+    background: #6c63ff !important;
+    color: #fff !important;
 }
+/* Category Selector Pills */
+#category-selector input[type="radio"] { display: none; }
+#category-selector label {
     display: inline-block;
+    padding: 0.5rem 1rem;
     margin-right: 0.5rem;
+    border-radius: 999px;
+    background: #3a3a4c;
+    color: #e0e0f0;
+    cursor: pointer;
+    transition: background 0.3s, color 0.3s;
+    font-weight: 500;
 }
+#category-selector input[type="radio"]:checked + label {
+    background: #6c63ff;
+    color: #fff;
 }
+/* Table Styling */
+table {
+    width: 100%;
+    border: none;
+    border-radius: .5rem;
+    overflow: hidden;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.3);
+    margin: 1rem 0;
 }
+th {
+    background: #6c63ff !important;
+    color: #fff !important;
 }
+td, th {
+    padding: 0.75rem 1rem;
+    background: #1e1e2f;
+    color: #e0e0f0;
 }
+tr:nth-child(even) td {
+    background: #2a2a3c;
 }
+tr:hover td {
+    background: #3c3b52;
 }
+td a {
+    color: #9afeff;
+    text-decoration: none;
 }
+td a:hover {
+    text-decoration: underline;
 }
 """
+)
+# --- Override Title with Hero ---
+# Replaces whatever TITLE was imported from src.about with banner markup.
+# The `hero-section` class is styled by the `.hero-section` rules in custom_css.
+TITLE = """
+<div class="hero-section">
+  <h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>
+  <h2>Improving LLM Agents for Machine Learning Engineering</h2>
+</div>
+"""
+# --- Build App ---
+# gradio.themes has no `Dark` theme (the built-ins include Base, Default,
+# Soft, Monochrome and Glass), so `gr.themes.Dark()` raises AttributeError as
+# soon as this module runs. Use the minimal Base theme instead; the dark
+# palette itself is supplied by the rules in custom_css.
+demo = gr.Blocks(css=custom_css, theme=gr.themes.Base())
 with demo:
+    # Page header: banner markup first, then the introduction text.
+    gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("📋 Leaderboard"):
+            gr.Markdown("## Model Elo Rankings by Category")
+            category_selector = gr.Radio(
+                choices=CATEGORIES,
+                value=DEFAULT_CATEGORY,
+                interactive=True,
+                elem_id="category-selector"
+            )
+            # Initial table shows the default category; "html" lets the Model
+            # column render the link markup produced by update_leaderboard.
+            leaderboard_df = gr.Dataframe(
+                value=update_leaderboard(DEFAULT_CATEGORY),
+                headers=["Rank","Model","Organizer","License","Elo Score"],
+                datatype=["number","html","str","str","number"],
+                interactive=False,
+                row_count=(len(master_df),"fixed"),
+                col_count=(5,"fixed"),
+                wrap=True,
+                elem_id="leaderboard-table"
+            )
+            # Re-rank the table whenever a different category is selected.
+            category_selector.change(
+                fn=update_leaderboard,
+                inputs=category_selector,
+                outputs=leaderboard_df
+            )
+        with gr.TabItem("ℹ️ About"):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+    # Collapsed by default; expands to a copyable citation text box.
+    with gr.Accordion("📙 Citation", open=False):
+        gr.Textbox(
             value=CITATION_BUTTON_TEXT,
             label=CITATION_BUTTON_LABEL,
             lines=10,
             elem_id="citation-button",
+            show_copy_button=True
         )
+# Launch the Gradio app only when this file is executed as a script,
+# not when it is imported as a module.
 if __name__ == "__main__":
+    print("Launching Gradio App...")
+    demo.launch()