Jerrycool committed
Commit 3dd92ec · verified · 1 parent: 3aab004

Update app.py

Files changed (1)
  1. app.py +224 -278
app.py CHANGED
@@ -1,305 +1,251 @@
  import gradio as gr
  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler
- # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard

- # --- Make sure these imports work relative to your file structure ---
- # Option 1: If src is a directory in the same folder as your script:
  try:
-     from src.about import (
-         CITATION_BUTTON_LABEL,
-         CITATION_BUTTON_TEXT,
-         EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
-         INTRODUCTION_TEXT,
-         LLM_BENCHMARKS_TEXT,
-         TITLE,
-     )
-     from src.display.css_html_js import custom_css # Assuming this exists but might be empty
-     from src.envs import REPO_ID # Keep if needed for restart_space or other functions
-     from src.submission.submit import add_new_eval # Keep if using the submit tab
-     print("Successfully imported from src module.")
- # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
  except ImportError:
-     print("Warning: Using placeholder values because src module imports failed.")
-     CITATION_BUTTON_LABEL="Citation"
-     CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
-     EVALUATION_QUEUE_TEXT="Current evaluation queue:"
-     INTRODUCTION_TEXT="Welcome to the MLE-Dojo Benchmark Leaderboard."
-     LLM_BENCHMARKS_TEXT="Information about the benchmarks..."
-     TITLE="<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
-     custom_css="" # Start with empty CSS if not imported
-     REPO_ID="your/space-id" # Replace with actual ID if needed
-     def add_new_eval(*args): return "Submission placeholder."
- # --- End Placeholder Definitions ---
-
-
- # --- Elo Leaderboard Configuration ---
- # Enhanced data with Rank (placeholder), Organizer, License, and URL
- # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
- # Verify organizer and license information for accuracy.
  data = [
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
- {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
  ]
-
- # Create a master DataFrame
- # Note: Columns 'organizer' and 'license' are created in lowercase here.
  master_df = pd.DataFrame(data)

- # Define categories for selection (user-facing)
- CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"] # Overall first
- DEFAULT_CATEGORY = "Overall" # Set a default category
-
- # Map user-facing categories to DataFrame column names
- category_to_column = {
-     "MLE-Lite": "MLE-Lite_Elo",
-     "Tabular": "Tabular_Elo",
-     "NLP": "NLP_Elo",
-     "CV": "CV_Elo",
-     "Overall": "Overall"
  }

- # --- Helper function to update leaderboard ---
- def update_leaderboard(category):
-     """
-     Selects relevant columns, sorts by the chosen category's Elo score,
-     adds Rank, formats model name as a link, and returns the DataFrame.
-     """
-     score_column = category_to_column.get(category)
-     if score_column is None or score_column not in master_df.columns:
-         print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
-         score_column = category_to_column[DEFAULT_CATEGORY]
-         # Check fallback column too
-         if score_column not in master_df.columns:
-             # Return empty df with correct columns if still invalid
-             # Use lowercase keys here consistent with master_df for the empty case
-             print(f"Error: Default column '{score_column}' also not found.")
-             return pd.DataFrame({
-                 "Rank": [],
-                 "Model": [],
-                 "Elo Score": [],
-                 "Organizer": [], # Changed 'organizer' -> 'Organizer' for consistency in empty case
-                 "License": []   # Changed 'license' -> 'License' for consistency in empty case
-             })
-
-     # Select base columns + the score column for sorting
-     # Ensure 'organizer' and 'license' are selected correctly (lowercase)
-     cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
-     df = master_df[cols_to_select].copy()
-
-     # Sort by the selected 'Elo Score' descending
-     df.sort_values(by=score_column, ascending=False, inplace=True)
-
-     # Add Rank based on the sorted order
-     df.reset_index(drop=True, inplace=True)
-     df.insert(0, 'Rank', df.index + 1)
-
-     # Format Model Name as HTML Hyperlink
-     # The resulting column name will be 'Model' (capitalized)
-     df['Model'] = df.apply(
-         lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
-         axis=1
-     )
-
-     # Rename the score column to 'Elo Score' for consistent display
-     df.rename(columns={score_column: 'Elo Score'}, inplace=True)
-
-     # Rename 'organizer' and 'license' to match desired display headers
-     df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
-
-     # Select and reorder columns for final display using the ACTUAL column names in df
-     # Use capitalized 'Organizer' and 'License' here because they have been renamed.
-     final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
-     df = df[final_columns]

-     # Note: The DataFrame returned now has columns:
-     # 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
-     return df

- # --- Mock/Placeholder functions/data for other tabs ---
- # (If the Submit tab is used, ensure these variables are appropriately populated or handled)
- print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
- finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
- running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
- pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
- EVAL_COLS = ["Model", "Status", "Requested", "Started"] # Define for the dataframe headers
- EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types

- # --- Keep restart function if relevant ---
- def restart_space():
-     # Make sure REPO_ID is correctly defined/imported if this function is used
-     print(f"Attempting to restart space: {REPO_ID}")
-     # Replace with your actual space restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))

- # --- Gradio App Definition ---

- # ***** FONT SIZE INCREASED HERE *****
- # Add CSS rules to make the base font size larger.
- # Adjust the '1.2em' value (e.g., to '1.4em', '16px') to change the size.
- # The !important flag helps override theme defaults.
- # If the imported custom_css already has content, append to it.
- font_size_css = """
- body {
-     font-size: 1.5em !important; /* Increase base font size */
- }
- /* Optional: Target specific elements if needed */
- /*
- #leaderboard-table th, #leaderboard-table td {
-     font-size: 1em !important; /* Adjust table font size relative to new body size */
-     padding: 5px 7px !important; /* Increase padding for better spacing */
  }
- h1, .markdown-text h1 { font-size: 2.2em !important; } /* Make main title larger */
- h2, .markdown-text h2 { font-size: 1.8em !important; } /* Make section titles larger */
- button { font-size: 1.1em !important; padding: 8px 16px !important; } /* Slightly larger buttons */
- .gr-input, .gr-dropdown, .gr-textbox textarea { font-size: 1em !important; } /* Ensure inputs scale too */
- */
  """
- # Append the new CSS to any existing custom_css
- custom_css += font_size_css

- # Add basic table styling if not already present
- if "table {" not in custom_css:
-     custom_css += """
- table { width: 100%; border-collapse: collapse; margin-top: 10px; margin-bottom: 10px; }
- th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; vertical-align: top; } /* Allow wrapping, top align */
- th { background-color: #f2f2f2; font-weight: bold; }
- tr:nth-child(even) { background-color: #f9f9f9; }
- tr:hover { background-color: #e9e9e9; }
- td a { color: #007bff; text-decoration: none; }
- td a:hover { text-decoration: underline; }
- """

- # Use a theme for better default styling
- demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())

  with demo:
-     # Use the TITLE variable imported or defined above
-     gr.HTML(TITLE)
-
-     # Use the INTRODUCTION_TEXT variable imported or defined above
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-             with gr.Column():
-                 gr.Markdown("## Model Elo Rankings by Category")
-                 category_selector = gr.Radio(
-                     choices=CATEGORIES,
-                     label="Select Category:",
-                     value=DEFAULT_CATEGORY,
-                     interactive=True,
-                 )
-                 leaderboard_df_component = gr.Dataframe(
-                     # Initialize with sorted data for the default category
-                     value=update_leaderboard(DEFAULT_CATEGORY),
-                     # Headers for DISPLAY should match the *renamed* columns from update_leaderboard
-                     headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
-                     # Datatype maps to the final df columns: Rank, Model, Organizer, License, Elo Score
-                     datatype=["number", "html", "str", "str", "number"],
-                     interactive=False,
-                     # --- FIX APPLIED: Removed unsupported 'height' argument ---
-                     # row_count determines the number of rows to display
-                     row_count=(len(master_df), "fixed"), # Display all rows
-                     col_count=(5, "fixed"),
-                     wrap=True, # Allow text wrapping in cells
-                     elem_id="leaderboard-table" # CSS hook for custom styling
-                 )
-                 # Link the radio button change to the update function
-                 category_selector.change(
-                     fn=update_leaderboard,
-                     inputs=category_selector,
-                     outputs=leaderboard_df_component
-                 )
-
-         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
-             # Use the LLM_BENCHMARKS_TEXT variable imported or defined above
-             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-         # --- Submit Tab (Commented out as in original request) ---
-         # Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported/defined if uncommented
-         # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
-         #     with gr.Column():
-         #          with gr.Row():
-         #              gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") # Requires import/definition
-         #          with gr.Column():
-         #              with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
-         #                   finished_eval_table = gr.components.Dataframe(
-         #                       value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-         #                  )
-         #              with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
-         #                   running_eval_table = gr.components.Dataframe(
-         #                       value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-         #                  )
-         #              with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
-         #                  pending_eval_table = gr.components.Dataframe(
-         #                      value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-         #                  )
-         #     with gr.Row():
-         #          gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-         #     with gr.Row():
-         #          with gr.Column():
-         #              model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)")
-         #              revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
-         #              model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True) # Example choices
-         #          with gr.Column():
-         #              precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
-         #              weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
-         #              base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-         #     submit_button = gr.Button("Submit Eval")
-         #     submission_result = gr.Markdown()
-         #     # Ensure add_new_eval is correctly imported/defined and handles these inputs
-         #     submit_button.click(
-         #          add_new_eval, # Requires import/definition
-         #          [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
-         #          submission_result,
-         #      )
-
-     # --- Citation Row (at the bottom, outside Tabs) ---
-     with gr.Accordion("📙 Citation", open=False):
-         # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported or defined above
-         citation_button = gr.Textbox(
-             value=CITATION_BUTTON_TEXT,
-             label=CITATION_BUTTON_LABEL,
-             lines=10, # Adjust lines if needed for new font size
-             elem_id="citation-button",
-             show_copy_button=True,
-         )
-
- # IGNORE_WHEN_COPYING_START
- # content_copy  download
- # Use code with caution.
- # IGNORE_WHEN_COPYING_END
-
- # --- Keep scheduler if relevant ---
- # Only start scheduler if the script is run directly
- if __name__ == "__main__":
-     try:
-         scheduler = BackgroundScheduler()
-         # Add job only if restart_space is callable (i.e., not a placeholder or failed import)
-         if callable(restart_space):
-              # Check if REPO_ID seems valid before scheduling
-              if REPO_ID and REPO_ID != "your/space-id":
-                  scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
-                  scheduler.start()
-              else:
-                  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
-         else:
-              print("Warning: restart_space function not available; space restart job not scheduled.")
-     except Exception as e:
-         print(f"Failed to initialize or start scheduler: {e}")

- # --- Launch the app ---
- # Ensures the app launches only when the script is run directly
  if __name__ == "__main__":
-     # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
-     # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions above.
-     print("Launching Gradio App...")
-     demo.launch()
  import gradio as gr
  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler

+ """
+ MLE‑Dojo Benchmark Leaderboard - Polished Edition
+ -------------------------------------------------
+ This version focuses on premium typography, an elegant color palette, and richer
+ UI controls (including ascending/descending sort) while remaining completely
+ self‑contained.
+
+ *️⃣ HOW TO USE
+ -------------------------------------------------
+ 1. Install deps → `pip install gradio pandas apscheduler`
+ 2. Launch → `python mle_dojo_leaderboard_app.py`
+ 3. Tailor any of the placeholder values (TITLE, INTRODUCTION_TEXT, etc.) to your
+    project or import them from your own `src` package — the try/except block at
+    the top handles either workflow gracefully.
+ """
+
+ # ---------------------------------------------------------------------------
+ # Placeholder fall‑back imports (remove once your own src/ is in PYTHONPATH)
+ # ---------------------------------------------------------------------------
  try:
+     from src.about import (
+         CITATION_BUTTON_LABEL,
+         CITATION_BUTTON_TEXT,
+         EVALUATION_QUEUE_TEXT,
+         INTRODUCTION_TEXT,
+         LLM_BENCHMARKS_TEXT,
+         TITLE,
+     )
+     from src.display.css_html_js import custom_css # optional
+     from src.envs import REPO_ID
+     from src.submission.submit import add_new_eval
+     print("Imported UI copy & helpers from src package.")
  except ImportError:
+     print("⚠️ Falling back to local placeholders - customise as needed.")
+     CITATION_BUTTON_LABEL = "Citation"
+     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark"
+     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
+     INTRODUCTION_TEXT = "Welcome to the **MLE‑Dojo Benchmark Leaderboard** — compare LLM agents across real‑world ML engineering tasks."
+     LLM_BENCHMARKS_TEXT = "Further details about tasks, metrics, and evaluation pipelines."
+     TITLE = (
+         "<h1 class='hero-title gradient-text'>\U0001F3C6 MLE‑Dojo Benchmark Leaderboard</h1>"
+         "<p class='subtitle'>Interactive, reproducible &amp; community‑driven ML agent benchmarking</p>"
+     )
+     custom_css = "" # will be extended below
+     REPO_ID = "your/space-id"
+     def add_new_eval(*_):
+         return "Submission placeholder."
+
+ # ---------------------------------------------------------------------------
+ # Data - extend / refresh as new checkpoints are evaluated
+ # ---------------------------------------------------------------------------
  data = [
+     {"model_name": "gpt-4o-mini", "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 753, "Tabular_Elo": 839, "NLP_Elo": 758, "CV_Elo": 754, "Overall": 778},
+     {"model_name": "gpt-4o", "url": "https://openai.com/index/hello-gpt-4o/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 830, "Tabular_Elo": 861, "NLP_Elo": 903, "CV_Elo": 761, "Overall": 841},
+     {"model_name": "o3-mini", "url": "https://openai.com/index/openai-o3-mini/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 1108, "Tabular_Elo": 1019, "NLP_Elo": 1056, "CV_Elo": 1207, "Overall": 1096},
+     {"model_name": "deepseek-v3", "url": "https://api-docs.deepseek.com/news/news1226", "organizer": "DeepSeek", "license": "DeepSeek", "MLE-Lite_Elo": 1004, "Tabular_Elo": 1015, "NLP_Elo": 1028, "CV_Elo": 1067, "Overall": 1023},
+     {"model_name": "deepseek-r1", "url": "https://api-docs.deepseek.com/news/news250120", "organizer": "DeepSeek", "license": "DeepSeek", "MLE-Lite_Elo": 1137, "Tabular_Elo": 1053, "NLP_Elo": 1103, "CV_Elo": 1083, "Overall": 1100},
+     {"model_name": "gemini-2.0-flash", "url": "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 847, "Tabular_Elo": 923, "NLP_Elo": 860, "CV_Elo": 978, "Overall": 895},
+     {"model_name": "gemini-2.0-pro", "url": "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 1064, "Tabular_Elo": 1139, "NLP_Elo": 1028, "CV_Elo": 973, "Overall": 1054},
+     {"model_name": "gemini-2.5-pro", "url": "https://deepmind.google/technologies/gemini/pro/", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 1257, "Tabular_Elo": 1150, "NLP_Elo": 1266, "CV_Elo": 1177, "Overall": 1214},
  ]
  master_df = pd.DataFrame(data)

+ # ---------------------------------------------------------------------------
+ # Category helpers
+ # ---------------------------------------------------------------------------
+ CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
+ DEFAULT_CATEGORY = "Overall"
+ CATEGORY_MAP = {
+     "Overall": "Overall",
+     "MLE-Lite": "MLE-Lite_Elo",
+     "Tabular": "Tabular_Elo",
+     "NLP": "NLP_Elo",
+     "CV": "CV_Elo",
  }

+ # ---------------------------------------------------------------------------
+ # Leaderboard Update Routine
+ # ---------------------------------------------------------------------------
+
+ def update_leaderboard(category: str, ascending: bool):
+     """Return a fresh, nicely formatted DataFrame based on user selections."""
+     score_col = CATEGORY_MAP.get(category, CATEGORY_MAP[DEFAULT_CATEGORY])
+
+     df = (
+         master_df[["model_name", "url", "organizer", "license", score_col]].copy()
+         .sort_values(by=score_col, ascending=ascending)
+         .reset_index(drop=True)
+     )
+
+     # Add Rank & hyperlink the model name
+     df.insert(0, "Rank", df.index + 1)
+     df["Model"] = (
+         df.apply(lambda r: f"<a href='{r.url}' target='_blank'>{r.model_name}</a>", axis=1)
+     )
+
+     df.rename(columns={
+         "organizer": "Organizer",
+         "license": "License",
+         score_col: "Elo Score",
+     }, inplace=True)
+
+     return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
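Compared with the previous `update_leaderboard(category)`, the rewritten routine takes an explicit `ascending` flag and re-derives `Rank` from the sorted order on every call. A minimal standalone sanity check of that ranking logic, using only the Overall Elo values from the data table above (assumes pandas is installed; not part of the commit):

import pandas as pd

rows = [
    ("gpt-4o-mini", 778), ("gpt-4o", 841), ("o3-mini", 1096),
    ("deepseek-v3", 1023), ("deepseek-r1", 1100), ("gemini-2.0-flash", 895),
    ("gemini-2.0-pro", 1054), ("gemini-2.5-pro", 1214),
]
df = pd.DataFrame(rows, columns=["model_name", "Overall"])

# Same core steps as update_leaderboard: sort, then derive Rank from position.
ranked = df.sort_values(by="Overall", ascending=False).reset_index(drop=True)
ranked.insert(0, "Rank", ranked.index + 1)

assert ranked.loc[0, "model_name"] == "gemini-2.5-pro"  # highest Overall Elo
assert ranked.loc[0, "Rank"] == 1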
+
+ # ---------------------------------------------------------------------------
+ # Custom CSS — premium typography & subtle surfaces
+ # ---------------------------------------------------------------------------
+ custom_css += """
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
+
+ html, body {
+     font-family: 'Inter', 'Helvetica Neue', Arial, sans-serif !important;
+     font-size: 17px !important; /* slightly larger default */
+     color: #1f2937;
+     background-color: #f9fafb;
+     line-height: 1.55;
+ }
+
+ /* Gradient text utility */
+ .gradient-text {
+     background: linear-gradient(90deg, #0284c7 0%, #6366f1 100%);
+     -webkit-background-clip: text;
+     -webkit-text-fill-color: transparent;
+ }
+
+ /* Markdown tweaks */
+ .markdown-text h2 {
+     font-weight: 600;
+     margin-top: 1.2em;
+ }
+
+ /* Radio buttons & checkboxes */
+ .gr-radio, .gr-checkbox {
+     padding: 0.35em 0.75em;
+     border-radius: 0.5rem;
+     background-color: #ffffff;
+     box-shadow: 0 1px 2px rgba(0,0,0,0.06);
+ }
+
+ /* Data table */
+ #leaderboard-table table {
+     width: 100%;
+     border-collapse: collapse;
+ }
+ #leaderboard-table th {
+     background-color: #e2e8f0;
+     font-weight: 600;
+     text-transform: uppercase;
+     font-size: 0.85rem;
+     letter-spacing: 0.03em;
+     padding: 0.6em;
+ }
+ #leaderboard-table td {
+     padding: 0.55em 0.6em;
+     vertical-align: top;
+ }
+ #leaderboard-table tr:nth-child(even) { background-color: #f8fafc; }
+ #leaderboard-table tr:hover { background-color: #eef2ff; }
+
+ /* Links */
+ a { color: #2563eb; text-decoration: none; }
+ a:hover { text-decoration: underline; }
+
+ /* Accordion style tweak */
+ .gr-accordion .label {
+     font-weight: 600;
+     font-size: 1rem;
  }
  """
 
 

+ # ---------------------------------------------------------------------------
+ # Gradio App ✨
+ # ---------------------------------------------------------------------------
+
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft(
+     primary_hue="indigo",
+     neutral_hue="slate",
+     font=["Inter", "Helvetica Neue", "Arial", "sans-serif"],
+ ))

  with demo:
+     gr.HTML(TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs():
+         # ---------- Leaderboard Tab ----------
+         with gr.TabItem("🏅 Leaderboard"):
+             gr.Markdown("### Model Elo Rankings by Category")
+             with gr.Row():
+                 category_selector = gr.Radio(
+                     choices=CATEGORIES,
+                     value=DEFAULT_CATEGORY,
+                     label="Category",
+                     interactive=True,
+                 )
+                 order_checkbox = gr.Checkbox(
+                     label="⬆️ Ascending order (lower Elo first)",
+                     value=False,
+                 )
+             leaderboard_table = gr.Dataframe(
+                 value=update_leaderboard(DEFAULT_CATEGORY, False),
+                 headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
+                 datatype=["number", "html", "str", "str", "number"],
+                 row_count=(len(master_df), "fixed"),
+                 col_count=(5, "fixed"),
+                 interactive=False,
+                 elem_id="leaderboard-table",
+             )
+             # wire‑up events
+             category_selector.change(update_leaderboard, [category_selector, order_checkbox], leaderboard_table)
+             order_checkbox.change(update_leaderboard, [category_selector, order_checkbox], leaderboard_table)
+
+         # ---------- About Tab ----------
+         with gr.TabItem("ℹ️ About"):
+             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+         # ---------- (Optional) Submit Tab ----------
+         # You can re‑enable this section when your `add_new_eval()` & REPO_ID are ready.
+         # with gr.TabItem("🚀 Submit"):
+         #     pass
+
+     # ---------- Citation Accordion ----------
+     with gr.Accordion("📖 Citation", open=False):
+         gr.Textbox(
+             value=CITATION_BUTTON_TEXT,
+             label=CITATION_BUTTON_LABEL,
+             lines=10,
+             show_copy_button=True,
+         )
+
+ # ---------------------------------------------------------------------------
+ # Scheduler (optional) — restart the HF Space every 30 min to free memory
+ # ---------------------------------------------------------------------------

+ def restart_space():
+     print(f"🔄 Restarting Space → {REPO_ID}")
+     # Example: `HfApi().restart_space(repo_id=REPO_ID)`

  if __name__ == "__main__":
+     if REPO_ID != "your/space-id":
+         scheduler = BackgroundScheduler()
+         scheduler.add_job(restart_space, "interval", seconds=1800)
+         scheduler.start()
+         print("🗓️ Background scheduler active (30 min restart).")
+
+     print("🚀 Launching Gradio app…")
+     demo.launch()
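As committed, `restart_space` only logs. For a Space that should actually restart itself, a possible concrete implementation is the sketch below; it assumes `huggingface_hub` is installed and that an `HF_TOKEN` with write access to the Space is set in the environment (both assumptions, not part of this commit). `HfApi.restart_space(repo_id=...)` is the real huggingface_hub call the in-file comment refers to.

import os
from huggingface_hub import HfApi

def restart_space():
    print(f"🔄 Restarting Space → {REPO_ID}")
    # Token handling here is illustrative; any valid write token works.
    HfApi(token=os.environ.get("HF_TOKEN")).restart_space(repo_id=REPO_ID)

Wired into the existing BackgroundScheduler job, this would restart the Space every 30 minutes exactly as the scheduler section above intends.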