Spaces:

MLE-Dojo
/

Leaderboard

Running

App Files Files Community

Jerrycool committed on Apr 26

Commit

1117820

verified ·

1 Parent(s): 5134a55

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -108

app.py CHANGED Viewed

@@ -1,8 +1,14 @@
 import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-# --- Placeholder Imports / Definitions ---
 try:
     from src.about import (
         CITATION_BUTTON_LABEL,
@@ -10,14 +16,12 @@ try:
         EVALUATION_QUEUE_TEXT,
         INTRODUCTION_TEXT,
         LLM_BENCHMARKS_TEXT,
-        TITLE,  # We will override TITLE below for styling
     )
     from src.display.css_html_js import custom_css
     from src.envs import REPO_ID
     from src.submission.submit import add_new_eval
-    print("Successfully imported from src module.")
 except ImportError:
-    print("Warning: Using placeholder values because src module imports failed.")
     CITATION_BUTTON_LABEL = "Citation"
     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
@@ -25,225 +29,247 @@ except ImportError:
     LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
     custom_css = ""
     REPO_ID = "your/space-id"
-    def add_new_eval(*args): return "Submission placeholder."
-# --- Elo Data ---
 data = [
-    {'model_name': 'gpt-4o-mini',   'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 753,  'Tabular_Elo': 839,  'NLP_Elo': 758,  'CV_Elo': 754,  'Overall': 778},
-    {'model_name': 'gpt-4o',        'url': 'https://openai.com/index/hello-gpt-4o/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 830,  'Tabular_Elo': 861,  'NLP_Elo': 903,  'CV_Elo': 761,  'Overall': 841},
-    {'model_name': 'o3-mini',       'url': 'https://openai.com/index/openai-o3-mini/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
-    {'model_name': 'deepseek-v3',   'url': 'https://api-docs.deepseek.com/news/news1226',                          'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
-    {'model_name': 'deepseek-r1',   'url': 'https://api-docs.deepseek.com/news/news250120',                         'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
-    {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash',        'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 847,  'Tabular_Elo': 923,  'NLP_Elo': 860,  'CV_Elo': 978,  'Overall': 895},
-    {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973,  'Overall': 1054},
-    {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/',               'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 ]
 master_df = pd.DataFrame(data)
 CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
 DEFAULT_CATEGORY = "Overall"
 category_to_column = {
     "Overall": "Overall",
-    "MLE-Lite": "MLE-Lite_Elo",
     "Tabular": "Tabular_Elo",
     "NLP": "NLP_Elo",
     "CV": "CV_Elo",
 }
-def update_leaderboard(category):
     col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
-    df = master_df[['model_name','url','organizer','license',col]].copy()
     df.sort_values(by=col, ascending=False, inplace=True)
     df.reset_index(drop=True, inplace=True)
-    df.insert(0, 'Rank', df.index+1)
-    df['Model'] = df.apply(
-        lambda r: f"<a href='{r['url']}' target='_blank'>{r['model_name']}</a>",
-        axis=1
     )
-    df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
-    return df[['Rank','Model','Organizer','License','Elo Score']]
-# --- Dark Mode Styling ---
-font_size_css = """
-body { font-size: 1em !important; }
-"""
-custom_css += font_size_css
 dark_css = """
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
-@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
 body {
     font-family: 'Inter', sans-serif;
     background-color: #121212;
     color: #e0e0e0;
 }
-/* Hero Section */
 .hero-section {
     background: linear-gradient(135deg, #333, #222);
     color: #e0e0e0;
-    padding: 2rem 1rem;
     border-radius: .75rem;
     margin-bottom: 1.5rem;
     text-align: center;
-    box-shadow: 0 4px 10px rgba(0,0,0,0.5);
 }
 .hero-section h1 {
     margin: 0;
-    font-size: 2.5rem !important;
-    font-weight: 700 !important;
 }
 .hero-section h2 {
-    margin: .5rem 0 0 !important;
-    font-size: 1.25rem !important;
-    font-weight: 400 !important;
-    opacity: 0.8;
 }
-/* Tab Buttons */
 .tab-buttons button {
     border-radius: 20px !important;
-    padding: 0.5rem 1rem !important;
-    margin-right: 0.5rem !important;
     background: #1e1e1e !important;
     color: #e0e0e0 !important;
     border: none !important;
-    transition: background 0.3s !important;
     font-weight: 500 !important;
 }
-.tab-buttons button:hover {
-    background: #2c2c2c !important;
-}
 .tab-buttons button[aria-selected="true"] {
     background: #444 !important;
     color: #fff !important;
 }
-/* Category Selector Pills */
-#category-selector input[type="radio"] { display: none; }
 #category-selector label {
     display: inline-block;
-    padding: 0.5rem 1rem;
-    margin-right: 0.5rem;
     border-radius: 999px;
-    background: #1e1e1e;
     cursor: pointer;
-    transition: background 0.3s, color 0.3s;
-    font-weight: 500;
     color: #e0e0e0;
 }
 #category-selector input[type="radio"]:checked + label {
-    background: #444;
     color: #fff;
 }
-/* Table Styling */
 .dataframe-container table {
     width: 100%;
     border: none;
-    box-shadow: 0 2px 4px rgba(0,0,0,0.5);
-    border-radius: 0.5rem;
-    overflow: hidden;
 }
-.dataframe-container table th {
-    background: #2c2c2c;
     color: #e0e0e0;
 }
-.dataframe-container table tr:nth-child(odd) {
-    background-color: #1e1e1e !important;
-}
-.dataframe-container table tr:nth-child(even) {
-    background-color: #252525 !important;
 }
-.dataframe-container table td, .dataframe-container table th {
-    padding: 0.75rem 1rem;
-    color: #e0e0e0;
-}
-.dataframe-container table td a {
     color: #8ab4f8;
     text-decoration: none;
 }
-.dataframe-container table td a:hover {
     color: #a3c9ff;
     text-decoration: underline;
 }
-/* Enable scrollbar */
-#leaderboard-table .dataframe-container {
-    max-height: 400px !important;
-    overflow-y: auto !important;
-}
 """
 custom_css += dark_css
-# --- Override Title ---
 TITLE = """
-<div class=\"hero-section\">
-  <h1><i class=\"fas fa-trophy\"></i> MLE-Dojo Benchmark Leaderboard</h1>
   <h2>Improving LLM Agents for Machine Learning Engineering</h2>
 </div>
 """
-# --- Build App ---
-# Use Dark theme for Gradio
 demo = gr.Blocks(css=custom_css, theme=gr.themes.Base())
 with demo:
-    # Inject FontAwesome JS/CSS explicitly
-    gr.HTML("""
-      <link rel=\"stylesheet\"
-            href=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css\"
-            crossorigin=\"anonymous\" referrerpolicy=\"no-referrer\"/>
-      <script src=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/js/all.min.js\"
-              crossorigin=\"anonymous\" referrerpolicy=\"no-referrer\"></script>
-    """
     )
-    # Header & Intro
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    # Tabs
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("<i class='fas fa-list'></i> Leaderboard"):
-            gr.Markdown("## Model Elo Rankings by Category")
             category_selector = gr.Radio(
                 choices=CATEGORIES,
-                label="Select Category:",
                 value=DEFAULT_CATEGORY,
                 interactive=True,
-                elem_id="category-selector"
             )
             leaderboard_df = gr.Dataframe(
                 value=update_leaderboard(DEFAULT_CATEGORY),
-                headers=["Rank","Model","Organizer","License","Elo Score"],
-                datatype=["number","html","str","str","number"],
                 interactive=False,
-                row_count=(len(master_df),"fixed"),
-                col_count=(5,"fixed"),
                 wrap=True,
-                elem_id="leaderboard-table"
             )
             category_selector.change(
                 fn=update_leaderboard,
                 inputs=category_selector,
-                outputs=leaderboard_df
             )
-        with gr.TabItem("<i class='fas fa-info-circle'></i> About"):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-    # Citation Accordion
     with gr.Accordion("📙 Citation", open=False):
         gr.Textbox(
             value=CITATION_BUTTON_TEXT,
             label=CITATION_BUTTON_LABEL,
             lines=10,
             elem_id="citation-button",
-            show_copy_button=True
         )
 if __name__ == "__main__":
-    print("Launching Gradio App in Dark Mode...")
-    demo.launch()

+"""
+app.py — MLE-Dojo Dark-Theme Leaderboard
+---------------------------------------
+Run:  python app.py
+"""
 import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+# ---------- Placeholder / Fallback Imports ----------
 try:
     from src.about import (
         CITATION_BUTTON_LABEL,
         EVALUATION_QUEUE_TEXT,
         INTRODUCTION_TEXT,
         LLM_BENCHMARKS_TEXT,
+        TITLE,  # will be overridden below
     )
     from src.display.css_html_js import custom_css
     from src.envs import REPO_ID
     from src.submission.submit import add_new_eval
 except ImportError:
     CITATION_BUTTON_LABEL = "Citation"
     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
     LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
     custom_css = ""
     REPO_ID = "your/space-id"
+    def add_new_eval(*args):
+        return "Submission placeholder."
+# ---------- Elo Data ----------
+# One record per model: display name, reference URL, organizer, license, one
+# score per category (*_Elo) and an "Overall" score. The keys become the
+# columns of master_df.
+# The records use dict(...) keyword syntax, so every key must be a valid
+# identifier -- hence "MLE_Lite_Elo" (underscore) for the "MLE-Lite" category.
 data = [
+    dict(model_name="gpt-4o-mini", url="https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/",
+         organizer="OpenAI", license="Proprietary", MLE_Lite_Elo=753, Tabular_Elo=839,
+         NLP_Elo=758, CV_Elo=754, Overall=778),
+    dict(model_name="gpt-4o", url="https://openai.com/index/hello-gpt-4o/",
+         organizer="OpenAI", license="Proprietary", MLE_Lite_Elo=830, Tabular_Elo=861,
+         NLP_Elo=903, CV_Elo=761, Overall=841),
+    dict(model_name="o3-mini", url="https://openai.com/index/openai-o3-mini/",
+         organizer="OpenAI", license="Proprietary", MLE_Lite_Elo=1108, Tabular_Elo=1019,
+         NLP_Elo=1056, CV_Elo=1207, Overall=1096),
+    dict(model_name="deepseek-v3", url="https://api-docs.deepseek.com/news/news1226",
+         organizer="DeepSeek", license="DeepSeek", MLE_Lite_Elo=1004, Tabular_Elo=1015,
+         NLP_Elo=1028, CV_Elo=1067, Overall=1023),
+    dict(model_name="deepseek-r1", url="https://api-docs.deepseek.com/news/news250120",
+         organizer="DeepSeek", license="DeepSeek", MLE_Lite_Elo=1137, Tabular_Elo=1053,
+         NLP_Elo=1103, CV_Elo=1083, Overall=1100),
+    dict(model_name="gemini-2.0-flash", url="https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash",
+         organizer="Google", license="Proprietary", MLE_Lite_Elo=847, Tabular_Elo=923,
+         NLP_Elo=860, CV_Elo=978, Overall=895),
+    dict(model_name="gemini-2.0-pro", url="https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/",
+         organizer="Google", license="Proprietary", MLE_Lite_Elo=1064, Tabular_Elo=1139,
+         NLP_Elo=1028, CV_Elo=973, Overall=1054),
+    dict(model_name="gemini-2.5-pro", url="https://deepmind.google/technologies/gemini/pro/",
+         organizer="Google", license="Proprietary", MLE_Lite_Elo=1257, Tabular_Elo=1150,
+         NLP_Elo=1266, CV_Elo=1177, Overall=1214),
 ]
 master_df = pd.DataFrame(data)
+# ---------- Category Logic ----------
+# CATEGORIES holds the labels offered by the category radio selector, in
+# display order; DEFAULT_CATEGORY is the one selected on first render.
+# category_to_column maps each label to the master_df column with its score.
 CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
 DEFAULT_CATEGORY = "Overall"
 category_to_column = {
     "Overall": "Overall",
+    "MLE-Lite": "MLE_Lite_Elo",
     "Tabular": "Tabular_Elo",
     "NLP": "NLP_Elo",
     "CV": "CV_Elo",
 }
+def update_leaderboard(category: str) -> pd.DataFrame:
+    """Build the ranked leaderboard table for one category.
+
+    Args:
+        category: A key of ``category_to_column``. Any other value falls
+            back to the column of ``DEFAULT_CATEGORY``.
+
+    Returns:
+        A new DataFrame with the columns Rank, Model, Organizer, License and
+        Elo Score, sorted by score in descending order. Rank starts at 1 and
+        Model is an HTML anchor that links the model name to its URL.
+    """
     col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
+    # Work on a copy so the in-place operations below never mutate master_df.
+    df = master_df[["model_name", "url", "organizer", "license", col]].copy()
     df.sort_values(by=col, ascending=False, inplace=True)
     df.reset_index(drop=True, inplace=True)
+    # The index was just reset to 0..n-1 in sorted order, so index + 1 is the rank.
+    df.insert(0, "Rank", df.index + 1)
+    # NOTE(review): url and model_name are interpolated without HTML escaping;
+    # acceptable for the hard-coded records, escape them if rows ever come
+    # from user-supplied input.
+    df["Model"] = df.apply(
+        lambda r: f"<a href='{r['url']}' target='_blank'>{r['model_name']}</a>", axis=1
+    )
+    df.rename(
+        columns={col: "Elo Score", "organizer": "Organizer", "license": "License"},
+        inplace=True,
     )
+    return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
+# ---------- Dark-Theme CSS ----------
+# Stylesheet appended to custom_css below. It styles the page body, the
+# .hero-section header, the .tab-buttons tabs, the #category-selector radio
+# pills and the .dataframe-container table.
+# NOTE: despite the first CSS comment, only the Inter Google Font is imported
+# by this stylesheet; the Font Awesome stylesheet is linked separately through
+# a gr.HTML <link> tag when the app is built.
+# The non-English CSS comment on ".hero-section h1 i" reads: "keep the trophy
+# on the same line as the text & aligned".
 dark_css = """
+/* ---- Google Font & Font Awesome ---- */
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
 body {
     font-family: 'Inter', sans-serif;
     background-color: #121212;
     color: #e0e0e0;
+    font-size: 15px;
 }
+/* ---- Hero Section ---- */
 .hero-section {
     background: linear-gradient(135deg, #333, #222);
     color: #e0e0e0;
+    padding: 1.75rem 1rem;
     border-radius: .75rem;
     margin-bottom: 1.5rem;
     text-align: center;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.55);
 }
 .hero-section h1 {
     margin: 0;
+    font-size: 2.2rem;
+    font-weight: 700;
+    display: inline-flex;
+    align-items: center;
+    gap: .5rem;
+}
+.hero-section h1 i {          /* 奖杯与文字同行 & 对齐 */
+    margin: 0;
+    font-size: 1em;
 }
 .hero-section h2 {
+    margin: .6rem 0 0;
+    font-size: 1.15rem;
+    font-weight: 400;
+    opacity: .8;
 }
+/* ---- Tabs ---- */
 .tab-buttons button {
     border-radius: 20px !important;
+    padding: .55rem 1.15rem !important;
+    margin-right: .6rem !important;
     background: #1e1e1e !important;
     color: #e0e0e0 !important;
     border: none !important;
+    font-size: .95rem !important;
     font-weight: 500 !important;
+    transition: background .28s;
 }
+.tab-buttons button:hover     { background: #2c2c2c !important; }
 .tab-buttons button[aria-selected="true"] {
     background: #444 !important;
     color: #fff !important;
 }
+/* ---- Category Selector ---- */
 #category-selector label {
     display: inline-block;
+    padding: .55rem 1.2rem;
+    margin-right: .5rem;
     border-radius: 999px;
+    background: #1d1d1d;
     cursor: pointer;
+    transition: background .28s, color .28s;
+    font-weight: 600;
+    font-size: .95rem;
     color: #e0e0e0;
 }
 #category-selector input[type="radio"]:checked + label {
+    background: #3d3d3d;
     color: #fff;
 }
+/* ---- Dataframe / Leaderboard ---- */
+.dataframe-container {
+    max-height: 420px;
+    overflow-y: auto;
+}
 .dataframe-container table {
     width: 100%;
+    border-collapse: collapse;
     border: none;
+    box-shadow: 0 2px 6px rgba(0,0,0,.55);
+    border-radius: .55rem;
 }
+.dataframe-container thead th {
+    background: #272727;
     color: #e0e0e0;
+    font-weight: 600;
+    padding: .85rem 1rem;
+    font-size: .9rem;
 }
+.dataframe-container tbody tr:nth-child(odd)  { background: #1c1c1c; }
+.dataframe-container tbody tr:nth-child(even) { background: #222;   }
+.dataframe-container td, .dataframe-container th {
+    padding: .8rem 1rem;
+    font-size: .88rem;
 }
+.dataframe-container td a {
     color: #8ab4f8;
     text-decoration: none;
 }
+.dataframe-container td a:hover {
     color: #a3c9ff;
     text-decoration: underline;
 }
 """
+# Extend (rather than replace) whatever custom_css was imported or defaulted.
 custom_css += dark_css
+# ---------- Override Title ----------
+# Replaces the TITLE imported from src.about with the hero-section markup.
+# The <i> element uses the Font Awesome "fas fa-trophy" classes, so the icon
+# only appears when the Font Awesome stylesheet has been loaded.
 TITLE = """
+<div class="hero-section">
+  <h1><i class="fas fa-trophy"></i>MLE-Dojo Benchmark Leaderboard</h1>
   <h2>Improving LLM Agents for Machine Learning Engineering</h2>
 </div>
 """
+# ---------- Build Gradio App ----------
+# Layout: Font Awesome <link>, hero title, introduction, a two-tab area
+# (Leaderboard / About) and a collapsed Citation accordion.
 demo = gr.Blocks(css=custom_css, theme=gr.themes.Base())
 with demo:
+    # Inject Font Awesome (ensures the trophy icon is available)
+    gr.HTML(
+        """
+<link rel="stylesheet"
+      href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
+      crossorigin="anonymous" referrerpolicy="no-referrer"/>
+"""
     )
+    # -------- Header & Intro --------
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    # -------- Tabs --------
+    with gr.Tabs(elem_classes="tab-buttons"):
+        # --- Leaderboard Tab ---
+        with gr.TabItem("📊 Leaderboard"):
+            gr.Markdown("### Model Elo Rankings by Category")
+            # elem_id ties this radio group to the "#category-selector" CSS rules.
             category_selector = gr.Radio(
                 choices=CATEGORIES,
                 value=DEFAULT_CATEGORY,
                 interactive=True,
+                elem_id="category-selector",
+                label="Select Category:",
             )
+            # headers/datatype line up with the columns update_leaderboard
+            # returns; the "html" datatype on the Model column is presumably
+            # what lets its anchor markup render as a link -- confirm against
+            # the installed Gradio version.
             leaderboard_df = gr.Dataframe(
                 value=update_leaderboard(DEFAULT_CATEGORY),
+                headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
+                datatype=["number", "html", "str", "str", "number"],
                 interactive=False,
+                row_count=(len(master_df), "fixed"),
+                col_count=(5, "fixed"),
                 wrap=True,
+                elem_id="leaderboard-table",
             )
+            # Rebuild the table whenever the selected category changes.
             category_selector.change(
                 fn=update_leaderboard,
                 inputs=category_selector,
+                outputs=leaderboard_df,
             )
+        # --- About Tab ---
+        with gr.TabItem("ℹ️ About"):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+    # -------- Citation --------
     with gr.Accordion("📙 Citation", open=False):
         gr.Textbox(
             value=CITATION_BUTTON_TEXT,
             label=CITATION_BUTTON_LABEL,
             lines=10,
             elem_id="citation-button",
+            show_copy_button=True,
         )
+# Only launch the server when the file is executed directly.
 if __name__ == "__main__":
+    print("Launching Gradio App in Dark Mode…")
+    demo.launch()