Spaces:

MLE-Dojo
/

Leaderboard

Running

App Files Community

Jerrycool committed on Apr 26

Commit

7153753

verified ·

1 Parent(s): 00fb337

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -38

app.py CHANGED Viewed

@@ -10,12 +10,14 @@ try:
         EVALUATION_QUEUE_TEXT,
         INTRODUCTION_TEXT,
         LLM_BENCHMARKS_TEXT,
-        TITLE,  # Will override below
     )
     from src.display.css_html_js import custom_css
     from src.envs import REPO_ID
     from src.submission.submit import add_new_eval
 except ImportError:
     CITATION_BUTTON_LABEL = "Citation"
     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
@@ -27,14 +29,14 @@ except ImportError:
 # --- Elo Data ---
 data = [
-    {'model_name': 'gpt-4o-mini',    'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 753,  'Tabular_Elo': 839,  'NLP_Elo': 758,  'CV_Elo': 754,  'Overall': 778},
-    {'model_name': 'gpt-4o',         'url': 'https://openai.com/index/hello-gpt-4o/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 830,  'Tabular_Elo': 861,  'NLP_Elo': 903,  'CV_Elo': 761,  'Overall': 841},
-    {'model_name': 'o3-mini',        'url': 'https://openai.com/index/openai-o3-mini/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
-    {'model_name': 'deepseek-v3',    'url': 'https://api-docs.deepseek.com/news/news1226',                          'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
-    {'model_name': 'deepseek-r1',    'url': 'https://api-docs.deepseek.com/news/news250120',                         'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
     {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash',        'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 847,  'Tabular_Elo': 923,  'NLP_Elo': 860,  'CV_Elo': 978,  'Overall': 895},
-    {'model_name': 'gemini-2.0-pro',  'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973,  'Overall': 1054},
-    {'model_name': 'gemini-2.5-pro',  'url': 'https://deepmind.google/technologies/gemini/pro/',               'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 ]
 master_df = pd.DataFrame(data)
@@ -61,14 +63,22 @@ def update_leaderboard(category):
     df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
     return df[['Rank','Model','Organizer','License','Elo Score']]
-# --- Dark Theme + Custom CSS ---
-custom_css += """
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
 body {
     font-family: 'Inter', sans-serif;
-    background-color: #1e1e2f !important;
-    color: #e0e0f0 !important;
 }
 /* Hero Section */
@@ -79,7 +89,7 @@ body {
     border-radius: .75rem;
     margin-bottom: 1.5rem;
     text-align: center;
-    box-shadow: 0 4px 10px rgba(0,0,0,0.3);
 }
 .hero-section h1 {
     margin: 0;
@@ -98,14 +108,14 @@ body {
     border-radius: 20px !important;
     padding: 0.5rem 1rem !important;
     margin-right: 0.5rem !important;
-    background: #3a3a4c !important;
-    color: #e0e0f0 !important;
     border: none !important;
     transition: background 0.3s !important;
     font-weight: 500 !important;
 }
 .tab-buttons button:hover {
-    background: #4a4a6f !important;
 }
 .tab-buttons button[aria-selected="true"] {
     background: #6c63ff !important;
@@ -119,8 +129,7 @@ body {
     padding: 0.5rem 1rem;
     margin-right: 0.5rem;
     border-radius: 999px;
-    background: #3a3a4c;
-    color: #e0e0f0;
     cursor: pointer;
     transition: background 0.3s, color 0.3s;
     font-weight: 500;
@@ -134,55 +143,52 @@ body {
 table {
     width: 100%;
     border: none;
-    border-radius: .5rem;
     overflow: hidden;
-    box-shadow: 0 2px 4px rgba(0,0,0,0.3);
     margin: 1rem 0;
 }
 th {
-    background: #6c63ff !important;
-    color: #fff !important;
 }
 td, th {
     padding: 0.75rem 1rem;
-    background: #1e1e2f;
-    color: #e0e0f0;
-}
-tr:nth-child(even) td {
-    background: #2a2a3c;
-}
-tr:hover td {
-    background: #3c3b52;
 }
 td a {
-    color: #9afeff;
     text-decoration: none;
 }
 td a:hover {
     text-decoration: underline;
 }
 """
-# --- Override Title with Hero ---
 TITLE = """
 <div class="hero-section">
-  <h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>
   <h2>Improving LLM Agents for Machine Learning Engineering</h2>
 </div>
 """
-# --- Build App with valid Dark theme ---
-# demo = gr.Blocks(css=custom_css, theme=gr.themes.Dark())
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("📋 Leaderboard"):
             gr.Markdown("## Model Elo Rankings by Category")
             category_selector = gr.Radio(
                 choices=CATEGORIES,
                 value=DEFAULT_CATEGORY,
                 interactive=True,
                 elem_id="category-selector"
@@ -203,7 +209,7 @@ with demo:
                 outputs=leaderboard_df
             )
-        with gr.TabItem("ℹ️ About"):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
     with gr.Accordion("📙 Citation", open=False):
@@ -217,4 +223,7 @@ with demo:
 if __name__ == "__main__":
     print("Launching Gradio App...")
-    demo.launch()

         EVALUATION_QUEUE_TEXT,
         INTRODUCTION_TEXT,
         LLM_BENCHMARKS_TEXT,
+        TITLE,  # We will override TITLE below for styling
     )
     from src.display.css_html_js import custom_css
     from src.envs import REPO_ID
     from src.submission.submit import add_new_eval
+    print("Successfully imported from src module.")
 except ImportError:
+    print("Warning: Using placeholder values because src module imports failed.")
     CITATION_BUTTON_LABEL = "Citation"
     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
 # --- Elo Data ---
 data = [
+    {'model_name': 'gpt-4o-mini',   'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 753,  'Tabular_Elo': 839,  'NLP_Elo': 758,  'CV_Elo': 754,  'Overall': 778},
+    {'model_name': 'gpt-4o',        'url': 'https://openai.com/index/hello-gpt-4o/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 830,  'Tabular_Elo': 861,  'NLP_Elo': 903,  'CV_Elo': 761,  'Overall': 841},
+    {'model_name': 'o3-mini',       'url': 'https://openai.com/index/openai-o3-mini/',                              'organizer': 'OpenAI',   'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
+    {'model_name': 'deepseek-v3',   'url': 'https://api-docs.deepseek.com/news/news1226',                          'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
+    {'model_name': 'deepseek-r1',   'url': 'https://api-docs.deepseek.com/news/news250120',                         'organizer': 'DeepSeek','license': 'DeepSeek',     'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
     {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash',        'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 847,  'Tabular_Elo': 923,  'NLP_Elo': 860,  'CV_Elo': 978,  'Overall': 895},
+    {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973,  'Overall': 1054},
+    {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/',               'organizer': 'Google',   'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 ]
 master_df = pd.DataFrame(data)
     df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
     return df[['Rank','Model','Organizer','License','Elo Score']]
+# --- Advanced Styling ---
+# Base font-size bump (if any)
+font_size_css = """
+body { font-size: 1em !important; }
+"""
+custom_css += font_size_css
+# Import fonts & icons + component styling
+advanced_css = """
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
+@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
 body {
     font-family: 'Inter', sans-serif;
+    background-color: #f8f9fa;
+    color: #333;
 }
 /* Hero Section */
     border-radius: .75rem;
     margin-bottom: 1.5rem;
     text-align: center;
+    box-shadow: 0 4px 10px rgba(0,0,0,0.1);
 }
 .hero-section h1 {
     margin: 0;
     border-radius: 20px !important;
     padding: 0.5rem 1rem !important;
     margin-right: 0.5rem !important;
+    background: #e0e0e0 !important;
+    color: #333 !important;
     border: none !important;
     transition: background 0.3s !important;
     font-weight: 500 !important;
 }
 .tab-buttons button:hover {
+    background: #d0d0d0 !important;
 }
 .tab-buttons button[aria-selected="true"] {
     background: #6c63ff !important;
     padding: 0.5rem 1rem;
     margin-right: 0.5rem;
     border-radius: 999px;
+    background: #e0e0e0;
     cursor: pointer;
     transition: background 0.3s, color 0.3s;
     font-weight: 500;
 table {
     width: 100%;
     border: none;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    border-radius: 0.5rem;
     overflow: hidden;
     margin: 1rem 0;
 }
 th {
+    background: #6c63ff;
+    color: #fff;
 }
 td, th {
     padding: 0.75rem 1rem;
 }
+tr:nth-child(even) { background: #f7f5ff; }
+tr:hover { background: #edeaff; }
 td a {
+    color: #6c63ff;
     text-decoration: none;
 }
 td a:hover {
+    color: #534bbe;
     text-decoration: underline;
 }
 """
+custom_css += advanced_css
+# --- Override Title ---
 TITLE = """
 <div class="hero-section">
+  <h1><i class="fas fa-trophy"></i> MLE-Dojo Benchmark Leaderboard</h1>
   <h2>Improving LLM Agents for Machine Learning Engineering</h2>
 </div>
 """
+# --- Build App ---
+demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("<i class='fas fa-list'></i> Leaderboard"):
             gr.Markdown("## Model Elo Rankings by Category")
             category_selector = gr.Radio(
                 choices=CATEGORIES,
+                label="Select Category:",
                 value=DEFAULT_CATEGORY,
                 interactive=True,
                 elem_id="category-selector"
                 outputs=leaderboard_df
             )
+        with gr.TabItem("<i class='fas fa-info-circle'></i> About"):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
     with gr.Accordion("📙 Citation", open=False):
 if __name__ == "__main__":
     print("Launching Gradio App...")
+    try:
+        demo.launch()
+    except Exception as e:
+        print(f"Error launching app: {e}")