# NOTE: removed non-code residue from a Hugging Face Spaces page scrape
# (status text, file size, commit-hash gutter, and line-number gutter).
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
# --- Make sure these imports work relative to your file structure ---
# Option 1: If src is a directory in the same folder as your script:
# Pull shared UI text/config from the local `src` package when it is present;
# otherwise fall back to inline placeholders so the app can still start.
try:
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT,  # Keep if used by commented-out submit tab
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,
    )
    from src.display.css_html_js import custom_css  # May legitimately be empty
    from src.envs import REPO_ID  # Used by restart_space() and the scheduler
    from src.submission.submit import add_new_eval  # Used by the submit tab
    print("Successfully imported from src module.")
except ImportError:
    # Placeholder definitions — used only when the src package is unavailable.
    print("Warning: Using placeholder values because src module imports failed.")
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
    EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
    INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
    LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
    TITLE = "<h1>π MLE-Dojo Benchmark Leaderboard</h1>"
    custom_css = ""  # Start with empty CSS if not imported
    REPO_ID = "your/space-id"  # Replace with actual ID if needed

    def add_new_eval(*args):
        return "Submission placeholder."
# --- Elo Leaderboard Configuration ---
# Enhanced data with Rank (placeholder), Organizer, License, and URL
# !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
# Verify organizer and license information for accuracy.
# Each record carries the model's identity/link/licensing plus one Elo score per
# category; the '*_Elo' and 'Overall' keys are the sortable score columns
# consumed by update_leaderboard() below.
data = [
    {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
    {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
    {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
    {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
    {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
    {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
    {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
    {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
]
# Create a master DataFrame
# Note: Columns 'organizer' and 'license' are created in lowercase here.
master_df = pd.DataFrame(data)
# Define categories for selection (user-facing)
CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"] # Overall first
DEFAULT_CATEGORY = "Overall" # Set a default category
# Map user-facing categories to DataFrame column names
category_to_column = {
    "MLE-Lite": "MLE-Lite_Elo",
    "Tabular": "Tabular_Elo",
    "NLP": "NLP_Elo",
    "CV": "CV_Elo",
    "Overall": "Overall"
}
# --- Helper function to update leaderboard ---
def update_leaderboard(category):
    """
    Build the display DataFrame for one category.

    Resolves the user-facing category to its Elo column, sorts descending,
    attaches a 1-based Rank, renders the model name as an HTML link, and
    renames columns to the display headers:
    'Rank', 'Model', 'Organizer', 'License', 'Elo Score'.
    """
    score_column = category_to_column.get(category)
    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
        score_column = category_to_column[DEFAULT_CATEGORY]
        # If even the fallback column is missing, return an empty frame with
        # the display schema so the UI still renders something sensible.
        if score_column not in master_df.columns:
            print(f"Error: Default column '{score_column}' also not found.")
            return pd.DataFrame({
                "Rank": [],
                "Model": [],
                "Elo Score": [],
                "Organizer": [],
                "License": []
            })

    # Select the base columns plus the chosen score column (lowercase keys
    # match master_df), sort by score, and re-index for ranking.
    df = (
        master_df[['model_name', 'url', 'organizer', 'license', score_column]]
        .sort_values(by=score_column, ascending=False)
        .reset_index(drop=True)
    )
    df.insert(0, 'Rank', df.index + 1)

    def _linkify(row):
        # Fall back to '#' when no URL is recorded for the model.
        href = row['url'] if pd.notna(row['url']) else '#'
        return f"<a href='{href}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>"

    df['Model'] = df.apply(_linkify, axis=1)

    # Rename to the capitalized display headers and order the columns.
    df = df.rename(columns={score_column: 'Elo Score', 'organizer': 'Organizer', 'license': 'License'})
    return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
# --- Mock/Placeholder functions/data for other tabs ---
# (If the Submit tab is used, populate these for real instead of mocking.)
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
EVAL_COLS = ["Model", "Status", "Requested", "Started"]  # Dataframe headers
EVAL_TYPES = ["str", "str", "str", "str"]  # Dataframe column types
# Empty queues: real fetching was removed along with the Elo-only rework.
finished_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
running_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
pending_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
# --- Keep restart function if relevant ---
def restart_space():
    """Log an intent to restart the Space (no actual restart is performed)."""
    # REPO_ID must be defined/imported (src.envs or the placeholder) to be useful.
    print("Attempting to restart space: {}".format(REPO_ID))
    # Wire the real mechanism here if needed, e.g. HfApi().restart_space(REPO_ID).
# --- Gradio App Definition ---
# Enlarge the base font for the whole app. Tweak '1.5em' (e.g. '1.4em',
# '16px') to resize; '!important' overrides the theme defaults. The string
# content is passed verbatim to Gradio as CSS.
font_size_css = """
body {
font-size: 1.5em !important; /* Increase base font size */
}
/* Optional: Target specific elements if needed */
/*
#leaderboard-table th, #leaderboard-table td {
font-size: 1em !important; /* Adjust table font size relative to new body size */
padding: 5px 7px !important; /* Increase padding for better spacing */
}
h1, .markdown-text h1 { font-size: 2.2em !important; } /* Make main title larger */
h2, .markdown-text h2 { font-size: 1.8em !important; } /* Make section titles larger */
button { font-size: 1.1em !important; padding: 8px 16px !important; } /* Slightly larger buttons */
.gr-input, .gr-dropdown, .gr-textbox textarea { font-size: 1em !important; } /* Ensure inputs scale too */
*/
"""
# Extend whatever CSS was imported (or the empty placeholder) with the font rules.
custom_css = custom_css + font_size_css
# Add generic table styling, but only if the imported CSS hasn't styled tables.
if custom_css.find("table {") == -1:
    custom_css = custom_css + """
table { width: 100%; border-collapse: collapse; margin-top: 10px; margin-bottom: 10px; }
th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; vertical-align: top; } /* Allow wrapping, top align */
th { background-color: #f2f2f2; font-weight: bold; }
tr:nth-child(even) { background-color: #f9f9f9; }
tr:hover { background-color: #e9e9e9; }
td a { color: #007bff; text-decoration: none; }
td a:hover { text-decoration: underline; }
"""
# Use a theme for better default styling
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
with demo:
    # Page header; TITLE / INTRODUCTION_TEXT come from src.about or the
    # placeholder fallbacks defined at the top of the file.
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # FIX: this tab label was split across two source lines (an
        # unterminated string literal, apparently from a mangled emoji);
        # rejoined onto a single line so the file parses.
        with gr.TabItem("π MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Column():
                gr.Markdown("## Model Elo Rankings by Category")
                category_selector = gr.Radio(
                    choices=CATEGORIES,
                    label="Select Category:",
                    value=DEFAULT_CATEGORY,
                    interactive=True,
                )
                leaderboard_df_component = gr.Dataframe(
                    # Initialize with sorted data for the default category.
                    value=update_leaderboard(DEFAULT_CATEGORY),
                    # Headers/datatypes match the renamed columns returned by
                    # update_leaderboard: Rank, Model, Organizer, License, Elo Score.
                    headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
                    datatype=["number", "html", "str", "str", "number"],
                    interactive=False,
                    # 'height' is unsupported here; row_count shows all rows.
                    row_count=(len(master_df), "fixed"),
                    col_count=(5, "fixed"),
                    wrap=True,  # Allow text wrapping in cells
                    elem_id="leaderboard-table",  # CSS hook for custom styling
                )
                # Re-render the table whenever the category changes.
                category_selector.change(
                    fn=update_leaderboard,
                    inputs=category_selector,
                    outputs=leaderboard_df_component,
                )
        with gr.TabItem("π About", elem_id="llm-benchmark-tab-about", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
    # --- Submit Tab (kept commented out, as in the original request) ---
    # Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported/defined if uncommented.
    # NOTE(review): one commented line below had lost its '#' prefix in the
    # original (a syntax error); restored as a single commented line.
    # with gr.TabItem("π Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
    #     with gr.Column():
    #         with gr.Row():
    #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
    #         with gr.Column():
    #             with gr.Accordion(f"β Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
    #                 finished_eval_table = gr.components.Dataframe(
    #                     value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
    #                 )
    #             with gr.Accordion(f"π Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
    #                 running_eval_table = gr.components.Dataframe(
    #                     value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
    #                 )
    #             with gr.Accordion(f"β³ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
    #                 pending_eval_table = gr.components.Dataframe(
    #                     value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
    #                 )
    #         with gr.Row():
    #             gr.Markdown("# βοΈβ¨ Submit your model here!", elem_classes="markdown-text")
    #         with gr.Row():
    #             with gr.Column():
    #                 model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)")
    #                 revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
    #                 model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True)
    #             with gr.Column():
    #                 precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
    #                 weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
    #                 base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
    #         submit_button = gr.Button("Submit Eval")
    #         submission_result = gr.Markdown()
    #         submit_button.click(
    #             add_new_eval,
    #             [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
    #             submission_result,
    #         )
    # --- Citation Row (at the bottom, outside Tabs) ---
    with gr.Accordion("π Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=10,  # Adjust lines if needed for the larger font size
            elem_id="citation-button",
            show_copy_button=True,
        )
# --- Keep scheduler if relevant ---
# Start the background restart job only when the script is run directly.
if __name__ == "__main__":
    try:
        scheduler = BackgroundScheduler()
        # Guard clauses: skip scheduling when the restart hook or REPO_ID
        # is missing/placeholder; otherwise restart every 30 minutes.
        if not callable(restart_space):
            print("Warning: restart_space function not available; space restart job not scheduled.")
        elif not REPO_ID or REPO_ID == "your/space-id":
            print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
        else:
            scheduler.add_job(restart_space, "interval", seconds=1800)  # 1800 s = 30 min
            scheduler.start()
    except Exception as e:
        print(f"Failed to initialize or start scheduler: {e}")
# --- Launch the app ---
# Runs only when executed as a script, never on import.
if __name__ == "__main__":
    # Dependencies: pip install gradio pandas apscheduler. The src module
    # (about.py etc.) must be importable, or the placeholder fallbacks apply.
    print("Launching Gradio App...")
    demo.launch()