Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
from apscheduler.schedulers.background import BackgroundScheduler | |
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard | |
# --- Make sure these imports work relative to your file structure --- | |
# Option 1: If src is a directory in the same folder as your script: | |
try:
    # Preferred path: texts, CSS, repo id and the submit handler come from
    # the local `src` package.
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT,  # only referenced by the commented-out submit tab
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,
    )
    from src.display.css_html_js import custom_css  # may be an empty string
    from src.envs import REPO_ID  # read by restart_space and the scheduler guard
    from src.submission.submit import add_new_eval  # only needed by the submit tab
except ImportError:
    # Fallback path: any failing `src` import replaces ALL of the names above
    # with stand-in values so the app can still start.
    print("Warning: Using placeholder values because src module imports failed.")
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
    EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
    INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
    LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
    TITLE = "<h1>π MLE-Dojo Benchmark Leaderboard</h1>"
    custom_css = ""  # start with empty CSS when nothing was imported
    REPO_ID = "your/space-id"  # replace with the actual Space id if needed

    def add_new_eval(*args):
        """Stand-in for the real submit handler; ignores its arguments."""
        return "Submission placeholder."
else:
    print("Successfully imported from src module.")
# --- End Placeholder Definitions --- | |
# --- Elo Leaderboard Configuration --- | |
# Enhanced data with Rank (placeholder), Organizer, License, and URL | |
# !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!! | |
# Verify organizer and license information for accuracy. | |
# Field names shared by every leaderboard row, in column order.
_LEADERBOARD_FIELDS = (
    'model_name', 'url', 'organizer', 'license',
    'MLE-Lite_Elo', 'Tabular_Elo', 'NLP_Elo', 'CV_Elo', 'Overall',
)

# One tuple per model, values aligned with _LEADERBOARD_FIELDS.
_LEADERBOARD_ROWS = (
    ('gpt-4o-mini', 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/',
     'OpenAI', 'Proprietary', 753, 839, 758, 754, 778),
    ('gpt-4o', 'https://openai.com/index/hello-gpt-4o/',
     'OpenAI', 'Proprietary', 830, 861, 903, 761, 841),
    ('o3-mini', 'https://openai.com/index/openai-o3-mini/',
     'OpenAI', 'Proprietary', 1108, 1019, 1056, 1207, 1096),
    ('deepseek-v3', 'https://api-docs.deepseek.com/news/news1226',
     'DeepSeek', 'DeepSeek', 1004, 1015, 1028, 1067, 1023),
    ('deepseek-r1', 'https://api-docs.deepseek.com/news/news250120',
     'DeepSeek', 'DeepSeek', 1137, 1053, 1103, 1083, 1100),
    ('gemini-2.0-flash', 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash',
     'Google', 'Proprietary', 847, 923, 860, 978, 895),
    ('gemini-2.0-pro', 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/',
     'Google', 'Proprietary', 1064, 1139, 1028, 973, 1054),
    ('gemini-2.5-pro', 'https://deepmind.google/technologies/gemini/pro/',
     'Google', 'Proprietary', 1257, 1150, 1266, 1177, 1214),
)

# List of per-model dicts (same shape as a hand-written list of records).
data = [dict(zip(_LEADERBOARD_FIELDS, row)) for row in _LEADERBOARD_ROWS]

# Master table; note that 'organizer' and 'license' are lowercase column
# names at this stage.
master_df = pd.DataFrame(data)

# User-facing category names offered for selection ("Overall" comes first)
# and the one selected by default.
CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
DEFAULT_CATEGORY = "Overall"

# User-facing category -> master_df score column. The per-domain columns are
# named "<category>_Elo"; the overall score column is plain "Overall".
category_to_column = {
    name: f"{name}_Elo" for name in ("MLE-Lite", "Tabular", "NLP", "CV")
}
category_to_column["Overall"] = "Overall"
# --- Helper function to update leaderboard --- | |
def update_leaderboard(category):
    """Build the ranked leaderboard table for one benchmark category.

    Args:
        category: User-facing category name (a key of ``category_to_column``).
            An unknown name, or one whose score column is missing from
            ``master_df``, falls back to ``DEFAULT_CATEGORY`` with a warning.

    Returns:
        A new ``pandas.DataFrame`` with the columns ``Rank``, ``Model``,
        ``Organizer``, ``License`` and ``Elo Score`` (in that order), sorted
        by score from highest to lowest. ``Model`` holds an HTML link to the
        model's page (``#`` when the URL is missing). If even the default
        score column is absent, an empty frame with the same columns in the
        same order is returned.
    """
    import html  # function-scope import keeps this block self-contained

    # Single source of truth for the display layout, used by both the normal
    # result and the empty fallback so the two can never disagree.
    display_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]

    score_column = category_to_column.get(category)
    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
        score_column = category_to_column[DEFAULT_CATEGORY]
        if score_column not in master_df.columns:
            print(f"Error: Default column '{score_column}' also not found.")
            return pd.DataFrame({name: [] for name in display_columns})

    # Work on a fresh frame: master_df itself is never modified.
    df = master_df[['model_name', 'url', 'organizer', 'license', score_column]]
    df = df.sort_values(by=score_column, ascending=False).reset_index(drop=True)

    # After reset_index the index is 0..n-1 in sorted order, so rank = index + 1.
    df.insert(0, 'Rank', df.index + 1)

    # Render the model name as a hyperlink. Both values are HTML-escaped so a
    # quote, '&' or '<' in the data cannot break the attribute or the markup.
    # A list comprehension (rather than a row-wise apply) also works when the
    # table has no rows.
    df['Model'] = [
        "<a href='{href}' target='_blank' style='color: #007bff; text-decoration: none;'>{label}</a>".format(
            href=html.escape(str(url), quote=True) if pd.notna(url) else '#',
            label=html.escape(str(name), quote=True),
        )
        for url, name in zip(df['url'], df['model_name'])
    ]

    # Switch to the display headers, then keep only the display columns.
    df = df.rename(columns={
        score_column: 'Elo Score',
        'organizer': 'Organizer',
        'license': 'License',
    })
    return df[display_columns]
# --- Mock/Placeholder functions/data for other tabs --- | |
# (If the Submit tab is used, ensure these variables are appropriately populated or handled) | |
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")

# Headers and per-column datatypes for the (currently mocked) evaluation
# queue tables.
EVAL_COLS = ["Model", "Status", "Requested", "Started"]
EVAL_TYPES = ["str", "str", "str", "str"]

# Three independent, empty queue tables that share the same headers.
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = (
    pd.DataFrame(columns=list(EVAL_COLS)) for _ in range(3)
)
# --- Keep restart function if relevant --- | |
def restart_space():
    """Announce a restart attempt for the Space named by ``REPO_ID``.

    This is a stub: it only prints a message and performs no restart itself.
    Plug the real restart mechanism in here if one is needed
    (e.g. HfApi().restart_space(REPO_ID)).
    """
    # REPO_ID must be imported or defined at module level before this runs.
    notice = f"Attempting to restart space: {REPO_ID}"
    print(notice)
# --- Gradio App Definition --- | |
# ***** FONT SIZE INCREASED HERE *****
# Extra CSS that enlarges the base font size. Adjust the '1.5em' value below
# (e.g. to '1.4em' or '16px') to change it; the !important flag helps
# override theme defaults. The rules are appended to whatever custom_css
# already contains.
#
# NOTE: CSS comments do not nest -- the first "*/" ends a comment. The
# disabled "optional" rules below therefore must not contain inner
# "/* ... */" notes: with them, the disabled block ends early, some of the
# "disabled" rules become live CSS, and the leftover closing "*/" becomes
# part of the selector of the next appended rule (the `table` rule), which
# invalidates it. Notes for the disabled rules are kept here instead:
#   - th/td: table font size relative to the new body size, extra padding
#   - h1 / h2: larger main title / section titles
#   - button: slightly larger buttons
#   - .gr-input, .gr-dropdown, .gr-textbox textarea: make inputs scale too
font_size_css = """
body {
    font-size: 1.5em !important; /* Increase base font size */
}
/* Optional: Target specific elements if needed (disabled)
#leaderboard-table th, #leaderboard-table td {
    font-size: 1em !important;
    padding: 5px 7px !important;
}
h1, .markdown-text h1 { font-size: 2.2em !important; }
h2, .markdown-text h2 { font-size: 1.8em !important; }
button { font-size: 1.1em !important; padding: 8px 16px !important; }
.gr-input, .gr-dropdown, .gr-textbox textarea { font-size: 1em !important; }
*/
"""

# Append the new CSS to any existing custom_css.
custom_css += font_size_css

# Add basic table styling unless the stylesheet already has a `table {` rule.
if "table {" not in custom_css:
    custom_css += """
table { width: 100%; border-collapse: collapse; margin-top: 10px; margin-bottom: 10px; }
th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; vertical-align: top; } /* Allow wrapping, top align */
th { background-color: #f2f2f2; font-weight: bold; }
tr:nth-child(even) { background-color: #f9f9f9; }
tr:hover { background-color: #e9e9e9; }
td a { color: #007bff; text-decoration: none; }
td a:hover { text-decoration: underline; }
"""
# Use a theme for better default styling | |
# Top-level Gradio app. Components are laid out in the order they are created
# inside the nested `with` blocks, so statement order here is significant.
# The Soft theme supplies the default styling; custom_css is applied on top.
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
with demo:
    # Page header: TITLE is rendered as raw HTML, INTRODUCTION_TEXT as Markdown
    # (both imported or defined as placeholders earlier in the file).
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # --- Tab 0: the leaderboard itself ---
        with gr.TabItem("π MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Column():
                gr.Markdown("## Model Elo Rankings by Category")
                # Category picker; its value is passed to update_leaderboard.
                category_selector = gr.Radio(
                    choices=CATEGORIES,
                    label="Select Category:",
                    value=DEFAULT_CATEGORY,
                    interactive=True,
                )
                leaderboard_df_component = gr.Dataframe(
                    # Initial content: the table sorted for the default category.
                    value=update_leaderboard(DEFAULT_CATEGORY),
                    # Headers match the renamed columns returned by update_leaderboard.
                    headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
                    # One datatype per header, in the same order; "html" lets the
                    # Model column render its hyperlink.
                    datatype=["number", "html", "str", "str", "number"],
                    interactive=False,
                    # No 'height' argument is passed (it was removed as unsupported);
                    # row_count is fixed to the number of models so all rows show.
                    row_count=(len(master_df), "fixed"),
                    col_count=(5, "fixed"),
                    wrap=True,  # allow text wrapping in cells
                    elem_id="leaderboard-table"  # CSS hook for custom styling
                )
            # Re-render the table whenever a different category is selected.
            category_selector.change(
                fn=update_leaderboard,
                inputs=category_selector,
                outputs=leaderboard_df_component
            )
        # --- Tab 1: static description of the benchmarks ---
        with gr.TabItem("π About", elem_id="llm-benchmark-tab-about", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        # --- Submit tab (disabled) ---
        # If re-enabled, EVALUATION_QUEUE_TEXT, add_new_eval, EVAL_COLS, EVAL_TYPES and
        # the *_eval_queue_df frames must be imported/defined.
        # with gr.TabItem("π Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
        #     with gr.Column():
        #         with gr.Row():
        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")  # Requires import/definition
        #         with gr.Column():
        #             with gr.Accordion(f"β Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
        #                 finished_eval_table = gr.components.Dataframe(
        #                     value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
        #                 )
        #             with gr.Accordion(f"π Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
        #                 running_eval_table = gr.components.Dataframe(
        #                     value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
        #                 )
        #             with gr.Accordion(f"β³ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
        #                 pending_eval_table = gr.components.Dataframe(
        #                     value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
        #                 )
        #     with gr.Row():
        #         gr.Markdown("# βοΈβ¨ Submit your model here!", elem_classes="markdown-text")
        #     with gr.Row():
        #         with gr.Column():
        #             model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)")
        #             revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
        #             model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True)  # Example choices
        #         with gr.Column():
        #             precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
        #             weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
        #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
        #     submit_button = gr.Button("Submit Eval")
        #     submission_result = gr.Markdown()
        #     # Ensure add_new_eval is correctly imported/defined and handles these inputs
        #     submit_button.click(
        #         add_new_eval,  # Requires import/definition
        #         [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
        #         submission_result,
        #     )
    # --- Citation row (at the bottom, outside Tabs) ---
    with gr.Accordion("π Citation", open=False):
        # Collapsed by default; the textbox exposes a copy button for the citation text.
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=10,  # adjust if the larger font size needs more room
            elem_id="citation-button",
            show_copy_button=True,
        )
# IGNORE_WHEN_COPYING_START | |
# content_copy download | |
# Use code with caution. | |
# IGNORE_WHEN_COPYING_END | |
# --- Keep scheduler if relevant --- | |
# Only start scheduler if the script is run directly | |
if __name__ == "__main__":
    # Everything below runs only when the script is executed directly.
    #
    # Step 1: optionally schedule the periodic Space restart. Any failure
    # here is reported and does not prevent the app from launching.
    try:
        scheduler = BackgroundScheduler()
        if not callable(restart_space):
            print("Warning: restart_space function not available; space restart job not scheduled.")
        elif not REPO_ID or REPO_ID == "your/space-id":
            # An empty or placeholder repo id means there is nothing to restart.
            print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
        else:
            scheduler.add_job(restart_space, "interval", seconds=1800)  # every 30 minutes
            scheduler.start()
    except Exception as e:
        print(f"Failed to initialize or start scheduler: {e}")

    # Step 2: launch the app.
    # Required libraries: pip install gradio pandas apscheduler
    # The src module files (about.py etc.) must be importable, otherwise the
    # placeholder definitions near the top of the file are used.
    print("Launching Gradio App...")
    demo.launch()