import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
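
# Assumed third-party dependencies for this script (not pinned anywhere in the original):
# gradio, pandas, and apscheduler, e.g. `pip install gradio pandas apscheduler`.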
# --- Make sure these imports work relative to your file structure ---
# Option 1: If src is a directory in the same folder as your script:
try:
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT,
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,
    )
    from src.display.css_html_js import custom_css
    from src.envs import REPO_ID
    from src.submission.submit import add_new_eval
    print("Successfully imported from src module.")
# Option 2: If you don't have these files, define placeholders
except ImportError:
    print("Warning: Using placeholder values because src module imports failed.")
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
    EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
    INTRODUCTION_TEXT = """
# Welcome to the MLE-Dojo Benchmark Leaderboard
This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
## How to read this leaderboard
- Select a domain category to view specialized rankings
- Higher ELO scores indicate better performance
- Click on any model name to learn more about it
"""
    LLM_BENCHMARKS_TEXT = """
# About the MLE-Dojo Benchmark
## Evaluation Methodology
The MLE-Dojo benchmark evaluates models across various domains including:
- **MLE-Lite**: Basic machine learning engineering tasks
- **Tabular**: Data manipulation, analysis, and modeling with structured data
- **NLP**: Natural language processing tasks including classification, generation, and understanding
- **CV**: Computer vision tasks including image classification, object detection, and generation
Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other.
## Contact
For more information or to submit your model, please contact us at [email protected]
"""
    TITLE = "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
    custom_css = ""
    REPO_ID = "your/space-id"
    def add_new_eval(*args): return "Submission placeholder."
# --- Elo Leaderboard Configuration ---
# Enhanced data with Rank (placeholder), Organizer, License, and URL
data = [
    {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
    {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
    {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
    {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
    {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
    {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
    {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
    {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
]
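# Note: these Elo figures are a hard-coded snapshot. Update the list above (or load it
# from a file / Hub dataset instead) whenever new evaluation results come in.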
# Add organization logos (for visual enhancement)
org_logos = {
    'OpenAI': '🌱',  # You can replace these with actual icon URLs in production
    'DeepSeek': '🐋',
    'Google': '🔍',
    'Default': '🤖'
}
# Create a master DataFrame
master_df = pd.DataFrame(data)

# Add last updated timestamp
last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S")
# Define categories with fancy icons
CATEGORIES = [
    ("🏆 Overall", "Overall"),
    ("💡 MLE-Lite", "MLE-Lite"),
    ("📊 Tabular", "Tabular"),
    ("📝 NLP", "NLP"),
    ("👁️ CV", "CV")
]
DEFAULT_CATEGORY = "Overall"

# Map user-facing categories to DataFrame column names
category_to_column = {
    "MLE-Lite": "MLE-Lite_Elo",
    "Tabular": "Tabular_Elo",
    "NLP": "NLP_Elo",
    "CV": "CV_Elo",
    "Overall": "Overall"
}
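# Note: the Radio component passes the full display label (e.g. "🏆 Overall");
# update_leaderboard() strips the leading icon before looking the category up here.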
# --- Helper function to update leaderboard ---
def update_leaderboard(category_label):
    """
    Enhanced function to update the leaderboard with visual improvements
    """
    # Extract the category value from the label if it's a tuple (icon, value)
    if isinstance(category_label, tuple):
        category = category_label[1]
    else:
        # For backward compatibility or direct values
        category = category_label.split(" ")[-1] if " " in category_label else category_label

    score_column = category_to_column.get(category)
    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
        score_column = category_to_column[DEFAULT_CATEGORY]
        if score_column not in master_df.columns:
            print(f"Error: Default column '{score_column}' also not found.")
            return pd.DataFrame({
                "Rank": [],
                "Model": [],
                "Organizer": [],
                "License": [],
                "Elo Score": []
            })

    # Select base columns + the score column for sorting
    cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
    df = master_df[cols_to_select].copy()

    # Sort by the selected 'Elo Score' descending
    df.sort_values(by=score_column, ascending=False, inplace=True)

    # Add Rank with medal emojis for top 3
    df.reset_index(drop=True, inplace=True)

    # Create fancy rank with medals for top positions
    def get_rank_display(idx):
        if idx == 0:
            return "🥇 1"
        elif idx == 1:
            return "🥈 2"
        elif idx == 2:
            return "🥉 3"
        else:
            return f"{idx + 1}"

    df.insert(0, 'Rank', df.index.map(get_rank_display))

    # Add organization icons to model names
    df['Model'] = df.apply(
        lambda row: f"""<div style="display: flex; align-items: center;">
            <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
            <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
               style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
                {row['model_name']}
            </a>
        </div>""",
        axis=1
    )

    # Format Elo scores with visual indicators
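    # The bar width maps Elo scores from roughly 700 to 1400 onto 5-100% of the
    # 60px track via (score - 700) / 7, clamped so low scores still show a sliver.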
    df['Elo Display'] = df[score_column].apply(
        lambda score: f"""<div style="display: flex; align-items: center;">
            <span style="font-weight: bold; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
                {score}
            </span>
            <div style="margin-left: 10px; height: 12px; width: 60px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
                <div style="height: 100%; width: {min(100, max(5, (score-700)/7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
            </div>
        </div>"""
    )

    # Rename columns for display
    df.rename(columns={score_column: 'Elo Score'}, inplace=True)
    df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)

    # Select and reorder columns for final display
    final_columns = ["Rank", "Model", "Organizer", "License", "Elo Display"]
    df = df[final_columns]

    # Rename for display
    df.columns = ["Rank", "Model", "Organization", "License", f"Elo Score ({category})"]

    return df
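
# Lightweight self-check (illustrative addition, safe to remove): render the default
# view once and confirm the display columns come back in the expected order.
_default_view = update_leaderboard(DEFAULT_CATEGORY)
assert list(_default_view.columns) == [
    "Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"
], "update_leaderboard() returned unexpected columns"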
# --- Mock/Placeholder functions/data for other tabs ---
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
EVAL_COLS = ["Model", "Status", "Requested", "Started"]
EVAL_TYPES = ["str", "str", "str", "str"]

# --- Keep restart function if relevant ---
def restart_space():
    print(f"Attempting to restart space: {REPO_ID}")
    # Replace with your actual space restart mechanism if needed
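    # One possible implementation (an assumption, not part of the original script:
    # requires running on a Hugging Face Space with a write-scoped HF_TOKEN set):
    #
    #     import os
    #     from huggingface_hub import HfApi
    #     HfApi(token=os.environ.get("HF_TOKEN")).restart_space(repo_id=REPO_ID)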
# --- Enhanced CSS for beauty and readability ---
enhanced_css = """
/* Base styling */
:root {
    --primary-color: #1a5fb4;
    --secondary-color: #2ec27e;
    --accent-color: #e5a50a;
    --warning-color: #ff7800;
    --text-color: #333333;
    --background-color: #ffffff;
    --card-background: #f9f9f9;
    --border-color: #e0e0e0;
    --shadow-color: rgba(0, 0, 0, 0.1);
}
/* Typography */
body, .gradio-container {
    font-family: 'Inter', 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, system-ui, sans-serif !important;
    font-size: 16px !important;
    line-height: 1.6 !important;
    color: var(--text-color) !important;
    background-color: var(--background-color) !important;
}
/* Headings */
h1 {
    font-size: 2.5rem !important;
    font-weight: 700 !important;
    margin-bottom: 1.5rem !important;
    color: var(--primary-color) !important;
    text-align: center !important;
    letter-spacing: -0.02em !important;
    line-height: 1.2 !important;
}
h2 {
    font-size: 1.8rem !important;
    font-weight: 600 !important;
    margin-top: 1.5rem !important;
    margin-bottom: 1rem !important;
    color: var(--primary-color) !important;
    letter-spacing: -0.01em !important;
}
h3 {
    font-size: 1.4rem !important;
    font-weight: 600 !important;
    margin-top: 1.2rem !important;
    margin-bottom: 0.8rem !important;
    color: var(--text-color) !important;
}
/* Tabs styling */
.tabs {
    margin-top: 1rem !important;
    border-radius: 12px !important;
    overflow: hidden !important;
    box-shadow: 0 4px 12px var(--shadow-color) !important;
}
.tab-nav button {
    font-size: 1.1rem !important;
    font-weight: 500 !important;
    padding: 0.8rem 1.5rem !important;
    border-radius: 0 !important;
    transition: all 0.2s ease !important;
}
.tab-nav button.selected {
    background-color: var(--primary-color) !important;
    color: white !important;
    font-weight: 600 !important;
}
/* Card styling */
.gradio-container .gr-box, .gradio-container .gr-panel {
    border-radius: 12px !important;
    border: 1px solid var(--border-color) !important;
    box-shadow: 0 4px 12px var(--shadow-color) !important;
    overflow: hidden !important;
}
/* Table styling */
table {
    width: 100% !important;
    border-collapse: separate !important;
    border-spacing: 0 !important;
    margin: 1.5rem 0 !important;
    border-radius: 8px !important;
    overflow: hidden !important;
    box-shadow: 0 4px 12px var(--shadow-color) !important;
}
th {
    background-color: #f0f5ff !important;
    color: var(--primary-color) !important;
    font-weight: 600 !important;
    padding: 1rem !important;
    font-size: 1.1rem !important;
    text-align: left !important;
    border-bottom: 2px solid var(--primary-color) !important;
}
td {
    padding: 1rem !important;
    border-bottom: 1px solid var(--border-color) !important;
    font-size: 1rem !important;
    vertical-align: middle !important;
}
tr:nth-child(even) {
    background-color: #f8fafd !important;
}
tr:hover {
    background-color: #edf2fb !important;
}
tr:first-child td {
    border-top: none !important;
}
/* Button styling */
button.primary, .gr-button.primary {
    background-color: var(--primary-color) !important;
    color: white !important;
    font-weight: 500 !important;
    padding: 0.8rem 1.5rem !important;
    border-radius: 8px !important;
    border: none !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
}
button.primary:hover, .gr-button.primary:hover {
    background-color: #0b4a9e !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
    transform: translateY(-1px) !important;
}
/* Radio buttons */
.gr-radio {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 10px !important;
    margin: 1rem 0 !important;
}
.gr-radio label {
    background-color: #f5f7fa !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 8px !important;
    padding: 0.7rem 1.2rem !important;
    font-size: 1rem !important;
    font-weight: 500 !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    display: flex !important;
    align-items: center !important;
    gap: 8px !important;
}
.gr-radio label:hover {
    background-color: #eaeef3 !important;
    border-color: #c0c9d6 !important;
}
.gr-radio label.selected {
    background-color: #e0e9f7 !important;
    border-color: var(--primary-color) !important;
    color: var(--primary-color) !important;
    font-weight: 600 !important;
}
/* Input fields */
input, textarea, select {
    font-size: 1rem !important;
    padding: 0.8rem !important;
    border-radius: 8px !important;
    border: 1px solid var(--border-color) !important;
    transition: all 0.2s ease !important;
}
input:focus, textarea:focus, select:focus {
    border-color: var(--primary-color) !important;
    box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
    outline: none !important;
}
/* Accordion styling */
.gr-accordion {
    border-radius: 8px !important;
    overflow: hidden !important;
    margin: 1rem 0 !important;
    border: 1px solid var(--border-color) !important;
}
.gr-accordion-header {
    padding: 1rem !important;
    background-color: #f5f7fa !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    color: var(--text-color) !important;
}
.gr-accordion-content {
    padding: 1rem !important;
    background-color: white !important;
}
/* Markdown text improvements */
.markdown-text {
    font-size: 1.05rem !important;
    line-height: 1.7 !important;
}
.markdown-text p {
    margin-bottom: 1rem !important;
}
.markdown-text ul, .markdown-text ol {
    margin-left: 1.5rem !important;
    margin-bottom: 1rem !important;
}
.markdown-text li {
    margin-bottom: 0.5rem !important;
}
.markdown-text strong {
    font-weight: 600 !important;
    color: #333 !important;
}
/* Status indicators */
.status-badge {
    display: inline-block;
    padding: 0.3rem 0.7rem;
    border-radius: 99px;
    font-size: 0.85rem;
    font-weight: 500;
    text-align: center;
}
.status-pending {
    background-color: #fff8e0;
    color: #b58a00;
    border: 1px solid #ffd74d;
}
.status-running {
    background-color: #e0f2ff;
    color: #0066cc;
    border: 1px solid #66b3ff;
}
.status-completed {
    background-color: #e6f7ef;
    color: #00875a;
    border: 1px solid #57d9a3;
}
/* Footer */
.footer {
    margin-top: 2rem;
    padding: 1rem;
    text-align: center;
    font-size: 0.9rem;
    color: #666;
    border-top: 1px solid var(--border-color);
}
/* Enhanced leaderboard title */
.leaderboard-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: 1.5rem;
    padding-bottom: 1rem;
    border-bottom: 2px solid var(--border-color);
}
.leaderboard-title {
    font-size: 2.2rem;
    font-weight: 700;
    color: var(--primary-color);
    margin: 0;
    display: flex;
    align-items: center;
    gap: 0.5rem;
}
.leaderboard-subtitle {
    font-size: 1.1rem;
    color: #666;
    margin-top: 0.5rem;
}
.timestamp {
    font-size: 0.85rem;
    color: #666;
    font-style: italic;
}
/* Category selector buttons */
.category-buttons {
    display: flex;
    flex-wrap: wrap;
    gap: 10px;
    margin-bottom: 1.5rem;
}
.category-button {
    padding: 0.7rem 1.2rem;
    background-color: #f0f5ff;
    border: 1px solid #d0e0ff;
    border-radius: 8px;
    font-weight: 500;
    cursor: pointer;
    transition: all 0.2s ease;
    display: flex;
    align-items: center;
    gap: 8px;
}
.category-button:hover {
    background-color: #e0ebff;
    border-color: #b0d0ff;
}
.category-button.active {
    background-color: var(--primary-color);
    color: white;
    border-color: var(--primary-color);
}
/* Logo and brand styling */
.logo {
    font-size: 2.5em;
    margin-right: 0.5rem;
}
/* Medal styling for top ranks */
.rank-1 {
    color: #ffd700;
    font-weight: bold;
}
.rank-2 {
    color: #c0c0c0;
    font-weight: bold;
}
.rank-3 {
    color: #cd7f32;
    font-weight: bold;
}
"""
# Combine with any existing CSS
custom_css = enhanced_css + custom_css

# --- Gradio App Definition ---
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
with demo:
    # Enhanced header with timestamp
    gr.HTML(f"""
        <div class="leaderboard-header">
            <div>
                <div class="leaderboard-title">
                    <span class="logo">🏆</span> MLE-Dojo Benchmark Leaderboard
                </div>
                <div class="leaderboard-subtitle">
                    Comprehensive evaluation of AI models across multiple domains
                </div>
            </div>
            <div class="timestamp">
                Last updated: {last_updated}
            </div>
        </div>
    """)

    # Introduction with enhanced styling
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Column():
                gr.HTML("""
                    <h2 style="display: flex; align-items: center; gap: 10px;">
                        <span style="font-size: 1.3em;">📊</span> Model Performance Rankings
                    </h2>
                    <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
                """)

                # Enhanced category selector
                category_selector = gr.Radio(
                    choices=[x[0] for x in CATEGORIES],
                    label="Select Performance Domain:",
                    value="🏆 Overall",
                    interactive=True,
                    elem_classes="fancy-radio"
                )

                # Visual separator
                gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')

                # Enhanced leaderboard table
                leaderboard_df_component = gr.Dataframe(
                    value=update_leaderboard(DEFAULT_CATEGORY),
                    headers=["Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"],
                    datatype=["html", "html", "str", "str", "html"],
                    interactive=False,
                    row_count=(len(master_df), "fixed"),
                    col_count=(5, "fixed"),
                    wrap=True,
                    elem_id="leaderboard-table",
                )
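                # Note: the "html" entries in `datatype` are what let the Model and
                # Elo Score cells render the generated markup instead of raw strings.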
                # Stats cards (visual enhancement)
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.HTML(f"""
                            <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
                                <div style="font-size: 2em;">📊</div>
                                <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
                                <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
                            </div>
                        """)
                    with gr.Column(scale=1):
                        gr.HTML(f"""
                            <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
                                <div style="font-size: 2em;">🌐</div>
                                <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
                                <div style="font-size: 1.1em; color: #666;">Organizations</div>
                            </div>
                        """)
                    with gr.Column(scale=1):
                        gr.HTML(f"""
                            <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
                                <div style="font-size: 2em;">💠</div>
                                <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
                                <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
                            </div>
                        """)

            # Link the radio button change to the update function
            category_selector.change(
                fn=update_leaderboard,
                inputs=category_selector,
                outputs=leaderboard_df_component
            )
        with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
            # Enhanced about section
            gr.HTML("""
                <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
                    <div style="font-size: 4em;">🧪</div>
                    <div>
                        <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
                        <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
                    </div>
                </div>
            """)

            # Use the LLM_BENCHMARKS_TEXT variable
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

            # Add methodology cards for visual enhancement
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                        <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
                            <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">💡</div>
                            <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
                            <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
                            data preprocessing, feature engineering, model selection, and basic deployment.</p>
                        </div>
                    """)
                with gr.Column():
                    gr.HTML("""
                        <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
                            <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📊</div>
                            <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
                            <p>Tests a model's ability to process, analyze, and model structured data, including
                            statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
                        </div>
                    """)
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                        <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
                            <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📝</div>
                            <h3 style="text-align: center; margin-top: 0;">NLP</h3>
                            <p>Evaluates natural language processing capabilities including text classification,
                            sentiment analysis, entity recognition, text generation, and language understanding.</p>
                        </div>
                    """)
                with gr.Column():
                    gr.HTML("""
                        <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
                            <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">👁️</div>
                            <h3 style="text-align: center; margin-top: 0;">CV</h3>
                            <p>Tests computer vision capabilities including image classification, object detection,
                            image generation, and visual understanding tasks across various domains.</p>
                        </div>
                    """)
        # Optional: Uncomment if you want to re-enable the Submit tab
        # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
        #     with gr.Column():
        #         gr.HTML("""
        #             <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
        #                 <div style="font-size: 4em;">🚀</div>
        #                 <div>
        #                     <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
        #                     <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
        #                 </div>
        #             </div>
        #         """)
        #
        #         with gr.Row():
        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
        #
        #         with gr.Column():
        #             with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
        #                 finished_eval_table = gr.components.Dataframe(
        #                     value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
        #                 )
        #             with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
        #                 running_eval_table = gr.components.Dataframe(
        #                     value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
        #                 )
        #             with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
        #                 pending_eval_table = gr.components.Dataframe(
        #                     value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
        #                 )
        #
        #         gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
        #
        #         gr.HTML("""
        #             <h2 style="display: flex; align-items: center; gap: 10px;">
        #                 <span style="font-size: 1.3em;">📝</span> Model Submission Form
        #             </h2>
        #         """)
        #
        #         with gr.Row():
        #             with gr.Column():
        #                 model_name_textbox = gr.Textbox(
        #                     label="Model Name (on Hugging Face Hub)",
        #                     placeholder="Enter your model name...",
        #                     elem_classes="enhanced-input"
        #                 )
        #                 revision_name_textbox = gr.Textbox(
        #                     label="Revision / Commit Hash",
        #                     placeholder="main",
        #                     elem_classes="enhanced-input"
        #                 )
        #                 model_type = gr.Dropdown(
        #                     choices=["Type A", "Type B", "Type C"],
        #                     label="Model Type",
        #                     multiselect=False,
        #                     value=None,
        #                     interactive=True,
        #                     elem_classes="enhanced-dropdown"
        #                 )
        #             with gr.Column():
        #                 precision = gr.Dropdown(
        #                     choices=["float16", "bfloat16", "float32", "int8", "auto"],
        #                     label="Precision",
        #                     multiselect=False,
        #                     value="auto",
        #                     interactive=True,
        #                     elem_classes="enhanced-dropdown"
        #                 )
        #                 weight_type = gr.Dropdown(
        #                     choices=["Original", "Adapter", "Delta"],
        #                     label="Weights Type",
        #                     multiselect=False,
        #                     value="Original",
        #                     interactive=True,
        #                     elem_classes="enhanced-dropdown"
        #                 )
        #                 base_model_name_textbox = gr.Textbox(
        #                     label="Base Model (for delta or adapter weights)",
        #                     placeholder="Only needed for adapter/delta weights",
        #                     elem_classes="enhanced-input"
        #                 )
        #
        #         submit_button = gr.Button(
        #             "Submit for Evaluation",
        #             elem_classes="primary-button"
        #         )
        #         submission_result = gr.Markdown()
        #         submit_button.click(
        #             add_new_eval,
        #             [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
        #             submission_result,
        #         )
    # Enhanced citation section
    with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
        gr.HTML("""
            <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
                <div style="font-size: 2.5em;">📄</div>
                <div>
                    <h3 style="margin: 0;">How to Cite This Benchmark</h3>
                    <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
                </div>
            </div>
        """)
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=10,
            elem_id="citation-button",
            show_copy_button=True,
        )

    # Footer
    gr.HTML("""
        <div class="footer">
            <p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
            <p style="margin-top: 5px; display: flex; justify-content: center; gap: 20px;">
                <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
                <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
                <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
            </p>
        </div>
    """)
# --- Keep scheduler if relevant ---
if __name__ == "__main__":
    try:
        scheduler = BackgroundScheduler()
        if callable(restart_space):
            if REPO_ID and REPO_ID != "your/space-id":
                scheduler.add_job(restart_space, "interval", seconds=1800)  # Restart every 30 mins
                scheduler.start()
            else:
                print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
        else:
            print("Warning: restart_space function not available; space restart job not scheduled.")
    except Exception as e:
        print(f"Failed to initialize or start scheduler: {e}")

    # --- Launch the app ---
    print("Launching Enhanced Gradio App...")
    demo.launch()