Spaces:

MLE-Dojo
/

Leaderboard

Running

App Files Files Community

Leaderboard / app.py

Jerrycool

Update app.py

ffb569a verified 4 months ago

raw

history blame

8.1 kB

	import gradio as gr
	import pandas as pd
	from apscheduler.schedulers.background import BackgroundScheduler

	# --- Placeholder Imports / Definitions ---
	# The real texts, CSS, repo id and submission hook come from the project's
	# ``src`` package. If any of those imports fails, every name is replaced by
	# a placeholder so the module can still be loaded on its own.
	try:
		from src.about import (
			CITATION_BUTTON_LABEL,
			CITATION_BUTTON_TEXT,
			EVALUATION_QUEUE_TEXT,
			INTRODUCTION_TEXT,
			LLM_BENCHMARKS_TEXT,
			TITLE,  # Will override below
		)
		from src.display.css_html_js import custom_css
		from src.envs import REPO_ID
		from src.submission.submit import add_new_eval
	except ImportError:
		CITATION_BUTTON_LABEL = "Citation"
		CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
		EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
		INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
		LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
		# Defined so both branches bind the same names; the module reassigns
		# TITLE to the hero banner further down.
		TITLE = "MLE-Dojo Benchmark Leaderboard"
		custom_css = ""
		REPO_ID = "your/space-id"

		def add_new_eval(*args, **kwargs):
			"""Stand-in for the project's submission hook.

			Accepts and ignores any positional or keyword arguments.

			Returns:
				The fixed placeholder message.
			"""
			return "Submission placeholder."

	# --- Elo Data ---
	# One tuple per model, laid out in the order given by _ELO_COLUMNS.
	_ELO_COLUMNS = (
		'model_name', 'url', 'organizer', 'license',
		'MLE-Lite_Elo', 'Tabular_Elo', 'NLP_Elo', 'CV_Elo', 'Overall',
	)
	_ELO_ROWS = [
		('gpt-4o-mini', 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'OpenAI', 'Proprietary', 753, 839, 758, 754, 778),
		('gpt-4o', 'https://openai.com/index/hello-gpt-4o/', 'OpenAI', 'Proprietary', 830, 861, 903, 761, 841),
		('o3-mini', 'https://openai.com/index/openai-o3-mini/', 'OpenAI', 'Proprietary', 1108, 1019, 1056, 1207, 1096),
		('deepseek-v3', 'https://api-docs.deepseek.com/news/news1226', 'DeepSeek', 'DeepSeek', 1004, 1015, 1028, 1067, 1023),
		('deepseek-r1', 'https://api-docs.deepseek.com/news/news250120', 'DeepSeek', 'DeepSeek', 1137, 1053, 1103, 1083, 1100),
		('gemini-2.0-flash', 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'Google', 'Proprietary', 847, 923, 860, 978, 895),
		('gemini-2.0-pro', 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'Google', 'Proprietary', 1064, 1139, 1028, 973, 1054),
		('gemini-2.5-pro', 'https://deepmind.google/technologies/gemini/pro/', 'Google', 'Proprietary', 1257, 1150, 1266, 1177, 1214),
	]

	# List of per-model dicts keyed by column name, and the frame built from it.
	data = [dict(zip(_ELO_COLUMNS, row)) for row in _ELO_ROWS]
	master_df = pd.DataFrame(data)

	# Display name of each category -> column of master_df holding its score.
	category_to_column = {
		"Overall": "Overall",
		"MLE-Lite": "MLE-Lite_Elo",
		"Tabular": "Tabular_Elo",
		"NLP": "NLP_Elo",
		"CV": "CV_Elo",
	}
	# Selector choices, in the mapping's insertion order.
	CATEGORIES = list(category_to_column)
	DEFAULT_CATEGORY = "Overall"

	def update_leaderboard(category):
		"""Build the ranked leaderboard table for one category.

		Args:
			category: Display name of the category, i.e. a key of
				``category_to_column``. Any other value falls back to the
				column of ``DEFAULT_CATEGORY``.

		Returns:
			A new DataFrame with the columns Rank, Model, Organizer, License
			and Elo Score, ordered by descending score. Rank counts from 1 by
			row position, and Model is an HTML anchor that links the model
			name to its URL. ``master_df`` itself is not modified.
		"""
		col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])

		# A stable sort keeps models with equal scores (NLP_Elo has two rows at
		# 1028) in their master_df order, so the ranking is the same on every
		# call instead of depending on the sort algorithm's tie handling.
		ranked = (
			master_df[['model_name', 'url', 'organizer', 'license', col]]
			.sort_values(by=col, ascending=False, kind='stable')
			.reset_index(drop=True)
		)
		ranked.insert(0, 'Rank', ranked.index + 1)
		ranked['Model'] = [
			f"<a href='{url}' target='_blank'>{name}</a>"
			for url, name in zip(ranked['url'], ranked['model_name'])
		]
		ranked = ranked.rename(
			columns={col: 'Elo Score', 'organizer': 'Organizer', 'license': 'License'}
		)
		return ranked[['Rank', 'Model', 'Organizer', 'License', 'Elo Score']]

	# --- Dark Theme + Custom CSS ---
	# Appends the dark-theme stylesheet to whatever `custom_css` already holds
	# (the project's CSS, or "" in the fallback case). Sections, in order:
	# Inter font import and page colors, the .hero-section banner, the
	# .tab-buttons tab bar, the #category-selector radio pills, and tables.
	# NOTE(review): CSS only honors `@import` ahead of other rules; if the
	# imported `custom_css` is non-empty the font import may be ignored — confirm.
	# NOTE(review): `input:checked + label` assumes each radio input is the
	# preceding sibling of its label — confirm against the markup Gradio renders.
	custom_css += """
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

	body {
	font-family: 'Inter', sans-serif;
	background-color: #1e1e2f !important;
	color: #e0e0f0 !important;
	}

	/* Hero Section */
	.hero-section {
	background: linear-gradient(135deg, #6c63ff, #8f94fb);
	color: #fff;
	padding: 2rem 1rem;
	border-radius: .75rem;
	margin-bottom: 1.5rem;
	text-align: center;
	box-shadow: 0 4px 10px rgba(0,0,0,0.3);
	}
	.hero-section h1 {
	margin: 0;
	font-size: 2.5rem !important;
	font-weight: 700 !important;
	}
	.hero-section h2 {
	margin: .5rem 0 0 !important;
	font-size: 1.25rem !important;
	font-weight: 400 !important;
	opacity: 0.9;
	}

	/* Tab Buttons */
	.tab-buttons button {
	border-radius: 20px !important;
	padding: 0.5rem 1rem !important;
	margin-right: 0.5rem !important;
	background: #3a3a4c !important;
	color: #e0e0f0 !important;
	border: none !important;
	transition: background 0.3s !important;
	font-weight: 500 !important;
	}
	.tab-buttons button:hover {
	background: #4a4a6f !important;
	}
	.tab-buttons button[aria-selected="true"] {
	background: #6c63ff !important;
	color: #fff !important;
	}

	/* Category Selector Pills */
	#category-selector input[type="radio"] { display: none; }
	#category-selector label {
	display: inline-block;
	padding: 0.5rem 1rem;
	margin-right: 0.5rem;
	border-radius: 999px;
	background: #3a3a4c;
	color: #e0e0f0;
	cursor: pointer;
	transition: background 0.3s, color 0.3s;
	font-weight: 500;
	}
	#category-selector input[type="radio"]:checked + label {
	background: #6c63ff;
	color: #fff;
	}

	/* Table Styling */
	table {
	width: 100%;
	border: none;
	border-radius: .5rem;
	overflow: hidden;
	box-shadow: 0 2px 4px rgba(0,0,0,0.3);
	margin: 1rem 0;
	}
	th {
	background: #6c63ff !important;
	color: #fff !important;
	}
	td, th {
	padding: 0.75rem 1rem;
	background: #1e1e2f;
	color: #e0e0f0;
	}
	tr:nth-child(even) td {
	background: #2a2a3c;
	}
	tr:hover td {
	background: #3c3b52;
	}
	td a {
	color: #9afeff;
	text-decoration: none;
	}
	td a:hover {
	text-decoration: underline;
	}
	"""

	# --- Override Title with Hero ---
	# Replaces any TITLE imported from src.about with an HTML banner; the
	# `hero-section` class is styled by the rules appended to custom_css.
	TITLE = """
	<div class="hero-section">
	<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>
	<h2>Improving LLM Agents for Machine Learning Engineering</h2>
	</div>
	"""

	# --- Build App ---
	# `Dark` is not among Gradio's documented prebuilt themes, so calling
	# `gr.themes.Dark()` directly can fail with AttributeError at import time.
	# Use it only if the installed Gradio provides it; otherwise start from the
	# Base theme. The dark colors themselves come from custom_css either way.
	_theme_factory = getattr(gr.themes, "Dark", gr.themes.Base)
	demo = gr.Blocks(css=custom_css, theme=_theme_factory())

	with demo:
		# Page header: hero banner followed by the introduction text.
		gr.HTML(TITLE)
		gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

		with gr.Tabs(elem_classes="tab-buttons") as tabs:
			with gr.TabItem("📋 Leaderboard"):
				gr.Markdown("## Model Elo Rankings by Category")
				category_selector = gr.Radio(
					choices=CATEGORIES,
					value=DEFAULT_CATEGORY,
					interactive=True,
					elem_id="category-selector",
				)
				# Read-only table, pre-filled with the default category; the
				# "html" datatype lets the Model column render as a link.
				leaderboard_df = gr.Dataframe(
					value=update_leaderboard(DEFAULT_CATEGORY),
					headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
					datatype=["number", "html", "str", "str", "number"],
					interactive=False,
					row_count=(len(master_df), "fixed"),
					col_count=(5, "fixed"),
					wrap=True,
					elem_id="leaderboard-table",
				)
				# Rebuild the table whenever a different category is selected.
				category_selector.change(
					fn=update_leaderboard,
					inputs=category_selector,
					outputs=leaderboard_df,
				)

			with gr.TabItem("ℹ️ About"):
				gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

		# Collapsed citation box with a copy button.
		with gr.Accordion("📙 Citation", open=False):
			gr.Textbox(
				value=CITATION_BUTTON_TEXT,
				label=CITATION_BUTTON_LABEL,
				lines=10,
				elem_id="citation-button",
				show_copy_button=True,
			)

	# Start the server only when run as a script, not when imported.
	if __name__ == "__main__":
		print("Launching Gradio App...")
		demo.launch()