Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,7 @@ import pandas as pd
|
|
3 |
from apscheduler.schedulers.background import BackgroundScheduler
|
4 |
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
|
5 |
|
6 |
-
# ---
|
7 |
-
# Option 1: If src is a directory in the same folder as your script:
|
8 |
try:
|
9 |
from src.about import (
|
10 |
CITATION_BUTTON_LABEL,
|
@@ -14,29 +13,45 @@ try:
|
|
14 |
LLM_BENCHMARKS_TEXT,
|
15 |
TITLE,
|
16 |
)
|
17 |
-
from src.display.css_html_js import custom_css # Assuming this
|
18 |
from src.envs import REPO_ID # Keep if needed for restart_space or other functions
|
19 |
from src.submission.submit import add_new_eval # Keep if using the submit tab
|
20 |
print("Successfully imported from src module.")
|
21 |
-
#
|
|
|
|
|
22 |
except ImportError:
|
23 |
print("Warning: Using placeholder values because src module imports failed.")
|
24 |
CITATION_BUTTON_LABEL="Citation"
|
25 |
-
CITATION_BUTTON_TEXT="Please cite us if you use this benchmark
|
26 |
EVALUATION_QUEUE_TEXT="Current evaluation queue:"
|
27 |
-
INTRODUCTION_TEXT="
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
TITLE="<h1>π MLE-Dojo Benchmark Leaderboard</h1>"
|
30 |
-
custom_css="" # Start with empty CSS
|
31 |
REPO_ID="your/space-id" # Replace with actual ID if needed
|
32 |
def add_new_eval(*args): return "Submission placeholder."
|
|
|
33 |
# --- End Placeholder Definitions ---
|
34 |
|
35 |
|
36 |
# --- Elo Leaderboard Configuration ---
|
37 |
# Leaderboard data: per-model URL, organizer, license, and per-category Elo scores (Rank is computed later in update_leaderboard)
|
38 |
-
# !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
|
39 |
-
# Verify organizer and license information for accuracy.
|
40 |
data = [
|
41 |
{'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
|
42 |
{'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
|
@@ -49,7 +64,6 @@ data = [
|
|
49 |
]
|
50 |
|
51 |
# Create a master DataFrame
|
52 |
-
# Note: Columns 'organizer' and 'license' are created in lowercase here.
|
53 |
master_df = pd.DataFrame(data)
|
54 |
|
55 |
# Define categories for selection (user-facing)
|
@@ -75,21 +89,14 @@ def update_leaderboard(category):
|
|
75 |
if score_column is None or score_column not in master_df.columns:
|
76 |
print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
|
77 |
score_column = category_to_column[DEFAULT_CATEGORY]
|
78 |
-
# Check fallback column too
|
79 |
if score_column not in master_df.columns:
|
80 |
-
# Return empty df with correct columns if still invalid
|
81 |
-
# Use lowercase keys here consistent with master_df for the empty case
|
82 |
print(f"Error: Default column '{score_column}' also not found.")
|
|
|
83 |
return pd.DataFrame({
|
84 |
-
"Rank": [],
|
85 |
-
"Model": [],
|
86 |
-
"Elo Score": [],
|
87 |
-
"Organizer": [], # Changed 'organizer' -> 'Organizer' for consistency in empty case
|
88 |
-
"License": [] # Changed 'license' -> 'License' for consistency in empty case
|
89 |
})
|
90 |
|
91 |
-
# Select base columns + the score column for sorting
|
92 |
-
# Ensure 'organizer' and 'license' are selected correctly (lowercase)
|
93 |
cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
|
94 |
df = master_df[cols_to_select].copy()
|
95 |
|
@@ -100,84 +107,318 @@ def update_leaderboard(category):
|
|
100 |
df.reset_index(drop=True, inplace=True)
|
101 |
df.insert(0, 'Rank', df.index + 1)
|
102 |
|
103 |
-
# Format Model Name as HTML Hyperlink
|
104 |
-
# The resulting column name will be 'Model' (capitalized)
|
105 |
df['Model'] = df.apply(
|
106 |
-
lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
|
107 |
axis=1
|
108 |
)
|
109 |
|
110 |
# Rename the score column to 'Elo Score' for consistent display
|
111 |
df.rename(columns={score_column: 'Elo Score'}, inplace=True)
|
112 |
|
113 |
-
# Rename 'organizer' and 'license' to match desired display headers
|
114 |
df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
|
115 |
|
116 |
-
# Select and reorder columns for final display
|
117 |
-
# Use capitalized 'Organizer' and 'License' here because they have been renamed.
|
118 |
final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
|
119 |
df = df[final_columns]
|
120 |
|
121 |
-
#
|
122 |
-
# 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
|
123 |
return df
|
124 |
|
125 |
# --- Mock/Placeholder functions/data for other tabs ---
|
126 |
-
|
127 |
-
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
|
128 |
finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
129 |
running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
130 |
pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
131 |
-
EVAL_COLS = ["Model", "Status", "Requested", "Started"]
|
132 |
-
EVAL_TYPES = ["str", "str", "str", "str"]
|
133 |
|
134 |
# --- Keep restart function if relevant ---
|
135 |
def restart_space():
|
136 |
-
# Make sure REPO_ID is correctly defined/imported if this function is used
|
137 |
print(f"Attempting to restart space: {REPO_ID}")
|
138 |
-
# Replace with
|
139 |
|
140 |
|
141 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
-
|
144 |
-
# Add CSS rules to make the base font size larger.
|
145 |
-
# Adjust the '1.2em' value (e.g., to '1.4em', '16px') to change the size.
|
146 |
-
# The !important flag helps override theme defaults.
|
147 |
-
# If the imported custom_css already has content, append to it.
|
148 |
-
font_size_css = """
|
149 |
body {
|
150 |
-
font-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
"""
|
178 |
|
179 |
-
#
|
180 |
-
|
|
|
181 |
|
182 |
with demo:
|
183 |
# Use the TITLE variable imported or defined above
|
@@ -187,25 +428,24 @@ with demo:
|
|
187 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
188 |
|
189 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
190 |
-
|
|
|
191 |
with gr.Column():
|
192 |
-
gr.Markdown("##
|
193 |
category_selector = gr.Radio(
|
194 |
choices=CATEGORIES,
|
195 |
-
label="
|
196 |
value=DEFAULT_CATEGORY,
|
197 |
interactive=True,
|
|
|
|
|
|
|
198 |
)
|
199 |
leaderboard_df_component = gr.Dataframe(
|
200 |
-
# Initialize with sorted data for the default category
|
201 |
value=update_leaderboard(DEFAULT_CATEGORY),
|
202 |
-
# Headers for DISPLAY should match the *renamed* columns from update_leaderboard
|
203 |
headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
|
204 |
-
# Datatype maps to the final df columns: Rank, Model, Organizer, License, Elo Score
|
205 |
datatype=["number", "html", "str", "str", "number"],
|
206 |
interactive=False,
|
207 |
-
# --- FIX APPLIED: Removed unsupported 'height' argument ---
|
208 |
-
# row_count determines the number of rows to display
|
209 |
row_count=(len(master_df), "fixed"), # Display all rows
|
210 |
col_count=(5, "fixed"),
|
211 |
wrap=True, # Allow text wrapping in cells
|
@@ -223,71 +463,72 @@ with demo:
|
|
223 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
224 |
|
225 |
# --- Submit Tab (Commented out as in original request) ---
|
226 |
-
#
|
227 |
-
# with gr.TabItem("π Submit
|
228 |
# with gr.Column():
|
229 |
-
#
|
230 |
-
#
|
231 |
-
#
|
232 |
-
#
|
233 |
-
#
|
234 |
-
#
|
235 |
-
#
|
236 |
-
#
|
237 |
-
#
|
238 |
-
#
|
239 |
-
#
|
240 |
-
#
|
241 |
-
#
|
242 |
-
#
|
243 |
-
#
|
244 |
-
#
|
245 |
-
#
|
246 |
-
#
|
247 |
-
#
|
248 |
-
#
|
249 |
-
#
|
250 |
-
#
|
251 |
-
#
|
252 |
-
#
|
253 |
-
#
|
254 |
-
#
|
255 |
-
#
|
256 |
-
#
|
257 |
-
#
|
258 |
-
#
|
259 |
-
#
|
260 |
-
#
|
261 |
-
#
|
262 |
-
#
|
263 |
-
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
with gr.Accordion("π Citation", open=False):
|
266 |
-
# Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables
|
267 |
citation_button = gr.Textbox(
|
268 |
value=CITATION_BUTTON_TEXT,
|
269 |
label=CITATION_BUTTON_LABEL,
|
270 |
-
lines=10, # Adjust lines
|
271 |
-
elem_id="citation-button",
|
272 |
show_copy_button=True,
|
|
|
273 |
)
|
274 |
|
275 |
-
# IGNORE_WHEN_COPYING_START
|
276 |
-
# content_copy download
|
277 |
-
# Use code with caution.
|
278 |
-
# IGNORE_WHEN_COPYING_END
|
279 |
-
|
280 |
# --- Keep scheduler if relevant ---
|
281 |
# Only start scheduler if the script is run directly
|
282 |
if __name__ == "__main__":
|
283 |
try:
|
284 |
scheduler = BackgroundScheduler()
|
285 |
-
# Add job only if restart_space is callable (i.e., not a placeholder or failed import)
|
286 |
if callable(restart_space):
|
287 |
-
# Check if REPO_ID seems valid before scheduling
|
288 |
if REPO_ID and REPO_ID != "your/space-id":
|
289 |
scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
|
290 |
scheduler.start()
|
|
|
291 |
else:
|
292 |
print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
|
293 |
else:
|
@@ -300,6 +541,6 @@ if __name__ == "__main__":
|
|
300 |
# Ensures the app launches only when the script is run directly
|
301 |
if __name__ == "__main__":
|
302 |
# Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
|
303 |
-
# Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions
|
304 |
-
print("Launching Gradio App...")
|
305 |
demo.launch()
|
|
|
3 |
from apscheduler.schedulers.background import BackgroundScheduler
|
4 |
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
|
5 |
|
6 |
+
# --- Attempt to import from src or use placeholders ---
|
|
|
7 |
try:
|
8 |
from src.about import (
|
9 |
CITATION_BUTTON_LABEL,
|
|
|
13 |
LLM_BENCHMARKS_TEXT,
|
14 |
TITLE,
|
15 |
)
|
16 |
+
from src.display.css_html_js import custom_css # Assuming this might exist but we'll override/append
|
17 |
from src.envs import REPO_ID # Keep if needed for restart_space or other functions
|
18 |
from src.submission.submit import add_new_eval # Keep if using the submit tab
|
19 |
print("Successfully imported from src module.")
|
20 |
+
# Fall back to an empty string if the imported custom_css is not a str (e.g. None)
|
21 |
+
if not isinstance(custom_css, str):
|
22 |
+
custom_css = ""
|
23 |
except ImportError:
|
24 |
print("Warning: Using placeholder values because src module imports failed.")
|
25 |
CITATION_BUTTON_LABEL="Citation"
|
26 |
+
CITATION_BUTTON_TEXT="Please cite us if you use this benchmark...\n[Your BibTeX entry here]" # Added placeholder content
|
27 |
EVALUATION_QUEUE_TEXT="Current evaluation queue:"
|
28 |
+
INTRODUCTION_TEXT="""
|
29 |
+
Welcome to the **MLE-Dojo Benchmark Leaderboard**. Select a category below to see the rankings.
|
30 |
+
Models are ranked based on their Elo scores across various machine learning tasks.
|
31 |
+
"""
|
32 |
+
LLM_BENCHMARKS_TEXT="""
|
33 |
+
## About the Benchmarks
|
34 |
+
|
35 |
+
This leaderboard tracks the performance of various models on the MLE-Dojo benchmark suite.
|
36 |
+
The suite includes tasks covering:
|
37 |
+
|
38 |
+
* **MLE-Lite:** Lightweight ML tasks.
|
39 |
+
* **Tabular:** Tasks involving structured data.
|
40 |
+
* **NLP:** Natural Language Processing tasks.
|
41 |
+
* **CV:** Computer Vision tasks.
|
42 |
+
|
43 |
+
Scores are calculated using an Elo rating system. Higher scores indicate better performance relative to other models in the benchmark.
|
44 |
+
"""
|
45 |
TITLE="<h1>π MLE-Dojo Benchmark Leaderboard</h1>"
|
46 |
+
custom_css="" # Start with empty CSS
|
47 |
REPO_ID="your/space-id" # Replace with actual ID if needed
|
48 |
def add_new_eval(*args): return "Submission placeholder."
|
49 |
+
print("Placeholder function 'add_new_eval' defined.")
|
50 |
# --- End Placeholder Definitions ---
|
51 |
|
52 |
|
53 |
# --- Elo Leaderboard Configuration ---
|
54 |
# Leaderboard data: per-model URL, organizer, license, and per-category Elo scores (Rank is computed later in update_leaderboard)
|
|
|
|
|
55 |
data = [
|
56 |
{'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
|
57 |
{'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
|
|
|
64 |
]
|
65 |
|
66 |
# Create a master DataFrame
|
|
|
67 |
master_df = pd.DataFrame(data)
|
68 |
|
69 |
# Define categories for selection (user-facing)
|
|
|
89 |
if score_column is None or score_column not in master_df.columns:
|
90 |
print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
|
91 |
score_column = category_to_column[DEFAULT_CATEGORY]
|
|
|
92 |
if score_column not in master_df.columns:
|
|
|
|
|
93 |
print(f"Error: Default column '{score_column}' also not found.")
|
94 |
+
# Return empty df with correct capitalized column names for display
|
95 |
return pd.DataFrame({
|
96 |
+
"Rank": [], "Model": [], "Elo Score": [], "Organizer": [], "License": []
|
|
|
|
|
|
|
|
|
97 |
})
|
98 |
|
99 |
+
# Select base columns + the score column for sorting (use original case from master_df)
|
|
|
100 |
cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
|
101 |
df = master_df[cols_to_select].copy()
|
102 |
|
|
|
107 |
df.reset_index(drop=True, inplace=True)
|
108 |
df.insert(0, 'Rank', df.index + 1)
|
109 |
|
110 |
+
# Format Model Name as HTML Hyperlink (results in 'Model' column)
|
|
|
111 |
df['Model'] = df.apply(
|
112 |
+
lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none; font-weight: 600;'>{row['model_name']}</a>",
|
113 |
axis=1
|
114 |
)
|
115 |
|
116 |
# Rename the score column to 'Elo Score' for consistent display
|
117 |
df.rename(columns={score_column: 'Elo Score'}, inplace=True)
|
118 |
|
119 |
+
# Rename 'organizer' and 'license' to match desired display headers (Capitalized)
|
120 |
df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
|
121 |
|
122 |
+
# Select and reorder columns for final display (use Capitalized names)
|
|
|
123 |
final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
|
124 |
df = df[final_columns]
|
125 |
|
126 |
+
# Return DataFrame with columns: 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
|
|
|
127 |
return df
|
128 |
|
129 |
# --- Mock/Placeholder functions/data for other tabs ---
|
130 |
+
print("Warning: Evaluation queue data fetching is disabled/mocked.")
|
|
|
131 |
finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
132 |
running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
133 |
pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
134 |
+
EVAL_COLS = ["Model", "Status", "Requested", "Started"]
|
135 |
+
EVAL_TYPES = ["str", "str", "str", "str"]
|
136 |
|
137 |
# --- Keep restart function if relevant ---
|
138 |
def restart_space():
|
|
|
139 |
print(f"Attempting to restart space: {REPO_ID}")
|
140 |
+
# Replace with actual restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))
|
141 |
|
142 |
|
143 |
+
# --- Enhanced CSS ---
|
144 |
+
# Concatenate existing CSS (if any) with new styles
|
145 |
+
# Ensure custom_css is a string before appending
|
146 |
+
if not isinstance(custom_css, str):
|
147 |
+
custom_css = ""
|
148 |
+
|
149 |
+
custom_css += """
|
150 |
+
/* --- Import Font --- */
|
151 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
|
152 |
|
153 |
+
/* --- Global Styles & Font --- */
|
|
|
|
|
|
|
|
|
|
|
154 |
body {
|
155 |
+
font-family: 'Inter', sans-serif;
|
156 |
+
background: linear-gradient(to bottom right, #fdfbfb, #ebedee); /* Subtle gradient */
|
157 |
+
color: #333;
|
158 |
+
}
|
159 |
+
|
160 |
+
:root {
|
161 |
+
--primary-color: #007bff; /* Example primary color */
|
162 |
+
--text-color: #333;
|
163 |
+
--border-radius: 8px;
|
164 |
+
--card-background: rgba(255, 255, 255, 0.8); /* Slightly transparent */
|
165 |
+
--shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
|
166 |
+
}
|
167 |
+
|
168 |
+
/* Set base font size on html for rem units */
|
169 |
+
html {
|
170 |
+
font-size: 16px; /* Base font size */
|
171 |
+
}
|
172 |
+
|
173 |
+
/* Increase overall text size slightly using rem */
|
174 |
+
.gradio-container {
|
175 |
+
font-size: 1rem; /* Approx 16px */
|
176 |
+
line-height: 1.6;
|
177 |
+
}
|
178 |
+
|
179 |
+
/* --- Headings --- */
|
180 |
+
h1, .markdown-text h1 {
|
181 |
+
font-size: 2.5rem; /* Larger title */
|
182 |
+
font-weight: 700;
|
183 |
+
color: #2c3e50; /* Darker heading color */
|
184 |
+
margin-bottom: 1rem;
|
185 |
+
text-align: center;
|
186 |
+
padding-top: 1rem;
|
187 |
+
}
|
188 |
+
h2, .markdown-text h2 {
|
189 |
+
font-size: 1.75rem; /* Larger section titles */
|
190 |
+
font-weight: 600;
|
191 |
+
color: #2c3e50;
|
192 |
+
margin-top: 1.5rem;
|
193 |
+
margin-bottom: 0.75rem;
|
194 |
+
border-bottom: 2px solid var(--primary-color);
|
195 |
+
padding-bottom: 0.3rem;
|
196 |
+
}
|
197 |
+
|
198 |
+
/* --- Markdown Text Styling --- */
|
199 |
+
.markdown-text p, .markdown-text li {
|
200 |
+
font-size: 1.05rem; /* Slightly larger paragraph text */
|
201 |
+
color: var(--text-color);
|
202 |
+
}
|
203 |
+
.markdown-text strong {
|
204 |
+
font-weight: 600;
|
205 |
+
color: #0056b3;
|
206 |
+
}
|
207 |
+
|
208 |
+
/* --- Tab Styling --- */
|
209 |
+
.tab-buttons > .tabs > button {
|
210 |
+
font-size: 1.1rem !important;
|
211 |
+
font-weight: 600;
|
212 |
+
padding: 12px 20px !important;
|
213 |
+
border-radius: var(--border-radius) var(--border-radius) 0 0 !important;
|
214 |
+
background-color: #e9ecef !important;
|
215 |
+
border-bottom: 2px solid transparent !important;
|
216 |
+
transition: all 0.3s ease;
|
217 |
+
}
|
218 |
+
.tab-buttons > .tabs > button.selected {
|
219 |
+
background-color: var(--card-background) !important;
|
220 |
+
border-bottom: 2px solid var(--primary-color) !important;
|
221 |
+
color: var(--primary-color) !important;
|
222 |
+
box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.05);
|
223 |
+
}
|
224 |
+
|
225 |
+
/* --- Radio Button "Chips" Styling --- */
|
226 |
+
/* Targeting the container for the radio items */
|
227 |
+
.gradio-container .styler_radio_ MuiFormGroup-root {
|
228 |
+
display: flex;
|
229 |
+
flex-direction: row; /* Arrange horizontally */
|
230 |
+
flex-wrap: wrap;
|
231 |
+
gap: 10px; /* Space between chips */
|
232 |
+
margin-bottom: 1.5rem; /* Space below the chips */
|
233 |
+
}
|
234 |
+
|
235 |
+
/* Styling individual radio items as chips */
|
236 |
+
.gradio-container .styler_radio_ MuiFormControlLabel-root {
|
237 |
+
background-color: #f8f9fa;
|
238 |
+
border: 1px solid #dee2e6;
|
239 |
+
padding: 8px 16px; /* Chip padding */
|
240 |
+
border-radius: 20px; /* Pill shape */
|
241 |
+
cursor: pointer;
|
242 |
+
transition: all 0.2s ease-in-out;
|
243 |
+
margin: 0 !important; /* Override default margins */
|
244 |
+
}
|
245 |
+
|
246 |
+
/* Hide the actual radio button circle */
|
247 |
+
.gradio-container .styler_radio_ .MuiRadio-root {
|
248 |
+
display: none;
|
249 |
+
}
|
250 |
+
|
251 |
+
/* Style for the label text inside the chip */
|
252 |
+
.gradio-container .styler_radio_ .MuiFormControlLabel-label {
|
253 |
+
font-size: 1rem; /* Chip text size */
|
254 |
+
font-weight: 600;
|
255 |
+
color: #495057;
|
256 |
+
}
|
257 |
+
|
258 |
+
/* Style for the selected chip */
|
259 |
+
.gradio-container .styler_radio_ .Mui-checked + .MuiFormControlLabel-label {
|
260 |
+
color: white !important; /* Ensure text is readable on selected background */
|
261 |
+
}
|
262 |
+
|
263 |
+
.gradio-container .styler_radio_ .Mui-checked .MuiFormControlLabel-label {
|
264 |
+
color: white !important; /* Backup selector */
|
265 |
+
}
|
266 |
+
|
267 |
+
.gradio-container .styler_radio_ .MuiFormControlLabel-root.Mui-checked, /* This might target the container*/
|
268 |
+
.gradio-container .styler_radio_ span.Mui-checked + span { /* Or target based on the checked span */
|
269 |
+
/* This seems more complex now, let's try styling the parent container */
|
270 |
+
}
|
271 |
+
.gradio-container .styler_radio_ label:has(input:checked) {
|
272 |
+
background-color: var(--primary-color) !important;
|
273 |
+
border-color: var(--primary-color) !important;
|
274 |
+
color: white !important; /* Text color for selected */
|
275 |
+
box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
|
276 |
+
}
|
277 |
+
/* Apply white text color specifically to the label text when checked */
|
278 |
+
.gradio-container .styler_radio_ label:has(input:checked) span {
|
279 |
+
color: white !important;
|
280 |
+
}
|
281 |
+
|
282 |
+
|
283 |
+
/* Hover effect for non-selected chips */
|
284 |
+
.gradio-container .styler_radio_ label:not(:has(input:checked)):hover {
|
285 |
+
background-color: #e9ecef;
|
286 |
+
border-color: #adb5bd;
|
287 |
+
}
|
288 |
+
|
289 |
+
|
290 |
+
/* --- Leaderboard Table Styling --- */
|
291 |
+
#leaderboard-table {
|
292 |
+
background-color: var(--card-background);
|
293 |
+
border-radius: var(--border-radius);
|
294 |
+
box-shadow: var(--shadow);
|
295 |
+
overflow: hidden; /* Ensures rounded corners clip content */
|
296 |
+
border-collapse: separate; /* Needed for border-radius on table */
|
297 |
+
border-spacing: 0;
|
298 |
+
margin-top: 1rem;
|
299 |
+
}
|
300 |
+
|
301 |
+
#leaderboard-table th,
|
302 |
+
#leaderboard-table td {
|
303 |
+
padding: 12px 16px; /* More padding */
|
304 |
+
text-align: left;
|
305 |
+
font-size: 1rem; /* Table font size */
|
306 |
+
border-bottom: 1px solid #eee; /* Lighter border */
|
307 |
+
vertical-align: middle; /* Center content vertically */
|
308 |
+
white-space: normal; /* Allow wrapping */
|
309 |
+
}
|
310 |
+
|
311 |
+
#leaderboard-table th {
|
312 |
+
background-color: #f8f9fa; /* Light grey header */
|
313 |
+
font-weight: 600;
|
314 |
+
color: #495057;
|
315 |
+
font-size: 1.05rem;
|
316 |
+
border-top: 1px solid #eee; /* Add top border for consistency */
|
317 |
+
}
|
318 |
+
|
319 |
+
#leaderboard-table tr:last-child td {
|
320 |
+
border-bottom: none; /* Remove bottom border for last row */
|
321 |
+
}
|
322 |
+
|
323 |
+
#leaderboard-table tr:nth-child(even) td {
|
324 |
+
background-color: rgba(249, 249, 249, 0.7); /* Slightly transparent even rows */
|
325 |
+
}
|
326 |
+
|
327 |
+
#leaderboard-table tr:hover td {
|
328 |
+
background-color: rgba(233, 233, 233, 0.8); /* Hover effect */
|
329 |
+
}
|
330 |
+
|
331 |
+
/* Style for the model link */
|
332 |
+
#leaderboard-table td a {
|
333 |
+
color: var(--primary-color);
|
334 |
+
text-decoration: none;
|
335 |
+
font-weight: 600; /* Make model name stand out */
|
336 |
+
transition: color 0.2s ease;
|
337 |
+
}
|
338 |
+
|
339 |
+
#leaderboard-table td a:hover {
|
340 |
+
color: #0056b3; /* Darker blue on hover */
|
341 |
+
text-decoration: underline;
|
342 |
+
}
|
343 |
+
|
344 |
+
/* Rank column styling */
|
345 |
+
#leaderboard-table td:first-child,
|
346 |
+
#leaderboard-table th:first-child {
|
347 |
+
text-align: center;
|
348 |
+
font-weight: 700;
|
349 |
+
width: 60px; /* Fixed width for Rank */
|
350 |
+
}
|
351 |
+
|
352 |
+
/* Elo Score column styling */
|
353 |
+
#leaderboard-table td:last-child,
|
354 |
+
#leaderboard-table th:last-child {
|
355 |
+
text-align: right;
|
356 |
+
font-weight: 600;
|
357 |
+
width: 100px; /* Fixed width for Elo Score */
|
358 |
+
}
|
359 |
+
|
360 |
+
|
361 |
+
/* --- Accordion Styling --- */
|
362 |
+
.gradio-accordion, .accordion { /* Targeting gradio 4+ */
|
363 |
+
border: 1px solid #ddd;
|
364 |
+
border-radius: var(--border-radius);
|
365 |
+
margin-bottom: 1rem;
|
366 |
+
box-shadow: var(--shadow);
|
367 |
+
background-color: var(--card-background);
|
368 |
+
}
|
369 |
+
.gradio-accordion > button, .accordion > button { /* Targeting header button */
|
370 |
+
font-size: 1.1rem !important;
|
371 |
+
font-weight: 600;
|
372 |
+
padding: 12px 15px !important;
|
373 |
+
background-color: #f8f9fa !important;
|
374 |
+
border-bottom: 1px solid #eee !important;
|
375 |
+
}
|
376 |
+
.gradio-accordion > button[aria-expanded="true"],
|
377 |
+
.accordion > button[aria-expanded="true"] {
|
378 |
+
background-color: #f1f3f5 !important;
|
379 |
+
}
|
380 |
+
|
381 |
+
|
382 |
+
/* --- Textbox/Citation Styling --- */
|
383 |
+
#citation-button textarea {
|
384 |
+
font-family: 'Courier New', Courier, monospace; /* Monospace for code/citation */
|
385 |
+
font-size: 0.95rem;
|
386 |
+
background-color: #fdfdfd;
|
387 |
+
border-radius: var(--border-radius);
|
388 |
+
padding: 15px;
|
389 |
+
line-height: 1.5;
|
390 |
+
border: 1px solid #ccc;
|
391 |
+
box-shadow: inset 0 1px 3px rgba(0,0,0,0.06);
|
392 |
+
}
|
393 |
+
#citation-button button { /* Style copy button */
|
394 |
+
font-size: 0.9rem !important;
|
395 |
+
padding: 5px 10px !important;
|
396 |
+
}
|
397 |
+
|
398 |
+
/* --- General Button Styling (if needed for submit tab) --- */
|
399 |
+
.gradio-button, button.gr-button {
|
400 |
+
font-size: 1.05rem !important;
|
401 |
+
font-weight: 600;
|
402 |
+
padding: 10px 20px !important;
|
403 |
+
border-radius: var(--border-radius) !important;
|
404 |
+
transition: all 0.3s ease !important;
|
405 |
+
}
|
406 |
+
|
407 |
+
/* Adjustments for smaller screens if necessary */
|
408 |
+
@media (max-width: 768px) {
|
409 |
+
html { font-size: 15px; } /* Slightly smaller base font on mobile */
|
410 |
+
h1, .markdown-text h1 { font-size: 2rem; }
|
411 |
+
h2, .markdown-text h2 { font-size: 1.5rem; }
|
412 |
+
#leaderboard-table th, #leaderboard-table td { padding: 8px 10px; font-size: 0.95rem;}
|
413 |
+
.tab-buttons > .tabs > button { font-size: 1rem !important; padding: 10px 15px !important;}
|
414 |
+
.gradio-container .styler_radio_ MuiFormControlLabel-root { padding: 6px 12px; }
|
415 |
+
.gradio-container .styler_radio_ .MuiFormControlLabel-label { font-size: 0.95rem; }
|
416 |
+
}
|
417 |
"""
|
418 |
|
419 |
+
# --- Gradio App Definition ---
|
420 |
+
# Use a theme for better default styling - Glass theme is modern
|
421 |
+
demo = gr.Blocks(css=custom_css, theme=gr.themes.Glass(primary_hue="blue", secondary_hue="sky"))
|
422 |
|
423 |
with demo:
|
424 |
# Use the TITLE variable imported or defined above
|
|
|
428 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
429 |
|
430 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
431 |
+
# Added relevant icons to tab labels
|
432 |
+
with gr.TabItem("π
Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
|
433 |
with gr.Column():
|
434 |
+
gr.Markdown("## Select Category to Rank By", elem_classes="markdown-text") # Changed heading
|
435 |
category_selector = gr.Radio(
|
436 |
choices=CATEGORIES,
|
437 |
+
label="Category:", # Simplified label
|
438 |
value=DEFAULT_CATEGORY,
|
439 |
interactive=True,
|
440 |
+
# elem_classes="category-radio-chips" # Add class for potential CSS targeting if needed
|
441 |
+
# Attach a custom hook class ('styler_radio_') that the '.styler_radio_' selectors in custom_css target
|
442 |
+
elem_classes="styler_radio_" # Add hook class
|
443 |
)
|
444 |
leaderboard_df_component = gr.Dataframe(
|
|
|
445 |
value=update_leaderboard(DEFAULT_CATEGORY),
|
|
|
446 |
headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
|
|
|
447 |
datatype=["number", "html", "str", "str", "number"],
|
448 |
interactive=False,
|
|
|
|
|
449 |
row_count=(len(master_df), "fixed"), # Display all rows
|
450 |
col_count=(5, "fixed"),
|
451 |
wrap=True, # Allow text wrapping in cells
|
|
|
463 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
464 |
|
465 |
# --- Submit Tab (Commented out as in original request) ---
|
466 |
+
# Uncomment and ensure necessary variables/functions are available if needed
|
467 |
+
# with gr.TabItem("π Submit", elem_id="llm-benchmark-tab-submit", id=2):
|
468 |
# with gr.Column():
|
469 |
+
# with gr.Row():
|
470 |
+
# gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
471 |
+
# with gr.Column():
|
472 |
+
# with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
|
473 |
+
# finished_eval_table = gr.Dataframe( # Use gr.Dataframe
|
474 |
+
# value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
475 |
+
# )
|
476 |
+
# with gr.Accordion(f"π Running Evaluations ({len(running_eval_queue_df)})", open=False):
|
477 |
+
# running_eval_table = gr.Dataframe( # Use gr.Dataframe
|
478 |
+
# value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
479 |
+
# )
|
480 |
+
# with gr.Accordion(f"⏳ Pending Evaluations ({len(pending_eval_queue_df)})", open=False):
|
481 |
+
# pending_eval_table = gr.Dataframe( # Use gr.Dataframe
|
482 |
+
# value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
483 |
+
# )
|
484 |
+
# with gr.Row():
|
485 |
+
# gr.Markdown("## βοΈ Submit Your Model", elem_classes="markdown-text") # Changed heading
|
486 |
+
# with gr.Row():
|
487 |
+
# with gr.Column(scale=1):
|
488 |
+
# model_name_textbox = gr.Textbox(label="Model Name (Hugging Face Hub ID)")
|
489 |
+
# revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
|
490 |
+
# model_type = gr.Dropdown(choices=["CausalLM", "Seq2SeqLM", "Other"], label="Model Type", multiselect=False, value="CausalLM", interactive=True) # Example choices
|
491 |
+
# with gr.Column(scale=1):
|
492 |
+
# precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
|
493 |
+
# weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights Type", multiselect=False, value="Original", interactive=True)
|
494 |
+
# base_model_name_textbox = gr.Textbox(label="Base Model (for Adapter/Delta)", placeholder="Leave empty if Original weights")
|
495 |
+
# submit_button = gr.Button("Submit for Evaluation", variant="primary") # Added variant
|
496 |
+
# submission_result = gr.Markdown()
|
497 |
+
# # Ensure add_new_eval is correctly imported/defined and handles these inputs
|
498 |
+
# # Make sure add_new_eval is defined if you uncomment this
|
499 |
+
# if callable(add_new_eval):
|
500 |
+
# submit_button.click(
|
501 |
+
# add_new_eval,
|
502 |
+
# [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
|
503 |
+
# submission_result,
|
504 |
+
# )
|
505 |
+
# else:
|
506 |
+
# print("Warning: 'add_new_eval' function not callable. Submit button disabled.")
|
507 |
+
# submit_button.interactive = False # Disable button if function missing
|
508 |
+
|
509 |
+
|
510 |
+
# --- Citation Row (at the bottom, outside Tabs, using Accordion) ---
|
511 |
with gr.Accordion("π Citation", open=False):
|
512 |
+
# Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables
|
513 |
citation_button = gr.Textbox(
|
514 |
value=CITATION_BUTTON_TEXT,
|
515 |
label=CITATION_BUTTON_LABEL,
|
516 |
+
lines=10, # Adjust lines based on content and new font size
|
517 |
+
elem_id="citation-button", # Keep ID for CSS targeting
|
518 |
show_copy_button=True,
|
519 |
+
interactive=False # Make it non-editable
|
520 |
)
|
521 |
|
|
|
|
|
|
|
|
|
|
|
522 |
# --- Keep scheduler if relevant ---
|
523 |
# Only start scheduler if the script is run directly
|
524 |
if __name__ == "__main__":
|
525 |
try:
|
526 |
scheduler = BackgroundScheduler()
|
|
|
527 |
if callable(restart_space):
|
|
|
528 |
if REPO_ID and REPO_ID != "your/space-id":
|
529 |
scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
|
530 |
scheduler.start()
|
531 |
+
print("Scheduler started for space restart.")
|
532 |
else:
|
533 |
print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
|
534 |
else:
|
|
|
541 |
# Ensures the app launches only when the script is run directly
|
542 |
if __name__ == "__main__":
|
543 |
# Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
|
544 |
+
# Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions.
|
545 |
+
print("Launching Gradio App with enhanced styling...")
|
546 |
demo.launch()
|