Jerrycool committed on
Commit
39c3577
·
verified ·
1 Parent(s): e842409

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +370 -129
app.py CHANGED
@@ -3,8 +3,7 @@ import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
5
 
6
- # --- Make sure these imports work relative to your file structure ---
7
- # Option 1: If src is a directory in the same folder as your script:
8
  try:
9
  from src.about import (
10
  CITATION_BUTTON_LABEL,
@@ -14,29 +13,45 @@ try:
14
  LLM_BENCHMARKS_TEXT,
15
  TITLE,
16
  )
17
- from src.display.css_html_js import custom_css # Assuming this exists but might be empty
18
  from src.envs import REPO_ID # Keep if needed for restart_space or other functions
19
  from src.submission.submit import add_new_eval # Keep if using the submit tab
20
  print("Successfully imported from src module.")
21
- # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
 
 
22
  except ImportError:
23
  print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL="Citation"
25
- CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
26
  EVALUATION_QUEUE_TEXT="Current evaluation queue:"
27
- INTRODUCTION_TEXT="Welcome to the MLE-Dojo Benchmark Leaderboard."
28
- LLM_BENCHMARKS_TEXT="Information about the benchmarks..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  TITLE="<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>"
30
- custom_css="" # Start with empty CSS if not imported
31
  REPO_ID="your/space-id" # Replace with actual ID if needed
32
  def add_new_eval(*args): return "Submission placeholder."
 
33
  # --- End Placeholder Definitions ---
34
 
35
 
36
  # --- Elo Leaderboard Configuration ---
37
  # Enhanced data with Rank (placeholder), Organizer, License, and URL
38
- # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
39
- # Verify organizer and license information for accuracy.
40
  data = [
41
  {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
42
  {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
@@ -49,7 +64,6 @@ data = [
49
  ]
50
 
51
  # Create a master DataFrame
52
- # Note: Columns 'organizer' and 'license' are created in lowercase here.
53
  master_df = pd.DataFrame(data)
54
 
55
  # Define categories for selection (user-facing)
@@ -75,21 +89,14 @@ def update_leaderboard(category):
75
  if score_column is None or score_column not in master_df.columns:
76
  print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
77
  score_column = category_to_column[DEFAULT_CATEGORY]
78
- # Check fallback column too
79
  if score_column not in master_df.columns:
80
- # Return empty df with correct columns if still invalid
81
- # Use lowercase keys here consistent with master_df for the empty case
82
  print(f"Error: Default column '{score_column}' also not found.")
 
83
  return pd.DataFrame({
84
- "Rank": [],
85
- "Model": [],
86
- "Elo Score": [],
87
- "Organizer": [], # Changed 'organizer' -> 'Organizer' for consistency in empty case
88
- "License": [] # Changed 'license' -> 'License' for consistency in empty case
89
  })
90
 
91
- # Select base columns + the score column for sorting
92
- # Ensure 'organizer' and 'license' are selected correctly (lowercase)
93
  cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
94
  df = master_df[cols_to_select].copy()
95
 
@@ -100,84 +107,318 @@ def update_leaderboard(category):
100
  df.reset_index(drop=True, inplace=True)
101
  df.insert(0, 'Rank', df.index + 1)
102
 
103
- # Format Model Name as HTML Hyperlink
104
- # The resulting column name will be 'Model' (capitalized)
105
  df['Model'] = df.apply(
106
- lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
107
  axis=1
108
  )
109
 
110
  # Rename the score column to 'Elo Score' for consistent display
111
  df.rename(columns={score_column: 'Elo Score'}, inplace=True)
112
 
113
- # Rename 'organizer' and 'license' to match desired display headers
114
  df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
115
 
116
- # Select and reorder columns for final display using the ACTUAL column names in df
117
- # Use capitalized 'Organizer' and 'License' here because they have been renamed.
118
  final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
119
  df = df[final_columns]
120
 
121
- # Note: The DataFrame returned now has columns:
122
- # 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
123
  return df
124
 
125
  # --- Mock/Placeholder functions/data for other tabs ---
126
- # (If the Submit tab is used, ensure these variables are appropriately populated or handled)
127
- print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
128
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
129
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
130
  pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
131
- EVAL_COLS = ["Model", "Status", "Requested", "Started"] # Define for the dataframe headers
132
- EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types
133
 
134
  # --- Keep restart function if relevant ---
135
  def restart_space():
136
- # Make sure REPO_ID is correctly defined/imported if this function is used
137
  print(f"Attempting to restart space: {REPO_ID}")
138
- # Replace with your actual space restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))
139
 
140
 
141
- # --- Gradio App Definition ---
 
 
 
 
 
 
 
 
142
 
143
- # ***** FONT SIZE INCREASED HERE *****
144
- # Add CSS rules to make the base font size larger.
145
- # Adjust the '1.2em' value (e.g., to '1.4em', '16px') to change the size.
146
- # The !important flag helps override theme defaults.
147
- # If the imported custom_css already has content, append to it.
148
- font_size_css = """
149
  body {
150
- font-size: 1.5em !important; /* Increase base font size */
151
- }
152
- /* Optional: Target specific elements if needed */
153
- /*
154
- #leaderboard-table th, #leaderboard-table td {
155
- font-size: 1em !important; /* Adjust table font size relative to new body size */
156
- padding: 5px 7px !important; /* Increase padding for better spacing */
157
- }
158
- h1, .markdown-text h1 { font-size: 2.2em !important; } /* Make main title larger */
159
- h2, .markdown-text h2 { font-size: 1.8em !important; } /* Make section titles larger */
160
- button { font-size: 1.1em !important; padding: 8px 16px !important; } /* Slightly larger buttons */
161
- .gr-input, .gr-dropdown, .gr-textbox textarea { font-size: 1em !important; } /* Ensure inputs scale too */
162
- */
163
- """
164
- # Append the new CSS to any existing custom_css
165
- custom_css += font_size_css
166
-
167
- # Add basic table styling if not already present
168
- if "table {" not in custom_css:
169
- custom_css += """
170
- table { width: 100%; border-collapse: collapse; margin-top: 10px; margin-bottom: 10px; }
171
- th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; vertical-align: top; } /* Allow wrapping, top align */
172
- th { background-color: #f2f2f2; font-weight: bold; }
173
- tr:nth-child(even) { background-color: #f9f9f9; }
174
- tr:hover { background-color: #e9e9e9; }
175
- td a { color: #007bff; text-decoration: none; }
176
- td a:hover { text-decoration: underline; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  """
178
 
179
- # Use a theme for better default styling
180
- demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 
181
 
182
  with demo:
183
  # Use the TITLE variable imported or defined above
@@ -187,25 +428,24 @@ with demo:
187
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
188
 
189
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
190
- with gr.TabItem("πŸ… MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
 
191
  with gr.Column():
192
- gr.Markdown("## Model Elo Rankings by Category")
193
  category_selector = gr.Radio(
194
  choices=CATEGORIES,
195
- label="Select Category:",
196
  value=DEFAULT_CATEGORY,
197
  interactive=True,
 
 
 
198
  )
199
  leaderboard_df_component = gr.Dataframe(
200
- # Initialize with sorted data for the default category
201
  value=update_leaderboard(DEFAULT_CATEGORY),
202
- # Headers for DISPLAY should match the *renamed* columns from update_leaderboard
203
  headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
204
- # Datatype maps to the final df columns: Rank, Model, Organizer, License, Elo Score
205
  datatype=["number", "html", "str", "str", "number"],
206
  interactive=False,
207
- # --- FIX APPLIED: Removed unsupported 'height' argument ---
208
- # row_count determines the number of rows to display
209
  row_count=(len(master_df), "fixed"), # Display all rows
210
  col_count=(5, "fixed"),
211
  wrap=True, # Allow text wrapping in cells
@@ -223,71 +463,72 @@ with demo:
223
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
224
 
225
  # --- Submit Tab (Commented out as in original request) ---
226
- # Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported/defined if uncommented
227
- # with gr.TabItem("πŸš€ Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
228
  # with gr.Column():
229
- # with gr.Row():
230
- # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") # Requires import/definition
231
- # with gr.Column():
232
- # with gr.Accordion(f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
233
- # finished_eval_table = gr.components.Dataframe(
234
- # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
235
- # )
236
- # with gr.Accordion(f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
237
- # running_eval_table = gr.components.Dataframe(
238
- # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
239
- # )
240
- # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
241
- # pending_eval_table = gr.components.Dataframe(
242
- # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
243
- # )
244
- # with gr.Row():
245
- # gr.Markdown("# βœ‰οΈβœ¨ Submit your model here!", elem_classes="markdown-text")
246
- # with gr.Row():
247
- # with gr.Column():
248
- # model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)")
249
- # revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
250
- # model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True) # Example choices
251
- # with gr.Column():
252
- # precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
253
- # weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
254
- # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
255
- # submit_button = gr.Button("Submit Eval")
256
- # submission_result = gr.Markdown()
257
- # # Ensure add_new_eval is correctly imported/defined and handles these inputs
258
- # submit_button.click(
259
- # add_new_eval, # Requires import/definition
260
- # [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
261
- # submission_result,
262
- # )
263
-
264
- # --- Citation Row (at the bottom, outside Tabs) ---
 
 
 
 
 
 
265
  with gr.Accordion("πŸ“™ Citation", open=False):
266
- # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported or defined above
267
  citation_button = gr.Textbox(
268
  value=CITATION_BUTTON_TEXT,
269
  label=CITATION_BUTTON_LABEL,
270
- lines=10, # Adjust lines if needed for new font size
271
- elem_id="citation-button",
272
  show_copy_button=True,
 
273
  )
274
 
275
- # IGNORE_WHEN_COPYING_START
276
- # content_copy download
277
- # Use code with caution.
278
- # IGNORE_WHEN_COPYING_END
279
-
280
  # --- Keep scheduler if relevant ---
281
  # Only start scheduler if the script is run directly
282
  if __name__ == "__main__":
283
  try:
284
  scheduler = BackgroundScheduler()
285
- # Add job only if restart_space is callable (i.e., not a placeholder or failed import)
286
  if callable(restart_space):
287
- # Check if REPO_ID seems valid before scheduling
288
  if REPO_ID and REPO_ID != "your/space-id":
289
  scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
290
  scheduler.start()
 
291
  else:
292
  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
293
  else:
@@ -300,6 +541,6 @@ if __name__ == "__main__":
300
  # Ensures the app launches only when the script is run directly
301
  if __name__ == "__main__":
302
  # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
303
- # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions above.
304
- print("Launching Gradio App...")
305
  demo.launch()
 
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
5
 
6
+ # --- Attempt to import from src or use placeholders ---
 
7
  try:
8
  from src.about import (
9
  CITATION_BUTTON_LABEL,
 
13
  LLM_BENCHMARKS_TEXT,
14
  TITLE,
15
  )
16
+ from src.display.css_html_js import custom_css # Assuming this might exist but we'll override/append
17
  from src.envs import REPO_ID # Keep if needed for restart_space or other functions
18
  from src.submission.submit import add_new_eval # Keep if using the submit tab
19
  print("Successfully imported from src module.")
20
+ # Ensure custom_css is initialized if it exists but is None or empty
21
+ if not isinstance(custom_css, str):
22
+ custom_css = ""
23
  except ImportError:
24
  print("Warning: Using placeholder values because src module imports failed.")
25
  CITATION_BUTTON_LABEL="Citation"
26
+ CITATION_BUTTON_TEXT="Please cite us if you use this benchmark...\n[Your BibTeX entry here]" # Added placeholder content
27
  EVALUATION_QUEUE_TEXT="Current evaluation queue:"
28
+ INTRODUCTION_TEXT="""
29
+ Welcome to the **MLE-Dojo Benchmark Leaderboard**. Select a category below to see the rankings.
30
+ Models are ranked based on their Elo scores across various machine learning tasks.
31
+ """
32
+ LLM_BENCHMARKS_TEXT="""
33
+ ## About the Benchmarks
34
+
35
+ This leaderboard tracks the performance of various models on the MLE-Dojo benchmark suite.
36
+ The suite includes tasks covering:
37
+
38
+ * **MLE-Lite:** Lightweight ML tasks.
39
+ * **Tabular:** Tasks involving structured data.
40
+ * **NLP:** Natural Language Processing tasks.
41
+ * **CV:** Computer Vision tasks.
42
+
43
+ Scores are calculated using an Elo rating system. Higher scores indicate better performance relative to other models in the benchmark.
44
+ """
45
  TITLE="<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>"
46
+ custom_css="" # Start with empty CSS
47
  REPO_ID="your/space-id" # Replace with actual ID if needed
48
  def add_new_eval(*args): return "Submission placeholder."
49
+ print("Placeholder function 'add_new_eval' defined.")
50
  # --- End Placeholder Definitions ---
51
 
52
 
53
  # --- Elo Leaderboard Configuration ---
54
  # Enhanced data with Rank (placeholder), Organizer, License, and URL
 
 
55
  data = [
56
  {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
57
  {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
 
64
  ]
65
 
66
  # Create a master DataFrame
 
67
  master_df = pd.DataFrame(data)
68
 
69
  # Define categories for selection (user-facing)
 
89
  if score_column is None or score_column not in master_df.columns:
90
  print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
91
  score_column = category_to_column[DEFAULT_CATEGORY]
 
92
  if score_column not in master_df.columns:
 
 
93
  print(f"Error: Default column '{score_column}' also not found.")
94
+ # Return empty df with correct capitalized column names for display
95
  return pd.DataFrame({
96
+ "Rank": [], "Model": [], "Elo Score": [], "Organizer": [], "License": []
 
 
 
 
97
  })
98
 
99
+ # Select base columns + the score column for sorting (use original case from master_df)
 
100
  cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
101
  df = master_df[cols_to_select].copy()
102
 
 
107
  df.reset_index(drop=True, inplace=True)
108
  df.insert(0, 'Rank', df.index + 1)
109
 
110
+ # Format Model Name as HTML Hyperlink (results in 'Model' column)
 
111
  df['Model'] = df.apply(
112
+ lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none; font-weight: 600;'>{row['model_name']}</a>",
113
  axis=1
114
  )
115
 
116
  # Rename the score column to 'Elo Score' for consistent display
117
  df.rename(columns={score_column: 'Elo Score'}, inplace=True)
118
 
119
+ # Rename 'organizer' and 'license' to match desired display headers (Capitalized)
120
  df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
121
 
122
+ # Select and reorder columns for final display (use Capitalized names)
 
123
  final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
124
  df = df[final_columns]
125
 
126
+ # Return DataFrame with columns: 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
 
127
  return df
128
 
129
  # --- Mock/Placeholder functions/data for other tabs ---
130
+ print("Warning: Evaluation queue data fetching is disabled/mocked.")
 
131
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
132
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
133
  pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
134
+ EVAL_COLS = ["Model", "Status", "Requested", "Started"]
135
+ EVAL_TYPES = ["str", "str", "str", "str"]
136
 
137
  # --- Keep restart function if relevant ---
138
  def restart_space():
 
139
  print(f"Attempting to restart space: {REPO_ID}")
140
+ # Replace with actual restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))
141
 
142
 
143
+ # --- Enhanced CSS ---
144
+ # Concatenate existing CSS (if any) with new styles
145
+ # Ensure custom_css is a string before appending
146
+ if not isinstance(custom_css, str):
147
+ custom_css = ""
148
+
149
+ custom_css += """
150
+ /* --- Import Font --- */
151
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
152
 
153
+ /* --- Global Styles & Font --- */
 
 
 
 
 
154
  body {
155
+ font-family: 'Inter', sans-serif;
156
+ background: linear-gradient(to bottom right, #fdfbfb, #ebedee); /* Subtle gradient */
157
+ color: #333;
158
+ }
159
+
160
+ :root {
161
+ --primary-color: #007bff; /* Example primary color */
162
+ --text-color: #333;
163
+ --border-radius: 8px;
164
+ --card-background: rgba(255, 255, 255, 0.8); /* Slightly transparent */
165
+ --shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
166
+ }
167
+
168
+ /* Set base font size on html for rem units */
169
+ html {
170
+ font-size: 16px; /* Base font size */
171
+ }
172
+
173
+ /* Increase overall text size slightly using rem */
174
+ .gradio-container {
175
+ font-size: 1rem; /* Approx 16px */
176
+ line-height: 1.6;
177
+ }
178
+
179
+ /* --- Headings --- */
180
+ h1, .markdown-text h1 {
181
+ font-size: 2.5rem; /* Larger title */
182
+ font-weight: 700;
183
+ color: #2c3e50; /* Darker heading color */
184
+ margin-bottom: 1rem;
185
+ text-align: center;
186
+ padding-top: 1rem;
187
+ }
188
+ h2, .markdown-text h2 {
189
+ font-size: 1.75rem; /* Larger section titles */
190
+ font-weight: 600;
191
+ color: #2c3e50;
192
+ margin-top: 1.5rem;
193
+ margin-bottom: 0.75rem;
194
+ border-bottom: 2px solid var(--primary-color);
195
+ padding-bottom: 0.3rem;
196
+ }
197
+
198
+ /* --- Markdown Text Styling --- */
199
+ .markdown-text p, .markdown-text li {
200
+ font-size: 1.05rem; /* Slightly larger paragraph text */
201
+ color: var(--text-color);
202
+ }
203
+ .markdown-text strong {
204
+ font-weight: 600;
205
+ color: #0056b3;
206
+ }
207
+
208
+ /* --- Tab Styling --- */
209
+ .tab-buttons > .tabs > button {
210
+ font-size: 1.1rem !important;
211
+ font-weight: 600;
212
+ padding: 12px 20px !important;
213
+ border-radius: var(--border-radius) var(--border-radius) 0 0 !important;
214
+ background-color: #e9ecef !important;
215
+ border-bottom: 2px solid transparent !important;
216
+ transition: all 0.3s ease;
217
+ }
218
+ .tab-buttons > .tabs > button.selected {
219
+ background-color: var(--card-background) !important;
220
+ border-bottom: 2px solid var(--primary-color) !important;
221
+ color: var(--primary-color) !important;
222
+ box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.05);
223
+ }
224
+
225
+ /* --- Radio Button "Chips" Styling --- */
226
+ /* Targeting the container for the radio items */
227
+ .gradio-container .styler_radio_ MuiFormGroup-root {
228
+ display: flex;
229
+ flex-direction: row; /* Arrange horizontally */
230
+ flex-wrap: wrap;
231
+ gap: 10px; /* Space between chips */
232
+ margin-bottom: 1.5rem; /* Space below the chips */
233
+ }
234
+
235
+ /* Styling individual radio items as chips */
236
+ .gradio-container .styler_radio_ MuiFormControlLabel-root {
237
+ background-color: #f8f9fa;
238
+ border: 1px solid #dee2e6;
239
+ padding: 8px 16px; /* Chip padding */
240
+ border-radius: 20px; /* Pill shape */
241
+ cursor: pointer;
242
+ transition: all 0.2s ease-in-out;
243
+ margin: 0 !important; /* Override default margins */
244
+ }
245
+
246
+ /* Hide the actual radio button circle */
247
+ .gradio-container .styler_radio_ .MuiRadio-root {
248
+ display: none;
249
+ }
250
+
251
+ /* Style for the label text inside the chip */
252
+ .gradio-container .styler_radio_ .MuiFormControlLabel-label {
253
+ font-size: 1rem; /* Chip text size */
254
+ font-weight: 600;
255
+ color: #495057;
256
+ }
257
+
258
+ /* Style for the selected chip */
259
+ .gradio-container .styler_radio_ .Mui-checked + .MuiFormControlLabel-label {
260
+ color: white !important; /* Ensure text is readable on selected background */
261
+ }
262
+
263
+ .gradio-container .styler_radio_ .Mui-checked .MuiFormControlLabel-label {
264
+ color: white !important; /* Backup selector */
265
+ }
266
+
267
+ .gradio-container .styler_radio_ .MuiFormControlLabel-root.Mui-checked, /* This might target the container*/
268
+ .gradio-container .styler_radio_ span.Mui-checked + span { /* Or target based on the checked span */
269
+ /* This seems more complex now, let's try styling the parent container */
270
+ }
271
+ .gradio-container .styler_radio_ label:has(input:checked) {
272
+ background-color: var(--primary-color) !important;
273
+ border-color: var(--primary-color) !important;
274
+ color: white !important; /* Text color for selected */
275
+ box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
276
+ }
277
+ /* Apply white text color specifically to the label text when checked */
278
+ .gradio-container .styler_radio_ label:has(input:checked) span {
279
+ color: white !important;
280
+ }
281
+
282
+
283
+ /* Hover effect for non-selected chips */
284
+ .gradio-container .styler_radio_ label:not(:has(input:checked)):hover {
285
+ background-color: #e9ecef;
286
+ border-color: #adb5bd;
287
+ }
288
+
289
+
290
+ /* --- Leaderboard Table Styling --- */
291
+ #leaderboard-table {
292
+ background-color: var(--card-background);
293
+ border-radius: var(--border-radius);
294
+ box-shadow: var(--shadow);
295
+ overflow: hidden; /* Ensures rounded corners clip content */
296
+ border-collapse: separate; /* Needed for border-radius on table */
297
+ border-spacing: 0;
298
+ margin-top: 1rem;
299
+ }
300
+
301
+ #leaderboard-table th,
302
+ #leaderboard-table td {
303
+ padding: 12px 16px; /* More padding */
304
+ text-align: left;
305
+ font-size: 1rem; /* Table font size */
306
+ border-bottom: 1px solid #eee; /* Lighter border */
307
+ vertical-align: middle; /* Center content vertically */
308
+ white-space: normal; /* Allow wrapping */
309
+ }
310
+
311
+ #leaderboard-table th {
312
+ background-color: #f8f9fa; /* Light grey header */
313
+ font-weight: 600;
314
+ color: #495057;
315
+ font-size: 1.05rem;
316
+ border-top: 1px solid #eee; /* Add top border for consistency */
317
+ }
318
+
319
+ #leaderboard-table tr:last-child td {
320
+ border-bottom: none; /* Remove bottom border for last row */
321
+ }
322
+
323
+ #leaderboard-table tr:nth-child(even) td {
324
+ background-color: rgba(249, 249, 249, 0.7); /* Slightly transparent even rows */
325
+ }
326
+
327
+ #leaderboard-table tr:hover td {
328
+ background-color: rgba(233, 233, 233, 0.8); /* Hover effect */
329
+ }
330
+
331
+ /* Style for the model link */
332
+ #leaderboard-table td a {
333
+ color: var(--primary-color);
334
+ text-decoration: none;
335
+ font-weight: 600; /* Make model name stand out */
336
+ transition: color 0.2s ease;
337
+ }
338
+
339
+ #leaderboard-table td a:hover {
340
+ color: #0056b3; /* Darker blue on hover */
341
+ text-decoration: underline;
342
+ }
343
+
344
+ /* Rank column styling */
345
+ #leaderboard-table td:first-child,
346
+ #leaderboard-table th:first-child {
347
+ text-align: center;
348
+ font-weight: 700;
349
+ width: 60px; /* Fixed width for Rank */
350
+ }
351
+
352
+ /* Elo Score column styling */
353
+ #leaderboard-table td:last-child,
354
+ #leaderboard-table th:last-child {
355
+ text-align: right;
356
+ font-weight: 600;
357
+ width: 100px; /* Fixed width for Elo Score */
358
+ }
359
+
360
+
361
+ /* --- Accordion Styling --- */
362
+ .gradio-accordion, .accordion { /* Targeting gradio 4+ */
363
+ border: 1px solid #ddd;
364
+ border-radius: var(--border-radius);
365
+ margin-bottom: 1rem;
366
+ box-shadow: var(--shadow);
367
+ background-color: var(--card-background);
368
+ }
369
+ .gradio-accordion > button, .accordion > button { /* Targeting header button */
370
+ font-size: 1.1rem !important;
371
+ font-weight: 600;
372
+ padding: 12px 15px !important;
373
+ background-color: #f8f9fa !important;
374
+ border-bottom: 1px solid #eee !important;
375
+ }
376
+ .gradio-accordion > button[aria-expanded="true"],
377
+ .accordion > button[aria-expanded="true"] {
378
+ background-color: #f1f3f5 !important;
379
+ }
380
+
381
+
382
+ /* --- Textbox/Citation Styling --- */
383
+ #citation-button textarea {
384
+ font-family: 'Courier New', Courier, monospace; /* Monospace for code/citation */
385
+ font-size: 0.95rem;
386
+ background-color: #fdfdfd;
387
+ border-radius: var(--border-radius);
388
+ padding: 15px;
389
+ line-height: 1.5;
390
+ border: 1px solid #ccc;
391
+ box-shadow: inset 0 1px 3px rgba(0,0,0,0.06);
392
+ }
393
+ #citation-button button { /* Style copy button */
394
+ font-size: 0.9rem !important;
395
+ padding: 5px 10px !important;
396
+ }
397
+
398
+ /* --- General Button Styling (if needed for submit tab) --- */
399
+ .gradio-button, button.gr-button {
400
+ font-size: 1.05rem !important;
401
+ font-weight: 600;
402
+ padding: 10px 20px !important;
403
+ border-radius: var(--border-radius) !important;
404
+ transition: all 0.3s ease !important;
405
+ }
406
+
407
+ /* Adjustments for smaller screens if necessary */
408
+ @media (max-width: 768px) {
409
+ html { font-size: 15px; } /* Slightly smaller base font on mobile */
410
+ h1, .markdown-text h1 { font-size: 2rem; }
411
+ h2, .markdown-text h2 { font-size: 1.5rem; }
412
+ #leaderboard-table th, #leaderboard-table td { padding: 8px 10px; font-size: 0.95rem;}
413
+ .tab-buttons > .tabs > button { font-size: 1rem !important; padding: 10px 15px !important;}
414
+ .gradio-container .styler_radio_ MuiFormControlLabel-root { padding: 6px 12px; }
415
+ .gradio-container .styler_radio_ .MuiFormControlLabel-label { font-size: 0.95rem; }
416
+ }
417
  """
418
 
419
+ # --- Gradio App Definition ---
420
+ # Use a theme for better default styling - Glass theme is modern
421
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Glass(primary_hue="blue", secondary_hue="sky"))
422
 
423
  with demo:
424
  # Use the TITLE variable imported or defined above
 
428
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
429
 
430
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
431
+ # Added relevant icons to tab labels
432
+ with gr.TabItem("πŸ… Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
433
  with gr.Column():
434
+ gr.Markdown("## Select Category to Rank By", elem_classes="markdown-text") # Changed heading
435
  category_selector = gr.Radio(
436
  choices=CATEGORIES,
437
+ label="Category:", # Simplified label
438
  value=DEFAULT_CATEGORY,
439
  interactive=True,
440
+ # elem_classes="category-radio-chips" # Add class for potential CSS targeting if needed
441
+ # Use internal class instead for more robust targeting: 'styler_radio_'
442
+ elem_classes="styler_radio_" # Add hook class
443
  )
444
  leaderboard_df_component = gr.Dataframe(
 
445
  value=update_leaderboard(DEFAULT_CATEGORY),
 
446
  headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
 
447
  datatype=["number", "html", "str", "str", "number"],
448
  interactive=False,
 
 
449
  row_count=(len(master_df), "fixed"), # Display all rows
450
  col_count=(5, "fixed"),
451
  wrap=True, # Allow text wrapping in cells
 
463
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
464
 
465
  # --- Submit Tab (Commented out as in original request) ---
466
+ # Uncomment and ensure necessary variables/functions are available if needed
467
+ # with gr.TabItem("πŸš€ Submit", elem_id="llm-benchmark-tab-submit", id=2):
468
  # with gr.Column():
469
+ # with gr.Row():
470
+ # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
471
+ # with gr.Column():
472
+ # with gr.Accordion(f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
473
+ # finished_eval_table = gr.Dataframe( # Use gr.Dataframe
474
+ # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
475
+ # )
476
+ # with gr.Accordion(f"πŸ”„ Running Evaluations ({len(running_eval_queue_df)})", open=False):
477
+ # running_eval_table = gr.Dataframe( # Use gr.Dataframe
478
+ # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
479
+ # )
480
+ # with gr.Accordion(f"⏳ Pending Evaluations ({len(pending_eval_queue_df)})", open=False):
481
+ # pending_eval_table = gr.Dataframe( # Use gr.Dataframe
482
+ # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
483
+ # )
484
+ # with gr.Row():
485
+ # gr.Markdown("## βœ‰οΈ Submit Your Model", elem_classes="markdown-text") # Changed heading
486
+ # with gr.Row():
487
+ # with gr.Column(scale=1):
488
+ # model_name_textbox = gr.Textbox(label="Model Name (Hugging Face Hub ID)")
489
+ # revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
490
+ # model_type = gr.Dropdown(choices=["CausalLM", "Seq2SeqLM", "Other"], label="Model Type", multiselect=False, value="CausalLM", interactive=True) # Example choices
491
+ # with gr.Column(scale=1):
492
+ # precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
493
+ # weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights Type", multiselect=False, value="Original", interactive=True)
494
+ # base_model_name_textbox = gr.Textbox(label="Base Model (for Adapter/Delta)", placeholder="Leave empty if Original weights")
495
+ # submit_button = gr.Button("Submit for Evaluation", variant="primary") # Added variant
496
+ # submission_result = gr.Markdown()
497
+ # # Ensure add_new_eval is correctly imported/defined and handles these inputs
498
+ # # Make sure add_new_eval is defined if you uncomment this
499
+ # if callable(add_new_eval):
500
+ # submit_button.click(
501
+ # add_new_eval,
502
+ # [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
503
+ # submission_result,
504
+ # )
505
+ # else:
506
+ # print("Warning: 'add_new_eval' function not callable. Submit button disabled.")
507
+ # submit_button.interactive = False # Disable button if function missing
508
+
509
+
510
+ # --- Citation Row (at the bottom, outside Tabs, using Accordion) ---
511
  with gr.Accordion("πŸ“™ Citation", open=False):
512
+ # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables
513
  citation_button = gr.Textbox(
514
  value=CITATION_BUTTON_TEXT,
515
  label=CITATION_BUTTON_LABEL,
516
+ lines=10, # Adjust lines based on content and new font size
517
+ elem_id="citation-button", # Keep ID for CSS targeting
518
  show_copy_button=True,
519
+ interactive=False # Make it non-editable
520
  )
521
 
 
 
 
 
 
522
  # --- Keep scheduler if relevant ---
523
  # Only start scheduler if the script is run directly
524
  if __name__ == "__main__":
525
  try:
526
  scheduler = BackgroundScheduler()
 
527
  if callable(restart_space):
 
528
  if REPO_ID and REPO_ID != "your/space-id":
529
  scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
530
  scheduler.start()
531
+ print("Scheduler started for space restart.")
532
  else:
533
  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
534
  else:
 
541
  # Ensures the app launches only when the script is run directly
542
  if __name__ == "__main__":
543
  # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
544
+ # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions.
545
+ print("Launching Gradio App with enhanced styling...")
546
  demo.launch()