jasonshaoshun committed on
Commit 200beb2 · 1 Parent(s): ef71549
Files changed (2):
  1. app.py +234 -217
  2. custom-select-columns.py +300 -20
app.py CHANGED
@@ -38,6 +38,103 @@ from src.submission.submit import add_new_eval
 
 
 
+
+
+
+from gradio_leaderboard import SelectColumns, Leaderboard
+import pandas as pd
+from typing import List, Dict, Union, Optional, Any
+from dataclasses import fields
+
+class SmartSelectColumns(SelectColumns):
+    """
+    Enhanced SelectColumns component for gradio_leaderboard with explicit column grouping.
+    """
+    def __init__(
+        self,
+        column_groups: Optional[Dict[str, List[str]]] = None,
+        column_mapping: Optional[Dict[str, str]] = None,
+        initial_selected: Optional[List[str]] = None,
+        **kwargs
+    ):
+        """
+        Initialize SmartSelectColumns with enhanced functionality.
+
+        Args:
+            column_groups: Dict mapping group names to lists of columns in that group
+            column_mapping: Dict mapping actual column names to display names
+            initial_selected: List of columns to show initially
+        """
+        super().__init__(**kwargs)
+        self.column_groups = column_groups or {}
+        self.column_mapping = column_mapping or {}
+        self.reverse_mapping = {v: k for k, v in self.column_mapping.items()} if column_mapping else {}
+        self.initial_selected = initial_selected or []
+
+    def preprocess_value(self, x: List[str]) -> List[str]:
+        """Transform selected display names back to actual column names."""
+        return [self.reverse_mapping.get(col, col) for col in x]
+
+    def postprocess_value(self, y: List[str]) -> List[str]:
+        """Transform actual column names to display names."""
+        return [self.column_mapping.get(col, col) for col in y]
+
+    def update(
+        self,
+        value: Union[pd.DataFrame, Dict[str, List[str]], Any]
+    ) -> Dict:
+        """Update component with new values."""
+        if isinstance(value, pd.DataFrame):
+            # Get all column names and convert to display names
+            choices = [self.column_mapping.get(col, col) for col in value.columns]
+
+            # Use initial selection or default columns
+            selected = self.initial_selected if self.initial_selected else choices
+
+            # Convert column groups to use display names
+            filtered_cols = {}
+            for group_name, columns in self.column_groups.items():
+                filtered_cols[group_name] = [
+                    self.column_mapping.get(col, col)
+                    for col in columns
+                    if col in value.columns
+                ]
+
+            return {
+                "choices": choices,
+                "value": selected,
+                "filtered_cols": filtered_cols
+            }
+
+        # Handle fields object
+        if hasattr(value, '__dataclass_fields__'):
+            field_names = [field.name for field in fields(value)]
+            choices = [self.column_mapping.get(name, name) for name in field_names]
+            return {
+                "choices": choices,
+                "value": self.initial_selected if self.initial_selected else choices
+            }
+
+        return super().update(value)
+
+
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
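Editorial sketch (not part of the commit): how the SmartSelectColumns class added above behaves on a toy DataFrame, assuming only pandas and that gradio_leaderboard's SelectColumns can be constructed with default arguments.

```python
import pandas as pd

mapping = {"ioi_gpt2": "IOI (GPT-2)"}
sel = SmartSelectColumns(column_mapping=mapping,
                         column_groups={"IOI": ["ioi_gpt2"]})

df = pd.DataFrame({"ioi_gpt2": [0.1], "Method": ["m1"]})
state = sel.update(df)
print(state["choices"])                      # ['IOI (GPT-2)', 'Method']
print(state["filtered_cols"])                # {'IOI': ['IOI (GPT-2)']}
print(sel.preprocess_value(state["value"]))  # ['ioi_gpt2', 'Method'], raw names again
```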
@@ -128,122 +225,58 @@ from src.about import TasksMib_Subgraph
 
 
 
-def init_leaderboard_mib_subgraph(dataframe, track):
-    """Initialize the subgraph leaderboard with grouped column selection by benchmark."""
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-
-    print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
-
-    # Create groups of columns by benchmark
-    benchmark_groups = []
-
-    # For each benchmark in our TasksMib_Subgraph enum...
-    for task in TasksMib_Subgraph:
-        benchmark = task.value.benchmark
-        # Get all valid columns for this benchmark's models
-        benchmark_cols = [
-            f"{benchmark}_{model}"
-            for model in task.value.models
-            if f"{benchmark}_{model}" in dataframe.columns
-        ]
-        if benchmark_cols:  # Only add if we have valid columns
-            benchmark_groups.append(benchmark_cols)
-            print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
-
-    # Create model groups as well
-    model_groups = []
-    all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
-
-    # For each unique model...
-    for model in all_models:
-        # Get all valid columns for this model across benchmarks
-        model_cols = [
-            f"{task.value.benchmark}_{model}"
-            for task in TasksMib_Subgraph
-            if model in task.value.models
-            and f"{task.value.benchmark}_{model}" in dataframe.columns
-        ]
-        if model_cols:  # Only add if we have valid columns
-            model_groups.append(model_cols)
-            print(f"\nModel group for {model}:", model_cols)
-
-    # Combine all groups
-    all_groups = benchmark_groups + model_groups
-
-    # Flatten groups for default selection (show everything initially)
-    all_columns = [col for group in all_groups for col in group]
-    print("\nAll available columns:", all_columns)
-
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-        select_columns=SelectColumns(
-            default_selection=all_columns,  # Show all columns initially
-            label="Select Results:"
-        ),
-        search_columns=["Method"],
-        hide_columns=[],
-        interactive=False,
-    )
-
-
 # def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with display names for better readability."""
+#     """Initialize the subgraph leaderboard with grouped column selection by benchmark."""
 #     if dataframe is None or dataframe.empty:
 #         raise ValueError("Leaderboard DataFrame is empty or None.")
 
 #     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
 
-#     # First, create our display name mapping
-#     # This is like creating a translation dictionary between internal names and display names
-#     display_mapping = {}
-#     for task in TasksMib_Subgraph:
-#         for model in task.value.models:
-#             field_name = f"{task.value.benchmark}_{model}"
-#             display_name = f"{task.value.benchmark}({model})"
-#             display_mapping[field_name] = display_name
-
-#     # Now when creating benchmark groups, we'll use display names
+#     # Create groups of columns by benchmark
 #     benchmark_groups = []
+
+#     # For each benchmark in our TasksMib_Subgraph enum...
 #     for task in TasksMib_Subgraph:
 #         benchmark = task.value.benchmark
+#         # Get all valid columns for this benchmark's models
 #         benchmark_cols = [
-#             display_mapping[f"{benchmark}_{model}"]  # Use display name from our mapping
+#             f"{benchmark}_{model}"
 #             for model in task.value.models
 #             if f"{benchmark}_{model}" in dataframe.columns
 #         ]
-#         if benchmark_cols:
+#         if benchmark_cols:  # Only add if we have valid columns
 #             benchmark_groups.append(benchmark_cols)
 #             print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
 
-#     # Similarly for model groups
+#     # Create model groups as well
 #     model_groups = []
 #     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
 
+#     # For each unique model...
 #     for model in all_models:
+#         # Get all valid columns for this model across benchmarks
 #         model_cols = [
-#             display_mapping[f"{task.value.benchmark}_{model}"]  # Use display name
+#             f"{task.value.benchmark}_{model}"
 #             for task in TasksMib_Subgraph
 #             if model in task.value.models
 #             and f"{task.value.benchmark}_{model}" in dataframe.columns
 #         ]
-#         if model_cols:
+#         if model_cols:  # Only add if we have valid columns
 #             model_groups.append(model_cols)
 #             print(f"\nModel group for {model}:", model_cols)
 
-#     # Combine all groups using display names
+#     # Combine all groups
 #     all_groups = benchmark_groups + model_groups
+
+#     # Flatten groups for default selection (show everything initially)
 #     all_columns = [col for group in all_groups for col in group]
-
-#     # Important: We need to rename our DataFrame columns to match display names
-#     renamed_df = dataframe.rename(columns=display_mapping)
+#     print("\nAll available columns:", all_columns)
 
 #     return Leaderboard(
-#         value=renamed_df,  # Use DataFrame with display names
+#         value=dataframe,
 #         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
 #         select_columns=SelectColumns(
-#             default_selection=all_columns,  # Now contains display names
+#             default_selection=all_columns,  # Show all columns initially
 #             label="Select Results:"
 #         ),
 #         search_columns=["Method"],
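Editorial sketch (not from the commit) of the grouping scheme both versions of init_leaderboard_mib_subgraph iterate over. The enum shape (`.value.benchmark` / `.value.models`) mirrors how TasksMib_Subgraph is used here; the concrete members below are made up for illustration.

```python
from enum import Enum
from typing import List, NamedTuple

class TaskMeta(NamedTuple):
    benchmark: str
    models: List[str]

class Tasks(Enum):  # stand-in for TasksMib_Subgraph
    IOI = TaskMeta("ioi", ["gpt2", "llama3"])
    MCQA = TaskMeta("mcqa", ["llama3"])

columns = ["ioi_gpt2", "ioi_llama3", "mcqa_llama3"]
benchmark_groups = [
    [f"{t.value.benchmark}_{m}" for m in t.value.models
     if f"{t.value.benchmark}_{m}" in columns]
    for t in Tasks
]
print(benchmark_groups)  # [['ioi_gpt2', 'ioi_llama3'], ['mcqa_llama3']]
```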
@@ -252,152 +285,132 @@ def init_leaderboard_mib_subgraph(dataframe, track):
 # )
 
 
-
-class SmartSelectColumns(gr.SelectColumns):
-    """
-    Enhanced SelectColumns component for Gradio Leaderboard with smart filtering and mapping capabilities.
-    """
-    def __init__(
-        self,
-        column_filters: Optional[Dict[str, List[str]]] = None,
-        column_mapping: Optional[Dict[str, str]] = None,
-        initial_selected: Optional[List[str]] = None,
-        *args,
-        **kwargs
-    ):
-        """
-        Initialize SmartSelectColumns with enhanced functionality.
-
-        Args:
-            column_filters: Dict mapping filter names to lists of substrings to match
-            column_mapping: Dict mapping actual column names to display names
-            initial_selected: List of column names to be initially selected
-            *args, **kwargs: Additional arguments passed to parent SelectColumns
-        """
-        super().__init__(*args, **kwargs)
-        self.column_filters = column_filters or {}
-        self.column_mapping = column_mapping or {}
-        self.reverse_mapping = {v: k for k, v in self.column_mapping.items()} if column_mapping else {}
-        self.initial_selected = initial_selected or []
-
-    def preprocess(self, x: List[str]) -> List[str]:
-        """
-        Transform selected display names back to actual column names.
-
-        Args:
-            x: List of selected display names
-
-        Returns:
-            List of actual column names
-        """
-        return [self.reverse_mapping.get(col, col) for col in x]
-
-    def postprocess(self, y: List[str]) -> List[str]:
-        """
-        Transform actual column names to display names.
-
-        Args:
-            y: List of actual column names
-
-        Returns:
-            List of display names
-        """
-        return [self.column_mapping.get(col, col) for col in y]
-
-    def get_filtered_columns(self, df: pd.DataFrame) -> Dict[str, List[str]]:
-        """
-        Get columns filtered by substring matches.
-
-        Args:
-            df: Input DataFrame
-
-        Returns:
-            Dict mapping filter names to lists of matching display names
-        """
-        filtered_cols = {}
-
-        for filter_name, substrings in self.column_filters.items():
-            matching_cols = []
-            for col in df.columns:
-                if any(substr.lower() in col.lower() for substr in substrings):
-                    display_name = self.column_mapping.get(col, col)
-                    matching_cols.append(display_name)
-            filtered_cols[filter_name] = matching_cols
-
-        return filtered_cols
-
-    def update(
-        self,
-        value: Union[pd.DataFrame, Dict[str, List[str]], Any],
-        interactive: Optional[bool] = None
-    ) -> Dict:
-        """
-        Update component with new values, supporting DataFrame fields.
-
-        Args:
-            value: DataFrame, dict of columns, or fields object
-            interactive: Whether component should be interactive
-
-        Returns:
-            Dict containing update configuration
-        """
-        if isinstance(value, pd.DataFrame):
-            filtered_cols = self.get_filtered_columns(value)
-            choices = [self.column_mapping.get(col, col) for col in value.columns]
-
-            # Set initial selection if provided
-            value = self.initial_selected if self.initial_selected else choices
-
-            return {
-                "choices": choices,
-                "value": value,
-                "filtered_cols": filtered_cols,
-                "interactive": interactive if interactive is not None else self.interactive
-            }
-
-        # Handle fields object (e.g., from dataclass)
-        if hasattr(value, '__dataclass_fields__'):
-            field_names = [field.name for field in fields(value)]
-            choices = [self.column_mapping.get(name, name) for name in field_names]
-            return {
-                "choices": choices,
-                "value": self.initial_selected if self.initial_selected else choices,
-                "interactive": interactive if interactive is not None else self.interactive
-            }
-
-        return super().update(value, interactive)
 
 
-# Define filters and mappings
-filters = {
-    "IOI Metrics": ["ioi"],
-    "Performance Metrics": ["performance"]
-}
-
-mappings = {
-    "ioi_score_1": "IOI Score (Type 1)",
-    "ioi_score_2": "IOI Score (Type 2)",
-    "other_metric": "Other Metric",
-    "performance_1": "Performance Metric 1"
-}
 
-column_filters = filters
-column_mapping = mappings
-initial_columns = renamed_df
 
 
-# Initialize SmartSelectColumns
+def init_leaderboard_mib_subgraph(dataframe, track):
+    """Initialize the subgraph leaderboard with display names for better readability."""
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
+
+    # First, create our display name mapping
+    # This is like creating a translation dictionary between internal names and display names
+    display_mapping = {}
+    for task in TasksMib_Subgraph:
+        for model in task.value.models:
+            field_name = f"{task.value.benchmark}_{model}"
+            display_name = f"{task.value.benchmark}({model})"
+            display_mapping[field_name] = display_name
+
+    # Now when creating benchmark groups, we'll use display names
+    benchmark_groups = []
+    for task in TasksMib_Subgraph:
+        benchmark = task.value.benchmark
+        benchmark_cols = [
+            display_mapping[f"{benchmark}_{model}"]  # Use display name from our mapping
+            for model in task.value.models
+            if f"{benchmark}_{model}" in dataframe.columns
+        ]
+        if benchmark_cols:
+            benchmark_groups.append(benchmark_cols)
+            print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
+
+    # Similarly for model groups
+    model_groups = []
+    all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
+
+    for model in all_models:
+        model_cols = [
+            display_mapping[f"{task.value.benchmark}_{model}"]  # Use display name
+            for task in TasksMib_Subgraph
+            if model in task.value.models
+            and f"{task.value.benchmark}_{model}" in dataframe.columns
+        ]
+        if model_cols:
+            model_groups.append(model_cols)
+            print(f"\nModel group for {model}:", model_cols)
+
+    # Combine all groups using display names
+    all_groups = benchmark_groups + model_groups
+    all_columns = [col for group in all_groups for col in group]
+
+    # Important: We need to rename our DataFrame columns to match display names
+    renamed_df = dataframe.rename(columns=display_mapping)
+
+    # return Leaderboard(
+    #     value=renamed_df,  # Use DataFrame with display names
+    #     datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+    #     select_columns=SelectColumns(
+    #         default_selection=all_columns,  # Now contains display names
+    #         label="Select Results:"
+    #     ),
+    #     search_columns=["Method"],
+    #     hide_columns=[],
+    #     interactive=False,
+    # )
+
+    # Complete column groups for both benchmarks and models
+    column_groups = {
+        # Benchmark groups
+        "Benchmark group for ioi": ["ioi_gpt2", "ioi_qwen2_5", "ioi_gemma2", "ioi_llama3"],
+        "Benchmark group for mcqa": ["mcqa_qwen2_5", "mcqa_gemma2", "mcqa_llama3"],
+        "Benchmark group for arithmetic_addition": ["arithmetic_addition_llama3"],
+        "Benchmark group for arithmetic_subtraction": ["arithmetic_subtraction_llama3"],
+        "Benchmark group for arc_easy": ["arc_easy_gemma2", "arc_easy_llama3"],
+        "Benchmark group for arc_challenge": ["arc_challenge_llama3"],
+
+        # Model groups
+        "Model group for qwen2_5": ["ioi_qwen2_5", "mcqa_qwen2_5"],
+        "Model group for gpt2": ["ioi_gpt2"],
+        "Model group for gemma2": ["ioi_gemma2", "mcqa_gemma2", "arc_easy_gemma2"],
+        "Model group for llama3": [
+            "ioi_llama3",
+            "mcqa_llama3",
+            "arithmetic_addition_llama3",
+            "arithmetic_subtraction_llama3",
+            "arc_easy_llama3",
+            "arc_challenge_llama3"
+        ]
+    }
+
+    # # Complete mappings for more readable display names
+    # mappings = {
+    #     # IOI benchmark mappings
+    #     "ioi_llama3": "IOI (LLaMA-3)",
+    #     "ioi_qwen2_5": "IOI (Qwen-2.5)",
+    #     "ioi_gpt2": "IOI (GPT-2)",
+    #     "ioi_gemma2": "IOI (Gemma-2)",
+
+    #     # MCQA benchmark mappings
+    #     "mcqa_llama3": "MCQA (LLaMA-3)",
+    #     "mcqa_qwen2_5": "MCQA (Qwen-2.5)",
+    #     "mcqa_gemma2": "MCQA (Gemma-2)",
+
+    #     # Arithmetic benchmark mappings
+    #     "arithmetic_addition_llama3": "Arithmetic Addition (LLaMA-3)",
+    #     "arithmetic_subtraction_llama3": "Arithmetic Subtraction (LLaMA-3)",
+
+    #     # ARC benchmark mappings
+    #     "arc_easy_llama3": "ARC Easy (LLaMA-3)",
+    #     "arc_easy_gemma2": "ARC Easy (Gemma-2)",
+    #     "arc_challenge_llama3": "ARC Challenge (LLaMA-3)",
+
+    #     # Other columns
+    #     "eval_name": "Evaluation Name",
+    #     "Method": "Method",
+    #     "Average": "Average Score"
+    # }
+    mappings = {}
+
+    # Create SmartSelectColumns instance
     smart_columns = SmartSelectColumns(
-        column_filters=filters,
+        column_groups=column_groups,
         column_mapping=mappings,
-        initial_selected=initial_columns,
-        multiselect=True
+        initial_selected=["Method", "Average"]
     )
 
-    return gr.Leaderboard(
+    # Create Leaderboard directly
+    leaderboard = Leaderboard(
         value=renamed_df,
-        datatype=[c.type for c in fields(column_class)],
+        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=smart_columns,
         search_columns=["Method"],
         hide_columns=[],
@@ -405,6 +418,10 @@ def init_leaderboard_mib_subgraph(dataframe, track):
     )
+    return leaderboard  # hand the configured board back to the caller
 
 
+
+
 # def init_leaderboard_mib_subgraph(dataframe, track):
 #     """Initialize the subgraph leaderboard with group-based column selection."""
 #     if dataframe is None or dataframe.empty:
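Editorial sketch (not from the commit) of the rename step the new function depends on: columns are renamed to display names for the UI, and the inverse of display_mapping recovers the raw names. Plain pandas; the names are illustrative.

```python
import pandas as pd

display_mapping = {"ioi_gpt2": "ioi(gpt2)", "mcqa_llama3": "mcqa(llama3)"}
df = pd.DataFrame({"ioi_gpt2": [0.5], "mcqa_llama3": [0.7], "Method": ["m1"]})

renamed_df = df.rename(columns=display_mapping)
print(list(renamed_df.columns))  # ['ioi(gpt2)', 'mcqa(llama3)', 'Method']

reverse = {v: k for k, v in display_mapping.items()}
print([reverse.get(c, c) for c in renamed_df.columns])  # raw names restored
```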
custom-select-columns.py CHANGED
@@ -287,25 +287,21 @@ def initialize_leaderboard(df: pd.DataFrame, column_class: Any,
     Returns:
         Configured Leaderboard instance
     """
-    # Create renamed DataFrame with display names
-    renamed_df = df.rename(columns=mappings)
-
-    # Initialize SmartSelectColumns
-    smart_columns = SmartSelectColumns(
-        column_filters=filters,
-        column_mapping=mappings,
-        initial_selected=initial_columns,
-        multiselect=True
-    )
+
+    # Define filters and mappings
+    filters = {
+        "IOI Metrics": ["ioi"],
+        "Performance Metrics": ["performance"]
+    }
 
-    return gr.Leaderboard(
-        value=renamed_df,
-        datatype=[c.type for c in fields(column_class)],
-        select_columns=smart_columns,
-        search_columns=["Method"],
-        hide_columns=[],
-        interactive=False
-    )
+    mappings = {
+        "ioi_score_1": "IOI Score (Type 1)",
+        "ioi_score_2": "IOI Score (Type 2)",
+        "other_metric": "Other Metric",
+        "performance_1": "Performance Metric 1"
+    }
+
 
 # Example usage
 if __name__ == "__main__":
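Editorial sketch (not from the commit) of how substring filters like the ones above bucket columns; the matching rule follows get_filtered_columns() from the removed app.py class (case-insensitive substring match).

```python
columns = ["ioi_score_1", "ioi_score_2", "performance_1", "Method"]
filters = {"IOI Metrics": ["ioi"], "Performance Metrics": ["performance"]}

buckets = {
    name: [c for c in columns
           if any(s.lower() in c.lower() for s in substrings)]
    for name, substrings in filters.items()
}
print(buckets)
# {'IOI Metrics': ['ioi_score_1', 'ioi_score_2'],
#  'Performance Metrics': ['performance_1']}
```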
@@ -321,7 +317,7 @@ if __name__ == "__main__":
     # Define filters and mappings
     filters = {
         "IOI Metrics": ["ioi"],
-        "Performance Metrics": ["performance"]
+        "gemma2.5": ["gemma2_5"]
     }
 
     mappings = {
@@ -341,5 +337,289 @@ if __name__ == "__main__":
         mappings=mappings,
         initial_columns=["Method", "IOI Score (Type 1)"]
     )
 
-    demo.launch()
+
+    # Create renamed DataFrame with display names
+    renamed_df = df.rename(columns=mappings)
+
+    initial_columns = ["Method", "IOI Score (Type 1)"]
+    # TODO: initial_columns=?
+
+    # Initialize SmartSelectColumns
+    smart_columns = SmartSelectColumns(
+        column_filters=filters,
+        column_mapping=mappings,
+        initial_selected=initial_columns,
+        multiselect=True
+    )
+    column_class = None  # placeholder; must be a column dataclass before fields() is usable
+
+    leaderboard = gr.Leaderboard(
+        value=renamed_df,
+        datatype=[c.type for c in fields(column_class)] if column_class else None,
+        select_columns=smart_columns,
+        search_columns=["Method"],
+        hide_columns=[],
+        interactive=False
+    )
+
+    demo.launch()  # assumes an enclosing gr.Blocks() bound to `demo`
+
+
+from gradio_leaderboard import SelectColumns, Leaderboard
+import pandas as pd
+from typing import List, Dict, Union, Optional, Any
+from dataclasses import fields
+
+class SmartSelectColumns(SelectColumns):
+    """
+    Enhanced SelectColumns component for gradio_leaderboard with explicit column grouping.
+    """
+    def __init__(
+        self,
+        column_groups: Optional[Dict[str, List[str]]] = None,
+        column_mapping: Optional[Dict[str, str]] = None,
+        initial_selected: Optional[List[str]] = None,
+        **kwargs
+    ):
+        """
+        Initialize SmartSelectColumns with enhanced functionality.
+
+        Args:
+            column_groups: Dict mapping group names to lists of columns in that group
+            column_mapping: Dict mapping actual column names to display names
+            initial_selected: List of columns to show initially
+        """
+        super().__init__(**kwargs)
+        self.column_groups = column_groups or {}
+        self.column_mapping = column_mapping or {}
+        self.reverse_mapping = {v: k for k, v in self.column_mapping.items()} if column_mapping else {}
+        self.initial_selected = initial_selected or []
+
+    def preprocess_value(self, x: List[str]) -> List[str]:
+        """Transform selected display names back to actual column names."""
+        return [self.reverse_mapping.get(col, col) for col in x]
+
+    def postprocess_value(self, y: List[str]) -> List[str]:
+        """Transform actual column names to display names."""
+        return [self.column_mapping.get(col, col) for col in y]
+
+    def update(
+        self,
+        value: Union[pd.DataFrame, Dict[str, List[str]], Any]
+    ) -> Dict:
+        """Update component with new values."""
+        if isinstance(value, pd.DataFrame):
+            # Get all column names and convert to display names
+            choices = [self.column_mapping.get(col, col) for col in value.columns]
+
+            # Use initial selection or default columns
+            selected = self.initial_selected if self.initial_selected else choices
+
+            # Convert column groups to use display names
+            filtered_cols = {}
+            for group_name, columns in self.column_groups.items():
+                filtered_cols[group_name] = [
+                    self.column_mapping.get(col, col)
+                    for col in columns
+                    if col in value.columns
+                ]
+
+            return {
+                "choices": choices,
+                "value": selected,
+                "filtered_cols": filtered_cols
+            }
+
+        # Handle fields object
+        if hasattr(value, '__dataclass_fields__'):
+            field_names = [field.name for field in fields(value)]
+            choices = [self.column_mapping.get(name, name) for name in field_names]
+            return {
+                "choices": choices,
+                "value": self.initial_selected if self.initial_selected else choices
+            }
+
+        return super().update(value)
+
+
+# Example usage
+if __name__ == "__main__":
+    # Sample DataFrame
+    # df = pd.DataFrame({
+    #     "eval_name": ["test1", "test2", "test3"],
+    #     "Method": ["method1", "method2", "method3"],
+    #     "ioi_llama3": [0.1, 0.2, 0.3],
+    #     "ioi_qwen2_5": [0.4, 0.5, 0.6],
+    #     "ioi_gpt2": [0.7, 0.8, 0.9],
+    #     "mcqa_llama3": [0.2, 0.3, 0.4],
+    #     "Average": [0.35, 0.45, 0.55]
+    # })
+
+    # Complete column groups for both benchmarks and models
+    column_groups = {
+        # Benchmark groups
+        "Benchmark group for ioi": ["ioi_gpt2", "ioi_qwen2_5", "ioi_gemma2", "ioi_llama3"],
+        "Benchmark group for mcqa": ["mcqa_qwen2_5", "mcqa_gemma2", "mcqa_llama3"],
+        "Benchmark group for arithmetic_addition": ["arithmetic_addition_llama3"],
+        "Benchmark group for arithmetic_subtraction": ["arithmetic_subtraction_llama3"],
+        "Benchmark group for arc_easy": ["arc_easy_gemma2", "arc_easy_llama3"],
+        "Benchmark group for arc_challenge": ["arc_challenge_llama3"],
+
+        # Model groups
+        "Model group for qwen2_5": ["ioi_qwen2_5", "mcqa_qwen2_5"],
+        "Model group for gpt2": ["ioi_gpt2"],
+        "Model group for gemma2": ["ioi_gemma2", "mcqa_gemma2", "arc_easy_gemma2"],
+        "Model group for llama3": [
+            "ioi_llama3",
+            "mcqa_llama3",
+            "arithmetic_addition_llama3",
+            "arithmetic_subtraction_llama3",
+            "arc_easy_llama3",
+            "arc_challenge_llama3"
+        ]
+    }
+
+    # Complete mappings for more readable display names
+    mappings = {
+        # IOI benchmark mappings
+        "ioi_llama3": "IOI (LLaMA-3)",
+        "ioi_qwen2_5": "IOI (Qwen-2.5)",
+        "ioi_gpt2": "IOI (GPT-2)",
+        "ioi_gemma2": "IOI (Gemma-2)",
+
+        # MCQA benchmark mappings
+        "mcqa_llama3": "MCQA (LLaMA-3)",
+        "mcqa_qwen2_5": "MCQA (Qwen-2.5)",
+        "mcqa_gemma2": "MCQA (Gemma-2)",
+
+        # Arithmetic benchmark mappings
+        "arithmetic_addition_llama3": "Arithmetic Addition (LLaMA-3)",
+        "arithmetic_subtraction_llama3": "Arithmetic Subtraction (LLaMA-3)",
+
+        # ARC benchmark mappings
+        "arc_easy_llama3": "ARC Easy (LLaMA-3)",
+        "arc_easy_gemma2": "ARC Easy (Gemma-2)",
+        "arc_challenge_llama3": "ARC Challenge (LLaMA-3)",
+
+        # Other columns
+        "eval_name": "Evaluation Name",
+        "Method": "Method",
+        "Average": "Average Score"
+    }
+
+    # Create SmartSelectColumns instance
+    smart_columns = SmartSelectColumns(
+        column_groups=column_groups,
+        column_mapping=mappings,
+        initial_selected=["Method", "Average"]
+    )
+
+    # Create Leaderboard directly
+    leaderboard = Leaderboard(
+        value=df,  # NOTE: requires `df`, e.g. the sample DataFrame above uncommented
+        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],  # imported elsewhere
+        select_columns=smart_columns,
+        search_columns=["Method"],
+        hide_columns=[],
+        interactive=False
+    )
+
+
+# Captured console output from a test run:
+# Debugging DataFrame columns: ['eval_name', 'Method', 'ioi_llama3', 'ioi_qwen2_5', 'ioi_gpt2', 'ioi_gemma2', 'mcqa_llama3', 'mcqa_qwen2_5', 'mcqa_gemma2', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_llama3', 'arc_easy_gemma2', 'arc_challenge_llama3', 'Average']
+#
+# Benchmark group for ioi: ['ioi_gpt2', 'ioi_qwen2_5', 'ioi_gemma2', 'ioi_llama3']
+# Benchmark group for mcqa: ['mcqa_qwen2_5', 'mcqa_gemma2', 'mcqa_llama3']
+# Benchmark group for arithmetic_addition: ['arithmetic_addition_llama3']
+# Benchmark group for arithmetic_subtraction: ['arithmetic_subtraction_llama3']
+# Benchmark group for arc_easy: ['arc_easy_gemma2', 'arc_easy_llama3']
+# Benchmark group for arc_challenge: ['arc_challenge_llama3']
+#
+# Model group for qwen2_5: ['ioi_qwen2_5', 'mcqa_qwen2_5']
+# Model group for gpt2: ['ioi_gpt2']
+# Model group for gemma2: ['ioi_gemma2', 'mcqa_gemma2', 'arc_easy_gemma2']
+# Model group for llama3: ['ioi_llama3', 'mcqa_llama3', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_llama3', 'arc_challenge_llama3']
+#
+# All available columns: ['ioi_gpt2', 'ioi_qwen2_5', 'ioi_gemma2', 'ioi_llama3', 'mcqa_qwen2_5', 'mcqa_gemma2', 'mcqa_llama3', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_gemma2', 'arc_easy_llama3', 'arc_challenge_llama3', 'ioi_qwen2_5', 'mcqa_qwen2_5', 'ioi_gpt2', 'ioi_gemma2', 'mcqa_gemma2', 'arc_easy_gemma2', 'ioi_llama3', 'mcqa_llama3', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_llama3', 'arc_challenge_llama3']
+#
+# * Running on local URL: http://0.0.0.0:7860
+# /usr/local/lib/python3.10/site-packages/gradio/blocks.py:2634: UserWarning: Setting share=True is not supported on Hugging Face Spaces
+#   warnings.warn(
+#
+# To create a public link, set `share=True` in `launch()`.
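Editorial note: the captured log's "All available columns" line lists every column twice, because each column belongs to both a benchmark group and a model group. If that flattened list feeds default_selection, an order-preserving dedup (sketch below, plain Python) avoids the redundant entries.

```python
all_columns = [
    "ioi_gpt2", "ioi_qwen2_5", "ioi_gemma2", "ioi_llama3",  # from benchmark groups
    "ioi_qwen2_5", "ioi_gpt2", "ioi_gemma2", "ioi_llama3",  # model groups repeat them
]
unique_columns = list(dict.fromkeys(all_columns))  # keeps first-seen order
print(unique_columns)  # ['ioi_gpt2', 'ioi_qwen2_5', 'ioi_gemma2', 'ioi_llama3']
```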