jasonshaoshun committed on
Commit
66f5701
·
1 Parent(s): c50d688
Files changed (1)
  1. app.py +60 -329
app.py CHANGED
@@ -127,386 +127,117 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
 from src.about import TasksMib_Subgraph
 
 
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Get unique tasks and models for filters
-#     tasks = list(set(task.value.benchmark for task in TasksMib_Subgraph))
-#     models = list(set(
-#         model
-#         for task in TasksMib_Subgraph
-#         for model in task.value.models
-#     ))
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-#             label="Select Columns to Display:",
-#         ),
-#         column_filters=[
-#             ColumnFilter(
-#                 column="task_filter",
-#                 choices=tasks,
-#                 label="Filter by Task:",
-#                 default=None
-#             ),
-#             ColumnFilter(
-#                 column="model_filter",
-#                 choices=models,
-#                 label="Filter by Model:",
-#                 default=None
-#             )
-#         ],
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Add filter columns to display
-#     dataframe['Task'] = dataframe.apply(
-#         lambda row: [task.value.benchmark for task in TasksMib_Subgraph
-#                      if any(f"{task.value.benchmark}_{model}" in row.index
-#                             for model in task.value.models)][0],
-#         axis=1
-#     )
-
-#     dataframe['Model'] = dataframe.apply(
-#         lambda row: [model for task in TasksMib_Subgraph
-#                      for model in task.value.models
-#                      if f"{task.value.benchmark}_{model}" in row.index][0],
-#         axis=1
-#     )
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-#             label="Select Columns to Display:",
-#         ),
-#         search_columns=["Method", "Task", "Model"], # Add Task and Model to searchable columns
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
 
 # def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with grouped column selection."""
+#     """Initialize the subgraph leaderboard with grouped column selection by benchmark."""
 #     if dataframe is None or dataframe.empty:
 #         raise ValueError("Leaderboard DataFrame is empty or None.")
 
-#     # Get tasks and models using the new class methods
-#     tasks = TasksMib_Subgraph.get_all_tasks()
-#     models = TasksMib_Subgraph.get_all_models()
-
-#     # Create a mapping from selection to actual column names
-#     selection_map = {}
+#     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
 
-#     # Add task mappings - when a task is selected, show all its columns
-#     for task in tasks:
-#         # For each task, find all valid task_model combinations
-#         valid_combos = []
-#         for model in models:
-#             col_name = f"{task}_{model}"
-#             if col_name in dataframe.columns:
-#                 valid_combos.append(col_name)
-#         if valid_combos:
-#             selection_map[task] = valid_combos
+#     # Create groups of columns by benchmark
+#     benchmark_groups = []
 
-#     # Add model mappings - when a model is selected, show all its columns
-#     for model in models:
-#         # For each model, find all valid task_model combinations
-#         valid_combos = []
-#         for task in tasks:
-#             col_name = f"{task}_{model}"
-#             if col_name in dataframe.columns:
-#                 valid_combos.append(col_name)
-#         if valid_combos:
-#             selection_map[model] = valid_combos
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             choices=[tasks, models], # Two groups of choices
-#             labels=["Tasks", "Models"], # Labels for each group
-#             default_selection=[*tasks, *models], # Show everything by default
-#             cant_deselect=["Method"], # Method column always visible
-#             label="Filter by Tasks or Models:",
-#             selection_map=selection_map # Map selections to actual columns
-#         ),
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with grouped column selection for gradio-leaderboard 0.0.13"""
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Get all unique tasks and models
-#     tasks = [task.value.benchmark for task in TasksMib_Subgraph]
-#     models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
-
-#     # Create two selection groups: one for tasks and one for models
-#     # In 0.0.13, we can only have one SelectColumns, so we'll combine them
-#     selection_choices = [
-#         *[f"Task: {task}" for task in tasks], # Prefix with 'Task:' for clarity
-#         *[f"Model: {model}" for model in models] # Prefix with 'Model:' for clarity
-#     ]
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=selection_choices, # Show all by default
-#             choices=selection_choices,
-#             cant_deselect=["Method"], # Method column always visible
-#             label="Select Tasks or Models:",
-#         ),
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard focusing only on task and model filtering.
-
-#     This implementation creates a focused view where users can select which task-model
-#     combinations they want to see, making the analysis of results more straightforward.
-#     """
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Get all task-model combinations that actually exist in our data
-#     task_model_columns = []
 #     for task in TasksMib_Subgraph:
-#         for model in task.value.models:
-#             col_name = f"{task.value.benchmark}_{model}"
-#             if col_name in dataframe.columns:
-#                 task_model_columns.append(col_name)
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=task_model_columns,
-#             label="Select Task-Model Combinations:",
-#         ),
-#         search_columns=["Method"], # Keep Method searchable but not in column selection
-#         hide_columns=[], # We don't need to hide any columns
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-
-
-
-
-
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with verified task/model column selection"""
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # First, let's identify which columns actually exist in our dataframe
-#     print("Available columns in dataframe:", dataframe.columns.tolist())
-
-#     # Create task selections based on TasksMib_Subgraph definition
-#     task_selections = []
-#     for task in TasksMib_Subgraph:
-#         task_cols = []
-#         for model in task.value.models:
-#             col_name = f"{task.value.benchmark}_{model}"
-#             if col_name in dataframe.columns:
-#                 task_cols.append(col_name)
-
-#         if task_cols: # Only add tasks that have data
-#             print(f"Task {task.value.benchmark} has columns:", task_cols)
-#             task_selections.append(f"Task: {task.value.benchmark}")
-
-#     # Create model selections by checking which models appear in columns
-#     model_selections = []
+#         benchmark = task.value.benchmark
+#         # Get all valid columns for this benchmark's models
+#         benchmark_cols = [
+#             f"{benchmark}_{model}"
+#             for model in task.value.models
+#             if f"{benchmark}_{model}" in dataframe.columns
+#         ]
+#         if benchmark_cols: # Only add if we have valid columns
+#             benchmark_groups.append(benchmark_cols)
+#             print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
+
+#     # Create model groups as well
+#     model_groups = []
 #     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
 
+#     # For each unique model...
 #     for model in all_models:
-#         model_cols = []
-#         for task in TasksMib_Subgraph:
-#             if model in task.value.models:
-#                 col_name = f"{task.value.benchmark}_{model}"
-#                 if col_name in dataframe.columns:
-#                     model_cols.append(col_name)
-
-#         if model_cols: # Only add models that have data
-#             print(f"Model {model} has columns:", model_cols)
-#             model_selections.append(f"Model: {model}")
-
-#     # Combine all selections
-#     selections = task_selections + model_selections
-#     print("Final selection options:", selections)
-
-#     # Print DataFrame information
-#     print("\nDebugging DataFrame:")
-#     print("DataFrame columns:", dataframe.columns.tolist())
-#     print("DataFrame shape:", dataframe.shape)
-#     print("DataFrame head:\n", dataframe.head())
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=selections,
-#             label="Select Tasks or Models:"
-#         ),
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
+#         # Get all valid columns for this model across benchmarks
+#         model_cols = [
+#             f"{task.value.benchmark}_{model}"
+#             for task in TasksMib_Subgraph
+#             if model in task.value.models
+#             and f"{task.value.benchmark}_{model}" in dataframe.columns
+#         ]
+#         if model_cols: # Only add if we have valid columns
+#             model_groups.append(model_cols)
+#             print(f"\nModel group for {model}:", model_cols)
+
+#     # Combine all groups
+#     all_groups = benchmark_groups + model_groups
+
+#     # Flatten groups for default selection (show everything initially)
+#     all_columns = [col for group in all_groups for col in group]
+#     print("\nAll available columns:", all_columns)
 
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with benchmark and model filtering capabilities."""
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Print DataFrame information for debugging
-#     # print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
-
-#     # Get result columns (excluding Method and Average)
-#     result_columns = [col for col in dataframe.columns
-#                       if col not in ['Method', 'Average', 'eval_name'] and '_' in col]
-
-#     # Create benchmark and model selections
-#     benchmarks = set()
-#     models = set()
-
-#     print(f"\nDebugging Result Columns: {result_columns}")
-#     # Extract unique benchmarks and models from column names
-#     for col in result_columns:
-#         print(f"col is {col}")
-#         benchmark, model = col.split('_', maxsplit=1)
-#         benchmarks.add(benchmark)
-#         models.add(model)
-#         print(f"benchmark is {benchmark} and model is {model}")
-
-#     # Create selection groups
-#     benchmark_selections = {
-#         # For each benchmark, store which columns should be shown
-#         benchmark: [col for col in result_columns if col.startswith(f"{benchmark}_")]
-#         for benchmark in benchmarks
-#     }
-
-#     model_selections = {
-#         # For each model, store which columns should be shown
-#         model: [col for col in result_columns if col.startswith(f"_{model}")]
-#         for model in models
-#     }
-
-#     # Combine the selection mappings
-#     selection_groups = {
-#         **benchmark_selections,
-#         **model_selections
-#     }
-
-#     print("\nDebugging Selection Groups:")
-#     print("Benchmarks:", benchmark_selections.keys())
-#     print("Models:", model_selections.keys())
-
-#     # Convert keys to list for selection options
-#     selection_options = list(selection_groups.keys())
-
 #     return Leaderboard(
 #         value=dataframe,
 #         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
 #         select_columns=SelectColumns(
-#             default_selection=selection_options, # Show all options by default
-#             label="Filter by Benchmark or Model:"
+#             default_selection=all_columns, # Show all columns initially
+#             label="Select Results:"
 #         ),
 #         search_columns=["Method"],
 #         hide_columns=[],
 #         interactive=False,
 #     )
 
-
-
-
 def init_leaderboard_mib_subgraph(dataframe, track):
-    """Initialize the subgraph leaderboard with grouped column selection by benchmark."""
+    """Initialize the subgraph leaderboard with group-based column selection."""
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
 
     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
 
-    # Create groups of columns by benchmark
-    benchmark_groups = []
+    # Create selection mapping for benchmark groups
+    selection_mapping = {}
 
-    # For each benchmark in our TasksMib_Subgraph enum...
+    # Create benchmark groups with descriptive names
     for task in TasksMib_Subgraph:
         benchmark = task.value.benchmark
-        # Get all valid columns for this benchmark's models
+        # Get all columns for this benchmark's models
         benchmark_cols = [
             f"{benchmark}_{model}"
             for model in task.value.models
             if f"{benchmark}_{model}" in dataframe.columns
         ]
-        if benchmark_cols: # Only add if we have valid columns
-            benchmark_groups.append(benchmark_cols)
-            print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
+        if benchmark_cols:
+            # Use a descriptive group name as the key
+            group_name = f"Benchmark: {benchmark.upper()}"
+            selection_mapping[group_name] = benchmark_cols
+            print(f"\n{group_name} maps to:", benchmark_cols)
 
-    # Create model groups as well
-    model_groups = []
+    # Create model groups with descriptive names
     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
-
-    # For each unique model...
     for model in all_models:
-        # Get all valid columns for this model across benchmarks
+        # Get all columns for this model across benchmarks
         model_cols = [
             f"{task.value.benchmark}_{model}"
            for task in TasksMib_Subgraph
            if model in task.value.models
            and f"{task.value.benchmark}_{model}" in dataframe.columns
        ]
-        if model_cols: # Only add if we have valid columns
-            model_groups.append(model_cols)
-            print(f"\nModel group for {model}:", model_cols)
+        if model_cols:
+            # Use a descriptive group name as the key
+            group_name = f"Model: {model}"
+            selection_mapping[group_name] = model_cols
+            print(f"\n{group_name} maps to:", model_cols)
 
-    # Combine all groups
-    all_groups = benchmark_groups + model_groups
-
-    # Flatten groups for default selection (show everything initially)
-    all_columns = [col for group in all_groups for col in group]
-    print("\nAll available columns:", all_columns)
+    # The selection options are the group names
+    selection_options = list(selection_mapping.keys())
+    print("\nSelection options:", selection_options)
 
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=all_columns, # Show all columns initially
-            label="Select Results:"
+            default_selection=selection_options, # Show all groups by default
+            label="Select Benchmark or Model Groups:"
         ),
         search_columns=["Method"],
         hide_columns=[],
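
For context, a minimal sketch of how the reworked init_leaderboard_mib_subgraph might be wired into the app's Gradio UI. This is illustrative only: the LEADERBOARD_DF_MIB_SUBGRAPH name, the tab layout, and the track value are assumptions, not part of this commit.

import gradio as gr

# Hypothetical usage sketch -- not part of this commit. Assumes a pre-built
# results DataFrame (called LEADERBOARD_DF_MIB_SUBGRAPH here) with a "Method"
# column plus one "<benchmark>_<model>" score column per task/model pair.
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Subgraph Track"):
            # Builds the gradio_leaderboard.Leaderboard component; its
            # SelectColumns widget lists the "Benchmark: ..." / "Model: ..."
            # group names computed above as the selectable options.
            leaderboard = init_leaderboard_mib_subgraph(
                LEADERBOARD_DF_MIB_SUBGRAPH,  # assumed results DataFrame
                track="Subgraph",             # assumed label; unused in the function body
            )

demo.launch()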