jasonshaoshun committed
Commit 09666ed
1 Parent(s): e8fa8c8

Files changed (2)
  1. app.py +79 -81
  2. src/display/utils.py +40 -40
app.py CHANGED
@@ -262,30 +262,30 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT



- def init_leaderboard_mib_subgraph(dataframe, track):
-     # print(f"init_leaderboard_mib: dataframe head before loc is {dataframe.head()}\n")
-
-     if dataframe is None or dataframe.empty:
-         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-     # filter for correct track
-     # dataframe = dataframe.loc[dataframe["Track"] == track]
-
-     # print(f"init_leaderboard_mib: dataframe head after loc is {dataframe.head()}\n")
-
-     return Leaderboard(
-         value=dataframe,
-         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-         select_columns=SelectColumns(
-             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-             label="Select Columns to Display:",
-         ),
-         search_columns=["Method"],  # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
-         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-         bool_checkboxgroup_label="Hide models",
-         interactive=False,
-     )
+ # def init_leaderboard_mib_subgraph(dataframe, track):
+ #     # print(f"init_leaderboard_mib: dataframe head before loc is {dataframe.head()}\n")
+
+ #     if dataframe is None or dataframe.empty:
+ #         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+ #     # filter for correct track
+ #     # dataframe = dataframe.loc[dataframe["Track"] == track]
+
+ #     # print(f"init_leaderboard_mib: dataframe head after loc is {dataframe.head()}\n")
+
+ #     return Leaderboard(
+ #         value=dataframe,
+ #         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+ #         select_columns=SelectColumns(
+ #             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+ #             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
+ #             label="Select Columns to Display:",
+ #         ),
+ #         search_columns=["Method"],  # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
+ #         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
+ #         bool_checkboxgroup_label="Hide models",
+ #         interactive=False,
+ #     )



@@ -351,72 +351,70 @@ def init_leaderboard_mib_subgraph(dataframe, track):
     # )


-
- # def init_leaderboard_mib_subgraph(dataframe, track):
- #     """Initialize the subgraph leaderboard with display names for better readability."""
- #     if dataframe is None or dataframe.empty:
- #         raise ValueError("Leaderboard DataFrame is empty or None.")
-
- #     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
-
- #     # First, create our display name mapping
- #     # This is like creating a translation dictionary between internal names and display names
- #     display_mapping = {}
- #     for task in TasksMib_Subgraph:
- #         for model in task.value.models:
- #             field_name = f"{task.value.benchmark}_{model}"
- #             display_name = f"{task.value.benchmark}({model})"
- #             display_mapping[field_name] = display_name
-
- #     # Now when creating benchmark groups, we'll use display names
- #     benchmark_groups = []
- #     for task in TasksMib_Subgraph:
- #         benchmark = task.value.benchmark
- #         benchmark_cols = [
- #             display_mapping[f"{benchmark}_{model}"]  # Use display name from our mapping
- #             for model in task.value.models
- #             if f"{benchmark}_{model}" in dataframe.columns
- #         ]
- #         if benchmark_cols:
- #             benchmark_groups.append(benchmark_cols)
- #             print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
-
- #     # Similarly for model groups
- #     model_groups = []
- #     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
-
- #     for model in all_models:
- #         model_cols = [
- #             display_mapping[f"{task.value.benchmark}_{model}"]  # Use display name
- #             for task in TasksMib_Subgraph
- #             if model in task.value.models
- #             and f"{task.value.benchmark}_{model}" in dataframe.columns
- #         ]
- #         if model_cols:
- #             model_groups.append(model_cols)
- #             print(f"\nModel group for {model}:", model_cols)
-
- #     # Combine all groups using display names
- #     all_groups = benchmark_groups + model_groups
- #     all_columns = [col for group in all_groups for col in group]
-
- #     # Important: We need to rename our DataFrame columns to match display names
- #     renamed_df = dataframe.rename(columns=display_mapping)
-
- #     # Original code
- #     return Leaderboard(
- #         value=renamed_df,  # Use DataFrame with display names
- #         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
- #         select_columns=SelectColumns(
- #             default_selection=all_columns,  # Now contains display names
- #             label="Select Results:"
- #         ),
- #         search_columns=["Method"],
- #         hide_columns=[],
- #         interactive=False,
- #     )
+ def init_leaderboard_mib_subgraph(dataframe, track):
+     """Initialize the subgraph leaderboard with display names for better readability."""
+     if dataframe is None or dataframe.empty:
+         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
+
+     # First, create our display name mapping
+     # This is like creating a translation dictionary between internal names and display names
+     display_mapping = {}
+     for task in TasksMib_Subgraph:
+         for model in task.value.models:
+             field_name = f"{task.value.benchmark}_{model}"
+             display_name = f"{task.value.benchmark}({model})"
+             display_mapping[field_name] = display_name
+
+     # Now when creating benchmark groups, we'll use display names
+     benchmark_groups = []
+     for task in TasksMib_Subgraph:
+         benchmark = task.value.benchmark
+         benchmark_cols = [
+             display_mapping[f"{benchmark}_{model}"]  # Use display name from our mapping
+             for model in task.value.models
+             if f"{benchmark}_{model}" in dataframe.columns
+         ]
+         if benchmark_cols:
+             benchmark_groups.append(benchmark_cols)
+             print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
+
+     # Similarly for model groups
+     model_groups = []
+     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
+
+     for model in all_models:
+         model_cols = [
+             display_mapping[f"{task.value.benchmark}_{model}"]  # Use display name
+             for task in TasksMib_Subgraph
+             if model in task.value.models
+             and f"{task.value.benchmark}_{model}" in dataframe.columns
+         ]
+         if model_cols:
+             model_groups.append(model_cols)
+             print(f"\nModel group for {model}:", model_cols)
+
+     # Combine all groups using display names
+     all_groups = benchmark_groups + model_groups
+     all_columns = [col for group in all_groups for col in group]
+
+     # Important: We need to rename our DataFrame columns to match display names
+     renamed_df = dataframe.rename(columns=display_mapping)
+
+     # Original code
+     return Leaderboard(
+         value=renamed_df,  # Use DataFrame with display names
+         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+         select_columns=SelectColumns(
+             default_selection=all_columns,  # Now contains display names
+             label="Select Results:"
+         ),
+         search_columns=["Method"],
+         hide_columns=[],
+         interactive=False,
+     )
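
For reference, the practical effect of the new init_leaderboard_mib_subgraph is to translate internal column keys such as ioi_gpt2 into display labels such as ioi(gpt2) before the DataFrame reaches the Leaderboard widget, and to default the column selector to every benchmark/model column that actually exists in the data. Below is a minimal sketch of just the renaming step; the TaskMeta dataclass and the concrete benchmark/model values are hypothetical stand-ins for the real TasksMib_Subgraph enum defined elsewhere in the repo (the utils.py comment only confirms key names of the form ioi_gpt2, mcqa_qwen2.5, etc.).

from dataclasses import dataclass
from enum import Enum

# Hypothetical stand-in for the repo's TasksMib_Subgraph enum; the real benchmark
# names and model lists live in the project's task definitions.
@dataclass(frozen=True)
class TaskMeta:
    benchmark: str
    models: tuple

class TasksMib_Subgraph(Enum):
    IOI = TaskMeta("ioi", ("gpt2", "qwen2.5"))
    MCQA = TaskMeta("mcqa", ("qwen2.5",))

# The same field-name -> display-name mapping the new function builds:
display_mapping = {
    f"{task.value.benchmark}_{model}": f"{task.value.benchmark}({model})"
    for task in TasksMib_Subgraph
    for model in task.value.models
}

print(display_mapping)
# {'ioi_gpt2': 'ioi(gpt2)', 'ioi_qwen2.5': 'ioi(qwen2.5)', 'mcqa_qwen2.5': 'mcqa(qwen2.5)'}

Because pandas' DataFrame.rename(columns=...) ignores mapping keys that are not present and leaves unmapped columns untouched, columns like "Method" and "Average" keep their original names after renamed_df = dataframe.rename(columns=display_mapping), which is why "Method" can still be referenced directly in search_columns.
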
src/display/utils.py CHANGED
@@ -61,23 +61,23 @@ AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_
 ##############################################################################################################
 # Version 1

- auto_eval_column_dict_mib_subgraph = []
-
- # Method name column
- auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
-
- # For each task and model combination
- for task in TasksMib_Subgraph:
-     for model in task.value.models:
-         col_name = f"{task.value.benchmark}_{model}"  # ioi_gpt2, mcqa_qwen2.5, etc.
-         auto_eval_column_dict_mib_subgraph.append([
-             col_name,
-             ColumnContent,
-             ColumnContent(col_name, "number", True)
-         ])
-
- # Average column
- auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
+ # auto_eval_column_dict_mib_subgraph = []
+
+ # # Method name column
+ # auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
+
+ # # For each task and model combination
+ # for task in TasksMib_Subgraph:
+ #     for model in task.value.models:
+ #         col_name = f"{task.value.benchmark}_{model}"  # ioi_gpt2, mcqa_qwen2.5, etc.
+ #         auto_eval_column_dict_mib_subgraph.append([
+ #             col_name,
+ #             ColumnContent,
+ #             ColumnContent(col_name, "number", True)
+ #         ])
+
+ # # Average column
+ # auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])


 # ##############################################################################################################
@@ -118,39 +118,39 @@ auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnConte


 ##############################################################################################################
- # # Version 3
- # auto_eval_column_dict_mib_subgraph = []
-
- # # Method name column (always present)
- # auto_eval_column_dict_mib_subgraph.append(
- #     ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
- # )
-
- # # Add columns for each task-model combination
- # for task in TasksMib_Subgraph:
- #     for model in task.value.models:
- #         field_name = f"{task.value.benchmark}_{model}"
- #         display_name = f"{task.value.benchmark}({model})"
-
- #         print(f"Creating column - Field name: {field_name}, Display name: {display_name}")
-
- #         column_content = ColumnContent(display_name, "number", True)
- #         print(f"Column content name property: {column_content.name}")
-
- #         auto_eval_column_dict_mib_subgraph.append([
- #             field_name,
- #             ColumnContent,
- #             column_content
- #         ])
-
- # # Add the Average column
- # auto_eval_column_dict_mib_subgraph.append(
- #     ["average", ColumnContent, ColumnContent("Average", "number", True)]
- # )
-
- # print("\nFinal column configurations:")
- # for field in auto_eval_column_dict_mib_subgraph:
- #     print(f"Field name: {field[0]}, Display name: {field[2].name}")
+ # Version 3
+ auto_eval_column_dict_mib_subgraph = []
+
+ # Method name column (always present)
+ auto_eval_column_dict_mib_subgraph.append(
+     ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
+ )
+
+ # Add columns for each task-model combination
+ for task in TasksMib_Subgraph:
+     for model in task.value.models:
+         field_name = f"{task.value.benchmark}_{model}"
+         display_name = f"{task.value.benchmark}({model})"
+
+         print(f"Creating column - Field name: {field_name}, Display name: {display_name}")
+
+         column_content = ColumnContent(display_name, "number", True)
+         print(f"Column content name property: {column_content.name}")
+
+         auto_eval_column_dict_mib_subgraph.append([
+             field_name,
+             ColumnContent,
+             column_content
+         ])
+
+ # Add the Average column
+ auto_eval_column_dict_mib_subgraph.append(
+     ["average", ColumnContent, ColumnContent("Average", "number", True)]
+ )
+
+ print("\nFinal column configurations:")
+ for field in auto_eval_column_dict_mib_subgraph:
+     print(f"Field name: {field[0]}, Display name: {field[2].name}")
+ print(f"Field name: {field[0]}, Display name: {field[2].name}")
154
 
155
 
156