jasonshaoshun committed on
Commit
e8fa8c8
·
1 Parent(s): 00daaaf
src/display/utils.py CHANGED
@@ -61,23 +61,23 @@ AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_
61
  ##############################################################################################################
62
  # Version 1
63
 
64
- # auto_eval_column_dict_mib_subgraph = []
65
 
66
- # # Method name column
67
- # auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
68
 
69
- # # For each task and model combination
70
- # for task in TasksMib_Subgraph:
71
- # for model in task.value.models:
72
- # col_name = f"{task.value.benchmark}_{model}" # ioi_gpt2, mcqa_qwen2.5, etc.
73
- # auto_eval_column_dict_mib_subgraph.append([
74
- # col_name,
75
- # ColumnContent,
76
- # ColumnContent(col_name, "number", True)
77
- # ])
78
 
79
- # # Average column
80
- # auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
81
 
82
 
83
  # ##############################################################################################################
@@ -118,39 +118,39 @@ AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_
118
 
119
 
120
  ##############################################################################################################
121
- # Version 3
122
- auto_eval_column_dict_mib_subgraph = []
123
 
124
- # Method name column (always present)
125
- auto_eval_column_dict_mib_subgraph.append(
126
- ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
127
- )
128
 
129
- # Add columns for each task-model combination
130
- for task in TasksMib_Subgraph:
131
- for model in task.value.models:
132
- field_name = f"{task.value.benchmark}_{model}"
133
- display_name = f"{task.value.benchmark}({model})"
134
 
135
- print(f"Creating column - Field name: {field_name}, Display name: {display_name}")
136
 
137
- column_content = ColumnContent(display_name, "number", True)
138
- print(f"Column content name property: {column_content.name}")
139
 
140
- auto_eval_column_dict_mib_subgraph.append([
141
- field_name,
142
- ColumnContent,
143
- column_content
144
- ])
145
 
146
- # Add the Average column
147
- auto_eval_column_dict_mib_subgraph.append(
148
- ["average", ColumnContent, ColumnContent("Average", "number", True)]
149
- )
150
 
151
- print("\nFinal column configurations:")
152
- for field in auto_eval_column_dict_mib_subgraph:
153
- print(f"Field name: {field[0]}, Display name: {field[2].name}")
154
 
155
 
156
 
 
61
  ##############################################################################################################
62
  # Version 1
63
 
64
+ auto_eval_column_dict_mib_subgraph = []
65
 
66
+ # Method name column
67
+ auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
68
 
69
+ # For each task and model combination
70
+ for task in TasksMib_Subgraph:
71
+ for model in task.value.models:
72
+ col_name = f"{task.value.benchmark}_{model}" # ioi_gpt2, mcqa_qwen2.5, etc.
73
+ auto_eval_column_dict_mib_subgraph.append([
74
+ col_name,
75
+ ColumnContent,
76
+ ColumnContent(col_name, "number", True)
77
+ ])
78
 
79
+ # Average column
80
+ auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
81
 
82
 
83
  # ##############################################################################################################
 
118
 
119
 
120
  ##############################################################################################################
121
+ # # Version 3
122
+ # auto_eval_column_dict_mib_subgraph = []
123
 
124
+ # # Method name column (always present)
125
+ # auto_eval_column_dict_mib_subgraph.append(
126
+ # ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
127
+ # )
128
 
129
+ # # Add columns for each task-model combination
130
+ # for task in TasksMib_Subgraph:
131
+ # for model in task.value.models:
132
+ # field_name = f"{task.value.benchmark}_{model}"
133
+ # display_name = f"{task.value.benchmark}({model})"
134
 
135
+ # print(f"Creating column - Field name: {field_name}, Display name: {display_name}")
136
 
137
+ # column_content = ColumnContent(display_name, "number", True)
138
+ # print(f"Column content name property: {column_content.name}")
139
 
140
+ # auto_eval_column_dict_mib_subgraph.append([
141
+ # field_name,
142
+ # ColumnContent,
143
+ # column_content
144
+ # ])
145
 
146
+ # # Add the Average column
147
+ # auto_eval_column_dict_mib_subgraph.append(
148
+ # ["average", ColumnContent, ColumnContent("Average", "number", True)]
149
+ # )
150
 
151
+ # print("\nFinal column configurations:")
152
+ # for field in auto_eval_column_dict_mib_subgraph:
153
+ # print(f"Field name: {field[0]}, Display name: {field[2].name}")
154
 
155
 
156
 
src/leaderboard/read_evals.py CHANGED
@@ -121,7 +121,7 @@ class EvalResult_MIB_SUBGRAPH:
121
 
122
  for task in TasksMib_Subgraph:
123
  for model in task.value.models:
124
- print(f"task is {task}, task.value.benchmark is {task.value.benchmark}, model is {model}")
125
  data_dict[f"{task.value.benchmark}_{model}"] = '-'
126
 
127
  all_scores = []
 
121
 
122
  for task in TasksMib_Subgraph:
123
  for model in task.value.models:
124
+ # print(f"task is {task}, task.value.benchmark is {task.value.benchmark}, model is {model}")
125
  data_dict[f"{task.value.benchmark}_{model}"] = '-'
126
 
127
  all_scores = []