jasonshaoshun committed
Commit
9fd4b06
1 Parent(s): 2817fcb
Files changed (2):
  1. app.py +76 -8
  2. src/display/utils.py +45 -1
app.py CHANGED
@@ -94,16 +94,48 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 
-def init_leaderboard_mib_subgraph(dataframe, track):
-    # print(f"init_leaderboard_mib: dataframe head before loc is {dataframe.head()}\n")
 
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
 
-    # filter for correct track
-    # dataframe = dataframe.loc[dataframe["Track"] == track]
 
-    # print(f"init_leaderboard_mib: dataframe head after loc is {dataframe.head()}\n")
 
+# def init_leaderboard_mib_subgraph(dataframe, track):
+#     # print(f"init_leaderboard_mib: dataframe head before loc is {dataframe.head()}\n")
+
+#     if dataframe is None or dataframe.empty:
+#         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+#     # filter for correct track
+#     # dataframe = dataframe.loc[dataframe["Track"] == track]
+
+#     # print(f"init_leaderboard_mib: dataframe head after loc is {dataframe.head()}\n")
+
+#     return Leaderboard(
+#         value=dataframe,
+#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+#         select_columns=SelectColumns(
+#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
+#             label="Select Columns to Display:",
+#         ),
+#         search_columns=["Method"],  # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
+#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
+#         bool_checkboxgroup_label="Hide models",
+#         interactive=False,
+#     )
+
+
+def init_leaderboard_mib_subgraph(dataframe, track):
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    # Get unique tasks and models for filters
+    tasks = list(set(task.value.benchmark for task in TasksMib_Subgraph))
+    models = list(set(
+        model
+        for task in TasksMib_Subgraph
+        for model in task.value.models
+    ))
 
     return Leaderboard(
         value=dataframe,
@@ -113,12 +145,40 @@ def init_leaderboard_mib_subgraph(dataframe, track):
             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=["Method"],  # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
+        column_filters=[
+            ColumnFilter(
+                column="task_filter",
+                choices=tasks,
+                label="Filter by Task:",
+                default=None
+            ),
+            ColumnFilter(
+                column="model_filter",
+                choices=models,
+                label="Filter by Model:",
+                default=None
+            )
+        ],
+        search_columns=["Method"],
         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
 
+
+
 def init_leaderboard_mib_causalgraph(dataframe, track):
     # print("Debugging column issues:")
     # print("\nActual DataFrame columns:")
@@ -233,7 +293,15 @@ with demo:
     # with gr.Row():
    #     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
+        # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
+        #     leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
         with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
+            # Add description for filters
+            gr.Markdown("""
+            ### Filtering Options
+            Use the dropdown menus below to filter results by specific tasks or models.
+            You can combine filters to see specific task-model combinations.
+            """)
             leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
 
     # Then modify the Causal Graph tab section
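
Note on the new filters: init_leaderboard_mib_subgraph now derives its dropdown choices by iterating TasksMib_Subgraph (reading task.value.benchmark and task.value.models) and wires them to the task_filter / model_filter columns declared in src/display/utils.py. The snippet below is a minimal, hypothetical sketch of the enum shape those comprehensions assume; the real definition lives elsewhere in the repo, and the members here are placeholders taken from the ioi_gpt2 / mcqa_qwen2.5 examples in the column comments.

# Hypothetical sketch of the enum shape this diff assumes; the real
# TasksMib_Subgraph lives elsewhere in the repo and may differ.
from dataclasses import dataclass
from enum import Enum


@dataclass
class TaskMIBSubgraph:  # assumed container exposing .benchmark and .models
    benchmark: str
    models: list


class TasksMib_Subgraph(Enum):
    ioi = TaskMIBSubgraph("ioi", ["gpt2", "qwen2.5"])    # placeholder members
    mcqa = TaskMIBSubgraph("mcqa", ["gpt2", "qwen2.5"])


# What the comprehensions in init_leaderboard_mib_subgraph evaluate to:
tasks = list(set(t.value.benchmark for t in TasksMib_Subgraph))           # e.g. ['ioi', 'mcqa'] (set order may vary)
models = list(set(m for t in TasksMib_Subgraph for m in t.value.models))  # e.g. ['gpt2', 'qwen2.5']

# And the per-combination result columns built in src/display/utils.py:
result_cols = [f"{t.value.benchmark}_{m}" for t in TasksMib_Subgraph for m in t.value.models]
# -> ['ioi_gpt2', 'ioi_qwen2.5', 'mcqa_gpt2', 'mcqa_qwen2.5']

For the ColumnFilter dropdowns to actually narrow rows, the leaderboard DataFrame presumably also needs per-row values in the new Task / Model columns; this commit adds the columns to the schema and the filter widgets, but the population of those values is not part of the two files changed here.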
src/display/utils.py CHANGED
@@ -60,15 +60,49 @@ AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_
 
 
 
+# auto_eval_column_dict_mib_subgraph = []
+
+# # Method name column
+# auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
+
+# # For each task and model combination
+# for task in TasksMib_Subgraph:
+#     for model in task.value.models:
+#         col_name = f"{task.value.benchmark}_{model}"  # ioi_gpt2, mcqa_qwen2.5, etc.
+#         auto_eval_column_dict_mib_subgraph.append([
+#             col_name,
+#             ColumnContent,
+#             ColumnContent(col_name, "number", True)
+#         ])
+
+# # Average column
+# auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
+
 auto_eval_column_dict_mib_subgraph = []
 
 # Method name column
 auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
 
+# Add task filter column
+task_values = list(set(task.value.benchmark for task in TasksMib_Subgraph))
+auto_eval_column_dict_mib_subgraph.append(
+    ["task_filter", ColumnContent, ColumnContent("Task", "str", True, never_hidden=True)]
+)
+
+# Add model filter column
+model_values = list(set(
+    model
+    for task in TasksMib_Subgraph
+    for model in task.value.models
+))
+auto_eval_column_dict_mib_subgraph.append(
+    ["model_filter", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)]
+)
+
 # For each task and model combination
 for task in TasksMib_Subgraph:
     for model in task.value.models:
-        col_name = f"{task.value.benchmark}_{model}"  # ioi_gpt2, mcqa_qwen2.5, etc.
+        col_name = f"{task.value.benchmark}_{model}"
         auto_eval_column_dict_mib_subgraph.append([
             col_name,
             ColumnContent,
@@ -79,6 +113,10 @@ for task in TasksMib_Subgraph:
 auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
 
 
+
+
+
+
 # Create the dataclass for MIB columns
 AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True)
 
@@ -102,6 +140,12 @@ BENCHMARK_COLS_MIB_CAUSALGRAPH = []
 
 
 
+
+
+
+
+
+
 # # Initialize the MIB causal graph columns
 # auto_eval_column_dict_mib_causalgraph = []
 
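
For context on the utils.py pattern: each entry in auto_eval_column_dict_mib_subgraph is a [field_name, type, default] triple, and make_dataclass turns the list into a frozen class whose defaults are the ColumnContent instances that app.py later reads via the template's fields() helper. The sketch below is a minimal, self-contained approximation, assuming ColumnContent and fields() follow the usual Hugging Face leaderboard template; the repo's actual definitions may differ.

# Minimal approximation of the ColumnContent / make_dataclass pattern,
# assuming the usual HF leaderboard-template definitions (not copied from this repo).
from dataclasses import dataclass, make_dataclass


@dataclass
class ColumnContent:
    name: str                      # header shown in the leaderboard
    type: str                      # "markdown", "number", "str", ...
    displayed_by_default: bool = True
    hidden: bool = False
    never_hidden: bool = False


def fields(raw_class):
    # Template-style helper: returns the ColumnContent defaults stored on the
    # generated class, so attributes like .name and .hidden are available.
    return [v for k, v in raw_class.__dict__.items() if not k.startswith("__")]


cols = [
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)],
    ["task_filter", ColumnContent, ColumnContent("Task", "str", True, never_hidden=True)],
    ["ioi_gpt2", ColumnContent, ColumnContent("ioi_gpt2", "number", True)],
    ["average", ColumnContent, ColumnContent("Average", "number", True)],
]
AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", cols, frozen=True)

# app.py-style introspection over the generated class:
default_cols = [c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default]
# -> ['Method', 'Task', 'ioi_gpt2', 'Average']

Passing the ColumnContent instance as the third element of each triple makes it the field's default value, which is what lets the column metadata (display name, type, visibility flags) be recovered from the generated class at UI-construction time.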