Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
a90efab
1
Parent(s):
85b93b7
debug
Browse files
- app.py +57 -25
- src/display/utils.py +33 -1
app.py
CHANGED
@@ -125,17 +125,66 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
|
|
125 |
|
126 |
|
127 |
from src.about import TasksMib_Subgraph
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
def init_leaderboard_mib_subgraph(dataframe, track):
|
129 |
if dataframe is None or dataframe.empty:
|
130 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
131 |
|
132 |
-
#
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
return Leaderboard(
|
141 |
value=dataframe,
|
@@ -145,21 +194,7 @@ def init_leaderboard_mib_subgraph(dataframe, track):
|
|
145 |
cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
|
146 |
label="Select Columns to Display:",
|
147 |
),
|
148 |
-
|
149 |
-
ColumnFilter(
|
150 |
-
column="task_filter",
|
151 |
-
choices=tasks,
|
152 |
-
label="Filter by Task:",
|
153 |
-
default=None
|
154 |
-
),
|
155 |
-
ColumnFilter(
|
156 |
-
column="model_filter",
|
157 |
-
choices=models,
|
158 |
-
label="Filter by Model:",
|
159 |
-
default=None
|
160 |
-
)
|
161 |
-
],
|
162 |
-
search_columns=["Method"],
|
163 |
hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
|
164 |
bool_checkboxgroup_label="Hide models",
|
165 |
interactive=False,
|
@@ -176,9 +211,6 @@ def init_leaderboard_mib_subgraph(dataframe, track):
|
|
176 |
|
177 |
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
def init_leaderboard_mib_causalgraph(dataframe, track):
|
183 |
# print("Debugging column issues:")
|
184 |
# print("\nActual DataFrame columns:")
|
|
|
125 |
|
126 |
|
127 |
from src.about import TasksMib_Subgraph
|
128 |
+
# def init_leaderboard_mib_subgraph(dataframe, track):
|
129 |
+
# if dataframe is None or dataframe.empty:
|
130 |
+
# raise ValueError("Leaderboard DataFrame is empty or None.")
|
131 |
+
|
132 |
+
# # Get unique tasks and models for filters
|
133 |
+
# tasks = list(set(task.value.benchmark for task in TasksMib_Subgraph))
|
134 |
+
# models = list(set(
|
135 |
+
# model
|
136 |
+
# for task in TasksMib_Subgraph
|
137 |
+
# for model in task.value.models
|
138 |
+
# ))
|
139 |
+
|
140 |
+
# return Leaderboard(
|
141 |
+
# value=dataframe,
|
142 |
+
# datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
|
143 |
+
# select_columns=SelectColumns(
|
144 |
+
# default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
|
145 |
+
# cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
|
146 |
+
# label="Select Columns to Display:",
|
147 |
+
# ),
|
148 |
+
# column_filters=[
|
149 |
+
# ColumnFilter(
|
150 |
+
# column="task_filter",
|
151 |
+
# choices=tasks,
|
152 |
+
# label="Filter by Task:",
|
153 |
+
# default=None
|
154 |
+
# ),
|
155 |
+
# ColumnFilter(
|
156 |
+
# column="model_filter",
|
157 |
+
# choices=models,
|
158 |
+
# label="Filter by Model:",
|
159 |
+
# default=None
|
160 |
+
# )
|
161 |
+
# ],
|
162 |
+
# search_columns=["Method"],
|
163 |
+
# hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
|
164 |
+
# bool_checkboxgroup_label="Hide models",
|
165 |
+
# interactive=False,
|
166 |
+
# )
|
167 |
+
|
168 |
+
|
169 |
+
|
170 |
def init_leaderboard_mib_subgraph(dataframe, track):
|
171 |
if dataframe is None or dataframe.empty:
|
172 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
173 |
|
174 |
+
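# Add 'Task' and 'Model' columns (made searchable below). NOTE(review): row.index holds the column labels, so every row appears to get the same first match, and [0] raises IndexError when no "<benchmark>_<model>" column exists - confirm intended.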
|
175 |
+
dataframe['Task'] = dataframe.apply(
|
176 |
+
lambda row: [task.value.benchmark for task in TasksMib_Subgraph
|
177 |
+
if any(f"{task.value.benchmark}_{model}" in row.index
|
178 |
+
for model in task.value.models)][0],
|
179 |
+
axis=1
|
180 |
+
)
|
181 |
+
|
182 |
+
dataframe['Model'] = dataframe.apply(
|
183 |
+
lambda row: [model for task in TasksMib_Subgraph
|
184 |
+
for model in task.value.models
|
185 |
+
if f"{task.value.benchmark}_{model}" in row.index][0],
|
186 |
+
axis=1
|
187 |
+
)
|
188 |
|
189 |
return Leaderboard(
|
190 |
value=dataframe,
|
|
|
194 |
cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
|
195 |
label="Select Columns to Display:",
|
196 |
),
|
197 |
+
search_columns=["Method", "Task", "Model"], # Add Task and Model to searchable columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
|
199 |
bool_checkboxgroup_label="Hide models",
|
200 |
interactive=False,
|
|
|
211 |
|
212 |
|
213 |
|
|
|
|
|
|
|
214 |
def init_leaderboard_mib_causalgraph(dataframe, track):
|
215 |
# print("Debugging column issues:")
|
216 |
# print("\nActual DataFrame columns:")
|
src/display/utils.py
CHANGED
@@ -58,7 +58,8 @@ AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_
|
|
58 |
|
59 |
|
60 |
|
61 |
-
|
|
|
62 |
|
63 |
# auto_eval_column_dict_mib_subgraph = []
|
64 |
|
@@ -78,6 +79,9 @@ AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_
|
|
78 |
# # Average column
|
79 |
# auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
|
80 |
|
|
|
|
|
|
|
81 |
auto_eval_column_dict_mib_subgraph = []
|
82 |
|
83 |
# Method name column
|
@@ -113,6 +117,34 @@ for task in TasksMib_Subgraph:
|
|
113 |
auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
|
114 |
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
|
118 |
|
|
|
58 |
|
59 |
|
60 |
|
61 |
+
##############################################################################################################
|
62 |
+
# Version 1
|
63 |
|
64 |
# auto_eval_column_dict_mib_subgraph = []
|
65 |
|
|
|
79 |
# # Average column
|
80 |
# auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
|
81 |
|
82 |
+
|
83 |
+
##############################################################################################################
|
84 |
+
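# Version 2 (NOTE(review): appears superseded - Version 3 below rebinds auto_eval_column_dict_mib_subgraph to a fresh list; confirm nothing consumes this list in between)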
|
85 |
auto_eval_column_dict_mib_subgraph = []
|
86 |
|
87 |
# Method name column
|
|
|
117 |
auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
|
118 |
|
119 |
|
120 |
+
##############################################################################################################
|
121 |
+
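# Version 3: rebuilds auto_eval_column_dict_mib_subgraph from scratch, adding "Task" and "Model" string columns ahead of the per-task/model score columns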
|
122 |
+
auto_eval_column_dict_mib_subgraph = []
|
123 |
+
|
124 |
+
# Method name column
|
125 |
+
auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
|
126 |
+
|
127 |
+
# Add Task and Model columns for filtering
|
128 |
+
auto_eval_column_dict_mib_subgraph.append(["Task", ColumnContent, ColumnContent("Task", "str", True)])
|
129 |
+
auto_eval_column_dict_mib_subgraph.append(["Model", ColumnContent, ColumnContent("Model", "str", True)])
|
130 |
+
|
131 |
+
# For each task and model combination
|
132 |
+
for task in TasksMib_Subgraph:
|
133 |
+
for model in task.value.models:
|
134 |
+
col_name = f"{task.value.benchmark}_{model}"
|
135 |
+
auto_eval_column_dict_mib_subgraph.append([
|
136 |
+
col_name,
|
137 |
+
ColumnContent,
|
138 |
+
ColumnContent(col_name, "number", True)
|
139 |
+
])
|
140 |
+
|
141 |
+
# Average column
|
142 |
+
auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
|
149 |
|
150 |
|