jasonshaoshun committed · Commit e27c948 · 1 Parent(s): a90efab

debug
Files changed:
- app.py +70 -18
- old-requirements.txt +17 -0
- requirements.txt +1 -1
- src/about.py +12 -1
app.py
CHANGED
@@ -167,34 +167,85 @@ from src.about import TasksMib_Subgraph
 
 
 
+# def init_leaderboard_mib_subgraph(dataframe, track):
+#     if dataframe is None or dataframe.empty:
+#         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+#     # Add filter columns to display
+#     dataframe['Task'] = dataframe.apply(
+#         lambda row: [task.value.benchmark for task in TasksMib_Subgraph
+#                      if any(f"{task.value.benchmark}_{model}" in row.index
+#                             for model in task.value.models)][0],
+#         axis=1
+#     )
+
+#     dataframe['Model'] = dataframe.apply(
+#         lambda row: [model for task in TasksMib_Subgraph
+#                      for model in task.value.models
+#                      if f"{task.value.benchmark}_{model}" in row.index][0],
+#         axis=1
+#     )
+
+#     return Leaderboard(
+#         value=dataframe,
+#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+#         select_columns=SelectColumns(
+#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
+#             label="Select Columns to Display:",
+#         ),
+#         search_columns=["Method", "Task", "Model"], # Add Task and Model to searchable columns
+#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
+#         bool_checkboxgroup_label="Hide models",
+#         interactive=False,
+#     )
+
 def init_leaderboard_mib_subgraph(dataframe, track):
+    """Initialize the subgraph leaderboard with grouped column selection."""
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    # Get tasks and models using the new class methods
+    tasks = TasksMib_Subgraph.get_all_tasks()
+    models = TasksMib_Subgraph.get_all_models()
+
+    # Create a mapping from selection to actual column names
+    selection_map = {}
 
-    # Add filter columns to display
-    dataframe['Task'] = dataframe.apply(
-        lambda row: [task.value.benchmark for task in TasksMib_Subgraph
-                     if any(f"{task.value.benchmark}_{model}" in row.index
-                            for model in task.value.models)][0],
-        axis=1
-    )
-
-    dataframe['Model'] = dataframe.apply(
-        lambda row: [model for task in TasksMib_Subgraph
-                     for model in task.value.models
-                     if f"{task.value.benchmark}_{model}" in row.index][0],
-        axis=1
-    )
+    # Add task mappings - when a task is selected, show all its columns
+    for task in tasks:
+        # For each task, find all valid task_model combinations
+        valid_combos = []
+        for model in models:
+            col_name = f"{task}_{model}"
+            if col_name in dataframe.columns:
+                valid_combos.append(col_name)
+        if valid_combos:
+            selection_map[task] = valid_combos
 
+    # Add model mappings - when a model is selected, show all its columns
+    for model in models:
+        # For each model, find all valid task_model combinations
+        valid_combos = []
+        for task in tasks:
+            col_name = f"{task}_{model}"
+            if col_name in dataframe.columns:
+                valid_combos.append(col_name)
+        if valid_combos:
+            selection_map[model] = valid_combos
+
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-            label="Select Columns to Display:",
+            choices=[tasks, models], # Two groups of choices
+            labels=["Tasks", "Models"], # Labels for each group
+            default_selection=[*tasks, *models], # Show everything by default
+            cant_deselect=["Method"], # Method column always visible
+            label="Filter by Tasks or Models:",
+            selection_map=selection_map # Map selections to actual columns
         ),
-        search_columns=["Method", "Task", "Model"], # Add Task and Model to searchable columns
+        search_columns=["Method"],
         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
@@ -211,6 +262,7 @@ def init_leaderboard_mib_subgraph(dataframe, track):
 
 
 
+
 def init_leaderboard_mib_causalgraph(dataframe, track):
     # print("Debugging column issues:")
     # print("\nActual DataFrame columns:")
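Aside: the rewritten function's core idea is a selection_map that sends each task or model name to every existing f"{task}_{model}" score column. A minimal, runnable sketch of that construction (toy column and name lists; the real ones come from TasksMib_Subgraph and the leaderboard DataFrame):

import pandas as pd

# Toy frame mimicking the leaderboard's f"{task}_{model}" column layout.
df = pd.DataFrame(columns=["Method", "ioi_gpt2", "ioi_llama3", "arc_easy_llama3"])

tasks = ["ioi", "arc_easy"]    # stand-in for TasksMib_Subgraph.get_all_tasks()
models = ["gpt2", "llama3"]    # stand-in for TasksMib_Subgraph.get_all_models()

selection_map = {}
for task in tasks:             # task -> every existing f"{task}_{model}" column
    combos = [f"{task}_{m}" for m in models if f"{task}_{m}" in df.columns]
    if combos:
        selection_map[task] = combos
for model in models:           # model -> every existing f"{task}_{model}" column
    combos = [f"{t}_{model}" for t in tasks if f"{t}_{model}" in df.columns]
    if combos:
        selection_map[model] = combos

print(selection_map)
# {'ioi': ['ioi_gpt2', 'ioi_llama3'], 'arc_easy': ['arc_easy_llama3'],
#  'gpt2': ['ioi_gpt2'], 'llama3': ['ioi_llama3', 'arc_easy_llama3']}

Whether SelectColumns in gradio_leaderboard actually accepts the choices, labels, and selection_map keyword arguments passed above is not established by this diff; given the "debug" commit message and the version bump to 0.0.15 below, that appears to be exactly what is being tested.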
old-requirements.txt
ADDED
@@ -0,0 +1,17 @@
+APScheduler
+black
+datasets
+fastapi==0.112.2
+gradio
+gradio[oauth]
+gradio_leaderboard==0.0.13
+gradio_client
+huggingface-hub>=0.18.0
+matplotlib
+numpy
+pandas
+python-dateutil
+tqdm
+transformers
+tokenizers>=0.15.0
+sentencepiece
requirements.txt
CHANGED
@@ -4,7 +4,7 @@ datasets
 fastapi==0.112.2
 gradio
 gradio[oauth]
-gradio_leaderboard==0.0.13
+gradio_leaderboard==0.0.15
 gradio_client
 huggingface-hub>=0.18.0
 matplotlib
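The previous pin, gradio_leaderboard==0.0.13, is the one preserved in old-requirements.txt above; the bump to 0.0.15 presumably targets the grouped SelectColumns arguments tried in app.py (the commit message offers no rationale beyond "debug"). To apply the same bump in a local checkout, standard pip syntax suffices:

pip install gradio_leaderboard==0.0.15

or reinstall everything with pip install -r requirements.txt.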
src/about.py
CHANGED
@@ -47,7 +47,18 @@ class TasksMib_Subgraph(Enum):
     task4 = TaskMIB_Subgraph("arc_easy", ["gemma2", "llama3"], "arc_easy", ["edge_counts", "faithfulness"])
     task5 = TaskMIB_Subgraph("arc_challenge", ["llama3"], "arc_challenge", ["edge_counts", "faithfulness"])
 
-
+    @classmethod
+    def get_all_tasks(cls):
+        """Returns a list of all task benchmarks"""
+        return [task.value.benchmark for task in cls]
+
+    @classmethod
+    def get_all_models(cls):
+        """Returns a list of all unique models across all tasks"""
+        models = set()
+        for task in cls:
+            models.update(task.value.models)
+        return sorted(list(models))
 
 
 # @dataclass
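Aside: the new classmethods can be exercised standalone. A minimal sketch, assuming TaskMIB_Subgraph is a dataclass whose first two fields are benchmark and models (those two names are confirmed by the method bodies above; the remaining field names are assumptions for illustration):

from dataclasses import dataclass
from enum import Enum

@dataclass
class TaskMIB_Subgraph:
    benchmark: str
    models: list
    col_name: str    # assumed field name
    metrics: list    # assumed field name

class TasksMib_Subgraph(Enum):
    # Only the two members visible in this hunk; the real enum defines more.
    task4 = TaskMIB_Subgraph("arc_easy", ["gemma2", "llama3"], "arc_easy", ["edge_counts", "faithfulness"])
    task5 = TaskMIB_Subgraph("arc_challenge", ["llama3"], "arc_challenge", ["edge_counts", "faithfulness"])

    @classmethod
    def get_all_tasks(cls):
        """Returns a list of all task benchmarks"""
        return [task.value.benchmark for task in cls]

    @classmethod
    def get_all_models(cls):
        """Returns a list of all unique models across all tasks"""
        models = set()
        for task in cls:
            models.update(task.value.models)
        return sorted(list(models))

print(TasksMib_Subgraph.get_all_tasks())   # ['arc_easy', 'arc_challenge']
print(TasksMib_Subgraph.get_all_models())  # ['gemma2', 'llama3']

Note that get_all_models deduplicates across tasks, so llama3, which appears in both visible tasks, is listed once.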