jasonshaoshun committed · commit 5dd7582 · 1 parent: 53e4364

Commit message: debug

Files changed:
- app.py (+16 -14)
- src/display/utils.py (+12 -23)

(Removed lines shown as … below were truncated in the original diff view.)
app.py
CHANGED

@@ -19,14 +19,15 @@ from src.display.css_html_js import custom_css
 from src.display.utils import (
     BENCHMARK_COLS,
     BENCHMARK_COLS_MULTIMODAL,
-    …
+    BENCHMARK_COLS_MIB_SUBGRAPH,
     COLS,
     COLS_MIB,
     COLS_MULTIMODAL,
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-    …
+    AutoEvalColumn_mib_subgraph,
+    AutoEvalColumn_mib_causalgraph,
     fields,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, REPO_ID, TOKEN, RESULTS_REPO_MIB_SUBGRAPH, EVAL_RESULTS_MIB_SUBGRAPH_PATH, RESULTS_REPO_MIB_CAUSALGRAPH, EVAL_RESULTS_MIB_CAUSALGRAPH_PATH

@@ -69,8 +70,9 @@ except Exception:
 
 
 
-LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB,…
-…
+LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB_SUBGRAPH)
+
+LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB_CAUASALGRAPH)
 
 # LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 # LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)

@@ -95,14 +97,14 @@ def init_leaderboard_mib_subgraph(dataframe, track):
 
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(…
+        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(…
-            cant_deselect=[c.name for c in fields(…
+            default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=["Method"],  # Changed from …
-        hide_columns=[c.name for c in fields(…
+        search_columns=["Method"],  # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
+        hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
     )

@@ -120,14 +122,14 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
 
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(…
+        datatype=[c.type for c in fields(AutoEvalColumn_mib_causalgraph)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(…
-            cant_deselect=[c.name for c in fields(…
+            default_selection=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=["Method"],  # Changed from …
-        hide_columns=[c.name for c in fields(…
+        search_columns=["Method"],  # Changed from AutoEvalColumn_mib_causalgraph.model.name to "Method"
+        hide_columns=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
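Note: the datatype / default_selection / cant_deselect / hide_columns comprehensions above all iterate over the ColumnContent defaults attached to the generated dataclass. Since app.py imports fields from src.display.utils, this is the leaderboard template's custom helper (which returns the ColumnContent defaults stored on the class), not dataclasses.fields. A minimal, runnable sketch of the pattern under that assumption; the column names here are illustrative, not the repo's actual data:

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # The template-style helper assumed above: collect the ColumnContent
    # defaults stored as class attributes, skipping dunder entries.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

# Each entry is (attribute name, annotation, default) -- the shape make_dataclass expects.
column_dict = [
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)],
    ["ioi_gpt2", ColumnContent, ColumnContent("ioi_gpt2", "number", True)],
    ["average", ColumnContent, ColumnContent("Average", "number", True)],
]
DemoColumn = make_dataclass("DemoColumn", column_dict, frozen=True)

print([c.type for c in fields(DemoColumn)])                            # ['markdown', 'number', 'number']
print([c.name for c in fields(DemoColumn) if c.displayed_by_default])  # ['Method', 'ioi_gpt2', 'Average']
print([c.name for c in fields(DemoColumn) if c.never_hidden])          # ['Method']

Because each ColumnContent is frozen (hashable), the instances can serve as dataclass defaults, and c.type / c.name in the init_leaderboard_* comprehensions resolve to ColumnContent attributes rather than dataclasses.Field metadata.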
src/display/utils.py
CHANGED

@@ -21,7 +21,7 @@ class ColumnContent:
     never_hidden: bool = False
 
 ## Leaderboard columns
-…
+auto_eval_column_dict_mib_subgraph = []
 auto_eval_column_dict = []
 auto_eval_column_dict_multimodal = []
 
@@ -29,42 +29,40 @@ auto_eval_column_dict_multimodal = []
 
 
 
-…
+auto_eval_column_dict_mib_subgraph = []
 
 # Method name column
-…
+auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
 
 # For each task and model combination
 for task in TasksMib_Subgraph:
     for model in task.value.models:
         col_name = f"{task.value.benchmark}_{model}"  # ioi_meta_llama, mcqa_qwen, etc.
-        …
+        auto_eval_column_dict_mib_subgraph.append([
             col_name,
             ColumnContent,
             ColumnContent(col_name, "number", True)
         ])
 
 # Average column
-…
+auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
 
 
 # Create the dataclass for MIB columns
-…
+AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True)
 
 # Column selection for display
-COLS_MIB = [c.name for c in fields(…
+COLS_MIB = [c.name for c in fields(AutoEvalColumn_mib_subgraph) if not c.hidden]
 
-…
-…
+
+BENCHMARK_COLS_MIB_SUBGRAPH = []
 for task in TasksMib_Subgraph:
     for model in task.value.models:
         col_name = f"{task.value.col_name}_{model.replace('-', '_')}"
-        …
-…
-…
-…
-…
+        BENCHMARK_COLS_MIB_SUBGRAPH.append(col_name)
 
+# Implement the same for causal graph, auto_eval_column_dict_mib_causalgraph, AutoEvalColumn_mib_causalgraph
+BENCHMARK_COLS_MIB_CAUASALGRAPH = []
 
 
 
@@ -72,15 +70,6 @@ for task in TasksMib_Subgraph:
 
 
 
-# Init
-…
-auto_eval_column_dict_mib.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-# auto_eval_column_dict_mib.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
-# auto_eval_column_dict_mib.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
-
-#Scores
-for task in TasksMib_Subgraph:
-    auto_eval_column_dict_mib.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 
 
 
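Note: the "# Implement the same for causal graph" comment leaves the causal-graph columns unimplemented, and BENCHMARK_COLS_MIB_CAUASALGRAPH (the CAUASAL spelling matches the name app.py passes) stays an empty list. Unless AutoEvalColumn_mib_causalgraph is defined in an unshown part of the file, the new import in app.py would fail at startup. A hedged sketch of what that section might look like, mirroring the subgraph block above; TasksMib_Causalgraph and its attributes are assumptions, not part of this commit:

# Hypothetical completion of the causal-graph section (not in this commit).
# Assumes the surrounding context of src/display/utils.py (ColumnContent,
# make_dataclass) plus a TasksMib_Causalgraph enum shaped like TasksMib_Subgraph.
auto_eval_column_dict_mib_causalgraph = []

# Method name column
auto_eval_column_dict_mib_causalgraph.append(
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
)

# One numeric column per task/model combination
for task in TasksMib_Causalgraph:
    for model in task.value.models:
        col_name = f"{task.value.benchmark}_{model}"
        auto_eval_column_dict_mib_causalgraph.append(
            [col_name, ColumnContent, ColumnContent(col_name, "number", True)]
        )

# Average column
auto_eval_column_dict_mib_causalgraph.append(
    ["average", ColumnContent, ColumnContent("Average", "number", True)]
)

# Build the dataclass that app.py already imports.
AutoEvalColumn_mib_causalgraph = make_dataclass(
    "AutoEvalColumn_mib_causalgraph", auto_eval_column_dict_mib_causalgraph, frozen=True
)

# Populate the benchmark-column list that app.py passes to the loader,
# keeping the CAUASAL spelling app.py expects.
for task in TasksMib_Causalgraph:
    for model in task.value.models:
        BENCHMARK_COLS_MIB_CAUASALGRAPH.append(f"{task.value.col_name}_{model.replace('-', '_')}")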