jasonshaoshun committed
Commit 5dd7582 · 1 Parent(s): 53e4364
Files changed (2)
  1. app.py +16 -14
  2. src/display/utils.py +12 -23
app.py CHANGED
@@ -19,14 +19,15 @@ from src.display.css_html_js import custom_css
 from src.display.utils import (
     BENCHMARK_COLS,
     BENCHMARK_COLS_MULTIMODAL,
-    BENCHMARK_COLS_MIB,
+    BENCHMARK_COLS_MIB_SUBGRAPH,
     COLS,
     COLS_MIB,
     COLS_MULTIMODAL,
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-    AutoEvalColumn_mib,
+    AutoEvalColumn_mib_subgraph,
+    AutoEvalColumn_mib_causalgraph,
     fields,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, REPO_ID, TOKEN, RESULTS_REPO_MIB_SUBGRAPH, EVAL_RESULTS_MIB_SUBGRAPH_PATH, RESULTS_REPO_MIB_CAUSALGRAPH, EVAL_RESULTS_MIB_CAUSALGRAPH_PATH
@@ -69,8 +70,9 @@ except Exception:
 
 
 
-LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB)
-LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB)
+LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB_SUBGRAPH)
+
+LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB_CAUASALGRAPH)
 
 # LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 # LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
@@ -95,14 +97,14 @@ def init_leaderboard_mib_subgraph(dataframe, track):
 
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn_mib)],
+        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn_mib) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib) if c.never_hidden],
+            default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=["Method"],  # Changed from AutoEvalColumn_mib.model.name to "Method"
-        hide_columns=[c.name for c in fields(AutoEvalColumn_mib) if c.hidden],
+        search_columns=["Method"],  # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
+        hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
@@ -120,14 +122,14 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
 
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn_mib)],
+        datatype=[c.type for c in fields(AutoEvalColumn_mib_causalgraph)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn_mib) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib) if c.never_hidden],
+            default_selection=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=["Method"],  # Changed from AutoEvalColumn_mib.model.name to "Method"
-        hide_columns=[c.name for c in fields(AutoEvalColumn_mib) if c.hidden],
+        search_columns=["Method"],  # Changed from AutoEvalColumn_mib_causalgraph.model.name to "Method"
+        hide_columns=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
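The two init functions above differ only in which column dataclass they read, so the same construction logic could be factored into one helper parameterized by the dataclass. This is a minimal sketch, not code from the commit: the helper name build_mib_leaderboard is hypothetical, and the gradio_leaderboard import is assumed from the standard leaderboard template (app.py's own Leaderboard import is not shown in this diff); every keyword argument mirrors the calls in the diff.

from gradio_leaderboard import Leaderboard, SelectColumns  # assumed import path
from src.display.utils import fields, AutoEvalColumn_mib_subgraph, AutoEvalColumn_mib_causalgraph

def build_mib_leaderboard(dataframe, column_dataclass):
    """Hypothetical helper: build a Leaderboard from any MIB column dataclass."""
    cols = fields(column_dataclass)
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in cols],
        select_columns=SelectColumns(
            default_selection=[c.name for c in cols if c.displayed_by_default],
            cant_deselect=[c.name for c in cols if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=["Method"],
        hide_columns=[c.name for c in cols if c.hidden],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )

# Usage sketch: both tracks share the same kwargs, only the dataclass changes.
# leaderboard_subgraph = build_mib_leaderboard(LEADERBOARD_DF_MIB_SUBGRAPH, AutoEvalColumn_mib_subgraph)
# leaderboard_causalgraph = build_mib_leaderboard(LEADERBOARD_DF_MIB_CAUSALGRAPH, AutoEvalColumn_mib_causalgraph)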
src/display/utils.py CHANGED
@@ -21,7 +21,7 @@ class ColumnContent:
     never_hidden: bool = False
 
 ## Leaderboard columns
-auto_eval_column_dict_mib = []
+auto_eval_column_dict_mib_subgraph = []
 auto_eval_column_dict = []
 auto_eval_column_dict_multimodal = []
 
@@ -29,42 +29,40 @@ auto_eval_column_dict_multimodal = []
 
 
 
-auto_eval_column_dict_mib = []
+auto_eval_column_dict_mib_subgraph = []
 
 # Method name column
-auto_eval_column_dict_mib.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
+auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
 
 # For each task and model combination
 for task in TasksMib_Subgraph:
     for model in task.value.models:
         col_name = f"{task.value.benchmark}_{model}"  # ioi_meta_llama, mcqa_qwen, etc.
-        auto_eval_column_dict_mib.append([
+        auto_eval_column_dict_mib_subgraph.append([
             col_name,
             ColumnContent,
             ColumnContent(col_name, "number", True)
         ])
 
 # Average column
-auto_eval_column_dict_mib.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
+auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
 
 
 # Create the dataclass for MIB columns
-AutoEvalColumn_mib = make_dataclass("AutoEvalColumn_mib", auto_eval_column_dict_mib, frozen=True)
+AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True)
 
 # Column selection for display
-COLS_MIB = [c.name for c in fields(AutoEvalColumn_mib) if not c.hidden]
+COLS_MIB = [c.name for c in fields(AutoEvalColumn_mib_subgraph) if not c.hidden]
 
-# BENCHMARK_COLS_MIB = [t.value.col_name for t in TasksMib_Subgraph]
-BENCHMARK_COLS_MIB = []
+
+BENCHMARK_COLS_MIB_SUBGRAPH = []
 for task in TasksMib_Subgraph:
     for model in task.value.models:
         col_name = f"{task.value.col_name}_{model.replace('-', '_')}"
-        BENCHMARK_COLS_MIB.append(col_name)
-
-
-
-
+        BENCHMARK_COLS_MIB_SUBGRAPH.append(col_name)
 
+# Implement the same for causal graph, auto_eval_column_dict_mib_causalgraph, AutoEvalColumn_mib_causalgraph
+BENCHMARK_COLS_MIB_CAUASALGRAPH = []
 
 
 
@@ -72,15 +70,6 @@ for task in TasksMib_Subgraph:
 
 
 
-# Init
-
-auto_eval_column_dict_mib.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-# auto_eval_column_dict_mib.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
-# auto_eval_column_dict_mib.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
-
-#Scores
-for task in TasksMib_Subgraph:
-    auto_eval_column_dict_mib.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 
 
 
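The new utils.py stops at a stub for the causal-graph track: only BENCHMARK_COLS_MIB_CAUASALGRAPH = [] and the TODO comment are added, even though app.py now imports AutoEvalColumn_mib_causalgraph. A minimal sketch of the mirrored construction the comment asks for, assuming a TasksMib_Causalgraph enum with the same benchmark/models/col_name shape as TasksMib_Subgraph (the enum name and its contents are assumptions, not part of this commit), reusing ColumnContent and make_dataclass already present in src/display/utils.py:

from dataclasses import make_dataclass  # already imported at the top of src/display/utils.py

# Sketch only: mirrors the subgraph construction above for the causal-graph track.
auto_eval_column_dict_mib_causalgraph = []

# Method name column
auto_eval_column_dict_mib_causalgraph.append(
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
)

# One numeric column per task/model pair (assumes TasksMib_Causalgraph exists)
for task in TasksMib_Causalgraph:
    for model in task.value.models:
        col_name = f"{task.value.benchmark}_{model}"
        auto_eval_column_dict_mib_causalgraph.append(
            [col_name, ColumnContent, ColumnContent(col_name, "number", True)]
        )

# Average column
auto_eval_column_dict_mib_causalgraph.append(
    ["average", ColumnContent, ColumnContent("Average", "number", True)]
)

# Dataclass that app.py imports as AutoEvalColumn_mib_causalgraph
AutoEvalColumn_mib_causalgraph = make_dataclass(
    "AutoEvalColumn_mib_causalgraph", auto_eval_column_dict_mib_causalgraph, frozen=True
)

# Benchmark columns, keeping the BENCHMARK_COLS_MIB_CAUASALGRAPH spelling used in the commit
BENCHMARK_COLS_MIB_CAUASALGRAPH = []
for task in TasksMib_Causalgraph:
    for model in task.value.models:
        BENCHMARK_COLS_MIB_CAUASALGRAPH.append(f"{task.value.col_name}_{model.replace('-', '_')}")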