lixuejing committed
Commit 6500fc4 · 1 Parent(s): 33927d7
Files changed (3)
  1. app.py +17 -11
  2. src/about.py +2 -0
  3. src/display/utils.py +29 -1
app.py CHANGED
@@ -24,7 +24,11 @@ from src.display.utils import (
     fields,
     WeightType,
     Precision,
-    NUMERIC_INTERVALS
+    NUMERIC_INTERVALS,
+    QUOTACOLS,
+    QUOTATYPES,
+    AutoEvalColumnQuota,
+    BENCHMARK_QUOTACOLS
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -32,10 +36,10 @@ from src.submission.submit import add_new_eval
 from src.scripts.update_all_request_files import update_dynamic_files
 from src.tools.collections import update_collections
 from src.tools.datastatics import get_statics
-from src.tools.plots import (
-    create_plot_df,
-    create_scores_df,
-)
+#from src.tools.plots import (
+#    create_plot_df,
+#    create_scores_df,
+#)
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=TOKEN)
@@ -60,17 +64,18 @@ def init_space():
         restart_space()
 
     raw_data, original_df = get_leaderboard_df(
-    #leaderboard_df = get_leaderboard_df(
         results_path=EVAL_RESULTS_PATH,
         requests_path=EVAL_REQUESTS_PATH,
         dynamic_path=DYNAMIC_INFO_FILE_PATH,
-        cols=COLS,
-        benchmark_cols=BENCHMARK_COLS
+        #cols=COLS,
+        #benchmark_cols=BENCHMARK_COLS,
+        cols=QUOTACOLS,
+        benchmark_cols=BENCHMARK_QUOTACOLS
     )
     update_collections(original_df.copy())
     leaderboard_df = original_df.copy()
 
-    plot_df = create_plot_df(create_scores_df(raw_data))
+    #plot_df = create_plot_df(create_scores_df(raw_data))
 
     (
         finished_eval_queue_df,
@@ -78,9 +83,10 @@ def init_space():
         pending_eval_queue_df,
     ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-    return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+    #return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+    return leaderboard_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
-leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+leaderboard_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
 #return leaderboard_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
 #leaderboard_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
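Taken together, the app.py hunks switch the leaderboard over to the quota columns and drop the plotting path. A minimal sketch of the resulting module-level behaviour, using only names that appear in the diff above (nothing outside it is assumed):

    # Sketch only: get_leaderboard_df is now driven by the quota columns.
    raw_data, original_df = get_leaderboard_df(
        results_path=EVAL_RESULTS_PATH,
        requests_path=EVAL_REQUESTS_PATH,
        dynamic_path=DYNAMIC_INFO_FILE_PATH,
        cols=QUOTACOLS,                      # was cols=COLS
        benchmark_cols=BENCHMARK_QUOTACOLS,  # was benchmark_cols=BENCHMARK_COLS
    )

    # init_space() no longer builds plot_df (the src.tools.plots import is
    # commented out), so callers now unpack five values instead of six:
    (
        leaderboard_df,
        original_df,
        finished_eval_queue_df,
        running_eval_queue_df,
        pending_eval_queue_df,
    ) = init_space()

Any code elsewhere in app.py that still references plot_df, create_plot_df, or create_scores_df would now fail at runtime, since both the variable and the import are gone.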
src/about.py CHANGED
@@ -22,6 +22,8 @@ class Tasks(Enum):
     SAT = Task("SAT", "overall", "SAT")
     egoplan_bench2 = Task("egoplan_bench2", "overall", "egoplan_bench2")
     erqa = Task("erqa", "overall", "erqa")
+
+class Quotas(Enum):
     Perception = Task("Perception", "overall", "Perception")
     SpatialReasoning = Task("SpatialReasoning", "overall", "SpatialReasoning")
     Prediction = Task("Prediction", "overall", "Prediction")
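Because only the blank line and the class Quotas(Enum): header are inserted here, the Perception, SpatialReasoning, and Prediction members shown in the context (plus anything that follows below the hunk) now belong to Quotas rather than Tasks. For context, a hedged sketch of how these members are consumed; the Task container with benchmark/metric/col_name fields is an assumption inferred from the Task("SAT", "overall", "SAT") calls above and from t.value.col_name in src/display/utils.py:

    # Sketch only: the Task field names are assumptions matching the call sites.
    from dataclasses import dataclass
    from enum import Enum

    @dataclass
    class Task:
        benchmark: str  # key used to look the score up in the results files
        metric: str     # e.g. "overall"
        col_name: str   # column header shown on the leaderboard

    class Quotas(Enum):
        Perception = Task("Perception", "overall", "Perception")
        SpatialReasoning = Task("SpatialReasoning", "overall", "SpatialReasoning")
        Prediction = Task("Prediction", "overall", "Prediction")

    # Consumed exactly as in src/display/utils.py below:
    BENCHMARK_QUOTACOLS = [t.value.col_name for t in Quotas]
    print(BENCHMARK_QUOTACOLS)  # ['Perception', 'SpatialReasoning', 'Prediction']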
src/display/utils.py CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks
+from src.about import Tasks,Quotas
 
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
@@ -44,6 +44,30 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
+## Leaderboard columns
+auto_eval_column_quota_dict = []
+# Init
+auto_eval_column_quota_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+auto_eval_column_quota_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+#Scores
+auto_eval_column_quota_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+for task in Quotas:
+    auto_eval_column_quota_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+# Model information
+auto_eval_column_quota_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_quota_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+auto_eval_column_quota_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_quota_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_quota_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_quota_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
+auto_eval_column_quota_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
+# Dummy column for the search bar (hidden by the custom CSS)
+auto_eval_column_quota_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
+
+# We use make dataclass to dynamically fill the scores from Tasks
+AutoEvalColumnQuota = make_dataclass("AutoEvalColumnQuota", auto_eval_column_quota_dict, frozen=True)
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
@@ -116,10 +140,14 @@ class Precision(Enum):
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
 
+QUOTACOLS = [c.name for c in fields(AutoEvalColumnQuota) if not c.hidden]
+QUOTATYPES = [c.type for c in fields(AutoEvalColumnQuota) if not c.hidden]
+
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+BENCHMARK_QUOTACOLS = [t.value.col_name for t in Quotas]
 
 NUMERIC_INTERVALS = {
     "?": pd.Interval(-1, 0, closed="right"),