Committed by lixuejing · commit 6500fc4 · 1 parent: 33927d7
Commit message: update

Files changed:
- app.py: +17 -11
- src/about.py: +2 -0
- src/display/utils.py: +29 -1
app.py CHANGED

@@ -24,7 +24,11 @@ from src.display.utils import (
     fields,
     WeightType,
     Precision,
-    NUMERIC_INTERVALS
+    NUMERIC_INTERVALS,
+    QUOTACOLS,
+    QUOTATYPES,
+    AutoEvalColumnQuota,
+    BENCHMARK_QUOTACOLS
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -32,10 +36,10 @@ from src.submission.submit import add_new_eval
 from src.scripts.update_all_request_files import update_dynamic_files
 from src.tools.collections import update_collections
 from src.tools.datastatics import get_statics
-from src.tools.plots import (
-    create_plot_df,
-    create_scores_df,
-)
+#from src.tools.plots import (
+#    create_plot_df,
+#    create_scores_df,
+#)
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=TOKEN)
@@ -60,17 +64,18 @@ def init_space():
         restart_space()
 
     raw_data, original_df = get_leaderboard_df(
-    #leaderboard_df = get_leaderboard_df(
         results_path=EVAL_RESULTS_PATH,
         requests_path=EVAL_REQUESTS_PATH,
         dynamic_path=DYNAMIC_INFO_FILE_PATH,
-        cols=COLS,
-        benchmark_cols=BENCHMARK_COLS
+        #cols=COLS,
+        #benchmark_cols=BENCHMARK_COLS,
+        cols=QUOTACOLS,
+        benchmark_cols=BENCHMARK_QUOTACOLS
     )
     update_collections(original_df.copy())
     leaderboard_df = original_df.copy()
 
-    plot_df = create_plot_df(create_scores_df(raw_data))
+    #plot_df = create_plot_df(create_scores_df(raw_data))
 
     (
         finished_eval_queue_df,
@@ -78,9 +83,10 @@ def init_space():
         pending_eval_queue_df,
     ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-    return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+    #return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+    return leaderboard_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
-leaderboard_df, original_df,
+leaderboard_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
 #return leaderboard_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
 #leaderboard_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
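The net effect in init_space() is that the plot dataframe is no longer built (the src.tools.plots imports are commented out) and that get_leaderboard_df is now driven by the quota column lists instead of the task ones. get_leaderboard_df itself lives in src/populate.py and is not part of this commit, so the sketch below is only a hedged illustration of the contract the new call site appears to rely on; the function name, the sorting step, and the column-filtering logic are assumptions, not the repository's actual implementation.

```python
# Hypothetical stand-in for src/populate.py:get_leaderboard_df, illustrating how
# cols=QUOTACOLS and benchmark_cols=BENCHMARK_QUOTACOLS would plausibly be consumed.
import pandas as pd

def get_leaderboard_df_sketch(raw_df: pd.DataFrame, cols, benchmark_cols):
    keep = [c for c in cols if c in raw_df.columns]            # keep only the requested columns
    df = raw_df[keep].copy()
    scored = [c for c in benchmark_cols if c in df.columns]    # per-quota score columns
    if scored:
        df = df.sort_values(by=scored, ascending=False)        # rank models by quota scores
    return raw_df, df                                          # mirrors (raw_data, original_df)

# Usage mirroring the updated call in init_space():
# raw_data, original_df = get_leaderboard_df_sketch(results, QUOTACOLS, BENCHMARK_QUOTACOLS)
```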
src/about.py CHANGED

@@ -22,6 +22,8 @@ class Tasks(Enum):
     SAT = Task("SAT", "overall", "SAT")
     egoplan_bench2 = Task("egoplan_bench2", "overall", "egoplan_bench2")
     erqa = Task("erqa", "overall", "erqa")
+
+class Quotas(Enum):
     Perception = Task("Perception", "overall", "Perception")
     SpatialReasoning = Task("SpatialReasoning", "overall", "SpatialReasoning")
     Prediction = Task("Prediction", "overall", "Prediction")
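The new Quotas enum reuses the same Task value type as Tasks, and src/display/utils.py (below) iterates it exactly the way it iterates Tasks, via task.value.col_name. A minimal self-contained sketch of that usage follows, assuming Task is a plain dataclass whose third field is col_name; the benchmark and metric field names are borrowed from the stock leaderboard template and are not confirmed by this diff.

```python
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str   # assumed field name
    metric: str      # assumed field name
    col_name: str    # confirmed by utils.py, which reads t.value.col_name

class Quotas(Enum):
    Perception = Task("Perception", "overall", "Perception")
    SpatialReasoning = Task("SpatialReasoning", "overall", "SpatialReasoning")
    Prediction = Task("Prediction", "overall", "Prediction")

# Iterating the enum yields one leaderboard column per quota dimension,
# which is how BENCHMARK_QUOTACOLS is built in src/display/utils.py.
print([q.value.col_name for q in Quotas])
# ['Perception', 'SpatialReasoning', 'Prediction']
```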
src/display/utils.py CHANGED

@@ -3,7 +3,7 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks
+from src.about import Tasks,Quotas
 
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
@@ -44,6 +44,30 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
+## Leaderboard columns
+auto_eval_column_quota_dict = []
+# Init
+auto_eval_column_quota_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+auto_eval_column_quota_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+#Scores
+auto_eval_column_quota_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+for task in Quotas:
+    auto_eval_column_quota_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+# Model information
+auto_eval_column_quota_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_quota_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+auto_eval_column_quota_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_quota_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_quota_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_quota_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
+auto_eval_column_quota_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
+# Dummy column for the search bar (hidden by the custom CSS)
+auto_eval_column_quota_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
+
+# We use make dataclass to dynamically fill the scores from Tasks
+AutoEvalColumnQuota = make_dataclass("AutoEvalColumnQuota", auto_eval_column_quota_dict, frozen=True)
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
@@ -116,10 +140,14 @@ class Precision(Enum):
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
 
+QUOTACOLS = [c.name for c in fields(AutoEvalColumnQuota) if not c.hidden]
+QUOTATYPES = [c.type for c in fields(AutoEvalColumnQuota) if not c.hidden]
+
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+BENCHMARK_QUOTACOLS = [t.value.col_name for t in Quotas]
 
 NUMERIC_INTERVALS = {
     "?": pd.Interval(-1, 0, closed="right"),
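The new QUOTACOLS / QUOTATYPES / BENCHMARK_QUOTACOLS constants follow the same fields() on make_dataclass pattern as the existing COLS / TYPES / BENCHMARK_COLS. Below is a self-contained sketch of that pattern; the ColumnContent definition is an assumed shape inferred from the constructor calls in this diff (name, type, displayed_by_default positionally, plus hidden / never_hidden / dummy keywords), not the repository's actual class.

```python
from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:                 # assumed field layout, inferred from the calls above
    name: str
    type: str
    displayed_by_default: bool = True
    hidden: bool = False
    never_hidden: bool = False
    dummy: bool = False

def fields(raw_class):
    # Same helper as utils.py: pull the ColumnContent defaults off the generated class
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

quota_cols = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
    ["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)],
]
AutoEvalColumnQuotaSketch = make_dataclass("AutoEvalColumnQuotaSketch", quota_cols, frozen=True)

QUOTACOLS_SKETCH = [c.name for c in fields(AutoEvalColumnQuotaSketch) if not c.hidden]
QUOTATYPES_SKETCH = [c.type for c in fields(AutoEvalColumnQuotaSketch) if not c.hidden]
print(QUOTACOLS_SKETCH)   # ['Model', 'Average ⬆️']  (the hidden "Flagged" column is dropped)
print(QUOTATYPES_SKETCH)  # ['markdown', 'number']
```

Because AutoEvalColumnQuota is built from Quotas rather than Tasks, the leaderboard driven by QUOTACOLS shows one score column per quota dimension (Perception, SpatialReasoning, Prediction) instead of one per benchmark.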