Spaces:
Runtime error
Runtime error
Andrea Seveso
committed on
Commit
·
9df8442
1
Parent(s):
6aa8d26
Remove columns from eval
Browse files- .gitignore +1 -0
- src/display/utils.py +36 -36
.gitignore
CHANGED
@@ -11,3 +11,4 @@ eval-results/
|
|
11 |
eval-queue-bk/
|
12 |
eval-results-bk/
|
13 |
logs/
|
|
|
|
11 |
eval-queue-bk/
|
12 |
eval-results-bk/
|
13 |
logs/
|
14 |
+
results/*
|
src/display/utils.py
CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
|
|
5 |
|
6 |
from src.about import Tasks
|
7 |
|
|
|
8 |
def fields(raw_class):
|
9 |
return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
|
10 |
|
@@ -20,52 +21,59 @@ class ColumnContent:
|
|
20 |
hidden: bool = False
|
21 |
never_hidden: bool = False
|
22 |
|
23 |
-
|
|
|
24 |
auto_eval_column_dict = []
|
25 |
# Init
|
26 |
-
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent(
|
27 |
-
|
28 |
-
|
|
|
|
|
29 |
# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
30 |
for task in Tasks:
|
31 |
-
auto_eval_column_dict.append(
|
|
|
32 |
# Model information
|
33 |
-
auto_eval_column_dict.append(
|
34 |
-
|
35 |
-
auto_eval_column_dict.append(
|
36 |
-
|
37 |
-
auto_eval_column_dict.append(
|
38 |
-
|
39 |
-
auto_eval_column_dict.append(
|
40 |
-
|
41 |
-
auto_eval_column_dict.append(
|
|
|
42 |
|
43 |
# We use make dataclass to dynamically fill the scores from Tasks
|
44 |
-
AutoEvalColumn = make_dataclass(
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
## For the queue columns in the submission tab
|
47 |
@dataclass(frozen=True)
|
48 |
class EvalQueueColumn: # Queue column
|
49 |
model = ColumnContent("model", "markdown", True)
|
50 |
revision = ColumnContent("revision", "str", True)
|
51 |
private = ColumnContent("private", "bool", True)
|
52 |
precision = ColumnContent("precision", "str", True)
|
53 |
-
weight_type = ColumnContent("weight_type", "str", "Original")
|
54 |
status = ColumnContent("status", "str", True)
|
55 |
|
56 |
-
|
|
|
|
|
57 |
@dataclass
|
58 |
class ModelDetails:
|
59 |
name: str
|
60 |
display_name: str = ""
|
61 |
-
symbol: str = ""
|
62 |
|
63 |
|
64 |
class ModelType(Enum):
|
65 |
-
|
66 |
-
|
67 |
-
IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
68 |
-
RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
69 |
Unknown = ModelDetails(name="", symbol="?")
|
70 |
|
71 |
def to_str(self, separator=" "):
|
@@ -73,20 +81,12 @@ class ModelType(Enum):
|
|
73 |
|
74 |
@staticmethod
|
75 |
def from_str(type):
|
76 |
-
if "
|
77 |
-
return ModelType.
|
78 |
-
if "
|
79 |
-
return ModelType.
|
80 |
-
if "RL-tuned" in type or "🟦" in type:
|
81 |
-
return ModelType.RL
|
82 |
-
if "instruction-tuned" in type or "⭕" in type:
|
83 |
-
return ModelType.IFT
|
84 |
return ModelType.Unknown
|
85 |
|
86 |
-
class WeightType(Enum):
|
87 |
-
Adapter = ModelDetails("Adapter")
|
88 |
-
Original = ModelDetails("Original")
|
89 |
-
Delta = ModelDetails("Delta")
|
90 |
|
91 |
class Precision(Enum):
|
92 |
float16 = ModelDetails("float16")
|
@@ -100,6 +100,7 @@ class Precision(Enum):
|
|
100 |
return Precision.bfloat16
|
101 |
return Precision.Unknown
|
102 |
|
|
|
103 |
# Column selection
|
104 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
105 |
|
@@ -107,4 +108,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
|
|
107 |
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
108 |
|
109 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
110 |
-
|
|
|
5 |
|
6 |
from src.about import Tasks
|
7 |
|
8 |
+
|
9 |
def fields(raw_class):
|
10 |
return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
|
11 |
|
|
|
21 |
hidden: bool = False
|
22 |
never_hidden: bool = False
|
23 |
|
24 |
+
|
25 |
+
# Leaderboard columns
|
26 |
auto_eval_column_dict = []
|
27 |
# Init
|
28 |
+
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent(
|
29 |
+
"T", "str", True, never_hidden=True)])
|
30 |
+
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent(
|
31 |
+
"Model", "markdown", True, never_hidden=True)])
|
32 |
+
# Scores
|
33 |
# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
34 |
for task in Tasks:
|
35 |
+
auto_eval_column_dict.append(
|
36 |
+
[task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
37 |
# Model information
|
38 |
+
auto_eval_column_dict.append(
|
39 |
+
["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
40 |
+
auto_eval_column_dict.append(
|
41 |
+
["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
42 |
+
auto_eval_column_dict.append(
|
43 |
+
["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
44 |
+
auto_eval_column_dict.append(
|
45 |
+
["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
46 |
+
auto_eval_column_dict.append(
|
47 |
+
["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
48 |
|
49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
50 |
+
AutoEvalColumn = make_dataclass(
|
51 |
+
"AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
52 |
+
|
53 |
+
# For the queue columns in the submission tab
|
54 |
+
|
55 |
|
|
|
56 |
@dataclass(frozen=True)
|
57 |
class EvalQueueColumn: # Queue column
|
58 |
model = ColumnContent("model", "markdown", True)
|
59 |
revision = ColumnContent("revision", "str", True)
|
60 |
private = ColumnContent("private", "bool", True)
|
61 |
precision = ColumnContent("precision", "str", True)
|
|
|
62 |
status = ColumnContent("status", "str", True)
|
63 |
|
64 |
+
# All the model information that we might need
|
65 |
+
|
66 |
+
|
67 |
@dataclass
|
68 |
class ModelDetails:
|
69 |
name: str
|
70 |
display_name: str = ""
|
71 |
+
symbol: str = "" # emoji
|
72 |
|
73 |
|
74 |
class ModelType(Enum):
|
75 |
+
OP = ModelDetails(name="pretrained", symbol="🟢")
|
76 |
+
CL = ModelDetails(name="instruction-tuned", symbol="⭕")
|
|
|
|
|
77 |
Unknown = ModelDetails(name="", symbol="?")
|
78 |
|
79 |
def to_str(self, separator=" "):
|
|
|
81 |
|
82 |
@staticmethod
|
83 |
def from_str(type):
|
84 |
+
if "open" in type or "🟢" in type:
|
85 |
+
return ModelType.OP
|
86 |
+
if "closed" in type or "⭕" in type:
|
87 |
+
return ModelType.CL
|
|
|
|
|
|
|
|
|
88 |
return ModelType.Unknown
|
89 |
|
|
|
|
|
|
|
|
|
90 |
|
91 |
class Precision(Enum):
|
92 |
float16 = ModelDetails("float16")
|
|
|
100 |
return Precision.bfloat16
|
101 |
return Precision.Unknown
|
102 |
|
103 |
+
|
104 |
# Column selection
|
105 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
106 |
|
|
|
108 |
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
109 |
|
110 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
|