test
- src/about.py +2 -2
- src/leaderboard/read_evals.py +13 -5
src/about.py CHANGED
@@ -12,8 +12,8 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("
-    task1 = Task("
+    task0 = Task("speed", "spq", "Speed")
+    task1 = Task("contamination", "score", "Contamination")
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
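For context, the two enum members above follow the three-field shape described in the comment (task key, metric key, display name). In the stock leaderboard template that shape is a small dataclass; the sketch below assumes the template's field names, which are not visible in this diff:

```python
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results json
    metric: str     # metric_key in the results json
    col_name: str   # name to display in the leaderboard

class Tasks(Enum):
    # Members taken from the diff; the Task field names above are an assumption.
    task0 = Task("speed", "spq", "Speed")
    task1 = Task("contamination", "score", "Contamination")
```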
src/leaderboard/read_evals.py CHANGED
@@ -60,6 +60,10 @@ class EvalResult:
         still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
+
+        ##make still_on_hub always true for now:
+        still_on_hub = True
+
         architecture = "?"
         if model_config is not None:
             architectures = getattr(model_config, "architectures", None)
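The added lines unconditionally pin the hub-availability flag on, as the comment says, "for now". If the bypass is meant to stay temporary, one reversible alternative is to gate it behind an environment variable instead of hardcoding it; a minimal sketch, where `LEADERBOARD_SKIP_HUB_CHECK` is a hypothetical variable name, not something in this repo:

```python
import os

def hub_check_bypassed() -> bool:
    """Hypothetical flag: bypass the hub availability check when set."""
    return os.environ.get("LEADERBOARD_SKIP_HUB_CHECK", "0") == "1"

# In init_from_json_file, instead of unconditionally pinning the flag:
#     still_on_hub = still_on_hub or hub_check_bypassed()
```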
@@ -90,11 +94,10 @@ class EvalResult:
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
         request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
-
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
-
+
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.license = request.get("license", "?")
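The keys this method reads imply an eval request file of roughly the following shape. Only the key names are grounded in the diff; the values below are illustrative assumptions, not taken from the repo:

```python
# Hypothetical eval request file content, for illustration only.
request = {
    "model_type": "pretrained",
    "weight_type": "Original",
    "license": "apache-2.0",
}

# Mirrors the fallbacks in update_with_request_file: missing keys degrade
# to defaults instead of raising.
print(request.get("model_type", ""))           # "pretrained"
print(request.get("weight_type", "Original"))  # "Original"
print(request.get("license", "?"))             # "apache-2.0"
```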
@@ -106,7 +109,7 @@ class EvalResult:
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average =
+        average = self.results["average"]
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
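Note that `self.results["average"]` assumes every results dict carries a precomputed "average" key; when it does not, `to_dict()` raises KeyError and the result is skipped by the loop in `get_raw_eval_results`. A more forgiving variant, offered as a sketch rather than the repo's code:

```python
def safe_average(results: dict) -> float | None:
    """Hypothetical helper: prefer a precomputed 'average' key, else take
    the mean of the numeric per-task scores."""
    if "average" in results:
        return results["average"]
    scores = [v for v in results.values() if isinstance(v, (int, float))]
    return sum(scores) / len(scores) if scores else None

# Usage inside to_dict():
#     average = safe_average(self.results)
```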
@@ -135,6 +138,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
         requests_path,
         f"{model_name}_eval_request_*.json",
     )
+
     request_files = glob.glob(request_files)
 
     # Select correct request file (precision)
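For context, the lookup above builds a wildcard pattern from the model name and expands it to the matching JSON files on disk. A self-contained sketch of that step; the `os.path.join` is assumed from the surrounding template code, which this hunk only partially shows:

```python
import glob
import os

def find_request_files_sketch(requests_path: str, model_name: str) -> list[str]:
    # Build the wildcard pattern for this model's eval request files ...
    pattern = os.path.join(requests_path, f"{model_name}_eval_request_*.json")
    # ... then expand it to the matching JSON files on disk.
    return glob.glob(pattern)
```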
@@ -174,7 +178,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
-
+
         # Store results of same eval together
         eval_name = eval_result.eval_name
         if eval_name in eval_results.keys():
@@ -183,13 +187,17 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name] = eval_result
 
     results = []
-
+    #print(eval_results.values())
     for v in eval_results.values():
         try:
             print(v.to_dict())
             v.to_dict() # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
+            print("Key error in eval result, skipping")
+
+            print(v)
+            print(v.to_dict())
             continue
 
     print(results)
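One caveat about the new debug output: `v.to_dict()` is what raised the KeyError in the first place, so calling it again inside the `except` block re-raises the same error outside any handler, and the intended skip never happens. A defensive sketch of the loop, using only names that appear in the diff:

```python
results = []
for v in eval_results.values():
    try:
        v.to_dict()  # test that the dict version is complete
        results.append(v)
    except KeyError as e:  # not all eval values present
        # Print the offending key and the raw result instead of calling
        # to_dict() again, which would re-raise the same KeyError.
        print(f"Key error in eval result, skipping: {e}")
        print(v)
        continue
```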