Spaces:
Sleeping
Sleeping
Update run_eval.py
Browse files- run_eval.py +11 -4
run_eval.py
CHANGED
@@ -37,7 +37,6 @@ login(token)
|
|
37 |
DATASET_REPO = os.environ["HF_DATASET_REPO"]
|
38 |
api = HfApi()
|
39 |
|
40 |
-
METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
|
41 |
all_rows = []
|
42 |
|
43 |
# ───── Safe tokenizer loading ─────
|
@@ -163,10 +162,18 @@ for cfg in CONFIGS:
|
|
163 |
count_before = len(all_rows)
|
164 |
for task, scores in res["results"].items():
|
165 |
for metric, value in scores.items():
|
166 |
-
|
167 |
-
if base_metric not in METRICS_TO_KEEP:
|
168 |
continue
|
169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
|
172 |
|
|
|
37 |
DATASET_REPO = os.environ["HF_DATASET_REPO"]
|
38 |
api = HfApi()
|
39 |
|
|
|
40 |
all_rows = []
|
41 |
|
42 |
# ───── Safe tokenizer loading ─────
|
|
|
162 |
count_before = len(all_rows)
|
163 |
for task, scores in res["results"].items():
|
164 |
for metric, value in scores.items():
|
165 |
+
if value is None:
|
|
|
166 |
continue
|
167 |
+
metric_name, _, aggregation = metric.partition(",")
|
168 |
+
|
169 |
+
all_rows.append({
|
170 |
+
**meta,
|
171 |
+
"task": task,
|
172 |
+
"metric": metric_name,
|
173 |
+
"aggregation": aggregation or None,
|
174 |
+
"value": value
|
175 |
+
})
|
176 |
+
|
177 |
|
178 |
print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
|
179 |
|