Spaces:

Mdrnfox
/

peft-bench-eval

Sleeping

Mdrnfox committed 20 days ago

Commit

921975c

verified ·

1 Parent(s): 0799cad

Update run_eval.py

Files changed (1) hide show

run_eval.py CHANGED Viewed

@@ -37,7 +37,6 @@ login(token)
 DATASET_REPO = os.environ["HF_DATASET_REPO"]
 api = HfApi()
-METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
 all_rows = []
 # ───── Safe tokenizer loading ─────
@@ -163,10 +162,18 @@ for cfg in CONFIGS:
     count_before = len(all_rows)
     for task, scores in res["results"].items():
         for metric, value in scores.items():
-            base_metric = metric.split(",")[0]
-            if base_metric not in METRICS_TO_KEEP:
                 continue
-            all_rows.append({**meta, "task": task, "metric": metric, "value": value})
     print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")

 DATASET_REPO = os.environ["HF_DATASET_REPO"]
 api = HfApi()
 all_rows = []
 # ───── Safe tokenizer loading ─────
     count_before = len(all_rows)
     for task, scores in res["results"].items():
         for metric, value in scores.items():
+            if value is None:
                 continue
+                metric_name, _, aggregation = metric.partition(",")
+                all_rows.append({
+                    **meta,
+                    "task": task,
+                    "metric": metric_name,
+                    "aggregation": aggregation or None,
+                    "value": value
+                })
     print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")