Spaces:

Mdrnfox
/

peft-bench-eval

Sleeping

Mdrnfox committed 25 days ago

Commit

1e513f3

verified ·

1 Parent(s): 7e57047

Update run_eval.py

Files changed (1) hide show

run_eval.py CHANGED Viewed

@@ -145,6 +145,10 @@ for cfg in CONFIGS:
             print(f"Evaluation failed for {adapter_repo}: {e}")
             continue
     meta = {
         "model_id": adapter_repo,
         "adapter_type": adapter_type,
@@ -153,12 +157,15 @@ for cfg in CONFIGS:
         "run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
         "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
     }
     for task, scores in res["results"].items():
         for metric, value in scores.items():
             if metric not in METRICS_TO_KEEP:
                 continue
             all_rows.append({**meta, "task": task, "metric": metric, "value": value})
 # ───── Merge and upload results ─────
 df_new = pd.DataFrame(all_rows)
@@ -177,10 +184,9 @@ with tempfile.TemporaryDirectory() as tmp:
     df_combined = df_combined.sort_values("run_date")
     df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
-    print("Existing rows:", len(df_existing))
-    print("New rows:", len(df_new))
-    print("Combined (pre-dedup):", len(df_existing) + len(df_new))
-    print("Final rows (after dedup):", len(df_combined))
     out = Path("peft_bench.parquet")
     df_combined.to_parquet(out, index=False)

             print(f"Evaluation failed for {adapter_repo}: {e}")
             continue
+    if not res.get("results"):
+    print(f"No results returned for {adapter_repo}. Skipping...")
+    continue
     meta = {
         "model_id": adapter_repo,
         "adapter_type": adapter_type,
         "run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
         "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
     }
+    count_before = len(all_rows)
     for task, scores in res["results"].items():
         for metric, value in scores.items():
             if metric not in METRICS_TO_KEEP:
                 continue
             all_rows.append({**meta, "task": task, "metric": metric, "value": value})
+    print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
 # ───── Merge and upload results ─────
 df_new = pd.DataFrame(all_rows)
     df_combined = df_combined.sort_values("run_date")
     df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
+    print("\nFinal new results:")
+    print(df_new[["model_id", "task", "metric", "value"]])
     out = Path("peft_bench.parquet")
     df_combined.to_parquet(out, index=False)