Update run_eval.py
run_eval.py  +11 -11  CHANGED
@@ -134,7 +134,7 @@ for cfg in CONFIGS:
     res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
     print(f"Raw results for {adapter_repo}: {res}")
     if not res.get("results"):
-        print(f"
+        print(f"Empty results — likely a task or model compatibility issue for: {adapter_repo}")
         continue
     print(f"\nEvaluation raw result for {adapter_repo}:")
     print(res.get("results", {}))

@@ -164,15 +164,15 @@ for cfg in CONFIGS:
     for metric, value in scores.items():
         if value is None:
             continue
-
-
-
-
-
-
-
-
-
+        metric_name, _, aggregation = metric.partition(",")
+
+        all_rows.append({
+            **meta,
+            "task": task,
+            "metric": metric_name,
+            "aggregation": aggregation or None,
+            "value": value
+        })


     print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")

@@ -196,7 +196,7 @@ with tempfile.TemporaryDirectory() as tmp:
     df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

     print("\nFinal new results:")
-    print(df_new[["model_id", "task", "metric", "value"]])
+    print(df_new[["model_id", "task", "metric", "aggregation", "value"]])


     out = Path("peft_bench.parquet")
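For reference, here is a minimal standalone sketch of the row-building logic this commit introduces. The metadata, task name, and score values below are made-up placeholders, not taken from the Space; lm-eval-harness typically reports metrics under keys of the form "metric,filter" (for example "acc,none"), which str.partition(",") splits into the metric name and the aggregation/filter label:

# Sketch only: sample metadata and scores are assumptions for illustration.
meta = {"model_id": "example/lora-adapter"}   # hypothetical adapter metadata
task = "hellaswag"                            # hypothetical task name
scores = {"acc,none": 0.51, "acc_norm,none": 0.67, "acc_stderr,none": None}

all_rows = []
for metric, value in scores.items():
    if value is None:
        continue  # skip entries without a usable value, as in the script

    # "acc,none" -> metric_name="acc", aggregation="none";
    # a key with no comma yields aggregation="" and is stored as None.
    metric_name, _, aggregation = metric.partition(",")

    all_rows.append({
        **meta,
        "task": task,
        "metric": metric_name,
        "aggregation": aggregation or None,
        "value": value,
    })

print(all_rows)
# [{'model_id': 'example/lora-adapter', 'task': 'hellaswag', 'metric': 'acc', 'aggregation': 'none', 'value': 0.51},
#  {'model_id': 'example/lora-adapter', 'task': 'hellaswag', 'metric': 'acc_norm', 'aggregation': 'none', 'value': 0.67}]

Storing the aggregation/filter label in its own column keeps the metric column clean (e.g. "acc" rather than "acc,none"), which is presumably why the final print also gains the "aggregation" column.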