Update run_eval.py
run_eval.py  CHANGED  (+11 -11)
@@ -134,7 +134,7 @@ for cfg in CONFIGS:
     res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
     print(f"Raw results for {adapter_repo}: {res}")
     if not res.get("results"):
-        print(f"
+        print(f"Empty results — likely a task or model compatibility issue for: {adapter_repo}")
         continue
     print(f"\nEvaluation raw result for {adapter_repo}:")
     print(res.get("results", {}))

@@ -164,15 +164,15 @@ for cfg in CONFIGS:
     for metric, value in scores.items():
         if value is None:
             continue
-
-
-
-
-
-
-
-
-
+        metric_name, _, aggregation = metric.partition(",")
+
+        all_rows.append({
+            **meta,
+            "task": task,
+            "metric": metric_name,
+            "aggregation": aggregation or None,
+            "value": value
+        })


     print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")

@@ -196,7 +196,7 @@ with tempfile.TemporaryDirectory() as tmp:
     df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

     print("\nFinal new results:")
-    print(df_new[["model_id", "task", "metric", "value"]])
+    print(df_new[["model_id", "task", "metric", "aggregation", "value"]])


     out = Path("peft_bench.parquet")
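
For context on the middle hunk: lm-eval-harness reports each task's scores under keys of the form "<metric>,<aggregation>" (for example "acc,none"), which is what the metric.partition(",") call unpacks into the new metric and aggregation columns. The sketch below runs that row-building step on made-up meta, task, and scores values; none of them come from the Space's actual config.

# Row-building step from the diff, run on sample data.
# `meta`, `task`, and `scores` are placeholders, not the Space's real values.
meta = {"model_id": "user/some-adapter", "base_model": "some/base-model"}
task = "hellaswag"
scores = {"alias": "hellaswag", "acc,none": 0.571, "acc_stderr,none": 0.0049}

all_rows = []
for metric, value in scores.items():
    if value is None:
        continue
    # "acc,none" -> metric_name="acc", aggregation="none";
    # a key with no comma keeps its name and gets aggregation=None.
    metric_name, _, aggregation = metric.partition(",")
    all_rows.append({
        **meta,
        "task": task,
        "metric": metric_name,
        "aggregation": aggregation or None,
        "value": value,
    })

for row in all_rows:
    print(row["metric"], row["aggregation"], row["value"])
# alias None hellaswag   <- string values like this survive here and are coerced to NaN later
# acc None 0.571 -> prints as: acc none 0.571
# acc_stderr none 0.0049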
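
The last hunk only adds the aggregation column to the final printout, but it sits next to the step that coerces value to numeric and writes peft_bench.parquet. A rough, self-contained sketch of that tail end follows; the read-existing-then-concat logic and the sample rows are assumptions, while the column names, the errors="coerce" coercion, and the output filename come from the diff.

from pathlib import Path
import pandas as pd

# Placeholder rows standing in for the freshly collected results.
df_new = pd.DataFrame([
    {"model_id": "user/some-adapter", "task": "hellaswag",
     "metric": "acc", "aggregation": "none", "value": 0.571},
    {"model_id": "user/some-adapter", "task": "hellaswag",
     "metric": "alias", "aggregation": None, "value": "hellaswag"},
])

out = Path("peft_bench.parquet")

# Assumed: merge with any previously written rows before re-writing the file.
df_old = pd.read_parquet(out) if out.exists() else None
df_combined = pd.concat([df_old, df_new], ignore_index=True) if df_old is not None else df_new.copy()

# Non-numeric values (such as the harness's "alias" strings) become NaN here,
# which is what errors="coerce" is for.
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

print("\nFinal new results:")
print(df_new[["model_id", "task", "metric", "aggregation", "value"]])

df_combined.to_parquet(out, index=False)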