Update run_eval.py
run_eval.py  +11 -11  CHANGED
@@ -134,7 +134,7 @@ for cfg in CONFIGS:
     res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
     print(f"Raw results for {adapter_repo}: {res}")
     if not res.get("results"):
-        print(f"
+        print(f"Empty results — likely a task or model compatibility issue for: {adapter_repo}")
         continue
     print(f"\nEvaluation raw result for {adapter_repo}:")
     print(res.get("results", {}))

@@ -164,15 +164,15 @@ for cfg in CONFIGS:
     for metric, value in scores.items():
         if value is None:
             continue
-
-
-
-
-
-
-
-
-
+        metric_name, _, aggregation = metric.partition(",")
+
+        all_rows.append({
+            **meta,
+            "task": task,
+            "metric": metric_name,
+            "aggregation": aggregation or None,
+            "value": value
+        })


     print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")

@@ -196,7 +196,7 @@ with tempfile.TemporaryDirectory() as tmp:
     df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

     print("\nFinal new results:")
-    print(df_new[["model_id", "task", "metric", "value"]])
+    print(df_new[["model_id", "task", "metric", "aggregation", "value"]])


     out = Path("peft_bench.parquet")
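For reference, here is a minimal standalone sketch of the row-building logic this commit introduces. The metadata, task name, and score values below are made-up placeholders, not taken from the Space; lm-eval-harness typically reports metrics under keys of the form "metric,filter" (for example "acc,none"), which str.partition(",") splits into the metric name and the aggregation/filter label:

# Sketch only: sample metadata and scores are assumptions for illustration.
meta = {"model_id": "example/lora-adapter"}   # hypothetical adapter metadata
task = "hellaswag"                            # hypothetical task name
scores = {"acc,none": 0.51, "acc_norm,none": 0.67, "acc_stderr,none": None}

all_rows = []
for metric, value in scores.items():
    if value is None:
        continue  # skip entries without a usable value, as in the script

    # "acc,none" -> metric_name="acc", aggregation="none";
    # a key with no comma yields aggregation="" and is stored as None.
    metric_name, _, aggregation = metric.partition(",")

    all_rows.append({
        **meta,
        "task": task,
        "metric": metric_name,
        "aggregation": aggregation or None,
        "value": value,
    })

print(all_rows)
# [{'model_id': 'example/lora-adapter', 'task': 'hellaswag', 'metric': 'acc', 'aggregation': 'none', 'value': 0.51},
#  {'model_id': 'example/lora-adapter', 'task': 'hellaswag', 'metric': 'acc_norm', 'aggregation': 'none', 'value': 0.67}]

Storing the aggregation/filter label in its own column keeps the metric column clean (e.g. "acc" rather than "acc,none"), which is presumably why the final print also gains the "aggregation" column.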