Spaces:
Sleeping
Sleeping
Update run_eval.py
Browse files- run_eval.py +11 -5
run_eval.py
CHANGED
@@ -145,6 +145,10 @@ for cfg in CONFIGS:
|
|
145 |
print(f"Evaluation failed for {adapter_repo}: {e}")
|
146 |
continue
|
147 |
|
|
|
|
|
|
|
|
|
148 |
meta = {
|
149 |
"model_id": adapter_repo,
|
150 |
"adapter_type": adapter_type,
|
@@ -153,12 +157,15 @@ for cfg in CONFIGS:
|
|
153 |
"run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
|
154 |
"commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
|
155 |
}
|
156 |
-
|
|
|
157 |
for task, scores in res["results"].items():
|
158 |
for metric, value in scores.items():
|
159 |
if metric not in METRICS_TO_KEEP:
|
160 |
continue
|
161 |
all_rows.append({**meta, "task": task, "metric": metric, "value": value})
|
|
|
|
|
162 |
|
163 |
# ───── Merge and upload results ─────
|
164 |
df_new = pd.DataFrame(all_rows)
|
@@ -177,10 +184,9 @@ with tempfile.TemporaryDirectory() as tmp:
|
|
177 |
df_combined = df_combined.sort_values("run_date")
|
178 |
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
|
179 |
|
180 |
-
print("
|
181 |
-
print("
|
182 |
-
|
183 |
-
print("Final rows (after dedup):", len(df_combined))
|
184 |
|
185 |
out = Path("peft_bench.parquet")
|
186 |
df_combined.to_parquet(out, index=False)
|
|
|
145 |
print(f"Evaluation failed for {adapter_repo}: {e}")
|
146 |
continue
|
147 |
|
148 |
+
if not res.get("results"):
|
149 |
+
print(f"No results returned for {adapter_repo}. Skipping...")
|
150 |
+
continue
|
151 |
+
|
152 |
meta = {
|
153 |
"model_id": adapter_repo,
|
154 |
"adapter_type": adapter_type,
|
|
|
157 |
"run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
|
158 |
"commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
|
159 |
}
|
160 |
+
|
161 |
+
count_before = len(all_rows)
|
162 |
for task, scores in res["results"].items():
|
163 |
for metric, value in scores.items():
|
164 |
if metric not in METRICS_TO_KEEP:
|
165 |
continue
|
166 |
all_rows.append({**meta, "task": task, "metric": metric, "value": value})
|
167 |
+
print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
|
168 |
+
|
169 |
|
170 |
# ───── Merge and upload results ─────
|
171 |
df_new = pd.DataFrame(all_rows)
|
|
|
184 |
df_combined = df_combined.sort_values("run_date")
|
185 |
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
|
186 |
|
187 |
+
print("\nFinal new results:")
|
188 |
+
print(df_new[["model_id", "task", "metric", "value"]])
|
189 |
+
|
|
|
190 |
|
191 |
out = Path("peft_bench.parquet")
|
192 |
df_combined.to_parquet(out, index=False)
|