Mdrnfox committed on
Commit
1e513f3
·
verified ·
1 Parent(s): 7e57047

Update run_eval.py

Browse files
Files changed (1) hide show
  1. run_eval.py +11 -5
run_eval.py CHANGED
@@ -145,6 +145,10 @@ for cfg in CONFIGS:
145
  print(f"Evaluation failed for {adapter_repo}: {e}")
146
  continue
147
 
 
 
 
 
148
  meta = {
149
  "model_id": adapter_repo,
150
  "adapter_type": adapter_type,
@@ -153,12 +157,15 @@ for cfg in CONFIGS:
153
  "run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
154
  "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
155
  }
156
-
 
157
  for task, scores in res["results"].items():
158
  for metric, value in scores.items():
159
  if metric not in METRICS_TO_KEEP:
160
  continue
161
  all_rows.append({**meta, "task": task, "metric": metric, "value": value})
 
 
162
 
163
  # ───── Merge and upload results ─────
164
  df_new = pd.DataFrame(all_rows)
@@ -177,10 +184,9 @@ with tempfile.TemporaryDirectory() as tmp:
177
  df_combined = df_combined.sort_values("run_date")
178
  df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
179
 
180
- print("Existing rows:", len(df_existing))
181
- print("New rows:", len(df_new))
182
- print("Combined (pre-dedup):", len(df_existing) + len(df_new))
183
- print("Final rows (after dedup):", len(df_combined))
184
 
185
  out = Path("peft_bench.parquet")
186
  df_combined.to_parquet(out, index=False)
 
145
  print(f"Evaluation failed for {adapter_repo}: {e}")
146
  continue
147
 
148
+ if not res.get("results"):
149
+ print(f"No results returned for {adapter_repo}. Skipping...")
150
+ continue
151
+
152
  meta = {
153
  "model_id": adapter_repo,
154
  "adapter_type": adapter_type,
 
157
  "run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
158
  "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
159
  }
160
+
161
+ count_before = len(all_rows)
162
  for task, scores in res["results"].items():
163
  for metric, value in scores.items():
164
  if metric not in METRICS_TO_KEEP:
165
  continue
166
  all_rows.append({**meta, "task": task, "metric": metric, "value": value})
167
+ print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
168
+
169
 
170
  # ───── Merge and upload results ─────
171
  df_new = pd.DataFrame(all_rows)
 
184
  df_combined = df_combined.sort_values("run_date")
185
  df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
186
 
187
+ print("\nFinal new results:")
188
+ print(df_new[["model_id", "task", "metric", "value"]])
189
+
 
190
 
191
  out = Path("peft_bench.parquet")
192
  df_combined.to_parquet(out, index=False)