Mdrnfox committed
Commit 921975c · verified · 1 Parent(s): 0799cad

Update run_eval.py

Files changed (1)
  1. run_eval.py +11 -4
run_eval.py CHANGED
@@ -37,7 +37,6 @@ login(token)
 DATASET_REPO = os.environ["HF_DATASET_REPO"]
 api = HfApi()
 
-METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
 all_rows = []
 
 # ───── Safe tokenizer loading ─────
@@ -163,10 +162,18 @@ for cfg in CONFIGS:
     count_before = len(all_rows)
     for task, scores in res["results"].items():
         for metric, value in scores.items():
-            base_metric = metric.split(",")[0]
-            if base_metric not in METRICS_TO_KEEP:
+            if value is None:
                 continue
-            all_rows.append({**meta, "task": task, "metric": metric, "value": value})
+            metric_name, _, aggregation = metric.partition(",")
+
+            all_rows.append({
+                **meta,
+                "task": task,
+                "metric": metric_name,
+                "aggregation": aggregation or None,
+                "value": value
+            })
+
 
     print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
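
For reference (not part of the commit): instead of filtering against a metric whitelist, the new code keeps every metric whose value is not None and splits keys of the form "metric,aggregation" (e.g. "acc,none") with str.partition. A minimal sketch with made-up sample scores, illustrating only this handling rather than the full run_eval.py script:

# Hypothetical scores dict standing in for res["results"][task]
scores = {"acc,none": 0.62, "acc_stderr,none": 0.01, "exact_match": None}

rows = []
for metric, value in scores.items():
    if value is None:  # skip metrics that were not computed, as the diff does
        continue
    metric_name, _, aggregation = metric.partition(",")
    rows.append({
        "metric": metric_name,                # "acc", "acc_stderr", ...
        "aggregation": aggregation or None,   # None when the key has no comma
        "value": value,
    })

# rows ->
# [{"metric": "acc", "aggregation": "none", "value": 0.62},
#  {"metric": "acc_stderr", "aggregation": "none", "value": 0.01}]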