#!/usr/bin/env python3
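"""Evaluate PEFT adapters with lm-eval-harness and publish the scores.

Adapter configs are read from adapters.yaml and/or manifests/*.yaml. Each
adapter is merged into its base model, scored on the configured tasks, and
the results are appended to a parquet file in a Hugging Face dataset repo,
deduplicated by (model_id, task, metric).
"""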
import datetime, os, subprocess, tempfile
from pathlib import Path
import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
CONFIGS = []
# ───── Load all configs ─────
if Path("adapters.yaml").exists():
    CONFIGS.extend(yaml.safe_load(Path("adapters.yaml").read_text())["adapters"])
for yml in Path("manifests").glob("*.yaml"):
    CONFIGS.append(yaml.safe_load(yml.read_text()))
if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")
# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":
    # Guard against an unset secret or a masked placeholder value.
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)
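# HF_DATASET_REPO names the dataset repo that receives results
# (e.g. "username/peft-bench" — illustrative; set alongside HF_TOKEN).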
DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()
# ───── Evaluate each adapter ─────
all_rows = []
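# lm-eval reports "acc" or "accuracy" depending on task and version, so keep both.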
METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    print(f"\n📦 Loading base model: {base_model_id}")
    tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)

    # Try a causal-LM head first; fall back to a sequence-classification head
    # for encoder-style base models (transformers raises ValueError when the
    # architecture has no causal-LM mapping).
    try:
        base_model = AutoModelForCausalLM.from_pretrained(base_model_id)
        is_encoder = False
    except ValueError:
        base_model = AutoModelForSequenceClassification.from_pretrained(base_model_id)
        is_encoder = True

    # Apply the adapter, then fold its weights into the base model so the
    # result can be saved and evaluated like a plain checkpoint.
    peft_model = PeftModel.from_pretrained(base_model, adapter_repo)
    merged_model = peft_model.merge_and_unload()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    merged_model.to(device)
    merged_model.eval()
    if torch.cuda.is_available():
        # Reset the peak counter so each adapter's memory stat is measured
        # independently rather than carrying over from the previous run.
        torch.cuda.reset_peak_memory_stats(device)

    # Serialize the merged model so lm-eval can load it from a local path.
    with tempfile.TemporaryDirectory() as td:
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
        )
        res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)

    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        "peak_gpu_mem_mb": (
            torch.cuda.max_memory_allocated(device) // 1024**2
            if torch.cuda.is_available()
            else None
        ),
        "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
    }
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            if metric not in METRICS_TO_KEEP:
                continue
            all_rows.append({**meta, "task": task, "metric": metric, "value": value})

    # Free GPU memory before loading the next adapter.
    del base_model, peft_model, merged_model, hf_lm
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
# ───── Merge and upload results ─────
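# all_rows is long-format: one row per (adapter, task, metric), plus run metadata.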
df_new = pd.DataFrame(all_rows)
with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            local_dir=tmp,
        )
        df_existing = pd.read_parquet(current_path)
    except EntryNotFoundError:
        # First run: the dataset repo has no results file yet.
        df_existing = pd.DataFrame()
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)

# Keep only the latest run for each (model_id, task, metric) triple.
df_combined = (
    df_combined
    .sort_values("run_date")
    .drop_duplicates(subset=["model_id", "task", "metric"], keep="last")
)
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

out = Path("peft_bench.parquet")
df_combined.to_parquet(out, index=False)
api.upload_file(
    path_or_fileobj=out,
    path_in_repo="data/peft_bench.parquet",
    repo_id=DATASET_REPO,
    repo_type="dataset",
    commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
)
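# Downstream consumers (e.g. a leaderboard Space) can read the results back
# directly; this sketch assumes only the repo id and path uploaded above:
#
#   from huggingface_hub import hf_hub_download
#   import pandas as pd
#   path = hf_hub_download(repo_id=DATASET_REPO, filename="data/peft_bench.parquet",
#                          repo_type="dataset")
#   print(pd.read_parquet(path).tail())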