#!/usr/bin/env python3
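"""Evaluate PEFT adapters against lm-eval tasks and publish the results.

Reads adapter configs from adapters.yaml and manifests/*.yaml, merges each
adapter into its base model, evaluates it with lm-eval, and appends the
flattened scores to data/peft_bench.parquet in the HF_DATASET_REPO dataset.
Requires HF_TOKEN and HF_DATASET_REPO in the environment.
"""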
import datetime, os, subprocess, tempfile
from pathlib import Path
import pandas as pd, yaml, torch
from huggingface_hub import HfApi, hf_hub_download, login
from huggingface_hub.utils import EntryNotFoundError
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
CONFIGS = []
# ───── Load all configs ─────
if Path("adapters.yaml").exists():
    CONFIGS.extend(yaml.safe_load(Path("adapters.yaml").read_text())["adapters"])
for yml in Path("manifests").glob("*.yaml"):
    CONFIGS.append(yaml.safe_load(yml.read_text()))
if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")
# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)
DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()
# ───── Evaluate each adapter ─────
all_rows = []
METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]
    print(f"\nLoading base model: {base_model_id}")
    tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)
    # Try a causal-LM head first; fall back to a classification head for
    # encoder-style base models.
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
    except Exception:
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
    # Merge the adapter into the base weights so evaluation sees one model.
    peft_model = PeftModel.from_pretrained(base_model, adapter_repo)
    merged_model = peft_model.merge_and_unload()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        # Reset the counter so peak_gpu_mem_mb reflects this adapter only.
        torch.cuda.reset_peak_memory_stats(device)
    merged_model.to(device)
    merged_model.eval()
    # Round-trip through a temp dir so lm-eval loads the merged checkpoint.
    with tempfile.TemporaryDirectory() as td:
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
        )
        res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        "peak_gpu_mem_mb": (
            torch.cuda.max_memory_allocated(device) // 1024**2
            if torch.cuda.is_available()
            else None
        ),
        "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
    }
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            # lm-eval >= 0.4 names result keys "metric,filter" (e.g. "acc,none");
            # strip the filter suffix before matching.
            metric_name = metric.split(",")[0]
            if metric_name not in METRICS_TO_KEEP:
                continue
            all_rows.append({**meta, "task": task, "metric": metric_name, "value": value})
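    # Assumption: adapters are evaluated sequentially on one device, so free
    # the per-adapter objects here to keep successive merged models from
    # accumulating in memory.
    del base_model, peft_model, merged_model, hf_lm
    if torch.cuda.is_available():
        torch.cuda.empty_cache()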
# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)
with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            local_dir=tmp,
        )
        df_existing = pd.read_parquet(current_path)
    except EntryNotFoundError:
        # First run: the dataset repo has no results file yet.
        df_existing = pd.DataFrame(columns=df_new.columns)
df_combined = pd.concat([df_existing, df_new], ignore_index=True)
df_combined = df_combined.sort_values("run_date")
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
# Drop exact re-runs so repeated uploads don't duplicate rows; the counts
# printed below assume this step.
df_combined = df_combined.drop_duplicates(
    subset=["model_id", "task", "metric", "run_date"], keep="last"
)
print("Existing rows:", len(df_existing))
print("New rows:", len(df_new))
print("Combined rows (pre-dedup):", len(df_existing) + len(df_new))
print("Final rows (after dedup):", len(df_combined))
out = Path("peft_bench.parquet")
df_combined.to_parquet(out, index=False)
api.upload_file(
    path_or_fileobj=out,
    path_in_repo="data/peft_bench.parquet",
    repo_id=DATASET_REPO,
    repo_type="dataset",
    commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
)
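# Typical invocation (script and repo names are placeholders):
#   HF_TOKEN=hf_xxx HF_DATASET_REPO=your-org/peft-bench python run_peft_bench.py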