#!/usr/bin/env python3
import datetime, os, subprocess, tempfile
from pathlib import Path
import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from huggingface_hub.utils import EntryNotFoundError
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
# ───── Load all configs ─────
CONFIGS = []
if Path("adapters.yaml").exists():
    CONFIGS.extend(yaml.safe_load(open("adapters.yaml"))["adapters"])
for yml in Path("manifests").glob("*.yaml"):
    CONFIGS.append(yaml.safe_load(open(yml)))
if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")
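# Each adapter config (whether an entry in adapters.yaml's "adapters" list or a
# standalone manifest) is expected to look roughly like this, matching the keys
# read in the loop below (illustrative values):
#
#   base_model: meta-llama/Llama-2-7b-hf
#   adapter_repo: my-org/llama2-lora-demo
#   adapter_type: LoRA          # optional, defaults to "LoRA"
#   trainable_params: 4194304   # optional, recorded as metadata only
#   tasks: [hellaswag, arc_easy]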
# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)
DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()
METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
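# Note: recent lm-eval releases key metrics as "acc,none", "acc_stderr,none",
# etc. If your installed version does this, strip the ",none" suffix (or extend
# this set) so the filter below doesn't drop every row. (Assumption about the
# installed lm-eval version, not something this script detects.)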
all_rows = []
# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    """Try the fast (Rust) tokenizer first, then fall back to the slow one."""
    try:
        return AutoTokenizer.from_pretrained(model_id, use_fast=True)
    except Exception as e1:
        print(f"Fast tokenizer failed for {model_id}: {e1}")
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=False)
        except Exception as e2:
            raise RuntimeError(f"Failed to load tokenizer for {model_id}: {e2}") from e2
# ───── Evaluate each adapter ─────
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)
    if "llama" in base_model_id.lower():
        # Best effort: opt Llama tokenizers out of legacy SentencePiece handling.
        try:
            tokenizer.legacy = False
        except Exception:
            pass
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")
    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue
    try:
        peft_model = PeftModel.from_pretrained(base_model, adapter_repo)
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        continue
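    # merge_and_unload() folds the adapter deltas into the base weights and
    # returns a plain transformers model, so lm-eval can treat the merged
    # checkpoint like any ordinary HF model below.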
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if torch.cuda.is_available():
        # Reset the counter so peak_gpu_mem_mb below reflects this adapter only,
        # not the maximum across all adapters evaluated so far.
        torch.cuda.reset_peak_memory_stats()
    merged_model.to(device)
    merged_model.eval()
    with tempfile.TemporaryDirectory() as td:
        # Sanity-check the tokenizer before writing anything to disk.
        if not hasattr(tokenizer, "vocab_size"):
            print("Invalid tokenizer loaded. Skipping.")
            continue
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)
        # Note: lm-eval's HFLM wrapper targets causal/seq2seq LMs; a sequence-
        # classification fallback may not be supported and will surface as an
        # evaluation failure below.
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
        )
        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            continue
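    # res["results"] maps task name -> metric dict, roughly (illustrative):
    #   {"hellaswag": {"acc": 0.57, "acc_stderr": 0.005, ...}, ...}
    # Exact metric keys depend on the lm-eval version and tasks configured.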
    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        "peak_gpu_mem_mb": (
            torch.cuda.max_memory_allocated(device) // 1024**2
            if torch.cuda.is_available()
            else None
        ),
        "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
    }
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            if metric not in METRICS_TO_KEEP:
                continue
            all_rows.append({**meta, "task": task, "metric": metric, "value": value})
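    # Rows are stored long-format: one row per (adapter, task, metric), with the
    # shared run metadata repeated on each row, e.g. (illustrative):
    #   {"model_id": "my-org/llama2-lora-demo", "adapter_type": "LoRA", ...,
    #    "task": "hellaswag", "metric": "acc", "value": 0.57}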
# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)
with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            cache_dir=tmp,
            local_dir=tmp,
            local_dir_use_symlinks=False,
        )
        df_existing = pd.read_parquet(current_path)
    except EntryNotFoundError:
        # First run: the dataset repo has no parquet file yet.
        df_existing = pd.DataFrame(columns=df_new.columns)
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
    df_combined = df_combined.sort_values("run_date")
    # Keep only the most recent result per adapter/task/metric (assumed dedup key).
    df_combined = df_combined.drop_duplicates(subset=["model_id", "task", "metric"], keep="last")
    print("Existing rows:", len(df_existing))
    print("New rows:", len(df_new))
    print("Combined (pre-dedup):", len(df_existing) + len(df_new))
    print("Final rows (after dedup):", len(df_combined))
    out = Path("peft_bench.parquet")
    df_combined.to_parquet(out, index=False)
    api.upload_file(
        path_or_fileobj=out,
        path_in_repo="data/peft_bench.parquet",
        repo_id=DATASET_REPO,
        repo_type="dataset",
        commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
    )