#!/usr/bin/env python3
import datetime, os, subprocess, tempfile
from pathlib import Path
import gc

import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
)
# ───── Load all configs ─────
CONFIGS = []
if Path("adapters.yaml").exists():
    with open("adapters.yaml") as f:
        CONFIGS.extend(yaml.safe_load(f)["adapters"])
for yml in Path("manifests").glob("*.yaml"):
    with open(yml) as f:
        CONFIGS.append(yaml.safe_load(f))
if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")
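
# Expected shape of one adapter config (inferred from the fields read below;
# model ids and values here are purely illustrative):
#
#   base_model: meta-llama/Llama-2-7b-hf
#   adapter_repo: someuser/llama2-lora-demo
#   adapter_type: LoRA          # optional, defaults to "LoRA"
#   trainable_params: 4194304   # optional
#   tasks: [hellaswag, arc_easy]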
# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)

DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()
all_rows = []
# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    try:
        return AutoTokenizer.from_pretrained(model_id, use_fast=True)
    except Exception as e1:
        print(f"Fast tokenizer failed for {model_id}: {e1}")
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=False)
        except Exception as e2:
            raise RuntimeError(f"Failed to load tokenizer for {model_id}: {e2}") from e2
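
# The use_fast=False fallback covers models that only ship a slow
# (SentencePiece) tokenizer or whose fast-tokenizer conversion fails.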
# ───── Evaluate each adapter ─────
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)
    if "llama" in base_model_id.lower():
        try:
            # Opt out of the legacy Llama tokenizer behaviour where supported.
            tokenizer.legacy = False
        except Exception:
            pass
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")
    # Skip repos that are not PEFT adapters before downloading any weights.
    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue
    try:
        peft_model = PeftModel.from_pretrained(
            base_model,
            adapter_repo,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        # Fold the adapter weights into the base model so it can be saved
        # and re-loaded as a plain checkpoint.
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        continue

    merged_model.eval()
    with tempfile.TemporaryDirectory() as td:
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)

        # Verify tokenizer object
        if not hasattr(tokenizer, "vocab_size"):
            print("Invalid tokenizer loaded. Skipping.")
            continue

        device = "cuda" if torch.cuda.is_available() else "cpu"
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
        )
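        # hf_lm wraps the merged checkpoint re-loaded from the temp dir, so
        # lm-eval treats it as a plain standalone model on disk.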
        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
            if not res.get("results"):
                print(f"Empty results (likely a task or model compatibility issue) for: {adapter_repo}")
                continue
            print(f"\nEvaluation raw results for {adapter_repo}:")
            print(res["results"])

            # Free GPU memory before the next adapter.
            del merged_model
            del peft_model
            del base_model
            del tokenizer
            del hf_lm
            gc.collect()
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            continue
        meta = {
            "model_id": adapter_repo,
            "adapter_type": adapter_type,
            "trainable_params": cfg.get("trainable_params"),
            # High-water mark since process start, not per adapter.
            "peak_gpu_mem_mb": torch.cuda.max_memory_allocated() // 1024**2 if torch.cuda.is_available() else None,
            "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
            "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
        }
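        # NOTE: commit_sha assumes the script runs inside a git checkout;
        # subprocess.check_output raises otherwise.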
        count_before = len(all_rows)
        # lm-eval metric keys look like "acc,none"; split off the aggregation.
        for task, scores in res["results"].items():
            for metric, value in scores.items():
                if value is None:
                    continue
                metric_name, _, aggregation = metric.partition(",")
                all_rows.append({
                    **meta,
                    "task": task,
                    "metric": metric_name,
                    "aggregation": aggregation or None,
                    "value": value,
                })
        print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)
if df_new.empty:
    raise RuntimeError("No evaluation results were produced; nothing to upload.")

with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            cache_dir=tmp,
            local_dir=tmp,
        )
        df_existing = pd.read_parquet(current_path)
    except Exception as e:
        # First run: no parquet file in the dataset repo yet.
        print(f"No existing results found ({e}); starting a new table.")
        df_existing = pd.DataFrame()

df_combined = pd.concat([df_existing, df_new], ignore_index=True)
df_combined = df_combined.sort_values("run_date")
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
print("\nFinal new results:") | |
print(df_new[["model_id", "task", "metric", "aggregation", "value"]]) | |
out = Path("peft_bench.parquet") | |
df_combined.to_parquet(out, index=False) | |
api.upload_file( | |
path_or_fileobj=out, | |
path_in_repo="data/peft_bench.parquet", | |
repo_id=DATASET_REPO, | |
repo_type="dataset", | |
commit_message=f"Add {len(CONFIGS)} new adapter run(s)", | |
) | |
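
# Requirements to run this script (as read by the code above):
#   HF_TOKEN         - Hugging Face token with write access to the dataset repo
#   HF_DATASET_REPO  - dataset repo that stores data/peft_bench.parquet
#   adapters.yaml and/or manifests/*.yaml - the adapter configs to evaluate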