#!/usr/bin/env python3
import datetime, os, subprocess, tempfile
from pathlib import Path

import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from huggingface_hub.utils import EntryNotFoundError
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

CONFIGS = []

# ───── Load all configs ─────
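# Illustrative config shape, inferred from the keys consumed below (repo
# names are hypothetical). adapters.yaml holds a top-level `adapters` list;
# each manifests/*.yaml holds a single entry of the same shape:
#
#   adapters:
#     - base_model: meta-llama/Llama-2-7b-hf
#       adapter_repo: someuser/llama2-lora-demo
#       adapter_type: LoRA            # optional; defaults to "LoRA"
#       trainable_params: 4194304     # optional metadata
#       tasks: [hellaswag, arc_easy]  # lm-eval task names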
if Path("adapters.yaml").exists():
    CONFIGS.extend(yaml.safe_load(open("adapters.yaml"))["adapters"])

for yml in Path("manifests").glob("*.yaml"):
    CONFIGS.append(yaml.safe_load(open(yml)))

if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")

# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)

DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()

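# Headline metrics to persist per task; all other lm-eval output keys are dropped.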
METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
all_rows = []

# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    """Try the fast (Rust) tokenizer first, falling back to the slow Python one."""
    try:
        return AutoTokenizer.from_pretrained(model_id, use_fast=True)
    except Exception as e1:
        print(f"Fast tokenizer failed for {model_id}: {e1}")
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=False)
        except Exception as e2:
            raise RuntimeError(f"Failed to load tokenizer for {model_id}: {e2}") from e2

# ───── Evaluate each adapter ─────
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    # Reset the CUDA peak-memory counter so peak_gpu_mem_mb below measures
    # this adapter alone rather than the running maximum across the loop.
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)

    if "llama" in base_model_id.lower():
        try:
            tokenizer.legacy = False 
        except:
            pass

    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            trust_remote_code=True,
            use_safetensors=True
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            trust_remote_code=True,
            use_safetensors=True
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")

    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue

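    # merge_and_unload() folds the adapter deltas into the base weights and
    # returns a plain transformers model, so the merged checkpoint saved
    # below needs no PEFT dependency at evaluation time.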
    try:
        peft_model = PeftModel.from_pretrained(base_model, adapter_repo)
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        continue

    device = "cuda" if torch.cuda.is_available() else "cpu"
    merged_model.to(device)
    merged_model.eval()

    # Verify the tokenizer object before serializing anything.
    if not hasattr(tokenizer, "vocab_size"):
        print("Invalid tokenizer loaded. Skipping.")
        continue

    with tempfile.TemporaryDirectory() as td:
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)

        # Batch size: 8 for causal LMs, 16 for the encoder fallback.
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
        )

        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            continue

    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        "peak_gpu_mem_mb": torch.cuda.max_memory_allocated(device) // 1024**2 if torch.cuda.is_available() else None,
        "run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
        "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
    }

    for task, scores in res["results"].items():
        for metric, value in scores.items():
            # lm-eval >= 0.4 keys metrics as "<name>,<filter>" (e.g. "acc,none");
            # strip the filter suffix before matching against METRICS_TO_KEEP.
            base_metric = metric.split(",")[0]
            if base_metric not in METRICS_TO_KEEP:
                continue
            all_rows.append({**meta, "task": task, "metric": base_metric, "value": value})
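
    # Release GPU memory before the next adapter; successive merged models
    # can otherwise accumulate and OOM (assumes nothing else holds references
    # to these objects).
    del hf_lm, merged_model, peft_model, base_model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()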

# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)

with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            cache_dir=tmp,
            local_dir=tmp,
            local_dir_use_symlinks=False,
        )
        df_existing = pd.read_parquet(current_path)
    except EntryNotFoundError:
        # First run: the dataset repo has no parquet file yet.
        df_existing = pd.DataFrame()

    df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
    df_combined = df_combined.sort_values("run_date")
    # Drop exact duplicate rows (e.g. from re-running the same commit) so the
    # "after dedup" count below is meaningful.
    df_combined = df_combined.drop_duplicates(ignore_index=True)

    print("Existing rows:", len(df_existing))
    print("New rows:", len(df_new))
    print("Combined (pre-dedup):", len(df_existing) + len(df_new))
    print("Final rows (after dedup):", len(df_combined))

    out = Path("peft_bench.parquet")
    df_combined.to_parquet(out, index=False)

    api.upload_file(
        path_or_fileobj=out,
        path_in_repo="data/peft_bench.parquet",
        repo_id=DATASET_REPO,
        repo_type="dataset",
        commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
    )
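
# Typical invocation (CI-style; the filename eval_adapters.py is illustrative):
#   HF_TOKEN=<token> HF_DATASET_REPO=<user>/peft-bench python eval_adapters.py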