#!/usr/bin/env python3
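"""Benchmark PEFT adapters and publish the scores to a HF dataset repo.

For each adapter config found in adapters.yaml or manifests/*.yaml, the
script loads the base model, applies and merges the PEFT adapter, runs the
configured lm-eval tasks, and appends the filtered metrics to
data/peft_bench.parquet in the HF_DATASET_REPO dataset.
"""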
import datetime, os, subprocess, tempfile
from pathlib import Path
import gc
import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
)
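
# Adapter configs (loaded below) are dicts shaped roughly like this; the
# example values are hypothetical, inferred from the keys consumed in the loop:
#   base_model: meta-llama/Llama-2-7b-hf   # HF id of the base checkpoint
#   adapter_repo: someuser/my-lora         # HF id of the PEFT adapter repo
#   adapter_type: LoRA                     # optional, defaults to "LoRA"
#   tasks: [hellaswag, arc_easy]           # lm-eval task names
#   trainable_params: 4194304              # optional, logged as metadata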
# ───── Load all configs ─────
CONFIGS = []
if Path("adapters.yaml").exists():
    CONFIGS.extend(yaml.safe_load(open("adapters.yaml"))["adapters"])
for yml in Path("manifests").glob("*.yaml"):
    CONFIGS.append(yaml.safe_load(open(yml)))
if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")
# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)

DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()
device = "cuda" if torch.cuda.is_available() else "cpu"

METRICS_TO_KEEP = {"acc", "accuracy", "acc_stderr", "f1", "exact_match"}
all_rows = []
# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    """Try the fast tokenizer first and fall back to the slow one."""
    try:
        return AutoTokenizer.from_pretrained(model_id, use_fast=True)
    except Exception as e1:
        print(f"Fast tokenizer failed for {model_id}: {e1}")
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=False)
        except Exception as e2:
            raise RuntimeError(f"Failed to load tokenizer for {model_id}: {e2}") from e2
# ───── Evaluate each adapter ─────
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    if torch.cuda.is_available():
        # Reset the counter so peak_gpu_mem_mb reflects this adapter only.
        torch.cuda.reset_peak_memory_stats()

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)
    if "llama" in base_model_id.lower():
        # Newer Llama tokenizers expose a `legacy` flag; best-effort only.
        try:
            tokenizer.legacy = False
        except Exception:
            pass
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")
    # Skip repos that are not PEFT adapters before downloading any weights.
    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue
    try:
        peft_model = PeftModel.from_pretrained(
            base_model,
            adapter_repo,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        # Bake the adapter weights into the base model so the result can be
        # saved and reloaded as a plain checkpoint.
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        continue
    merged_model.eval()
    # Verify the tokenizer object before saving it alongside the merged model.
    if not hasattr(tokenizer, "vocab_size"):
        print("Invalid tokenizer loaded. Skipping.")
        continue

    with tempfile.TemporaryDirectory() as td:
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)
        # HFLM loads the merged checkpoint from the temp dir, so the
        # evaluation has to run while the directory still exists.
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
            use_fast_tokenizer=False,
        )
        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            continue
        finally:
            # Release GPU memory before the next adapter, even on failure.
            del merged_model, peft_model, base_model, tokenizer, hf_lm
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()
    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        "peak_gpu_mem_mb": torch.cuda.max_memory_allocated(device) // 1024**2 if torch.cuda.is_available() else None,
        "run_date": datetime.datetime.utcnow().isoformat(timespec="seconds"),
        "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
    }
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            # lm-eval >= 0.4 reports keys like "acc,none"; strip the filter
            # suffix so the METRICS_TO_KEEP filter matches.
            metric = metric.split(",")[0]
            if metric not in METRICS_TO_KEEP:
                continue
            all_rows.append({**meta, "task": task, "metric": metric, "value": value})
# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)
with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            cache_dir=tmp,
            local_dir=tmp,
            local_dir_use_symlinks=False,
        )
        df_existing = pd.read_parquet(current_path)
    except Exception as e:
        # First run against a fresh dataset repo: start with an empty table.
        print(f"No existing results found ({e}); starting fresh.")
        df_existing = pd.DataFrame(columns=df_new.columns)

df_combined = pd.concat([df_existing, df_new], ignore_index=True)
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
# Dedup: keep only the most recent value per (model, task, metric) triple.
df_combined = df_combined.sort_values("run_date").drop_duplicates(
    subset=["model_id", "task", "metric"], keep="last"
)

print("Existing rows:", len(df_existing))
print("New rows:", len(df_new))
print("Combined (pre-dedup):", len(df_existing) + len(df_new))
print("Final rows (after dedup):", len(df_combined))

out = Path("peft_bench.parquet")
df_combined.to_parquet(out, index=False)
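# Overwrite the parquet in the dataset repo with the refreshed table.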
api.upload_file(
path_or_fileobj=out,
path_in_repo="data/peft_bench.parquet",
repo_id=DATASET_REPO,
repo_type="dataset",
commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
)