Spaces:
Running
on
Zero
Running
on
Zero
AbstractPhil
committed on
Commit
·
3248cf5
1
Parent(s):
5d33a3c
peft loading fixed
Browse files
app.py
CHANGED
@@ -41,8 +41,8 @@ except ImportError:
|
|
41 |
# -----------------------
|
42 |
# MX format uses special dtypes - we need to handle this properly
|
43 |
MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-oss-20b")
|
44 |
-
ADAPTER_ID = os.getenv("ADAPTER_ID", "AbstractPhil/mirel-gpt-oss-20b")
|
45 |
-
ADAPTER_SUBFOLDER = os.getenv("ADAPTER_SUBFOLDER", "checkpoints/checkpoint-516")
|
46 |
ATTN_IMPL = os.getenv("ATTN_IMPL", "eager")
|
47 |
SYSTEM_DEF = os.getenv("SYSTEM_PROMPT", "You are Mirel, a memory-stable symbolic assistant.")
|
48 |
MAX_DEF = int(os.getenv("MAX_NEW_TOKENS", "256"))
|
@@ -163,7 +163,7 @@ def convert_fp32_lora_to_mx_compatible(lora_state_dict: Dict[str, torch.Tensor])
|
|
163 |
|
164 |
return converted
|
165 |
|
166 |
-
def prepare_model_for_mx_lora(model, adapter_path: str):
|
167 |
"""
|
168 |
Prepare and attach LoRA adapter to MX format model.
|
169 |
Handles the special requirements of GPT-OSS MX models.
|
@@ -171,24 +171,80 @@ def prepare_model_for_mx_lora(model, adapter_path: str):
|
|
171 |
if not _HAS_PEFT:
|
172 |
raise RuntimeError("PEFT is required for LoRA adapters. Install with: pip install peft")
|
173 |
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
-
|
177 |
-
peft_config = PeftConfig.from_pretrained(adapter_path, token=HF_TOKEN)
|
178 |
|
179 |
-
# Load the LoRA weights
|
180 |
from safetensors.torch import load_file
|
181 |
import os.path as osp
|
|
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
else:
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
# Convert weights for MX compatibility
|
194 |
print("[LoRA] Converting fp32 weights for MX format compatibility...")
|
@@ -203,8 +259,7 @@ def prepare_model_for_mx_lora(model, adapter_path: str):
|
|
203 |
model,
|
204 |
adapter_path,
|
205 |
is_trainable=False,
|
206 |
-
token
|
207 |
-
# Don't specify torch_dtype here - let it match the base model
|
208 |
)
|
209 |
|
210 |
# Manually update the adapter weights with our converted versions
|
@@ -285,18 +340,21 @@ def _load_model_on(device_map: Optional[str]) -> AutoModelForCausalLM:
|
|
285 |
if ADAPTER_ID:
|
286 |
try:
|
287 |
if is_mx_model:
|
288 |
-
# Use special MX-compatible LoRA loading
|
289 |
-
model = prepare_model_for_mx_lora(model, ADAPTER_ID)
|
290 |
else:
|
291 |
# Standard PEFT loading for non-MX models
|
292 |
if not _HAS_PEFT:
|
293 |
raise RuntimeError("PEFT is required when ADAPTER_ID is set.")
|
294 |
print(f"[Model] Loading adapter from {ADAPTER_ID} (standard mode)...")
|
|
|
|
|
|
|
|
|
295 |
model = PeftModel.from_pretrained(
|
296 |
model,
|
297 |
-
ADAPTER_ID,
|
298 |
-
|
299 |
-
token=HF_TOKEN
|
300 |
)
|
301 |
|
302 |
print("[Model] Successfully loaded with LoRA adapter")
|
|
|
41 |
# -----------------------
|
42 |
# MX format uses special dtypes - we need to handle this properly
|
43 |
MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-oss-20b")
|
44 |
+
ADAPTER_ID = os.getenv("ADAPTER_ID", "AbstractPhil/mirel-gpt-oss-20b") # Default to your adapter
|
45 |
+
ADAPTER_SUBFOLDER = os.getenv("ADAPTER_SUBFOLDER", "checkpoints/checkpoint-516") # Default to the subfolder
|
46 |
ATTN_IMPL = os.getenv("ATTN_IMPL", "eager")
|
47 |
SYSTEM_DEF = os.getenv("SYSTEM_PROMPT", "You are Mirel, a memory-stable symbolic assistant.")
|
48 |
MAX_DEF = int(os.getenv("MAX_NEW_TOKENS", "256"))
|
|
|
163 |
|
164 |
return converted
|
165 |
|
166 |
+
def prepare_model_for_mx_lora(model, adapter_path: str, subfolder: Optional[str] = None):
|
167 |
"""
|
168 |
Prepare and attach LoRA adapter to MX format model.
|
169 |
Handles the special requirements of GPT-OSS MX models.
|
|
|
171 |
if not _HAS_PEFT:
|
172 |
raise RuntimeError("PEFT is required for LoRA adapters. Install with: pip install peft")
|
173 |
|
174 |
+
# Build the full path including subfolder
|
175 |
+
full_adapter_path = adapter_path
|
176 |
+
if subfolder:
|
177 |
+
print(f"[LoRA] Loading adapter from {adapter_path} (subfolder: {subfolder})")
|
178 |
+
else:
|
179 |
+
print(f"[LoRA] Loading adapter from {adapter_path}")
|
180 |
+
|
181 |
+
# Load the LoRA config with subfolder support
|
182 |
+
peft_kwargs = {"token": HF_TOKEN}
|
183 |
+
if subfolder:
|
184 |
+
peft_kwargs["subfolder"] = subfolder
|
185 |
|
186 |
+
peft_config = PeftConfig.from_pretrained(adapter_path, **peft_kwargs)
|
|
|
187 |
|
188 |
+
# Load the LoRA weights - need to check in the right location
|
189 |
from safetensors.torch import load_file
|
190 |
import os.path as osp
|
191 |
+
from huggingface_hub import hf_hub_download
|
192 |
|
193 |
+
try:
|
194 |
+
# Try to download from HF Hub with subfolder
|
195 |
+
if subfolder:
|
196 |
+
# Download the adapter weights file
|
197 |
+
try:
|
198 |
+
adapter_weights_path = hf_hub_download(
|
199 |
+
repo_id=adapter_path,
|
200 |
+
filename="adapter_model.safetensors",
|
201 |
+
subfolder=subfolder,
|
202 |
+
token=HF_TOKEN
|
203 |
+
)
|
204 |
+
adapter_weights = load_file(adapter_weights_path)
|
205 |
+
print(f"[LoRA] Loaded safetensors weights from {subfolder}")
|
206 |
+
except Exception:
|
207 |
+
# Try .bin format
|
208 |
+
adapter_weights_path = hf_hub_download(
|
209 |
+
repo_id=adapter_path,
|
210 |
+
filename="adapter_model.bin",
|
211 |
+
subfolder=subfolder,
|
212 |
+
token=HF_TOKEN
|
213 |
+
)
|
214 |
+
adapter_weights = torch.load(adapter_weights_path, map_location="cpu")
|
215 |
+
print(f"[LoRA] Loaded bin weights from {subfolder}")
|
216 |
else:
|
217 |
+
# No subfolder - try local path first, then HF Hub
|
218 |
+
local_safetensors = osp.join(adapter_path, "adapter_model.safetensors")
|
219 |
+
local_bin = osp.join(adapter_path, "adapter_model.bin")
|
220 |
+
|
221 |
+
if osp.exists(local_safetensors):
|
222 |
+
adapter_weights = load_file(local_safetensors)
|
223 |
+
print("[LoRA] Loaded local safetensors weights")
|
224 |
+
elif osp.exists(local_bin):
|
225 |
+
adapter_weights = torch.load(local_bin, map_location="cpu")
|
226 |
+
print("[LoRA] Loaded local bin weights")
|
227 |
+
else:
|
228 |
+
# Try downloading from HF Hub
|
229 |
+
try:
|
230 |
+
adapter_weights_path = hf_hub_download(
|
231 |
+
repo_id=adapter_path,
|
232 |
+
filename="adapter_model.safetensors",
|
233 |
+
token=HF_TOKEN
|
234 |
+
)
|
235 |
+
adapter_weights = load_file(adapter_weights_path)
|
236 |
+
print("[LoRA] Downloaded safetensors weights from Hub")
|
237 |
+
except Exception:
|
238 |
+
adapter_weights_path = hf_hub_download(
|
239 |
+
repo_id=adapter_path,
|
240 |
+
filename="adapter_model.bin",
|
241 |
+
token=HF_TOKEN
|
242 |
+
)
|
243 |
+
adapter_weights = torch.load(adapter_weights_path, map_location="cpu")
|
244 |
+
print("[LoRA] Downloaded bin weights from Hub")
|
245 |
+
|
246 |
+
except Exception as e:
|
247 |
+
raise FileNotFoundError(f"Could not load adapter weights: {e}")
|
248 |
|
249 |
# Convert weights for MX compatibility
|
250 |
print("[LoRA] Converting fp32 weights for MX format compatibility...")
|
|
|
259 |
model,
|
260 |
adapter_path,
|
261 |
is_trainable=False,
|
262 |
+
**peft_kwargs # This includes token and subfolder
|
|
|
263 |
)
|
264 |
|
265 |
# Manually update the adapter weights with our converted versions
|
|
|
340 |
if ADAPTER_ID:
|
341 |
try:
|
342 |
if is_mx_model:
|
343 |
+
# Use special MX-compatible LoRA loading with subfolder support
|
344 |
+
model = prepare_model_for_mx_lora(model, ADAPTER_ID, ADAPTER_SUBFOLDER)
|
345 |
else:
|
346 |
# Standard PEFT loading for non-MX models
|
347 |
if not _HAS_PEFT:
|
348 |
raise RuntimeError("PEFT is required when ADAPTER_ID is set.")
|
349 |
print(f"[Model] Loading adapter from {ADAPTER_ID} (standard mode)...")
|
350 |
+
peft_kwargs = {"token": HF_TOKEN, "is_trainable": False}
|
351 |
+
if ADAPTER_SUBFOLDER:
|
352 |
+
peft_kwargs["subfolder"] = ADAPTER_SUBFOLDER
|
353 |
+
print(f"[Model] Using subfolder: {ADAPTER_SUBFOLDER}")
|
354 |
model = PeftModel.from_pretrained(
|
355 |
model,
|
356 |
+
ADAPTER_ID,
|
357 |
+
**peft_kwargs
|
|
|
358 |
)
|
359 |
|
360 |
print("[Model] Successfully loaded with LoRA adapter")
|