Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,5 +1,5 @@
 import torch
-from diffusers import
+from diffusers import UniPCMultistepScheduler
 from diffusers import WanPipeline, AutoencoderKLWan
 from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
 from huggingface_hub import hf_hub_download
@@ -14,65 +14,97 @@ print(f"Using device: {device}")
 
 model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
 print("Loading VAE...")
-# VAE is often kept in float32 for precision during decoding
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
 
 print("Loading WanPipeline in bfloat16...")
-#
+# This will use ZeroGPU/accelerate with meta devices
 pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
 
 flow_shift = 1.0
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift)
 
-# Move the base pipeline to the GPU.
-print("Moving pipeline to device...")
+# Move the base pipeline to the GPU. ZeroGPU will manage this.
+print("Moving pipeline to device (ZeroGPU will handle offloading)...")
 pipe.to(device)
 
-# --- LORA
-
+# --- LORA SETUP ---
+# We will NOT fuse anything. Everything will be handled dynamically.
 CAUSVID_LORA_REPO = "Kijai/WanVideo_comfy"
 CAUSVID_LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
+BASE_LORA_NAME = "causvid_lora"
 CUSTOM_LORA_NAME = "custom_lora"
 
+print("Downloading base LoRA...")
 try:
     causvid_path = hf_hub_download(repo_id=CAUSVID_LORA_REPO, filename=CAUSVID_LORA_FILENAME)
-
-    print("✅ Base LoRA loaded.")
-
-    pipe.fuse_lora() # Fuses float32 LoRA into bfloat16 model
-    print("✅ Base LoRA fused.")
-
-    # FIX for Dtype Mismatch: After fusing, some layers became float32.
-    # We must cast the entire pipeline back to bfloat16 to ensure consistency.
-    pipe.to(dtype=torch.bfloat16)
-    print("✅ Pipeline converted back to bfloat16 post-fusion.")
-
+    print("✅ Base LoRA downloaded.")
 except Exception as e:
-
+    causvid_path = None
+    print(f"⚠️ Could not download base LoRA: {e}")
 
 print("Initialization complete. Gradio is starting...")
 
+def move_adapter_to_device(pipe, adapter_name, device):
+    """
+    Surgically moves only the parameters of a specific LoRA adapter to the target device.
+    This avoids touching the base model's meta tensors.
+    """
+    print(f"Moving adapter '{adapter_name}' to {device}...")
+    for param in pipe.transformer.parameters():
+        if hasattr(param, "adapter_name") and param.adapter_name == adapter_name:
+            param.data = param.data.to(device, non_blocking=True)
+            if param.grad is not None:
+                param.grad.data = param.grad.data.to(device, non_blocking=True)
+    print(f"✅ Adapter '{adapter_name}' moved.")
+
 @spaces.GPU()
 def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_steps=30, lora_id=None, progress=gr.Progress(track_tqdm=True)):
-    clean_lora_id = lora_id.strip() if lora_id else ""
 
+    # --- DYNAMIC LORA MANAGEMENT FOR EACH RUN ---
+    # Start with a clean slate by disabling any active adapters from previous runs
+    pipe.disable_lora()
+
+    active_adapters = []
+    adapter_weights = []
+
+    # 1. Load the Base LoRA
+    if causvid_path:
+        try:
+            # We load it for every run to ensure a clean state
+            print(f"Loading base LoRA '{BASE_LORA_NAME}'...")
+            pipe.load_lora_weights(causvid_path, adapter_name=BASE_LORA_NAME)
+
+            # THE CRITICAL FIX: Move only this adapter's weights to the GPU
+            move_adapter_to_device(pipe, BASE_LORA_NAME, device)
+
+            active_adapters.append(BASE_LORA_NAME)
+            adapter_weights.append(1.0)
+        except Exception as e:
+            print(f"⚠️ Failed to load base LoRA: {e}")
+
+    # 2. Load the Custom LoRA if provided
+    clean_lora_id = lora_id.strip() if lora_id else ""
     if clean_lora_id:
         try:
-            print(f"
-            # 1. Load the temporary LoRA
+            print(f"Loading custom LoRA '{CUSTOM_LORA_NAME}' from '{clean_lora_id}'...")
             pipe.load_lora_weights(clean_lora_id, adapter_name=CUSTOM_LORA_NAME)
 
-            #
-            pipe
-
-            # 3. Ensure dtype consistency after the new fusion
-            pipe.to(dtype=torch.bfloat16)
-            print(f"✅ Custom LoRA '{CUSTOM_LORA_NAME}' fused and activated.")
+            # THE CRITICAL FIX: Move only this adapter's weights to the GPU
+            move_adapter_to_device(pipe, CUSTOM_LORA_NAME, device)
 
+            active_adapters.append(CUSTOM_LORA_NAME)
+            adapter_weights.append(1.0)
         except Exception as e:
-            print(f"⚠️ Failed to load
-
+            print(f"⚠️ Failed to load custom LoRA '{clean_lora_id}': {e}")
+            # If it fails, delete the adapter config to prevent issues
+            if CUSTOM_LORA_NAME in pipe.transformer.peft_config:
+                del pipe.transformer.peft_config[CUSTOM_LORA_NAME]
 
+    # 3. Activate the successfully loaded adapters
+    if active_adapters:
+        print(f"Activating adapters: {active_adapters} with weights: {adapter_weights}")
+        pipe.set_adapters(active_adapters, adapter_weights)
+
     apply_cache_on_pipe(pipe)
 
     try:
@@ -89,18 +121,19 @@ def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_ste
         image = (image * 255).astype(np.uint8)
         return Image.fromarray(image)
     finally:
-        #
-
-
-
-
-
-
-
-
-
+        # --- PROPER CLEANUP ---
+        print("Cleaning up LoRAs for this run...")
+        # Disable adapters to stop them from being used
+        pipe.disable_lora()
+
+        # Delete the LoRA configs from the model to truly unload them
+        if BASE_LORA_NAME in pipe.transformer.peft_config:
+            del pipe.transformer.peft_config[BASE_LORA_NAME]
+        if CUSTOM_LORA_NAME in pipe.transformer.peft_config:
+            del pipe.transformer.peft_config[CUSTOM_LORA_NAME]
+        print("✅ LoRAs cleaned up.")
+
 
-# --- Your Gradio Interface Code (no changes needed here) ---
 iface = gr.Interface(
     fn=generate,
     inputs=[
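
The change above replaces the old fuse-and-recast LoRA handling with per-request adapter management: each call to generate() loads the base and optional custom LoRA as named adapters, activates them with set_adapters, and tears them down in the finally block. The sketch below is a minimal illustration of that lifecycle on a generic diffusers pipeline, not the Space's code: run_with_adapters, base_lora_path, and custom_lora_id are placeholder names, and it cleans up with unload_lora_weights() instead of deleting peft_config entries directly.

def run_with_adapters(pipe, prompt, base_lora_path=None, custom_lora_id=None):
    # Start from a clean state: no adapters left active by a previous request.
    pipe.disable_lora()
    active, weights = [], []
    try:
        if base_lora_path:
            # Placeholder path/repo; adapter_name lets several LoRAs coexist.
            pipe.load_lora_weights(base_lora_path, adapter_name="base")
            active.append("base")
            weights.append(1.0)
        if custom_lora_id:
            pipe.load_lora_weights(custom_lora_id, adapter_name="custom")
            active.append("custom")
            weights.append(1.0)
        if active:
            # Activate only the adapters that actually loaded.
            pipe.set_adapters(active, weights)
        return pipe(prompt=prompt, num_inference_steps=30)
    finally:
        # Tear down so the next request starts clean.
        pipe.disable_lora()
        pipe.unload_lora_weights()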