Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -12,12 +12,13 @@ import spaces
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 
-model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
+model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
 print("Loading VAE...")
+# VAE is often kept in float32 for precision during decoding
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
 
-print("Loading WanPipeline...")
-#
+print("Loading WanPipeline in bfloat16...")
+# Load the main model in bfloat16 to save memory
 pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
 
 flow_shift = 1.0
@@ -28,24 +29,24 @@ print("Moving pipeline to device...")
 pipe.to(device)
 
 # --- LORA FUSING (Done ONCE at startup) ---
-# We will fuse the base LoRA permanently into the model for performance and to solve the device issue.
-# This means we cannot dynamically unload it, but it's the correct approach for a fixed setup.
-
 print("Loading and Fusing base LoRA...")
 CAUSVID_LORA_REPO = "Kijai/WanVideo_comfy"
 CAUSVID_LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
-CUSTOM_LORA_NAME = "custom_lora"
+CUSTOM_LORA_NAME = "custom_lora"
 
 try:
     causvid_path = hf_hub_download(repo_id=CAUSVID_LORA_REPO, filename=CAUSVID_LORA_FILENAME)
-
-    pipe.load_lora_weights(causvid_path)  # Use the default adapter name
+    pipe.load_lora_weights(causvid_path)  # Loads LoRA, likely onto CPU
    print("✅ Base LoRA loaded.")
 
-    #
-
-    print("✅ Base LoRA fused successfully.")
+    pipe.fuse_lora()  # Fuses float32 LoRA into bfloat16 model
+    print("✅ Base LoRA fused.")
 
+    # FIX for Dtype Mismatch: After fusing, some layers became float32.
+    # We must cast the entire pipeline back to bfloat16 to ensure consistency.
+    pipe.to(dtype=torch.bfloat16)
+    print("✅ Pipeline converted back to bfloat16 post-fusion.")
+
 except Exception as e:
     print(f"⚠️ Could not load or fuse the base LoRA: {e}")
 
@@ -53,28 +54,25 @@ print("Initialization complete. Gradio is starting...")
 
 @spaces.GPU()
 def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_steps=30, lora_id=None, progress=gr.Progress(track_tqdm=True)):
-    # The base LoRA is already fused. We only need to handle the optional custom LoRA.
     clean_lora_id = lora_id.strip() if lora_id else ""
 
-    # We will load and unload the custom LoRA dynamically for each run
     if clean_lora_id:
         try:
-            print(f"
-            # Load the
+            print(f"Applying custom LoRA '{clean_lora_id}' for this run...")
+            # 1. Load the temporary LoRA
             pipe.load_lora_weights(clean_lora_id, adapter_name=CUSTOM_LORA_NAME)
 
-            #
-
-            pipe.to(device, dtype=pipe.transformer.dtype)  # Ensure dtype matches
+            # 2. Fuse it into the model. This is the key to avoiding device/meta errors.
+            pipe.fuse_lora(adapter_names=[CUSTOM_LORA_NAME])
 
-
-
+            # 3. Ensure dtype consistency after the new fusion
+            pipe.to(dtype=torch.bfloat16)
+            print(f"✅ Custom LoRA '{CUSTOM_LORA_NAME}' fused and activated.")
+
         except Exception as e:
-            print(f"⚠️ Failed to load custom LoRA '{clean_lora_id}': {e}. Running without it.")
-            #
-            pipe.disable_lora()
+            print(f"⚠️ Failed to load or fuse custom LoRA '{clean_lora_id}': {e}. Running without it.")
+            clean_lora_id = ""  # Clear the id so we don't try to clean it up
 
-    # Apply performance optimizations
     apply_cache_on_pipe(pipe)
 
     try:
@@ -91,12 +89,16 @@ def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_ste
         image = (image * 255).astype(np.uint8)
         return Image.fromarray(image)
     finally:
-        # Clean up the dynamic LoRA if it was loaded
+        # Clean up the dynamic LoRA if it was successfully loaded and fused
        if clean_lora_id:
-            print(f"
-
-
-
+            print(f"Cleaning up custom LoRA '{CUSTOM_LORA_NAME}'...")
+            # 1. Unfuse the temporary LoRA to revert the model's weights
+            pipe.unfuse_lora(adapter_names=[CUSTOM_LORA_NAME])
+
+            # 2. Unload the LoRA weights from memory
+            # FIX for TypeError: unload_lora_weights takes no adapter name
+            pipe.unload_lora_weights()
+            print("✅ Custom LoRA unfused and unloaded.")
 
 # --- Your Gradio Interface Code (no changes needed here) ---
 iface = gr.Interface(
@@ -107,7 +109,7 @@ iface = gr.Interface(
         gr.Slider(label="Width", minimum=480, maximum=1280, step=16, value=1024),
         gr.Slider(label="Height", minimum=480, maximum=1280, step=16, value=1024),
         gr.Slider(minimum=1, maximum=80, step=1, label="Inference Steps", value=10),
-        gr.Textbox(label="LoRA ID (Optional
+        gr.Textbox(label="LoRA ID (Optional)"),
     ],
     outputs=gr.Image(label="output"),
 )
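Taken together, the startup portion of the new app.py follows the pattern below. This is a minimal sketch assembled from the changed lines above, not a separate file; it assumes a diffusers release that ships WanPipeline/AutoencoderKLWan and the PEFT-backed LoRA helpers (load_lora_weights, fuse_lora).

import torch
from diffusers import AutoencoderKLWan, WanPipeline
from huggingface_hub import hf_hub_download

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"

# VAE stays in float32 for decode precision; the main model loads in bfloat16
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
pipe.to(device)

# Fuse the base CausVid LoRA once at startup
causvid_path = hf_hub_download(repo_id="Kijai/WanVideo_comfy",
                               filename="Wan21_CausVid_14B_T2V_lora_rank32.safetensors")
pipe.load_lora_weights(causvid_path)   # default adapter name
pipe.fuse_lora()                       # bake the LoRA into the base weights
pipe.to(dtype=torch.bfloat16)          # fusing can leave float32 layers behind; re-cast everything

Fusing once at startup trades the ability to unload the base LoRA for not having to manage adapters on every request.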
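The generate() changes implement a per-request lifecycle for the optional user LoRA: load, fuse, run, then unfuse and unload in a finally block. Below is a condensed, hypothetical helper that mirrors that flow; run_with_optional_lora is illustrative and not part of app.py, and the adapter_names arguments to fuse_lora/unfuse_lora follow the commit and may require a recent diffusers version.

import torch

CUSTOM_LORA_NAME = "custom_lora"

def run_with_optional_lora(pipe, lora_id, **pipe_kwargs):
    # Hypothetical helper mirroring generate(): apply an optional custom LoRA for one call.
    clean_lora_id = lora_id.strip() if lora_id else ""
    if clean_lora_id:
        try:
            pipe.load_lora_weights(clean_lora_id, adapter_name=CUSTOM_LORA_NAME)
            pipe.fuse_lora(adapter_names=[CUSTOM_LORA_NAME])
            pipe.to(dtype=torch.bfloat16)   # keep dtypes consistent after the new fusion
        except Exception as e:
            print(f"⚠️ Failed to load or fuse custom LoRA '{clean_lora_id}': {e}. Running without it.")
            clean_lora_id = ""              # nothing to clean up later
    try:
        return pipe(**pipe_kwargs)
    finally:
        if clean_lora_id:
            pipe.unfuse_lora(adapter_names=[CUSTOM_LORA_NAME])  # revert the fused weights
            pipe.unload_lora_weights()                          # takes no adapter-name argument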
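The dtype-mismatch fix rests on the observation that fusing a float32 LoRA into a bfloat16 pipeline can leave some parameters in float32. A small, hypothetical audit helper (not part of this commit) can confirm that before and after the post-fusion pipe.to(dtype=torch.bfloat16) cast, assuming the standard DiffusionPipeline.components mapping.

import torch
from collections import Counter

def dtype_histogram(pipe):
    # Count parameters per (component, dtype) to spot stray float32 tensors after fusing
    counts = Counter()
    for name, component in pipe.components.items():
        if isinstance(component, torch.nn.Module):
            for param in component.parameters():
                counts[(name, str(param.dtype))] += param.numel()
    return counts

# Example: compare the histogram before and after the post-fusion cast
# print(dtype_histogram(pipe))
# pipe.to(dtype=torch.bfloat16)
# print(dtype_histogram(pipe))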