ovi054 committed
Commit 09fcd4a · verified · 1 Parent(s): 9757f67

Update app.py

Files changed (1): app.py (+64 −86)
app.py CHANGED
@@ -1,97 +1,82 @@
  import torch
- from diffusers import UniPCMultistepScheduler, FlowMatchEulerDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler
- from diffusers import WanPipeline, AutoencoderKLWan  # Use Wan-specific VAE
- # from diffusers.hooks import apply_first_block_cache, FirstBlockCacheConfig
+ from diffusers import UniPCMultistepScheduler
+ from diffusers import WanPipeline, AutoencoderKLWan
  from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
- from diffusers.models import UNetSpatioTemporalConditionModel
- from transformers import T5EncoderModel, T5Tokenizer
  from huggingface_hub import hf_hub_download
-
  from PIL import Image
  import numpy as np
-
  import gradio as gr
  import spaces

+ # --- INITIAL SETUP ---
  device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Using device: {device}")

- model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
+ model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"  # Using the large model
+ print("Loading VAE...")
  vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+
+ print("Loading WanPipeline...")
+ # low_cpu_mem_usage is often needed for meta device loading
  pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
- flow_shift = 1.0  # 5.0; 1.0 for image, 5.0 for 720P, 3.0 for 480P
+
+ flow_shift = 1.0
  pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift)

+ # Move the base pipeline to the GPU. This is where meta-device loading happens.
+ print("Moving pipeline to device...")
  pipe.to(device)

- # Configure DDIMScheduler with a beta schedule
- # pipe.scheduler = DDIMScheduler.from_config(
- #     pipe.scheduler.config,
- #     beta_start=0.00085,  # Starting beta value
- #     beta_end=0.012,  # Ending beta value
- #     beta_schedule="linear",  # Linear beta schedule (other options: "scaled_linear", "squaredcos_cap_v2")
- #     num_train_timesteps=1000,  # Number of timesteps
- #     flow_shift=flow_shift
- # )
-
+ # --- LORA FUSING (Done ONCE at startup) ---
+ # We will fuse the base LoRA permanently into the model for performance and to solve the device issue.
+ # This means we cannot dynamically unload it, but it's the correct approach for a fixed setup.

- # Configure FlowMatchEulerDiscreteScheduler
- # pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
- #     pipe.scheduler.config,
- #     flow_shift=flow_shift  # Retain flow_shift for WanPipeline compatibility
- # )
-
- # --- LoRA State Management ---
- # Define unique names for our adapters
- DEFAULT_LORA_NAME = "causvid_lora"
- CUSTOM_LORA_NAME = "custom_lora"
- # Track which custom LoRA is currently loaded to avoid reloading
- CURRENTLY_LOADED_CUSTOM_LORA = None
-
- # Load the default base LoRA ONCE at startup
- print("Loading base LoRA...")
+ print("Loading and Fusing base LoRA...")
  CAUSVID_LORA_REPO = "Kijai/WanVideo_comfy"
  CAUSVID_LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
- # try:
- #     causvid_path = hf_hub_download(repo_id=CAUSVID_LORA_REPO, filename=CAUSVID_LORA_FILENAME)
- #     pipe.load_lora_weights(causvid_path, adapter_name=DEFAULT_LORA_NAME)
- #     print(f"✅ Default LoRA '{DEFAULT_LORA_NAME}' loaded successfully.")
- # except Exception as e:
- #     print(f"⚠️ Default LoRA could not be loaded: {e}")
- #     DEFAULT_LORA_NAME = None
-
- # print("Initialization complete. Gradio is starting...")
-
+ CUSTOM_LORA_NAME = "custom_lora"  # For any optional LoRA

+ try:
+     causvid_path = hf_hub_download(repo_id=CAUSVID_LORA_REPO, filename=CAUSVID_LORA_FILENAME)
+     # 1. Load the LoRA weights. They will likely be on the CPU.
+     pipe.load_lora_weights(causvid_path)  # Use the default adapter name
+     print("✅ Base LoRA loaded.")
+
+     # 2. Fuse the weights into the base model. This resolves the device mismatch.
+     pipe.fuse_lora()
+     print("✅ Base LoRA fused successfully.")
+
+ except Exception as e:
+     print(f"⚠️ Could not load or fuse the base LoRA: {e}")
+
+ print("Initialization complete. Gradio is starting...")

  @spaces.GPU()
  def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_steps=30, lora_id=None, progress=gr.Progress(track_tqdm=True)):
-     # if lora_id and lora_id.strip() != "":
-     #     pipe.unload_lora_weights()
-     #     pipe.load_lora_weights(lora_id.strip())
-
+     # The base LoRA is already fused. We only need to handle the optional custom LoRA.
      clean_lora_id = lora_id.strip() if lora_id else ""
-     print("Loading base LoRA for this run...")
-     causvid_path = hf_hub_download(repo_id=CAUSVID_LORA_REPO, filename=CAUSVID_LORA_FILENAME)
-     pipe.load_lora_weights(causvid_path, adapter_name=DEFAULT_LORA_NAME)
-
-     # If a custom LoRA is provided, load it as well.
+
+     # We will load and unload the custom LoRA dynamically for each run
      if clean_lora_id:
-         print(f"Loading custom LoRA '{clean_lora_id}' for this run...")
-         pipe.load_lora_weights(clean_lora_id, adapter_name=CUSTOM_LORA_NAME)
-         # If a custom LoRA is present, activate both.
-         pipe.set_adapters([DEFAULT_LORA_NAME, CUSTOM_LORA_NAME], adapter_weights=[1.0, 1.0])
-     else:
-         # If no custom LoRA, just activate the base one.
-         print("Activating base LoRA only.")
-         pipe.set_adapters([DEFAULT_LORA_NAME], adapter_weights=[1.0])
-
-     pipe.to(device)
-     # pipe.to("cuda")
-     # apply_first_block_cache(pipe.transformer, FirstBlockCacheConfig(threshold=0.2))
-     apply_cache_on_pipe(
-         pipe,
-         # residual_diff_threshold=0.2,
-     )
+         try:
+             print(f"Loading custom LoRA '{clean_lora_id}' for this run...")
+             # Load the custom LoRA. Note: We will NOT fuse this one to keep it temporary.
+             pipe.load_lora_weights(clean_lora_id, adapter_name=CUSTOM_LORA_NAME)
+
+             # This is the critical part for dynamic LoRAs on large models.
+             # We must explicitly move the adapter to the correct device.
+             pipe.to(device, dtype=pipe.transformer.dtype)  # Ensure dtype matches
+
+             pipe.set_adapters([CUSTOM_LORA_NAME], adapter_weights=[1.0])
+             print(f"✅ Custom LoRA '{CUSTOM_LORA_NAME}' activated.")
+         except Exception as e:
+             print(f"⚠️ Failed to load custom LoRA '{clean_lora_id}': {e}. Running without it.")
+             # Ensure no adapters are active if loading failed
+             pipe.disable_lora()
+
+     # Apply performance optimizations
+     apply_cache_on_pipe(pipe)
+
      try:
          output = pipe(
              prompt=prompt,
@@ -100,36 +85,29 @@ def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_steps=30, lora_id=None, progress=gr.Progress(track_tqdm=True)):
              width=width,
              num_frames=1,
              num_inference_steps=num_inference_steps,
-             guidance_scale=1.0,  # 5.0
+             guidance_scale=1.0,
          )
          image = output.frames[0][0]
          image = (image * 255).astype(np.uint8)
          return Image.fromarray(image)
      finally:
-         # if lora_id and lora_id.strip() != "":
-         #     pass
-         #     pipe.unload_lora_weights()
-         # if clean_lora_id:
-         #     print(f"Unloading '{CUSTOM_LORA_NAME}' from this run.")
-         #     pipe.unload_lora_weights(CUSTOM_LORA_NAME)
-
-         # # Always disable all active LoRAs to reset the state.
-         # pipe.disable_lora()
-         print("Unloading all LoRAs to clean up.")
-         pipe.unload_lora_weights()
-
-
+         # Clean up the dynamic LoRA if it was loaded
+         if clean_lora_id:
+             print(f"Unloading '{CUSTOM_LORA_NAME}' to clean up.")
+             pipe.unload_lora_weights(CUSTOM_LORA_NAME)
+         # It's good practice to disable all just in case.
+         pipe.disable_lora()
+
+ # --- Your Gradio Interface Code (no changes needed here) ---
  iface = gr.Interface(
      fn=generate,
      inputs=[
          gr.Textbox(label="Input prompt"),
-     ],
-     additional_inputs = [
          gr.Textbox(label="Negative prompt", value = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"),
          gr.Slider(label="Width", minimum=480, maximum=1280, step=16, value=1024),
          gr.Slider(label="Height", minimum=480, maximum=1280, step=16, value=1024),
          gr.Slider(minimum=1, maximum=80, step=1, label="Inference Steps", value=10),
-         gr.Textbox(label="LoRA ID"),
+         gr.Textbox(label="LoRA ID (Optional, loads dynamically)"),
      ],
      outputs=gr.Image(label="output"),
  )
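Note on the fuse-at-startup pattern introduced above: fuse_lora() folds the adapter deltas into the base weights, so inference runs at base-model speed and there is no separate adapter left to keep on the right device. A minimal sketch of the pattern, assuming a recent diffusers release (lora_scale and unfuse_lora() are standard diffusers LoRA APIs, but verify against the installed version):

    # Sketch: fuse a LoRA once at startup. Assumes `pipe` is a diffusers
    # pipeline with LoRA support, like the WanPipeline built above.
    pipe.load_lora_weights(causvid_path)   # adds LoRA modules (often on CPU)
    pipe.fuse_lora(lora_scale=1.0)         # fold the deltas into the base weights
    # The merge is reversible; to swap the base LoRA later:
    # pipe.unfuse_lora()

The trade-off the commit accepts: once fused, the base LoRA's strength cannot be varied per request, in exchange for speed and simpler device handling.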
 
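The para_attn call is also simplified to apply_cache_on_pipe(pipe). The tunable visible in the removed code is residual_diff_threshold: first-block cache reuses the previous step's output when the first transformer block's residual changes by less than the threshold, so larger values skip more work at some quality cost. A sketch with the knob made explicit, reusing the 0.2 value from the old commented-out call (semantics per para_attn's first-block-cache adapter; verify against the installed version):

    # Sketch: first-block cache with an explicit threshold.
    # 0.2 comes from the previously commented-out call.
    apply_cache_on_pipe(pipe, residual_diff_threshold=0.2)

Note also that generate() re-applies the cache on every request; applying it once at startup may be cleaner unless apply_cache_on_pipe is known to be idempotent.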
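Finally, the frame post-processing assumes frames come back as float arrays in [0, 1] (the usual diffusers convention for numpy output); values even slightly outside that range wrap around in the uint8 cast. A defensive variant of the conversion, using plain NumPy:

    # Sketch: clamp before converting, so out-of-range floats
    # saturate instead of wrapping around.
    image = output.frames[0][0]
    image = (np.clip(image, 0.0, 1.0) * 255).round().astype(np.uint8)
    return Image.fromarray(image)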