wan-fusionx-lora

Runtime error

App Files Files Community

Lemonator committed on Jul 22

Commit

1f619e0

verified ·

1 Parent(s): 314e10e

Update app_lora.py

Browse files

Files changed (1) hide show

app_lora.py +49 -37

app_lora.py CHANGED Viewed

@@ -20,32 +20,46 @@ MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
 LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
 LORA_FILENAME = "FusionX_LoRa/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"
-# --- Model Initialization ---
 pipe = None
-# This check correctly identifies if the Hugging Face Space has a GPU.
-if torch.cuda.is_available():
-    image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float16)
-    vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float16)
-    pipe = WanImageToVideoPipeline.from_pretrained(
-        MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.float16
-    )
-    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
-    pipe.enable_model_cpu_offload()
-    try:
-        causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
-        print("✅ LoRA downloaded to:", causvid_path)
-        pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
-        pipe.set_adapters(["causvid_lora"], adapter_weights=[0.75])
-        pipe.fuse_lora()
-    except Exception as e:
-        import traceback
-        print("❌ Error during LoRA loading:")
-        traceback.print_exc()
-else:
-    print("CUDA is not available. This script requires a GPU. Please upgrade your Space hardware.")
 # --- Constants and Helper Functions ---
 MOD_VALUE = 32
 DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE = 640, 1024
 NEW_FORMULA_MAX_AREA = 640.0 * 1024.0
@@ -99,8 +113,11 @@ def generate_video(input_image, prompt, height, width,
                    negative_prompt, duration_seconds,
                    guidance_scale, steps, seed, randomize_seed,
                    progress=gr.Progress(track_tqdm=True)):
-    if pipe is None:
-        raise gr.Error("Pipeline not initialized. Check logs for GPU availability.")
     if input_image is None:
         raise gr.Error("Please upload an input image.")
@@ -118,9 +135,9 @@ def generate_video(input_image, prompt, height, width,
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = input_image.resize((target_w, target_h), Image.Resampling.LANCZOS)
-    torch.cuda.empty_cache()
     try:
         with torch.inference_mode(), torch.autocast("cuda", dtype=torch.float16):
             output_frames_list = pipe(
                 image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
@@ -136,14 +153,13 @@ def generate_video(input_image, prompt, height, width,
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
         export_video_with_ffmpeg(output_frames_list, video_path, fps=FIXED_FPS)
-        # Optional: FFmpeg optimization
-        # ...
     return video_path, current_seed
 # --- Gradio UI ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Wan 2.1 I2V FusionX-LoRA")
-    gr.Markdown("GPU is required. If this doesn't load, check your Space hardware settings.")
     with gr.Row():
         with gr.Column():
@@ -159,7 +175,7 @@ with gr.Blocks() as demo:
                     width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label="Width")
                 steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Inference Steps")
                 guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)
-            generate_button = gr.Button("Generate Video", variant="primary", interactive=(pipe is not None))
         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
             gr.Markdown("### Tips:\n- Longer videos need more memory.\n- 4-8 steps is optimal.")
@@ -170,9 +186,5 @@ with gr.Blocks() as demo:
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
 if __name__ == "__main__":
-    if pipe is not None:
-        demo.queue(max_size=3).launch()
-    else:
-        # This provides a clean message in the UI if the app can't start.
-        gr.Markdown("# Application Start Failed").launch()
-        gr.Info("A GPU is required to run this application. Please ensure your Hugging Face Space is configured with GPU hardware.")

 LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
 LORA_FILENAME = "FusionX_LoRa/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"
+# Global variable to hold the pipeline. It's initialized to None.
 pipe = None
+def initialize_pipeline():
+    """
+    Initializes the model pipeline on the first request.
+    This function is designed for serverless GPU environments like ZeroGPU.
+    """
+    global pipe
+    # The 'pipe' global variable acts as a flag. If it's not None, we've already initialized.
+    if pipe is None:
+        print("First time setup: Initializing model pipeline...")
+        gr.Info("Cold start: The first generation will take longer as the model is loaded.")
+        if not torch.cuda.is_available():
+            raise gr.Error("GPU not available. This application requires a GPU to run.")
+        image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float16)
+        vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float16)
+        # All model loading happens here, when a GPU is guaranteed to be active.
+        pipe = WanImageToVideoPipeline.from_pretrained(
+            MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.float16
+        )
+        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
+        pipe.enable_model_cpu_offload()
+        try:
+            causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
+            print("✅ LoRA downloaded to:", causvid_path)
+            pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
+            pipe.set_adapters(["causvid_lora"], adapter_weights=[0.75])
+            pipe.fuse_lora()
+        except Exception as e:
+            raise gr.Error(f"Error loading LoRA: {e}")
+        print("✅ Pipeline initialized successfully.")
 # --- Constants and Helper Functions ---
+# (These are unchanged)
 MOD_VALUE = 32
 DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE = 640, 1024
 NEW_FORMULA_MAX_AREA = 640.0 * 1024.0
                    negative_prompt, duration_seconds,
                    guidance_scale, steps, seed, randomize_seed,
                    progress=gr.Progress(track_tqdm=True)):
+    # --- LAZY LOADING TRIGGER ---
+    # This will load the model on the first run, and do nothing on subsequent runs.
+    initialize_pipeline()
     if input_image is None:
         raise gr.Error("Please upload an input image.")
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = input_image.resize((target_w, target_h), Image.Resampling.LANCZOS)
     try:
+        torch.cuda.empty_cache()
         with torch.inference_mode(), torch.autocast("cuda", dtype=torch.float16):
             output_frames_list = pipe(
                 image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
         export_video_with_ffmpeg(output_frames_list, video_path, fps=FIXED_FPS)
     return video_path, current_seed
 # --- Gradio UI ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Wan 2.1 I2V FusionX-LoRA (ZeroGPU Ready)")
+    gr.Markdown("The first generation will be slow due to a 'cold start'. Subsequent generations will be much faster.")
     with gr.Row():
         with gr.Column():
                     width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label="Width")
                 steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Inference Steps")
                 guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)
+            generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
             gr.Markdown("### Tips:\n- Longer videos need more memory.\n- 4-8 steps is optimal.")
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
 if __name__ == "__main__":
+    # We launch the demo unconditionally now. The GPU check is deferred until the first click.
+    demo.queue(max_size=3).launch()