Update app.py
app.py
CHANGED
@@ -266,8 +266,8 @@ def resize_image_landscape(image: Image.Image) -> Image.Image:
     return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)

 def get_duration(input_image, prompt, steps, negative_prompt, duration_seconds, guidance_scale, guidance_scale_2, seed, randomize_seed):
-    #
-    return int(steps) *
+    # Shorter duration for stability
+    return min(60, int(steps) * 10)

 @spaces.GPU(duration=get_duration)
 def generate_video(
@@ -275,7 +275,7 @@ def generate_video(
     prompt,
     steps=4,
     negative_prompt=default_negative_prompt,
-    duration_seconds=
+    duration_seconds=2.0,  # Reduced default
     guidance_scale=1,
     guidance_scale_2=1,
     seed=42,
@@ -286,61 +286,76 @@ def generate_video(
     if input_image is None:
         raise gr.Error("Please generate or upload an image first.")

-    # Initialize pipeline if needed
-    initialize_video_pipeline()
-
-    if video_pipe is None:
-        raise gr.Error("Video pipeline not initialized. Please check GPU availability.")
-
     try:
-        #
+        # Initialize pipeline if needed (simplified)
+        global video_pipe
+        if video_pipe is None:
+            print("Initializing video pipeline...")
+            video_pipe = WanImageToVideoPipeline.from_pretrained(
+                VIDEO_MODEL_ID,
+                torch_dtype=torch.bfloat16,
+                variant="fp16",
+                use_safetensors=True
+            ).to('cuda')
+
+            # Load Lightning LoRA for faster generation
+            try:
+                video_pipe.load_lora_weights("Kijai/WanVideo_comfy", weight_name="Wan22-Lightning-4-cfg1_bf16_v0.9.safetensors")
+                video_pipe.fuse_lora(lora_scale=1.0)
+            except:
+                pass
+
+        # Clear cache before generation
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        # Ensure frames are divisible by 4 and limit to reasonable range
         num_frames = int(round(duration_seconds * FIXED_FPS))
-        num_frames = np.clip(num_frames,
-        # Round to nearest number divisible by 4
+        num_frames = np.clip(num_frames, 9, 33)  # Limit to 0.5-2 seconds
         num_frames = ((num_frames - 1) // 4) * 4 + 1

         current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
-        resized_image = resize_image_for_video(input_image)

-        #
-
-        gc.collect()
+        # Resize image
+        resized_image = resize_image_for_video(input_image)

-        # Generate
+        # Generate with reduced settings
         with torch.inference_mode():
-
-
-
-
-
-
-
-
-
-
-
-
+            with torch.autocast('cuda', dtype=torch.bfloat16):
+                output_frames_list = video_pipe(
+                    image=resized_image,
+                    prompt=prompt,
+                    negative_prompt=negative_prompt,
+                    height=resized_image.height,
+                    width=resized_image.width,
+                    num_frames=num_frames,
+                    guidance_scale=float(guidance_scale),
+                    guidance_scale_2=float(guidance_scale_2),
+                    num_inference_steps=int(steps),
+                    generator=torch.Generator(device="cuda").manual_seed(current_seed),
+                ).frames[0]

         # Clear cache after generation
         torch.cuda.empty_cache()
         gc.collect()

+        # Save video
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
             video_path = tmpfile.name

         export_to_video(output_frames_list, video_path, fps=FIXED_FPS)

-        return video_path, current_seed, "🎬 Video generated successfully!"
+        return video_path, current_seed, f"🎬 Video generated successfully! ({num_frames} frames)"

     except RuntimeError as e:
-
-
-
-        raise gr.Error("GPU memory
+        torch.cuda.empty_cache()
+        gc.collect()
+        if "out of memory" in str(e).lower():
+            raise gr.Error("GPU memory exceeded. Try reducing duration to 1-2 seconds and steps to 4.")
         else:
-            raise gr.Error(f"
+            raise gr.Error(f"GPU error: {str(e)[:100]}")
     except Exception as e:
-        raise gr.Error(f"
+        raise gr.Error(f"Error: {str(e)[:200]}")

 # ===========================
 # Enhanced CSS
@@ -541,12 +556,12 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
             lines=3
         )
         duration_input = gr.Slider(
-            minimum=
-            maximum=
+            minimum=0.5,
+            maximum=2.0,
             step=0.1,
-            value=
+            value=1.5,
             label="Duration (seconds)",
-            info=
+            info="Shorter videos use less memory"
         )

         with gr.Accordion("Advanced Settings", open=False):
@@ -568,10 +583,10 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
             )
             steps_slider = gr.Slider(
                 minimum=1,
-                maximum=
+                maximum=8,
                 step=1,
-                value=
-                label="Inference Steps"
+                value=4,
+                label="Inference Steps (4 recommended)"
             )
             guidance_1 = gr.Slider(
                 minimum=0.0,
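As a sanity check on the new frame-count logic: a minimal sketch, assuming FIXED_FPS is 16 (the constant is defined elsewhere in app.py, so that value is an assumption here). Note that although the comment says "divisible by 4", the formula actually produces frame counts of the form 4k + 1.

import numpy as np

FIXED_FPS = 16  # assumed value; the real constant lives elsewhere in app.py

def frames_for(duration_seconds: float) -> int:
    # Mirrors the updated logic in generate_video
    num_frames = int(round(duration_seconds * FIXED_FPS))
    num_frames = int(np.clip(num_frames, 9, 33))  # clamp to roughly 0.5-2 s of video
    return ((num_frames - 1) // 4) * 4 + 1        # snap down to 4k + 1 frames

print(frames_for(0.5))  # 8, clipped to 9  -> 9
print(frames_for(1.5))  # 24               -> 21
print(frames_for(2.0))  # 32               -> 29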