Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -7,15 +7,18 @@ from io import BytesIO
|
|
7 |
import time
|
8 |
import tempfile
|
9 |
import base64
|
10 |
-
import spaces
|
11 |
-
import torch
|
12 |
-
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
|
13 |
-
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
|
14 |
-
from diffusers.utils.export_utils import export_to_video
|
15 |
import numpy as np
|
16 |
import random
|
17 |
import gc
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# ===========================
|
20 |
# Configuration
|
21 |
# ===========================
|
@@ -38,83 +41,29 @@ default_prompt_i2v = "make this image come alive, cinematic motion, smooth anima
|
|
38 |
default_negative_prompt = "static, still, no motion, frozen"
|
39 |
|
40 |
# ===========================
|
41 |
-
# Initialize Video Pipeline
|
42 |
# ===========================
|
43 |
|
44 |
-
# Initialize once on startup
|
45 |
video_pipe = None
|
46 |
video_pipeline_ready = False
|
47 |
|
48 |
-
def
|
|
|
49 |
global video_pipe, video_pipeline_ready
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
device_map='cuda',
|
65 |
-
),
|
66 |
-
transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
|
67 |
-
subfolder='transformer_2',
|
68 |
-
torch_dtype=torch.bfloat16,
|
69 |
-
device_map='cuda',
|
70 |
-
),
|
71 |
-
torch_dtype=torch.bfloat16,
|
72 |
-
).to('cuda')
|
73 |
-
|
74 |
-
# Clear memory after loading
|
75 |
-
gc.collect()
|
76 |
-
torch.cuda.empty_cache()
|
77 |
-
|
78 |
-
# Load Lightning LoRA
|
79 |
-
try:
|
80 |
-
print("Loading Lightning LoRA adapter...")
|
81 |
-
video_pipe.transformer.load_adapter("Lightx2v/lightx2v_I2V_14B_480p_cfg_step_4", adapter_name="lightx2v")
|
82 |
-
video_pipe.transformer_2.load_adapter("Lightx2v/lightx2v_I2V_14B_480p_cfg_step_4", adapter_name="lightx2v_2")
|
83 |
-
video_pipe.transformer.set_adapters(["lightx2v"], adapter_weights=[1.0])
|
84 |
-
video_pipe.transformer_2.set_adapters(["lightx2v_2"], adapter_weights=[1.0])
|
85 |
-
print("Lightning LoRA loaded successfully")
|
86 |
-
except Exception as e:
|
87 |
-
print(f"Warning: Could not load Lightning LoRA: {e}")
|
88 |
-
# Continue without LoRA
|
89 |
-
|
90 |
-
# Clear memory again
|
91 |
-
gc.collect()
|
92 |
-
torch.cuda.empty_cache()
|
93 |
-
|
94 |
-
# Try to optimize if module available
|
95 |
-
try:
|
96 |
-
from optimization import optimize_pipeline_
|
97 |
-
print("Optimizing pipeline...")
|
98 |
-
optimize_pipeline_(video_pipe,
|
99 |
-
image=Image.new('RGB', (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)),
|
100 |
-
prompt='prompt',
|
101 |
-
height=LANDSCAPE_HEIGHT,
|
102 |
-
width=LANDSCAPE_WIDTH,
|
103 |
-
num_frames=MAX_FRAMES_MODEL,
|
104 |
-
)
|
105 |
-
print("Pipeline optimization complete")
|
106 |
-
except ImportError:
|
107 |
-
print("Optimization module not found, running without optimization")
|
108 |
-
except Exception as e:
|
109 |
-
print(f"Warning: Optimization failed: {e}")
|
110 |
-
|
111 |
-
video_pipeline_ready = True
|
112 |
-
print("Video pipeline initialized successfully!")
|
113 |
-
|
114 |
-
except Exception as e:
|
115 |
-
print(f"Error initializing video pipeline: {e}")
|
116 |
-
video_pipe = None
|
117 |
-
video_pipeline_ready = False
|
118 |
|
119 |
# ===========================
|
120 |
# Image Processing Functions
|
@@ -134,15 +83,16 @@ def upload_image_to_hosting(image):
|
|
134 |
data={
|
135 |
'key': '6d207e02198a847aa98d0a2a901485a5',
|
136 |
'image': img_base64,
|
137 |
-
}
|
|
|
138 |
)
|
139 |
|
140 |
if response.status_code == 200:
|
141 |
data = response.json()
|
142 |
if data.get('success'):
|
143 |
return data['data']['url']
|
144 |
-
except:
|
145 |
-
|
146 |
|
147 |
# Method 2: Try 0x0.st
|
148 |
try:
|
@@ -151,12 +101,12 @@ def upload_image_to_hosting(image):
|
|
151 |
buffered.seek(0)
|
152 |
|
153 |
files = {'file': ('image.png', buffered, 'image/png')}
|
154 |
-
response = requests.post("https://0x0.st", files=files)
|
155 |
|
156 |
if response.status_code == 200:
|
157 |
return response.text.strip()
|
158 |
-
except:
|
159 |
-
|
160 |
|
161 |
# Method 3: Fallback to base64
|
162 |
buffered = BytesIO()
|
@@ -184,12 +134,15 @@ def process_images(prompt, image1, image2=None):
|
|
184 |
url2 = upload_image_to_hosting(image2)
|
185 |
image_urls.append(url2)
|
186 |
|
187 |
-
# Run the model
|
|
|
188 |
output = replicate.run(
|
189 |
-
"
|
190 |
input={
|
191 |
"prompt": prompt,
|
192 |
-
"
|
|
|
|
|
193 |
}
|
194 |
)
|
195 |
|
@@ -199,57 +152,33 @@ def process_images(prompt, image1, image2=None):
|
|
199 |
# Get the generated image
|
200 |
img = None
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
|
|
208 |
|
209 |
-
if
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
response = requests.get(output_url, timeout=30)
|
214 |
-
if response.status_code == 200:
|
215 |
-
img = Image.open(BytesIO(response.content))
|
216 |
-
except:
|
217 |
-
pass
|
218 |
-
|
219 |
-
if img is None:
|
220 |
-
output_url = None
|
221 |
-
if isinstance(output, str):
|
222 |
-
output_url = output
|
223 |
-
elif isinstance(output, list) and len(output) > 0:
|
224 |
-
output_url = output[0]
|
225 |
-
|
226 |
-
if output_url:
|
227 |
-
response = requests.get(output_url, timeout=30)
|
228 |
-
if response.status_code == 200:
|
229 |
-
img = Image.open(BytesIO(response.content))
|
230 |
|
231 |
if img:
|
232 |
-
return img, "✨ Image generated successfully!
|
233 |
else:
|
234 |
return None, "Could not process output", None
|
235 |
|
236 |
except Exception as e:
|
237 |
-
return None, f"Error: {str(e)[:
|
238 |
|
239 |
# ===========================
|
240 |
-
# Video Generation Functions
|
241 |
# ===========================
|
242 |
|
243 |
def resize_image_for_video(image: Image.Image) -> Image.Image:
|
244 |
"""Resize image for video generation"""
|
245 |
-
if image.height > image.width:
|
246 |
-
transposed = image.transpose(Image.Transpose.ROTATE_90)
|
247 |
-
resized = resize_image_landscape(transposed)
|
248 |
-
return resized.transpose(Image.Transpose.ROTATE_270)
|
249 |
-
return resize_image_landscape(image)
|
250 |
-
|
251 |
-
def resize_image_landscape(image: Image.Image) -> Image.Image:
|
252 |
-
"""Resize landscape image to target dimensions"""
|
253 |
target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
|
254 |
width, height = image.size
|
255 |
in_aspect = width / height
|
@@ -265,80 +194,84 @@ def resize_image_landscape(image: Image.Image) -> Image.Image:
|
|
265 |
|
266 |
return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
|
267 |
|
268 |
-
def get_duration(input_image, prompt, steps, negative_prompt, duration_seconds, guidance_scale, guidance_scale_2, seed, randomize_seed):
|
269 |
-
# Shorter duration for stability
|
270 |
-
return min(60, int(steps) * 10)
|
271 |
-
|
272 |
-
@spaces.GPU(duration=get_duration)
|
273 |
def generate_video(
|
274 |
input_image,
|
275 |
prompt,
|
276 |
steps=4,
|
277 |
negative_prompt=default_negative_prompt,
|
278 |
-
duration_seconds=
|
279 |
guidance_scale=1,
|
280 |
guidance_scale_2=1,
|
281 |
seed=42,
|
282 |
randomize_seed=False,
|
283 |
-
progress=gr.Progress(track_tqdm=True),
|
284 |
):
|
285 |
-
"""Generate a video from an input image"""
|
286 |
if input_image is None:
|
287 |
raise gr.Error("Please generate or upload an image first.")
|
288 |
|
|
|
|
|
|
|
289 |
try:
|
290 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
global video_pipe
|
|
|
|
|
292 |
if video_pipe is None:
|
293 |
-
print("Initializing video pipeline...")
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
|
300 |
-
#
|
301 |
try:
|
302 |
-
video_pipe
|
303 |
-
|
304 |
-
|
|
|
|
|
|
|
|
|
305 |
except Exception as e:
|
306 |
-
print(f"
|
307 |
-
|
308 |
-
|
309 |
-
# Clear cache before generation
|
310 |
-
torch.cuda.empty_cache()
|
311 |
-
gc.collect()
|
312 |
|
313 |
-
#
|
314 |
-
num_frames = int(round(duration_seconds * FIXED_FPS))
|
315 |
-
num_frames =
|
316 |
-
num_frames = ((num_frames - 1) // 4) * 4 + 1
|
317 |
|
318 |
current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
|
319 |
|
320 |
# Resize image
|
321 |
resized_image = resize_image_for_video(input_image)
|
322 |
|
323 |
-
# Generate with
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
|
343 |
# Save video
|
344 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
|
@@ -346,342 +279,228 @@ def generate_video(
|
|
346 |
|
347 |
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
|
348 |
|
349 |
-
return video_path, current_seed, f"🎬 Video generated
|
350 |
|
351 |
-
except RuntimeError as e:
|
352 |
-
torch.cuda.empty_cache()
|
353 |
-
gc.collect()
|
354 |
-
if "out of memory" in str(e).lower():
|
355 |
-
raise gr.Error("GPU memory exceeded. Try reducing duration to 1-2 seconds and steps to 4.")
|
356 |
-
else:
|
357 |
-
raise gr.Error(f"GPU error: {str(e)[:100]}")
|
358 |
except Exception as e:
|
359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
|
361 |
# ===========================
|
362 |
-
#
|
363 |
# ===========================
|
364 |
|
365 |
css = """
|
366 |
.gradio-container {
|
367 |
-
|
368 |
-
|
369 |
-
min-height: 100vh;
|
370 |
}
|
371 |
.header-container {
|
372 |
background: linear-gradient(135deg, #ffd93d 0%, #ffb347 100%);
|
373 |
-
padding:
|
374 |
-
border-radius:
|
375 |
-
margin-bottom:
|
376 |
-
|
377 |
}
|
378 |
.logo-text {
|
379 |
-
font-size:
|
380 |
-
font-weight:
|
381 |
color: #2d3436;
|
382 |
-
text-align: center;
|
383 |
margin: 0;
|
384 |
-
letter-spacing: -2px;
|
385 |
}
|
386 |
.subtitle {
|
387 |
color: #2d3436;
|
388 |
-
|
389 |
-
font-size: 1.2rem;
|
390 |
margin-top: 0.5rem;
|
391 |
-
opacity: 0.9;
|
392 |
-
font-weight: 600;
|
393 |
-
}
|
394 |
-
.main-content {
|
395 |
-
background: rgba(255, 255, 255, 0.95);
|
396 |
-
backdrop-filter: blur(20px);
|
397 |
-
border-radius: 24px;
|
398 |
-
padding: 2.5rem;
|
399 |
-
box-shadow: 0 10px 40px rgba(0, 0, 0, 0.08);
|
400 |
-
margin-bottom: 2rem;
|
401 |
-
}
|
402 |
-
.gr-button-primary {
|
403 |
-
background: linear-gradient(135deg, #ffd93d 0%, #ffb347 100%) !important;
|
404 |
-
border: none !important;
|
405 |
-
color: #2d3436 !important;
|
406 |
-
font-weight: 700 !important;
|
407 |
-
font-size: 1.1rem !important;
|
408 |
-
padding: 1.2rem 2rem !important;
|
409 |
-
border-radius: 14px !important;
|
410 |
-
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
411 |
-
text-transform: uppercase;
|
412 |
-
letter-spacing: 1px;
|
413 |
-
width: 100%;
|
414 |
-
margin-top: 1rem !important;
|
415 |
-
}
|
416 |
-
.gr-button-primary:hover {
|
417 |
-
transform: translateY(-3px) !important;
|
418 |
-
box-shadow: 0 15px 40px rgba(255, 179, 71, 0.35) !important;
|
419 |
-
}
|
420 |
-
.gr-button-secondary {
|
421 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
422 |
-
border: none !important;
|
423 |
-
color: white !important;
|
424 |
-
font-weight: 700 !important;
|
425 |
-
font-size: 1.1rem !important;
|
426 |
-
padding: 1.2rem 2rem !important;
|
427 |
-
border-radius: 14px !important;
|
428 |
-
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
429 |
-
text-transform: uppercase;
|
430 |
-
letter-spacing: 1px;
|
431 |
-
width: 100%;
|
432 |
-
margin-top: 1rem !important;
|
433 |
-
}
|
434 |
-
.gr-button-secondary:hover {
|
435 |
-
transform: translateY(-3px) !important;
|
436 |
-
box-shadow: 0 15px 40px rgba(102, 126, 234, 0.35) !important;
|
437 |
-
}
|
438 |
-
.section-title {
|
439 |
-
font-size: 1.8rem;
|
440 |
-
font-weight: 800;
|
441 |
-
color: #2d3436;
|
442 |
-
margin-bottom: 1rem;
|
443 |
-
padding-bottom: 0.5rem;
|
444 |
-
border-bottom: 3px solid #ffd93d;
|
445 |
-
}
|
446 |
-
.status-text {
|
447 |
-
font-family: 'SF Mono', 'Monaco', monospace;
|
448 |
-
color: #00b894;
|
449 |
-
font-size: 0.9rem;
|
450 |
-
}
|
451 |
-
.image-container {
|
452 |
-
border-radius: 14px !important;
|
453 |
-
overflow: hidden;
|
454 |
-
border: 2px solid #e1e8ed !important;
|
455 |
-
background: #fafbfc !important;
|
456 |
-
}
|
457 |
-
footer {
|
458 |
-
display: none !important;
|
459 |
}
|
460 |
"""
|
461 |
|
462 |
# ===========================
|
463 |
-
# Gradio Interface
|
464 |
# ===========================
|
465 |
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
gr.HTML("""
|
472 |
-
<
|
473 |
-
|
474 |
-
|
475 |
-
<a href="https://huggingface.co/spaces/openfree/Nano-Banana-Upscale" target="_blank">
|
476 |
-
<img src="https://img.shields.io/static/v1?label=NANO%20BANANA&message=UPSCALE&color=%230000ff&labelColor=%23800080&logo=GOOGLE&logoColor=white&style=for-the-badge" alt="Nano Banana Upscale">
|
477 |
-
</a>
|
478 |
-
<a href="https://discord.gg/openfreeai" target="_blank">
|
479 |
-
<img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="Discord Openfree AI">
|
480 |
-
</a>
|
481 |
</div>
|
482 |
""")
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
with gr.Row(equal_height=True):
|
491 |
-
with gr.Column(scale=1):
|
492 |
style_prompt = gr.Textbox(
|
493 |
label="Style Description",
|
494 |
placeholder="Describe your style...",
|
495 |
lines=3,
|
496 |
-
value="
|
497 |
)
|
498 |
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
)
|
506 |
-
|
507 |
-
|
508 |
-
type="pil",
|
509 |
-
height=200,
|
510 |
-
elem_classes="image-container"
|
511 |
-
)
|
512 |
|
513 |
generate_img_btn = gr.Button(
|
514 |
"Generate Image ✨",
|
515 |
-
variant="primary"
|
516 |
-
size="lg"
|
517 |
)
|
518 |
|
519 |
-
with gr.Column(
|
520 |
output_image = gr.Image(
|
521 |
label="Generated Result",
|
522 |
-
type="pil"
|
523 |
-
height=420,
|
524 |
-
elem_classes="image-container"
|
525 |
)
|
526 |
|
527 |
img_status = gr.Textbox(
|
528 |
label="Status",
|
529 |
interactive=False,
|
530 |
-
|
531 |
-
elem_classes="status-text",
|
532 |
-
value="Ready to generate image..."
|
533 |
)
|
534 |
|
535 |
send_to_video_btn = gr.Button(
|
536 |
"Send to Video Generation →",
|
537 |
variant="secondary",
|
538 |
-
size="lg",
|
539 |
visible=False
|
540 |
)
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
with gr.Column(elem_classes="main-content"):
|
545 |
-
gr.HTML('<h2 class="section-title">🎬 Video Generation from Image</h2>')
|
546 |
-
|
547 |
with gr.Row():
|
548 |
with gr.Column():
|
549 |
video_input_image = gr.Image(
|
550 |
-
type="pil",
|
551 |
-
label="Input Image
|
552 |
-
elem_classes="image-container"
|
553 |
)
|
|
|
554 |
video_prompt = gr.Textbox(
|
555 |
-
label="Animation Prompt",
|
556 |
-
value=default_prompt_i2v
|
557 |
-
lines=3
|
558 |
)
|
|
|
559 |
duration_input = gr.Slider(
|
560 |
-
minimum=0.5,
|
561 |
-
maximum=2.0,
|
562 |
-
step=0.
|
563 |
-
value=1.
|
564 |
-
label="Duration (seconds)"
|
565 |
-
info="Shorter videos use less memory"
|
566 |
)
|
567 |
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
label="Seed",
|
576 |
-
minimum=0,
|
577 |
-
maximum=MAX_SEED,
|
578 |
-
step=1,
|
579 |
-
value=42
|
580 |
-
)
|
581 |
-
randomize_seed = gr.Checkbox(
|
582 |
-
label="Randomize seed",
|
583 |
-
value=True
|
584 |
-
)
|
585 |
-
steps_slider = gr.Slider(
|
586 |
-
minimum=1,
|
587 |
-
maximum=8,
|
588 |
-
step=1,
|
589 |
-
value=4,
|
590 |
-
label="Inference Steps (4 recommended)"
|
591 |
-
)
|
592 |
-
guidance_1 = gr.Slider(
|
593 |
-
minimum=0.0,
|
594 |
-
maximum=10.0,
|
595 |
-
step=0.5,
|
596 |
-
value=1,
|
597 |
-
label="Guidance Scale - High Noise"
|
598 |
-
)
|
599 |
-
guidance_2 = gr.Slider(
|
600 |
-
minimum=0.0,
|
601 |
-
maximum=10.0,
|
602 |
-
step=0.5,
|
603 |
-
value=1,
|
604 |
-
label="Guidance Scale - Low Noise"
|
605 |
-
)
|
606 |
|
607 |
generate_video_btn = gr.Button(
|
608 |
"Generate Video 🎬",
|
609 |
-
variant="primary"
|
610 |
-
size="lg"
|
611 |
)
|
612 |
|
613 |
with gr.Column():
|
614 |
video_output = gr.Video(
|
615 |
-
label="Generated Video",
|
616 |
autoplay=True
|
617 |
)
|
|
|
618 |
video_status = gr.Textbox(
|
619 |
label="Status",
|
620 |
interactive=False,
|
621 |
-
|
622 |
-
elem_classes="status-text",
|
623 |
-
value="Ready to generate video..."
|
624 |
)
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
|
676 |
-
# Launch
|
677 |
if __name__ == "__main__":
|
678 |
-
|
679 |
-
print("Starting
|
680 |
-
print("
|
681 |
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
import time
|
8 |
import tempfile
|
9 |
import base64
|
|
|
|
|
|
|
|
|
|
|
10 |
import numpy as np
|
11 |
import random
|
12 |
import gc
|
13 |
|
14 |
+
# GPU-related imports are handled conditionally later
|
15 |
+
try:
|
16 |
+
import torch
|
17 |
+
TORCH_AVAILABLE = True
|
18 |
+
except ImportError:
|
19 |
+
TORCH_AVAILABLE = False
|
20 |
+
print("Warning: PyTorch not available. Video generation will be disabled.")
|
21 |
+
|
22 |
# ===========================
|
23 |
# Configuration
|
24 |
# ===========================
|
|
|
41 |
default_negative_prompt = "static, still, no motion, frozen"
|
42 |
|
43 |
# ===========================
|
44 |
+
# Initialize Video Pipeline (Lazy Loading)
|
45 |
# ===========================
|
46 |
|
|
|
47 |
video_pipe = None
|
48 |
video_pipeline_ready = False
|
49 |
|
50 |
+
def lazy_import_video_dependencies():
    """Import the diffusers video classes on demand.

    Returns:
        A 3-tuple ``(WanImageToVideoPipeline, WanTransformer3DModel,
        export_to_video)`` when all three diffusers imports succeed, or
        ``(None, None, None)`` when any of them raises ``ImportError``.

    Raises:
        gr.Error: If ``TORCH_AVAILABLE`` is false.
    """
    # Declared as in the original; this function never assigns either name.
    global video_pipe, video_pipeline_ready

    if not TORCH_AVAILABLE:
        raise gr.Error("PyTorch is not installed. Video generation is not available.")

    try:
        # Deferred so that merely importing this module does not require diffusers.
        from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
        from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
        from diffusers.utils.export_utils import export_to_video
    except ImportError as import_failure:
        print(f"Warning: Video dependencies not available: {import_failure}")
        return None, None, None

    return WanImageToVideoPipeline, WanTransformer3DModel, export_to_video
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
# ===========================
|
69 |
# Image Processing Functions
|
|
|
83 |
data={
|
84 |
'key': '6d207e02198a847aa98d0a2a901485a5',
|
85 |
'image': img_base64,
|
86 |
+
},
|
87 |
+
timeout=10
|
88 |
)
|
89 |
|
90 |
if response.status_code == 200:
|
91 |
data = response.json()
|
92 |
if data.get('success'):
|
93 |
return data['data']['url']
|
94 |
+
except Exception as e:
|
95 |
+
print(f"imgbb upload failed: {e}")
|
96 |
|
97 |
# Method 2: Try 0x0.st
|
98 |
try:
|
|
|
101 |
buffered.seek(0)
|
102 |
|
103 |
files = {'file': ('image.png', buffered, 'image/png')}
|
104 |
+
response = requests.post("https://0x0.st", files=files, timeout=10)
|
105 |
|
106 |
if response.status_code == 200:
|
107 |
return response.text.strip()
|
108 |
+
except Exception as e:
|
109 |
+
print(f"0x0.st upload failed: {e}")
|
110 |
|
111 |
# Method 3: Fallback to base64
|
112 |
buffered = BytesIO()
|
|
|
134 |
url2 = upload_image_to_hosting(image2)
|
135 |
image_urls.append(url2)
|
136 |
|
137 |
+
# Run the model (using a placeholder model name - replace with actual)
|
138 |
+
# Note: "google/nano-banana" doesn't exist - replace with actual model
|
139 |
output = replicate.run(
|
140 |
+
"stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b",
|
141 |
input={
|
142 |
"prompt": prompt,
|
143 |
+
"image": url1 if len(image_urls) == 1 else None,
|
144 |
+
"width": 1024,
|
145 |
+
"height": 1024
|
146 |
}
|
147 |
)
|
148 |
|
|
|
152 |
# Get the generated image
|
153 |
img = None
|
154 |
|
155 |
+
# Handle different output formats
|
156 |
+
if isinstance(output, list) and len(output) > 0:
|
157 |
+
output_url = output[0]
|
158 |
+
elif isinstance(output, str):
|
159 |
+
output_url = output
|
160 |
+
else:
|
161 |
+
output_url = str(output)
|
162 |
|
163 |
+
if output_url:
|
164 |
+
response = requests.get(output_url, timeout=30)
|
165 |
+
if response.status_code == 200:
|
166 |
+
img = Image.open(BytesIO(response.content))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
if img:
|
169 |
+
return img, "✨ Image generated successfully!", img
|
170 |
else:
|
171 |
return None, "Could not process output", None
|
172 |
|
173 |
except Exception as e:
|
174 |
+
return None, f"Error: {str(e)[:200]}", None
|
175 |
|
176 |
# ===========================
|
177 |
+
# Video Generation Functions (Simplified)
|
178 |
# ===========================
|
179 |
|
180 |
def resize_image_for_video(image: Image.Image) -> Image.Image:
|
181 |
"""Resize image for video generation"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
|
183 |
width, height = image.size
|
184 |
in_aspect = width / height
|
|
|
194 |
|
195 |
return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
|
196 |
|
|
|
|
|
|
|
|
|
|
|
197 |
def generate_video(
|
198 |
input_image,
|
199 |
prompt,
|
200 |
steps=4,
|
201 |
negative_prompt=default_negative_prompt,
|
202 |
+
duration_seconds=1.5,
|
203 |
guidance_scale=1,
|
204 |
guidance_scale_2=1,
|
205 |
seed=42,
|
206 |
randomize_seed=False,
|
|
|
207 |
):
|
208 |
+
"""Generate a video from an input image (simplified version)"""
|
209 |
if input_image is None:
|
210 |
raise gr.Error("Please generate or upload an image first.")
|
211 |
|
212 |
+
if not TORCH_AVAILABLE:
|
213 |
+
raise gr.Error("Video generation is not available. PyTorch is not installed.")
|
214 |
+
|
215 |
try:
|
216 |
+
# Import dependencies
|
217 |
+
video_deps = lazy_import_video_dependencies()
|
218 |
+
if not all(video_deps):
|
219 |
+
raise gr.Error("Video generation dependencies are not available.")
|
220 |
+
|
221 |
+
WanImageToVideoPipeline, WanTransformer3DModel, export_to_video = video_deps
|
222 |
+
|
223 |
global video_pipe
|
224 |
+
|
225 |
+
# Simple initialization without complex optimizations
|
226 |
if video_pipe is None:
|
227 |
+
print("Initializing video pipeline (simplified)...")
|
228 |
+
|
229 |
+
# Clear GPU memory first
|
230 |
+
if TORCH_AVAILABLE:
|
231 |
+
torch.cuda.empty_cache()
|
232 |
+
gc.collect()
|
233 |
|
234 |
+
# Basic pipeline loading
|
235 |
try:
|
236 |
+
video_pipe = WanImageToVideoPipeline.from_pretrained(
|
237 |
+
VIDEO_MODEL_ID,
|
238 |
+
torch_dtype=torch.float16 if TORCH_AVAILABLE else None,
|
239 |
+
low_cpu_mem_usage=True,
|
240 |
+
device_map="auto"
|
241 |
+
)
|
242 |
+
print("Video pipeline loaded")
|
243 |
except Exception as e:
|
244 |
+
print(f"Failed to load video pipeline: {e}")
|
245 |
+
raise gr.Error("Could not load video model. Please try again later.")
|
|
|
|
|
|
|
|
|
246 |
|
247 |
+
# Prepare video generation
|
248 |
+
num_frames = min(17, int(round(duration_seconds * FIXED_FPS))) # Limit frames
|
249 |
+
num_frames = ((num_frames - 1) // 4) * 4 + 1 # Ensure divisible by 4
|
|
|
250 |
|
251 |
current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
|
252 |
|
253 |
# Resize image
|
254 |
resized_image = resize_image_for_video(input_image)
|
255 |
|
256 |
+
# Generate video with minimal settings
|
257 |
+
print(f"Generating {num_frames} frames...")
|
258 |
+
|
259 |
+
if TORCH_AVAILABLE:
|
260 |
+
generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(current_seed)
|
261 |
+
else:
|
262 |
+
generator = None
|
263 |
+
|
264 |
+
output_frames_list = video_pipe(
|
265 |
+
image=resized_image,
|
266 |
+
prompt=prompt,
|
267 |
+
negative_prompt=negative_prompt,
|
268 |
+
height=LANDSCAPE_HEIGHT,
|
269 |
+
width=LANDSCAPE_WIDTH,
|
270 |
+
num_frames=num_frames,
|
271 |
+
guidance_scale=float(guidance_scale),
|
272 |
+
num_inference_steps=int(steps),
|
273 |
+
generator=generator,
|
274 |
+
).frames[0]
|
275 |
|
276 |
# Save video
|
277 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
|
|
|
279 |
|
280 |
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
|
281 |
|
282 |
+
return video_path, current_seed, f"🎬 Video generated! ({num_frames} frames)"
|
283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
except Exception as e:
|
285 |
+
if TORCH_AVAILABLE:
|
286 |
+
torch.cuda.empty_cache()
|
287 |
+
gc.collect()
|
288 |
+
error_msg = str(e)[:200]
|
289 |
+
if "out of memory" in error_msg.lower():
|
290 |
+
return None, seed, "GPU memory exceeded. Try reducing duration and steps."
|
291 |
+
return None, seed, f"Error: {error_msg}"
|
292 |
|
293 |
# ===========================
|
294 |
+
# Simple CSS
|
295 |
# ===========================
|
296 |
|
297 |
css = """
|
298 |
.gradio-container {
|
299 |
+
max-width: 1200px;
|
300 |
+
margin: 0 auto;
|
|
|
301 |
}
|
302 |
.header-container {
|
303 |
background: linear-gradient(135deg, #ffd93d 0%, #ffb347 100%);
|
304 |
+
padding: 2rem;
|
305 |
+
border-radius: 12px;
|
306 |
+
margin-bottom: 2rem;
|
307 |
+
text-align: center;
|
308 |
}
|
309 |
.logo-text {
|
310 |
+
font-size: 2.5rem;
|
311 |
+
font-weight: bold;
|
312 |
color: #2d3436;
|
|
|
313 |
margin: 0;
|
|
|
314 |
}
|
315 |
.subtitle {
|
316 |
color: #2d3436;
|
317 |
+
font-size: 1rem;
|
|
|
318 |
margin-top: 0.5rem;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
}
|
320 |
"""
|
321 |
|
322 |
# ===========================
|
323 |
+
# Gradio Interface (Simplified)
|
324 |
# ===========================
|
325 |
|
326 |
+
def create_demo():
    """Assemble the Gradio interface and return it without launching it.

    The layout is a header banner followed by two tabs: the first calls
    ``process_images`` to produce an image, the second calls
    ``generate_video`` to animate an image. A ``gr.State`` value carries the
    generated image from the first tab to the second.

    Returns:
        The ``gr.Blocks`` instance built here.
    """
    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        # Holds the latest generated image so the "send" button can forward it.
        generated_image_state = gr.State(None)

        gr.HTML("""
        <div class="header-container">
            <h1 class="logo-text">🍌 Nano Banana + Video</h1>
            <p class="subtitle">AI-Powered Image Generation with Video Creation</p>
        </div>
        """)

        with gr.Tabs():
            # ---- Tab 1: image generation ----
            with gr.TabItem("🎨 Step 1: Generate Image"):
                with gr.Row():
                    with gr.Column():
                        style_prompt = gr.Textbox(
                            label="Style Description",
                            placeholder="Describe your style...",
                            lines=3,
                            value="A beautiful landscape in anime style",
                        )
                        image1 = gr.Image(label="Reference Image (Optional)", type="pil")
                        image2 = gr.Image(label="Secondary Image (Optional)", type="pil")
                        generate_img_btn = gr.Button("Generate Image ✨", variant="primary")

                    with gr.Column():
                        output_image = gr.Image(label="Generated Result", type="pil")
                        img_status = gr.Textbox(
                            label="Status",
                            interactive=False,
                            value="Ready...",
                        )
                        # Hidden until an image has been generated successfully.
                        send_to_video_btn = gr.Button(
                            "Send to Video Generation →",
                            variant="secondary",
                            visible=False,
                        )

            # ---- Tab 2: video generation ----
            with gr.TabItem("🎬 Step 2: Generate Video"):
                with gr.Row():
                    with gr.Column():
                        video_input_image = gr.Image(type="pil", label="Input Image")
                        video_prompt = gr.Textbox(
                            label="Animation Prompt",
                            value=default_prompt_i2v,
                        )
                        duration_input = gr.Slider(
                            minimum=0.5,
                            maximum=2.0,
                            step=0.5,
                            value=1.0,
                            label="Duration (seconds)",
                        )
                        steps_slider = gr.Slider(
                            minimum=1,
                            maximum=8,
                            step=1,
                            value=4,
                            label="Inference Steps",
                        )
                        generate_video_btn = gr.Button("Generate Video 🎬", variant="primary")

                    with gr.Column():
                        video_output = gr.Video(label="Generated Video", autoplay=True)
                        video_status = gr.Textbox(
                            label="Status",
                            interactive=False,
                            value="Ready...",
                        )

        # ---- Event handlers ----
        def on_image_generated(prompt, img1, img2):
            """Run image generation and reveal the send button only on success."""
            img, status, state_img = process_images(prompt, img1, img2)
            return img, status, state_img, gr.update(visible=bool(img))

        def send_image_to_video(img):
            """Forward the stored image to the video tab's input, with a status line."""
            if not img:
                return None, "No image to send."
            return img, "Image loaded!"

        def generate_video_wrapper(img, prompt, duration, steps):
            """Call ``generate_video`` and reduce its result to (video path, status)."""
            if not TORCH_AVAILABLE:
                return None, "Video generation requires PyTorch. Please install it first."

            try:
                # The seed returned by generate_video is not shown in this UI.
                video_path, _used_seed, status = generate_video(
                    img, prompt, steps=steps, duration_seconds=duration
                )
                return video_path, status
            except Exception as e:
                # Any failure is reported as a status string instead of propagating.
                return None, f"Error: {str(e)[:100]}"

        # ---- Event wiring ----
        generate_img_btn.click(
            fn=on_image_generated,
            inputs=[style_prompt, image1, image2],
            outputs=[output_image, img_status, generated_image_state, send_to_video_btn],
        )
        send_to_video_btn.click(
            fn=send_image_to_video,
            inputs=[generated_image_state],
            outputs=[video_input_image, video_status],
        )
        generate_video_btn.click(
            fn=generate_video_wrapper,
            inputs=[video_input_image, video_prompt, duration_input, steps_slider],
            outputs=[video_output, video_status],
        )

    return demo
|
475 |
+
|
476 |
+
# ===========================
|
477 |
+
# Main Launch
|
478 |
+
# ===========================
|
479 |
|
|
|
480 |
if __name__ == "__main__":
    # Startup banner (three lines, same output as three separate prints).
    print("=" * 50, "Starting Nano Banana + Video Application", "=" * 50, sep="\n")

    # Environment checks: warn only, never abort startup.
    if not os.environ.get('REPLICATE_API_TOKEN'):
        print("Warning: REPLICATE_API_TOKEN not set. Image generation may not work.")

    if not TORCH_AVAILABLE:
        print(
            "Warning: PyTorch not available. Video generation will be disabled.",
            "To enable video generation, install PyTorch: pip install torch",
            sep="\n",
        )

    try:
        # Build the UI, then serve it on all interfaces at port 7860.
        demo = create_demo()
        demo.launch(
            share=False,  # Set to True if you want a public link
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True,
            debug=False,  # Set to True for debugging
        )
    except Exception as exc:
        # Failures while building or launching are reported on stdout, not re-raised.
        print(f"Failed to launch application: {exc}")
        print("Please check your environment and dependencies.")
|