ginipick committed
Commit cea104c · verified
1 Parent(s): 2e49cd4

Update app.py

Files changed (1)
  1. app.py +251 -432
app.py CHANGED
@@ -7,15 +7,18 @@ from io import BytesIO
7
  import time
8
  import tempfile
9
  import base64
10
- import spaces
11
- import torch
12
- from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
13
- from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
14
- from diffusers.utils.export_utils import export_to_video
15
  import numpy as np
16
  import random
17
  import gc
18
19
  # ===========================
20
  # Configuration
21
  # ===========================
@@ -38,83 +41,29 @@ default_prompt_i2v = "make this image come alive, cinematic motion, smooth anima
38
  default_negative_prompt = "static, still, no motion, frozen"
39
 
40
  # ===========================
41
- # Initialize Video Pipeline
42
  # ===========================
43
 
44
- # Initialize once on startup
45
  video_pipe = None
46
  video_pipeline_ready = False
47
 
48
- def initialize_video_pipeline():
 
49
  global video_pipe, video_pipeline_ready
50
- if video_pipe is None and not video_pipeline_ready:
51
- try:
52
- print("Starting video pipeline initialization...")
53
-
54
- # Install PyTorch 2.8 (if needed)
55
- os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
56
-
57
- # Import LoRA loading utilities
58
- from peft import LoraConfig, get_peft_model, TaskType
59
-
60
- video_pipe = WanImageToVideoPipeline.from_pretrained(VIDEO_MODEL_ID,
61
- transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
62
- subfolder='transformer',
63
- torch_dtype=torch.bfloat16,
64
- device_map='cuda',
65
- ),
66
- transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
67
- subfolder='transformer_2',
68
- torch_dtype=torch.bfloat16,
69
- device_map='cuda',
70
- ),
71
- torch_dtype=torch.bfloat16,
72
- ).to('cuda')
73
-
74
- # Clear memory after loading
75
- gc.collect()
76
- torch.cuda.empty_cache()
77
-
78
- # Load Lightning LoRA
79
- try:
80
- print("Loading Lightning LoRA adapter...")
81
- video_pipe.transformer.load_adapter("Lightx2v/lightx2v_I2V_14B_480p_cfg_step_4", adapter_name="lightx2v")
82
- video_pipe.transformer_2.load_adapter("Lightx2v/lightx2v_I2V_14B_480p_cfg_step_4", adapter_name="lightx2v_2")
83
- video_pipe.transformer.set_adapters(["lightx2v"], adapter_weights=[1.0])
84
- video_pipe.transformer_2.set_adapters(["lightx2v_2"], adapter_weights=[1.0])
85
- print("Lightning LoRA loaded successfully")
86
- except Exception as e:
87
- print(f"Warning: Could not load Lightning LoRA: {e}")
88
- # Continue without LoRA
89
-
90
- # Clear memory again
91
- gc.collect()
92
- torch.cuda.empty_cache()
93
-
94
- # Try to optimize if module available
95
- try:
96
- from optimization import optimize_pipeline_
97
- print("Optimizing pipeline...")
98
- optimize_pipeline_(video_pipe,
99
- image=Image.new('RGB', (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)),
100
- prompt='prompt',
101
- height=LANDSCAPE_HEIGHT,
102
- width=LANDSCAPE_WIDTH,
103
- num_frames=MAX_FRAMES_MODEL,
104
- )
105
- print("Pipeline optimization complete")
106
- except ImportError:
107
- print("Optimization module not found, running without optimization")
108
- except Exception as e:
109
- print(f"Warning: Optimization failed: {e}")
110
-
111
- video_pipeline_ready = True
112
- print("Video pipeline initialized successfully!")
113
-
114
- except Exception as e:
115
- print(f"Error initializing video pipeline: {e}")
116
- video_pipe = None
117
- video_pipeline_ready = False
118
 
119
  # ===========================
120
  # Image Processing Functions
@@ -134,15 +83,16 @@ def upload_image_to_hosting(image):
134
  data={
135
  'key': '6d207e02198a847aa98d0a2a901485a5',
136
  'image': img_base64,
137
- }
 
138
  )
139
 
140
  if response.status_code == 200:
141
  data = response.json()
142
  if data.get('success'):
143
  return data['data']['url']
144
- except:
145
- pass
146
 
147
  # Method 2: Try 0x0.st
148
  try:
@@ -151,12 +101,12 @@ def upload_image_to_hosting(image):
151
  buffered.seek(0)
152
 
153
  files = {'file': ('image.png', buffered, 'image/png')}
154
- response = requests.post("https://0x0.st", files=files)
155
 
156
  if response.status_code == 200:
157
  return response.text.strip()
158
- except:
159
- pass
160
 
161
  # Method 3: Fallback to base64
162
  buffered = BytesIO()
@@ -184,12 +134,15 @@ def process_images(prompt, image1, image2=None):
184
  url2 = upload_image_to_hosting(image2)
185
  image_urls.append(url2)
186
 
187
- # Run the model
 
188
  output = replicate.run(
189
- "google/nano-banana",
190
  input={
191
  "prompt": prompt,
192
- "image_input": image_urls
 
 
193
  }
194
  )
195
 
@@ -199,57 +152,33 @@ def process_images(prompt, image1, image2=None):
199
  # Get the generated image
200
  img = None
201
 
202
- try:
203
- if hasattr(output, 'read'):
204
- img_data = output.read()
205
- img = Image.open(BytesIO(img_data))
206
- except:
207
- pass
 
208
 
209
- if img is None:
210
- try:
211
- if hasattr(output, 'url'):
212
- output_url = output.url()
213
- response = requests.get(output_url, timeout=30)
214
- if response.status_code == 200:
215
- img = Image.open(BytesIO(response.content))
216
- except:
217
- pass
218
-
219
- if img is None:
220
- output_url = None
221
- if isinstance(output, str):
222
- output_url = output
223
- elif isinstance(output, list) and len(output) > 0:
224
- output_url = output[0]
225
-
226
- if output_url:
227
- response = requests.get(output_url, timeout=30)
228
- if response.status_code == 200:
229
- img = Image.open(BytesIO(response.content))
230
 
231
  if img:
232
- return img, "✨ Image generated successfully! You can now generate a video from this image.", img
233
  else:
234
  return None, "Could not process output", None
235
 
236
  except Exception as e:
237
- return None, f"Error: {str(e)[:100]}", None
238
 
239
  # ===========================
240
- # Video Generation Functions
241
  # ===========================
242
 
243
  def resize_image_for_video(image: Image.Image) -> Image.Image:
244
  """Resize image for video generation"""
245
- if image.height > image.width:
246
- transposed = image.transpose(Image.Transpose.ROTATE_90)
247
- resized = resize_image_landscape(transposed)
248
- return resized.transpose(Image.Transpose.ROTATE_270)
249
- return resize_image_landscape(image)
250
-
251
- def resize_image_landscape(image: Image.Image) -> Image.Image:
252
- """Resize landscape image to target dimensions"""
253
  target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
254
  width, height = image.size
255
  in_aspect = width / height
@@ -265,80 +194,84 @@ def resize_image_landscape(image: Image.Image) -> Image.Image:
265
 
266
  return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
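Note: the body of the landscape resize falls between the hunks, so only its head and tail are visible here. A sketch of the crop-then-resize it appears to perform (an assumption, not the committed code; the width/height values below are placeholders for the LANDSCAPE_* constants defined in the configuration section):

    from PIL import Image

    LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT = 832, 480  # placeholder values

    def crop_and_resize_landscape(image: Image.Image) -> Image.Image:
        """Center-crop to the target aspect ratio, then LANCZOS-resize (sketch)."""
        target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
        width, height = image.size
        in_aspect = width / height
        if in_aspect > target_aspect:
            # Too wide: trim excess width around the center.
            new_width = round(height * target_aspect)
            left = (width - new_width) // 2
            image = image.crop((left, 0, left + new_width, height))
        else:
            # Too tall: trim excess height around the center.
            new_height = round(width / target_aspect)
            top = (height - new_height) // 2
            image = image.crop((0, top, width, top + new_height))
        return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)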
267
 
268
- def get_duration(input_image, prompt, steps, negative_prompt, duration_seconds, guidance_scale, guidance_scale_2, seed, randomize_seed):
269
- # Shorter duration for stability
270
- return min(60, int(steps) * 10)
271
-
272
- @spaces.GPU(duration=get_duration)
273
  def generate_video(
274
  input_image,
275
  prompt,
276
  steps=4,
277
  negative_prompt=default_negative_prompt,
278
- duration_seconds=2.0, # Reduced default
279
  guidance_scale=1,
280
  guidance_scale_2=1,
281
  seed=42,
282
  randomize_seed=False,
283
- progress=gr.Progress(track_tqdm=True),
284
  ):
285
- """Generate a video from an input image"""
286
  if input_image is None:
287
  raise gr.Error("Please generate or upload an image first.")
288
 
 
 
 
289
  try:
290
- # Initialize pipeline if needed (simplified)
291
  global video_pipe
 
 
292
  if video_pipe is None:
293
- print("Initializing video pipeline...")
294
- video_pipe = WanImageToVideoPipeline.from_pretrained(
295
- VIDEO_MODEL_ID,
296
- torch_dtype=torch.bfloat16,
297
- use_safetensors=True
298
- ).to('cuda')
299
 
300
- # Load Lightning LoRA for faster generation
301
  try:
302
- video_pipe.load_lora_weights("Kijai/WanVideo_comfy", weight_name="Wan22-Lightning-4-cfg1_bf16_v0.9.safetensors")
303
- video_pipe.fuse_lora(lora_scale=1.0)
304
- print("Lightning LoRA loaded")
 
 
 
 
305
  except Exception as e:
306
- print(f"LoRA loading skipped: {e}")
307
- pass
308
-
309
- # Clear cache before generation
310
- torch.cuda.empty_cache()
311
- gc.collect()
312
 
313
- # Ensure frames are divisible by 4 and limit to reasonable range
314
- num_frames = int(round(duration_seconds * FIXED_FPS))
315
- num_frames = np.clip(num_frames, 9, 33) # Limit to 0.5-2 seconds
316
- num_frames = ((num_frames - 1) // 4) * 4 + 1
317
 
318
  current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
319
 
320
  # Resize image
321
  resized_image = resize_image_for_video(input_image)
322
 
323
- # Generate with reduced settings
324
- with torch.inference_mode():
325
- with torch.autocast('cuda', dtype=torch.bfloat16):
326
- output_frames_list = video_pipe(
327
- image=resized_image,
328
- prompt=prompt,
329
- negative_prompt=negative_prompt,
330
- height=resized_image.height,
331
- width=resized_image.width,
332
- num_frames=num_frames,
333
- guidance_scale=float(guidance_scale),
334
- guidance_scale_2=float(guidance_scale_2),
335
- num_inference_steps=int(steps),
336
- generator=torch.Generator(device="cuda").manual_seed(current_seed),
337
- ).frames[0]
338
-
339
- # Clear cache after generation
340
- torch.cuda.empty_cache()
341
- gc.collect()
342
 
343
  # Save video
344
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
@@ -346,342 +279,228 @@ def generate_video(
346
 
347
  export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
348
 
349
- return video_path, current_seed, f"🎬 Video generated successfully! ({num_frames} frames)"
350
 
351
- except RuntimeError as e:
352
- torch.cuda.empty_cache()
353
- gc.collect()
354
- if "out of memory" in str(e).lower():
355
- raise gr.Error("GPU memory exceeded. Try reducing duration to 1-2 seconds and steps to 4.")
356
- else:
357
- raise gr.Error(f"GPU error: {str(e)[:100]}")
358
  except Exception as e:
359
- raise gr.Error(f"Error: {str(e)[:200]}")
360
 
361
  # ===========================
362
- # Enhanced CSS
363
  # ===========================
364
 
365
  css = """
366
  .gradio-container {
367
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
368
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
369
- min-height: 100vh;
370
  }
371
  .header-container {
372
  background: linear-gradient(135deg, #ffd93d 0%, #ffb347 100%);
373
- padding: 2.5rem;
374
- border-radius: 24px;
375
- margin-bottom: 2.5rem;
376
- box-shadow: 0 20px 60px rgba(255, 179, 71, 0.25);
377
  }
378
  .logo-text {
379
- font-size: 3.5rem;
380
- font-weight: 900;
381
  color: #2d3436;
382
- text-align: center;
383
  margin: 0;
384
- letter-spacing: -2px;
385
  }
386
  .subtitle {
387
  color: #2d3436;
388
- text-align: center;
389
- font-size: 1.2rem;
390
  margin-top: 0.5rem;
391
- opacity: 0.9;
392
- font-weight: 600;
393
- }
394
- .main-content {
395
- background: rgba(255, 255, 255, 0.95);
396
- backdrop-filter: blur(20px);
397
- border-radius: 24px;
398
- padding: 2.5rem;
399
- box-shadow: 0 10px 40px rgba(0, 0, 0, 0.08);
400
- margin-bottom: 2rem;
401
- }
402
- .gr-button-primary {
403
- background: linear-gradient(135deg, #ffd93d 0%, #ffb347 100%) !important;
404
- border: none !important;
405
- color: #2d3436 !important;
406
- font-weight: 700 !important;
407
- font-size: 1.1rem !important;
408
- padding: 1.2rem 2rem !important;
409
- border-radius: 14px !important;
410
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
411
- text-transform: uppercase;
412
- letter-spacing: 1px;
413
- width: 100%;
414
- margin-top: 1rem !important;
415
- }
416
- .gr-button-primary:hover {
417
- transform: translateY(-3px) !important;
418
- box-shadow: 0 15px 40px rgba(255, 179, 71, 0.35) !important;
419
- }
420
- .gr-button-secondary {
421
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
422
- border: none !important;
423
- color: white !important;
424
- font-weight: 700 !important;
425
- font-size: 1.1rem !important;
426
- padding: 1.2rem 2rem !important;
427
- border-radius: 14px !important;
428
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
429
- text-transform: uppercase;
430
- letter-spacing: 1px;
431
- width: 100%;
432
- margin-top: 1rem !important;
433
- }
434
- .gr-button-secondary:hover {
435
- transform: translateY(-3px) !important;
436
- box-shadow: 0 15px 40px rgba(102, 126, 234, 0.35) !important;
437
- }
438
- .section-title {
439
- font-size: 1.8rem;
440
- font-weight: 800;
441
- color: #2d3436;
442
- margin-bottom: 1rem;
443
- padding-bottom: 0.5rem;
444
- border-bottom: 3px solid #ffd93d;
445
- }
446
- .status-text {
447
- font-family: 'SF Mono', 'Monaco', monospace;
448
- color: #00b894;
449
- font-size: 0.9rem;
450
- }
451
- .image-container {
452
- border-radius: 14px !important;
453
- overflow: hidden;
454
- border: 2px solid #e1e8ed !important;
455
- background: #fafbfc !important;
456
- }
457
- footer {
458
- display: none !important;
459
  }
460
  """
461
 
462
  # ===========================
463
- # Gradio Interface
464
  # ===========================
465
 
466
- with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
467
- # Shared state for passing image between tabs
468
- generated_image_state = gr.State(None)
469
-
470
- with gr.Column(elem_classes="header-container"):
471
  gr.HTML("""
472
- <h1 class="logo-text">🍌 Nano Banana + Video</h1>
473
- <p class="subtitle">AI-Powered Image Style Transfer with Video Generation</p>
474
- <div style="display: flex; justify-content: center; align-items: center; gap: 10px; margin-top: 20px;">
475
- <a href="https://huggingface.co/spaces/openfree/Nano-Banana-Upscale" target="_blank">
476
- <img src="https://img.shields.io/static/v1?label=NANO%20BANANA&message=UPSCALE&color=%230000ff&labelColor=%23800080&logo=GOOGLE&logoColor=white&style=for-the-badge" alt="Nano Banana Upscale">
477
- </a>
478
- <a href="https://discord.gg/openfreeai" target="_blank">
479
- <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="Discord Openfree AI">
480
- </a>
481
  </div>
482
  """)
483
-
484
- with gr.Tabs():
485
- # Tab 1: Image Generation
486
- with gr.TabItem("🎨 Step 1: Generate Image"):
487
- with gr.Column(elem_classes="main-content"):
488
- gr.HTML('<h2 class="section-title">🎨 Image Style Transfer</h2>')
489
-
490
- with gr.Row(equal_height=True):
491
- with gr.Column(scale=1):
492
  style_prompt = gr.Textbox(
493
  label="Style Description",
494
  placeholder="Describe your style...",
495
  lines=3,
496
- value="Make the sheets in the style of the logo. Make the scene natural.",
497
  )
498
 
499
- with gr.Row(equal_height=True):
500
- image1 = gr.Image(
501
- label="Primary Image",
502
- type="pil",
503
- height=200,
504
- elem_classes="image-container"
505
- )
506
- image2 = gr.Image(
507
- label="Secondary Image (Optional)",
508
- type="pil",
509
- height=200,
510
- elem_classes="image-container"
511
- )
512
 
513
  generate_img_btn = gr.Button(
514
  "Generate Image ✨",
515
- variant="primary",
516
- size="lg"
517
  )
518
 
519
- with gr.Column(scale=1):
520
  output_image = gr.Image(
521
  label="Generated Result",
522
- type="pil",
523
- height=420,
524
- elem_classes="image-container"
525
  )
526
 
527
  img_status = gr.Textbox(
528
  label="Status",
529
  interactive=False,
530
- lines=1,
531
- elem_classes="status-text",
532
- value="Ready to generate image..."
533
  )
534
 
535
  send_to_video_btn = gr.Button(
536
  "Send to Video Generation →",
537
  variant="secondary",
538
- size="lg",
539
  visible=False
540
  )
541
-
542
- # Tab 2: Video Generation
543
- with gr.TabItem("🎬 Step 2: Generate Video"):
544
- with gr.Column(elem_classes="main-content"):
545
- gr.HTML('<h2 class="section-title">🎬 Video Generation from Image</h2>')
546
-
547
  with gr.Row():
548
  with gr.Column():
549
  video_input_image = gr.Image(
550
- type="pil",
551
- label="Input Image (from Step 1 or upload new)",
552
- elem_classes="image-container"
553
  )
 
554
  video_prompt = gr.Textbox(
555
- label="Animation Prompt",
556
- value=default_prompt_i2v,
557
- lines=3
558
  )
 
559
  duration_input = gr.Slider(
560
- minimum=0.5,
561
- maximum=2.0,
562
- step=0.1,
563
- value=1.5,
564
- label="Duration (seconds)",
565
- info="Shorter videos use less memory"
566
  )
567
 
568
- with gr.Accordion("Advanced Settings", open=False):
569
- video_negative_prompt = gr.Textbox(
570
- label="Negative Prompt",
571
- value=default_negative_prompt,
572
- lines=3
573
- )
574
- video_seed = gr.Slider(
575
- label="Seed",
576
- minimum=0,
577
- maximum=MAX_SEED,
578
- step=1,
579
- value=42
580
- )
581
- randomize_seed = gr.Checkbox(
582
- label="Randomize seed",
583
- value=True
584
- )
585
- steps_slider = gr.Slider(
586
- minimum=1,
587
- maximum=8,
588
- step=1,
589
- value=4,
590
- label="Inference Steps (4 recommended)"
591
- )
592
- guidance_1 = gr.Slider(
593
- minimum=0.0,
594
- maximum=10.0,
595
- step=0.5,
596
- value=1,
597
- label="Guidance Scale - High Noise"
598
- )
599
- guidance_2 = gr.Slider(
600
- minimum=0.0,
601
- maximum=10.0,
602
- step=0.5,
603
- value=1,
604
- label="Guidance Scale - Low Noise"
605
- )
606
 
607
  generate_video_btn = gr.Button(
608
  "Generate Video 🎬",
609
- variant="primary",
610
- size="lg"
611
  )
612
 
613
  with gr.Column():
614
  video_output = gr.Video(
615
- label="Generated Video",
616
  autoplay=True
617
  )
 
618
  video_status = gr.Textbox(
619
  label="Status",
620
  interactive=False,
621
- lines=1,
622
- elem_classes="status-text",
623
- value="Ready to generate video..."
624
  )
625
-
626
- # Event Handlers
627
- def on_image_generated(prompt, img1, img2):
628
- img, status, state_img = process_images(prompt, img1, img2)
629
- if img:
630
- return img, status, state_img, gr.update(visible=True)
631
- return img, status, state_img, gr.update(visible=False)
632
-
633
- def send_image_to_video(img):
634
- if img:
635
- return img, "Image loaded! Ready to generate video."
636
- return None, "No image to send."
637
-
638
- # Image generation events
639
- generate_img_btn.click(
640
- fn=on_image_generated,
641
- inputs=[style_prompt, image1, image2],
642
- outputs=[output_image, img_status, generated_image_state, send_to_video_btn]
643
- )
644
-
645
- # Send to video tab
646
- send_to_video_btn.click(
647
- fn=send_image_to_video,
648
- inputs=[generated_image_state],
649
- outputs=[video_input_image, video_status]
650
- )
651
-
652
- # Video generation events
653
- video_inputs = [
654
- video_input_image, video_prompt, steps_slider,
655
- video_negative_prompt, duration_input,
656
- guidance_1, guidance_2, video_seed, randomize_seed
657
- ]
658
-
659
- def generate_video_wrapper(img, prompt, steps, neg_prompt, duration, g1, g2, seed, rand_seed):
660
- try:
661
- # Pass steps as first argument for GPU duration
662
- video_path, new_seed, status = generate_video(
663
- img, prompt, steps, neg_prompt, duration, g1, g2, seed, rand_seed
664
- )
665
- return video_path, new_seed, status
666
- except Exception as e:
667
- return None, seed, f"Error: {str(e)}"
668
-
669
- generate_video_btn.click(
670
- fn=generate_video_wrapper,
671
- inputs=video_inputs,
672
- outputs=[video_output, video_seed, video_status]
673
- )
674
-
675
 
676
- # Launch
677
  if __name__ == "__main__":
678
- # Don't initialize video pipeline on startup to avoid blocking
679
- print("Starting application...")
680
- print("Note: Video pipeline will initialize on first use")
681
 
682
- demo.launch(
683
- share=True,
684
- server_name="0.0.0.0",
685
- server_port=7860,
686
- show_error=True
687
- )
7
  import time
8
  import tempfile
9
  import base64
 
 
 
 
 
10
  import numpy as np
11
  import random
12
  import gc
13
 
14
+ # GPU-related imports are handled conditionally later
15
+ try:
16
+ import torch
17
+ TORCH_AVAILABLE = True
18
+ except ImportError:
19
+ TORCH_AVAILABLE = False
20
+ print("Warning: PyTorch not available. Video generation will be disabled.")
21
+
22
  # ===========================
23
  # Configuration
24
  # ===========================
 
41
  default_negative_prompt = "static, still, no motion, frozen"
42
 
43
  # ===========================
44
+ # Initialize Video Pipeline (Lazy Loading)
45
  # ===========================
46
 
 
47
  video_pipe = None
48
  video_pipeline_ready = False
49
 
50
+ def lazy_import_video_dependencies():
51
+ """Lazy import video dependencies only when needed"""
52
  global video_pipe, video_pipeline_ready
53
+
54
+ if not TORCH_AVAILABLE:
55
+ raise gr.Error("PyTorch is not installed. Video generation is not available.")
56
+
57
+ try:
58
+ # Try to import video pipeline dependencies
59
+ from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
60
+ from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
61
+ from diffusers.utils.export_utils import export_to_video
62
+
63
+ return WanImageToVideoPipeline, WanTransformer3DModel, export_to_video
64
+ except ImportError as e:
65
+ print(f"Warning: Video dependencies not available: {e}")
66
+ return None, None, None
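The tuple return lets callers gate video features without importing torch or diffusers at module load time. A minimal consumer sketch (it mirrors the all() check that generate_video() performs further down; it assumes torch itself is importable, since the function raises gr.Error otherwise):

    def video_support_available() -> bool:
        """Sketch: True only if all Wan video dependencies import cleanly."""
        pipeline_cls, transformer_cls, export_fn = lazy_import_video_dependencies()
        return all((pipeline_cls, transformer_cls, export_fn))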
67
 
68
  # ===========================
69
  # Image Processing Functions
 
83
  data={
84
  'key': '6d207e02198a847aa98d0a2a901485a5',
85
  'image': img_base64,
86
+ },
87
+ timeout=10
88
  )
89
 
90
  if response.status_code == 200:
91
  data = response.json()
92
  if data.get('success'):
93
  return data['data']['url']
94
+ except Exception as e:
95
+ print(f"imgbb upload failed: {e}")
96
 
97
  # Method 2: Try 0x0.st
98
  try:
 
101
  buffered.seek(0)
102
 
103
  files = {'file': ('image.png', buffered, 'image/png')}
104
+ response = requests.post("https://0x0.st", files=files, timeout=10)
105
 
106
  if response.status_code == 200:
107
  return response.text.strip()
108
+ except Exception as e:
109
+ print(f"0x0.st upload failed: {e}")
110
 
111
  # Method 3: Fallback to base64
112
  buffered = BytesIO()
 
134
  url2 = upload_image_to_hosting(image2)
135
  image_urls.append(url2)
136
 
137
+ # Run the model (using a placeholder model name - replace with actual)
138
+ # Note: "google/nano-banana" doesn't exist - replace with actual model
139
  output = replicate.run(
140
+ "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b",
141
  input={
142
  "prompt": prompt,
143
+ "image": url1 if len(image_urls) == 1 else None,
144
+ "width": 1024,
145
+ "height": 1024
146
  }
147
  )
148
 
 
152
  # Get the generated image
153
  img = None
154
 
155
+ # Handle different output formats
156
+ if isinstance(output, list) and len(output) > 0:
157
+ output_url = output[0]
158
+ elif isinstance(output, str):
159
+ output_url = output
160
+ else:
161
+ output_url = str(output)
162
 
163
+ if output_url:
164
+ response = requests.get(output_url, timeout=30)
165
+ if response.status_code == 200:
166
+ img = Image.open(BytesIO(response.content))
167
 
168
  if img:
169
+ return img, "✨ Image generated successfully!", img
170
  else:
171
  return None, "Could not process output", None
172
 
173
  except Exception as e:
174
+ return None, f"Error: {str(e)[:200]}", None
175
 
176
  # ===========================
177
+ # Video Generation Functions (Simplified)
178
  # ===========================
179
 
180
  def resize_image_for_video(image: Image.Image) -> Image.Image:
181
  """Resize image for video generation"""
182
  target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
183
  width, height = image.size
184
  in_aspect = width / height
 
194
 
195
  return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
196
 
 
 
 
 
 
197
  def generate_video(
198
  input_image,
199
  prompt,
200
  steps=4,
201
  negative_prompt=default_negative_prompt,
202
+ duration_seconds=1.5,
203
  guidance_scale=1,
204
  guidance_scale_2=1,
205
  seed=42,
206
  randomize_seed=False,
 
207
  ):
208
+ """Generate a video from an input image (simplified version)"""
209
  if input_image is None:
210
  raise gr.Error("Please generate or upload an image first.")
211
 
212
+ if not TORCH_AVAILABLE:
213
+ raise gr.Error("Video generation is not available. PyTorch is not installed.")
214
+
215
  try:
216
+ # Import dependencies
217
+ video_deps = lazy_import_video_dependencies()
218
+ if not all(video_deps):
219
+ raise gr.Error("Video generation dependencies are not available.")
220
+
221
+ WanImageToVideoPipeline, WanTransformer3DModel, export_to_video = video_deps
222
+
223
  global video_pipe
224
+
225
+ # Simple initialization without complex optimizations
226
  if video_pipe is None:
227
+ print("Initializing video pipeline (simplified)...")
228
+
229
+ # Clear GPU memory first
230
+ if TORCH_AVAILABLE:
231
+ torch.cuda.empty_cache()
232
+ gc.collect()
233
 
234
+ # Basic pipeline loading
235
  try:
236
+ video_pipe = WanImageToVideoPipeline.from_pretrained(
237
+ VIDEO_MODEL_ID,
238
+ torch_dtype=torch.float16 if TORCH_AVAILABLE else None,
239
+ low_cpu_mem_usage=True,
240
+ device_map="auto"
241
+ )
242
+ print("Video pipeline loaded")
243
  except Exception as e:
244
+ print(f"Failed to load video pipeline: {e}")
245
+ raise gr.Error("Could not load video model. Please try again later.")
 
 
 
 
246
 
247
+ # Prepare video generation
248
+ num_frames = min(17, int(round(duration_seconds * FIXED_FPS))) # Limit frames
249
+ num_frames = ((num_frames - 1) // 4) * 4 + 1 # Ensure divisible by 4
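The inline comment above slightly undersells what this line does: the expression snaps the frame count down to the nearest value of the form 4k + 1 (5, 9, 13, 17, ...), which is the frame-count family the Wan I2V pipeline expects, while the earlier min() caps it at 17. A small worked example, assuming FIXED_FPS is 16 (the actual constant is defined in the configuration section):

    FIXED_FPS = 16  # assumed here for illustration; see the config section
    for duration in (0.5, 1.0, 1.5, 2.0):
        n = min(17, int(round(duration * FIXED_FPS)))
        n = ((n - 1) // 4) * 4 + 1   # snap down to the nearest 4k + 1
        print(f"{duration}s -> {n} frames")
    # 0.5s -> 5 frames, 1.0s -> 13 frames, 1.5s -> 17 frames, 2.0s -> 17 frames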
 
250
 
251
  current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
252
 
253
  # Resize image
254
  resized_image = resize_image_for_video(input_image)
255
 
256
+ # Generate video with minimal settings
257
+ print(f"Generating {num_frames} frames...")
258
+
259
+ if TORCH_AVAILABLE:
260
+ generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(current_seed)
261
+ else:
262
+ generator = None
263
+
264
+ output_frames_list = video_pipe(
265
+ image=resized_image,
266
+ prompt=prompt,
267
+ negative_prompt=negative_prompt,
268
+ height=LANDSCAPE_HEIGHT,
269
+ width=LANDSCAPE_WIDTH,
270
+ num_frames=num_frames,
271
+ guidance_scale=float(guidance_scale),
272
+ num_inference_steps=int(steps),
273
+ generator=generator,
274
+ ).frames[0]
275
 
276
  # Save video
277
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
 
279
 
280
  export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
281
 
282
+ return video_path, current_seed, f"🎬 Video generated! ({num_frames} frames)"
283
 
284
  except Exception as e:
285
+ if TORCH_AVAILABLE:
286
+ torch.cuda.empty_cache()
287
+ gc.collect()
288
+ error_msg = str(e)[:200]
289
+ if "out of memory" in error_msg.lower():
290
+ return None, seed, "GPU memory exceeded. Try reducing duration and steps."
291
+ return None, seed, f"Error: {error_msg}"
292
 
293
  # ===========================
294
+ # Simple CSS
295
  # ===========================
296
 
297
  css = """
298
  .gradio-container {
299
+ max-width: 1200px;
300
+ margin: 0 auto;
 
301
  }
302
  .header-container {
303
  background: linear-gradient(135deg, #ffd93d 0%, #ffb347 100%);
304
+ padding: 2rem;
305
+ border-radius: 12px;
306
+ margin-bottom: 2rem;
307
+ text-align: center;
308
  }
309
  .logo-text {
310
+ font-size: 2.5rem;
311
+ font-weight: bold;
312
  color: #2d3436;
 
313
  margin: 0;
 
314
  }
315
  .subtitle {
316
  color: #2d3436;
317
+ font-size: 1rem;
 
318
  margin-top: 0.5rem;
319
  }
320
  """
321
 
322
  # ===========================
323
+ # Gradio Interface (Simplified)
324
  # ===========================
325
 
326
+ def create_demo():
327
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
328
+ # Shared state
329
+ generated_image_state = gr.State(None)
330
+
331
  gr.HTML("""
332
+ <div class="header-container">
333
+ <h1 class="logo-text">🍌 Nano Banana + Video</h1>
334
+ <p class="subtitle">AI-Powered Image Generation with Video Creation</p>
335
  </div>
336
  """)
337
+
338
+ with gr.Tabs():
339
+ # Tab 1: Image Generation
340
+ with gr.TabItem("🎨 Step 1: Generate Image"):
341
+ with gr.Row():
342
+ with gr.Column():
 
 
 
343
  style_prompt = gr.Textbox(
344
  label="Style Description",
345
  placeholder="Describe your style...",
346
  lines=3,
347
+ value="A beautiful landscape in anime style"
348
  )
349
 
350
+ image1 = gr.Image(
351
+ label="Reference Image (Optional)",
352
+ type="pil"
353
+ )
354
+
355
+ image2 = gr.Image(
356
+ label="Secondary Image (Optional)",
357
+ type="pil"
358
+ )
 
 
 
 
359
 
360
  generate_img_btn = gr.Button(
361
  "Generate Image ✨",
362
+ variant="primary"
 
363
  )
364
 
365
+ with gr.Column():
366
  output_image = gr.Image(
367
  label="Generated Result",
368
+ type="pil"
 
 
369
  )
370
 
371
  img_status = gr.Textbox(
372
  label="Status",
373
  interactive=False,
374
+ value="Ready..."
 
 
375
  )
376
 
377
  send_to_video_btn = gr.Button(
378
  "Send to Video Generation →",
379
  variant="secondary",
 
380
  visible=False
381
  )
382
+
383
+ # Tab 2: Video Generation
384
+ with gr.TabItem("🎬 Step 2: Generate Video"):
 
 
 
385
  with gr.Row():
386
  with gr.Column():
387
  video_input_image = gr.Image(
388
+ type="pil",
389
+ label="Input Image"
 
390
  )
391
+
392
  video_prompt = gr.Textbox(
393
+ label="Animation Prompt",
394
+ value=default_prompt_i2v
 
395
  )
396
+
397
  duration_input = gr.Slider(
398
+ minimum=0.5,
399
+ maximum=2.0,
400
+ step=0.5,
401
+ value=1.0,
402
+ label="Duration (seconds)"
 
403
  )
404
 
405
+ steps_slider = gr.Slider(
406
+ minimum=1,
407
+ maximum=8,
408
+ step=1,
409
+ value=4,
410
+ label="Inference Steps"
411
+ )
412
 
413
  generate_video_btn = gr.Button(
414
  "Generate Video 🎬",
415
+ variant="primary"
 
416
  )
417
 
418
  with gr.Column():
419
  video_output = gr.Video(
420
+ label="Generated Video",
421
  autoplay=True
422
  )
423
+
424
  video_status = gr.Textbox(
425
  label="Status",
426
  interactive=False,
427
+ value="Ready..."
 
 
428
  )
429
+
430
+ # Event Handlers
431
+ def on_image_generated(prompt, img1, img2):
432
+ img, status, state_img = process_images(prompt, img1, img2)
433
+ if img:
434
+ return img, status, state_img, gr.update(visible=True)
435
+ return img, status, state_img, gr.update(visible=False)
436
+
437
+ def send_image_to_video(img):
438
+ if img:
439
+ return img, "Image loaded!"
440
+ return None, "No image to send."
441
+
442
+ # Wire up events
443
+ generate_img_btn.click(
444
+ fn=on_image_generated,
445
+ inputs=[style_prompt, image1, image2],
446
+ outputs=[output_image, img_status, generated_image_state, send_to_video_btn]
447
+ )
448
+
449
+ send_to_video_btn.click(
450
+ fn=send_image_to_video,
451
+ inputs=[generated_image_state],
452
+ outputs=[video_input_image, video_status]
453
+ )
454
+
455
+ # Simplified video generation
456
+ def generate_video_wrapper(img, prompt, duration, steps):
457
+ if not TORCH_AVAILABLE:
458
+ return None, "Video generation requires PyTorch. Please install it first."
459
+
460
+ try:
461
+ video_path, seed, status = generate_video(
462
+ img, prompt, steps=steps, duration_seconds=duration
463
+ )
464
+ return video_path, status
465
+ except Exception as e:
466
+ return None, f"Error: {str(e)[:100]}"
467
+
468
+ generate_video_btn.click(
469
+ fn=generate_video_wrapper,
470
+ inputs=[video_input_image, video_prompt, duration_input, steps_slider],
471
+ outputs=[video_output, video_status]
472
+ )
473
+
474
+ return demo
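Since a single video job can occupy the GPU for tens of seconds, an optional follow-up (not part of this commit) is to put the app behind Gradio's request queue before launching, so concurrent requests wait instead of timing out:

    demo = create_demo()
    demo.queue(max_size=10)   # queue long-running video jobs
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)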
475
+
476
+ # ===========================
477
+ # Main Launch
478
+ # ===========================
479
 
 
480
  if __name__ == "__main__":
481
+ print("=" * 50)
482
+ print("Starting Nano Banana + Video Application")
483
+ print("=" * 50)
484
 
485
+ # Check environment
486
+ if not os.getenv('REPLICATE_API_TOKEN'):
487
+ print("Warning: REPLICATE_API_TOKEN not set. Image generation may not work.")
488
+
489
+ if not TORCH_AVAILABLE:
490
+ print("Warning: PyTorch not available. Video generation will be disabled.")
491
+ print("To enable video generation, install PyTorch: pip install torch")
492
+
493
+ try:
494
+ # Create and launch demo
495
+ demo = create_demo()
496
+
497
+ demo.launch(
498
+ share=False, # Set to True if you want a public link
499
+ server_name="0.0.0.0",
500
+ server_port=7860,
501
+ show_error=True,
502
+ debug=False # Set to True for debugging
503
+ )
504
+ except Exception as e:
505
+ print(f"Failed to launch application: {e}")
506
+ print("Please check your environment and dependencies.")