kevalfst committed · verified
Commit f7bfc02 · 1 Parent(s): 3455f8c

Update app.py

Files changed (1):
  1. app.py +95 -63
app.py CHANGED
@@ -7,95 +7,127 @@ from diffusers import (
     WanPipeline,
 )
 from diffusers.utils import export_to_video, load_image
+import random
+import numpy as np
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
+MAX_SEED = np.iinfo(np.int32).max
 
-# Pipeline factory
-def make_pipe(cls, model_id, **kwargs):
-    pipe = cls.from_pretrained(model_id, torch_dtype=dtype, **kwargs)
-    pipe.enable_model_cpu_offload()
-    return pipe
-
-# Global model caches
+# Model cache
 TXT2IMG_PIPE = None
 IMG2IMG_PIPE = None
 TXT2VID_PIPE = None
 IMG2VID_PIPE = None
 
-# Text → Image
-def generate_image_from_text(prompt):
+def make_pipe(cls, model_id, **kwargs):
+    pipe = cls.from_pretrained(model_id, torch_dtype=dtype, **kwargs)
+    pipe.enable_model_cpu_offload()
+    return pipe
+
+# Functions
+def generate_image_from_text(prompt, seed, randomize_seed):
     global TXT2IMG_PIPE
     if TXT2IMG_PIPE is None:
-        TXT2IMG_PIPE = make_pipe(
-            StableDiffusionPipeline,
-            "stabilityai/stable-diffusion-2-1-base"
-        ).to(device)
-    return TXT2IMG_PIPE(prompt, num_inference_steps=20).images[0]
+        TXT2IMG_PIPE = make_pipe(StableDiffusionPipeline, "stabilityai/stable-diffusion-2-1-base").to(device)
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.manual_seed(seed)
+    image = TXT2IMG_PIPE(prompt=prompt, num_inference_steps=20, generator=generator).images[0]
+    return image, seed
 
-# Image → Image
-def generate_image_from_image_and_prompt(image, prompt):
+def generate_image_from_image_and_prompt(image, prompt, seed, randomize_seed):
     global IMG2IMG_PIPE
     if IMG2IMG_PIPE is None:
-        IMG2IMG_PIPE = make_pipe(
-            StableDiffusionInstructPix2PixPipeline,
-            "timbrooks/instruct-pix2pix"
-        ).to(device)
-    out = IMG2IMG_PIPE(prompt=prompt, image=image, num_inference_steps=8)
-    return out.images[0]
+        IMG2IMG_PIPE = make_pipe(StableDiffusionInstructPix2PixPipeline, "timbrooks/instruct-pix2pix").to(device)
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.manual_seed(seed)
+    out = IMG2IMG_PIPE(prompt=prompt, image=image, num_inference_steps=8, generator=generator)
+    return out.images[0], seed
 
-# Text → Video
-def generate_video_from_text(prompt):
+def generate_video_from_text(prompt, seed, randomize_seed):
     global TXT2VID_PIPE
     if TXT2VID_PIPE is None:
-        TXT2VID_PIPE = make_pipe(
-            WanPipeline,
-            "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
-        ).to(device)
-    frames = TXT2VID_PIPE(prompt=prompt, num_frames=12).frames[0]
-    return export_to_video(frames, "/tmp/wan_video.mp4", fps=8)
+        TXT2VID_PIPE = make_pipe(WanPipeline, "Wan-AI/Wan2.1-T2V-1.3B-Diffusers").to(device)
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.manual_seed(seed)
+    frames = TXT2VID_PIPE(prompt=prompt, num_frames=12, generator=generator).frames[0]
+    return export_to_video(frames, "/tmp/wan_video.mp4", fps=8), seed
 
-# Image → Video
-def generate_video_from_image(image):
+def generate_video_from_image(image, seed, randomize_seed):
     global IMG2VID_PIPE
     if IMG2VID_PIPE is None:
-        IMG2VID_PIPE = make_pipe(
-            StableVideoDiffusionPipeline,
-            "stabilityai/stable-video-diffusion-img2vid-xt",
-            variant="fp16" if dtype == torch.float16 else None
-        ).to(device)
+        IMG2VID_PIPE = make_pipe(StableVideoDiffusionPipeline, "stabilityai/stable-video-diffusion-img2vid-xt", variant="fp16" if dtype == torch.float16 else None).to(device)
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.manual_seed(seed)
     image = load_image(image).resize((512, 288))
-    frames = IMG2VID_PIPE(image, num_inference_steps=16).frames[0]
-    return export_to_video(frames, "/tmp/svd_video.mp4", fps=8)
+    frames = IMG2VID_PIPE(image=image, num_inference_steps=16, generator=generator).frames[0]
+    return export_to_video(frames, "/tmp/svd_video.mp4", fps=8), seed
 
-# Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 Lightweight Any-to-Any AI Playground")
+# UI
+with gr.Blocks(css="footer {display:none !important}") as demo:
+    gr.Markdown("# 🧠 AI Playground – Multi-Mode Generator")
 
-    with gr.Tab("Text → Image"):
-        text_prompt = gr.Textbox(label="Prompt")
-        output_image = gr.Image(label="Generated Image")
-        text2img_button = gr.Button("Generate")
-        text2img_button.click(generate_image_from_text, inputs=text_prompt, outputs=output_image)
+    with gr.Tabs():
+        # Text → Image
+        with gr.Tab("Text → Image"):
+            with gr.Row():
+                prompt_txt = gr.Textbox(label="Prompt")
+                generate_btn = gr.Button("Generate")
+            result_img = gr.Image()
+            seed_txt = gr.Slider(0, MAX_SEED, value=42, label="Seed")
+            rand_seed_txt = gr.Checkbox(label="Randomize seed", value=True)
+            generate_btn.click(
+                fn=generate_image_from_text,
+                inputs=[prompt_txt, seed_txt, rand_seed_txt],
+                outputs=[result_img, seed_txt]
+            )
 
-    with gr.Tab("Image → Image"):
-        input_image = gr.Image(label="Input Image")
-        edit_prompt = gr.Textbox(label="Edit Prompt")
-        edited_image = gr.Image(label="Edited Image")
-        img2img_button = gr.Button("Generate")
-        img2img_button.click(generate_image_from_image_and_prompt, inputs=[input_image, edit_prompt], outputs=edited_image)
+        # Image → Image
+        with gr.Tab("Image → Image"):
+            with gr.Row():
+                image_in = gr.Image(label="Input Image")
+                prompt_img = gr.Textbox(label="Edit Prompt")
+            generate_btn2 = gr.Button("Generate")
+            result_img2 = gr.Image()
+            seed_img = gr.Slider(0, MAX_SEED, value=123, label="Seed")
+            rand_seed_img = gr.Checkbox(label="Randomize seed", value=True)
+            generate_btn2.click(
+                fn=generate_image_from_image_and_prompt,
+                inputs=[image_in, prompt_img, seed_img, rand_seed_img],
+                outputs=[result_img2, seed_img]
+            )
 
-    with gr.Tab("Text → Video"):
-        video_prompt = gr.Textbox(label="Prompt")
-        video_output = gr.Video(label="Generated Video")
-        txt2vid_button = gr.Button("Generate")
-        txt2vid_button.click(generate_video_from_text, inputs=video_prompt, outputs=video_output)
+        # Text → Video
+        with gr.Tab("Text → Video"):
+            with gr.Row():
+                prompt_vid = gr.Textbox(label="Prompt")
+                generate_btn3 = gr.Button("Generate")
+            result_vid = gr.Video()
+            seed_vid = gr.Slider(0, MAX_SEED, value=555, label="Seed")
+            rand_seed_vid = gr.Checkbox(label="Randomize seed", value=True)
+            generate_btn3.click(
+                fn=generate_video_from_text,
+                inputs=[prompt_vid, seed_vid, rand_seed_vid],
+                outputs=[result_vid, seed_vid]
+            )
 
-    with gr.Tab("Image → Video"):
-        video_input_img = gr.Image(label="Input Image")
-        anim_video_output = gr.Video(label="Animated Video")
-        img2vid_button = gr.Button("Animate")
-        img2vid_button.click(generate_video_from_image, inputs=video_input_img, outputs=anim_video_output)
+        # Image → Video
+        with gr.Tab("Image → Video"):
+            with gr.Row():
+                image_in_vid = gr.Image(label="Input Image")
+                generate_btn4 = gr.Button("Animate")
+            result_vid2 = gr.Video()
+            seed_vid2 = gr.Slider(0, MAX_SEED, value=999, label="Seed")
+            rand_seed_vid2 = gr.Checkbox(label="Randomize seed", value=True)
+            generate_btn4.click(
+                fn=generate_video_from_image,
+                inputs=[image_in_vid, seed_vid2, rand_seed_vid2],
+                outputs=[result_vid2, seed_vid2]
+            )
 
 demo.queue()
 demo.launch(show_error=True)
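
The core functional change in this commit is the seed handling: each generator function now accepts a seed and a randomize_seed flag, builds a torch generator from the chosen seed, and returns that seed alongside the output so the Gradio slider shows the value actually used. Below is a minimal sketch of that pattern in isolation; resolve_seed is a hypothetical helper for illustration and is not a function in app.py, and the prompt and 42 are placeholder values.

import random
import numpy as np
import torch

MAX_SEED = np.iinfo(np.int32).max  # same slider bound the commit defines

def resolve_seed(seed, randomize_seed):
    # Pick the effective seed, then seed the default CPU generator.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # torch.manual_seed returns the default torch.Generator, which diffusers
    # pipelines accept through their `generator` argument.
    generator = torch.manual_seed(seed)
    return generator, seed

# Usage sketch: pass the generator into a pipeline call and return the seed
# so the click handler can write it back into the Seed slider.
generator, used_seed = resolve_seed(42, randomize_seed=False)
# image = TXT2IMG_PIPE(prompt="a lighthouse at dusk", num_inference_steps=20,
#                      generator=generator).images[0]
# return image, used_seed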