import torch
import gradio as gr
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionInstructPix2PixPipeline,
    StableVideoDiffusionPipeline,
    WanPipeline,
)
from diffusers.utils import export_to_video, load_image

# Detect device & dtype
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Factory to load a pipeline and enable offloading when a GPU is available
def make_pipe(cls, model_id, **kwargs):
    pipe = cls.from_pretrained(model_id, torch_dtype=dtype, **kwargs)
    if device == "cuda":
        # Offloads sub-models that are not in use to CPU; this also manages device
        # placement, so the pipeline must not be moved with .to("cuda") afterwards.
        pipe.enable_model_cpu_offload()
    else:
        pipe = pipe.to(device)
    return pipe

# Hold pipelines in globals but don’t load them until first use
TXT2IMG_PIPE = None
IMG2IMG_PIPE = None
TXT2VID_PIPE = None
IMG2VID_PIPE = None

def generate_image_from_text(prompt):
    global TXT2IMG_PIPE
    if TXT2IMG_PIPE is None:
        TXT2IMG_PIPE = make_pipe(
            StableDiffusionPipeline,
            "stabilityai/stable-diffusion-2-1-base"
        )
    return TXT2IMG_PIPE(prompt, num_inference_steps=20).images[0]

def generate_image_from_image_and_prompt(image, prompt):
    global IMG2IMG_PIPE
    if IMG2IMG_PIPE is None:
        IMG2IMG_PIPE = make_pipe(
            StableDiffusionInstructPix2PixPipeline,
            "timbrooks/instruct-pix2pix"
        )
    out = IMG2IMG_PIPE(prompt=prompt, image=image, num_inference_steps=8)
    return out.images[0]

def generate_video_from_text(prompt):
    global TXT2VID_PIPE
    if TXT2VID_PIPE is None:
        TXT2VID_PIPE = make_pipe(
            WanPipeline,
            "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
        )
    # Keep the clip short so generation stays reasonably fast
    frames = TXT2VID_PIPE(prompt=prompt, num_frames=12).frames[0]
    return export_to_video(frames, "wan_video.mp4", fps=8)

def generate_video_from_image(image):
    global IMG2VID_PIPE
    if IMG2VID_PIPE is None:
        IMG2VID_PIPE = make_pipe(
            StableVideoDiffusionPipeline,
            "stabilityai/stable-video-diffusion-img2vid-xt",
            variant="fp16" if dtype == torch.float16 else None
        )
    # Downscale the input so the video fits in modest GPU memory
    image = load_image(image).resize((512, 288))
    frames = IMG2VID_PIPE(image, num_inference_steps=16).frames[0]
    return export_to_video(frames, "svd_video.mp4", fps=8)

with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Lightweight Any‑to‑Any AI Playground")

    with gr.Tab("Text → Image"):
        inp = gr.Textbox(label="Prompt")
        out = gr.Image()
        gr.Button("Generate").click(generate_image_from_text, inp, out)

    with gr.Tab("Image → Image"):
        # type="pil" so the pipeline receives a PIL image rather than a NumPy array
        img = gr.Image(type="pil", label="Input Image")
        prm = gr.Textbox(label="Edit Prompt")
        out2 = gr.Image()
        gr.Button("Generate").click(generate_image_from_image_and_prompt, [img, prm], out2)

    with gr.Tab("Text → Video"):
        inp2 = gr.Textbox(label="Prompt")
        out_vid = gr.Video()
        gr.Button("Generate").click(generate_video_from_text, inp2, out_vid)

    with gr.Tab("Image → Video"):
        # type="pil" keeps load_image() happy, since it accepts paths and PIL images
        img2 = gr.Image(type="pil", label="Input Image")
        out_vid2 = gr.Video()
        gr.Button("Animate").click(generate_video_from_image, img2, out_vid2)

demo.launch()
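
One optional tweak that is not part of the listing above: on a shared Space the video tabs can run long enough for concurrent requests to pile up, so it may be worth routing them through Gradio's built-in queue. Assuming a recent Gradio release where `Blocks.queue()` returns the app for chaining, the final `demo.launch()` can be swapped for:

demo.queue(max_size=8).launch()  # hold at most 8 pending requests in the queue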