import torch
import gradio as gr
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionInstructPix2PixPipeline,
    StableVideoDiffusionPipeline,
    WanPipeline,
)
from diffusers.utils import export_to_video, load_image
# Detect device & dtype
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
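# float16 roughly halves memory use on GPU; CPUs generally run better in float32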
# Factory: load a pipeline and keep its memory footprint small
def make_pipe(cls, model_id, **kwargs):
    pipe = cls.from_pretrained(model_id, torch_dtype=dtype, **kwargs)
    if device == "cuda":
        # Offload sub-models that are not in use to CPU to save VRAM;
        # this also handles device placement, so no extra .to(device) is needed
        pipe.enable_model_cpu_offload()
    else:
        pipe.to(device)
    return pipe
# Hold pipelines in globals but don’t load yet
TXT2IMG_PIPE = None
IMG2IMG_PIPE = None
TXT2VID_PIPE = None
IMG2VID_PIPE = None
def generate_image_from_text(prompt):
    global TXT2IMG_PIPE
    if TXT2IMG_PIPE is None:
        TXT2IMG_PIPE = make_pipe(
            StableDiffusionPipeline,
            "stabilityai/stable-diffusion-2-1-base",
        )
    return TXT2IMG_PIPE(prompt, num_inference_steps=20).images[0]
def generate_image_from_image_and_prompt(image, prompt):
    global IMG2IMG_PIPE
    if IMG2IMG_PIPE is None:
        IMG2IMG_PIPE = make_pipe(
            StableDiffusionInstructPix2PixPipeline,
            "timbrooks/instruct-pix2pix",
        )
    out = IMG2IMG_PIPE(prompt=prompt, image=image, num_inference_steps=8)
    return out.images[0]
def generate_video_from_text(prompt):
    global TXT2VID_PIPE
    if TXT2VID_PIPE is None:
        TXT2VID_PIPE = make_pipe(
            WanPipeline,
            "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
        )
    frames = TXT2VID_PIPE(prompt=prompt, num_frames=12).frames[0]
    return export_to_video(frames, "wan_video.mp4", fps=8)
def generate_video_from_image(image):
    global IMG2VID_PIPE
    if IMG2VID_PIPE is None:
        IMG2VID_PIPE = make_pipe(
            StableVideoDiffusionPipeline,
            "stabilityai/stable-video-diffusion-img2vid-xt",
            variant="fp16" if dtype == torch.float16 else None,
        )
    # SVD expects a PIL image; downscale to keep memory use modest
    image = load_image(image).resize((512, 288))
    frames = IMG2VID_PIPE(image, num_inference_steps=16).frames[0]
    return export_to_video(frames, "svd_video.mp4", fps=8)
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Lightweight Any‑to‑Any AI Playground")
    with gr.Tab("Text → Image"):
        inp = gr.Textbox(label="Prompt")
        out = gr.Image()
        gr.Button("Generate").click(generate_image_from_text, inp, out)
    with gr.Tab("Image → Image"):
        # type="pil" so the handler receives a PIL image, as the pipeline expects
        img = gr.Image(label="Input Image", type="pil")
        prm = gr.Textbox(label="Edit Prompt")
        out2 = gr.Image()
        gr.Button("Generate").click(generate_image_from_image_and_prompt, [img, prm], out2)
    with gr.Tab("Text → Video"):
        inp2 = gr.Textbox(label="Prompt")
        out_vid = gr.Video()
        gr.Button("Generate").click(generate_video_from_text, inp2, out_vid)
    with gr.Tab("Image → Video"):
        img2 = gr.Image(label="Input Image", type="pil")
        out_vid2 = gr.Video()
        gr.Button("Animate").click(generate_video_from_image, img2, out_vid2)
demo.launch()
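# Note: for long-running video generations it can help to enable Gradio's
# request queue, e.g. demo.queue().launch(), so jobs don't hit request timeouts.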