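# visionary-ai / app.py
# NOTE(review): the lines below are web-page header text captured together with
# the file, not program code; kept as comments so the module parses.
#   kevalfst's picture | Update app.py | 76f81b8 verified | raw | history blame | 7.27 kB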
import hashlib
import logging
import random

import gradio as gr
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
from transformers import pipeline
# Optional dependency probe: xformers is imported only to learn whether it is
# installed. `has_xformers` later gates the calls to
# `enable_xformers_memory_efficient_attention()` in the pipeline loaders.
has_xformers = True
try:
    import xformers  # noqa: F401  (availability check only)
except ImportError:
    has_xformers = False
# Pick the execution device and the matching tensor precision together:
# half precision on CUDA, full precision on CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Upper bound (inclusive) for seeds: used by the UI seed sliders and by
# random.randint() when "Randomize seed" is ticked.
MAX_SEED = (1 << 32) - 1
# Model registries: UI display label -> model id passed to `from_pretrained()`
# / `pipeline(model=...)`. Insertion order is the order shown in the dropdowns.
# NOTE(review): the original comment says the lists are "ordered by size";
# that ordering has not been verified here.

# Text-to-image models.
image_models = {
    "Stable Diffusion 1.5 (light)": "runwayml/stable-diffusion-v1-5",
    "Stable Diffusion 2.1": "stabilityai/stable-diffusion-2-1",
    "Dreamlike 2.0": "dreamlike-art/dreamlike-photoreal-2.0",
    "Playground v2": "playgroundai/playground-v2-1024px-aesthetic",
    "Muse 512": "amused/muse-512-finetuned",
    "PixArt": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS",
    "Kandinsky 3": "kandinsky-community/kandinsky-3",
    "BLIP Diffusion": "Salesforce/blipdiffusion",
    "SDXL Base 1.0 (heavy)": "stabilityai/stable-diffusion-xl-base-1.0",
    "OpenJourney (heavy)": "prompthero/openjourney",
}

# Text-generation (causal language) models.
text_models = {
    "GPT-2 (light)": "gpt2",
    "GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",
    "BLOOM 1.1B": "bigscience/bloom-1b1",
    "GPT-J 6B": "EleutherAI/gpt-j-6B",
    "Falcon 7B": "tiiuae/falcon-7b",
    "XGen 7B": "Salesforce/xgen-7b-8k-base",
    "BTLM 3B": "cerebras/btlm-3b-8k-base",
    "MPT 7B": "mosaicml/mpt-7b",
    "StableLM 2": "stabilityai/stablelm-2-1_6b",
    "LLaMA 2 7B (heavy)": "meta-llama/Llama-2-7b-hf",
}

# Text-to-video models.
video_models = {
    "CogVideoX-2B": "THUDM/CogVideoX-2b",
    "CogVideoX-5B": "THUDM/CogVideoX-5b",
    "AnimateDiff-Lightning": "ByteDance/AnimateDiff-Lightning",
    "ModelScope T2V": "damo-vilab/text-to-video-ms-1.7b",
    "VideoCrafter2": "VideoCrafter/VideoCrafter2",
    "Open-Sora-Plan-v1.2.0": "LanguageBind/Open-Sora-Plan-v1.2.0",
    "LTX-Video": "Lightricks/LTX-Video",
    "HunyuanVideo": "tencent/HunyuanVideo",
    "Latte-1": "maxin-cn/Latte-1",
    "LaVie": "Vchitect/LaVie",
}
# Module-level, in-memory caches. Nothing in this file evicts entries.

# Loaded pipelines, keyed by the display label selected in the UI.
image_pipes, text_pipes, video_pipes = {}, {}, {}

# Generated results, keyed by the digest returned from hash_inputs().
image_cache, text_cache, video_cache = {}, {}, {}
def hash_inputs(*args):
    """Return a SHA-256 hex digest identifying the given argument sequence.

    Each argument is converted with ``str()`` and framed as
    ``"<length>:<text>|"`` before hashing. The length prefix keeps field
    boundaries unambiguous, so inputs that themselves contain ``"|"`` (for
    example a user prompt) cannot collide with a different split of the same
    characters, and ``hash_inputs()`` differs from ``hash_inputs("")``.

    Args:
        *args: Values that make up the cache key; anything ``str()`` accepts.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    digest = hashlib.sha256()
    for arg in args:
        text = str(arg)
        digest.update(f"{len(text)}:{text}|".encode())
    return digest.hexdigest()
def _load_image_pipe(model_name):
    """Return the image pipeline for ``model_name``, loading it on first use.

    The loaded pipeline is stored in ``image_pipes`` so later calls reuse it.

    Args:
        model_name: A key of ``image_models`` (the label chosen in the UI).

    Returns:
        The pipeline object cached under ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``image_models``.
    """
    if model_name in image_pipes:
        return image_pipes[model_name]

    pipe = DiffusionPipeline.from_pretrained(
        image_models[model_name],
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    if has_xformers and device == "cuda":
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            # Deliberately best-effort: the pipeline still works without the
            # optimisation, but record why it could not be enabled.
            logging.getLogger(__name__).warning(
                "Could not enable xformers attention for %s; continuing without it.",
                model_name,
                exc_info=True,
            )
    pipe.to(device)

    # Compile the denoiser submodule rather than the pipeline object: calling
    # torch.compile() on a non-nn.Module callable returns a plain function,
    # which has no `.to()` / `.enable_*()` methods. Restricted to CUDA as a
    # conservative choice (review: extend to CPU if desired).
    if device == "cuda" and hasattr(torch, "compile"):
        for attr in ("unet", "transformer"):
            module = getattr(pipe, attr, None)
            if isinstance(module, torch.nn.Module):
                setattr(pipe, attr, torch.compile(module))

    image_pipes[model_name] = pipe
    return pipe


def generate_image(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one 512x512 image for ``prompt`` with the selected model.

    Results are memoised in ``image_cache`` under a digest of
    ``(prompt, model_name, seed)``, so repeating a request with a fixed seed
    returns the stored image without running the model again.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        model_name: A key of ``image_models``.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ignore ``seed`` and draw one uniformly
            from ``[0, MAX_SEED]``.
        progress: Gradio progress tracker; updated with fractions in [0, 1].

    Returns:
        A ``(image, seed)`` tuple: the first image produced by the pipeline
        and the seed that was actually used.
    """
    # UI sliders may deliver the seed as a float; normalise so the cache key,
    # the generator seed and the value echoed back to the UI all agree.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    key = hash_inputs(prompt, model_name, seed)
    if key in image_cache:
        progress(1.0, desc="Using cached image.")
        return image_cache[key], seed

    progress(0.1, desc="Loading model...")
    pipe = _load_image_pipe(model_name)

    progress(0.4, desc="Generating image...")
    result = pipe(
        prompt=prompt,
        # A dedicated generator keeps the request reproducible without
        # reseeding torch's process-wide default generator.
        generator=torch.Generator().manual_seed(seed),
        num_inference_steps=15,
        width=512,
        height=512,
    )
    image = result.images[0]
    image_cache[key] = image
    progress(1.0, desc="Done.")
    return image, seed
def generate_text(prompt, model_name, progress=gr.Progress(track_tqdm=True)):
    """Generate a sampled continuation of ``prompt`` with the selected model.

    The first result for each ``(prompt, model_name)`` pair is stored in
    ``text_cache`` and returned for every later identical request, even though
    generation itself is sampled.

    Args:
        prompt: Text to continue.
        model_name: A key of ``text_models``.
        progress: Gradio progress tracker; updated with fractions in [0, 1].

    Returns:
        The ``generated_text`` field of the pipeline's first output.
    """
    key = hash_inputs(prompt, model_name)
    if key in text_cache:
        progress(1.0, desc="Using cached text.")
        return text_cache[key]

    progress(0.1, desc="Loading model...")
    if model_name not in text_pipes:
        text_pipes[model_name] = pipeline(
            "text-generation",
            model=text_models[model_name],
            # Same precision policy as the diffusion loaders in this file.
            torch_dtype=torch_dtype,
            device=0 if device == "cuda" else -1,
        )
    generator = text_pipes[model_name]

    progress(0.4, desc="Generating text...")
    # max_new_tokens bounds only the continuation. The previous max_length=100
    # also counted the prompt, so prompts near or above 100 tokens left no
    # room to generate.
    outputs = generator(prompt, max_new_tokens=100, do_sample=True)
    result = outputs[0]["generated_text"]
    text_cache[key] = result
    progress(1.0, desc="Done.")
    return result
def _load_video_pipe(model_name):
    """Return the video pipeline for ``model_name``, loading it on first use.

    The loaded pipeline is stored in ``video_pipes`` so later calls reuse it.

    Args:
        model_name: A key of ``video_models`` (the label chosen in the UI).

    Returns:
        The pipeline object cached under ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``video_models``.
    """
    if model_name in video_pipes:
        return video_pipes[model_name]

    repo_id = video_models[model_name]
    try:
        pipe = DiffusionPipeline.from_pretrained(
            repo_id,
            torch_dtype=torch_dtype,
            variant="fp16",
        )
    except (ValueError, OSError):
        # Not every repository publishes an "fp16" weight variant; retry with
        # the default weights instead of failing the request outright.
        logging.getLogger(__name__).warning(
            "No usable fp16 variant for %s; loading default weights.",
            repo_id,
            exc_info=True,
        )
        pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)

    if has_xformers and device == "cuda":
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            # Deliberately best-effort: the pipeline still works without the
            # optimisation, but record why it could not be enabled.
            logging.getLogger(__name__).warning(
                "Could not enable xformers attention for %s; continuing without it.",
                model_name,
                exc_info=True,
            )
    pipe.to(device)

    # Compile the denoiser submodule rather than the pipeline object: calling
    # torch.compile() on a non-nn.Module callable returns a plain function,
    # which has no `.to()` / `.enable_*()` methods. Restricted to CUDA as a
    # conservative choice (review: extend to CPU if desired).
    if device == "cuda" and hasattr(torch, "compile"):
        for attr in ("unet", "transformer"):
            module = getattr(pipe, attr, None)
            if isinstance(module, torch.nn.Module):
                setattr(pipe, attr, torch.compile(module))

    video_pipes[model_name] = pipe
    return pipe


def generate_video(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate a video for ``prompt`` with the selected model.

    Results are memoised in ``video_cache`` under a digest of
    ``(prompt, model_name, seed)``; the cached value is the path of the
    exported video file.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        model_name: A key of ``video_models``.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ignore ``seed`` and draw one uniformly
            from ``[0, MAX_SEED]``.
        progress: Gradio progress tracker; updated with fractions in [0, 1].

    Returns:
        A ``(video_path, seed)`` tuple: the path returned by
        ``export_to_video`` and the seed that was actually used.
    """
    # UI sliders may deliver the seed as a float; normalise so the cache key,
    # the generator seed and the value echoed back to the UI all agree.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    key = hash_inputs(prompt, model_name, seed)
    if key in video_cache:
        progress(1.0, desc="Using cached video.")
        return video_cache[key], seed

    progress(0.1, desc="Loading model...")
    pipe = _load_video_pipe(model_name)

    progress(0.4, desc="Generating video...")
    result = pipe(
        prompt=prompt,
        # A dedicated generator keeps the request reproducible without
        # reseeding torch's process-wide default generator.
        generator=torch.Generator().manual_seed(seed),
        num_inference_steps=15,
    )
    # The first entry of `frames` is the frame sequence for the single prompt.
    video_frames = result.frames[0]
    video_path = export_to_video(video_frames)
    video_cache[key] = video_path
    progress(1.0, desc="Done.")
    return video_path, seed
# Gradio interface: one tab per modality, each wiring its inputs to the
# matching generate_* handler. The seed sliders use step=1 so every integer
# seed in [0, MAX_SEED] is selectable; without an explicit step Gradio derives
# a much coarser one from the slider range.
with gr.Blocks() as demo:
    gr.Markdown("# ⚡ Fast Multi-Model AI Playground with Caching")
    with gr.Tabs():
        # Image Generation
        with gr.Tab("🖼️ Image Generation"):
            img_prompt = gr.Textbox(label="Prompt")
            img_model = gr.Dropdown(
                choices=list(image_models.keys()),
                value="Stable Diffusion 1.5 (light)",
                label="Image Model",
            )
            img_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            img_rand = gr.Checkbox(label="Randomize seed", value=True)
            img_btn = gr.Button("Generate Image")
            img_out = gr.Image()
            # The handler also returns the seed it used, which is written back
            # to the slider so a randomised run can be reproduced.
            img_btn.click(
                fn=generate_image,
                inputs=[img_prompt, img_model, img_seed, img_rand],
                outputs=[img_out, img_seed],
            )
        # Text Generation
        with gr.Tab("📝 Text Generation"):
            txt_prompt = gr.Textbox(label="Prompt")
            txt_model = gr.Dropdown(
                choices=list(text_models.keys()),
                value="GPT-2 (light)",
                label="Text Model",
            )
            txt_btn = gr.Button("Generate Text")
            txt_out = gr.Textbox(label="Output Text")
            txt_btn.click(
                fn=generate_text,
                inputs=[txt_prompt, txt_model],
                outputs=[txt_out],
            )
        # Video Generation
        with gr.Tab("🎥 Video Generation"):
            vid_prompt = gr.Textbox(label="Prompt")
            vid_model = gr.Dropdown(
                choices=list(video_models.keys()),
                value="CogVideoX-2B",
                label="Video Model",
            )
            vid_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            vid_rand = gr.Checkbox(label="Randomize seed", value=True)
            vid_btn = gr.Button("Generate Video")
            vid_out = gr.Video()
            # As with images, the seed actually used is echoed to the slider.
            vid_btn.click(
                fn=generate_video,
                inputs=[vid_prompt, vid_model, vid_seed, vid_rand],
                outputs=[vid_out, vid_seed],
            )
demo.launch(show_error=True)