Spaces:

kevalfst
/

visionary-ai

Running

File size: 7,268 Bytes

78ec26d
c4ccad7
7ea27ba
76f81b8
c4ccad7
7ea27ba
76f81b8
 
 
 
 
 
 
 
868b112
c4ccad7
 
7ea27ba
 
76f81b8
7ea27ba
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
 
7ea27ba
3455f8c
7ea27ba
bdd1e49
7ea27ba
 
bdd1e49
7ea27ba
 
bdd1e49
 
 
 
7ea27ba
3455f8c
76f81b8
 
 
 
 
 
 
 
 
 
 
 
 
 
7ea27ba
 
76f81b8
 
 
 
 
 
 
 
3455f8c
bdd1e49
f7bfc02
 
7ea27ba
76f81b8
 
 
 
 
 
7ea27ba
76f81b8
7ea27ba
76f81b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ea27ba
3455f8c
76f81b8
 
 
 
bdd1e49
 
76f81b8
bdd1e49
 
76f81b8
 
 
 
 
 
7ea27ba
76f81b8
 
 
 
 
7ea27ba
 
76f81b8
bdd1e49
76f81b8
 
bdd1e49
 
7ea27ba
76f81b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdd1e49
868b112
76f81b8
7ea27ba
 
76f81b8
bdd1e49
7ea27ba
bdd1e49
7ea27ba
 
 
 
 
 
76f81b8
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
76f81b8
78ec26d
76f81b8
 
bdd1e49
76f81b8
 
 
 
 
 
bdd1e49
7ea27ba

import hashlib
import logging
import random

import gradio as gr
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
from transformers import pipeline

# xformers is an optional dependency: the import is attempted only to record
# whether it is installed, so the loaders can decide whether to enable
# memory-efficient attention.
try:
    import xformers
except ImportError:
    has_xformers = False
else:
    has_xformers = True

# Run on the GPU when one is visible, in half precision; otherwise fall back
# to full-precision CPU execution.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Largest seed value offered by the UI and drawn by the random-seed option.
MAX_SEED = (1 << 32) - 1

# Image models offered in the UI, ordered by size.
# Maps the dropdown label to the repository id passed to
# DiffusionPipeline.from_pretrained.
image_models = {
    "Stable Diffusion 1.5 (light)": "runwayml/stable-diffusion-v1-5",
    "Stable Diffusion 2.1": "stabilityai/stable-diffusion-2-1",
    "Dreamlike 2.0": "dreamlike-art/dreamlike-photoreal-2.0",
    "Playground v2": "playgroundai/playground-v2-1024px-aesthetic",
    "Muse 512": "amused/muse-512-finetuned",
    "PixArt": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS",
    "Kandinsky 3": "kandinsky-community/kandinsky-3",
    "BLIP Diffusion": "Salesforce/blipdiffusion",
    "SDXL Base 1.0 (heavy)": "stabilityai/stable-diffusion-xl-base-1.0",
    "OpenJourney (heavy)": "prompthero/openjourney",
}

# Text-generation models offered in the UI.
# Maps the dropdown label to the model id passed to transformers.pipeline.
text_models = {
    "GPT-2 (light)": "gpt2",
    "GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",
    "BLOOM 1.1B": "bigscience/bloom-1b1",
    "GPT-J 6B": "EleutherAI/gpt-j-6B",
    "Falcon 7B": "tiiuae/falcon-7b",
    "XGen 7B": "Salesforce/xgen-7b-8k-base",
    "BTLM 3B": "cerebras/btlm-3b-8k-base",
    "MPT 7B": "mosaicml/mpt-7b",
    "StableLM 2": "stabilityai/stablelm-2-1_6b",
    "LLaMA 2 7B (heavy)": "meta-llama/Llama-2-7b-hf",
}

# Text-to-video models offered in the UI.
# Maps the dropdown label to the repository id passed to
# DiffusionPipeline.from_pretrained.
video_models = {
    "CogVideoX-2B": "THUDM/CogVideoX-2b",
    "CogVideoX-5B": "THUDM/CogVideoX-5b",
    "AnimateDiff-Lightning": "ByteDance/AnimateDiff-Lightning",
    "ModelScope T2V": "damo-vilab/text-to-video-ms-1.7b",
    "VideoCrafter2": "VideoCrafter/VideoCrafter2",
    "Open-Sora-Plan-v1.2.0": "LanguageBind/Open-Sora-Plan-v1.2.0",
    "LTX-Video": "Lightricks/LTX-Video",
    "HunyuanVideo": "tencent/HunyuanVideo",
    "Latte-1": "maxin-cn/Latte-1",
    "LaVie": "Vchitect/LaVie",
}

# In-memory caches, alive for the lifetime of the process.
# *_pipes: model label -> loaded pipeline, so each model is loaded only once.
image_pipes, text_pipes, video_pipes = {}, {}, {}
# *_cache: hash of the request inputs -> previously generated result.
image_cache, text_cache, video_cache = {}, {}, {}

def hash_inputs(*args):
    """Return a SHA-256 hex digest that identifies the given argument tuple.

    Each argument is converted with ``str()`` and fed to the hash prefixed by
    its encoded length. The length prefix keeps field boundaries unambiguous,
    so ``("a|b", "c")`` and ``("a", "b|c")`` produce different keys (a plain
    separator join would make them collide).

    Args:
        *args: Values to fold into the key; any object with a ``str()`` form.

    Returns:
        A 64-character hexadecimal digest string.
    """
    digest = hashlib.sha256()
    for arg in args:
        encoded = str(arg).encode()
        # "<byte length>:<bytes>" framing makes the concatenation injective.
        digest.update(f"{len(encoded)}:".encode())
        digest.update(encoded)
    return digest.hexdigest()

def _load_image_pipe(model_name):
    """Return the image pipeline for *model_name*, loading and caching it on first use."""
    if model_name in image_pipes:
        return image_pipes[model_name]

    pipe = DiffusionPipeline.from_pretrained(
        image_models[model_name],
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    pipe.to(device)

    if has_xformers and device == "cuda":
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            # Best-effort optimisation: keep going with default attention,
            # but leave a trace instead of failing silently.
            logging.getLogger(__name__).warning(
                "Could not enable xformers attention for %s", model_name, exc_info=True
            )

    # torch.compile is applied to the denoiser module only. Compiling the
    # pipeline object itself yields a plain function wrapper that no longer
    # exposes pipeline methods such as .to().
    if device == "cuda" and hasattr(torch, "compile"):
        for attr in ("unet", "transformer"):
            module = getattr(pipe, attr, None)
            if isinstance(module, torch.nn.Module):
                setattr(pipe, attr, torch.compile(module))

    image_pipes[model_name] = pipe
    return pipe


def generate_image(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate a 512x512 image for *prompt* with the selected diffusion model.

    Results are memoised in ``image_cache`` keyed by (prompt, model, seed), and
    loaded pipelines are kept in ``image_pipes``.

    Args:
        prompt: Text prompt passed to the pipeline.
        model_name: Key of ``image_models`` selecting the model to use.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ignore ``seed`` and draw one from
            ``[0, MAX_SEED]``.
        progress: Gradio progress tracker (fractions are given on a 0-1 scale).

    Returns:
        ``(image, seed)`` where ``seed`` is the integer seed actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI widgets may deliver the seed as a float; normalise so the cache key
    # and the value handed back to the UI are stable.
    seed = int(seed)

    key = hash_inputs(prompt, model_name, seed)
    if key in image_cache:
        progress(1.0, desc="Using cached image.")
        return image_cache[key], seed

    progress(0.1, desc="Loading model...")
    pipe = _load_image_pipe(model_name)

    progress(0.4, desc="Generating image...")
    result = pipe(
        prompt=prompt,
        generator=torch.manual_seed(seed),
        num_inference_steps=15,
        width=512,
        height=512,
    )
    image = result.images[0]
    image_cache[key] = image

    progress(1.0, desc="Done.")
    return image, seed

def generate_text(prompt, model_name, progress=gr.Progress(track_tqdm=True)):
    """Continue *prompt* with the selected text-generation model.

    Results are memoised in ``text_cache`` keyed by (prompt, model), so a
    repeated request returns the previously sampled text. Loaded pipelines are
    kept in ``text_pipes``.

    Args:
        prompt: Text to continue.
        model_name: Key of ``text_models`` selecting the model to use.
        progress: Gradio progress tracker (fractions are given on a 0-1 scale).

    Returns:
        The generated text as returned by the pipeline's ``generated_text``
        field.
    """
    key = hash_inputs(prompt, model_name)
    if key in text_cache:
        progress(1.0, desc="Using cached text.")
        return text_cache[key]

    progress(0.1, desc="Loading model...")
    if model_name not in text_pipes:
        text_pipes[model_name] = pipeline(
            "text-generation",
            model=text_models[model_name],
            # Same precision policy as the diffusion loaders: half precision
            # on GPU, full precision on CPU.
            torch_dtype=torch_dtype,
            device=0 if device == "cuda" else -1,
        )
    pipe = text_pipes[model_name]

    progress(0.4, desc="Generating text...")
    # max_new_tokens bounds only the continuation; max_length would also
    # count the prompt, leaving no room to generate for long prompts.
    outputs = pipe(prompt, max_new_tokens=100, do_sample=True)
    result = outputs[0]["generated_text"]
    text_cache[key] = result

    progress(1.0, desc="Done.")
    return result

def _load_video_pipe(model_name):
    """Return the video pipeline for *model_name*, loading and caching it on first use."""
    if model_name in video_pipes:
        return video_pipes[model_name]

    log = logging.getLogger(__name__)
    repo_id = video_models[model_name]
    try:
        pipe = DiffusionPipeline.from_pretrained(
            repo_id,
            torch_dtype=torch_dtype,
            variant="fp16",
        )
    except (OSError, ValueError):
        # NOTE(review): repositories that do not publish fp16 variant files
        # are expected to fail here; retry with the default weights. Confirm
        # the exception types against the installed diffusers version.
        log.warning("fp16 variant unavailable for %s; loading default weights", repo_id)
        pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
    pipe.to(device)

    if has_xformers and device == "cuda":
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            # Best-effort optimisation: keep going with default attention,
            # but leave a trace instead of failing silently.
            log.warning(
                "Could not enable xformers attention for %s", model_name, exc_info=True
            )

    # torch.compile is applied to the denoiser module only. Compiling the
    # pipeline object itself yields a plain function wrapper that no longer
    # exposes pipeline methods such as .to().
    if device == "cuda" and hasattr(torch, "compile"):
        for attr in ("unet", "transformer"):
            module = getattr(pipe, attr, None)
            if isinstance(module, torch.nn.Module):
                setattr(pipe, attr, torch.compile(module))

    video_pipes[model_name] = pipe
    return pipe


def generate_video(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate a video clip for *prompt* with the selected text-to-video model.

    Results are memoised in ``video_cache`` keyed by (prompt, model, seed), and
    loaded pipelines are kept in ``video_pipes``.

    Args:
        prompt: Text prompt passed to the pipeline.
        model_name: Key of ``video_models`` selecting the model to use.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ignore ``seed`` and draw one from
            ``[0, MAX_SEED]``.
        progress: Gradio progress tracker (fractions are given on a 0-1 scale).

    Returns:
        ``(video_path, seed)`` where ``video_path`` is the file written by
        ``export_to_video`` and ``seed`` is the integer seed actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI widgets may deliver the seed as a float; normalise so the cache key
    # and the value handed back to the UI are stable.
    seed = int(seed)

    key = hash_inputs(prompt, model_name, seed)
    if key in video_cache:
        progress(1.0, desc="Using cached video.")
        return video_cache[key], seed

    progress(0.1, desc="Loading model...")
    pipe = _load_video_pipe(model_name)

    progress(0.4, desc="Generating video...")
    result = pipe(prompt=prompt, generator=torch.manual_seed(seed), num_inference_steps=15)
    video_frames = result.frames[0]
    video_path = export_to_video(video_frames)
    video_cache[key] = video_path

    progress(1.0, desc="Done.")
    return video_path, seed

# Gradio Interface
# One tab per modality. Each tab wires its inputs to the matching generate_*
# function; the image and video tabs also write the seed that was actually
# used back into the seed slider.
with gr.Blocks() as demo:
    gr.Markdown("# ⚡ Fast Multi-Model AI Playground with Caching")

    with gr.Tabs():
        # Image Generation
        with gr.Tab("🖼️ Image Generation"):
            img_prompt = gr.Textbox(label="Prompt")
            img_model = gr.Dropdown(choices=list(image_models.keys()), value="Stable Diffusion 1.5 (light)", label="Image Model")
            # step=1 keeps the slider on integer seeds; without an explicit
            # step the increment is derived from the (very large) range.
            img_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            img_rand = gr.Checkbox(label="Randomize seed", value=True)
            img_btn = gr.Button("Generate Image")
            img_out = gr.Image()
            img_btn.click(fn=generate_image, inputs=[img_prompt, img_model, img_seed, img_rand], outputs=[img_out, img_seed])

        # Text Generation
        with gr.Tab("📝 Text Generation"):
            txt_prompt = gr.Textbox(label="Prompt")
            txt_model = gr.Dropdown(choices=list(text_models.keys()), value="GPT-2 (light)", label="Text Model")
            txt_btn = gr.Button("Generate Text")
            txt_out = gr.Textbox(label="Output Text")
            txt_btn.click(fn=generate_text, inputs=[txt_prompt, txt_model], outputs=[txt_out])

        # Video Generation
        with gr.Tab("🎥 Video Generation"):
            vid_prompt = gr.Textbox(label="Prompt")
            vid_model = gr.Dropdown(choices=list(video_models.keys()), value="CogVideoX-2B", label="Video Model")
            # step=1 keeps the slider on integer seeds (see the image tab).
            vid_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            vid_rand = gr.Checkbox(label="Randomize seed", value=True)
            vid_btn = gr.Button("Generate Video")
            vid_out = gr.Video()
            vid_btn.click(fn=generate_video, inputs=[vid_prompt, vid_model, vid_seed, vid_rand], outputs=[vid_out, vid_seed])

# show_error=True surfaces exceptions raised by the handlers in the browser UI.
demo.launch(show_error=True)