import gradio as gr
import torch
import random
from diffusers import DiffusionPipeline
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
MAX_SEED = 2**32 - 1

# --- Model lists ---
image_models = {
    # The original runwayml/stable-diffusion-v1-5 repo was removed from the Hub;
    # this community mirror hosts the same weights.
    "Stable Diffusion 1.5": "stable-diffusion-v1-5/stable-diffusion-v1-5",
    "Stable Diffusion 2.1": "stabilityai/stable-diffusion-2-1",
    "SDXL Base 1.0": "stabilityai/stable-diffusion-xl-base-1.0",
    "Playground v2": "playgroundai/playground-v2-1024px-aesthetic",
    "Kandinsky 3": "kandinsky-community/kandinsky-3",
    "PixArt": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS",
    "BLIP Diffusion": "Salesforce/blipdiffusion",  # needs a reference image; the text-only call below will not work for it
    "Muse 512": "amused/muse-512-finetuned",
    "Dreamlike 2.0": "dreamlike-art/dreamlike-photoreal-2.0",
    "OpenJourney": "prompthero/openjourney"
}

# NOTE: generate_video below is a placeholder, so these repo IDs are never actually loaded.
video_models = {
    "AnimateDiff": "guoyww/animatediff-motion-adapter-v1-5-2",
    "CogVideoX-5b": "THUDM/CogVideoX-5b",
    "HunyuanVideo": "tencent/HunyuanVideo",
    "LTX-Video": "Lightricks/LTX-Video",
    "ModelScope T2V": "damo-vilab/modelscope-text-to-video-synthesis",
    "VideoCrafter": "videocrafter/videocrafter",
    "Mochi-1": "genmo/mochi-1-preview",
    "Allegro": "rhymes-ai/Allegro",
    "OpenSora": "LanguageBind/Open-Sora-Plan-v1.2.0",
    "Zeroscope v2": "cerspense/zeroscope_v2_576w"
}

text_models = {
    "GPT-2": "gpt2",
    "GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",
    "GPT-J 6B": "EleutherAI/gpt-j-6B",
    "BLOOM 1.1B": "bigscience/bloom-1b1",
    "Falcon 7B": "tiiuae/falcon-7b",
    "MPT 7B": "mosaicml/mpt-7b",
    "LLaMA 2 7B": "meta-llama/Llama-2-7b-hf",  # gated repo: requires accepting Meta's license and an HF access token
    "BTLM 3B": "cerebras/btlm-3b-8k-base",
    "XGen 7B": "Salesforce/xgen-7b-8k-base",
    "StableLM 2": "stabilityai/stablelm-2-1_6b"
}

# --- Caching loaded pipelines ---
image_pipes = {}
text_pipes = {}

# --- Functional logic ---
def generate_image(prompt, model_name, seed, randomize_seed):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Use a device-local generator rather than mutating the global RNG state
    generator = torch.Generator(device=device).manual_seed(int(seed))

    if model_name not in image_pipes:
        image_pipes[model_name] = DiffusionPipeline.from_pretrained(
            image_models[model_name],
            torch_dtype=torch_dtype
        ).to(device)
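
    # NOTE: every loaded pipeline stays resident (in VRAM on CUDA) for the lifetime
    # of the process. If memory is tight, one option is to replace .to(device) above
    # with image_pipes[model_name].enable_model_cpu_offload(), trading speed for VRAM.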

    pipe = image_pipes[model_name]
    # 512x512 keeps memory use modest; SDXL-class models (SDXL, Playground v2) natively target 1024x1024
    image = pipe(prompt=prompt, generator=generator, num_inference_steps=25, width=512, height=512).images[0]
    return image, seed

def generate_text(prompt, model_name):
    if model_name not in text_pipes:
        text_pipes[model_name] = pipeline("text-generation", model=text_models[model_name], device=0 if device == "cuda" else -1)
    pipe = text_pipes[model_name]
    # max_new_tokens counts only generated tokens (max_length would also count the prompt)
    output = pipe(prompt, max_new_tokens=100, do_sample=True)[0]['generated_text']
    return output

def generate_video(prompt, model_name):
    # Placeholder: real video models would return video frames
    return f"[Video placeholder] Model: {model_name}\nPrompt: {prompt}"
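
# The function above is a stub. A minimal sketch of what a real text-to-video path
# could look like, assuming diffusers' CogVideoXPipeline and the THUDM/CogVideoX-5b
# checkpoint (needs a large GPU); it is not wired into the UI below:
def generate_video_cogvideox(prompt, output_path="cogvideox.mp4"):
    from diffusers import CogVideoXPipeline
    from diffusers.utils import export_to_video

    pipe = CogVideoXPipeline.from_pretrained(
        "THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16
    ).to(device)
    frames = pipe(prompt=prompt, num_inference_steps=50, num_frames=49, guidance_scale=6.0).frames[0]
    export_to_video(frames, output_path, fps=8)
    return output_path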

# --- Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# πŸ”„ Multi-Task AI Generator")

    with gr.Tabs():
        # Tab 1: Image Generation
        with gr.Tab("πŸ–ΌοΈ Image"):
            img_prompt = gr.Textbox(label="Prompt")
            img_model = gr.Dropdown(choices=list(image_models.keys()), value="Stable Diffusion 1.5", label="Select Image Model")
            img_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")  # step=1 keeps seeds integral
            img_rand = gr.Checkbox(label="Randomize seed", value=True)
            img_btn = gr.Button("Generate Image")
            img_out = gr.Image()
            img_btn.click(fn=generate_image, inputs=[img_prompt, img_model, img_seed, img_rand], outputs=[img_out, img_seed])

        # Tab 2: Video Generation
        with gr.Tab("πŸŽ₯ Video"):
            vid_prompt = gr.Textbox(label="Prompt")
            vid_model = gr.Dropdown(choices=list(video_models.keys()), value="AnimateDiff", label="Select Video Model")
            vid_btn = gr.Button("Generate Video")
            vid_out = gr.Textbox(label="Result (Placeholder)")
            vid_btn.click(fn=generate_video, inputs=[vid_prompt, vid_model], outputs=vid_out)

        # Tab 3: Text Generation
        with gr.Tab("πŸ“ Text"):
            txt_prompt = gr.Textbox(label="Prompt")
            txt_model = gr.Dropdown(choices=list(text_models.keys()), value="GPT-2", label="Select Text Model")
            txt_btn = gr.Button("Generate Text")
            txt_out = gr.Textbox(label="Generated Text")
            txt_btn.click(fn=generate_text, inputs=[txt_prompt, txt_model], outputs=txt_out)

demo.launch(show_error=True)
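
# Optional: demo.queue().launch(show_error=True) enables Gradio's request queue,
# which helps avoid timeouts on long-running image/text generations.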