File size: 7,268 Bytes
78ec26d
c4ccad7
7ea27ba
76f81b8
c4ccad7
7ea27ba
76f81b8
 
 
 
 
 
 
 
868b112
c4ccad7
 
7ea27ba
 
76f81b8
7ea27ba
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
 
7ea27ba
3455f8c
7ea27ba
bdd1e49
7ea27ba
 
bdd1e49
7ea27ba
 
bdd1e49
 
 
 
7ea27ba
3455f8c
76f81b8
 
 
 
 
 
 
 
 
 
 
 
 
 
7ea27ba
 
76f81b8
 
 
 
 
 
 
 
3455f8c
bdd1e49
f7bfc02
 
7ea27ba
76f81b8
 
 
 
 
 
7ea27ba
76f81b8
7ea27ba
76f81b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ea27ba
3455f8c
76f81b8
 
 
 
bdd1e49
 
76f81b8
bdd1e49
 
76f81b8
 
 
 
 
 
7ea27ba
76f81b8
 
 
 
 
7ea27ba
 
76f81b8
bdd1e49
76f81b8
 
bdd1e49
 
7ea27ba
76f81b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdd1e49
868b112
76f81b8
7ea27ba
 
76f81b8
bdd1e49
7ea27ba
bdd1e49
7ea27ba
 
 
 
 
 
76f81b8
bdd1e49
7ea27ba
bdd1e49
7ea27ba
bdd1e49
76f81b8
78ec26d
76f81b8
 
bdd1e49
76f81b8
 
 
 
 
 
bdd1e49
7ea27ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import gradio as gr
import torch
import random
import hashlib
from diffusers import DiffusionPipeline
from transformers import pipeline
from diffusers.utils import export_to_video

# xformers is an optional accelerator: record whether it can be imported so the
# pipeline loaders can decide whether to request memory-efficient attention.
try:
    import xformers
except ImportError:
    has_xformers = False
else:
    has_xformers = True

# Use the GPU in half precision when CUDA is available; otherwise fall back to
# the CPU in full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Upper bound for seeds, shared by the UI sliders and the random-seed option.
MAX_SEED = 2**32 - 1

# Model catalogues: UI display name -> Hugging Face Hub repository id.
# Insertion order is the order shown in the dropdowns; the original author
# describes each table as ordered by model size.

# Text-to-image checkpoints, loaded through DiffusionPipeline.
image_models = {
    "Stable Diffusion 1.5 (light)": "runwayml/stable-diffusion-v1-5",
    "Stable Diffusion 2.1": "stabilityai/stable-diffusion-2-1",
    "Dreamlike 2.0": "dreamlike-art/dreamlike-photoreal-2.0",
    "Playground v2": "playgroundai/playground-v2-1024px-aesthetic",
    "Muse 512": "amused/muse-512-finetuned",
    "PixArt": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS",
    "Kandinsky 3": "kandinsky-community/kandinsky-3",
    "BLIP Diffusion": "Salesforce/blipdiffusion",
    "SDXL Base 1.0 (heavy)": "stabilityai/stable-diffusion-xl-base-1.0",
    "OpenJourney (heavy)": "prompthero/openjourney",
}

# Causal language models, loaded through the "text-generation" pipeline.
text_models = {
    "GPT-2 (light)": "gpt2",
    "GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",
    "BLOOM 1.1B": "bigscience/bloom-1b1",
    "GPT-J 6B": "EleutherAI/gpt-j-6B",
    "Falcon 7B": "tiiuae/falcon-7b",
    "XGen 7B": "Salesforce/xgen-7b-8k-base",
    "BTLM 3B": "cerebras/btlm-3b-8k-base",
    "MPT 7B": "mosaicml/mpt-7b",
    "StableLM 2": "stabilityai/stablelm-2-1_6b",
    "LLaMA 2 7B (heavy)": "meta-llama/Llama-2-7b-hf",
}

# Text-to-video checkpoints, loaded through DiffusionPipeline.
video_models = {
    "CogVideoX-2B": "THUDM/CogVideoX-2b",
    "CogVideoX-5B": "THUDM/CogVideoX-5b",
    "AnimateDiff-Lightning": "ByteDance/AnimateDiff-Lightning",
    "ModelScope T2V": "damo-vilab/text-to-video-ms-1.7b",
    "VideoCrafter2": "VideoCrafter/VideoCrafter2",
    "Open-Sora-Plan-v1.2.0": "LanguageBind/Open-Sora-Plan-v1.2.0",
    "LTX-Video": "Lightricks/LTX-Video",
    "HunyuanVideo": "tencent/HunyuanVideo",
    "Latte-1": "maxin-cn/Latte-1",
    "LaVie": "Vchitect/LaVie",
}

# Loaded pipelines, keyed by the display name selected in the UI, so a model
# that has already been loaded is reused instead of being loaded again.
image_pipes, text_pipes, video_pipes = {}, {}, {}

# Generated outputs, keyed by a digest of the request inputs, so an identical
# request is answered without running the model again.
image_cache, text_cache, video_cache = {}, {}, {}

def hash_inputs(*args):
    """Return a SHA-256 hex digest identifying the given request inputs.

    Every argument is converted with ``str`` and the pieces are joined with
    ``"|"`` before hashing, so equal argument sequences always produce the
    same 64-character key.
    """
    digest = hashlib.sha256()
    digest.update("|".join(str(part) for part in args).encode())
    return digest.hexdigest()

def generate_image(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate a 512x512 image for ``prompt`` with the selected diffusion model.

    Args:
        prompt: Text prompt passed to the pipeline.
        model_name: Display name; must be a key of ``image_models``.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(image, seed)`` - the first generated image and the seed actually
        used, so the UI can display it.

    Raises:
        KeyError: If ``model_name`` is not in ``image_models``.
    """
    import warnings

    # The UI slider may deliver a float; normalise so that the cache key and
    # the generator see the same integer (42 and 42.0 must not differ).
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    key = hash_inputs(prompt, model_name, seed)
    if key in image_cache:
        # Gradio expects a completion fraction in [0, 1], not a percentage.
        progress(1.0, desc="Using cached image.")
        return image_cache[key], seed

    progress(0.1, desc="Loading model...")
    if model_name not in image_pipes:
        pipe = DiffusionPipeline.from_pretrained(
            image_models[model_name],
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=True
        )
        pipe.to(device)

        if has_xformers and device == "cuda":
            try:
                pipe.enable_xformers_memory_efficient_attention()
            except Exception as exc:
                # Best-effort optimisation: report it, but keep the model usable.
                warnings.warn(f"xformers attention not enabled for {model_name}: {exc}")

        # torch.compile() on the pipeline object itself returns a plain
        # function without .to()/.enable_*() methods, which broke loading.
        # Compile the denoising network instead and keep the pipeline intact.
        # Restricted to CUDA as a conservative choice.
        if device == "cuda" and hasattr(torch, "compile"):
            for attr in ("unet", "transformer"):
                module = getattr(pipe, attr, None)
                if isinstance(module, torch.nn.Module):
                    setattr(pipe, attr, torch.compile(module))

        image_pipes[model_name] = pipe

    pipe = image_pipes[model_name]

    progress(0.4, desc="Generating image...")
    result = pipe(prompt=prompt, generator=torch.manual_seed(seed), num_inference_steps=15, width=512, height=512)
    image = result.images[0]
    image_cache[key] = image

    progress(1.0, desc="Done.")
    return image, seed

def generate_text(prompt, model_name, progress=gr.Progress(track_tqdm=True)):
    """Generate a text continuation of ``prompt`` with the selected model.

    Results are cached per ``(prompt, model_name)``: although generation
    samples (``do_sample=True``), repeating a request returns the text that
    was produced the first time.

    Args:
        prompt: Text to continue.
        model_name: Display name; must be a key of ``text_models``.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        The generated text of the first returned sequence.

    Raises:
        KeyError: If ``model_name`` is not in ``text_models``.
    """
    key = hash_inputs(prompt, model_name)
    if key in text_cache:
        # Gradio expects a completion fraction in [0, 1], not a percentage.
        progress(1.0, desc="Using cached text.")
        return text_cache[key]

    progress(0.1, desc="Loading model...")
    if model_name not in text_pipes:
        text_pipes[model_name] = pipeline(
            "text-generation",
            model=text_models[model_name],
            device=0 if device == "cuda" else -1
        )
    pipe = text_pipes[model_name]

    progress(0.4, desc="Generating text...")
    # NOTE(review): max_length presumably counts the prompt tokens as well, so
    # long prompts leave little or no room for new text - confirm and consider
    # max_new_tokens.
    result = pipe(prompt, max_length=100, do_sample=True)[0]['generated_text']
    text_cache[key] = result

    progress(1.0, desc="Done.")
    return result

def generate_video(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate a video for ``prompt`` with the selected text-to-video model.

    Args:
        prompt: Text prompt passed to the pipeline.
        model_name: Display name; must be a key of ``video_models``.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(video_path, seed)`` - the path returned by ``export_to_video`` and
        the seed actually used, so the UI can display it.

    Raises:
        KeyError: If ``model_name`` is not in ``video_models``.
    """
    import warnings

    # The UI slider may deliver a float; normalise so that the cache key and
    # the generator see the same integer (42 and 42.0 must not differ).
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    key = hash_inputs(prompt, model_name, seed)
    if key in video_cache:
        # Gradio expects a completion fraction in [0, 1], not a percentage.
        progress(1.0, desc="Using cached video.")
        return video_cache[key], seed

    progress(0.1, desc="Loading model...")
    if model_name not in video_pipes:
        # NOTE(review): variant="fp16" is requested even when torch_dtype is
        # float32 (CPU); presumably not every listed repo ships an fp16
        # variant - confirm per model.
        pipe = DiffusionPipeline.from_pretrained(
            video_models[model_name],
            torch_dtype=torch_dtype,
            variant="fp16"
        )
        pipe.to(device)

        if has_xformers and device == "cuda":
            try:
                pipe.enable_xformers_memory_efficient_attention()
            except Exception as exc:
                # Best-effort optimisation: report it, but keep the model usable.
                warnings.warn(f"xformers attention not enabled for {model_name}: {exc}")

        # torch.compile() on the pipeline object itself returns a plain
        # function without .to()/.enable_*() methods, which broke loading.
        # Compile the denoising network instead and keep the pipeline intact.
        # Restricted to CUDA as a conservative choice.
        if device == "cuda" and hasattr(torch, "compile"):
            for attr in ("unet", "transformer"):
                module = getattr(pipe, attr, None)
                if isinstance(module, torch.nn.Module):
                    setattr(pipe, attr, torch.compile(module))

        video_pipes[model_name] = pipe

    pipe = video_pipes[model_name]

    progress(0.4, desc="Generating video...")
    result = pipe(prompt=prompt, generator=torch.manual_seed(seed), num_inference_steps=15)
    video_frames = result.frames[0]
    video_path = export_to_video(video_frames)
    video_cache[key] = video_path

    progress(1.0, desc="Done.")
    return video_path, seed

# Gradio Interface
# Three tabs (image / text / video), each wiring a prompt, a model dropdown and
# a button to the matching generate_* function. The image and video tabs also
# feed the seed that was actually used back into their seed slider.
with gr.Blocks() as demo:
    gr.Markdown("# ⚡ Fast Multi-Model AI Playground with Caching")

    with gr.Tabs():
        # Image Generation
        with gr.Tab("🖼️ Image Generation"):
            img_prompt = gr.Textbox(label="Prompt")
            img_model = gr.Dropdown(choices=list(image_models.keys()), value="Stable Diffusion 1.5 (light)", label="Image Model")
            # step=1 keeps seeds integral; without an explicit step the slider
            # derives a coarse increment from its very large range.
            img_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            img_rand = gr.Checkbox(label="Randomize seed", value=True)
            img_btn = gr.Button("Generate Image")
            img_out = gr.Image()
            img_btn.click(fn=generate_image, inputs=[img_prompt, img_model, img_seed, img_rand], outputs=[img_out, img_seed])

        # Text Generation
        with gr.Tab("📝 Text Generation"):
            txt_prompt = gr.Textbox(label="Prompt")
            txt_model = gr.Dropdown(choices=list(text_models.keys()), value="GPT-2 (light)", label="Text Model")
            txt_btn = gr.Button("Generate Text")
            txt_out = gr.Textbox(label="Output Text")
            txt_btn.click(fn=generate_text, inputs=[txt_prompt, txt_model], outputs=[txt_out])

        # Video Generation
        with gr.Tab("🎥 Video Generation"):
            vid_prompt = gr.Textbox(label="Prompt")
            vid_model = gr.Dropdown(choices=list(video_models.keys()), value="CogVideoX-2B", label="Video Model")
            # step=1 for the same reason as the image seed slider.
            vid_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            vid_rand = gr.Checkbox(label="Randomize seed", value=True)
            vid_btn = gr.Button("Generate Video")
            vid_out = gr.Video()
            vid_btn.click(fn=generate_video, inputs=[vid_prompt, vid_model, vid_seed, vid_rand], outputs=[vid_out, vid_seed])

# show_error=True surfaces exceptions raised by the handlers in the browser UI.
demo.launch(show_error=True)