Spaces:

kevalfst
/

visionary-ai

Running

App Files Files Community

visionary-ai / app.py

kevalfst

Update app.py

76f81b8 verified 23 days ago

raw

history blame

7.27 kB

	import hashlib
	import random
	import warnings

	import gradio as gr
	import torch
	from diffusers import DiffusionPipeline
	from diffusers.utils import export_to_video
	from transformers import pipeline

	# Optional dependency probe: record whether xformers can be imported so the
	# pipeline loaders can decide whether to request memory-efficient attention.
	try:
	    import xformers  # noqa: F401  (imported only to test availability)
	except ImportError:
	    has_xformers = False
	else:
	    has_xformers = True

	# Runtime configuration: run on the GPU in half precision when CUDA is
	# available, otherwise on the CPU in full precision.
	if torch.cuda.is_available():
	    device, torch_dtype = "cuda", torch.float16
	else:
	    device, torch_dtype = "cpu", torch.float32

	# Largest seed offered by the UI and drawn by the "randomize seed" option.
	MAX_SEED = (1 << 32) - 1

	# Image model registry: maps the label shown in the "Image Model" dropdown to
	# the repository id that generate_image passes to
	# DiffusionPipeline.from_pretrained.
	# NOTE(review): the "(light)" / "(heavy)" tags and the ordering are the
	# author's rough size hints and are not checked anywhere in this file.
	# NOTE(review): generate_image calls every entry the same way (prompt only,
	# 512x512, 15 steps) - confirm each checkpoint supports that call.
	image_models = {
	"Stable Diffusion 1.5 (light)": "runwayml/stable-diffusion-v1-5",
	"Stable Diffusion 2.1": "stabilityai/stable-diffusion-2-1",
	"Dreamlike 2.0": "dreamlike-art/dreamlike-photoreal-2.0",
	"Playground v2": "playgroundai/playground-v2-1024px-aesthetic",
	"Muse 512": "amused/muse-512-finetuned",
	"PixArt": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS",
	"Kandinsky 3": "kandinsky-community/kandinsky-3",
	"BLIP Diffusion": "Salesforce/blipdiffusion",
	"SDXL Base 1.0 (heavy)": "stabilityai/stable-diffusion-xl-base-1.0",
	"OpenJourney (heavy)": "prompthero/openjourney"
	}

	# Text model registry: maps the label shown in the "Text Model" dropdown to
	# the model id that generate_text passes to the transformers
	# "text-generation" pipeline.
	# NOTE(review): judging by the labels, the entries are not strictly ordered
	# by size (e.g. "BTLM 3B" and "StableLM 2" follow 7B entries).
	# NOTE(review): some of these repos may need extra loading options or gated
	# access - confirm each one loads with the plain pipeline() call used here.
	text_models = {
	"GPT-2 (light)": "gpt2",
	"GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",
	"BLOOM 1.1B": "bigscience/bloom-1b1",
	"GPT-J 6B": "EleutherAI/gpt-j-6B",
	"Falcon 7B": "tiiuae/falcon-7b",
	"XGen 7B": "Salesforce/xgen-7b-8k-base",
	"BTLM 3B": "cerebras/btlm-3b-8k-base",
	"MPT 7B": "mosaicml/mpt-7b",
	"StableLM 2": "stabilityai/stablelm-2-1_6b",
	"LLaMA 2 7B (heavy)": "meta-llama/Llama-2-7b-hf"
	}

	# Video model registry: maps the label shown in the "Video Model" dropdown to
	# the repository id that generate_video passes to
	# DiffusionPipeline.from_pretrained.
	# NOTE(review): TODO confirm each repository is published in a layout that
	# DiffusionPipeline.from_pretrained can load directly, and that its pipeline
	# returns `.frames` for a prompt-only call, as generate_video expects.
	video_models = {
	"CogVideoX-2B": "THUDM/CogVideoX-2b",
	"CogVideoX-5B": "THUDM/CogVideoX-5b",
	"AnimateDiff-Lightning": "ByteDance/AnimateDiff-Lightning",
	"ModelScope T2V": "damo-vilab/text-to-video-ms-1.7b",
	"VideoCrafter2": "VideoCrafter/VideoCrafter2",
	"Open-Sora-Plan-v1.2.0": "LanguageBind/Open-Sora-Plan-v1.2.0",
	"LTX-Video": "Lightricks/LTX-Video",
	"HunyuanVideo": "tencent/HunyuanVideo",
	"Latte-1": "maxin-cn/Latte-1",
	"LaVie": "Vchitect/LaVie"
	}

	# Caches (module-level, in-memory, shared by every request).
	# Nothing in this file evicts entries, so all six dicts only ever grow.
	# *_pipes: loaded pipeline objects keyed by the dropdown label (model_name).
	image_pipes = {}
	text_pipes = {}
	video_pipes = {}
	# *_cache: generated results keyed by the hash_inputs() digest of the request
	# (image object, generated text, and exported video file path respectively).
	image_cache = {}
	text_cache = {}
	video_cache = {}

	def hash_inputs(*args):
	    """Return a SHA-256 hex digest identifying the given arguments.

	    Each argument is converted with ``str`` and written as
	    ``<length>:<text>`` before hashing. The length prefix makes the
	    boundaries between arguments unambiguous, so ``("a|b", "c")`` and
	    ``("a", "b|c")`` no longer collide the way a separator-joined string
	    does.

	    Args:
	        *args: Any values with a meaningful ``str`` representation.

	    Returns:
	        A 64-character lowercase hexadecimal digest.
	    """
	    parts = [str(arg) for arg in args]
	    combined = "".join(f"{len(part)}:{part}" for part in parts)
	    return hashlib.sha256(combined.encode()).hexdigest()

	def _load_image_pipeline(model_name):
	    """Load the diffusers pipeline registered under ``model_name`` and prepare it for inference."""
	    pipe = DiffusionPipeline.from_pretrained(
	        image_models[model_name],
	        torch_dtype=torch_dtype,
	        low_cpu_mem_usage=True,
	    )
	    # Move to the target device while `pipe` is still the pipeline object.
	    pipe.to(device)

	    if has_xformers and device == "cuda":
	        try:
	            pipe.enable_xformers_memory_efficient_attention()
	        except Exception as exc:
	            # Best effort: keep the default attention, but do not hide why.
	            warnings.warn(f"xformers attention not enabled for {model_name!r}: {exc}")

	    # torch.compile() applied to the pipeline itself returns a plain function
	    # (the pipeline is callable but not an nn.Module), which has no .to() or
	    # .enable_*() methods. Compile the denoiser sub-module instead.
	    # Restricted to CUDA because compiling for CPU depends on a host compiler
	    # toolchain being present - NOTE(review): relax if that is guaranteed.
	    if device == "cuda" and hasattr(torch, "compile"):
	        for attr in ("unet", "transformer"):
	            denoiser = getattr(pipe, attr, None)
	            if isinstance(denoiser, torch.nn.Module):
	                setattr(pipe, attr, torch.compile(denoiser))

	    return pipe


	def generate_image(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
	    """Generate one 512x512 image for ``prompt``, reusing cached pipelines and results.

	    Args:
	        prompt: Text prompt passed to the pipeline.
	        model_name: Key into ``image_models`` selecting the checkpoint.
	        seed: Seed to use when ``randomize_seed`` is false.
	        randomize_seed: When true, ignore ``seed`` and draw one from
	            ``[0, MAX_SEED]``.
	        progress: Gradio progress tracker.

	    Returns:
	        A ``(image, seed)`` tuple: the first generated image and the integer
	        seed that was actually used.
	    """
	    # The seed may arrive as a float (e.g. from a slider without an integer
	    # step); normalise it so the cache key and the returned value are stable.
	    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

	    key = hash_inputs(prompt, model_name, seed)
	    if key in image_cache:
	        # Progress given as a float is a completion fraction in [0, 1].
	        progress(1.0, desc="Using cached image.")
	        return image_cache[key], seed

	    progress(0.1, desc="Loading model...")
	    if model_name not in image_pipes:
	        image_pipes[model_name] = _load_image_pipeline(model_name)
	    pipe = image_pipes[model_name]

	    progress(0.4, desc="Generating image...")
	    # A private generator avoids reseeding the process-wide RNG that other
	    # in-flight requests would share.
	    generator = torch.Generator(device="cpu").manual_seed(seed)
	    result = pipe(prompt=prompt, generator=generator, num_inference_steps=15, width=512, height=512)
	    image = result.images[0]
	    image_cache[key] = image

	    progress(1.0, desc="Done.")
	    return image, seed

	def generate_text(prompt, model_name, progress=gr.Progress(track_tqdm=True)):
	    """Continue ``prompt`` with the selected text model, reusing cached pipelines and results.

	    Args:
	        prompt: Text to continue.
	        model_name: Key into ``text_models`` selecting the checkpoint.
	        progress: Gradio progress tracker.

	    Returns:
	        The generated text as returned by the pipeline's ``generated_text``
	        field. Because results are cached per ``(prompt, model_name)``, a
	        repeated request returns the first sampled output again.
	    """
	    key = hash_inputs(prompt, model_name)
	    if key in text_cache:
	        # Progress given as a float is a completion fraction in [0, 1].
	        progress(1.0, desc="Using cached text.")
	        return text_cache[key]

	    progress(0.1, desc="Loading model...")
	    if model_name not in text_pipes:
	        text_pipes[model_name] = pipeline(
	            "text-generation",
	            model=text_models[model_name],
	            # Same precision policy as the image/video pipelines: half
	            # precision on GPU, full precision on CPU.
	            torch_dtype=torch_dtype,
	            device=0 if device == "cuda" else -1,
	        )
	    pipe = text_pipes[model_name]

	    progress(0.4, desc="Generating text...")
	    # max_new_tokens bounds only the continuation; max_length also counts the
	    # prompt tokens, so a long prompt would leave no room to generate.
	    outputs = pipe(prompt, max_new_tokens=100, do_sample=True)
	    result = outputs[0]["generated_text"]
	    text_cache[key] = result

	    progress(1.0, desc="Done.")
	    return result

	def _load_video_pipeline(model_name):
	    """Load the diffusers pipeline registered under ``model_name`` and prepare it for inference."""
	    repo_id = video_models[model_name]

	    if torch_dtype == torch.float16:
	        # Prefer the smaller fp16 weight files, but not every repository
	        # publishes that variant - fall back to the default weights and let
	        # torch_dtype do the cast.
	        # NOTE(review): the exception types raised for a missing variant
	        # depend on the diffusers version - confirm against the one pinned.
	        try:
	            pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype, variant="fp16")
	        except (ValueError, OSError) as exc:
	            warnings.warn(f"fp16 variant unavailable for {model_name!r}, loading default weights: {exc}")
	            pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
	    else:
	        pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)

	    # Move to the target device while `pipe` is still the pipeline object.
	    pipe.to(device)

	    if has_xformers and device == "cuda":
	        try:
	            pipe.enable_xformers_memory_efficient_attention()
	        except Exception as exc:
	            # Best effort: keep the default attention, but do not hide why.
	            warnings.warn(f"xformers attention not enabled for {model_name!r}: {exc}")

	    # torch.compile() applied to the pipeline itself returns a plain function
	    # (the pipeline is callable but not an nn.Module), which has no .to() or
	    # .enable_*() methods. Compile the denoiser sub-module instead.
	    # Restricted to CUDA because compiling for CPU depends on a host compiler
	    # toolchain being present - NOTE(review): relax if that is guaranteed.
	    if device == "cuda" and hasattr(torch, "compile"):
	        for attr in ("unet", "transformer"):
	            denoiser = getattr(pipe, attr, None)
	            if isinstance(denoiser, torch.nn.Module):
	                setattr(pipe, attr, torch.compile(denoiser))

	    return pipe


	def generate_video(prompt, model_name, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
	    """Generate a video for ``prompt``, reusing cached pipelines and results.

	    Args:
	        prompt: Text prompt passed to the pipeline.
	        model_name: Key into ``video_models`` selecting the checkpoint.
	        seed: Seed to use when ``randomize_seed`` is false.
	        randomize_seed: When true, ignore ``seed`` and draw one from
	            ``[0, MAX_SEED]``.
	        progress: Gradio progress tracker.

	    Returns:
	        A ``(video_path, seed)`` tuple: the path returned by
	        ``export_to_video`` and the integer seed that was actually used.
	    """
	    # The seed may arrive as a float (e.g. from a slider without an integer
	    # step); normalise it so the cache key and the returned value are stable.
	    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

	    key = hash_inputs(prompt, model_name, seed)
	    if key in video_cache:
	        # Progress given as a float is a completion fraction in [0, 1].
	        progress(1.0, desc="Using cached video.")
	        return video_cache[key], seed

	    progress(0.1, desc="Loading model...")
	    if model_name not in video_pipes:
	        video_pipes[model_name] = _load_video_pipeline(model_name)
	    pipe = video_pipes[model_name]

	    progress(0.4, desc="Generating video...")
	    # A private generator avoids reseeding the process-wide RNG that other
	    # in-flight requests would share.
	    generator = torch.Generator(device="cpu").manual_seed(seed)
	    result = pipe(prompt=prompt, generator=generator, num_inference_steps=15)
	    video_frames = result.frames[0]
	    video_path = export_to_video(video_frames)
	    video_cache[key] = video_path

	    progress(1.0, desc="Done.")
	    return video_path, seed

	# Gradio interface: one tab per modality, each wiring its inputs to the
	# matching generate_* function.
	with gr.Blocks() as demo:
	    gr.Markdown("# ⚡ Fast Multi-Model AI Playground with Caching")

	    with gr.Tabs():
	        # Image Generation
	        with gr.Tab("🖼️ Image Generation"):
	            img_prompt = gr.Textbox(label="Prompt")
	            img_model = gr.Dropdown(choices=list(image_models), value="Stable Diffusion 1.5 (light)", label="Image Model")
	            # step=1 keeps the seed a whole number; without it the slider can
	            # emit fractional values, which cannot be used as a seed as-is.
	            img_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
	            img_rand = gr.Checkbox(label="Randomize seed", value=True)
	            img_btn = gr.Button("Generate Image")
	            img_out = gr.Image()
	            # The seed slider is also an output so the UI shows the seed used.
	            img_btn.click(fn=generate_image, inputs=[img_prompt, img_model, img_seed, img_rand], outputs=[img_out, img_seed])

	        # Text Generation
	        with gr.Tab("📝 Text Generation"):
	            txt_prompt = gr.Textbox(label="Prompt")
	            txt_model = gr.Dropdown(choices=list(text_models), value="GPT-2 (light)", label="Text Model")
	            txt_btn = gr.Button("Generate Text")
	            txt_out = gr.Textbox(label="Output Text")
	            txt_btn.click(fn=generate_text, inputs=[txt_prompt, txt_model], outputs=[txt_out])

	        # Video Generation
	        with gr.Tab("🎥 Video Generation"):
	            vid_prompt = gr.Textbox(label="Prompt")
	            vid_model = gr.Dropdown(choices=list(video_models), value="CogVideoX-2B", label="Video Model")
	            vid_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
	            vid_rand = gr.Checkbox(label="Randomize seed", value=True)
	            vid_btn = gr.Button("Generate Video")
	            vid_out = gr.Video()
	            # The seed slider is also an output so the UI shows the seed used.
	            vid_btn.click(fn=generate_video, inputs=[vid_prompt, vid_model, vid_seed, vid_rand], outputs=[vid_out, vid_seed])

	# Start the server only when run as a script, so importing this module (for
	# example to reuse `demo`) has no side effect beyond building the UI.
	if __name__ == "__main__":
	    demo.launch(show_error=True)