Spaces:

Ntdeseb
/

ntia

Running

File size: 30,025 Bytes

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from diffusers import StableDiffusionPipeline, DiffusionPipeline
import requests
from PIL import Image
import io
import base64
import os
from huggingface_hub import login
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Hugging Face authentication: read the token from the environment and log
# in once at import time. Without a token the app keeps running, it only
# reports that gated models will not be available.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("⚠️ No se encontró HF_TOKEN - modelos gated no estarán disponibles")
else:
    try:
        login(token=HF_TOKEN)
    except Exception as auth_error:
        # A failed login is reported but never aborts start-up.
        print(f"⚠️ Error de autenticación: {auth_error}")
    else:
        print("✅ Autenticado con Hugging Face")

# Clases para los endpoints API
class TextRequest(BaseModel):
    """Request schema for text generation.

    The fields mirror the parameters of ``generate_text`` (prompt, model id
    and a ``max_length`` that defaults to 100). The class is not referenced
    anywhere else in the visible file.
    """
    prompt: str
    model_name: str
    max_length: int = 100

class ImageRequest(BaseModel):
    """Request schema for image generation.

    The fields mirror the parameters of ``generate_image``; the step count
    defaults to 20. The class is not referenced anywhere else in the
    visible file.
    """
    prompt: str
    model_name: str
    num_inference_steps: int = 20

class VideoRequest(BaseModel):
    """Request schema for video generation.

    The fields mirror the parameters of ``generate_video`` (16 frames and
    20 inference steps by default). The class is not referenced anywhere
    else in the visible file.
    """
    prompt: str
    model_name: str
    num_frames: int = 16
    num_inference_steps: int = 20

class ChatRequest(BaseModel):
    """Request schema for a chat turn.

    The fields mirror the parameters of ``chat_with_model``. The class is
    not referenced anywhere else in the visible file.
    """
    message: str
    # Presumably a list of {"role": ..., "content": ...} dicts, which is the
    # shape chat_with_model iterates over -- TODO confirm against the caller.
    history: list
    model_name: str

# Catalogue of openly available models, one mapping per task.
# Keys are Hugging Face model ids, values are the labels shown to the user.
_TEXT_MODELS = {
    "microsoft/DialoGPT-medium": "Chat conversacional",
    "microsoft/DialoGPT-large": "Chat conversacional avanzado",
    "microsoft/DialoGPT-small": "Chat conversacional rápido",
    "gpt2": "Generación de texto",
    "gpt2-medium": "GPT-2 mediano",
    "gpt2-large": "GPT-2 grande",
    "distilgpt2": "GPT-2 optimizado",
    "EleutherAI/gpt-neo-125M": "GPT-Neo pequeño",
    "EleutherAI/gpt-neo-1.3B": "GPT-Neo mediano",
    "facebook/opt-125m": "OPT pequeño",
    "facebook/opt-350m": "OPT mediano",
    "bigscience/bloom-560m": "BLOOM multilingüe",
    "bigscience/bloom-1b1": "BLOOM grande",
    "Helsinki-NLP/opus-mt-es-en": "Traductor español-inglés",
    "Helsinki-NLP/opus-mt-en-es": "Traductor inglés-español",
    # Newer text models
    "mistralai/Voxtral-Mini-3B-2507": "Voxtral Mini 3B - Multimodal",
    "tiiuae/falcon-7b-instruct": "Falcon 7B Instruct",
    "google/flan-t5-base": "Flan-T5 Base - Tareas múltiples",
}

_IMAGE_MODELS = {
    "CompVis/stable-diffusion-v1-4": "Stable Diffusion v1.4 (Libre)",
    "stabilityai/stable-diffusion-2-1": "Stable Diffusion 2.1",
    "stabilityai/stable-diffusion-xl-base-1.0": "SDXL Base",
    "stabilityai/stable-diffusion-3-medium": "SD 3 Medium",
    "prompthero/openjourney": "Midjourney Style",
    "WarriorMama777/OrangeMixs": "Orange Mixs",
    "hakurei/waifu-diffusion": "Waifu Diffusion",
    "black-forest-labs/FLUX.1-schnell": "FLUX.1 Schnell (Requiere acceso)",
    "black-forest-labs/FLUX.1-dev": "FLUX.1 Dev (Requiere acceso)",
    # Newer image models
    "CompVis/ldm-text2im-large-256": "Latent Diffusion Model 256",
}

_VIDEO_MODELS = {
    "damo-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B (Libre)",
    "ali-vilab/text-to-video-ms-1.7b": "Text-to-Video MS 1.7B Alt",
    "cerspense/zeroscope_v2_576w": "Zeroscope v2 576w (Libre)",
    "cerspense/zeroscope_v2_XL": "Zeroscope v2 XL (Libre)",
    "ByteDance/AnimateDiff-Lightning": "AnimateDiff Lightning (Libre)",
    "THUDM/CogVideoX-5b": "CogVideoX 5B (Libre)",
    "rain1011/pyramid-flow-sd3": "Pyramid Flow SD3 (Libre)",
    # Newer video models
    "ali-vilab/modelscope-damo-text-to-video-synthesis": "ModelScope Text-to-Video",
}

_CHAT_MODELS = {
    "microsoft/DialoGPT-medium": "Chat conversacional",
    "microsoft/DialoGPT-large": "Chat conversacional avanzado",
    "microsoft/DialoGPT-small": "Chat conversacional rápido",
    "facebook/opt-350m": "OPT conversacional",
    "bigscience/bloom-560m": "BLOOM multilingüe",
    # Newer chat models
    "mistralai/Voxtral-Mini-3B-2507": "Voxtral Mini 3B - Multimodal",
    "tiiuae/falcon-7b-instruct": "Falcon 7B Instruct",
}

MODELS = {
    "text": _TEXT_MODELS,
    "image": _IMAGE_MODELS,
    "video": _VIDEO_MODELS,
    "chat": _CHAT_MODELS,
}

# Process-wide cache of loaded models/pipelines, keyed by model id and shared
# by the text, image and video loaders.
model_cache = dict()

def load_text_model(model_name):
    """Return the cached tokenizer/model pair for ``model_name``, loading it on first use.

    The loader classes are chosen from substrings of the lower-cased model
    id: "opus-mt" uses the Marian classes, "flan-t5" the T5 classes, and
    every other id ``AutoTokenizer`` / ``AutoModelForCausalLM``. If anything
    raises while loading, the error is printed and
    "microsoft/DialoGPT-medium" is loaded and stored under the requested id
    instead; an error in that fallback propagates to the caller.

    Args:
        model_name: Hugging Face model id.

    Returns:
        dict with the keys "tokenizer", "model" and "type" ("text").
    """
    if model_name in model_cache:
        return model_cache[model_name]

    print(f"Cargando modelo de texto: {model_name}")

    from transformers import AutoTokenizer, AutoModelForCausalLM

    try:
        name_lc = model_name.lower()

        if "opus-mt" in name_lc:
            # Translation checkpoints
            from transformers import MarianMTModel, MarianTokenizer
            tok = MarianTokenizer.from_pretrained(model_name)
            net = MarianMTModel.from_pretrained(model_name)
        elif "flan-t5" in name_lc:
            # Flan-T5 checkpoints
            from transformers import T5Tokenizer, T5ForConditionalGeneration
            tok = T5Tokenizer.from_pretrained(model_name)
            net = T5ForConditionalGeneration.from_pretrained(model_name)
        else:
            # Falcon, Voxtral and every other id share the causal-LM loader;
            # they only differ in how the padding token is prepared.
            tok = AutoTokenizer.from_pretrained(model_name)
            net = AutoModelForCausalLM.from_pretrained(model_name)

            if "falcon" in name_lc or "voxtral" in name_lc:
                # Only fill in a padding token when the tokenizer has none.
                if tok.pad_token is None:
                    tok.pad_token = tok.eos_token
            elif "dialogpt" in name_lc:
                # DialoGPT always pads with EOS, on tokenizer and model config.
                tok.pad_token = tok.eos_token
                net.config.pad_token_id = net.config.eos_token_id

    except Exception as e:
        print(f"Error cargando modelo de texto {model_name}: {e}")
        # Fall back to a basic model, stored under the requested id.
        tok = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
        net = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
        tok.pad_token = tok.eos_token
        net.config.pad_token_id = net.config.eos_token_id

    entry = {
        "tokenizer": tok,
        "model": net,
        "type": "text"
    }
    model_cache[model_name] = entry
    return entry

def _load_sd14_fallback():
    """Load the Stable Diffusion v1.4 pipeline used whenever another model fails."""
    return StableDiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4",
        torch_dtype=torch.float32,
        safety_checker=None
    )


def _load_stable_diffusion(model_name):
    """Load ``model_name`` as a float32 StableDiffusionPipeline without safety checker."""
    return StableDiffusionPipeline.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        safety_checker=None,
        requires_safety_checker=False
    )


def _try_enable_cpu_offload(pipe):
    """Best-effort ``enable_model_cpu_offload``; return True only when it succeeded."""
    if not hasattr(pipe, 'enable_model_cpu_offload'):
        return False
    try:
        pipe.enable_model_cpu_offload()
    except Exception as e:
        # Offload is an optimisation only; a failure must not discard the pipeline.
        print(f"⚠️ No se pudo habilitar CPU offload: {e}")
        return False
    return True


def _load_flux_pipeline(model_name):
    """Load a FLUX pipeline in bfloat16 and try to enable CPU offload on it."""
    from diffusers import FluxPipeline
    print("🚀 Cargando FLUX Pipeline...")
    pipe = FluxPipeline.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        # `token` replaces the deprecated `use_auth_token` keyword.
        token=HF_TOKEN if HF_TOKEN else None
    )
    if _try_enable_cpu_offload(pipe):
        print("✅ FLUX con CPU offload habilitado")
    else:
        print("✅ FLUX cargado sin CPU offload")
    return pipe


def _build_image_pipeline(model_name):
    """Build the pipeline for ``model_name``.

    Returns a ``(pipe, offload_attempted)`` tuple. Model-specific load errors
    are printed and answered with the Stable Diffusion v1.4 fallback.
    """
    if "flux" in model_name.lower():
        try:
            return _load_flux_pipeline(model_name), True
        except Exception as e:
            print(f"❌ Error cargando FLUX: {e}")
            print("🔄 Fallback a Stable Diffusion...")
            return _load_sd14_fallback(), False

    if "stable-diffusion-2-1" in model_name:
        try:
            return _load_stable_diffusion(model_name), False
        except Exception as e:
            print(f"Error cargando SD 2.1: {e}")
            return _load_sd14_fallback(), False

    if "ldm-text2im" in model_name:
        try:
            pipe = DiffusionPipeline.from_pretrained(
                model_name,
                torch_dtype=torch.float32
            )
            return pipe, False
        except Exception as e:
            print(f"Error cargando LDM: {e}")
            return _load_sd14_fallback(), False

    # Every other id is loaded as a plain Stable Diffusion checkpoint.
    return _load_stable_diffusion(model_name), False


def load_image_model(model_name):
    """Return the cached image pipeline for ``model_name``, loading it on first use.

    FLUX, Stable Diffusion 2.1 and LDM ids get dedicated loaders; every
    other id is loaded with ``StableDiffusionPipeline``. Whenever loading
    fails, the error is printed and "CompVis/stable-diffusion-v1-4" is
    loaded and stored under the requested id instead.

    Args:
        model_name: Hugging Face model id.

    Returns:
        dict with the keys "pipeline" and "type" ("image").

    Raises:
        Exception: whatever the final Stable Diffusion v1.4 fallback raises.
    """
    if model_name in model_cache:
        return model_cache[model_name]

    print(f"Cargando modelo de imagen: {model_name}")

    try:
        pipe, offload_attempted = _build_image_pipeline(model_name)

        # Basic memory optimisations
        pipe.enable_attention_slicing()
        if not offload_attempted:
            # The FLUX loader already tried this once; do not repeat it.
            _try_enable_cpu_offload(pipe)

    except Exception as e:
        print(f"Error general cargando modelo de imagen {model_name}: {e}")
        # Last-resort fallback to SD 1.4
        try:
            pipe = _load_sd14_fallback()
            pipe.enable_attention_slicing()
        except Exception as fallback_error:
            print(f"Error crítico en fallback: {fallback_error}")
            raise

    model_cache[model_name] = {
        "pipeline": pipe,
        "type": "image"
    }
    return model_cache[model_name]

def load_video_model(model_name):
    """Return the cached video pipeline for ``model_name``, loading it on first use.

    Every id is loaded with ``DiffusionPipeline.from_pretrained`` in float32;
    ids containing "text-to-video" additionally request the "fp16" weight
    variant. If loading fails, the error is printed and
    "damo-vilab/text-to-video-ms-1.7b" is loaded and stored under the
    requested id instead.

    Args:
        model_name: Hugging Face model id.

    Returns:
        dict with the keys "pipeline" and "type" ("video").

    Raises:
        Exception: whatever the fallback pipeline raises while loading.
    """
    if model_name in model_cache:
        return model_cache[model_name]

    print(f"Cargando modelo de video: {model_name}")

    try:
        from diffusers import DiffusionPipeline

        # All model families share the same loader call; the only difference
        # is the weight variant requested for "text-to-video" ids.
        load_kwargs = {"torch_dtype": torch.float32}
        if "text-to-video" in model_name.lower():
            load_kwargs["variant"] = "fp16"
        pipe = DiffusionPipeline.from_pretrained(model_name, **load_kwargs)

        # Basic optimisations
        pipe.enable_attention_slicing()
        if hasattr(pipe, 'enable_model_cpu_offload'):
            try:
                pipe.enable_model_cpu_offload()
            except Exception as offload_error:
                # Same best-effort handling as the image loader: a failed
                # offload must not replace the requested model with the
                # fallback pipeline.
                print(f"⚠️ No se pudo habilitar CPU offload: {offload_error}")

    except Exception as e:
        print(f"Error cargando modelo de video {model_name}: {e}")
        # Fall back to a basic model
        try:
            from diffusers import DiffusionPipeline
            pipe = DiffusionPipeline.from_pretrained(
                "damo-vilab/text-to-video-ms-1.7b",
                torch_dtype=torch.float32
            )
            pipe.enable_attention_slicing()
        except Exception as fallback_error:
            print(f"Error crítico en fallback de video: {fallback_error}")
            raise

    model_cache[model_name] = {
        "pipeline": pipe,
        "type": "video"
    }
    return model_cache[model_name]

def generate_text(prompt, model_name, max_length=100):
    """Generate or translate text with the selected model.

    Ids containing "opus-mt" are run as translation with beam search; every
    other id is sampled at temperature 0.7. For DialoGPT ids the prompt is
    removed from the decoded output.

    Args:
        prompt: Input text.
        model_name: Hugging Face model id, passed to ``load_text_model``.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        The decoded text, or a string starting with
        "Error generando texto:" when anything raises.
    """
    try:
        bundle = load_text_model(model_name)
        tok = bundle["tokenizer"]
        net = bundle["model"]

        if "opus-mt" in model_name.lower():
            # Translation: truncate the input and decode with beam search.
            token_ids = tok.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
            with torch.no_grad():
                generated = net.generate(token_ids, max_length=max_length, num_beams=4, early_stopping=True)
            return tok.decode(generated[0], skip_special_tokens=True)

        # Free-form generation with sampling.
        token_ids = tok.encode(prompt, return_tensors="pt")
        with torch.no_grad():
            generated = net.generate(
                token_ids,
                max_length=max_length,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tok.eos_token_id
            )
        text = tok.decode(generated[0], skip_special_tokens=True)

        if "dialogpt" in model_name.lower():
            # Keep only the reply by removing the echoed prompt.
            text = text.replace(prompt, "").strip()
        return text

    except Exception as e:
        return f"Error generando texto: {str(e)}"

def generate_image(prompt, model_name, num_inference_steps=20):
    """Generate one image with the selected model.

    FLUX ids ignore ``num_inference_steps`` and run with fixed settings
    (1024x1024, guidance 3.5, 50 steps, random CPU seed); every other id
    runs with ``num_inference_steps`` and guidance 7.5.

    Args:
        prompt: Text description of the image.
        model_name: Hugging Face model id, passed to ``load_image_model``.
        num_inference_steps: Step count. Non-integer values (strings,
            floats) are converted with ``int``; values that cannot be
            converted fall back to 20.

    Returns:
        The first image produced by the pipeline, or a string starting with
        "Error generando imagen:" when anything raises.
    """
    try:
        print(f"Generando imagen con modelo: {model_name}")
        print(f"Prompt: {prompt}")
        print(f"Pasos: {num_inference_steps}")

        # Normalise the step count to an integer before it reaches the pipeline.
        if not isinstance(num_inference_steps, int):
            requested_steps = num_inference_steps
            try:
                num_inference_steps = int(requested_steps)
            except (TypeError, ValueError, OverflowError):
                num_inference_steps = 20
                # Report the rejected value, not the default that replaced it.
                print(f"⚠️ No se pudo convertir '{requested_steps}' a entero, usando 20")

        model_data = load_image_model(model_name)
        pipeline = model_data["pipeline"]

        if "flux" in model_name.lower():
            import random
            # Draw a fresh random seed for every image.
            random_seed = random.randint(0, 999999)
            print(f"🎲 Usando seed aleatorio para FLUX: {random_seed}")
            print("🔧 Parámetros FLUX: guidance_scale=3.5, steps=50, max_seq=512")

            image = pipeline(
                prompt,
                height=1024,
                width=1024,
                guidance_scale=3.5,
                num_inference_steps=50,
                max_sequence_length=512,
                generator=torch.Generator("cpu").manual_seed(random_seed)
            ).images[0]
        else:
            # Basic configuration for every other model.
            image = pipeline(
                prompt,
                num_inference_steps=num_inference_steps,
                guidance_scale=7.5
            ).images[0]

        print("Imagen generada exitosamente")
        return image

    except Exception as e:
        print(f"Error generando imagen: {str(e)}")
        return f"Error generando imagen: {str(e)}"

def generate_video(prompt, model_name, num_frames=16, num_inference_steps=20):
    """Run the selected video pipeline and return its frames.

    Zeroscope ids are additionally called with ``height=256`` and
    ``width=256``; every other id gets only the step and frame counts.

    Args:
        prompt: Text description of the video.
        model_name: Hugging Face model id, passed to ``load_video_model``.
        num_frames: Number of frames requested from the pipeline.
        num_inference_steps: Step count forwarded to the pipeline.

    Returns:
        The ``.frames`` attribute of the pipeline output, or a string
        starting with "Error generando video:" when anything raises.
    """
    # NOTE(review): the result is wired to a gr.Video output, which
    # presumably expects a video file path rather than raw frames -- confirm.
    try:
        print(f"Generando video con modelo: {model_name}")
        print(f"Prompt: {prompt}")
        print(f"Frames: {num_frames}")
        print(f"Pasos: {num_inference_steps}")

        video_pipe = load_video_model(model_name)["pipeline"]

        # AnimateDiff and the generic text-to-video models share one call
        # signature; only Zeroscope adds a fixed output resolution.
        call_kwargs = {
            "num_inference_steps": num_inference_steps,
            "num_frames": num_frames,
        }
        if "zeroscope" in model_name.lower():
            call_kwargs["height"] = 256
            call_kwargs["width"] = 256

        result = video_pipe(prompt, **call_kwargs)
        video_frames = result.frames

        print("Video generado exitosamente")
        return video_frames

    except Exception as e:
        failure = f"Error generando video: {str(e)}"
        print(failure)
        return failure

def chat_with_model(message, history, model_name):
    """Answer ``message`` with a text model and return the updated history.

    The history is flattened into a "User: ... / Assistant: ..." transcript,
    the model continues it by up to 50 tokens, and the text after the last
    "Assistant:" marker is taken as the reply.

    Args:
        message: The new user message.
        history: List of {"role": ..., "content": ...} dicts. It is extended
            in place; ``None`` is treated as an empty history.
        model_name: Hugging Face model id, passed to ``load_text_model``.

    Returns:
        The history list with the user message and the assistant reply
        appended. When anything raises, the reply is a message starting
        with "Error en el chat:".
    """
    # A missing history must not crash the error handler below, which
    # appends to it.
    if history is None:
        history = []

    try:
        model_data = load_text_model(model_name)
        tokenizer = model_data["tokenizer"]
        model = model_data["model"]

        # Rebuild the conversation transcript from the message dicts.
        turns = []
        for msg in history:
            if msg["role"] == "user":
                turns.append(f"User: {msg['content']}\n")
            elif msg["role"] == "assistant":
                turns.append(f"Assistant: {msg['content']}\n")
        turns.append(f"User: {message}\nAssistant:")
        conversation = "".join(turns)

        inputs = tokenizer.encode(conversation, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():
            outputs = model.generate(
                inputs,
                # Allow up to 50 tokens beyond the encoded transcript.
                max_length=inputs.shape[1] + 50,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the final "Assistant:" marker.
        response = response.split("Assistant:")[-1].strip()

        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response})

        return history

    except Exception as e:
        error_msg = f"Error en el chat: {str(e)}"
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": error_msg})
        return history

# Gradio interface
# One tab per task (text, chat, translation, image, video). Each tab builds
# its input widgets in the left column, its output widget in the right
# column, and wires a button to the matching generate_*/chat function.
with gr.Blocks(title="Modelos Libres de IA", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Modelos Libres de IA")
    gr.Markdown("### Genera texto e imágenes sin límites de cuota")

    with gr.Tabs():
        # Text generation tab
        with gr.TabItem("📝 Generación de Texto"):
            with gr.Row():
                with gr.Column():
                    text_model = gr.Dropdown(
                        choices=list(MODELS["text"].keys()),
                        value="microsoft/DialoGPT-medium",
                        label="Modelo de Texto"
                    )
                    text_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Escribe tu prompt aquí...",
                        lines=3
                    )
                    max_length = gr.Slider(
                        minimum=50,
                        maximum=200,
                        value=100,
                        step=10,
                        label="Longitud máxima"
                    )
                    text_btn = gr.Button("Generar Texto", variant="primary")

                with gr.Column():
                    text_output = gr.Textbox(
                        label="Resultado",
                        lines=10,
                        interactive=False
                    )

            text_btn.click(
                generate_text,
                inputs=[text_prompt, text_model, max_length],
                outputs=text_output
            )

        # Chat tab
        with gr.TabItem("💬 Chat"):
            with gr.Row():
                with gr.Column():
                    chat_model = gr.Dropdown(
                        choices=list(MODELS["chat"].keys()),
                        value="microsoft/DialoGPT-medium",
                        label="Modelo de Chat"
                    )

                with gr.Column():
                    # type="messages": the history is exchanged as role/content dicts,
                    # the format chat_with_model reads and appends.
                    chatbot = gr.Chatbot(
                        label="Chat",
                        height=400,
                        type="messages"
                    )
                    chat_input = gr.Textbox(
                        label="Mensaje",
                        placeholder="Escribe tu mensaje...",
                        lines=2
                    )
                    chat_btn = gr.Button("Enviar", variant="primary")

            # The button click and pressing Enter in the textbox run the same handler.
            chat_btn.click(
                chat_with_model,
                inputs=[chat_input, chatbot, chat_model],
                outputs=[chatbot]
            )

            chat_input.submit(
                chat_with_model,
                inputs=[chat_input, chatbot, chat_model],
                outputs=[chatbot]
            )

        # Translation tab
        with gr.TabItem("🌐 Traducción"):
            with gr.Row():
                with gr.Column():
                    translate_model = gr.Dropdown(
                        choices=["Helsinki-NLP/opus-mt-es-en", "Helsinki-NLP/opus-mt-en-es"],
                        value="Helsinki-NLP/opus-mt-es-en",
                        label="Modelo de Traducción"
                    )
                    translate_text = gr.Textbox(
                        label="Texto a traducir",
                        placeholder="Escribe el texto que quieres traducir...",
                        lines=3
                    )
                    translate_btn = gr.Button("Traducir", variant="primary")

                with gr.Column():
                    translate_output = gr.Textbox(
                        label="Traducción",
                        lines=3,
                        interactive=False
                    )

            # Translation reuses generate_text; the hidden slider created inline
            # supplies a fixed value of 100 for its max_length argument.
            translate_btn.click(
                generate_text,
                inputs=[translate_text, translate_model, gr.Slider(value=100, visible=False)],
                outputs=translate_output
            )

        # Image generation tab
        with gr.TabItem("🎨 Generación de Imágenes"):
            with gr.Row():
                with gr.Column():
                    image_model = gr.Dropdown(
                        choices=list(MODELS["image"].keys()),
                        value="CompVis/stable-diffusion-v1-4",
                        label="Modelo de Imagen"
                    )
                    image_prompt = gr.Textbox(
                        label="Prompt de Imagen",
                        placeholder="Describe la imagen que quieres generar...",
                        lines=3
                    )
                    steps = gr.Slider(
                        minimum=10,
                        maximum=50,
                        value=15,
                        step=5,
                        label="Pasos de inferencia"
                    )
                    image_btn = gr.Button("Generar Imagen", variant="primary")

                with gr.Column():
                    image_output = gr.Image(
                        label="Imagen Generada",
                        type="pil"
                    )

            # NOTE(review): generate_image returns an error string on failure;
            # confirm how gr.Image handles a plain string value.
            image_btn.click(
                generate_image,
                inputs=[image_prompt, image_model, steps],
                outputs=image_output
            )

        # Video generation tab
        with gr.TabItem("🎬 Generación de Videos"):
            with gr.Row():
                with gr.Column():
                    video_model = gr.Dropdown(
                        choices=list(MODELS["video"].keys()),
                        value="damo-vilab/text-to-video-ms-1.7b",
                        label="Modelo de Video"
                    )
                    video_prompt = gr.Textbox(
                        label="Prompt de Video",
                        placeholder="Describe el video que quieres generar...",
                        lines=3
                    )
                    num_frames = gr.Slider(
                        minimum=8,
                        maximum=32,
                        value=16,
                        step=4,
                        label="Número de frames"
                    )
                    video_steps = gr.Slider(
                        minimum=10,
                        maximum=50,
                        value=20,
                        step=5,
                        label="Pasos de inferencia"
                    )
                    video_btn = gr.Button("Generar Video", variant="primary")

                with gr.Column():
                    video_output = gr.Video(
                        label="Video Generado",
                        format="mp4"
                    )

            # NOTE(review): generate_video returns the pipeline's frames (or an
            # error string); confirm that gr.Video accepts that value.
            video_btn.click(
                generate_video,
                inputs=[video_prompt, video_model, num_frames, video_steps],
                outputs=video_output
            )

# Entry point for Hugging Face Spaces.
# FastAPI was removed; only the Gradio app is launched.
if __name__ == "__main__":
    demo.launch()