import gradio as gr
import spaces
import random
from functools import partial
import warnings

from ltx_video.inference import infer, InferenceConfig

warnings.filterwarnings("ignore", category=FutureWarning)


@spaces.GPU
def create(
    prompt,
    input_image_filepath=None,
    input_video_filepath=None,
    ui_frames_to_use=16,
    fps=8,
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    height_ui=512,
    width_ui=704,
    seed_ui=42,
    randomize_seed=True,
    progress=gr.Progress(track_tqdm=True),
    mode="text-to-video",
):
    """Generate a video with the LTX Video model.

    Supports three modes: text-to-video, image-to-video, and video-to-video.
    The first five parameters are ordered to line up with the positional
    `inputs` lists wired to the buttons below.
    """
    # Pick the seed: a fresh random one per call, or the fixed UI value.
    used_seed = random.randint(0, 2**32 - 1) if randomize_seed else seed_ui
    output_path = f"output_{mode}_{used_seed}.mp4"

    # Validate mode-specific required inputs.
    if mode == "image-to-video":
        if not input_image_filepath:
            raise gr.Error("An input image is required for image-to-video mode.")
    elif mode == "video-to-video":
        if not input_video_filepath:
            raise gr.Error("An input video is required for video-to-video mode.")
    elif mode != "text-to-video":
        raise gr.Error(
            f"Invalid mode: {mode}. Must be one of: "
            "text-to-video, image-to-video, video-to-video"
        )

    config = InferenceConfig(
        pipeline_config="configs/ltxv-2b-0.9.6-dev.yaml",
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height_ui,
        width=width_ui,
        num_frames=ui_frames_to_use,
        seed=used_seed,
        output_path=output_path,
    )
    # Note: `fps` is collected from the UI but only affects output if the
    # installed ltx_video version exposes a frame-rate field on InferenceConfig.

    # Attach the conditioning image or video when the mode requires one.
    if mode == "image-to-video":
        config.input_media_path = input_image_filepath
    elif mode == "video-to-video":
        config.input_media_path = input_video_filepath

    # Run inference; the decoded video is written to output_path.
    infer(config)

    return output_path, f"✅ Done! Seed: {used_seed}"
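
# Hedged sketch: a quick local smoke test for `create` that bypasses the UI.
# It assumes the checkpoint referenced by configs/ltxv-2b-0.9.6-dev.yaml is
# available locally; the prompt and frame counts are illustrative only.
def _smoke_test():
    video_path, status = create(
        "A red fox running through fresh snow",
        ui_frames_to_use=16,
        fps=8,
        mode="text-to-video",
    )
    print(status, "->", video_path)
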
# ---- Gradio Blocks & UI ----
with gr.Blocks(title="AI Video Converter", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 AI Video Converter")
    gr.Markdown("Convert text, images, and videos into stunning AI-generated videos!")

    with gr.Tabs():
        # --- Text to Video ---
        with gr.Tab("📝 Text to Video"):
            gr.Markdown("### Generate videos from text descriptions")
            with gr.Row():
                with gr.Column():
                    text_prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Describe the video you want to create...",
                        value="A Nigerian woman dancing on the streets of Lagos, Nigeria",
                        lines=3,
                    )
                    text_num_frames = gr.Slider(
                        minimum=8, maximum=32, value=16, step=1, label="Number of Frames"
                    )
                    text_fps = gr.Slider(
                        minimum=4, maximum=30, value=8, step=1, label="Frames Per Second"
                    )
                    text_generate_video_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    text_output_video = gr.Video(label="Generated Video")
                    text_status = gr.Textbox(label="Status", interactive=False)

        # --- Image to Video ---
        with gr.Tab("🖼️ Image to Video"):
            gr.Markdown("### Animate images into videos")
            with gr.Row():
                with gr.Column():
                    image_input = gr.Image(
                        label="Input Image",
                        type="filepath",
                        sources=["upload", "webcam", "clipboard"],
                    )
                    image_text_prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Describe the video you want to create...",
                        value="The creature from the image starts to move",
                        lines=3,
                    )
                    image_num_frames = gr.Slider(
                        minimum=8, maximum=50, value=25, step=1, label="Number of Frames"
                    )
                    image_fps = gr.Slider(
                        minimum=4, maximum=30, value=8, step=1, label="Frames Per Second"
                    )
                    image_generate_video_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    image_output_video = gr.Video(label="Generated Video")
                    image_status = gr.Textbox(label="Status", interactive=False)

        # --- Video to Video ---
        with gr.Tab("🎥 Video to Video"):
            gr.Markdown("### Transform videos with AI")
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="Input Video")
                    video_prompt = gr.Textbox(
                        label="Transformation Prompt",
                        placeholder="Describe how you want to transform the video...",
                        lines=3,
                    )
                    # Surfaced in the UI, but not yet consumed by InferenceConfig,
                    # so it is intentionally left out of vgv_inputs below.
                    video_strength = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.8, step=0.1,
                        label="Transformation Strength",
                    )
                    video_generate_video_btn = gr.Button("Transform Video", variant="primary")
                with gr.Column():
                    video_output_video = gr.Video(label="Transformed Video")
                    video_status = gr.Textbox(label="Status", interactive=False)

    # --- Inputs (positional: prompt, image path, video path, frames, fps) ---
    tgv_inputs = [text_prompt, gr.State(None), gr.State(None), text_num_frames, text_fps]
    igv_inputs = [image_text_prompt, image_input, gr.State(None), image_num_frames, image_fps]
    vgv_inputs = [video_prompt, gr.State(None), video_input]

    # --- Outputs ---
    tgv_outputs = [text_output_video, text_status]
    igv_outputs = [image_output_video, image_status]
    vgv_outputs = [video_output_video, video_status]

    # --- Button Logic ---
    text_generate_video_btn.click(
        fn=partial(create, mode="text-to-video"),
        inputs=tgv_inputs,
        outputs=tgv_outputs,
    )
    image_generate_video_btn.click(
        fn=partial(create, mode="image-to-video"),
        inputs=igv_inputs,
        outputs=igv_outputs,
    )
    video_generate_video_btn.click(
        fn=partial(create, mode="video-to-video"),
        inputs=vgv_inputs,
        outputs=vgv_outputs,
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
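
# Usage notes (assumptions; adjust to your environment):
#   - Install gradio and spaces from PyPI, and ltx_video per its repository's
#     installation instructions.
#   - Run `python app.py` and open the printed local URL.
#   - On a Hugging Face Space, the @spaces.GPU decorator requests a GPU per
#     call; running locally, it is effectively a no-op.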