import gradio as gr
import spaces
import random
from functools import partial
import warnings

from ltx_video.inference import infer, InferenceConfig

warnings.filterwarnings("ignore", category=FutureWarning)


@spaces.GPU
def create(
    prompt,
    input_image_filepath=None,
    input_video_filepath=None,
    ui_frames_to_use=16,
    fps=8,
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    height_ui=512,
    width_ui=704,
    seed_ui=42,
    randomize_seed=True,
    progress=gr.Progress(track_tqdm=True),
    mode="text-to-video",
):
    """Generate a video with the LTX Video model.

    Supports three modes: text-to-video, image-to-video, and video-to-video.
    The first five parameters are ordered to line up with the positional
    `inputs` lists wired to the buttons below.
    """
    # Pick the seed: a fresh random one per call, or the fixed UI value.
    used_seed = random.randint(0, 2**32 - 1) if randomize_seed else seed_ui
    output_path = f"output_{mode}_{used_seed}.mp4"

    # Validate mode-specific required inputs.
    if mode == "image-to-video":
        if not input_image_filepath:
            raise gr.Error("An input image is required for image-to-video mode.")
    elif mode == "video-to-video":
        if not input_video_filepath:
            raise gr.Error("An input video is required for video-to-video mode.")
    elif mode != "text-to-video":
        raise gr.Error(
            f"Invalid mode: {mode}. Must be one of: "
            "text-to-video, image-to-video, video-to-video"
        )

    config = InferenceConfig(
        pipeline_config="configs/ltxv-2b-0.9.6-dev.yaml",
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height_ui,
        width=width_ui,
        num_frames=ui_frames_to_use,
        seed=used_seed,
        output_path=output_path,
    )
    # Note: `fps` is collected from the UI but only affects output if the
    # installed ltx_video version exposes a frame-rate field on InferenceConfig.

    # Attach the conditioning image or video when the mode requires one.
    if mode == "image-to-video":
        config.input_media_path = input_image_filepath
    elif mode == "video-to-video":
        config.input_media_path = input_video_filepath

    # Run inference; the decoded video is written to output_path.
    infer(config)

    return output_path, f"✅ Done! Seed: {used_seed}"
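
# Hedged sketch: a quick local smoke test for `create` that bypasses the UI.
# It assumes the checkpoint referenced by configs/ltxv-2b-0.9.6-dev.yaml is
# available locally; the prompt and frame counts are illustrative only.
def _smoke_test():
    video_path, status = create(
        "A red fox running through fresh snow",
        ui_frames_to_use=16,
        fps=8,
        mode="text-to-video",
    )
    print(status, "->", video_path)
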
# ---- Gradio Blocks & UI ----
with gr.Blocks(title="AI Video Converter", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 AI Video Converter")
    gr.Markdown("Convert text, images, and videos into stunning AI-generated videos!")

    with gr.Tabs():
        # --- Text to Video ---
        with gr.Tab("📝 Text to Video"):
            gr.Markdown("### Generate videos from text descriptions")
            with gr.Row():
                with gr.Column():
                    text_prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Describe the video you want to create...",
                        value="A Nigerian woman dancing on the streets of Lagos, Nigeria",
                        lines=3,
                    )
                    text_num_frames = gr.Slider(
                        minimum=8, maximum=32, value=16, step=1, label="Number of Frames"
                    )
                    text_fps = gr.Slider(
                        minimum=4, maximum=30, value=8, step=1, label="Frames Per Second"
                    )
                    text_generate_video_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    text_output_video = gr.Video(label="Generated Video")
                    text_status = gr.Textbox(label="Status", interactive=False)

        # --- Image to Video ---
        with gr.Tab("🖼️ Image to Video"):
            gr.Markdown("### Animate images into videos")
            with gr.Row():
                with gr.Column():
                    image_input = gr.Image(
                        label="Input Image",
                        type="filepath",
                        sources=["upload", "webcam", "clipboard"],
                    )
                    image_text_prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Describe the video you want to create...",
                        value="The creature from the image starts to move",
                        lines=3,
                    )
                    image_num_frames = gr.Slider(
                        minimum=8, maximum=50, value=25, step=1, label="Number of Frames"
                    )
                    image_fps = gr.Slider(
                        minimum=4, maximum=30, value=8, step=1, label="Frames Per Second"
                    )
                    image_generate_video_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    image_output_video = gr.Video(label="Generated Video")
                    image_status = gr.Textbox(label="Status", interactive=False)

        # --- Video to Video ---
        with gr.Tab("🎥 Video to Video"):
            gr.Markdown("### Transform videos with AI")
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="Input Video")
                    video_prompt = gr.Textbox(
                        label="Transformation Prompt",
                        placeholder="Describe how you want to transform the video...",
                        lines=3,
                    )
                    # Surfaced in the UI, but not yet consumed by InferenceConfig,
                    # so it is intentionally left out of vgv_inputs below.
                    video_strength = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.8, step=0.1,
                        label="Transformation Strength",
                    )
                    video_generate_video_btn = gr.Button("Transform Video", variant="primary")
                with gr.Column():
                    video_output_video = gr.Video(label="Transformed Video")
                    video_status = gr.Textbox(label="Status", interactive=False)

    # --- Inputs (positional: prompt, image path, video path, frames, fps) ---
    tgv_inputs = [text_prompt, gr.State(None), gr.State(None), text_num_frames, text_fps]
    igv_inputs = [image_text_prompt, image_input, gr.State(None), image_num_frames, image_fps]
    vgv_inputs = [video_prompt, gr.State(None), video_input]

    # --- Outputs ---
    tgv_outputs = [text_output_video, text_status]
    igv_outputs = [image_output_video, image_status]
    vgv_outputs = [video_output_video, video_status]

    # --- Button Logic ---
    text_generate_video_btn.click(
        fn=partial(create, mode="text-to-video"),
        inputs=tgv_inputs,
        outputs=tgv_outputs,
    )
    image_generate_video_btn.click(
        fn=partial(create, mode="image-to-video"),
        inputs=igv_inputs,
        outputs=igv_outputs,
    )
    video_generate_video_btn.click(
        fn=partial(create, mode="video-to-video"),
        inputs=vgv_inputs,
        outputs=vgv_outputs,
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
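
# Usage notes (assumptions; adjust to your environment):
#   - Install gradio and spaces from PyPI, and ltx_video per its repository's
#     installation instructions.
#   - Run `python app.py` and open the printed local URL.
#   - On a Hugging Face Space, the @spaces.GPU decorator requests a GPU per
#     call; running locally, it is effectively a no-op.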