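# Gradio Space: convert text, images, and videos into AI-generated videos with
# the LTX-Video pipeline (text-to-video, image-to-video, video-to-video).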
import gradio as gr
import spaces
import torch
import numpy as np
import os
import random
from ltx_video.inference import infer, InferenceConfig
from functools import partial
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
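
# @spaces.GPU asks Hugging Face ZeroGPU to attach a GPU for the duration of
# each call; outside a Spaces environment the decorator is effectively a no-op.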
@spaces.GPU
def create(
    prompt,
    input_image_filepath=None,
    input_video_filepath=None,
    ui_frames_to_use=16,
    fps=8,
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    height_ui=512,
    width_ui=704,
    duration_ui=2.0,
    seed_ui=42,
    randomize_seed=True,
    ui_guidance_scale=3.0,
    improve_texture_flag=True,
    mode="text-to-video",
    progress=gr.Progress(track_tqdm=True),
):
    """
    Generate a video with the LTX-Video model.

    The leading parameters are filled positionally by the Gradio inputs of each
    tab; `mode` is bound with functools.partial in the click handlers.
    Returns the path of the generated MP4 and a status message.
    """
    # Pick the seed: honour the randomize flag, otherwise use the UI value
    if randomize_seed:
        used_seed = random.randint(0, 2**32 - 1)
    else:
        used_seed = int(seed_ui)
    output_path = f"output_{mode}_{used_seed}.mp4"
    # Validate mode-specific required inputs
    if mode == "image-to-video":
        if not input_image_filepath:
            raise gr.Error("An input image is required for image-to-video mode.")
    elif mode == "video-to-video":
        if not input_video_filepath:
            raise gr.Error("An input video is required for video-to-video mode.")
    elif mode == "text-to-video":
        pass  # no file inputs needed for text-to-video
    else:
        raise gr.Error(f"Invalid mode: {mode}. Must be one of: text-to-video, image-to-video, video-to-video.")
config = InferenceConfig(
pipeline_config="configs/ltxv-2b-0.9.6-dev.yaml",
prompt=prompt,
negative_prompt=negative_prompt,
height=height_ui,
width=width_ui,
num_frames=ui_frames_to_use,
seed=used_seed,
output_path=output_path
)
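    # NOTE: duration_ui, ui_guidance_scale, improve_texture_flag and fps are
    # collected from the UI but are not forwarded to InferenceConfig here; the
    # pipeline_config YAML supplies its own defaults for those settings.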
# attach initial image or video if mode requires
if mode == "image-to-video" and input_image_filepath:
config.input_media_path = input_image_filepath
elif mode == "video-to-video" and input_video_filepath:
config.input_media_path = input_video_filepath
# run inference
infer(config)
return output_path, f"βœ… Done! Seed: {used_seed}"
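
# Example (hypothetical, bypassing the UI): call create() directly from Python.
# Assumes the LTX-Video repository layout and the pipeline config referenced
# above are available on the machine running this script.
#
#   video_path, status = create(
#       "A red fox running through fresh snow",
#       ui_frames_to_use=16,
#       fps=8,
#       mode="text-to-video",
#   )
#   print(status, video_path)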
# ---- Gradio Blocks & UI ----
with gr.Blocks(title="AI Video Converter", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🎬 AI Video Converter")
gr.Markdown("Convert text, images, and videos into stunning AI-generated videos!")
with gr.Tabs():
# --- Text to Video ---
with gr.Tab("πŸ“ Text to Video"):
gr.Markdown("### Generate videos from text descriptions")
with gr.Row():
with gr.Column():
text_prompt = gr.Textbox(
label="Text Prompt",
placeholder="Describe the video you want to create...",
value="A Nigerian woman dancing on the streets of Lagos, Nigeria",
lines=3
)
                    text_num_frames = gr.Slider(minimum=8, maximum=32, value=16, step=1, label="Number of Frames")
                    text_fps = gr.Slider(minimum=4, maximum=30, value=8, step=1, label="Frames Per Second")
text_generate_video_btn = gr.Button("Generate Video", variant="primary")
with gr.Column():
text_output_video = gr.Video(label="Generated Video")
text_status = gr.Textbox(label="Status", interactive=False)
# --- Image to Video ---
with gr.Tab("πŸ–ΌοΈ Image to Video"):
gr.Markdown("### Animate images into videos")
with gr.Row():
with gr.Column():
                    image_input = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
image_text_prompt = gr.Textbox(
label="Text Prompt",
placeholder="Describe the video you want to create...",
value="The creature from the image starts to move",
lines=3
)
                    image_num_frames = gr.Slider(minimum=8, maximum=50, value=25, step=1, label="Number of Frames")
                    image_fps = gr.Slider(minimum=4, maximum=30, value=8, step=1, label="Frames Per Second")
image_generate_video_btn = gr.Button("Generate Video", variant="primary")
with gr.Column():
image_output_video = gr.Video(label="Generated Video")
image_status = gr.Textbox(label="Status", interactive=False)
# --- Video to Video ---
with gr.Tab("πŸŽ₯ Video to Video"):
gr.Markdown("### Transform videos with AI")
with gr.Row():
with gr.Column():
video_input = gr.Video(label="Input Video")
video_prompt = gr.Textbox(
label="Transformation Prompt",
placeholder="Describe how you want to transform the video...",
lines=3
)
                    video_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.1, label="Transformation Strength")
video_generate_video_btn = gr.Button("Transform Video", variant="primary")
with gr.Column():
video_output_video = gr.Video(label="Transformed Video")
video_status = gr.Textbox(label="Status", interactive=False)
    # --- Inputs (mapped positionally to create(): prompt, image, video, frames, fps) ---
    tgv_inputs = [text_prompt, gr.State(None), gr.State(None), text_num_frames, text_fps]
    igv_inputs = [image_text_prompt, image_input, gr.State(None), image_num_frames, image_fps]
    # video_strength stays UI-only for now: create()/InferenceConfig expose no
    # strength parameter in this version, so it is not passed to the model.
    vgv_inputs = [video_prompt, gr.State(None), video_input]
# --- Outputs ---
tgv_outputs = [text_output_video, text_status]
igv_outputs = [image_output_video, image_status]
vgv_outputs = [video_output_video, video_status]
# --- Button Logic ---
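    # functools.partial pins `mode`; the components in each inputs list fill
    # create()'s leading positional parameters (prompt, image, video, frames, fps).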
text_generate_video_btn.click(
fn=partial(create, mode="text-to-video"),
inputs=tgv_inputs,
outputs=tgv_outputs
)
image_generate_video_btn.click(
fn=partial(create, mode="image-to-video"),
inputs=igv_inputs,
outputs=igv_outputs
)
video_generate_video_btn.click(
fn=partial(create, mode="video-to-video"),
inputs=vgv_inputs,
outputs=vgv_outputs
)
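
# debug=True prints full tracebacks to the console; share=False keeps the demo
# local instead of opening a public gradio.live tunnel.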
if __name__ == "__main__":
demo.launch(debug=True, share=False)