Spaces: Running on Zero
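The listing below is a ZeroGPU Space that wraps LTX-Video inference in a three-tab Gradio app (text-to-video, image-to-video, video-to-video). On ZeroGPU hardware a GPU is allocated per request rather than held by the Space, so the inference entry point is decorated with `@spaces.GPU`; note that `import spaces` comes before `torch` so the ZeroGPU runtime can patch CUDA initialization.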
import gradio as gr
import spaces
import torch
import numpy as np
import os
import random
from ltx_video.inference import infer, InferenceConfig
from functools import partial
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)
@spaces.GPU  # ZeroGPU: request a GPU only for the duration of this call
def create(
    prompt,
    input_image_filepath=None,
    input_video_filepath=None,
    ui_frames_to_use=16,
    fps=8,
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    height_ui=512,
    width_ui=704,
    duration_ui=2.0,
    seed_ui=42,
    randomize_seed=True,
    ui_guidance_scale=3.0,
    improve_texture_flag=True,
    mode="text-to-video",
    progress=gr.Progress(track_tqdm=True),
):
    """
    Generate a video with the LTX-Video model.

    The first five parameters line up with the positional `inputs` lists wired
    to the buttons below. `duration_ui`, `fps`, `ui_guidance_scale`, and
    `improve_texture_flag` are accepted from the UI but not forwarded to
    InferenceConfig here.
    """
    # Pick the seed: honor the randomize flag instead of always using the slider value.
    used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed_ui)
    output_path = f"output_{mode}_{used_seed}.mp4"
    # Validate mode-specific required inputs.
    if mode == "image-to-video":
        if not input_image_filepath:
            raise gr.Error("An input image is required for image-to-video mode.")
    elif mode == "video-to-video":
        if not input_video_filepath:
            raise gr.Error("An input video is required for video-to-video mode.")
    elif mode == "text-to-video":
        pass  # no file inputs needed
    else:
        raise gr.Error(f"Invalid mode: {mode}. Must be one of: text-to-video, image-to-video, video-to-video.")
    config = InferenceConfig(
        pipeline_config="configs/ltxv-2b-0.9.6-dev.yaml",
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height_ui,
        width=width_ui,
        num_frames=ui_frames_to_use,
        seed=used_seed,
        output_path=output_path,
    )
    # Attach the conditioning image or video when the mode requires one.
    if mode == "image-to-video":
        config.input_media_path = input_image_filepath
    elif mode == "video-to-video":
        config.input_media_path = input_video_filepath
    # Run inference.
    infer(config)
    return output_path, f"✅ Done! Seed: {used_seed}"
# ---- Gradio Blocks & UI ----
with gr.Blocks(title="AI Video Converter", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 AI Video Converter")
    gr.Markdown("Convert text, images, and videos into stunning AI-generated videos!")
    with gr.Tabs():
        # --- Text to Video ---
        with gr.Tab("📝 Text to Video"):
            gr.Markdown("### Generate videos from text descriptions")
            with gr.Row():
                with gr.Column():
                    text_prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Describe the video you want to create...",
                        value="A Nigerian woman dancing on the streets of Lagos, Nigeria",
                        lines=3,
                    )
                    text_num_frames = gr.Slider(minimum=8, maximum=32, value=16, step=1, label="Number of Frames")
                    text_fps = gr.Slider(minimum=4, maximum=30, value=8, step=1, label="Frames Per Second")
                    text_generate_video_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    text_output_video = gr.Video(label="Generated Video")
                    text_status = gr.Textbox(label="Status", interactive=False)
        # --- Image to Video ---
        with gr.Tab("🖼️ Image to Video"):
            gr.Markdown("### Animate images into videos")
            with gr.Row():
                with gr.Column():
                    image_input = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
                    image_text_prompt = gr.Textbox(
                        label="Text Prompt",
                        placeholder="Describe the video you want to create...",
                        value="The creature from the image starts to move",
                        lines=3,
                    )
                    image_num_frames = gr.Slider(minimum=8, maximum=50, value=25, step=1, label="Number of Frames")
                    image_fps = gr.Slider(minimum=4, maximum=30, value=8, step=1, label="Frames Per Second")
                    image_generate_video_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    image_output_video = gr.Video(label="Generated Video")
                    image_status = gr.Textbox(label="Status", interactive=False)
        # --- Video to Video ---
        with gr.Tab("🎥 Video to Video"):
            gr.Markdown("### Transform videos with AI")
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="Input Video")
                    video_prompt = gr.Textbox(
                        label="Transformation Prompt",
                        placeholder="Describe how you want to transform the video...",
                        lines=3,
                    )
                    video_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.1, label="Transformation Strength")
                    video_generate_video_btn = gr.Button("Transform Video", variant="primary")
                with gr.Column():
                    video_output_video = gr.Video(label="Transformed Video")
                    video_status = gr.Textbox(label="Status", interactive=False)
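    # Gradio passes `inputs` to the callback positionally, so each list below
    # must line up with create()'s signature: (prompt, input_image_filepath,
    # input_video_filepath, ui_frames_to_use, fps, ...). gr.State(...) supplies
    # placeholder values for components a given tab does not expose.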
    # --- Inputs ---
    tgv_inputs = [text_prompt, gr.State(None), gr.State(None), text_num_frames, text_fps]
    igv_inputs = [image_text_prompt, image_input, gr.State(None), image_num_frames, image_fps]
    # The video tab exposes no frame/fps sliders, so fixed placeholders stand in;
    # video_strength is not consumed by create() and is deliberately left unwired.
    vgv_inputs = [video_prompt, gr.State(None), video_input, gr.State(16), gr.State(8)]
    # --- Outputs ---
    tgv_outputs = [text_output_video, text_status]
    igv_outputs = [image_output_video, image_status]
    vgv_outputs = [video_output_video, video_status]
    # --- Button Logic ---
    text_generate_video_btn.click(
        fn=partial(create, mode="text-to-video"),
        inputs=tgv_inputs,
        outputs=tgv_outputs,
    )
    image_generate_video_btn.click(
        fn=partial(create, mode="image-to-video"),
        inputs=igv_inputs,
        outputs=igv_outputs,
    )
    video_generate_video_btn.click(
        fn=partial(create, mode="video-to-video"),
        inputs=vgv_inputs,
        outputs=vgv_outputs,
    )
if __name__ == "__main__":
    demo.launch(debug=True, share=False)
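For a quick smoke test outside the UI, create() can also be called directly (off ZeroGPU hardware, @spaces.GPU runs the function as-is). A minimal sketch, assuming the LTX-Video checkpoints and configs/ltxv-2b-0.9.6-dev.yaml are available locally; the prompt is illustrative:

# Hypothetical direct invocation for local testing; not part of the Space.
video_path, status = create(
    "A red fox running through fresh snow",  # example prompt, not from the app
    ui_frames_to_use=16,
    fps=8,
    seed_ui=42,
    randomize_seed=False,  # fixed seed gives a predictable output filename
    mode="text-to-video",
)
print(video_path, status)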