# EPiC / gradio_app.py — Gradio demo app (author: Muhammad Taqi Raza)
import os
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import gradio as gr
# -----------------------------
# Setup paths and env
# -----------------------------
HF_HOME = "/app/hf_cache"
os.environ["HF_HOME"] = HF_HOME
os.environ["TRANSFORMERS_CACHE"] = HF_HOME
os.makedirs(HF_HOME, exist_ok=True)
PRETRAINED_DIR = "/app/pretrained"
os.makedirs(PRETRAINED_DIR, exist_ok=True)
# -----------------------------
# Step 1: Optional Model Download
# -----------------------------
def download_models():
expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
if not Path(expected_model).exists():
print("βš™οΈ Downloading pretrained models...")
try:
subprocess.check_call(["bash", "download/download_models.sh"])
print("βœ… Models downloaded.")
except subprocess.CalledProcessError as e:
print(f"❌ Model download failed: {e}")
else:
print("βœ… Pretrained models already exist.")
download_models()
# -----------------------------
# Step 2: Inference Logic
# -----------------------------
def run_epic_inference(video_path, caption, motion_type):
temp_input_path = "/app/temp_input.mp4"
output_dir = f"/app/output_anchor"
video_output_path = f"{output_dir}/masked_videos/output.mp4"
traj_name = motion_type
traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
# Save uploaded video
if video_path:
os.system(f"cp '{video_path}' {temp_input_path}")
command = [
"python", "/app/inference/v2v_data/inference.py",
"--video_path", temp_input_path,
"--stride", "1",
"--out_dir", output_dir,
"--radius_scale", "1",
"--camera", "traj",
"--mask",
"--target_pose", "0", "30", "-0.6", "0", "0",
"--traj_txt", traj_txt,
"--save_name", "output",
"--mode", "gradual",
]
# Run inference command
try:
result = subprocess.run(command, capture_output=True, text=True, check=True)
print("Getting Anchor Videos run successfully.")
logs = result.stdout
except subprocess.CalledProcessError as e:
logs = f"❌ Inference failed:\n{e.stderr}"
return logs, None
# Locate the output video
if video_output_path:
return logs, str(video_output_path)
else:
return f"Inference succeeded but no output video found in {output_dir}", None
def print_output_directory(out_dir):
result = ""
for root, dirs, files in os.walk(out_dir):
level = root.replace(out_dir, '').count(os.sep)
indent = ' ' * 4 * level
result += f"{indent}{os.path.basename(root)}/"
sub_indent = ' ' * 4 * (level + 1)
for f in files:
result += f"{sub_indent}{f}\n"
return result
def inference(video_path, caption, motion_type):
logs, video_masked = run_epic_inference(video_path, caption, motion_type)
MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
ckpt_steps=500
ckpt_dir="/app/out/EPiC_pretrained"
ckpt_file=f"checkpoint-{ckpt_steps}.pt"
ckpt_path=f"{ckpt_dir}/{ckpt_file}"
video_root_dir= f"/app/output_anchor"
out_dir=f"/app/output"
command = [
"python", "/app/inference/cli_demo_camera_i2v_pcd.py",
"--video_root_dir", video_root_dir,
"--base_model_path", MODEL_PATH,
"--controlnet_model_path", ckpt_path,
"--output_path", out_dir,
"--start_camera_idx", "0",
"--end_camera_idx", "8",
"--controlnet_weights", "1.0",
"--controlnet_guidance_start", "0.0",
"--controlnet_guidance_end", "0.4",
"--controlnet_input_channels", "3",
"--controlnet_transformer_num_attn_heads", "4",
"--controlnet_transformer_attention_head_dim", "64",
"--controlnet_transformer_out_proj_dim_factor", "64",
"--controlnet_transformer_out_proj_dim_zero_init",
"--vae_channels", "16",
"--num_frames", "49",
"--controlnet_transformer_num_layers", "8",
"--infer_with_mask",
"--pool_style", "max",
"--seed", "43"
]
# Run the command
result = subprocess.run(command, capture_output=True, text=True)
if result.returncode == 0:
print("Inference completed successfully.")
else:
print(f"Error occurred during inference: {result.stderr}")
# Print output directory contents
logs = result.stdout
result = print_output_directory(out_dir)
return logs+result, str(f"{out_dir}/00000_43_out.mp4")
# output 43
# output/ 00000_43_out.mp4
# 00000_43_reference.mp4
# 00000_43_out_reference.mp4
# -----------------------------
# Step 3: Create Gradio UI
# -----------------------------
demo = gr.Interface(
fn=inference,
inputs=[
gr.Video(label="Upload Video (MP4)"),
gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
gr.Dropdown(
choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
label="Camera Motion Type",
value="zoom_in",
),
],
outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
title="🎬 EPiC: Efficient Video Camera Control",
description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
)
# -----------------------------
# Step 4: Launch App
# -----------------------------
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)