"""Gradio app for EPiC video camera control.

Two-stage pipeline:
  1. ``run_epic_inference`` renders an anchor (masked) video for the chosen
     camera trajectory via the v2v inference script.
  2. ``inference`` feeds that anchor into the CogVideoX ControlNet demo to
     produce the final camera-controlled video.
"""

import os
import shutil
import subprocess
from pathlib import Path

import gradio as gr

# -----------------------------
# Setup paths and env
# -----------------------------
# Point the Hugging Face caches at a writable location inside the container.
HF_HOME = "/app/hf_cache"
os.environ["HF_HOME"] = HF_HOME
# Legacy alias still read by older versions of transformers.
os.environ["TRANSFORMERS_CACHE"] = HF_HOME
os.makedirs(HF_HOME, exist_ok=True)

PRETRAINED_DIR = "/app/pretrained"
os.makedirs(PRETRAINED_DIR, exist_ok=True)


# -----------------------------
# Step 1: Optional Model Download
# -----------------------------
def download_models() -> None:
    """Download pretrained weights once, using the RAFT checkpoint as a sentinel.

    If the RAFT checkpoint already exists the download script is skipped.
    Failures are logged but not raised, so the app still starts.
    """
    expected_model = Path(PRETRAINED_DIR) / "RAFT" / "raft-things.pth"
    if expected_model.exists():
        print("✅ Pretrained models already exist.")
        return
    print("⚙️ Downloading pretrained models...")
    try:
        subprocess.check_call(["bash", "download/download_models.sh"])
        print("✅ Models downloaded.")
    except subprocess.CalledProcessError as e:
        print(f"❌ Model download failed: {e}")


download_models()


# -----------------------------
# Step 2: Inference Logic
# -----------------------------
def run_epic_inference(video_path, caption, motion_type):
    """Stage 1: render the anchor (masked) video for the chosen camera motion.

    Args:
        video_path: Path to the uploaded MP4 (may be None/empty, in which
            case the previously staged input is reused).
        caption: Scene description (not used by this stage).
        motion_type: Name of a trajectory file under test/trajs (no extension).

    Returns:
        Tuple ``(logs, video_output_path)``; the path is ``None`` when the
        subprocess fails or no output video was produced.
    """
    temp_input_path = "/app/temp_input.mp4"
    output_dir = "/app/output_anchor"
    video_output_path = f"{output_dir}/masked_videos/output.mp4"
    traj_txt = f"/app/inference/v2v_data/test/trajs/{motion_type}.txt"

    # Stage the uploaded video. shutil.copy avoids the shell-quoting /
    # injection hazards of the original `os.system("cp '…' …")`.
    if video_path:
        shutil.copy(video_path, temp_input_path)

    command = [
        "python",
        "/app/inference/v2v_data/inference.py",
        "--video_path", temp_input_path,
        "--stride", "1",
        "--out_dir", output_dir,
        "--radius_scale", "1",
        "--camera", "traj",
        "--mask",
        "--target_pose", "0", "30", "-0.6", "0", "0",
        "--traj_txt", traj_txt,
        "--save_name", "output",
        "--mode", "gradual",
    ]

    # Run inference command; check=True turns a non-zero exit into an exception.
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        print("Getting Anchor Videos run successfully.")
        logs = result.stdout
    except subprocess.CalledProcessError as e:
        logs = f"❌ Inference failed:\n{e.stderr}"
        return logs, None

    # Locate the output video. Bug fix: the original tested the (always
    # truthy) path *string*, so the "not found" branch was unreachable —
    # check the file on disk instead.
    if Path(video_output_path).exists():
        return logs, str(video_output_path)
    return f"Inference succeeded but no output video found in {output_dir}", None


def print_output_directory(out_dir):
    """Return an indented, tree-like listing of ``out_dir`` as one string."""
    result = ""
    for root, dirs, files in os.walk(out_dir):
        level = root.replace(out_dir, "").count(os.sep)
        indent = " " * 4 * level
        # Bug fix: the original omitted the newline after directory names,
        # gluing each directory to the first file entry that followed it.
        result += f"{indent}{os.path.basename(root)}/\n"
        sub_indent = " " * 4 * (level + 1)
        for f in files:
            result += f"{sub_indent}{f}\n"
    return result


def inference(video_path, caption, motion_type):
    """Full two-stage pipeline driven by the Gradio UI.

    Args:
        video_path: Uploaded MP4 path.
        caption: Scene description from the textbox (passed through).
        motion_type: Camera-motion choice from the dropdown.

    Returns:
        Tuple ``(logs_text, output_video_path)`` for the two Gradio outputs.
    """
    # Stage 1: anchor/masked video. NOTE(review): its logs are overwritten
    # by stage-2 output below, matching the original behavior.
    logs, video_masked = run_epic_inference(video_path, caption, motion_type)

    MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
    ckpt_steps = 500
    ckpt_dir = "/app/out/EPiC_pretrained"
    ckpt_path = f"{ckpt_dir}/checkpoint-{ckpt_steps}.pt"
    video_root_dir = "/app/output_anchor"
    out_dir = "/app/output"

    command = [
        "python",
        "/app/inference/cli_demo_camera_i2v_pcd.py",
        "--video_root_dir", video_root_dir,
        "--base_model_path", MODEL_PATH,
        "--controlnet_model_path", ckpt_path,
        "--output_path", out_dir,
        "--start_camera_idx", "0",
        "--end_camera_idx", "8",
        "--controlnet_weights", "1.0",
        "--controlnet_guidance_start", "0.0",
        "--controlnet_guidance_end", "0.4",
        "--controlnet_input_channels", "3",
        "--controlnet_transformer_num_attn_heads", "4",
        "--controlnet_transformer_attention_head_dim", "64",
        "--controlnet_transformer_out_proj_dim_factor", "64",
        "--controlnet_transformer_out_proj_dim_zero_init",
        "--vae_channels", "16",
        "--num_frames", "49",
        "--controlnet_transformer_num_layers", "8",
        "--infer_with_mask",
        "--pool_style", "max",
        "--seed", "43",
    ]

    # Stage 2: final video. No check=True here — failures are surfaced in
    # the logs pane instead of raising.
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode == 0:
        print("Inference completed successfully.")
    else:
        print(f"Error occurred during inference: {result.stderr}")

    # Combine stage-2 stdout with a listing of the output directory.
    logs = result.stdout
    tree = print_output_directory(out_dir)

    # Expected contents of /app/output (seed 43, camera idx 0):
    #   00000_43_out.mp4
    #   00000_43_reference.mp4
    #   00000_43_out_reference.mp4
    return logs + tree, f"{out_dir}/00000_43_out.mp4"


# -----------------------------
# Step 3: Create Gradio UI
# -----------------------------
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Video(label="Upload Video (MP4)"),
        gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
        gr.Dropdown(
            choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
            label="Camera Motion Type",
            value="zoom_in",
        ),
    ],
    outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
    title="🎬 EPiC: Efficient Video Camera Control",
    description=(
        "Upload a video, describe the scene, and apply cinematic camera "
        "motion using pretrained EPiC models."
    ),
)

# -----------------------------
# Step 4: Launch App
# -----------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)