Spaces:

roll-ai
/

EPiC-fps

Paused

App Files Files Community

Muhammad Taqi Raza committed 3 days ago

Commit

e7707d9

1 Parent(s): 3fe0080

add options to gradio

Browse files

Files changed (2) hide show

gradio_app.py +216 -62
inference/v2v_data/demo.py +1 -0

gradio_app.py CHANGED Viewed

@@ -1,3 +1,174 @@
 import os
 import subprocess
 from datetime import datetime
@@ -15,7 +186,6 @@ os.makedirs(HF_HOME, exist_ok=True)
 PRETRAINED_DIR = "/app/pretrained"
 os.makedirs(PRETRAINED_DIR, exist_ok=True)
 # -----------------------------
 # Step 1: Optional Model Download
 # -----------------------------
@@ -31,75 +201,69 @@ def download_models():
     else:
         print("✅ Pretrained models already exist.")
 download_models()
 # -----------------------------
 # Step 2: Inference Logic
 # -----------------------------
-def run_epic_inference(video_path, caption, motion_type):
     temp_input_path = "/app/temp_input.mp4"
-    output_dir = f"/app/output_anchor"
     video_output_path = f"{output_dir}/masked_videos/output.mp4"
-    traj_name = motion_type
-    traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
     # Save uploaded video
     if video_path:
         os.system(f"cp '{video_path}' {temp_input_path}")
     command = [
-    "python", "/app/inference/v2v_data/inference.py",
-    "--video_path", temp_input_path,
-    "--stride", "1",
-    "--out_dir", output_dir,
-    "--radius_scale", "1",
-    "--camera", "traj",
-    "--mask",
-    "--target_pose", "0", "30", "-0.6", "0", "0",
-    "--traj_txt", traj_txt,
-    "--save_name", "output",
-    "--mode", "gradual",
     ]
-    # Run inference command
     try:
         result = subprocess.run(command, capture_output=True, text=True, check=True)
-        print("Getting Anchor Videos run successfully.")
         logs = result.stdout
     except subprocess.CalledProcessError as e:
         logs = f"❌ Inference failed:\n{e.stderr}"
         return logs, None
-    # Locate the output video
-    if video_output_path:
-        return logs, str(video_output_path)
-    else:
-        return f"Inference succeeded but no output video found in {output_dir}", None
 def print_output_directory(out_dir):
     result = ""
     for root, dirs, files in os.walk(out_dir):
         level = root.replace(out_dir, '').count(os.sep)
         indent = ' ' * 4 * level
-        result += f"{indent}{os.path.basename(root)}/"
         sub_indent = ' ' * 4 * (level + 1)
         for f in files:
             result += f"{sub_indent}{f}\n"
     return result
-def inference(video_path, caption, motion_type):
-    logs, video_masked = run_epic_inference(video_path, caption, motion_type)
-    MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
-    ckpt_steps=500
-    ckpt_dir="/app/out/EPiC_pretrained"
-    ckpt_file=f"checkpoint-{ckpt_steps}.pt"
-    ckpt_path=f"{ckpt_dir}/{ckpt_file}"
-    video_root_dir= f"/app/output_anchor"
-    out_dir=f"/app/output"
     command = [
         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
@@ -118,30 +282,19 @@ def inference(video_path, caption, motion_type):
         "--controlnet_transformer_out_proj_dim_factor", "64",
         "--controlnet_transformer_out_proj_dim_zero_init",
         "--vae_channels", "16",
-        "--num_frames", "49",
         "--controlnet_transformer_num_layers", "8",
         "--infer_with_mask",
         "--pool_style", "max",
-        "--seed", "43"
     ]
-    # Run the command
     result = subprocess.run(command, capture_output=True, text=True)
-    if result.returncode == 0:
-        print("Inference completed successfully.")
-    else:
-        print(f"Error occurred during inference: {result.stderr}")
-    # Print output directory contents
-    logs = result.stdout
-    result = print_output_directory(out_dir)
-    return logs+result, str(f"{out_dir}/00000_43_out.mp4")
-# output 43
-# output/    00000_43_out.mp4
-#     00000_43_reference.mp4
-#     00000_43_out_reference.mp4
 # -----------------------------
 # Step 3: Create Gradio UI
@@ -150,14 +303,15 @@ demo = gr.Interface(
     fn=inference,
     inputs=[
         gr.Video(label="Upload Video (MP4)"),
-        gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
-        gr.Dropdown(
-            choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
-            label="Camera Motion Type",
-            value="zoom_in",
-        ),
     ],
-    outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
     title="🎬 EPiC: Efficient Video Camera Control",
     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
 )
@@ -166,4 +320,4 @@ demo = gr.Interface(
 # Step 4: Launch App
 # -----------------------------
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

+# import os
+# import subprocess
+# from datetime import datetime
+# from pathlib import Path
+# import gradio as gr
+# # -----------------------------
+# # Setup paths and env
+# # -----------------------------
+# HF_HOME = "/app/hf_cache"
+# os.environ["HF_HOME"] = HF_HOME
+# os.environ["TRANSFORMERS_CACHE"] = HF_HOME
+# os.makedirs(HF_HOME, exist_ok=True)
+# PRETRAINED_DIR = "/app/pretrained"
+# os.makedirs(PRETRAINED_DIR, exist_ok=True)
+# # -----------------------------
+# # Step 1: Optional Model Download
+# # -----------------------------
+# def download_models():
+#     expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
+#     if not Path(expected_model).exists():
+#         print("⚙️ Downloading pretrained models...")
+#         try:
+#             subprocess.check_call(["bash", "download/download_models.sh"])
+#             print("✅ Models downloaded.")
+#         except subprocess.CalledProcessError as e:
+#             print(f"❌ Model download failed: {e}")
+#     else:
+#         print("✅ Pretrained models already exist.")
+# download_models()
+# # -----------------------------
+# # Step 2: Inference Logic
+# # -----------------------------
+# def run_epic_inference(video_path, caption, motion_type):
+#     temp_input_path = "/app/temp_input.mp4"
+#     output_dir = f"/app/output_anchor"
+#     video_output_path = f"{output_dir}/masked_videos/output.mp4"
+#     traj_name = motion_type
+#     traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
+#     # Save uploaded video
+#     if video_path:
+#         os.system(f"cp '{video_path}' {temp_input_path}")
+#     command = [
+#     "python", "/app/inference/v2v_data/inference.py",
+#     "--video_path", temp_input_path,
+#     "--stride", "1",
+#     "--out_dir", output_dir,
+#     "--radius_scale", "1",
+#     "--camera", "target",
+#     "--mask",
+#     "--target_pose", "0", "30", "-0.6", "0", "0",
+#     "--traj_txt", traj_txt,
+#     "--save_name", "output",
+#     "--mode", "gradual",
+#     ]
+#     # Run inference command
+#     try:
+#         result = subprocess.run(command, capture_output=True, text=True, check=True)
+#         print("Getting Anchor Videos run successfully.")
+#         logs = result.stdout
+#     except subprocess.CalledProcessError as e:
+#         logs = f"❌ Inference failed:\n{e.stderr}"
+#         return logs, None
+#     # Locate the output video
+#     if video_output_path:
+#         return logs, str(video_output_path)
+#     else:
+#         return f"Inference succeeded but no output video found in {output_dir}", None
+# def print_output_directory(out_dir):
+#     result = ""
+#     for root, dirs, files in os.walk(out_dir):
+#         level = root.replace(out_dir, '').count(os.sep)
+#         indent = ' ' * 4 * level
+#         result += f"{indent}{os.path.basename(root)}/"
+#         sub_indent = ' ' * 4 * (level + 1)
+#         for f in files:
+#             result += f"{sub_indent}{f}\n"
+#     return result
+# def inference(video_path, caption, motion_type):
+#     logs, video_masked = run_epic_inference(video_path, caption, motion_type)
+#     MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
+#     ckpt_steps=500
+#     ckpt_dir="/app/out/EPiC_pretrained"
+#     ckpt_file=f"checkpoint-{ckpt_steps}.pt"
+#     ckpt_path=f"{ckpt_dir}/{ckpt_file}"
+#     video_root_dir= f"/app/output_anchor"
+#     out_dir=f"/app/output"
+#     command = [
+#         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
+#         "--video_root_dir", video_root_dir,
+#         "--base_model_path", MODEL_PATH,
+#         "--controlnet_model_path", ckpt_path,
+#         "--output_path", out_dir,
+#         "--start_camera_idx", "0",
+#         "--end_camera_idx", "8",
+#         "--controlnet_weights", "1.0",
+#         "--controlnet_guidance_start", "0.0",
+#         "--controlnet_guidance_end", "0.4",
+#         "--controlnet_input_channels", "3",
+#         "--controlnet_transformer_num_attn_heads", "4",
+#         "--controlnet_transformer_attention_head_dim", "64",
+#         "--controlnet_transformer_out_proj_dim_factor", "64",
+#         "--controlnet_transformer_out_proj_dim_zero_init",
+#         "--vae_channels", "16",
+#         "--num_frames", "49",
+#         "--controlnet_transformer_num_layers", "8",
+#         "--infer_with_mask",
+#         "--pool_style", "max",
+#         "--seed", "43"
+#     ]
+#     # Run the command
+#     result = subprocess.run(command, capture_output=True, text=True)
+#     if result.returncode == 0:
+#         print("Inference completed successfully.")
+#     else:
+#         print(f"Error occurred during inference: {result.stderr}")
+#     # Print output directory contents
+#     logs = result.stdout
+#     result = print_output_directory(out_dir)
+#     return logs+result, str(f"{out_dir}/00000_43_out.mp4")
+# # output 43
+# # output/    00000_43_out.mp4
+# #     00000_43_reference.mp4
+# #     00000_43_out_reference.mp4
+# # -----------------------------
+# # Step 3: Create Gradio UI
+# # -----------------------------
+# demo = gr.Interface(
+#     fn=inference,
+#     inputs=[
+#         gr.Video(label="Upload Video (MP4)"),
+#         gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
+#         gr.Dropdown(
+#             choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
+#             label="Camera Motion Type",
+#             value="zoom_in",
+#         ),
+#     ],
+#     outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
+#     title="🎬 EPiC: Efficient Video Camera Control",
+#     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
+# )
+# # -----------------------------
+# # Step 4: Launch App
+# # -----------------------------
+# if __name__ == "__main__":
+#     demo.launch(server_name="0.0.0.0", server_port=7860)
 import os
 import subprocess
 from datetime import datetime
 PRETRAINED_DIR = "/app/pretrained"
 os.makedirs(PRETRAINED_DIR, exist_ok=True)
 # -----------------------------
 # Step 1: Optional Model Download
 # -----------------------------
     else:
         print("✅ Pretrained models already exist.")
 download_models()
 # -----------------------------
 # Step 2: Inference Logic
 # -----------------------------
def run_epic_inference(video_path, target_pose, mode):
    """Generate the masked anchor video for an uploaded clip.

    Copies the upload to a fixed temporary path, runs
    ``/app/inference/v2v_data/inference.py`` with ``--camera target`` and the
    requested pose and mode, then reports where the masked video was written.

    Args:
        video_path: Path of the uploaded video. When falsy, the copy step is
            skipped and whatever already exists at the temporary path is used.
        target_pose: Five whitespace-separated numbers, "θ φ r x y".
        mode: Value forwarded unchanged to the script's ``--mode`` option.

    Returns:
        A ``(logs, output_path)`` tuple. ``output_path`` is ``None`` when the
        pose is malformed, the upload cannot be copied, the script exits with
        a non-zero status, or the expected output file is missing.
    """
    import shutil  # local import: only this function needs it

    temp_input_path = "/app/temp_input.mp4"
    output_dir = "/app/output_anchor"
    video_output_path = f"{output_dir}/masked_videos/output.mp4"

    # Save uploaded video. A direct file copy avoids building a shell command
    # from the path (quotes in the name would break it) and surfaces failures
    # instead of silently running inference on a stale temp file.
    if video_path:
        try:
            shutil.copyfile(video_path, temp_input_path)
        except OSError as e:
            return f"❌ Could not copy uploaded video:\n{e}", None

    # Exactly five numeric tokens are required. Checking that each one parses
    # as a float also keeps option-like text (e.g. "--out_dir") from being
    # smuggled into the command line below.
    pose_tokens = (target_pose or "").split()
    try:
        theta, phi, r, x, y = pose_tokens
        for token in pose_tokens:
            float(token)
    except ValueError:
        return "❌ Invalid target pose format. Use: θ φ r x y", None

    command = [
        "python", "/app/inference/v2v_data/inference.py",
        "--video_path", temp_input_path,
        "--stride", "1",
        "--out_dir", output_dir,
        "--radius_scale", "1",
        "--camera", "target",
        "--mask",
        "--target_pose", theta, phi, r, x, y,
        "--save_name", "output",
        "--mode", mode,
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        logs = result.stdout
    except subprocess.CalledProcessError as e:
        logs = f"❌ Inference failed:\n{e.stderr}"
        return logs, None

    # Always return a flat 2-tuple; the path is reported only if it exists.
    if os.path.exists(video_output_path):
        return logs, video_output_path
    return logs, None


def print_output_directory(out_dir):
    """Render the directory tree under ``out_dir`` as indented text.

    Each directory is printed as ``name/`` followed by its files, indented
    four spaces per nesting level. Entries are sorted so the listing is
    stable between runs.

    Args:
        out_dir: Directory to list. A trailing path separator is tolerated.

    Returns:
        The listing, one entry per line, or an empty string when ``out_dir``
        is empty or does not exist.
    """
    if not out_dir:
        return ""

    # Normalising drops any trailing separator, which would otherwise make the
    # root's basename empty and skew the depth computation.
    base = os.path.normpath(out_dir)
    lines = []
    for root, dirs, files in os.walk(base):
        dirs.sort()  # in-place sort also fixes the order os.walk descends in
        rel = os.path.relpath(root, base)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        lines.append(f"{' ' * 4 * level}{os.path.basename(root)}/")
        child_indent = ' ' * 4 * (level + 1)
        lines.extend(f"{child_indent}{name}" for name in sorted(files))
    return "".join(f"{line}\n" for line in lines)
+def inference(video_path, num_frames, fps, target_pose, mode):
+    logs, video_masked = run_epic_inference(video_path, target_pose, mode)
+    MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
+    ckpt_steps = 500
+    ckpt_dir = "/app/out/EPiC_pretrained"
+    ckpt_file = f"checkpoint-{ckpt_steps}.pt"
+    ckpt_path = f"{ckpt_dir}/{ckpt_file}"
+    video_root_dir = "/app/output_anchor"
+    out_dir = "/app/output"
     command = [
         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
         "--controlnet_transformer_out_proj_dim_factor", "64",
         "--controlnet_transformer_out_proj_dim_zero_init",
         "--vae_channels", "16",
+        "--num_frames", str(num_frames),
         "--controlnet_transformer_num_layers", "8",
         "--infer_with_mask",
         "--pool_style", "max",
+        "--seed", "43",
+        "--fps", str(fps)
     ]
     result = subprocess.run(command, capture_output=True, text=True)
+    logs += "\n" + result.stdout
+    result_dir = print_output_directory(out_dir)
+    return logs + result_dir, str(f"{out_dir}/00000_43_out.mp4")
 # -----------------------------
 # Step 3: Create Gradio UI
     fn=inference,
     inputs=[
         gr.Video(label="Upload Video (MP4)"),
+        gr.Slider(minimum=1, maximum=120, value=50, step=1, label="Number of Frames"),
+        gr.Slider(minimum=1, maximum=90, value=10, step=1, label="FPS"),
+        gr.Textbox(label="Target Pose (θ φ r x y)", placeholder="e.g., 0 30 -0.6 0 0"),
+        gr.Dropdown(choices=["gradual", "direct", "bullet"], value="gradual", label="Camera Mode"),
+    ],
+    outputs=[
+        gr.Textbox(label="Inference Logs"),
+        gr.Video(label="Generated Video")
     ],
     title="🎬 EPiC: Efficient Video Camera Control",
     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
 )
 # Step 4: Launch App
 # -----------------------------
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

inference/v2v_data/demo.py CHANGED Viewed

@@ -600,6 +600,7 @@ class GetAnchorVideos:
             .to(opts.device)
             .unsqueeze(0)
         )
         if opts.camera == 'target':
             dtheta, dphi, dr, dx, dy = opts.target_pose
             poses = generate_traj_specified(

             .to(opts.device)
             .unsqueeze(0)
         )
         if opts.camera == 'target':
             dtheta, dphi, dr, dx, dy = opts.target_pose
             poses = generate_traj_specified(