Muhammad Taqi Raza committed
Commit e7707d9 · Parent: 3fe0080

add options to gradio

Files changed (2):
  1. gradio_app.py +216 -62
  2. inference/v2v_data/demo.py +1 -0
gradio_app.py CHANGED
@@ -1,3 +1,174 @@
+# import os
+# import subprocess
+# from datetime import datetime
+# from pathlib import Path
+# import gradio as gr
+
+# # -----------------------------
+# # Setup paths and env
+# # -----------------------------
+# HF_HOME = "/app/hf_cache"
+# os.environ["HF_HOME"] = HF_HOME
+# os.environ["TRANSFORMERS_CACHE"] = HF_HOME
+# os.makedirs(HF_HOME, exist_ok=True)
+
+# PRETRAINED_DIR = "/app/pretrained"
+# os.makedirs(PRETRAINED_DIR, exist_ok=True)
+
+
+# # -----------------------------
+# # Step 1: Optional Model Download
+# # -----------------------------
+# def download_models():
+#     expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
+#     if not Path(expected_model).exists():
+#         print("⚙️ Downloading pretrained models...")
+#         try:
+#             subprocess.check_call(["bash", "download/download_models.sh"])
+#             print("✅ Models downloaded.")
+#         except subprocess.CalledProcessError as e:
+#             print(f"❌ Model download failed: {e}")
+#     else:
+#         print("✅ Pretrained models already exist.")
+
+
+# download_models()
+
+
+# # -----------------------------
+# # Step 2: Inference Logic
+# # -----------------------------
+
+# def run_epic_inference(video_path, caption, motion_type):
+#     temp_input_path = "/app/temp_input.mp4"
+#     output_dir = f"/app/output_anchor"
+#     video_output_path = f"{output_dir}/masked_videos/output.mp4"
+#     traj_name = motion_type
+#     traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
+#     # Save uploaded video
+#     if video_path:
+#         os.system(f"cp '{video_path}' {temp_input_path}")
+
+#     command = [
+#         "python", "/app/inference/v2v_data/inference.py",
+#         "--video_path", temp_input_path,
+#         "--stride", "1",
+#         "--out_dir", output_dir,
+#         "--radius_scale", "1",
+#         "--camera", "target",
+#         "--mask",
+#         "--target_pose", "0", "30", "-0.6", "0", "0",
+#         "--traj_txt", traj_txt,
+#         "--save_name", "output",
+#         "--mode", "gradual",
+#     ]
+
+#     # Run inference command
+#     try:
+#         result = subprocess.run(command, capture_output=True, text=True, check=True)
+#         print("Getting Anchor Videos run successfully.")
+#         logs = result.stdout
+#     except subprocess.CalledProcessError as e:
+#         logs = f"❌ Inference failed:\n{e.stderr}"
+#         return logs, None
+
+#     # Locate the output video
+#     if video_output_path:
+#         return logs, str(video_output_path)
+#     else:
+#         return f"Inference succeeded but no output video found in {output_dir}", None
+# def print_output_directory(out_dir):
+#     result = ""
+#     for root, dirs, files in os.walk(out_dir):
+#         level = root.replace(out_dir, '').count(os.sep)
+#         indent = ' ' * 4 * level
+#         result += f"{indent}{os.path.basename(root)}/"
+#         sub_indent = ' ' * 4 * (level + 1)
+#         for f in files:
+#             result += f"{sub_indent}{f}\n"
+#     return result
+
+# def inference(video_path, caption, motion_type):
+#     logs, video_masked = run_epic_inference(video_path, caption, motion_type)
+
+#     MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
+
+#     ckpt_steps=500
+#     ckpt_dir="/app/out/EPiC_pretrained"
+#     ckpt_file=f"checkpoint-{ckpt_steps}.pt"
+#     ckpt_path=f"{ckpt_dir}/{ckpt_file}"
+
+#     video_root_dir= f"/app/output_anchor"
+#     out_dir=f"/app/output"
+
+#     command = [
+#         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
+#         "--video_root_dir", video_root_dir,
+#         "--base_model_path", MODEL_PATH,
+#         "--controlnet_model_path", ckpt_path,
+#         "--output_path", out_dir,
+#         "--start_camera_idx", "0",
+#         "--end_camera_idx", "8",
+#         "--controlnet_weights", "1.0",
+#         "--controlnet_guidance_start", "0.0",
+#         "--controlnet_guidance_end", "0.4",
+#         "--controlnet_input_channels", "3",
+#         "--controlnet_transformer_num_attn_heads", "4",
+#         "--controlnet_transformer_attention_head_dim", "64",
+#         "--controlnet_transformer_out_proj_dim_factor", "64",
+#         "--controlnet_transformer_out_proj_dim_zero_init",
+#         "--vae_channels", "16",
+#         "--num_frames", "49",
+#         "--controlnet_transformer_num_layers", "8",
+#         "--infer_with_mask",
+#         "--pool_style", "max",
+#         "--seed", "43"
+#     ]
+
+#     # Run the command
+#     result = subprocess.run(command, capture_output=True, text=True)
+#     if result.returncode == 0:
+#         print("Inference completed successfully.")
+#     else:
+#         print(f"Error occurred during inference: {result.stderr}")
+
+#     # Print output directory contents
+#     logs = result.stdout
+#     result = print_output_directory(out_dir)
+
+#     return logs+result, str(f"{out_dir}/00000_43_out.mp4")
+
+# # output 43
+# # output/ 00000_43_out.mp4
+# #         00000_43_reference.mp4
+# #         00000_43_out_reference.mp4
+
+# # -----------------------------
+# # Step 3: Create Gradio UI
+# # -----------------------------
+# demo = gr.Interface(
+#     fn=inference,
+#     inputs=[
+#         gr.Video(label="Upload Video (MP4)"),
+#         gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
+#         gr.Dropdown(
+#             choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
+#             label="Camera Motion Type",
+#             value="zoom_in",
+#         ),
+#     ],
+#     outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
+#     title="🎬 EPiC: Efficient Video Camera Control",
+#     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
+# )
+
+# # -----------------------------
+# # Step 4: Launch App
+# # -----------------------------
+# if __name__ == "__main__":
+#     demo.launch(server_name="0.0.0.0", server_port=7860)
+
+
 import os
 import subprocess
 from datetime import datetime
@@ -15,7 +186,6 @@ os.makedirs(HF_HOME, exist_ok=True)
 PRETRAINED_DIR = "/app/pretrained"
 os.makedirs(PRETRAINED_DIR, exist_ok=True)
 
-
 # -----------------------------
 # Step 1: Optional Model Download
 # -----------------------------
@@ -31,75 +201,69 @@ def download_models():
     else:
         print("✅ Pretrained models already exist.")
 
-
 download_models()
 
-
 # -----------------------------
 # Step 2: Inference Logic
 # -----------------------------
-
-def run_epic_inference(video_path, caption, motion_type):
+def run_epic_inference(video_path, target_pose, mode):
     temp_input_path = "/app/temp_input.mp4"
-    output_dir = f"/app/output_anchor"
+    output_dir = "/app/output_anchor"
    video_output_path = f"{output_dir}/masked_videos/output.mp4"
-    traj_name = motion_type
-    traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
+
     # Save uploaded video
     if video_path:
         os.system(f"cp '{video_path}' {temp_input_path}")
 
+    try:
+        theta, phi, r, x, y = target_pose.strip().split()
+    except ValueError:
+        return f"❌ Invalid target pose format. Use: θ φ r x y", None
+
     command = [
-        "python", "/app/inference/v2v_data/inference.py",
-        "--video_path", temp_input_path,
-        "--stride", "1",
-        "--out_dir", output_dir,
-        "--radius_scale", "1",
-        "--camera", "traj",
-        "--mask",
-        "--target_pose", "0", "30", "-0.6", "0", "0",
-        "--traj_txt", traj_txt,
-        "--save_name", "output",
-        "--mode", "gradual",
+        "python", "/app/inference/v2v_data/inference.py",
+        "--video_path", temp_input_path,
+        "--stride", "1",
+        "--out_dir", output_dir,
+        "--radius_scale", "1",
+        "--camera", "target",
+        "--mask",
+        "--target_pose", theta, phi, r, x, y,
+        "--save_name", "output",
+        "--mode", mode,
     ]
 
-    # Run inference command
     try:
         result = subprocess.run(command, capture_output=True, text=True, check=True)
-        print("Getting Anchor Videos run successfully.")
         logs = result.stdout
     except subprocess.CalledProcessError as e:
         logs = f"❌ Inference failed:\n{e.stderr}"
         return logs, None
 
-    # Locate the output video
-    if video_output_path:
-        return logs, str(video_output_path)
-    else:
-        return f"Inference succeeded but no output video found in {output_dir}", None
+    return logs, str(video_output_path) if os.path.exists(video_output_path) else (logs, None)
+
 def print_output_directory(out_dir):
     result = ""
     for root, dirs, files in os.walk(out_dir):
         level = root.replace(out_dir, '').count(os.sep)
         indent = ' ' * 4 * level
-        result += f"{indent}{os.path.basename(root)}/"
+        result += f"{indent}{os.path.basename(root)}/\n"
         sub_indent = ' ' * 4 * (level + 1)
         for f in files:
             result += f"{sub_indent}{f}\n"
     return result
 
-def inference(video_path, caption, motion_type):
-    logs, video_masked = run_epic_inference(video_path, caption, motion_type)
-
-    MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
+def inference(video_path, num_frames, fps, target_pose, mode):
+    logs, video_masked = run_epic_inference(video_path, target_pose, mode)
 
-    ckpt_steps=500
-    ckpt_dir="/app/out/EPiC_pretrained"
-    ckpt_file=f"checkpoint-{ckpt_steps}.pt"
-    ckpt_path=f"{ckpt_dir}/{ckpt_file}"
+    MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
+    ckpt_steps = 500
+    ckpt_dir = "/app/out/EPiC_pretrained"
+    ckpt_file = f"checkpoint-{ckpt_steps}.pt"
+    ckpt_path = f"{ckpt_dir}/{ckpt_file}"
 
-    video_root_dir= f"/app/output_anchor"
-    out_dir=f"/app/output"
+    video_root_dir = "/app/output_anchor"
+    out_dir = "/app/output"
 
     command = [
         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
@@ -118,30 +282,19 @@ def inference(video_path, caption, motion_type):
         "--controlnet_transformer_out_proj_dim_factor", "64",
         "--controlnet_transformer_out_proj_dim_zero_init",
         "--vae_channels", "16",
-        "--num_frames", "49",
+        "--num_frames", str(num_frames),
         "--controlnet_transformer_num_layers", "8",
         "--infer_with_mask",
         "--pool_style", "max",
-        "--seed", "43"
+        "--seed", "43",
+        "--fps", str(fps)
     ]
 
-    # Run the command
     result = subprocess.run(command, capture_output=True, text=True)
-    if result.returncode == 0:
-        print("Inference completed successfully.")
-    else:
-        print(f"Error occurred during inference: {result.stderr}")
-
-    # Print output directory contents
-    logs = result.stdout
-    result = print_output_directory(out_dir)
+    logs += "\n" + result.stdout
+    result_dir = print_output_directory(out_dir)
 
-    return logs+result, str(f"{out_dir}/00000_43_out.mp4")
-
-    # output 43
-    # output/ 00000_43_out.mp4
-    #         00000_43_reference.mp4
-    #         00000_43_out_reference.mp4
+    return logs + result_dir, str(f"{out_dir}/00000_43_out.mp4")
 
 # -----------------------------
 # Step 3: Create Gradio UI
@@ -150,14 +303,15 @@ demo = gr.Interface(
     fn=inference,
     inputs=[
         gr.Video(label="Upload Video (MP4)"),
-        gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
-        gr.Dropdown(
-            choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
-            label="Camera Motion Type",
-            value="zoom_in",
-        ),
+        gr.Slider(minimum=1, maximum=120, value=50, step=1, label="Number of Frames"),
+        gr.Slider(minimum=1, maximum=90, value=10, step=1, label="FPS"),
+        gr.Textbox(label="Target Pose (θ φ r x y)", placeholder="e.g., 0 30 -0.6 0 0"),
+        gr.Dropdown(choices=["gradual", "direct", "bullet"], value="gradual", label="Camera Mode"),
+    ],
+    outputs=[
+        gr.Textbox(label="Inference Logs"),
+        gr.Video(label="Generated Video")
     ],
-    outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
     title="🎬 EPiC: Efficient Video Camera Control",
     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
 )
@@ -166,4 +320,4 @@ demo = gr.Interface(
 # Step 4: Launch App
 # -----------------------------
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
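Review note on the new `run_epic_inference`: in `return logs, str(video_output_path) if os.path.exists(video_output_path) else (logs, None)`, the conditional expression binds only to the second tuple element, so a missing output file returns the nested tuple `(logs, (logs, None))` rather than `(logs, None)`. A minimal sketch of the apparent intent, using only names already present in this diff:

```python
# Condition the whole return value, not just the tuple's second slot.
if os.path.exists(video_output_path):
    return logs, str(video_output_path)
return logs, None
```

Also worth keeping in mind when reviewing the UI hunk: `gr.Interface` passes inputs to `fn` positionally, so the new component order (video, frames slider, FPS slider, pose textbox, mode dropdown) must stay aligned with the signature `inference(video_path, num_frames, fps, target_pose, mode)`.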
inference/v2v_data/demo.py CHANGED
@@ -600,6 +600,7 @@ class GetAnchorVideos:
             .to(opts.device)
             .unsqueeze(0)
         )
+
         if opts.camera == 'target':
             dtheta, dphi, dr, dx, dy = opts.target_pose
             poses = generate_traj_specified(
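Context for this hunk: the five values typed into the new "Target Pose (θ φ r x y)" textbox travel as separate argv tokens (`--target_pose θ φ r x y`) and are unpacked here as `dtheta, dphi, dr, dx, dy = opts.target_pose` before being handed to `generate_traj_specified`. A hypothetical argparse sketch consistent with that unpack; the actual flag definition lives in the repo's inference entry point and may differ:

```python
import argparse

# Hypothetical flag definition: five floats after --target_pose,
# matching the five-way unpack in GetAnchorVideos.
parser = argparse.ArgumentParser()
parser.add_argument("--target_pose", nargs=5, type=float,
                    metavar=("THETA", "PHI", "R", "X", "Y"))
opts = parser.parse_args(["--target_pose", "0", "30", "-0.6", "0", "0"])
dtheta, dphi, dr, dx, dy = opts.target_pose  # 0.0, 30.0, -0.6, 0.0, 0.0
```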