linoyts HF Staff committed on
Commit
d0c6696
·
verified ·
1 Parent(s): 498af35
Files changed (1) hide show
  1. app.py +53 -8
app.py CHANGED
@@ -11,6 +11,9 @@ from torchvision import transforms
11
  import random
12
  from controlnet_aux import CannyDetector
13
  from image_gen_aux import DepthPreprocessor
 
 
 
14
 
15
  dtype = torch.bfloat16
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -24,6 +27,11 @@ pipeline.vae.enable_tiling()
24
  canny_processor = CannyDetector()
25
  depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
26
 
 
 
 
 
 
27
  CONTROL_LORAS = {
28
  "canny": {
29
  "repo": "Lightricks/LTX-Video-ICLoRA-canny-13b-0.9.7",
@@ -41,6 +49,7 @@ CONTROL_LORAS = {
41
  "adapter_name": "pose_lora"
42
  }
43
  }
 
44
  @spaces.GPU()
45
  def read_video(video) -> torch.Tensor:
46
  """
@@ -110,20 +119,56 @@ def process_video_for_depth(video):
110
  Process video for depth control.
111
  """
112
  print("Processing video for depth control...")
113
- dapth_video = []
114
  for frame in video:
115
- dapth_video.append(depth_processor(frame)[0].convert("RGB"))
116
- return dapth_video
117
 
 
118
  def process_video_for_pose(video):
119
  """
120
- Process video for pose control.
121
- Placeholder function - will return video as-is for now.
122
- TODO: Implement pose estimation processing
123
  """
124
  print("Processing video for pose control...")
125
-
126
- return video_tensor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  def process_video_for_control(video, control_type):
129
  """Process video based on the selected control type"""
 
11
  import random
12
  from controlnet_aux import CannyDetector
13
  from image_gen_aux import DepthPreprocessor
14
+ import mediapipe as mp
15
+ from PIL import Image
16
+ import cv2
17
 
18
  dtype = torch.bfloat16
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
27
  canny_processor = CannyDetector()
28
  depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
29
 
30
+ # Initialize MediaPipe pose estimation
31
+ mp_drawing = mp.solutions.drawing_utils
32
+ mp_drawing_styles = mp.solutions.drawing_styles
33
+ mp_pose = mp.solutions.pose
34
+
35
  CONTROL_LORAS = {
36
  "canny": {
37
  "repo": "Lightricks/LTX-Video-ICLoRA-canny-13b-0.9.7",
 
49
  "adapter_name": "pose_lora"
50
  }
51
  }
52
+
53
  @spaces.GPU()
54
  def read_video(video) -> torch.Tensor:
55
  """
 
119
  Process video for depth control.
120
  """
121
  print("Processing video for depth control...")
122
+ depth_video = []
123
  for frame in video:
124
+ depth_video.append(depth_processor(frame)[0].convert("RGB"))
125
+ return depth_video
126
 
127
@spaces.GPU()
def process_video_for_pose(video):
    """
    Process video for pose control using MediaPipe pose estimation.

    Every frame is run through MediaPipe Pose and the detected skeleton is
    drawn on a black canvas with the same shape as the frame. Frames in
    which no pose is detected are returned as fully black images.

    Args:
        video: Iterable of frames. Presumably PIL images, as the original
            inline comment states -- TODO confirm against the caller.
            Anything `np.array` can turn into an HxWx3 uint8 array is
            handled the same way.

    Returns:
        List of PIL images, one per input frame, containing only the drawn
        pose landmarks and connections.
    """
    print("Processing video for pose control...")
    pose_video = []

    # The drawing specs do not depend on the frame, so build them once.
    draw_kwargs = {
        "landmark_drawing_spec": mp_drawing_styles.get_default_pose_landmarks_style(),
    }
    # NOTE(review): `get_default_pose_connections_style` does not appear to
    # exist in every MediaPipe release. Look it up defensively and fall back
    # to draw_landmarks' built-in connection spec instead of raising
    # AttributeError on the first frame that contains a person.
    connections_style = getattr(
        mp_drawing_styles, "get_default_pose_connections_style", None
    )
    if connections_style is not None:
        draw_kwargs["connection_drawing_spec"] = connections_style()

    with mp_pose.Pose(
        # Detection runs independently on every frame in this mode;
        # per the MediaPipe docs min_tracking_confidence is ignored here.
        static_image_mode=True,
        model_complexity=1,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as pose:
        for frame in video:
            # draw_landmarks only accepts 3-channel images, so normalise
            # RGBA / grayscale / palette PIL frames to RGB first.
            if isinstance(frame, Image.Image):
                frame = frame.convert("RGB")
            frame_rgb = np.array(frame)

            # MediaPipe's Python solutions expect RGB input. The frame is
            # already RGB, so it must NOT be converted to BGR (the previous
            # RGB->BGR swap fed the model channel-swapped images).
            results = pose.process(frame_rgb)

            # Black background with the same dimensions as the frame.
            pose_frame = np.zeros_like(frame_rgb)

            # Draw the skeleton only when a pose was found.
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    pose_frame,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    **draw_kwargs,
                )

            pose_video.append(Image.fromarray(pose_frame))

    return pose_video
172
 
173
  def process_video_for_control(video, control_type):
174
  """Process video based on the selected control type"""