Spaces:

Lightricks
/

ltx-video-iclora

Running on Zero

App Files Files Community

linoyts HF Staff committed on 4 days ago

Commit

d0c6696

verified ·

1 Parent(s): 498af35

Update app.py (#2)

Browse files

- Update app.py (5b1caa6abc921d2a63725eb2e0bbfbd31dca1761)
- Update app.py (dad60fc3751a96c2febec586868672147878ccfb)

Files changed (1) hide show

app.py +53 -8

app.py CHANGED Viewed

@@ -11,6 +11,9 @@ from torchvision import transforms
 import random
 from controlnet_aux import CannyDetector
 from image_gen_aux import DepthPreprocessor
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -24,6 +27,11 @@ pipeline.vae.enable_tiling()
 canny_processor = CannyDetector()
 depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
 CONTROL_LORAS = {
     "canny": {
         "repo": "Lightricks/LTX-Video-ICLoRA-canny-13b-0.9.7",
@@ -41,6 +49,7 @@ CONTROL_LORAS = {
         "adapter_name": "pose_lora"
     }
 }
 @spaces.GPU()
 def read_video(video) -> torch.Tensor:
     """
@@ -110,20 +119,56 @@ def process_video_for_depth(video):
     Process video for depth control.
     """
     print("Processing video for depth control...")
-    dapth_video = []
     for frame in video:
-        dapth_video.append(depth_processor(frame)[0].convert("RGB"))
-    return dapth_video
 def process_video_for_pose(video):
     """
-    Process video for pose control.
-    Placeholder function - will return video as-is for now.
-    TODO: Implement pose estimation processing
     """
     print("Processing video for pose control...")
-    return video_tensor
 def process_video_for_control(video, control_type):
     """Process video based on the selected control type"""

 import random
 from controlnet_aux import CannyDetector
 from image_gen_aux import DepthPreprocessor
+import mediapipe as mp
+from PIL import Image
+import cv2
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 canny_processor = CannyDetector()
 depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
+# Initialize MediaPipe pose estimation
+mp_drawing = mp.solutions.drawing_utils
+mp_drawing_styles = mp.solutions.drawing_styles
+mp_pose = mp.solutions.pose
 CONTROL_LORAS = {
     "canny": {
         "repo": "Lightricks/LTX-Video-ICLoRA-canny-13b-0.9.7",
         "adapter_name": "pose_lora"
     }
 }
 @spaces.GPU()
 def read_video(video) -> torch.Tensor:
     """
     Process video for depth control.
     """
     print("Processing video for depth control...")
+    depth_video = []
     for frame in video:
+        depth_video.append(depth_processor(frame)[0].convert("RGB"))
+    return depth_video
@spaces.GPU()
def process_video_for_pose(video):
    """
    Process video for pose control using MediaPipe pose estimation.

    Every frame is run through MediaPipe Pose and the detected skeleton is
    drawn on a black canvas with the same shape as the frame. Frames in which
    no pose is detected yield an all-black image, so the output always has
    one image per input frame.

    Args:
        video: Iterable of frames convertible with ``np.array`` (e.g. PIL
            images). Assumes 3-channel RGB uint8 frames -- TODO confirm
            against the caller.

    Returns:
        list: One ``PIL.Image`` per input frame containing the drawn pose
        landmarks and connections on a black background.
    """
    print("Processing video for pose control...")

    # The drawing styles do not depend on the frame, so resolve them once.
    landmark_style = mp_drawing_styles.get_default_pose_landmarks_style()
    # NOTE(review): some MediaPipe releases appear not to expose a
    # pose-specific connections style helper -- confirm against the pinned
    # version. Fall back to the library's default DrawingSpec so a detected
    # pose does not end in an AttributeError.
    connection_style_factory = getattr(
        mp_drawing_styles, "get_default_pose_connections_style", None
    )
    if connection_style_factory is not None:
        connection_style = connection_style_factory()
    else:
        connection_style = mp_drawing.DrawingSpec()

    pose_video = []
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=1,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as pose:
        for frame in video:
            # Convert the frame (e.g. a PIL image) to a numpy array.
            frame_rgb = np.array(frame)

            # MediaPipe's Pose.process expects RGB input, so the frame is
            # passed as-is; swapping to BGR here would feed the model
            # channel-reversed colours.
            results = pose.process(frame_rgb)

            # Black canvas with the same dimensions as the input frame.
            pose_frame = np.zeros_like(frame_rgb)

            # Draw pose landmarks only if a pose was detected.
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    pose_frame,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=landmark_style,
                    connection_drawing_spec=connection_style
                )

            # Convert back to a PIL image for the downstream pipeline.
            pose_video.append(Image.fromarray(pose_frame))
    return pose_video
 def process_video_for_control(video, control_type):
     """Process video based on the selected control type"""