Spaces:

Lightricks
/

ltx-video-iclora

Running on Zero

App Files Files Community

linoyts HF Staff committed 4 days ago

Commit

095d93c

verified ·

1 Parent(s): 2f65cea

add control processing (#1)

Browse files

- add control processing (031aa775fb0d73b03c09a09eda543046890ddf76)
- Update requirements.txt (66dcdf0f353f887cc00875802e6f9090dd1ce1ec)
- Update app.py (4feab98ace2055c2c5a66045fd44f390e9f9b58d)

Files changed (2) hide show

app.py +28 -25
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -9,7 +9,8 @@ from pipeline_ltx_condition_control import LTXConditionPipeline
 from diffusers.utils import export_to_video, load_video
 from torchvision import transforms
 import random
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -20,6 +21,8 @@ pipeline.to(device)
 pipe_upsample.to(device)
 pipeline.vae.enable_tiling()
 CONTROL_LORAS = {
     "canny": {
@@ -39,11 +42,11 @@ CONTROL_LORAS = {
     }
 }
 @spaces.GPU()
-def read_video(video_path: str) -> torch.Tensor:
     """
     Reads a video file and converts it into a torch.Tensor with the shape [F, C, H, W].
     """
-    pil_images = load_video(video_path)
     to_tensor_transform = transforms.ToTensor()
     video_tensor = torch.stack([to_tensor_transform(img) for img in pil_images])
     return video_tensor
@@ -89,27 +92,29 @@ def load_control_lora(control_type, current_lora_state):
         print(f"Error loading {control_type} LoRA: {e}")
         raise
-def process_video_for_canny(video_tensor):
     """
     Process video for canny control.
-    Placeholder function - will return video as-is for now.
-    TODO: Implement canny edge detection processing
     """
     print("Processing video for canny control...")
-    return video_tensor
-def process_video_for_depth(video_tensor):
     """
     Process video for depth control.
-    Placeholder function - will return video as-is for now.
-    TODO: Implement depth estimation processing
     """
     print("Processing video for depth control...")
-    return video_tensor
-def process_video_for_pose(video_tensor):
     """
     Process video for pose control.
     Placeholder function - will return video as-is for now.
@@ -119,16 +124,16 @@ def process_video_for_pose(video_tensor):
     return video_tensor
-def process_video_for_control(video_tensor, control_type):
     """Process video based on the selected control type"""
     if control_type == "canny":
-        return process_video_for_canny(video_tensor)
     elif control_type == "depth":
-        return process_video_for_depth(video_tensor)
     elif control_type == "pose":
-        return process_video_for_pose(video_tensor)
     else:
-        return video_tensor
 @spaces.GPU(duration=120)
 def generate_video(
@@ -169,15 +174,13 @@ def generate_video(
         # Load the appropriate control LoRA and update state
         updated_lora_state = load_control_lora(control_type, current_lora_state)
-        progress(0.1, desc="Loading reference video...")
-        # Read the reference video
-        video = read_video(reference_video)
-        progress(0.15, desc="Processing video for control...")
         # Process video based on control type
         processed_video = process_video_for_control(video, control_type)
         progress(0.2, desc="Preparing generation parameters...")

 from diffusers.utils import export_to_video, load_video
 from torchvision import transforms
 import random
+from controlnet_aux import CannyDetector
+from image_gen_aux import DepthPreprocessor
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe_upsample.to(device)
 pipeline.vae.enable_tiling()
+canny_processor = CannyDetector()
+depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
 CONTROL_LORAS = {
     "canny": {
     }
 }
 @spaces.GPU()
+def read_video(video) -> torch.Tensor:
+    """
+    Converts a sequence of video frames into a torch.Tensor with the shape [F, C, H, W].
+    Args:
+        video: iterable of frames that torchvision's ToTensor accepts; the caller
+            passes the frame list returned by process_video_for_control.
+    Returns:
+        The per-frame tensors stacked along a new leading (frame) dimension.
+    """
     to_tensor_transform = transforms.ToTensor()
+    # Iterate the `video` argument: the old `pil_images` local was dropped together
+    # with the load_video() call, so referencing it here raised NameError.
+    video_tensor = torch.stack([to_tensor_transform(img) for img in video])
     return video_tensor
         print(f"Error loading {control_type} LoRA: {e}")
         raise
+def process_video_for_canny(video):
+    """
+    Build the canny control video by running canny_processor on every frame.
+    Returns a list with one processed frame per input frame, in input order.
+    """
     print("Processing video for canny control...")
+    # TODO: change resolution logic
+    return [
+        canny_processor(frame, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+        for frame in video
+    ]
+def process_video_for_depth(video):
     """
     Process video for depth control.
+    Runs the module-level depth_processor on each frame and returns a list with
+    one RGB-converted result per input frame, in input order.
     """
     print("Processing video for depth control...")
+    # The processor output is indexed with [0]; presumably it returns a list of
+    # images per call -- confirm against the DepthPreprocessor API.
+    return [depth_processor(frame)[0].convert("RGB") for frame in video]
+def process_video_for_pose(video):
     """
     Process video for pose control.
     Placeholder function - will return video as-is for now.
     return video_tensor
+def process_video_for_control(video, control_type):
+    """Route the video to the processor matching control_type; any other value returns the video unchanged."""
+    if control_type == "canny":
+        return process_video_for_canny(video)
+    if control_type == "depth":
+        return process_video_for_depth(video)
+    if control_type == "pose":
+        return process_video_for_pose(video)
+    # No dedicated processor for this control type.
+    return video
 @spaces.GPU(duration=120)
 def generate_video(
         # Load the appropriate control LoRA and update state
         updated_lora_state = load_control_lora(control_type, current_lora_state)
+        # Loads video into a list of pil images
+        video = load_video(reference_video)
+        progress(0.1, desc="Processing video for control...")
         # Process video based on control type
         processed_video = process_video_for_control(video, control_type)
+        processed_video = read_video(processed_video) # turns to tensor
         progress(0.2, desc="Preparing generation parameters...")

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 accelerate
 peft
 transformers
 sentencepiece

 accelerate
+controlnet_aux
 peft
 transformers
 sentencepiece