Spaces:

Lightricks
/

ltx-video-iclora

Running on Zero

App Files Files Community

linoyts HF Staff committed 5 days ago

Commit

031aa77

verified ·

1 Parent(s): e169ab6

add control processing

Browse files

Files changed (1) hide show

app.py +22 -21

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from pipeline_ltx_condition_control import LTXConditionPipeline
 from diffusers.utils import export_to_video, load_video
 from torchvision import transforms
 import random
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -20,6 +20,7 @@ pipeline.to(device)
 pipe_upsample.to(device)
 pipeline.vae.enable_tiling()
 CONTROL_LORAS = {
     "canny": {
@@ -39,11 +40,11 @@ CONTROL_LORAS = {
     }
 }
 @spaces.GPU()
-def read_video(video_path: str) -> torch.Tensor:
     """
     Reads a video file and converts it into a torch.Tensor with the shape [F, C, H, W].
     """
-    pil_images = load_video(video_path)
     to_tensor_transform = transforms.ToTensor()
     video_tensor = torch.stack([to_tensor_transform(img) for img in pil_images])
     return video_tensor
@@ -89,17 +90,19 @@ def load_control_lora(control_type, current_lora_state):
         print(f"Error loading {control_type} LoRA: {e}")
         raise
-def process_video_for_canny(video_tensor):
     """
     Process video for canny control.
-    Placeholder function - will return video as-is for now.
-    TODO: Implement canny edge detection processing
     """
     print("Processing video for canny control...")
-    return video_tensor
-def process_video_for_depth(video_tensor):
     """
     Process video for depth control.
     Placeholder function - will return video as-is for now.
@@ -109,7 +112,7 @@ def process_video_for_depth(video_tensor):
     return video_tensor
-def process_video_for_pose(video_tensor):
     """
     Process video for pose control.
     Placeholder function - will return video as-is for now.
@@ -119,16 +122,16 @@ def process_video_for_pose(video_tensor):
     return video_tensor
-def process_video_for_control(video_tensor, control_type):
     """Process video based on the selected control type"""
     if control_type == "canny":
-        return process_video_for_canny(video_tensor)
     elif control_type == "depth":
-        return process_video_for_depth(video_tensor)
     elif control_type == "pose":
-        return process_video_for_pose(video_tensor)
     else:
-        return video_tensor
 @spaces.GPU(duration=120)
 def generate_video(
@@ -169,15 +172,13 @@ def generate_video(
         # Load the appropriate control LoRA and update state
         updated_lora_state = load_control_lora(control_type, current_lora_state)
-        progress(0.1, desc="Loading reference video...")
-        # Read the reference video
-        video = read_video(reference_video)
-        progress(0.15, desc="Processing video for control...")
         # Process video based on control type
         processed_video = process_video_for_control(video, control_type)
         progress(0.2, desc="Preparing generation parameters...")

 from diffusers.utils import export_to_video, load_video
 from torchvision import transforms
 import random
+from controlnet_aux import CannyDetector
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe_upsample.to(device)
 pipeline.vae.enable_tiling()
+canny_processor = CannyDetector()
 CONTROL_LORAS = {
     "canny": {
     }
 }
 @spaces.GPU()
def read_video(video) -> torch.Tensor:
    """
    Converts a sequence of video frames into a torch.Tensor with the shape [F, C, H, W].

    Args:
        video: Iterable of frames that ``transforms.ToTensor`` can convert.
            The caller passes the list of images obtained from ``load_video``
            (optionally after control processing), not a file path.

    Returns:
        The per-frame tensors stacked along a new leading frame dimension.
    """
    to_tensor_transform = transforms.ToTensor()
    # Frames come from the `video` argument; no file is opened here and no
    # other name (such as `pil_images`) is available inside this function.
    video_tensor = torch.stack([to_tensor_transform(img) for img in video])
    return video_tensor
         print(f"Error loading {control_type} LoRA: {e}")
         raise
def process_video_for_canny(video):
    """
    Process video for canny control.

    Runs the module-level ``canny_processor`` (a ``CannyDetector`` instance)
    on every frame of ``video`` and collects the results in frame order.

    Args:
        video: Iterable of frames; the caller passes the list of images
            returned by ``load_video``.

    Returns:
        A list holding the detector output for each input frame.
    """
    print("Processing video for canny control...")
    # TODO: change resolution logic
    # The detector instance is named `canny_processor` at module level; the
    # bare name `processor` is not defined and would raise NameError.
    return [
        canny_processor(
            frame,
            low_threshold=50,
            high_threshold=200,
            detect_resolution=1024,
            image_resolution=1024,
        )
        for frame in video
    ]
+def process_video_for_depth(video):
     """
     Process video for depth control.
     Placeholder function - will return video as-is for now.
     return video_tensor
+def process_video_for_pose(video):
     """
     Process video for pose control.
     Placeholder function - will return video as-is for now.
     return video_tensor
def process_video_for_control(video, control_type):
    """Route ``video`` to the processing function matching ``control_type``.

    "canny", "depth" and "pose" are handed to their dedicated processing
    functions; any other value returns ``video`` unchanged.
    """
    if control_type == "canny":
        return process_video_for_canny(video)
    if control_type == "depth":
        return process_video_for_depth(video)
    if control_type == "pose":
        return process_video_for_pose(video)
    # Unrecognised control type: hand the frames back untouched.
    return video
 @spaces.GPU(duration=120)
 def generate_video(
         # Load the appropriate control LoRA and update state
         updated_lora_state = load_control_lora(control_type, current_lora_state)
+        # Loads video into a list of pil images
+        video = load_video(reference_video)
+        progress(0.1, desc="Processing video for control...")
         # Process video based on control type
         processed_video = process_video_for_control(video, control_type)
+        processed_video = read_video(processed_video) # turns to tensor
         progress(0.2, desc="Preparing generation parameters...")