linoyts (HF Staff) committed
Commit 095d93c · verified · 1 Parent(s): 2f65cea

add control processing (#1)


- add control processing (031aa775fb0d73b03c09a09eda543046890ddf76)
- Update requirements.txt (66dcdf0f353f887cc00875802e6f9090dd1ce1ec)
- Update app.py (4feab98ace2055c2c5a66045fd44f390e9f9b58d)

Files changed (2)
  1. app.py +28 -25
  2. requirements.txt +1 -0
app.py CHANGED

@@ -9,7 +9,8 @@ from pipeline_ltx_condition_control import LTXConditionPipeline
 from diffusers.utils import export_to_video, load_video
 from torchvision import transforms
 import random
-
+from controlnet_aux import CannyDetector
+from image_gen_aux import DepthPreprocessor
 
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -20,6 +21,8 @@ pipeline.to(device)
 pipe_upsample.to(device)
 pipeline.vae.enable_tiling()
 
+canny_processor = CannyDetector()
+depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
 
 CONTROL_LORAS = {
     "canny": {
@@ -39,11 +42,11 @@ CONTROL_LORAS = {
     }
 }
 @spaces.GPU()
-def read_video(video_path: str) -> torch.Tensor:
+def read_video(video) -> torch.Tensor:
     """
     Reads a video file and converts it into a torch.Tensor with the shape [F, C, H, W].
     """
-    pil_images = load_video(video_path)
+    pil_images = video
     to_tensor_transform = transforms.ToTensor()
     video_tensor = torch.stack([to_tensor_transform(img) for img in pil_images])
     return video_tensor
@@ -89,27 +92,29 @@ def load_control_lora(control_type, current_lora_state):
         print(f"Error loading {control_type} LoRA: {e}")
         raise
 
-def process_video_for_canny(video_tensor):
+def process_video_for_canny(video):
     """
     Process video for canny control.
-    Placeholder function - will return video as-is for now.
-    TODO: Implement canny edge detection processing
     """
     print("Processing video for canny control...")
-
-    return video_tensor
+    canny_video = []
+    for frame in video:
+        # TODO: change resolution logic
+        canny_video.append(canny_processor(frame, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024))
+
+    return canny_video
 
-def process_video_for_depth(video_tensor):
+def process_video_for_depth(video):
     """
     Process video for depth control.
-    Placeholder function - will return video as-is for now.
-    TODO: Implement depth estimation processing
     """
     print("Processing video for depth control...")
-
-    return video_tensor
+    depth_video = []
+    for frame in video:
+        depth_video.append(depth_processor(frame)[0].convert("RGB"))
+    return depth_video
 
-def process_video_for_pose(video_tensor):
+def process_video_for_pose(video):
     """
     Process video for pose control.
     Placeholder function - will return video as-is for now.
@@ -119,16 +124,16 @@ def process_video_for_pose(video_tensor):
 
     return video_tensor
 
-def process_video_for_control(video_tensor, control_type):
+def process_video_for_control(video, control_type):
     """Process video based on the selected control type"""
     if control_type == "canny":
-        return process_video_for_canny(video_tensor)
+        return process_video_for_canny(video)
     elif control_type == "depth":
-        return process_video_for_depth(video_tensor)
+        return process_video_for_depth(video)
     elif control_type == "pose":
-        return process_video_for_pose(video_tensor)
+        return process_video_for_pose(video)
     else:
-        return video_tensor
+        return video
 
 @spaces.GPU(duration=120)
 def generate_video(
@@ -169,15 +174,13 @@ def generate_video(
     # Load the appropriate control LoRA and update state
     updated_lora_state = load_control_lora(control_type, current_lora_state)
 
-    progress(0.1, desc="Loading reference video...")
-
-    # Read the reference video
-    video = read_video(reference_video)
-
-    progress(0.15, desc="Processing video for control...")
+    # Load the reference video into a list of PIL images
+    video = load_video(reference_video)
+    progress(0.1, desc="Processing video for control...")
 
     # Process video based on control type
     processed_video = process_video_for_control(video, control_type)
+    processed_video = read_video(processed_video)  # convert to tensor
 
     progress(0.2, desc="Preparing generation parameters...")
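For reference, a minimal sketch of the new per-frame control preprocessing introduced above. It assumes `controlnet_aux` and `image_gen_aux` are installed and that a short clip exists at the placeholder path `reference.mp4`; the thresholds, resolutions, and model ID mirror the values used in `app.py`, and the depth model is downloaded on first use.

```python
from diffusers.utils import load_video
from controlnet_aux import CannyDetector
from image_gen_aux import DepthPreprocessor

# Load the reference clip as a list of PIL images ("reference.mp4" is a placeholder path).
frames = load_video("reference.mp4")

# Per-frame Canny edges, mirroring process_video_for_canny in app.py.
canny_processor = CannyDetector()
canny_frames = [
    canny_processor(
        frame,
        low_threshold=50,
        high_threshold=200,
        detect_resolution=1024,
        image_resolution=1024,
    )
    for frame in frames
]

# Per-frame depth maps, mirroring process_video_for_depth in app.py.
depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
depth_frames = [depth_processor(frame)[0].convert("RGB") for frame in frames]
```

Either list of processed PIL frames can then be stacked into the tensor expected downstream, as shown after the requirements diff below.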
requirements.txt CHANGED

@@ -1,4 +1,5 @@
 accelerate
+controlnet_aux
 peft
 transformers
 sentencepiece
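With the new dependency in place, the processed control frames come back as PIL images, and `app.py` stacks them into the `[F, C, H, W]` tensor that `read_video` documents. A self-contained sketch of that conversion follows; the helper name `frames_to_tensor` is illustrative and not part of the app, but the transform matches the one used in `read_video`.

```python
import torch
from PIL import Image
from torchvision import transforms

def frames_to_tensor(frames) -> torch.Tensor:
    """Stack a list of PIL frames into a float tensor of shape [F, C, H, W]."""
    to_tensor = transforms.ToTensor()
    return torch.stack([to_tensor(frame) for frame in frames])

# Quick check with two dummy 64x64 RGB frames.
dummy_frames = [Image.new("RGB", (64, 64)) for _ in range(2)]
print(frames_to_tensor(dummy_frames).shape)  # torch.Size([2, 3, 64, 64])
```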