Spaces:
Running
on
Zero
Running
on
Zero
add control processing (#1)
Browse files
- add control processing (031aa775fb0d73b03c09a09eda543046890ddf76)
- Update requirements.txt (66dcdf0f353f887cc00875802e6f9090dd1ce1ec)
- Update app.py (4feab98ace2055c2c5a66045fd44f390e9f9b58d)
- app.py +28 -25
- requirements.txt +1 -0
app.py
CHANGED
@@ -9,7 +9,8 @@ from pipeline_ltx_condition_control import LTXConditionPipeline
|
|
9 |
from diffusers.utils import export_to_video, load_video
|
10 |
from torchvision import transforms
|
11 |
import random
|
12 |
-
|
|
|
13 |
|
14 |
dtype = torch.bfloat16
|
15 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -20,6 +21,8 @@ pipeline.to(device)
|
|
20 |
pipe_upsample.to(device)
|
21 |
pipeline.vae.enable_tiling()
|
22 |
|
|
|
|
|
23 |
|
24 |
CONTROL_LORAS = {
|
25 |
"canny": {
|
@@ -39,11 +42,11 @@ CONTROL_LORAS = {
|
|
39 |
}
|
40 |
}
|
41 |
@spaces.GPU()
|
42 |
-
def read_video(
|
43 |
"""
|
44 |
Reads a video file and converts it into a torch.Tensor with the shape [F, C, H, W].
|
45 |
"""
|
46 |
-
|
47 |
to_tensor_transform = transforms.ToTensor()
|
48 |
video_tensor = torch.stack([to_tensor_transform(img) for img in pil_images])
|
49 |
return video_tensor
|
@@ -89,27 +92,29 @@ def load_control_lora(control_type, current_lora_state):
|
|
89 |
print(f"Error loading {control_type} LoRA: {e}")
|
90 |
raise
|
91 |
|
92 |
-
def process_video_for_canny(
|
93 |
"""
|
94 |
Process video for canny control.
|
95 |
-
Placeholder function - will return video as-is for now.
|
96 |
-
TODO: Implement canny edge detection processing
|
97 |
"""
|
98 |
print("Processing video for canny control...")
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
101 |
|
102 |
-
def process_video_for_depth(
|
103 |
"""
|
104 |
Process video for depth control.
|
105 |
-
Placeholder function - will return video as-is for now.
|
106 |
-
TODO: Implement depth estimation processing
|
107 |
"""
|
108 |
print("Processing video for depth control...")
|
109 |
-
|
110 |
-
|
|
|
|
|
111 |
|
112 |
-
def process_video_for_pose(
|
113 |
"""
|
114 |
Process video for pose control.
|
115 |
Placeholder function - will return video as-is for now.
|
@@ -119,16 +124,16 @@ def process_video_for_pose(video_tensor):
|
|
119 |
|
120 |
return video_tensor
|
121 |
|
122 |
-
def process_video_for_control(
|
123 |
"""Process video based on the selected control type"""
|
124 |
if control_type == "canny":
|
125 |
-
return process_video_for_canny(
|
126 |
elif control_type == "depth":
|
127 |
-
return process_video_for_depth(
|
128 |
elif control_type == "pose":
|
129 |
-
return process_video_for_pose(
|
130 |
else:
|
131 |
-
return
|
132 |
|
133 |
@spaces.GPU(duration=120)
|
134 |
def generate_video(
|
@@ -169,15 +174,13 @@ def generate_video(
|
|
169 |
# Load the appropriate control LoRA and update state
|
170 |
updated_lora_state = load_control_lora(control_type, current_lora_state)
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
video = read_video(reference_video)
|
176 |
-
|
177 |
-
progress(0.15, desc="Processing video for control...")
|
178 |
|
179 |
# Process video based on control type
|
180 |
processed_video = process_video_for_control(video, control_type)
|
|
|
181 |
|
182 |
progress(0.2, desc="Preparing generation parameters...")
|
183 |
|
|
|
9 |
from diffusers.utils import export_to_video, load_video
|
10 |
from torchvision import transforms
|
11 |
import random
|
12 |
+
from controlnet_aux import CannyDetector
|
13 |
+
from image_gen_aux import DepthPreprocessor
|
14 |
|
15 |
dtype = torch.bfloat16
|
16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
21 |
pipe_upsample.to(device)
|
22 |
pipeline.vae.enable_tiling()
|
23 |
|
24 |
+
canny_processor = CannyDetector()
|
25 |
+
depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
|
26 |
|
27 |
CONTROL_LORAS = {
|
28 |
"canny": {
|
|
|
42 |
}
|
43 |
}
|
44 |
@spaces.GPU()
|
45 |
+
def read_video(video) -> torch.Tensor:
|
46 |
"""
|
47 |
Reads a video file and converts it into a torch.Tensor with the shape [F, C, H, W].
|
48 |
"""
|
49 |
+
|
50 |
to_tensor_transform = transforms.ToTensor()
|
51 |
video_tensor = torch.stack([to_tensor_transform(img) for img in pil_images])
|
52 |
return video_tensor
|
|
|
92 |
print(f"Error loading {control_type} LoRA: {e}")
|
93 |
raise
|
94 |
|
95 |
+
def process_video_for_canny(
    video,
    *,
    low_threshold=50,
    high_threshold=200,
    detect_resolution=1024,
    image_resolution=1024,
):
    """
    Process video for canny control.

    Runs the module-level ``canny_processor`` on every frame of ``video`` and
    collects the results.

    Args:
        video: Iterable of frames. Each frame is handed to
            ``canny_processor`` unchanged (presumably PIL images - confirm
            against the caller).
        low_threshold: Forwarded to ``canny_processor`` as ``low_threshold``.
        high_threshold: Forwarded to ``canny_processor`` as ``high_threshold``.
        detect_resolution: Forwarded to ``canny_processor`` as
            ``detect_resolution``.
        image_resolution: Forwarded to ``canny_processor`` as
            ``image_resolution``.

    Returns:
        A list holding one ``canny_processor`` result per input frame, in
        input order. An empty ``video`` yields an empty list.
    """
    print("Processing video for canny control...")
    # The defaults keep the previously hard-coded values (50 / 200 / 1024).
    # TODO: change resolution logic
    return [
        canny_processor(
            frame,
            low_threshold=low_threshold,
            high_threshold=high_threshold,
            detect_resolution=detect_resolution,
            image_resolution=image_resolution,
        )
        for frame in video
    ]
|
106 |
|
107 |
+
def process_video_for_depth(video):
    """
    Process video for depth control.

    Each frame of ``video`` is passed to the module-level ``depth_processor``;
    the first element of its result is converted to RGB via ``.convert("RGB")``.

    Returns:
        A list with one converted result per input frame, in input order.
    """
    print("Processing video for depth control...")
    return [depth_processor(frame)[0].convert("RGB") for frame in video]
|
116 |
|
117 |
+
def process_video_for_pose(video):
|
118 |
"""
|
119 |
Process video for pose control.
|
120 |
Placeholder function - will return video as-is for now.
|
|
|
124 |
|
125 |
return video_tensor
|
126 |
|
127 |
+
def process_video_for_control(video, control_type):
    """
    Dispatch ``video`` to the processor that matches ``control_type``.

    ``"canny"``, ``"depth"`` and ``"pose"`` are routed to their respective
    ``process_video_for_*`` functions; any other value returns ``video``
    untouched.
    """
    if control_type == "canny":
        return process_video_for_canny(video)

    if control_type == "depth":
        return process_video_for_depth(video)

    if control_type == "pose":
        return process_video_for_pose(video)

    # Unknown control type: pass the input through unchanged.
    return video
|
137 |
|
138 |
@spaces.GPU(duration=120)
|
139 |
def generate_video(
|
|
|
174 |
# Load the appropriate control LoRA and update state
|
175 |
updated_lora_state = load_control_lora(control_type, current_lora_state)
|
176 |
|
177 |
+
# Loads video into a list of pil images
|
178 |
+
video = load_video(reference_video)
|
179 |
+
progress(0.1, desc="Processing video for control...")
|
|
|
|
|
|
|
180 |
|
181 |
# Process video based on control type
|
182 |
processed_video = process_video_for_control(video, control_type)
|
183 |
+
processed_video = read_video(processed_video) # turns to tensor
|
184 |
|
185 |
progress(0.2, desc="Preparing generation parameters...")
|
186 |
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
accelerate
|
|
|
2 |
peft
|
3 |
transformers
|
4 |
sentencepiece
|
|
|
1 |
accelerate
|
2 |
+
controlnet_aux
|
3 |
peft
|
4 |
transformers
|
5 |
sentencepiece
|