Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -56,9 +56,10 @@ random_seed = 42
|
|
56 |
video_length = 201
|
57 |
W = 1024
|
58 |
H = W
|
59 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
60 |
|
61 |
def get_pipe_image_and_video_predictor():
|
|
|
62 |
vae = AutoencoderKLWan.from_pretrained("./model/vae", torch_dtype=torch.float16)
|
63 |
transformer = Transformer3DModel.from_pretrained("./model/transformer", torch_dtype=torch.float16)
|
64 |
scheduler = UniPCMultistepScheduler.from_pretrained("./model/scheduler")
|
@@ -177,7 +178,7 @@ def preprocess_for_removal(images, masks):
|
|
177 |
out_masks.append(msk_resized)
|
178 |
arr_images = np.stack(out_images)
|
179 |
arr_masks = np.stack(out_masks)
|
180 |
-
return torch.from_numpy(arr_images).half()
|
181 |
|
182 |
@spaces.GPU(duration=300)
|
183 |
def inference_and_return_video(dilation_iterations, num_inference_steps, video_state=None):
|
@@ -189,7 +190,10 @@ def inference_and_return_video(dilation_iterations, num_inference_steps, video_s
|
|
189 |
images = np.array(images)
|
190 |
masks = np.array(masks)
|
191 |
img_tensor, mask_tensor = preprocess_for_removal(images, masks)
|
192 |
-
|
|
|
|
|
|
|
193 |
|
194 |
if mask_tensor.shape[1] < mask_tensor.shape[2]:
|
195 |
height = 480
|
@@ -218,7 +222,7 @@ def inference_and_return_video(dilation_iterations, num_inference_steps, video_s
|
|
218 |
clip.write_videofile(video_file, codec='libx264', audio=False, verbose=False, logger=None)
|
219 |
return video_file
|
220 |
|
221 |
-
|
222 |
def track_video(n_frames, video_state):
|
223 |
|
224 |
input_points = video_state["input_points"]
|
@@ -242,7 +246,7 @@ def track_video(n_frames, video_state):
|
|
242 |
images = [cv2.resize(img, (W_, H_)) for img in images]
|
243 |
video_state["origin_images"] = images
|
244 |
images = np.array(images)
|
245 |
-
inference_state = video_predictor.init_state(images=images/255, device=
|
246 |
video_state["inference_state"] = inference_state
|
247 |
|
248 |
if len(torch.from_numpy(video_state["masks"][0]).shape) == 3:
|
|
|
56 |
video_length = 201
|
57 |
W = 1024
|
58 |
H = W
|
59 |
+
#device = "cuda" if torch.cuda.is_available() else "cpu"
|
60 |
|
61 |
def get_pipe_image_and_video_predictor():
|
62 |
+
device="cpu"
|
63 |
vae = AutoencoderKLWan.from_pretrained("./model/vae", torch_dtype=torch.float16)
|
64 |
transformer = Transformer3DModel.from_pretrained("./model/transformer", torch_dtype=torch.float16)
|
65 |
scheduler = UniPCMultistepScheduler.from_pretrained("./model/scheduler")
|
|
|
178 |
out_masks.append(msk_resized)
|
179 |
arr_images = np.stack(out_images)
|
180 |
arr_masks = np.stack(out_masks)
|
181 |
+
return torch.from_numpy(arr_images).half(), torch.from_numpy(arr_masks).half()
|
182 |
|
183 |
@spaces.GPU(duration=300)
|
184 |
def inference_and_return_video(dilation_iterations, num_inference_steps, video_state=None):
|
|
|
190 |
images = np.array(images)
|
191 |
masks = np.array(masks)
|
192 |
img_tensor, mask_tensor = preprocess_for_removal(images, masks)
|
193 |
+
img_tensor=img_tensor.to("cuda")
|
194 |
+
mask_tensor=mask_tensor.to("cuda")
|
195 |
+
print(mask_tensor.shape)
|
196 |
+
mask_tensor = mask_tensor[:,:,:]
|
197 |
|
198 |
if mask_tensor.shape[1] < mask_tensor.shape[2]:
|
199 |
height = 480
|
|
|
222 |
clip.write_videofile(video_file, codec='libx264', audio=False, verbose=False, logger=None)
|
223 |
return video_file
|
224 |
|
225 |
+
@spaces.GPU(duration=100)
|
226 |
def track_video(n_frames, video_state):
|
227 |
|
228 |
input_points = video_state["input_points"]
|
|
|
246 |
images = [cv2.resize(img, (W_, H_)) for img in images]
|
247 |
video_state["origin_images"] = images
|
248 |
images = np.array(images)
|
249 |
+
inference_state = video_predictor.init_state(images=images/255, device="cuda")
|
250 |
video_state["inference_state"] = inference_state
|
251 |
|
252 |
if len(torch.from_numpy(video_state["masks"][0]).shape) == 3:
|