Spaces:

PengWeixuanSZU
/

MiniMax-Remover

Running on Zero

App Files Files Community

PengWeixuanSZU committed on Jun 20

Commit

4052097

verified ·

1 Parent(s): 8c347bd

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -5

app.py CHANGED Viewed

@@ -56,9 +56,10 @@ random_seed = 42
 video_length = 201
 W = 1024
 H = W
-device = "cuda" if torch.cuda.is_available() else "cpu"
 def get_pipe_image_and_video_predictor():
     vae = AutoencoderKLWan.from_pretrained("./model/vae", torch_dtype=torch.float16)
     transformer = Transformer3DModel.from_pretrained("./model/transformer", torch_dtype=torch.float16)
     scheduler = UniPCMultistepScheduler.from_pretrained("./model/scheduler")
@@ -177,7 +178,7 @@ def preprocess_for_removal(images, masks):
         out_masks.append(msk_resized)
     arr_images = np.stack(out_images)
     arr_masks = np.stack(out_masks)
-    return torch.from_numpy(arr_images).half().to(device), torch.from_numpy(arr_masks).half().to(device)
 @spaces.GPU(duration=300)
 def inference_and_return_video(dilation_iterations, num_inference_steps, video_state=None):
@@ -189,7 +190,10 @@ def inference_and_return_video(dilation_iterations, num_inference_steps, video_s
     images = np.array(images)
     masks = np.array(masks)
     img_tensor, mask_tensor = preprocess_for_removal(images, masks)
-    mask_tensor = mask_tensor[:,:,:]###
     if mask_tensor.shape[1] < mask_tensor.shape[2]:
         height = 480
@@ -218,7 +222,7 @@ def inference_and_return_video(dilation_iterations, num_inference_steps, video_s
     clip.write_videofile(video_file, codec='libx264', audio=False, verbose=False, logger=None)
     return video_file
 def track_video(n_frames, video_state):
     input_points = video_state["input_points"]
@@ -242,7 +246,7 @@ def track_video(n_frames, video_state):
     images = [cv2.resize(img, (W_, H_)) for img in images]
     video_state["origin_images"] = images
     images = np.array(images)
-    inference_state = video_predictor.init_state(images=images/255, device=device)
     video_state["inference_state"] = inference_state
     if len(torch.from_numpy(video_state["masks"][0]).shape) == 3:

 video_length = 201
 W = 1024
 H = W
+#device = "cuda" if torch.cuda.is_available() else "cpu"
 def get_pipe_image_and_video_predictor():
+    device="cpu"
     vae = AutoencoderKLWan.from_pretrained("./model/vae", torch_dtype=torch.float16)
     transformer = Transformer3DModel.from_pretrained("./model/transformer", torch_dtype=torch.float16)
     scheduler = UniPCMultistepScheduler.from_pretrained("./model/scheduler")
         out_masks.append(msk_resized)
     arr_images = np.stack(out_images)
     arr_masks = np.stack(out_masks)
+    return torch.from_numpy(arr_images).half(), torch.from_numpy(arr_masks).half()
 @spaces.GPU(duration=300)
 def inference_and_return_video(dilation_iterations, num_inference_steps, video_state=None):
     images = np.array(images)
     masks = np.array(masks)
     img_tensor, mask_tensor = preprocess_for_removal(images, masks)
+    img_tensor=img_tensor.to("cuda")
+    mask_tensor=mask_tensor.to("cuda")
+    print(mask_tensor.shape)
+    mask_tensor = mask_tensor[:,:,:]
     if mask_tensor.shape[1] < mask_tensor.shape[2]:
         height = 480
     clip.write_videofile(video_file, codec='libx264', audio=False, verbose=False, logger=None)
     return video_file
+@spaces.GPU(duration=100)
 def track_video(n_frames, video_state):
     input_points = video_state["input_points"]
     images = [cv2.resize(img, (W_, H_)) for img in images]
     video_state["origin_images"] = images
     images = np.array(images)
+    inference_state = video_predictor.init_state(images=images/255, device="cuda")
     video_state["inference_state"] = inference_state
     if len(torch.from_numpy(video_state["masks"][0]).shape) == 3: