Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -180,14 +180,14 @@ def preprocess_for_removal(images, masks):
|
|
180 |
return torch.from_numpy(arr_images).half().to(device), torch.from_numpy(arr_masks).half().to(device)
|
181 |
|
182 |
@spaces.GPU(duration=200)
|
183 |
-
def inference_and_return_video(dilation_iterations, num_inference_steps, video_state
|
184 |
if video_state["origin_images"] is None or video_state["masks"] is None:
|
185 |
return None
|
186 |
images = video_state["origin_images"]
|
187 |
masks = video_state["masks"]
|
188 |
|
189 |
-
|
190 |
-
|
191 |
print(f"line 191 images shape:{images.shape},masks shape:{masks.shape}")
|
192 |
#line 191 images shape:(1, 1024, 1820, 3),masks shape:(1, 1024, 1820), which should be (16, 1024, 1820, 3) and (16, 1024, 1820, 3)
|
193 |
img_tensor, mask_tensor = preprocess_for_removal(images, masks)
|
@@ -242,7 +242,7 @@ def track_video(n_frames, video_state):
|
|
242 |
W_ = int(H_ * images[0].shape[1] / images[0].shape[0])
|
243 |
|
244 |
images = [cv2.resize(img, (W_, H_)) for img in images]
|
245 |
-
video_state["origin_images"] =
|
246 |
images = np.array(images)
|
247 |
inference_state = video_predictor.init_state(images=images/255, device=device)
|
248 |
video_state["inference_state"] = inference_state
|
@@ -279,7 +279,7 @@ def track_video(n_frames, video_state):
|
|
279 |
painted = np.uint8(np.clip(painted * 255, 0, 255))
|
280 |
output_frames.append(painted)
|
281 |
print(f"line 281 len(output_frames)={len(output_frames)}, painted shape:{painted.shape}")
|
282 |
-
video_state["masks"] =
|
283 |
print(f'line 283 len video_state["masks"]:{len(video_state["masks"])}')
|
284 |
print(f'line 284 video_state["masks"][0].shape:{video_state["masks"][0].shape}')
|
285 |
video_file = f"/tmp/{time.time()}-{random.random()}-tracked_output.mp4"
|
|
|
180 |
return torch.from_numpy(arr_images).half().to(device), torch.from_numpy(arr_masks).half().to(device)
|
181 |
|
182 |
@spaces.GPU(duration=200)
|
183 |
+
def inference_and_return_video(dilation_iterations, num_inference_steps, video_state):
|
184 |
if video_state["origin_images"] is None or video_state["masks"] is None:
|
185 |
return None
|
186 |
images = video_state["origin_images"]
|
187 |
masks = video_state["masks"]
|
188 |
|
189 |
+
images = np.array(images)
|
190 |
+
masks = np.array(masks)
|
191 |
print(f"line 191 images shape:{images.shape},masks shape:{masks.shape}")
|
192 |
#line 191 images shape:(1, 1024, 1820, 3),masks shape:(1, 1024, 1820), which should be (16, 1024, 1820, 3) and (16, 1024, 1820, 3)
|
193 |
img_tensor, mask_tensor = preprocess_for_removal(images, masks)
|
|
|
242 |
W_ = int(H_ * images[0].shape[1] / images[0].shape[0])
|
243 |
|
244 |
images = [cv2.resize(img, (W_, H_)) for img in images]
|
245 |
+
video_state["origin_images"] = images
|
246 |
images = np.array(images)
|
247 |
inference_state = video_predictor.init_state(images=images/255, device=device)
|
248 |
video_state["inference_state"] = inference_state
|
|
|
279 |
painted = np.uint8(np.clip(painted * 255, 0, 255))
|
280 |
output_frames.append(painted)
|
281 |
print(f"line 281 len(output_frames)={len(output_frames)}, painted shape:{painted.shape}")
|
282 |
+
video_state["masks"] =mask_frames
|
283 |
print(f'line 283 len video_state["masks"]:{len(video_state["masks"])}')
|
284 |
print(f'line 284 video_state["masks"][0].shape:{video_state["masks"][0].shape}')
|
285 |
video_file = f"/tmp/{time.time()}-{random.random()}-tracked_output.mp4"
|