Update app.py

app.py CHANGED
@@ -114,6 +114,20 @@ GLEEmodel_swin = GLEE_Model(cfg_swin, None, device, None, True).to(device)
 GLEEmodel_swin.load_state_dict(checkpoints_swin, strict=False)
 GLEEmodel_swin.eval()
 
+
+cfg_eva02 = get_cfg()
+add_deeplab_config(cfg_eva02)
+add_glee_config(cfg_eva02)
+conf_files_swin = 'GLEE/configs/EVA02.yaml'
+checkpoints_eva = torch.load('GLEE/GLEE_{}.pth'.format(args.version))
+cfg_eva02.merge_from_file(conf_files_swin)
+GLEEmodel_eva02 = GLEE_Model(cfg_eva02, None, device, None, True).to(device)
+GLEEmodel_eva02.load_state_dict(checkpoints_eva, strict=False)
+GLEEmodel_eva02.eval()
+# inference_type = 'LSJ'
+
+
+
 pixel_mean = torch.Tensor( [123.675, 116.28, 103.53]).to(device).view(3, 1, 1)
 pixel_std = torch.Tensor([58.395, 57.12, 57.375]).to(device).view(3, 1, 1)
 normalizer = lambda x: (x - pixel_mean) / pixel_std
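This hunk registers a third checkpoint, the EVA02 backbone used by GLEE-Pro, with the same five-step recipe the file already uses for the R50 and SwinL models (note that it reuses the `conf_files_swin` name for the EVA02 config path). A hypothetical helper capturing that shared recipe might look like the sketch below; `build_glee_model` is illustrative and not in the repo, `get_cfg`/`add_deeplab_config` are standard detectron2 calls, and `add_glee_config`/`GLEE_Model` come from the GLEE package bundled with this Space.

```python
import torch
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
# add_glee_config and GLEE_Model are imported from the Space's bundled GLEE package.

def build_glee_model(config_file, weight_file, device):
    # The five steps this module repeats for R50, SwinL, and now EVA02.
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_glee_config(cfg)
    cfg.merge_from_file(config_file)
    model = GLEE_Model(cfg, None, device, None, True).to(device)
    ckpt = torch.load(weight_file, map_location=device)
    model.load_state_dict(ckpt, strict=False)  # strict=False tolerates key mismatches
    return model.eval()
```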
@@ -130,16 +144,26 @@ TEXT_Y_OFFSET_SCALE = 1e-2
 if inference_type != 'LSJ':
     resizer = torchvision.transforms.Resize(inference_size,antialias=True)
     videoresizer = torchvision.transforms.Resize(video_inference_size,antialias=True)
+else:
+    resizer = torchvision.transforms.Resize(size = 1535, max_size=1536, antialias=True)
+    videoresizer = torchvision.transforms.Resize(size = 1535, max_size=1536, antialias=True)
+
 
 
 def segment_image(img, prompt_mode, categoryname, custom_category, expressiong, results_select, num_inst_select, threshold_select, mask_image_mix_ration, model_selection):
     torch.cuda.empty_cache()
     if model_selection == 'GLEE-Plus (SwinL)':
         GLEEmodel = GLEEmodel_swin
+        inference_type = 'resize_shot'
         print('use GLEE-Plus')
-    else:
+    elif model_selection == 'GLEE-Lite (R50)':
+        inference_type = 'resize_shot'
         GLEEmodel = GLEEmodel_r50
         print('use GLEE-Lite')
+    else:
+        GLEEmodel = GLEEmodel_eva02
+        print('use GLEE-Pro')
+        inference_type = 'LSJ'
 
     copyed_img = img['background'][:,:,:3].copy()
 
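In `segment_image`, `inference_type` is now chosen per call from the model dropdown: 'resize_shot' for SwinL and R50, 'LSJ' for the new EVA02 branch. The LSJ resizers use torchvision's integer-`size` plus `max_size` form, which scales the short edge toward 1535 while capping the long edge at 1536, so every output fits the 1536x1536 canvas used later. A quick standalone check of that behavior (the input shape is an example, not from the app):

```python
import torch
import torchvision

lsj_resizer = torchvision.transforms.Resize(size=1535, max_size=1536, antialias=True)
frame = torch.rand(1, 3, 720, 1280)   # example 16:9 input
out = lsj_resizer(frame)
print(out.shape)                      # torch.Size([1, 3, 864, 1536]): long edge capped
assert max(out.shape[-2:]) <= 1536
```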
@@ -148,8 +172,12 @@ def segment_image(img, prompt_mode, categoryname, custom_category, expressiong,
     _,_, ori_height, ori_width = ori_image.shape
 
     if inference_type == 'LSJ':
-
-
+        resize_image = resizer(ori_image)
+        image_size = torch.as_tensor((resize_image.shape[-2],resize_image.shape[-1]))
+        re_size = resize_image.shape[-2:]
+        infer_image = torch.zeros(1,3,1536,1536).to(ori_image)
+        infer_image[:,:,:image_size[0],:image_size[1]] = resize_image
+        padding_size = (1536,1536)
     else:
         resize_image = resizer(ori_image)
         image_size = torch.as_tensor((resize_image.shape[-2],resize_image.shape[-1]))
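The filled-in LSJ branch resizes, records the pre-padding size, and pastes the frame into the top-left of a zero-filled 1536x1536 tensor; the identical block is added to `process_frames` further down. A minimal standalone sketch of the step, with an illustrative function name and example input:

```python
import torch
import torchvision

def pad_to_lsj_canvas(image, canvas=1536):
    """Resize so the long edge is at most `canvas`, then paste the result
    into the top-left corner of a zero-filled canvas x canvas tensor."""
    resizer = torchvision.transforms.Resize(size=canvas - 1, max_size=canvas,
                                            antialias=True)
    resized = resizer(image)               # (1, 3, h, w) with h, w <= canvas
    h, w = resized.shape[-2:]
    padded = torch.zeros(1, 3, canvas, canvas, dtype=image.dtype, device=image.device)
    padded[:, :, :h, :w] = resized         # zeros pad the right and bottom
    return padded, (h, w)

padded, re_size = pad_to_lsj_canvas(torch.rand(1, 3, 720, 1280))
```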
@@ -309,8 +337,9 @@ def segment_image(img, prompt_mode, categoryname, custom_category, expressiong,
 
     fakemask = torch.from_numpy(fakemask).unsqueeze(0).to(ori_image)
     if inference_type == 'LSJ':
-
-        infer_visual_prompt
+        resize_fakemask = resizer(fakemask)
+        infer_visual_prompt = torch.zeros(1,1536,1536).to(resize_fakemask)
+        infer_visual_prompt[:,:image_size[0],:image_size[1]] = resize_fakemask
     else:
         resize_fakemask = resizer(fakemask)
         if size_divisibility > 1:
@@ -377,8 +406,12 @@ def process_frames(frame_list):
     _,_, ori_height, ori_width = ori_image.shape
 
     if inference_type == 'LSJ':
-
-
+        resize_image = resizer(ori_image)
+        image_size = torch.as_tensor((resize_image.shape[-2],resize_image.shape[-1]))
+        re_size = resize_image.shape[-2:]
+        infer_image = torch.zeros(1,3,1536,1536).to(ori_image)
+        infer_image[:,:,:image_size[0],:image_size[1]] = resize_image
+        padding_size = (1536,1536)
     else:
         resize_image = videoresizer(ori_image)
         image_size = torch.as_tensor((resize_image.shape[-2],resize_image.shape[-1]))
@@ -414,14 +447,23 @@ def match_from_embds(tgt_embds, cur_embds):
 def segment_video(video, prompt_mode, categoryname, custom_category, expressiong, results_select, num_inst_select, threshold_select, mask_image_mix_ration, model_selection,video_frames_select, prompter):
     torch.cuda.empty_cache()
     ### model selection
+
+
     if model_selection == 'GLEE-Plus (SwinL)':
         GLEEmodel = GLEEmodel_swin
+        inference_type = 'resize_shot'
         print('use GLEE-Plus')
         clip_length = 2 #batchsize
-    else:
+    elif model_selection == 'GLEE-Lite (R50)':
+        inference_type = 'resize_shot'
         GLEEmodel = GLEEmodel_r50
         print('use GLEE-Lite')
         clip_length = 4 #batchsize
+    else:
+        GLEEmodel = GLEEmodel_eva02
+        print('use GLEE-Pro')
+        inference_type = 'LSJ'
+        clip_length = 1 #batchsize
 
     # read video and get sparse frames
     cap = cv2.VideoCapture(video)
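`segment_video` gets the same three-way selection as the image path, and additionally scales `clip_length`, the number of frames per forward pass, inversely with backbone size: 4 for R50, 2 for SwinL, 1 for EVA02. The ladder could equally be written as a lookup table, as in this illustrative sketch; the 'GLEE-Pro (EVA02)' key is an assumed label, since the diff's `else:` branch never names the third dropdown option.

```python
# (inference_type, clip_length) per dropdown choice; the GLEE-Pro key is assumed.
MODEL_TABLE = {
    'GLEE-Plus (SwinL)': ('resize_shot', 2),
    'GLEE-Lite (R50)':   ('resize_shot', 4),
    'GLEE-Pro (EVA02)':  ('LSJ', 1),
}
inference_type, clip_length = MODEL_TABLE.get(
    model_selection, MODEL_TABLE['GLEE-Pro (EVA02)'])
```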
@@ -678,8 +720,9 @@ def segment_video(video, prompt_mode, categoryname, custom_category, expressiong
 
     fakemask = torch.from_numpy(fakemask).unsqueeze(0).to(ori_image)
     if inference_type == 'LSJ':
-
-        infer_visual_prompt
+        resize_fakemask = resizer(fakemask)
+        infer_visual_prompt = torch.zeros(1,1536,1536).to(resize_fakemask)
+        infer_visual_prompt[:,:image_size[0],:image_size[1]] = resize_fakemask
     else:
         resize_fakemask = videoresizer(fakemask)
         if size_divisibility > 1: