Upload app.py

app.py CHANGED
@@ -87,50 +87,53 @@ def create_refseg_demo(model, tokenizer, device):
         submit = gr.Button("Submit")
 
         def on_submit(image, text):
-
-
+            # Convert PIL -> np array
+            image_np = np.array(image).copy()
+            transform = transforms.ToTensor()
+            image_t = transform(image).unsqueeze(0).to(device)
             image_t = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])(image_t)
-
-
-
+            image_t = torch.nn.functional.interpolate(image_t, (512, 512), mode='bilinear', align_corners=True)
+
             with torch.no_grad():
                 out = model(image_t, text)
 
-            #
-            if isinstance(out,
-                mask = out
-            elif isinstance(out, torch.Tensor):
-                pred = out.float()
-                if pred.dim() == 2:
-                    mask = pred.cpu().numpy()
-                elif pred.dim() == 3:
-                    # (N,H,W) → squeeze batch
-                    mask = pred.squeeze(0).cpu().numpy()
-                elif pred.dim() == 4:
-                    # logits (N,C,H,W) → argmax over channel
-                    pred = torch.nn.functional.interpolate(pred, size=orig_shape, mode='bilinear', align_corners=True)
-                    mask = pred.argmax(1).squeeze().cpu().numpy()
-                else:
-                    raise RuntimeError(f"Unexpected output shape {pred.shape}")
+            # Ensure numpy mask
+            if isinstance(out, torch.Tensor):
+                mask = out.squeeze().detach().cpu().numpy()
             else:
-
+                mask = out
+
+            # Convert to binary mask
+            if mask.ndim > 2:
+                mask = np.argmax(mask, axis=0)
+            mask = (mask > 0).astype(np.uint8)
 
-            #
-
-            mask = (mask > 0.5).astype(np.uint8)
+            # Resize mask to original image size
+            mask = cv2.resize(mask, (image_np.shape[1], image_np.shape[0]), interpolation=cv2.INTER_NEAREST)
 
-            #
+            # Overlay mask
             alpha = 0.65
-            overlay =
+            overlay = image_np.copy()
             overlay[mask == 0] = (overlay[mask == 0] * alpha).astype(np.uint8)
+
+            # Draw contours
             contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
             cv2.drawContours(overlay, contours, -1, (0, 255, 0), 2)
 
             return Image.fromarray(overlay)
 
         submit.click(on_submit, inputs=[input_image, input_text], outputs=refseg_image)
-        examples = gr.Examples(
-
+        examples = gr.Examples(
+            examples=[
+                ["imgs/test_img2.jpg", "green plant"],
+                ["imgs/test_img3.jpg", "chair"],
+                ["imgs/test_img4.jpg", "left green plant"],
+                ["imgs/test_img5.jpg", "man walking on foot"],
+                ["imgs/test_img5.jpg", "the rightest camel"],
+            ],
+            inputs=[input_image, input_text]
+        )
+
 
 
 def main():
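For reference, the updated on_submit pipeline can be exercised outside Gradio. The following is a minimal sketch, not code from app.py: it substitutes random two-channel logits for the real `model(image_t, text)` output and a blank canvas for the uploaded image, then applies the same squeeze/argmax/binarize/resize/overlay steps the new handler uses.

    import cv2
    import numpy as np
    import torch

    # Stand-ins (assumptions): a fake (1, 2, 512, 512) logit map in place of the
    # model output, and a blank 640x480 RGB canvas in place of the uploaded image.
    out = torch.randn(1, 2, 512, 512)
    image_np = np.zeros((480, 640, 3), dtype=np.uint8)

    # Ensure numpy mask: drop the batch dim and move to CPU, as in the new handler.
    mask = out.squeeze().detach().cpu().numpy()          # (2, 512, 512)

    # Convert to binary mask: argmax over channels, then treat class 0 as background.
    if mask.ndim > 2:
        mask = np.argmax(mask, axis=0)
    mask = (mask > 0).astype(np.uint8)

    # Resize back to the original image size. INTER_NEAREST keeps the mask
    # strictly {0, 1}; a bilinear resize would blur in fractional values.
    mask = cv2.resize(mask, (image_np.shape[1], image_np.shape[0]),
                      interpolation=cv2.INTER_NEAREST)

    # Dim the background and trace the predicted region in green.
    alpha = 0.65
    overlay = image_np.copy()
    overlay[mask == 0] = (overlay[mask == 0] * alpha).astype(np.uint8)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(overlay, contours, -1, (0, 255, 0), 2)

    print(overlay.shape, int(mask.sum()))                # sanity check

One behavioral difference visible in the diff: the new handler binarizes with `mask > 0` after an argmax over channels, rather than thresholding at 0.5 as the removed code did, so any non-background argmax wins regardless of confidence.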