Spaces:

MykolaL
/

evp

Running on L4

MykolaL committed on Sep 3

Commit

c866eb2

verified ·

1 Parent(s): 3099000

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -58,7 +58,10 @@ def create_depth_demo(model, device):
         image = F.pad(image, (0, 0, 40, 0))
         with torch.no_grad():
             pred = model(image)#['pred_d']
-            pred = torch.from_numpy(pred).to(device)
         pred = pred[:,:,40:,:]
         pred = torch.nn.functional.interpolate(pred, shape[2:], mode='bilinear', align_corners=True)
@@ -91,9 +94,28 @@ def create_refseg_demo(model, tokenizer, device):
         image_t = torch.nn.functional.interpolate(image_t, (512,512), mode='bilinear', align_corners=True)
         with torch.no_grad():
-            pred = model(image_t, text)
-        pred = torch.from_numpy(pred).unsqueeze(0).to(device)
         pred = torch.nn.functional.interpolate(pred.float(), shape[2:], mode='bilinear', align_corners=True)
         output_mask = pred.cpu().argmax(1).data.numpy().squeeze()
         alpha = 0.65

         image = F.pad(image, (0, 0, 40, 0))
         with torch.no_grad():
             pred = model(image)#['pred_d']
+            pred = torch.from_numpy(pred).to(device).float()
+        if pred.dim() == 2:         # H×W
+            pred = pred.unsqueeze(0).unsqueeze(0)
         pred = pred[:,:,40:,:]
         pred = torch.nn.functional.interpolate(pred, shape[2:], mode='bilinear', align_corners=True)
         image_t = torch.nn.functional.interpolate(image_t, (512,512), mode='bilinear', align_corners=True)
         with torch.no_grad():
+            out = model(image_t, text)
+        if isinstance(out, np.ndarray):
+            pred = torch.from_numpy(out).to(device)
+        else:
+            pred = out
+        pred = pred.float()
+        if pred.dim() == 2:
+            # H×W mask -> N×C×H×W
+            pred = pred.unsqueeze(0).unsqueeze(0)
+            one_channel_mask = True
+        elif pred.dim() == 3:
+            # N×H×W -> add channel
+            pred = pred.unsqueeze(1)
+            one_channel_mask = True
+        elif pred.dim() == 4:
+            # N×C×H×W (logits) -> argmax later
+            one_channel_mask = (pred.shape[1] == 1)
         pred = torch.nn.functional.interpolate(pred.float(), shape[2:], mode='bilinear', align_corners=True)
         output_mask = pred.cpu().argmax(1).data.numpy().squeeze()
         alpha = 0.65