Simultaneous-Segmented-Depth-Prediction

Paused

App Files Files Community

Alessio Grancini committed on Feb 9

Commit

8adc978

verified ·

1 Parent(s): 6490caa

Update monocular_depth_estimator.py

Browse files

Files changed (1) hide show

monocular_depth_estimator.py +22 -64

monocular_depth_estimator.py CHANGED Viewed

@@ -25,7 +25,7 @@ class MonocularDepthEstimator:
         square=False,
         grayscale=False):
-        # Store parameters but don't initialize CUDA
         self.model_type = model_type
         self.model_weights_path = model_weights_path
         self.is_optimize = optimize
@@ -37,15 +37,14 @@ class MonocularDepthEstimator:
         self.transform = None
         self.net_w = None
         self.net_h = None
-        print("Initializing parameters...")
-        # Download model if needed
         if not os.path.exists(model_weights_path+model_type+".pt"):
             print("Model file not found. Downloading...")
             urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
             print("Model file downloaded successfully.")
     def load_model_if_needed(self):
         if self.model is None:
             print("Loading MiDaS model...")
@@ -58,62 +57,50 @@ class MonocularDepthEstimator:
                 self.is_square
             )
             print("Model loaded successfully")
-            print("Net width and height: ", (self.net_w, self.net_h))
     @spaces.GPU
     def predict(self, image, target_size):
-        # Load model if not loaded
         self.load_model_if_needed()
-        # convert img to tensor and load to gpu
         img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)
         if self.is_optimize:
             img_tensor = img_tensor.to(memory_format=torch.channels_last)
             img_tensor = img_tensor.half()
-        prediction = self.model.forward(img_tensor)
-        prediction = (
-            torch.nn.functional.interpolate(
-                prediction.unsqueeze(1),
-                size=target_size[::-1],
-                mode="bicubic",
-                align_corners=False,
             )
-            .squeeze()
-            .cpu()
-            .numpy()
-        )
         return prediction
     def process_prediction(self, depth_map):
-        # normalizing depth image
         depth_min = depth_map.min()
         depth_max = depth_map.max()
         normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
         grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
         depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
         return normalized_depth/255, depth_colormap/255
     @spaces.GPU
     def make_prediction(self, image):
-        image = image.copy()
         try:
             print("Starting depth estimation...")
-            with torch.no_grad():
-                original_image_rgb = np.flip(image, 2)  # in [0, 255] (flip required to get RGB)
-                # resizing the image to feed to the model
-                self.load_model_if_needed()
-                image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
-                # monocular depth prediction
-                pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
-                # process the model predictions
-                depthmap, depth_colormap = self.process_prediction(pred)
             print("Depth estimation complete")
             return depthmap, depth_colormap
         except Exception as e:
@@ -121,36 +108,7 @@ class MonocularDepthEstimator:
             import traceback
             print(traceback.format_exc())
             raise
-    @spaces.GPU
-    def run(self, input_path):
-        cap = cv2.VideoCapture(input_path)
-        if not cap.isOpened():
-            print("Error opening video file")
-            return
-        with torch.no_grad():
-             while cap.isOpened():
-                inference_start_time = time.time()
-                ret, frame = cap.read()
-                if ret == True:
-                    _, depth_colormap = self.make_prediction(frame)
-                    inference_end_time = time.time()
-                    fps = round(1/(inference_end_time - inference_start_time))
-                    cv2.putText(depth_colormap, f'FPS: {fps}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (10, 255, 100), 2)
-                    cv2.imshow('MiDaS Depth Estimation - Press Escape to close window ', depth_colormap)
-                    if cv2.waitKey(1) == 27:  # Escape key
-                        break
-                else:
-                    break
-        cap.release()
-        cv2.destroyAllWindows()
     if __name__ == "__main__":
         depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
         depth_estimator.run("assets/videos/testvideo2.mp4")

         square=False,
         grayscale=False):
+        # Don't initialize any CUDA/GPU stuff here
         self.model_type = model_type
         self.model_weights_path = model_weights_path
         self.is_optimize = optimize
         self.transform = None
         self.net_w = None
         self.net_h = None
+        print("Initializing parameters...")
         if not os.path.exists(model_weights_path+model_type+".pt"):
             print("Model file not found. Downloading...")
             urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
             print("Model file downloaded successfully.")
+    @spaces.GPU
     def load_model_if_needed(self):
         if self.model is None:
             print("Loading MiDaS model...")
                 self.is_square
             )
             print("Model loaded successfully")
     @spaces.GPU
     def predict(self, image, target_size):
         self.load_model_if_needed()
         img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)
         if self.is_optimize:
             img_tensor = img_tensor.to(memory_format=torch.channels_last)
             img_tensor = img_tensor.half()
+        with torch.no_grad():
+            prediction = self.model.forward(img_tensor)
+            prediction = (
+                torch.nn.functional.interpolate(
+                    prediction.unsqueeze(1),
+                    size=target_size[::-1],
+                    mode="bicubic",
+                    align_corners=False,
+                )
+                .squeeze()
+                .cpu()
+                .numpy()
             )
         return prediction
     def process_prediction(self, depth_map):
         """Normalize a raw depth map and build a colorized visualization.

         Args:
             depth_map: 2-D NumPy array of raw depth predictions
                 (assumed 2-D because a channel axis is appended at index 2
                 below — TODO confirm against callers).

         Returns:
             A tuple ``(normalized_depth, depth_colormap)``: the min-max
             normalized depth map, and the INFERNO color-mapped image divided
             by 255.
         """
         depth_min = depth_map.min()
         depth_max = depth_map.max()
         depth_range = depth_max - depth_min
         if depth_range == 0:
             # A constant depth map would otherwise produce 0/0 = NaN, which
             # is undefined when cast to uint8 for the colormap below.
             normalized_depth = np.zeros_like(depth_map, dtype=np.float32)
         else:
             normalized_depth = 255 * (depth_map - depth_min) / depth_range
         # Replicate the single channel three times before color mapping.
         grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
         depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
         return normalized_depth/255, depth_colormap/255
     @spaces.GPU
     def make_prediction(self, image):
         try:
             print("Starting depth estimation...")
+            image = image.copy()
+            original_image_rgb = np.flip(image, 2)
+            self.load_model_if_needed()
+            image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
+            pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
+            depthmap, depth_colormap = self.process_prediction(pred)
             print("Depth estimation complete")
             return depthmap, depth_colormap
         except Exception as e:
             import traceback
             print(traceback.format_exc())
             raise
     # NOTE(review): this commit removes the run() method shown in the deleted
     # lines of this diff, yet the entry point below still calls
     # depth_estimator.run(...). Unless run() is defined elsewhere, this will
     # raise AttributeError when executed as a script — confirm and update.
     if __name__ == "__main__":
         depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
         depth_estimator.run("assets/videos/testvideo2.mp4")