Alessio Grancini committed on
Commit
8adc978
·
verified ·
1 Parent(s): 6490caa

Update monocular_depth_estimator.py

Browse files
Files changed (1) hide show
  1. monocular_depth_estimator.py +22 -64
monocular_depth_estimator.py CHANGED
@@ -25,7 +25,7 @@ class MonocularDepthEstimator:
25
  square=False,
26
  grayscale=False):
27
 
28
- # Store parameters but don't initialize CUDA
29
  self.model_type = model_type
30
  self.model_weights_path = model_weights_path
31
  self.is_optimize = optimize
@@ -37,15 +37,14 @@ class MonocularDepthEstimator:
37
  self.transform = None
38
  self.net_w = None
39
  self.net_h = None
40
-
41
- print("Initializing parameters...")
42
 
43
- # Download model if needed
44
  if not os.path.exists(model_weights_path+model_type+".pt"):
45
  print("Model file not found. Downloading...")
46
  urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
47
  print("Model file downloaded successfully.")
48
 
 
49
  def load_model_if_needed(self):
50
  if self.model is None:
51
  print("Loading MiDaS model...")
@@ -58,62 +57,50 @@ class MonocularDepthEstimator:
58
  self.is_square
59
  )
60
  print("Model loaded successfully")
61
- print("Net width and height: ", (self.net_w, self.net_h))
62
 
63
  @spaces.GPU
64
  def predict(self, image, target_size):
65
- # Load model if not loaded
66
  self.load_model_if_needed()
67
-
68
- # convert img to tensor and load to gpu
69
  img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)
70
 
71
  if self.is_optimize:
72
  img_tensor = img_tensor.to(memory_format=torch.channels_last)
73
  img_tensor = img_tensor.half()
74
 
75
- prediction = self.model.forward(img_tensor)
76
- prediction = (
77
- torch.nn.functional.interpolate(
78
- prediction.unsqueeze(1),
79
- size=target_size[::-1],
80
- mode="bicubic",
81
- align_corners=False,
 
 
 
 
 
82
  )
83
- .squeeze()
84
- .cpu()
85
- .numpy()
86
- )
87
 
88
  return prediction
89
 
90
  def process_prediction(self, depth_map):
91
- # normalizing depth image
92
  depth_min = depth_map.min()
93
  depth_max = depth_map.max()
94
  normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
95
-
96
  grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
97
  depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
98
-
99
  return normalized_depth/255, depth_colormap/255
100
 
101
  @spaces.GPU
102
  def make_prediction(self, image):
103
- image = image.copy()
104
  try:
105
  print("Starting depth estimation...")
106
- with torch.no_grad():
107
- original_image_rgb = np.flip(image, 2) # in [0, 255] (flip required to get RGB)
108
- # resizing the image to feed to the model
109
- self.load_model_if_needed()
110
- image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
111
-
112
- # monocular depth prediction
113
- pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
114
-
115
- # process the model predictions
116
- depthmap, depth_colormap = self.process_prediction(pred)
117
  print("Depth estimation complete")
118
  return depthmap, depth_colormap
119
  except Exception as e:
@@ -121,36 +108,7 @@ class MonocularDepthEstimator:
121
  import traceback
122
  print(traceback.format_exc())
123
  raise
124
-
125
- @spaces.GPU
126
- def run(self, input_path):
127
- cap = cv2.VideoCapture(input_path)
128
-
129
- if not cap.isOpened():
130
- print("Error opening video file")
131
- return
132
-
133
- with torch.no_grad():
134
- while cap.isOpened():
135
- inference_start_time = time.time()
136
- ret, frame = cap.read()
137
-
138
- if ret == True:
139
- _, depth_colormap = self.make_prediction(frame)
140
- inference_end_time = time.time()
141
- fps = round(1/(inference_end_time - inference_start_time))
142
- cv2.putText(depth_colormap, f'FPS: {fps}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (10, 255, 100), 2)
143
- cv2.imshow('MiDaS Depth Estimation - Press Escape to close window ', depth_colormap)
144
-
145
- if cv2.waitKey(1) == 27: # Escape key
146
- break
147
- else:
148
- break
149
-
150
- cap.release()
151
- cv2.destroyAllWindows()
152
-
153
-
154
  if __name__ == "__main__":
155
  depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
156
  depth_estimator.run("assets/videos/testvideo2.mp4")
 
25
  square=False,
26
  grayscale=False):
27
 
28
+ # Don't initialize any CUDA/GPU stuff here
29
  self.model_type = model_type
30
  self.model_weights_path = model_weights_path
31
  self.is_optimize = optimize
 
37
  self.transform = None
38
  self.net_w = None
39
  self.net_h = None
 
 
40
 
41
+ print("Initializing parameters...")
42
  if not os.path.exists(model_weights_path+model_type+".pt"):
43
  print("Model file not found. Downloading...")
44
  urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
45
  print("Model file downloaded successfully.")
46
 
47
+ @spaces.GPU
48
  def load_model_if_needed(self):
49
  if self.model is None:
50
  print("Loading MiDaS model...")
 
57
  self.is_square
58
  )
59
  print("Model loaded successfully")
 
60
 
61
  @spaces.GPU
62
  def predict(self, image, target_size):
 
63
  self.load_model_if_needed()
 
 
64
  img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)
65
 
66
  if self.is_optimize:
67
  img_tensor = img_tensor.to(memory_format=torch.channels_last)
68
  img_tensor = img_tensor.half()
69
 
70
+ with torch.no_grad():
71
+ prediction = self.model.forward(img_tensor)
72
+ prediction = (
73
+ torch.nn.functional.interpolate(
74
+ prediction.unsqueeze(1),
75
+ size=target_size[::-1],
76
+ mode="bicubic",
77
+ align_corners=False,
78
+ )
79
+ .squeeze()
80
+ .cpu()
81
+ .numpy()
82
  )
 
 
 
 
83
 
84
  return prediction
85
 
86
  def process_prediction(self, depth_map):
 
87
  depth_min = depth_map.min()
88
  depth_max = depth_map.max()
89
  normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
 
90
  grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
91
  depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
 
92
  return normalized_depth/255, depth_colormap/255
93
 
94
  @spaces.GPU
95
  def make_prediction(self, image):
 
96
  try:
97
  print("Starting depth estimation...")
98
+ image = image.copy()
99
+ original_image_rgb = np.flip(image, 2)
100
+ self.load_model_if_needed()
101
+ image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
102
+ pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
103
+ depthmap, depth_colormap = self.process_prediction(pred)
 
 
 
 
 
104
  print("Depth estimation complete")
105
  return depthmap, depth_colormap
106
  except Exception as e:
 
108
  import traceback
109
  print(traceback.format_exc())
110
  raise
111
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  if __name__ == "__main__":
113
  depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
114
  depth_estimator.run("assets/videos/testvideo2.mp4")