Alessio Grancini committed on
Commit
da90e52
·
verified ·
1 Parent(s): 831d4de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -43
app.py CHANGED
@@ -145,75 +145,87 @@ def get_camera_matrix(depth_estimator):
145
 
146
@spaces.GPU
def get_detection_data(image):
    """
    Process an image and return structured detection data with camera parameters.

    Args:
        image: Input image as a numpy array (H, W, C).

    Returns:
        dict with keys:
            "detections": list of per-object dicts (category, confidence,
                normalized bbox, depth, vertices, color, mask),
            "depth_map": full depth map as nested lists,
            "camera_params": intrinsics from get_camera_matrix(),
            "image_size": {"width", "height"} of the resized image.

    Raises:
        Re-raises any exception from the underlying predictors after logging.
    """
    try:
        # Resize image to the pipeline's working resolution.
        image = utils.resize(image)

        # Run segmentation and monocular depth estimation.
        image_segmentation, objects_data = img_seg.predict(image)
        depthmap, depth_colormap = depth_estimator.make_prediction(image)

        # Image dimensions, used to normalize coordinates below.
        height, width = image.shape[:2]

        # Process each detection.
        detections = []
        for cls_id, category, center, mask, color in objects_data:
            # Derive the bounding box directly from the mask's non-zero
            # pixels. (The previous code called get_object_bbox(), which
            # was never implemented and raised NameError at runtime.)
            ys, xs = np.where(mask > 0)
            if xs.size == 0 or ys.size == 0:
                # Empty mask: nothing to box, skip this detection.
                continue
            bbox = [int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())]

            # Normalized [x1, y1, x2, y2] in the 0..1 range.
            bbox_norm = [
                bbox[0] / width,
                bbox[1] / height,
                bbox[2] / width,
                bbox[3] / height,
            ]

            # 3D box corner vertices from the normalized bbox.
            vertices = get_box_vertices(bbox_norm)

            # Depth sampled at the bbox center.
            depth_value = depth_at_center(depthmap, bbox)

            detections.append({
                "category": category,
                "confidence": 1.0,  # TODO: propagate real confidence if available
                "bbox": bbox_norm,
                "depth": float(depth_value),  # native Python float for JSON safety
                "vertices": vertices,
                "color": [float(c / 255) for c in color],  # normalize to 0..1
                "mask": mask.tolist() if isinstance(mask, np.ndarray) else mask,
            })

        # Prepare response.
        return {
            "detections": detections,
            "depth_map": depthmap.tolist(),
            "camera_params": get_camera_matrix(depth_estimator),
            "image_size": {
                "width": width,
                "height": height,
            },
        }

    except Exception as e:
        print(f"Error in get_detection_data: {str(e)}")
        raise

# ENDS
218
 
219
  def cancel():
 
145
 
146
@spaces.GPU
def get_detection_data(image):
    """Get structured detection data with depth information.

    Args:
        image: Input image as a numpy array (H, W, C).

    Returns:
        dict with keys:
            "detections": per-object dicts (id, category, normalized center,
                normalized bbox, mean depth, color, mask, confidence),
            "depth_map": full depth map as nested lists,
            "camera_params": fx/fy/cx/cy intrinsics from the depth estimator,
            "image_size": {"width", "height"} of the resized image,
            "point_clouds": per-object point lists with normalized colors.

    Raises:
        Re-raises any exception from the underlying predictors after logging.
    """
    try:
        # Resize image to standard size.
        image = utils.resize(image)

        # Dimensions are loop-invariant, so compute them once here. This
        # also fixes a bug: they were previously assigned inside the
        # per-object loop, so an empty objects_data left width/height
        # undefined and building the response raised NameError.
        height, width = image.shape[:2]

        # Get detections and depth.
        image_segmentation, objects_data = img_seg.predict(image)
        depthmap, depth_colormap = depth_estimator.make_prediction(image)

        # Process each detection.
        detections = []
        for data in objects_data:
            cls_id, cls_name, cls_center, cls_mask, cls_clr = data

            # Mean depth over this object's mask.
            masked_depth, mean_depth = utils.get_masked_depth(depthmap, cls_mask)

            # Bounding box from the mask's non-zero pixels.
            y_indices, x_indices = np.where(cls_mask > 0)
            if len(x_indices) == 0 or len(y_indices) == 0:
                continue  # empty mask: nothing to box
            x1, x2 = np.min(x_indices), np.max(x_indices)
            y1, y2 = np.min(y_indices), np.max(y_indices)

            # Normalized [x1, y1, x2, y2] in the 0..1 range.
            bbox_normalized = [
                float(x1 / width),
                float(y1 / height),
                float(x2 / width),
                float(y2 / height),
            ]

            detections.append({
                "id": int(cls_id),
                "category": cls_name,
                "center": [
                    float(cls_center[0] / width),
                    float(cls_center[1] / height),
                ],
                "bbox": bbox_normalized,
                "depth": float(mean_depth * 10),  # scaled to meters as done in utils
                "color": [float(c / 255) for c in cls_clr],  # normalize to 0..1
                "mask": cls_mask.tolist(),
                "confidence": 1.0,  # TODO: propagate real confidence if available
            })

        # Camera intrinsics from the depth estimator.
        camera_params = {
            "fx": depth_estimator.fx_depth,
            "fy": depth_estimator.fy_depth,
            "cx": depth_estimator.cx_depth,
            "cy": depth_estimator.cy_depth,
        }

        # Per-object point clouds for 3D visualization.
        point_clouds = utils.generate_obj_pcd(depthmap, objects_data)
        pcd_data = [
            {
                "points": np.asarray(pcd.points).tolist(),
                "color": [float(c / 255) for c in color],
            }
            for pcd, color in point_clouds
        ]

        return {
            "detections": detections,
            "depth_map": depthmap.tolist(),
            "camera_params": camera_params,
            "image_size": {
                "width": width,
                "height": height,
            },
            "point_clouds": pcd_data,
        }

    except Exception as e:
        print(f"Error in get_detection_data: {str(e)}")
        raise

# ENDS
230
 
231
  def cancel():