Alessio Grancini committed on
Commit
da90e52
·
verified ·
1 Parent(s): 831d4de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -43
app.py CHANGED
@@ -145,75 +145,87 @@ def get_camera_matrix(depth_estimator):
145
 
146
@spaces.GPU
def get_detection_data(image):
    """
    Process an image and return structured detection data with camera parameters.

    Args:
        image: Input image as a numpy array (H, W, C).

    Returns:
        dict with keys:
            "detections": list of per-object dicts (category, confidence,
                normalized bbox, depth, vertices, color, mask),
            "depth_map": full depth map as nested lists,
            "camera_params": intrinsics from get_camera_matrix(),
            "image_size": {"width", "height"} of the resized image.

    Raises:
        Re-raises any exception from the underlying predictors after logging.
    """
    try:
        # Resize image to the pipeline's working resolution.
        image = utils.resize(image)

        # Run segmentation and monocular depth estimation.
        image_segmentation, objects_data = img_seg.predict(image)
        depthmap, depth_colormap = depth_estimator.make_prediction(image)

        # Image dimensions, used to normalize coordinates below.
        height, width = image.shape[:2]

        # Process each detection.
        detections = []
        for cls_id, category, center, mask, color in objects_data:
            # Derive the bounding box directly from the mask's non-zero
            # pixels. (The previous code called get_object_bbox(), which
            # was never implemented and raised NameError at runtime.)
            ys, xs = np.where(mask > 0)
            if xs.size == 0 or ys.size == 0:
                # Empty mask: nothing to box, skip this detection.
                continue
            bbox = [int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())]

            # Normalized [x1, y1, x2, y2] in the 0..1 range.
            bbox_norm = [
                bbox[0] / width,
                bbox[1] / height,
                bbox[2] / width,
                bbox[3] / height,
            ]

            # 3D box corner vertices from the normalized bbox.
            vertices = get_box_vertices(bbox_norm)

            # Depth sampled at the bbox center.
            depth_value = depth_at_center(depthmap, bbox)

            detections.append({
                "category": category,
                "confidence": 1.0,  # TODO: propagate real confidence if available
                "bbox": bbox_norm,
                "depth": float(depth_value),  # native Python float for JSON safety
                "vertices": vertices,
                "color": [float(c / 255) for c in color],  # normalize to 0..1
                "mask": mask.tolist() if isinstance(mask, np.ndarray) else mask,
            })

        # Prepare response.
        return {
            "detections": detections,
            "depth_map": depthmap.tolist(),
            "camera_params": get_camera_matrix(depth_estimator),
            "image_size": {
                "width": width,
                "height": height,
            },
        }

    except Exception as e:
        print(f"Error in get_detection_data: {str(e)}")
        raise

# ENDS
218
 
219
  def cancel():
 
145
 
146
@spaces.GPU
def get_detection_data(image):
    """Get structured detection data with depth information.

    Args:
        image: Input image as a numpy array (H, W, C).

    Returns:
        dict with keys:
            "detections": per-object dicts (id, category, normalized center,
                normalized bbox, mean depth, color, mask, confidence),
            "depth_map": full depth map as nested lists,
            "camera_params": fx/fy/cx/cy intrinsics from the depth estimator,
            "image_size": {"width", "height"} of the resized image,
            "point_clouds": per-object point lists with normalized colors.

    Raises:
        Re-raises any exception from the underlying predictors after logging.
    """
    try:
        # Resize image to standard size.
        image = utils.resize(image)

        # Dimensions are loop-invariant, so compute them once here. This
        # also fixes a bug: they were previously assigned inside the
        # per-object loop, so an empty objects_data left width/height
        # undefined and building the response raised NameError.
        height, width = image.shape[:2]

        # Get detections and depth.
        image_segmentation, objects_data = img_seg.predict(image)
        depthmap, depth_colormap = depth_estimator.make_prediction(image)

        # Process each detection.
        detections = []
        for data in objects_data:
            cls_id, cls_name, cls_center, cls_mask, cls_clr = data

            # Mean depth over this object's mask.
            masked_depth, mean_depth = utils.get_masked_depth(depthmap, cls_mask)

            # Bounding box from the mask's non-zero pixels.
            y_indices, x_indices = np.where(cls_mask > 0)
            if len(x_indices) == 0 or len(y_indices) == 0:
                continue  # empty mask: nothing to box
            x1, x2 = np.min(x_indices), np.max(x_indices)
            y1, y2 = np.min(y_indices), np.max(y_indices)

            # Normalized [x1, y1, x2, y2] in the 0..1 range.
            bbox_normalized = [
                float(x1 / width),
                float(y1 / height),
                float(x2 / width),
                float(y2 / height),
            ]

            detections.append({
                "id": int(cls_id),
                "category": cls_name,
                "center": [
                    float(cls_center[0] / width),
                    float(cls_center[1] / height),
                ],
                "bbox": bbox_normalized,
                "depth": float(mean_depth * 10),  # scaled to meters as done in utils
                "color": [float(c / 255) for c in cls_clr],  # normalize to 0..1
                "mask": cls_mask.tolist(),
                "confidence": 1.0,  # TODO: propagate real confidence if available
            })

        # Camera intrinsics from the depth estimator.
        camera_params = {
            "fx": depth_estimator.fx_depth,
            "fy": depth_estimator.fy_depth,
            "cx": depth_estimator.cx_depth,
            "cy": depth_estimator.cy_depth,
        }

        # Per-object point clouds for 3D visualization.
        point_clouds = utils.generate_obj_pcd(depthmap, objects_data)
        pcd_data = [
            {
                "points": np.asarray(pcd.points).tolist(),
                "color": [float(c / 255) for c in color],
            }
            for pcd, color in point_clouds
        ]

        return {
            "detections": detections,
            "depth_map": depthmap.tolist(),
            "camera_params": camera_params,
            "image_size": {
                "width": width,
                "height": height,
            },
            "point_clouds": pcd_data,
        }

    except Exception as e:
        print(f"Error in get_detection_data: {str(e)}")
        raise

# ENDS
230
 
231
  def cancel():