Simultaneous-Segmented-Depth-Prediction

Sleeping

App Files Files Community

Alessio Grancini committed on Feb 12

Commit

a9bcd01

verified ·

1 Parent(s): a4031b7

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -31

app.py CHANGED Viewed

@@ -143,32 +143,56 @@ def get_camera_matrix(depth_estimator):
         "cy": depth_estimator.cy_depth
     }
 @spaces.GPU
 def get_detection_data(image):
-    """Get structured detection data with depth information"""
-    width, height = 640, 480  # Set default values to avoid UnboundLocalError
     try:
-        # Resize image to standard size
         image = utils.resize(image)
-        # Ensure width and height are properly set
         if hasattr(image, "shape"):
-            height, width = image.shape[:2]  # Extract actual dimensions
         # Get detections and depth
         image_segmentation, objects_data = img_seg.predict(image)
         depthmap, depth_colormap = depth_estimator.make_prediction(image)
-        # Process each detection
         detections = []
         for data in objects_data:
             cls_id, cls_name, cls_center, cls_mask, cls_clr = data
-            # Get masked depth for this object
             masked_depth, mean_depth = utils.get_masked_depth(depthmap, cls_mask)
-            # Get bounding box from mask
             y_indices, x_indices = np.where(cls_mask > 0)
             if len(x_indices) > 0 and len(y_indices) > 0:
                 x1, x2 = np.min(x_indices), np.max(x_indices)
@@ -192,44 +216,35 @@ def get_detection_data(image):
                     float(cls_center[1] / height),
                 ],
                 "bbox": bbox_normalized,
-                "depth": float(mean_depth * 10),  # Convert to meters as done in utils
                 "color": [float(c / 255) for c in cls_clr],
                 "mask": cls_mask.tolist(),
-                "confidence": 1.0,  # Add actual confidence if available
             }
             detections.append(detection)
-        # Get camera parameters from depth estimator (check if attributes exist)
-        try:
-            camera_params = {
-                "fx": getattr(depth_estimator, "fx_depth", 0),
-                "fy": getattr(depth_estimator, "fy_depth", 0),
-                "cx": getattr(depth_estimator, "cx_depth", width // 2),
-                "cy": getattr(depth_estimator, "cy_depth", height // 2),
-            }
-        except AttributeError:
-            print("⚠️ Camera parameters are not properly set in depth_estimator.")
-            camera_params = {"fx": 0, "fy": 0, "cx": width // 2, "cy": height // 2}
-        # Generate point cloud data if needed
-        point_clouds = utils.generate_obj_pcd(depthmap, objects_data)
-        pcd_data = [
-            {"points": np.asarray(pcd.points).tolist(), "color": [float(c / 255) for c in color]}
-            for pcd, color in point_clouds
-        ]
         return {
             "detections": detections,
-            "depth_map": depthmap.tolist(),
             "camera_params": camera_params,
             "image_size": {"width": width, "height": height},
-            "point_clouds": pcd_data,
         }
     except Exception as e:
         print(f"🚨 Error in get_detection_data: {str(e)}")
         return {"error": str(e)}
 def cancel():
     """Set the module-level ``CANCEL_PROCESSING`` flag to ``True``."""
     # Without the ``global`` declaration the assignment below would bind a
     # function-local name that is discarded on return, leaving any
     # module-level flag untouched.
     # NOTE(review): the code that reads this flag is not visible here --
     # confirm it reads the module-level name.
     global CANCEL_PROCESSING
     CANCEL_PROCESSING = True

         "cy": depth_estimator.cy_depth
     }
 @spaces.GPU
 def get_detection_data(image):
+    """Get structured detection data with depth information, using Base64 image encoding."""
+    def decode_base64_image(base64_string):
+        """Decodes Base64 string into a NumPy image."""
+        try:
+            img_data = base64.b64decode(base64_string)
+            img = Image.open(BytesIO(img_data))
+            img = np.array(img)
+            return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # Convert to BGR for OpenCV
+        except Exception as e:
+            print(f"🚨 Error decoding base64 image: {e}")
+            return None
+    def encode_base64_image(image):
+        """Encodes a NumPy image into a Base64 string."""
+        _, buffer = cv2.imencode('.png', image)
+        return base64.b64encode(buffer).decode("utf-8")
+    width, height = 640, 480  # Default values
     try:
+        if isinstance(image, str):  # Ensure we're handling a Base64 string
+            image = decode_base64_image(image)
+            if image is None:
+                return {"error": "Invalid base64 image data"}
+        # Resize image
         image = utils.resize(image)
+        # Extract dimensions
         if hasattr(image, "shape"):
+            height, width = image.shape[:2]
         # Get detections and depth
         image_segmentation, objects_data = img_seg.predict(image)
         depthmap, depth_colormap = depth_estimator.make_prediction(image)
+        # Encode results as Base64
+        segmentation_b64 = encode_base64_image(image_segmentation)
+        depth_b64 = encode_base64_image(depth_colormap)
+        # Process detections
         detections = []
         for data in objects_data:
             cls_id, cls_name, cls_center, cls_mask, cls_clr = data
             masked_depth, mean_depth = utils.get_masked_depth(depthmap, cls_mask)
             y_indices, x_indices = np.where(cls_mask > 0)
             if len(x_indices) > 0 and len(y_indices) > 0:
                 x1, x2 = np.min(x_indices), np.max(x_indices)
                     float(cls_center[1] / height),
                 ],
                 "bbox": bbox_normalized,
+                "depth": float(mean_depth * 10),  # Convert to meters
                 "color": [float(c / 255) for c in cls_clr],
                 "mask": cls_mask.tolist(),
+                "confidence": 1.0,  # Placeholder confidence
             }
             detections.append(detection)
+        # Camera parameters
+        camera_params = {
+            "fx": getattr(depth_estimator, "fx_depth", 0),
+            "fy": getattr(depth_estimator, "fy_depth", 0),
+            "cx": getattr(depth_estimator, "cx_depth", width // 2),
+            "cy": getattr(depth_estimator, "cy_depth", height // 2),
+        }
         return {
             "detections": detections,
+            "depth_map": depth_b64,  # Returning depth as Base64 image
+            "segmentation": segmentation_b64,  # Returning segmentation as Base64 image
             "camera_params": camera_params,
             "image_size": {"width": width, "height": height},
         }
     except Exception as e:
         print(f"🚨 Error in get_detection_data: {str(e)}")
         return {"error": str(e)}
 def cancel():
     """Set the module-level ``CANCEL_PROCESSING`` flag to ``True``."""
     # Without the ``global`` declaration the assignment below would bind a
     # function-local name that is discarded on return, leaving any
     # module-level flag untouched.
     # NOTE(review): the code that reads this flag is not visible here --
     # confirm it reads the module-level name.
     global CANCEL_PROCESSING
     CANCEL_PROCESSING = True