Alessio Grancini committed (verified)
Commit 4715c42 · 1 Parent(s): 557ac16

Update app.py

Files changed (1)
  1. app.py +46 -41
app.py CHANGED
@@ -146,7 +146,6 @@ def get_camera_matrix(depth_estimator):
         "cy": depth_estimator.cy_depth
     }
 
-
 def encode_base64_image(image_array):
     """
     Encodes a NumPy (OpenCV) image array to a base64-encoded PNG DataURL
@@ -168,78 +167,84 @@ def encode_base64_image(image_array):
     # Return a data URL
     return "data:image/png;base64," + b64_str
 
+def generate_image_url(image):
+    """Generate a shareable URL for an OpenCV image."""
+    success, encoded_buffer = cv2.imencode(".png", image)
+    if not success:
+        raise ValueError("Could not encode image to PNG buffer")
+
+    b64_str = base64.b64encode(encoded_buffer).decode("utf-8")
+    return "data:image/png;base64," + b64_str
+
+def get_3d_position(center, depth, camera_matrix):
+    """Project 2D center into 3D space using depth and camera matrix."""
+    cx, cy = center
+    fx, fy = camera_matrix["fx"], camera_matrix["fy"]
+    cx_d, cy_d = camera_matrix["cx"], camera_matrix["cy"]
+
+    x = (cx - cx_d) * depth / fx
+    y = (cy - cy_d) * depth / fy
+    z = depth
+
+    return [x, y, z]
+
+def get_bbox_from_mask(mask):
+    """Get bounding box (x1, y1, x2, y2) from a binary mask."""
+    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    biggest_contour = max(contours, key=cv2.contourArea)
+    x, y, w, h = cv2.boundingRect(biggest_contour)
+    return x, y, x + w, y + h
 
 @spaces.GPU
 def get_detection_data(image_data):
-    """
-    Get structured detection data with depth information, using a nested JSON + Base64 image.
-    Expects Lens Studio to send:
-    {
-      "image": {
-        "image": {
-          "data": "data:image/png;base64,<BASE64>"
-        }
-      }
-    }
-    or just a direct string.
-    """
     try:
-        # 1) Extract the nested "data" string if it's a dict
         if isinstance(image_data, dict):
-            # For the structure: {"image": {"image": {"data": "data:image/png;base64,..."}}}
             nested_dict = image_data.get("image", {}).get("image", {})
             full_data_url = nested_dict.get("data", "")
         else:
-            # If not a dict, assume it's a direct string
             full_data_url = image_data
 
         if not full_data_url:
             return {"error": "No base64 data found in input."}
 
-        # 2) Strip the "data:image/..." prefix if present
         if full_data_url.startswith("data:image"):
-            # split once on comma => ["data:image/png;base64", "<BASE64>"]
             _, b64_string = full_data_url.split(",", 1)
         else:
             b64_string = full_data_url
 
-        # 3) Decode base64 -> PIL -> OpenCV
         img_data = base64.b64decode(b64_string)
         img = Image.open(BytesIO(img_data))
         img = np.array(img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
 
-        # 4) Process image
         image = utils.resize(img)
         image_segmentation, objects_data = img_seg.predict(image)
        depthmap, depth_colormap = depth_estimator.make_prediction(image)
 
-        # 5) Prepare structured response
-        processed_objects = []
+        detections = []
         for obj in objects_data:
-            cls_id, cls_name, center, mask, color = obj
-            depth_value = depth_at_center(
-                depthmap,
-                [center[0] - 10, center[1] - 10, center[0] + 10, center[1] + 10]
-            )
-            processed_objects.append({
-                "class_id": int(cls_id),
+            cls_id, cls_name, center, mask, _ = obj
+            x1, y1, x2, y2 = get_bbox_from_mask(mask)
+            depth_value = depth_at_center(depthmap, [x1, y1, x2, y2])
+
+            detections.append({
+                "class_id": cls_id,
                 "class_name": cls_name,
-                "center": {"x": float(center[0]), "y": float(center[1])},
-                "depth": float(depth_value),
-                "color": [int(c) for c in color]
+                "bounding_box": {
+                    "vertices": get_box_vertices([x1, y1, x2, y2])
+                },
+                "position_3d": get_3d_position(center, depth_value, get_camera_matrix(depth_estimator)),
+                "distance": depth_value
             })
 
         response = {
-            "detections": processed_objects,
-            "depth_map": encode_base64_image(depth_colormap),
-            "segmentation": encode_base64_image(image_segmentation),
-            "camera_matrix": {
-                "fx": depth_estimator.fx_depth,
-                "fy": depth_estimator.fy_depth,
-                "cx": depth_estimator.cx_depth,
-                "cy": depth_estimator.cy_depth
-            }
+            "detections": detections,
+            "segmentation_url": generate_image_url(image_segmentation),
+            "depth_url": generate_image_url(depth_colormap),
+            "distance_url": generate_image_url(utils.draw_depth_info(image, depthmap, objects_data)),
+            "point_cloud_url": generate_plot_url(utils.generate_obj_pcd(depthmap, objects_data)),
+            "camera_matrix": get_camera_matrix(depth_estimator),
+            "camera_position": [0, 0, 0] # Assumed at origin based on camera intrinsics
         }
         return response
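
For reference, the nested payload that get_detection_data unwraps, and the keys it now returns, can be exercised locally. This is a minimal sketch, assuming app.py is importable as a module named app and that a test image exists at the hypothetical path test.png:

import base64

from app import get_detection_data  # assumption: app.py is importable as "app"

with open("test.png", "rb") as f:  # hypothetical test image
    b64 = base64.b64encode(f.read()).decode("utf-8")

# Nested shape documented in the removed docstring:
# {"image": {"image": {"data": "data:image/png;base64,<BASE64>"}}}
payload = {"image": {"image": {"data": "data:image/png;base64," + b64}}}

result = get_detection_data(payload)
if "error" in result:
    print("error:", result["error"])
else:
    for det in result["detections"]:
        print(det["class_name"], det["distance"], det["position_3d"])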
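
The new get_3d_position helper is the standard pinhole back-projection: x = (u - cx) * z / fx, y = (v - cy) * z / fy, z = depth. A self-contained sanity check with illustrative intrinsics (not the model's actual values):

def backproject(u, v, depth, fx, fy, cx, cy):
    """Pixel (u, v) plus metric depth -> camera-space [x, y, z], as in get_3d_position."""
    return [(u - cx) * depth / fx, (v - cy) * depth / fy, depth]

fx = fy = 500.0        # focal lengths in pixels (illustrative)
cx, cy = 320.0, 240.0  # principal point (illustrative)

# Project a known 3D point to pixels...
X, Y, Z = 0.4, -0.2, 2.0
u = fx * X / Z + cx
v = fy * Y / Z + cy

# ...and back-projection should recover it (up to float error).
print(backproject(u, v, Z, fx, fy, cx, cy))  # ~[0.4, -0.2, 2.0]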
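
get_bbox_from_mask takes the largest external contour of the mask and returns its axis-aligned box as (x1, y1, x2, y2). A quick check of that logic on a synthetic mask (the shape and values here are made up for illustration):

import cv2
import numpy as np

mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:60, 30:80] = 1  # a filled rectangle: rows 20-59, cols 30-79

contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
print(x, y, x + w, y + h)  # expected: 30 20 80 60

Note that the helper assumes the mask has at least one foreground region; an empty mask would leave contours empty.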