Alessio Grancini
committed on
Update app.py
app.py
CHANGED
@@ -146,7 +146,6 @@ def get_camera_matrix(depth_estimator):
         "cy": depth_estimator.cy_depth
     }
 
-
 def encode_base64_image(image_array):
     """
     Encodes a NumPy (OpenCV) image array to a base64-encoded PNG DataURL
@@ -168,78 +167,84 @@ def encode_base64_image(image_array):
     # Return a data URL
     return "data:image/png;base64," + b64_str
 
+def generate_image_url(image):
+    """Generate a shareable URL for an OpenCV image."""
+    success, encoded_buffer = cv2.imencode(".png", image)
+    if not success:
+        raise ValueError("Could not encode image to PNG buffer")
+
+    b64_str = base64.b64encode(encoded_buffer).decode("utf-8")
+    return "data:image/png;base64," + b64_str
+
+def get_3d_position(center, depth, camera_matrix):
+    """Project 2D center into 3D space using depth and camera matrix."""
+    cx, cy = center
+    fx, fy = camera_matrix["fx"], camera_matrix["fy"]
+    cx_d, cy_d = camera_matrix["cx"], camera_matrix["cy"]
+
+    x = (cx - cx_d) * depth / fx
+    y = (cy - cy_d) * depth / fy
+    z = depth
+
+    return [x, y, z]
+
+def get_bbox_from_mask(mask):
+    """Get bounding box (x1, y1, x2, y2) from a binary mask."""
+    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    biggest_contour = max(contours, key=cv2.contourArea)
+    x, y, w, h = cv2.boundingRect(biggest_contour)
+    return x, y, x+w, y+h
 
 @spaces.GPU
 def get_detection_data(image_data):
-    """
-    Get structured detection data with depth information, using a nested JSON + Base64 image.
-    Expects Lens Studio to send:
-    {
-        "image": {
-            "image": {
-                "data": "data:image/png;base64,<BASE64>"
-            }
-        }
-    }
-    or just a direct string.
-    """
     try:
-        # 1) Extract the nested "data" string if it's a dict
         if isinstance(image_data, dict):
-            # For the structure: {"image": {"image": {"data": "data:image/png;base64,..."}}}
             nested_dict = image_data.get("image", {}).get("image", {})
             full_data_url = nested_dict.get("data", "")
         else:
-            # If not a dict, assume it's a direct string
             full_data_url = image_data
 
         if not full_data_url:
             return {"error": "No base64 data found in input."}
 
-        # 2) Strip the "data:image/..." prefix if present
         if full_data_url.startswith("data:image"):
-            # split once on comma => ["data:image/png;base64", "<BASE64>"]
             _, b64_string = full_data_url.split(",", 1)
         else:
             b64_string = full_data_url
 
-        # 3) Decode base64 -> PIL -> OpenCV
         img_data = base64.b64decode(b64_string)
         img = Image.open(BytesIO(img_data))
         img = np.array(img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
 
-        # 4) Process image
         image = utils.resize(img)
         image_segmentation, objects_data = img_seg.predict(image)
         depthmap, depth_colormap = depth_estimator.make_prediction(image)
 
-
-        processed_objects = []
+        detections = []
         for obj in objects_data:
-            cls_id, cls_name, center, mask,
-
-
-
-
-
-                "class_id": int(cls_id),
+            cls_id, cls_name, center, mask, _ = obj
+            x1, y1, x2, y2 = get_bbox_from_mask(mask)
+            depth_value = depth_at_center(depthmap, [x1, y1, x2, y2])
+
+            detections.append({
+                "class_id": cls_id,
                 "class_name": cls_name,
-                "
-
-
+                "bounding_box": {
+                    "vertices": get_box_vertices([x1, y1, x2, y2])
+                },
+                "position_3d": get_3d_position(center, depth_value, get_camera_matrix(depth_estimator)),
+                "distance": depth_value
             })
 
         response = {
-            "detections":
-            "
-            "
-            "
-
-
-
-                "cy": depth_estimator.cy_depth
-            }
+            "detections": detections,
+            "segmentation_url": generate_image_url(image_segmentation),
+            "depth_url": generate_image_url(depth_colormap),
+            "distance_url": generate_image_url(utils.draw_depth_info(image, depthmap, objects_data)),
+            "point_cloud_url": generate_plot_url(utils.generate_obj_pcd(depthmap, objects_data)),
+            "camera_matrix": get_camera_matrix(depth_estimator),
+            "camera_position": [0, 0, 0]  # Assumed at origin based on camera intrinsics
         }
         return response
 
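Note: the new get_3d_position helper is a standard pinhole-camera back-projection of a pixel into camera space. A quick numeric sanity check, using made-up intrinsics (the fx, fy, cx, cy values below are illustrative, not the depth estimator's real ones):

# Sanity check for get_3d_position, with illustrative intrinsics.
camera_matrix = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}

# A pixel 100 px right of and 100 px below the principal point, 2 m away:
position = get_3d_position((420, 340), 2.0, camera_matrix)

# x = (420 - 320) * 2.0 / 500 = 0.4
# y = (340 - 240) * 2.0 / 500 = 0.4
# z = 2.0
assert position == [0.4, 0.4, 2.0]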
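Note: get_bbox_from_mask takes the largest connected component (via cv2.findContours and cv2.boundingRect), and it will raise a ValueError on an all-zero mask because max() receives an empty contour list. A toy check of the expected behavior, assuming the helper above is in scope:

import numpy as np

# 10x10 binary mask with a filled 3-row x 4-column block.
mask = np.zeros((10, 10), dtype=np.uint8)
mask[2:5, 3:7] = 1

x1, y1, x2, y2 = get_bbox_from_mask(mask)
# boundingRect gives x=3, y=2, w=4, h=3, so the box is (3, 2, 7, 5).
assert (x1, y1, x2, y2) == (3, 2, 7, 5)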
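Note: the rewritten detection loop also calls depth_at_center and get_box_vertices, which do not appear in this diff and are presumably defined elsewhere in app.py. Minimal sketches of what the call sites imply they do; these are assumptions, not the actual implementations:

# Assumed behavior only -- the real helpers live elsewhere in app.py.
def depth_at_center(depthmap, bbox):
    """Sample the depth map at the center of an (x1, y1, x2, y2) box."""
    x1, y1, x2, y2 = bbox
    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
    return float(depthmap[cy, cx])

def get_box_vertices(bbox):
    """Expand (x1, y1, x2, y2) into the four corners of the box."""
    x1, y1, x2, y2 = bbox
    return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]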
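Note: per the docstring removed in this commit, callers (e.g. Lens Studio) send the image nested as {"image": {"image": {"data": "data:image/png;base64,..."}}} or as a bare data-URL string. A minimal caller sketch, assuming a local test.png purely for illustration:

import base64

# Build the nested payload shape the endpoint's isinstance branch expects.
with open("test.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {"image": {"image": {"data": "data:image/png;base64," + b64}}}
result = get_detection_data(payload)

# On success, each detection carries class, distance, and 3D position.
for det in result.get("detections", []):
    print(det["class_name"], det["distance"], det["position_3d"])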