Alessio Grancini
committed on
Update app.py
app.py CHANGED
@@ -107,6 +107,115 @@ def model_selector(model_type):
 img_seg = ImageSegmenter(model_type=yolo_model)
 depth_estimator = MonocularDepthEstimator(model_type=midas_model)
 
+# START
+# added for lens studio
+
+
+def get_box_vertices(bbox):
+    """Convert bbox to corner vertices"""
+    x1, y1, x2, y2 = bbox
+    return [
+        [x1, y1],  # top-left
+        [x2, y1],  # top-right
+        [x2, y2],  # bottom-right
+        [x1, y2]   # bottom-left
+    ]
+
+def depth_at_center(depth_map, bbox):
+    """Get depth at center of bounding box"""
+    x1, y1, x2, y2 = bbox
+    center_x = int((x1 + x2) / 2)
+    center_y = int((y1 + y2) / 2)
+
+    # Sample a small region around center for stability
+    region = depth_map[
+        max(0, center_y-2):min(depth_map.shape[0], center_y+3),
+        max(0, center_x-2):min(depth_map.shape[1], center_x+3)
+    ]
+    return np.median(region)
+
+def get_camera_matrix(depth_estimator):
+    """Get camera calibration matrix"""
+    return {
+        "fx": depth_estimator.fx_depth,
+        "fy": depth_estimator.fy_depth,
+        "cx": depth_estimator.cx_depth,
+        "cy": depth_estimator.cy_depth
+    }
+
+@spaces.GPU
+def get_detection_data(image):
+    """
+    Process image and return structured detection data with camera parameters
+    """
+    try:
+        # Resize image if needed
+        image = utils.resize(image)
+
+        # Run detections
+        image_segmentation, objects_data = img_seg.predict(image)
+        depthmap, depth_colormap = depth_estimator.make_prediction(image)
+
+        # Get original image dimensions
+        height, width = image.shape[:2]
+
+        # Scale factor for normalizing coordinates
+        scale_x = width / depthmap.shape[1]
+        scale_y = height / depthmap.shape[0]
+
+        # Process each detection
+        detections = []
+        for obj in objects_data:
+            cls_id, category, center, mask, color = obj
+
+            # Get bounding box (assuming it's available in objects_data)
+            bbox = get_object_bbox(mask)  # You'll need to implement this
+
+            # Get normalized coordinates
+            bbox_norm = [
+                bbox[0] / width,
+                bbox[1] / height,
+                bbox[2] / width,
+                bbox[3] / height
+            ]
+
+            # Get vertices
+            vertices = get_box_vertices(bbox_norm)
+
+            # Get depth
+            depth_value = depth_at_center(depthmap, bbox)
+
+            # Create detection object
+            detection = {
+                "category": category,
+                "confidence": 1.0,  # Add actual confidence if available
+                "bbox": bbox_norm,
+                "depth": float(depth_value),  # Convert to native Python float
+                "vertices": vertices,
+                "color": [float(c/255) for c in color],  # Normalize color
+                "mask": mask.tolist() if isinstance(mask, np.ndarray) else mask
+            }
+            detections.append(detection)
+
+        # Prepare response
+        response = {
+            "detections": detections,
+            "depth_map": depthmap.tolist(),
+            "camera_params": get_camera_matrix(depth_estimator),
+            "image_size": {
+                "width": width,
+                "height": height
+            }
+        }
+
+        return response
+
+    except Exception as e:
+        print(f"Error in get_detection_data: {str(e)}")
+        raise
+
+# ENDS
+
 def cancel():
     CANCEL_PROCESSING = True
 
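The hunk calls a get_object_bbox(mask) helper that its own comment flags as not yet implemented. Below is a minimal sketch of what such a helper could look like, assuming each mask is a 2D NumPy array at image resolution that is nonzero inside the object; only the name and signature come from the call site above, the body is an illustration.

import numpy as np

def get_object_bbox(mask):
    """Sketch (assumed helper): derive an [x1, y1, x2, y2] pixel box
    from a binary segmentation mask (2D array, nonzero inside the object)."""
    ys, xs = np.nonzero(mask)
    if xs.size == 0:
        # Empty mask: return a degenerate box instead of failing
        return [0, 0, 0, 0]
    return [int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())]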
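On the consuming side (a Lens Studio script or any other client of this response), the exported camera_params together with a detection's depth are enough to lift a box center into 3D camera space with the standard pinhole model. A rough sketch with a hypothetical backproject_center helper; note that MiDaS-style depth is relative rather than metric, so the recovered position is only defined up to scale.

def backproject_center(detection, camera_params, image_size):
    """Sketch: pinhole back-projection of a detection's bbox center.
    X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy, Z = depth."""
    x1, y1, x2, y2 = detection["bbox"]            # normalized bbox from the response
    u = (x1 + x2) / 2 * image_size["width"]       # center x in pixels
    v = (y1 + y2) / 2 * image_size["height"]      # center y in pixels
    z = detection["depth"]
    x = (u - camera_params["cx"]) * z / camera_params["fx"]
    y = (v - camera_params["cy"]) * z / camera_params["fy"]
    return [x, y, z]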