Update app.py
app.py CHANGED
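In summary, this commit relaxes the object-detection pipeline so small and corner-adjacent objects survive: a concrete 100 px min_area in contour filtering, a gentler paper mask (2% shrink with a gray fill instead of an 8% shrink with black), YOLOWorld detection at conf=0.05 with size-based box filtering and larger crop margins, and a 300-pixel minimum for the final object mask.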
@@ -413,7 +413,7 @@ def validate_single_object(mask: np.ndarray, paper_contour: np.ndarray) -> None:
 
     # Filter out very small contours (noise) and paper-sized contours
     image_area = mask.shape[0] * mask.shape[1]
-    min_area =
+    min_area = 100  # Minimum area threshold
     max_area = image_area * 0.5  # Maximum 50% of image area (to exclude paper detection)
     significant_contours = [c for c in contours if min_area < cv2.contourArea(c) < max_area]
 
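For orientation, the filter this hunk tunes behaves like the standalone sketch below. The findContours call and the helper name are assumptions for illustration; the 100 px floor and the 50%-of-image ceiling are the values the diff sets.

import cv2
import numpy as np

def find_significant_contours(mask: np.ndarray) -> list:
    # Hypothetical helper mirroring the tuned filter: discard speckle noise
    # below 100 px and paper-sized blobs above half the image area.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    image_area = mask.shape[0] * mask.shape[1]
    min_area = 100               # Minimum area threshold (from the diff)
    max_area = image_area * 0.5  # Maximum 50% of image area
    return [c for c in contours if min_area < cv2.contourArea(c) < max_area]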
@@ -514,17 +514,15 @@ def remove_bg(image: np.ndarray) -> np.ndarray:
 
     # return result_mask
 def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
-    """
-    Black out paper area in the input image before sending to BiRefNet
-    """
+    """Less aggressive masking to preserve corner objects"""
     masked_image = image.copy()
 
-    #
+    # Much less aggressive shrinking - only 2% instead of 8%
     rect = cv2.boundingRect(paper_contour)
-    shrink_pixels = int(min(rect[2], rect[3]) * 0.08)
+    shrink_pixels = max(5, int(min(rect[2], rect[3]) * 0.02))  # Changed from 0.08 to 0.02
 
     x, y, w, h = rect
-    # Create mask
+    # Create mask but keep more area
     outer_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255
 
     inner_contour = np.array([
@@ -534,11 +532,8 @@ def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
         [[x + shrink_pixels, y + h - shrink_pixels]]
     ])
 
-    # Black out everything outside inner paper bounds
     cv2.fillPoly(outer_mask, [inner_contour], 0)
-
-    # Apply mask to image
-    masked_image[outer_mask == 255] = [0, 0, 0]  # Black out paper areas
+    masked_image[outer_mask == 255] = [128, 128, 128]  # Gray instead of black
 
     return masked_image
 
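Assembled from the '+' side of the two hunks above, the updated function reads as follows. The three inner_contour corners that fall between the hunks are reconstructed by rectangle symmetry from the one corner shown, and the imports are added so the sketch runs on its own; treat it as a reading aid, not the verbatim file.

import cv2
import numpy as np

def mask_paper_area_in_image(image: np.ndarray, paper_contour: np.ndarray) -> np.ndarray:
    """Less aggressive masking to preserve corner objects"""
    masked_image = image.copy()

    # Shrink the paper rect by 2% of its short side (at least 5 px), down from 8%
    x, y, w, h = cv2.boundingRect(paper_contour)
    shrink_pixels = max(5, int(min(w, h) * 0.02))

    # 255 everywhere, then 0 inside the shrunken paper rectangle
    outer_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255
    inner_contour = np.array([
        [[x + shrink_pixels, y + shrink_pixels]],          # reconstructed corner
        [[x + w - shrink_pixels, y + shrink_pixels]],      # reconstructed corner
        [[x + w - shrink_pixels, y + h - shrink_pixels]],  # reconstructed corner
        [[x + shrink_pixels, y + h - shrink_pixels]],      # corner visible in the hunk
    ], dtype=np.int32)  # fillPoly expects 32-bit integer points
    cv2.fillPoly(outer_mask, [inner_contour], 0)

    # Gray instead of black, per the diff
    masked_image[outer_mask == 255] = [128, 128, 128]
    return masked_image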
@@ -983,46 +978,44 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
 
     # Use YOLOWorld to detect object bounding box
     yolo_world = get_yolo_world()
+    # Lower confidence and add size-based filtering
     if yolo_world is None:
         logger.warning("YOLOWorld model not available, proceeding with full image")
         cropped_image = masked_input_image
         crop_offset = (0, 0)
     else:
-
-        yolo_world.
-        results = yolo_world.predict(masked_input_image, conf=0.2, verbose=False)
+        yolo_world.set_classes(["small object", "tool", "item", "component", "part", "piece", "device"])
+        results = yolo_world.predict(masked_input_image, conf=0.05, verbose=False)  # Much lower confidence
 
         if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
             logger.warning("No objects detected by YOLOWorld, proceeding with full image")
             cropped_image = masked_input_image
             crop_offset = (0, 0)
         else:
-            # Get the highest confidence box
             boxes = results[0].boxes.xyxy.cpu().numpy()
             confidences = results[0].boxes.conf.cpu().numpy()
-            best_box_idx = np.argmax(confidences)
-            x_min, y_min, x_max, y_max = map(int, boxes[best_box_idx])
 
-            #
-
-
-
-
-
-
-            if x_max <= x_min or y_max <= y_min:
-                logger.warning("Invalid crop region, proceeding with full image")
+            # Filter out boxes that are too large (likely paper detection)
+            valid_boxes = []
+            image_area = masked_input_image.shape[0] * masked_input_image.shape[1]
+
+            for i, box in enumerate(boxes):
+                x_min, y_min, x_max, y_max = box
+                box_area = (x_max - x_min) * (y_max - y_min)
+                if box_area < image_area * 0.3:  # Reject if larger than 30% of image
+                    valid_boxes.append((i, confidences[i]))
+
+            if not valid_boxes:
                 cropped_image = masked_input_image
                 crop_offset = (0, 0)
             else:
-                #
-
-
-                logger.info(f"Cropped to box: ({x_min}, {y_min}, {x_max}, {y_max})")
-
-            #
-
+                # Get highest confidence valid box
+                best_idx = max(valid_boxes, key=lambda x: x[1])[0]
+                x_min, y_min, x_max, y_max = map(int, boxes[best_idx])
+
+                # Larger margin for small objects
+                box_size = min(x_max - x_min, y_max - y_min)
+                margin = max(30, int(box_size * 0.3))  # At least 30px margin
 
     # Remove background from cropped image
     orig_size = image.shape[:2]
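The box-selection logic this hunk introduces can be read as a small pure function. pick_object_box and its return convention are inventions for illustration; the 30% area cutoff, the confidence-based pick, and the 30 px minimum margin are the diff's values. Applying the margin and performing the crop happen in unchanged lines outside the hunk.

import numpy as np

def pick_object_box(boxes: np.ndarray, confidences: np.ndarray, image_shape):
    # Reject paper-sized detections: any box covering 30%+ of the image
    image_area = image_shape[0] * image_shape[1]
    valid_boxes = [
        (i, conf) for i, (box, conf) in enumerate(zip(boxes, confidences))
        if (box[2] - box[0]) * (box[3] - box[1]) < image_area * 0.3
    ]
    if not valid_boxes:
        return None  # Caller falls back to the full, uncropped image

    # Highest-confidence surviving box
    best_idx = max(valid_boxes, key=lambda item: item[1])[0]
    x_min, y_min, x_max, y_max = map(int, boxes[best_idx])

    # Pad small boxes generously so a tight crop does not clip the object
    margin = max(30, int(min(x_max - x_min, y_max - y_min) * 0.3))
    return x_min, y_min, x_max, y_max, margin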
@@ -1049,7 +1042,7 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
 
     # Check if we actually have object pixels after paper exclusion
     object_pixels = np.count_nonzero(objects_mask)
-    if object_pixels <
+    if object_pixels < 300:  # Minimum threshold
         raise NoObjectDetectedError("No significant object detected after excluding paper area")
 
     # Validate single object