Update app.py
app.py CHANGED
@@ -85,6 +85,27 @@ birefnet = None
 paper_model_path = os.path.join(CACHE_DIR, "paper_detector.pt")  # You'll need to train/provide this
 u2net_model_path = os.path.join(CACHE_DIR, "u2netp.pth")
 
+# Global variable for YOLOWorld
+yolo_world_global = None
+yolo_world_model_path = os.path.join(CACHE_DIR, "yolov8_world.pt")  # Adjust path as needed
+
+def get_yolo_world():
+    """Lazy load YOLOWorld model"""
+    global yolo_world_global
+    if yolo_world_global is None:
+        logger.info("Loading YOLOWorld model...")
+        if os.path.exists(yolo_world_model_path):
+            try:
+                yolo_world_global = YOLOWorld(yolo_world_model_path)
+                logger.info("YOLOWorld model loaded successfully")
+            except Exception as e:
+                logger.error(f"Failed to load YOLOWorld: {e}")
+                yolo_world_global = None
+        else:
+            logger.warning("YOLOWorld model file not found; callers will fall back to full-image processing")
+            yolo_world_global = None
+    return yolo_world_global
+
 # Device configuration
 device = "cpu"
 torch.set_float32_matmul_precision(["high", "highest"][0])
@@ -951,17 +972,81 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
         logger.error(f"Unexpected error in paper detection: {e}")
         raise gr.Error(f"Error processing image: {str(e)}")
 
-    # Rest of the function remains the same...
-    # [Keep all the existing object detection and DXF generation code]
-
     try:
         # Mask paper area in input image first
         masked_input_image = mask_paper_area_in_image(image, paper_contour)
 
-        #
+        # NEW: Use YOLOWorld to detect the object bounding box
+        yolo_world = get_yolo_world()
+        if yolo_world is None:
+            logger.warning("YOLOWorld model not available, proceeding with full image")
+            cropped_image = masked_input_image
+            crop_offset = (0, 0)  # No offset if not cropping
+        else:
+            # Set text prompts for tool/object detection
+            yolo_world.set_classes(["tool", "object on paper"])
+            results = yolo_world.predict(masked_input_image, conf=0.5, verbose=False)
+
+            if not results or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
+                logger.warning("No objects detected by YOLOWorld, proceeding with full image")
+                cropped_image = masked_input_image
+                crop_offset = (0, 0)
+            else:
+                # Take the highest-confidence box
+                boxes = results[0].boxes.xyxy.cpu().numpy()
+                confidences = results[0].boxes.conf.cpu().numpy()
+                best_box_idx = np.argmax(confidences)
+                x_min, y_min, x_max, y_max = map(int, boxes[best_box_idx])
+
+                # Add a margin (10% of the smaller box side, at least 20 px)
+                margin = max(20, int(min(x_max - x_min, y_max - y_min) * 0.1))
+                x_min = max(0, x_min - margin)
+                y_min = max(0, y_min - margin)
+                x_max = min(masked_input_image.shape[1], x_max + margin)
+                y_max = min(masked_input_image.shape[0], y_max + margin)
+
+                # Crop the masked image
+                cropped_image = masked_input_image[y_min:y_max, x_min:x_max]
+                crop_offset = (x_min, y_min)  # Store offset for mask realignment
+                logger.info(f"Cropped to box: ({x_min}, {y_min}, {x_max}, {y_max})")
+
+        # Debug: save the cropped image
+        cv2.imwrite("./debug/cropped_image.jpg", cropped_image)
+
+        # Remove background from the cropped image
         orig_size = image.shape[:2]
-
-
+        objects_mask = remove_bg(cropped_image)
+        processed_size = objects_mask.shape[:2]
+
+        # Resize the mask to the crop's size and paste it back at the stored crop offset
+        full_mask = np.zeros((orig_size[0], orig_size[1]), dtype=np.uint8)
+        resized_mask = cv2.resize(objects_mask, (cropped_image.shape[1], cropped_image.shape[0]))
+        full_mask[crop_offset[1]:crop_offset[1] + resized_mask.shape[0], crop_offset[0]:crop_offset[0] + resized_mask.shape[1]] = resized_mask
+
+        # Remove paper area from the mask to focus only on objects
+        objects_mask = exclude_paper_area(full_mask, paper_contour)
+
+        # Debug: save intermediate masks
+        cv2.imwrite("./debug/objects_mask_after_yolo.jpg", objects_mask)
+
+        # Check that object pixels remain after paper exclusion
+        object_pixels = np.count_nonzero(objects_mask)
+        if object_pixels < 1000:  # Minimum threshold
+            raise NoObjectDetectedError("No significant object detected after excluding paper area")
+
+        # Validate single object
+        validate_single_object(objects_mask, paper_contour)
+
+    except (MultipleObjectsError, NoObjectDetectedError) as e:
+        return (
+            None, None, None, None,
+            f"Error: {str(e)}"
+        )
+    except Exception as e:
+        raise gr.Error(f"Error in object detection: {str(e)}")
+
+    # Rest of the function remains unchanged...
+    # [Keep existing code for dilation, contour extraction, DXF generation, etc.]
     objects_mask = remove_bg(image)
     processed_size = objects_mask.shape[:2]
 
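
Note on the detection step: the diff does not show the import, so it presumably assumes YOLOWorld is already imported at the top of app.py (in current releases it ships as ultralytics.YOLOWorld). A minimal standalone sketch of the open-vocabulary prompting pattern the hunk relies on, assuming the ultralytics package and the public yolov8s-world.pt checkpoint (the cached yolov8_world.pt path in the diff is app-specific); prompts and thresholds mirror the diff's values:

import numpy as np
from ultralytics import YOLOWorld

# Load the model and set free-text class prompts, as the diff does.
model = YOLOWorld("yolov8s-world.pt")
model.set_classes(["tool", "object on paper"])

results = model.predict("photo.jpg", conf=0.5, verbose=False)
boxes = results[0].boxes

if len(boxes) == 0:
    print("No detections; fall back to processing the full image")
else:
    # Pick the highest-confidence box, then pad it by 10% (at least 20 px).
    best = int(np.argmax(boxes.conf.cpu().numpy()))
    x_min, y_min, x_max, y_max = map(int, boxes.xyxy.cpu().numpy()[best])
    margin = max(20, int(min(x_max - x_min, y_max - y_min) * 0.1))
    print((x_min, y_min, x_max, y_max), "margin:", margin)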
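
The paste-back step deserves a note: remove_bg runs on the crop, so its mask lives in crop coordinates and must be re-placed at the stored crop_offset before it can line up with paper_contour in full-image space. A minimal sketch of that realignment with synthetic data (shapes and offsets are illustrative, not taken from the app):

import cv2
import numpy as np

full_h, full_w = 480, 640   # full-image size (illustrative)
x0, y0 = 100, 50            # crop offset, i.e. crop_offset = (x0, y0)
crop_h, crop_w = 200, 300   # size of the cropped region

# Stand-in for the background-removal output on the crop: a filled circle.
crop_mask = np.zeros((crop_h, crop_w), dtype=np.uint8)
cv2.circle(crop_mask, (150, 100), 60, 255, -1)

# The segmenter may return a different resolution, so resize to the crop size...
resized = cv2.resize(crop_mask, (crop_w, crop_h))

# ...then paste into full-image coordinates at the crop offset.
full_mask = np.zeros((full_h, full_w), dtype=np.uint8)
full_mask[y0:y0 + resized.shape[0], x0:x0 + resized.shape[1]] = resized

# No object pixels are lost or gained by the paste.
assert np.count_nonzero(full_mask) == np.count_nonzero(resized)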