Update app.py
app.py CHANGED
@@ -85,6 +85,27 @@ birefnet = None
 paper_model_path = os.path.join(CACHE_DIR, "paper_detector.pt")  # You'll need to train/provide this
 u2net_model_path = os.path.join(CACHE_DIR, "u2netp.pth")
 
+# Global variable for YOLOWorld
+yolo_world_global = None
+yolo_world_model_path = os.path.join(CACHE_DIR, "yolov8_world.pt")  # Adjust path as needed
+
+def get_yolo_world():
+    """Lazy load YOLOWorld model"""
+    global yolo_world_global
+    if yolo_world_global is None:
+        logger.info("Loading YOLOWorld model...")
+        if os.path.exists(yolo_world_model_path):
+            try:
+                yolo_world_global = YOLOWorld(yolo_world_model_path)
+                logger.info("YOLOWorld model loaded successfully")
+            except Exception as e:
+                logger.error(f"Failed to load YOLOWorld: {e}")
+                yolo_world_global = None
+        else:
+            logger.warning("YOLOWorld model file not found; callers will fall back to full-image processing")
+            yolo_world_global = None
+    return yolo_world_global
+
 # Device configuration
 device = "cpu"
 torch.set_float32_matmul_precision(["high", "highest"][0])
@@ -951,17 +972,81 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
         logger.error(f"Unexpected error in paper detection: {e}")
         raise gr.Error(f"Error processing image: {str(e)}")
 
-    # Rest of the function remains the same...
-    # [Keep all the existing object detection and DXF generation code]
-
     try:
         # Mask paper area in input image first
         masked_input_image = mask_paper_area_in_image(image, paper_contour)
 
-        #
+        # NEW: Use YOLOWorld to detect the object bounding box
+        yolo_world = get_yolo_world()
+        if yolo_world is None:
+            logger.warning("YOLOWorld model not available, proceeding with full image")
+            cropped_image = masked_input_image
+            crop_offset = (0, 0)  # No offset if not cropping
+        else:
+            # Set text prompts for tool/object detection
+            yolo_world.set_classes(["tool", "object on paper"])
+            results = yolo_world.predict(masked_input_image, conf=0.5, verbose=False)
+
+            if not results or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
+                logger.warning("No objects detected by YOLOWorld, proceeding with full image")
+                cropped_image = masked_input_image
+                crop_offset = (0, 0)
+            else:
+                # Take the highest-confidence box
+                boxes = results[0].boxes.xyxy.cpu().numpy()
+                confidences = results[0].boxes.conf.cpu().numpy()
+                best_box_idx = np.argmax(confidences)
+                x_min, y_min, x_max, y_max = map(int, boxes[best_box_idx])
+
+                # Add a margin (10% of the smaller box side, at least 20 px)
+                margin = max(20, int(min(x_max - x_min, y_max - y_min) * 0.1))
+                x_min = max(0, x_min - margin)
+                y_min = max(0, y_min - margin)
+                x_max = min(masked_input_image.shape[1], x_max + margin)
+                y_max = min(masked_input_image.shape[0], y_max + margin)
+
+                # Crop the masked image
+                cropped_image = masked_input_image[y_min:y_max, x_min:x_max]
+                crop_offset = (x_min, y_min)  # Store offset for mask realignment
+                logger.info(f"Cropped to box: ({x_min}, {y_min}, {x_max}, {y_max})")
+
+        # Debug: save the cropped image
+        cv2.imwrite("./debug/cropped_image.jpg", cropped_image)
+
+        # Remove background from the cropped image
         orig_size = image.shape[:2]
-
-
+        objects_mask = remove_bg(cropped_image)
+        processed_size = objects_mask.shape[:2]
+
+        # Resize the mask to the crop's size and paste it back at the stored crop offset
+        full_mask = np.zeros((orig_size[0], orig_size[1]), dtype=np.uint8)
+        resized_mask = cv2.resize(objects_mask, (cropped_image.shape[1], cropped_image.shape[0]))
+        full_mask[crop_offset[1]:crop_offset[1] + resized_mask.shape[0], crop_offset[0]:crop_offset[0] + resized_mask.shape[1]] = resized_mask
+
+        # Remove paper area from the mask to focus only on objects
+        objects_mask = exclude_paper_area(full_mask, paper_contour)
+
+        # Debug: save intermediate masks
+        cv2.imwrite("./debug/objects_mask_after_yolo.jpg", objects_mask)
+
+        # Check that object pixels remain after paper exclusion
+        object_pixels = np.count_nonzero(objects_mask)
+        if object_pixels < 1000:  # Minimum threshold
+            raise NoObjectDetectedError("No significant object detected after excluding paper area")
+
+        # Validate single object
+        validate_single_object(objects_mask, paper_contour)
+
+    except (MultipleObjectsError, NoObjectDetectedError) as e:
+        return (
+            None, None, None, None,
+            f"Error: {str(e)}"
+        )
+    except Exception as e:
+        raise gr.Error(f"Error in object detection: {str(e)}")
+
+    # Rest of the function remains unchanged...
+    # [Keep existing code for dilation, contour extraction, DXF generation, etc.]
     objects_mask = remove_bg(image)
     processed_size = objects_mask.shape[:2]
 
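
Note on the detection step: the diff does not show the import, so it presumably assumes YOLOWorld is already imported at the top of app.py (in current releases it ships as ultralytics.YOLOWorld). A minimal standalone sketch of the open-vocabulary prompting pattern the hunk relies on, assuming the ultralytics package and the public yolov8s-world.pt checkpoint (the cached yolov8_world.pt path in the diff is app-specific); prompts and thresholds mirror the diff's values:

import numpy as np
from ultralytics import YOLOWorld

# Load the model and set free-text class prompts, as the diff does.
model = YOLOWorld("yolov8s-world.pt")
model.set_classes(["tool", "object on paper"])

results = model.predict("photo.jpg", conf=0.5, verbose=False)
boxes = results[0].boxes

if len(boxes) == 0:
    print("No detections; fall back to processing the full image")
else:
    # Pick the highest-confidence box, then pad it by 10% (at least 20 px).
    best = int(np.argmax(boxes.conf.cpu().numpy()))
    x_min, y_min, x_max, y_max = map(int, boxes.xyxy.cpu().numpy()[best])
    margin = max(20, int(min(x_max - x_min, y_max - y_min) * 0.1))
    print((x_min, y_min, x_max, y_max), "margin:", margin)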
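
The paste-back step deserves a note: remove_bg runs on the crop, so its mask lives in crop coordinates and must be re-placed at the stored crop_offset before it can line up with paper_contour in full-image space. A minimal sketch of that realignment with synthetic data (shapes and offsets are illustrative, not taken from the app):

import cv2
import numpy as np

full_h, full_w = 480, 640   # full-image size (illustrative)
x0, y0 = 100, 50            # crop offset, i.e. crop_offset = (x0, y0)
crop_h, crop_w = 200, 300   # size of the cropped region

# Stand-in for the background-removal output on the crop: a filled circle.
crop_mask = np.zeros((crop_h, crop_w), dtype=np.uint8)
cv2.circle(crop_mask, (150, 100), 60, 255, -1)

# The segmenter may return a different resolution, so resize to the crop size...
resized = cv2.resize(crop_mask, (crop_w, crop_h))

# ...then paste into full-image coordinates at the crop offset.
full_mask = np.zeros((full_h, full_w), dtype=np.uint8)
full_mask[y0:y0 + resized.shape[0], x0:x0 + resized.shape[1]] = resized

# No object pixels are lost or gained by the paste.
assert np.count_nonzero(full_mask) == np.count_nonzero(resized)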