Spaces:

MLBench
/

Contour_Detection_Paper

Running

App Files Community

mlbench123 committed on Aug 12

Commit

bacea87

verified ·

1 Parent(s): 70843b0

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -39

app.py CHANGED Viewed

@@ -86,9 +86,8 @@ paper_model_path = os.path.join(CACHE_DIR, "paper_detector.pt")  # You'll need t
 u2net_model_path = os.path.join(CACHE_DIR, "u2netp.pth")
 # Global variable for YOLOWorld
-yolo_world_global = None
-yolo_world_model_path = os.path.join(CACHE_DIR, "yolov8s_world.pt")  # Adjust path as needed
 # Device configuration
@@ -108,11 +107,7 @@ def ensure_model_files():
             shutil.copy("u2netp.pth", u2net_model_path)
         else:
             raise FileNotFoundError("u2netp.pth model file not found")
-    if not os.path.exists(yolo_world_model_path):
-        if os.path.exists("yolov8s_world.pt"):  # Adjust to match your file name
-            shutil.copy("yolov8s_world.pt", yolo_world_model_path)
-        else:
-            logger.warning("yolov8s-world.pt model file not found - falling back to full image processing")
 ensure_model_files()
@@ -134,22 +129,18 @@ def get_paper_detector():
             logger.warning("Paper model file not found, using fallback detection")
             paper_detector_global = None
     return paper_detector_global
-def get_yolo_world():
-    """Lazy load YOLOWorld model"""
-    global yolo_world_global
-    if yolo_world_global is None:
-        logger.info("Loading YOLOWorld model...")
-        if os.path.exists(yolo_world_model_path):
-            try:
-                yolo_world_global = YOLOWorld(yolo_world_model_path)
-                logger.info("YOLOWorld model loaded successfully")
-            except Exception as e:
-                logger.error(f"Failed to load YOLOWorld: {e}")
-                yolo_world_global = None
-        else:
-            logger.warning("YOLOWorld model file not found, will raise error if used")
-            yolo_world_global = None
-    return yolo_world_global
 def get_u2net():
     """Lazy load U2NETP model"""
     global u2net_global
@@ -976,46 +967,43 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
         # Mask paper area in input image first
         masked_input_image = mask_paper_area_in_image(image, paper_contour)
-        # Use YOLOWorld to detect object bounding box
-        yolo_world = get_yolo_world()
-        # Lower confidence and add size-based filtering
-        if yolo_world is None:
-            logger.warning("YOLOWorld model not available, proceeding with full image")
             cropped_image = masked_input_image
             crop_offset = (0, 0)
         else:
-            yolo_world.set_classes(["small object", "tool", "item", "component", "part", "piece", "device"])
-            results = yolo_world.predict(masked_input_image, conf=0.05, verbose=False)  # Much lower confidence
             if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
-                logger.warning("No objects detected by YOLOWorld, proceeding with full image")
                 cropped_image = masked_input_image
                 crop_offset = (0, 0)
             else:
                 boxes = results[0].boxes.xyxy.cpu().numpy()
                 confidences = results[0].boxes.conf.cpu().numpy()
-                # Filter out boxes that are too large (likely paper detection)
-                valid_boxes = []
                 image_area = masked_input_image.shape[0] * masked_input_image.shape[1]
                 for i, box in enumerate(boxes):
                     x_min, y_min, x_max, y_max = box
                     box_area = (x_max - x_min) * (y_max - y_min)
-                    if box_area < image_area * 0.3:  # Reject if larger than 30% of image
                         valid_boxes.append((i, confidences[i]))
                 if not valid_boxes:
                     cropped_image = masked_input_image
                     crop_offset = (0, 0)
                 else:
                     # Get highest confidence valid box
                     best_idx = max(valid_boxes, key=lambda x: x[1])[0]
                     x_min, y_min, x_max, y_max = map(int, boxes[best_idx])
-                    # Larger margin for small objects
-                    box_size = min(x_max - x_min, y_max - y_min)
-                    margin = max(30, int(box_size * 0.3))  # At least 30px margin
         # Remove background from cropped image
         orig_size = image.shape[:2]

 u2net_model_path = os.path.join(CACHE_DIR, "u2netp.pth")
 # Global variable for YOLOv8
+yolo_v8_global = None
+yolo_v8_model_path = os.path.join(CACHE_DIR, "yolov8s.pt")  # Adjust path as needed
 # Device configuration
             shutil.copy("u2netp.pth", u2net_model_path)
         else:
             raise FileNotFoundError("u2netp.pth model file not found")
+    logger.info("YOLOv8 will auto-download if not present")
 ensure_model_files()
             logger.warning("Paper model file not found, using fallback detection")
             paper_detector_global = None
     return paper_detector_global
+def get_yolo_v8():
+    """Lazy load YOLOv8 model"""
+    global yolo_v8_global
+    if yolo_v8_global is None:
+        logger.info("Loading YOLOv8 model...")
+        try:
+            yolo_v8_global = YOLO(yolo_v8_model_path)  # Auto-downloads if needed
+            logger.info("YOLOv8 model loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load YOLOv8: {e}")
+            yolo_v8_global = None
+    return yolo_v8_global
 def get_u2net():
     """Lazy load U2NETP model"""
     global u2net_global
         # Mask paper area in input image first
         masked_input_image = mask_paper_area_in_image(image, paper_contour)
+        # Use YOLOv8 to detect objects
+        yolo_v8 = get_yolo_v8()
+        if yolo_v8 is None:
+            logger.warning("YOLOv8 model not available, proceeding with full image")
             cropped_image = masked_input_image
             crop_offset = (0, 0)
         else:
+            # YOLOv8 detects all COCO classes by default
+            results = yolo_v8.predict(masked_input_image, conf=0.1, verbose=False)
             if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
+                logger.warning("No objects detected by YOLOv8, proceeding with full image")
                 cropped_image = masked_input_image
                 crop_offset = (0, 0)
             else:
                 boxes = results[0].boxes.xyxy.cpu().numpy()
                 confidences = results[0].boxes.conf.cpu().numpy()
+                # Filter out boxes that are too small or too large (very large ones are likely paper/background)
                 image_area = masked_input_image.shape[0] * masked_input_image.shape[1]
+                valid_boxes = []
                 for i, box in enumerate(boxes):
                     x_min, y_min, x_max, y_max = box
                     box_area = (x_max - x_min) * (y_max - y_min)
+                    # Keep boxes that are 5% to 40% of image area
+                    if 0.05 * image_area < box_area < 0.4 * image_area:
                         valid_boxes.append((i, confidences[i]))
                 if not valid_boxes:
+                    logger.warning("No valid objects detected, proceeding with full image")
                     cropped_image = masked_input_image
                     crop_offset = (0, 0)
                 else:
                     # Get highest confidence valid box
                     best_idx = max(valid_boxes, key=lambda x: x[1])[0]
                     x_min, y_min, x_max, y_max = map(int, boxes[best_idx])
         # Remove background from cropped image
         orig_size = image.shape[:2]