mlbench123 committed on
Commit
b15fe7a
·
verified ·
1 Parent(s): cdf5eca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -6
app.py CHANGED
@@ -85,6 +85,27 @@ birefnet = None
85
  paper_model_path = os.path.join(CACHE_DIR, "paper_detector.pt") # You'll need to train/provide this
86
  u2net_model_path = os.path.join(CACHE_DIR, "u2netp.pth")
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  # Device configuration
89
  device = "cpu"
90
  torch.set_float32_matmul_precision(["high", "highest"][0])
@@ -951,17 +972,81 @@ def predict_with_paper(image, paper_size, offset, offset_unit, finger_clearance=
951
  logger.error(f"Unexpected error in paper detection: {e}")
952
  raise gr.Error(f"Error processing image: {str(e)}")
953
 
954
- # Rest of the function remains the same...
955
- # [Keep all the existing object detection and DXF generation code]
956
-
957
  try:
958
  # Mask paper area in input image first
959
  masked_input_image = mask_paper_area_in_image(image, paper_contour)
960
 
961
- # Remove background from main objects
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
962
  orig_size = image.shape[:2]
963
- # objects_mask = remove_bg(image)
964
- # objects_mask = remove_bg(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
965
  objects_mask = remove_bg(image)
966
  processed_size = objects_mask.shape[:2]
967
 
 
85
  paper_model_path = os.path.join(CACHE_DIR, "paper_detector.pt") # You'll need to train/provide this
86
  u2net_model_path = os.path.join(CACHE_DIR, "u2netp.pth")
87
 
88
+ # Global variable for YOLOWorld
89
+ yolo_world_global = None
90
+ yolo_world_model_path = os.path.join(CACHE_DIR, "yolov8_world.pt") # Adjust path as needed
91
+
92
+ def get_yolo_world():
93
+ """Lazy load YOLOWorld model"""
94
+ global yolo_world_global
95
+ if yolo_world_global is None:
96
+ logger.info("Loading YOLOWorld model...")
97
+ if os.path.exists(yolo_world_model_path):
98
+ try:
99
+ yolo_world_global = YOLOWorld(yolo_world_model_path)
100
+ logger.info("YOLOWorld model loaded successfully")
101
+ except Exception as e:
102
+ logger.error(f"Failed to load YOLOWorld: {e}")
103
+ yolo_world_global = None
104
+ else:
105
+ logger.warning("YOLOWorld model file not found, will raise error if used")
106
+ yolo_world_global = None
107
+ return yolo_world_global
108
+
109
  # Device configuration
110
  device = "cpu"
111
  torch.set_float32_matmul_precision(["high", "highest"][0])
 
972
  logger.error(f"Unexpected error in paper detection: {e}")
973
  raise gr.Error(f"Error processing image: {str(e)}")
974
 
 
 
 
975
  try:
976
  # Mask paper area in input image first
977
  masked_input_image = mask_paper_area_in_image(image, paper_contour)
978
 
979
+ # NEW: Use YOLOWorld to detect object bounding box
980
+ yolo_world = get_yolo_world()
981
+ if yolo_world is None:
982
+ logger.warning("YOLOWorld model not available, proceeding with full image")
983
+ cropped_image = masked_input_image
984
+ crop_offset = (0, 0) # No offset if not cropping
985
+ else:
986
+ # Set prompts for tool/object detection
987
+ yolo_world.set_classes(["tool", "object on paper"])
988
+ results = yolo_world.predict(masked_input_image, conf=0.5, verbose=False)
989
+
990
+ if not results or len(results) == 0 or not hasattr(results[0], 'boxes') or len(results[0].boxes) == 0:
991
+ logger.warning("No objects detected by YOLOWorld, proceeding with full image")
992
+ cropped_image = masked_input_image
993
+ crop_offset = (0, 0)
994
+ else:
995
+ # Get the highest confidence box
996
+ boxes = results[0].boxes.xyxy.cpu().numpy()
997
+ confidences = results[0].boxes.conf.cpu().numpy()
998
+ best_box_idx = np.argmax(confidences)
999
+ x_min, y_min, x_max, y_max = map(int, boxes[best_box_idx])
1000
+
1001
+ # Add margin (e.g., 10% of box size, min 20px)
1002
+ margin = max(20, int(min(x_max - x_min, y_max - y_min) * 0.1))
1003
+ x_min = max(0, x_min - margin)
1004
+ y_min = max(0, y_min - margin)
1005
+ x_max = min(masked_input_image.shape[1], x_max + margin)
1006
+ y_max = min(masked_input_image.shape[0], y_max + margin)
1007
+
1008
+ # Crop the masked image
1009
+ cropped_image = masked_input_image[y_min:y_max, x_min:x_max]
1010
+ crop_offset = (x_min, y_min) # Store offset for mask realignment
1011
+ logger.info(f"Cropped to box: ({x_min}, {y_min}, {x_max}, {y_max})")
1012
+
1013
+ # Debug: Save cropped image
1014
+ cv2.imwrite("./debug/cropped_image.jpg", cropped_image)
1015
+
1016
+ # Remove background from cropped image
1017
  orig_size = image.shape[:2]
1018
+ objects_mask = remove_bg(cropped_image)
1019
+ processed_size = objects_mask.shape[:2]
1020
+
1021
+ # Resize mask to match cropped region and place back in original image space
1022
+ full_mask = np.zeros((orig_size[0], orig_size[1]), dtype=np.uint8)
1023
+ resized_mask = cv2.resize(objects_mask, (cropped_image.shape[1], cropped_image.shape[0]))
1024
+ full_mask[y_min:y_min+resized_mask.shape[0], x_min:x_min+resized_mask.shape[1]] = resized_mask
1025
+
1026
+ # Remove paper area from mask to focus only on objects
1027
+ objects_mask = exclude_paper_area(full_mask, paper_contour)
1028
+
1029
+ # Debug: Save intermediate masks
1030
+ cv2.imwrite("./debug/objects_mask_after_yolo.jpg", objects_mask)
1031
+
1032
+ # Check if we actually have object pixels after paper exclusion
1033
+ object_pixels = np.count_nonzero(objects_mask)
1034
+ if object_pixels < 1000: # Minimum threshold
1035
+ raise NoObjectDetectedError("No significant object detected after excluding paper area")
1036
+
1037
+ # Validate single object
1038
+ validate_single_object(objects_mask, paper_contour)
1039
+
1040
+ except (MultipleObjectsError, NoObjectDetectedError) as e:
1041
+ return (
1042
+ None, None, None, None,
1043
+ f"Error: {str(e)}"
1044
+ )
1045
+ except Exception as e:
1046
+ raise gr.Error(f"Error in object detection: {str(e)}")
1047
+
1048
+ # Rest of the function remains unchanged...
1049
+ # [Keep existing code for dilation, contour extraction, DXF generation, etc.]
1050
  objects_mask = remove_bg(image)
1051
  processed_size = objects_mask.shape[:2]
1052