Spaces:

RollAI
/

ChatWithTranscriptStaging

Running

App Files Community

AhmadMustafa committed on Mar 4

Commit

d7401be

1 Parent(s): 1cdfb96

add: separate visualization for 9:16 and 16:9

Browse files

Files changed (2) hide show

.gitignore +2 -1
crop_utils.py +172 -141

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	- __pycache__


1	+ __pycache__
2	+ .DS_Store

crop_utils.py CHANGED Viewed

@@ -175,7 +175,7 @@ def crop_and_draw_divisions(
     Returns:
         tuple: (cropped_image_16_9, image_with_lines, cropped_image_9_16)
     """
-    yolo_model = YOLO("yolo11n.pt")
     # Calculate division width and boundaries
     division_width = input_image.width / num_divisions
     left_boundary = (left_division - 1) * division_width
@@ -187,7 +187,11 @@ def crop_and_draw_divisions(
     )
     # Run YOLO on the 9:16 crop to get person bbox
-    bbox = yolo_model(cropped_image_9_16, classes=[0])[0].boxes.xyxy.cpu().numpy()[0]
     x1, y1, x2, y2 = bbox
     # Calculate top boundary with head margin
@@ -348,7 +352,7 @@ def find_persons_center(image):
         int: x-coordinate of the center point of all persons
     """
     # Detect persons (class 0 in COCO dataset)
-    results = model(image, classes=[0])
     if not results or len(results[0].boxes) == 0:
         # If no persons detected, return center of image
@@ -411,7 +415,7 @@ def create_layouts(image, left_division, right_division):
     cutout_height = cutout_image.shape[0]
     # 2. Run YOLO on cutout to get person bounding box and center
-    results = model(cutout_image, classes=[0])
     # Default center if no detection
     cutout_center_x = cutout_image.shape[1] // 2
@@ -578,9 +582,9 @@ def create_layouts(image, left_division, right_division):
     return layout_crops, cutout_pil, cutout_16_9_pil, cutout_9_16_pil
-def draw_all_crops_on_original(image, left_division, right_division):
     """
-    Create a visualization showing all crop regions overlaid on the original image.
     Each crop region is outlined with a different color and labeled.
     All crops are centered on the person's center point.
@@ -588,9 +592,10 @@ def draw_all_crops_on_original(image, left_division, right_division):
         image: PIL Image
         left_division: Left division index (1-20)
         right_division: Right division index (1-20)
     Returns:
-        PIL Image: Original image with all crop regions visualized
     """
     # Convert PIL Image to cv2 format
     if isinstance(image, Image.Image):
@@ -613,7 +618,7 @@ def draw_all_crops_on_original(image, left_division, right_division):
     cutout_image = image_cv[:, left_boundary:right_boundary].copy()
     # Get YOLO detections for person bounding box
-    results = model(cutout_image, classes=[0])
     # Default values
     cutout_center_x = cutout_image.shape[1] // 2
@@ -668,171 +673,184 @@ def draw_all_crops_on_original(image, left_division, right_division):
     font_scale = 0.8
     font_thickness = 2
-    # 1. Draw cutout region (original divisions)
-    cv2.rectangle(
-        visualization,
-        (left_boundary, 0),
-        (right_boundary, height),
-        colors["cutout"],
-        thickness,
-    )
-    cv2.putText(
-        visualization,
-        "Cutout",
-        (left_boundary + 5, 30),
-        font,
-        font_scale,
-        colors["cutout"],
-        font_thickness,
-    )
-    # 2. Create 16:9 and 9:16 versions of the cutout - CENTERED on person
-    cutout_width = right_boundary - left_boundary
-    cutout_height = height
-    # For 16:9 version with 20% margin above person
-    aspect_16_9 = 16 / 9
-    target_height_16_9 = int(cutout_width / aspect_16_9)
-    if target_height_16_9 <= height:
-        # Calculate 20% of person height for top margin
-        top_margin = int(original_person_height * 0.2)
-        # Start 20% above the person's top
-        y_start = int(max(0, original_person_top - top_margin))
-        # If this would make the crop exceed the bottom, adjust y_start
-        if y_start + target_height_16_9 > height:
-            y_start = int(max(0, height - target_height_16_9))
-        y_end = int(min(height, y_start + target_height_16_9))
         cv2.rectangle(
             visualization,
-            (left_boundary, y_start),
-            (right_boundary, y_end),
-            colors["16:9"],
             thickness,
         )
         cv2.putText(
             visualization,
-            "16:9",
-            (left_boundary + 5, y_start + 30),
             font,
             font_scale,
-            colors["16:9"],
             font_thickness,
         )
-    # For 9:16 version centered on person
-    aspect_9_16 = 9 / 16
-    target_width_9_16 = int(cutout_height * aspect_9_16)
-    if target_width_9_16 <= cutout_width:
-        # Center horizontally around person
-        x_start = max(
-            0,
-            min(
-                left_boundary + cutout_width - target_width_9_16,
-                original_center_x - target_width_9_16 // 2,
-            ),
         )
-        x_end = x_start + target_width_9_16
         cv2.rectangle(
-            visualization, (x_start, 0), (x_end, height), colors["9:16"], thickness
         )
         cv2.putText(
             visualization,
-            "9:16",
-            (x_start + 5, 60),
             font,
             font_scale,
-            colors["9:16"],
             font_thickness,
         )
-    # 3. Draw centered layout variations
-    # Half width layout
-    half_width = width // 2
-    half_left_x = max(0, min(width - half_width, original_center_x - half_width // 2))
-    half_right_x = half_left_x + half_width
-    cv2.rectangle(
-        visualization,
-        (half_left_x, 0),
-        (half_right_x, height),
-        colors["half"],
-        thickness,
-    )
-    cv2.putText(
-        visualization,
-        "Half Width",
-        (half_left_x + 5, 90),
-        font,
-        font_scale,
-        colors["half"],
-        font_thickness,
-    )
-    # Third width layout
-    third_width = width // 3
-    third_left_x = max(
-        0, min(width - third_width, original_center_x - third_width // 2)
-    )
-    third_right_x = third_left_x + third_width
-    cv2.rectangle(
-        visualization,
-        (third_left_x, 0),
-        (third_right_x, height),
-        colors["third"],
-        thickness,
-    )
-    cv2.putText(
-        visualization,
-        "Third Width",
-        (third_left_x + 5, 120),
-        font,
-        font_scale,
-        colors["third"],
-        font_thickness,
-    )
-    # Two-thirds width layout
-    two_thirds_width = (width * 2) // 3
-    two_thirds_left_x = max(
-        0, min(width - two_thirds_width, original_center_x - two_thirds_width // 2)
-    )
-    two_thirds_right_x = two_thirds_left_x + two_thirds_width
-    cv2.rectangle(
-        visualization,
-        (two_thirds_left_x, 0),
-        (two_thirds_right_x, height),
-        colors["two_thirds"],
-        thickness,
-    )
-    cv2.putText(
-        visualization,
-        "Two-Thirds Width",
-        (two_thirds_left_x + 5, 150),
-        font,
-        font_scale,
-        colors["two_thirds"],
-        font_thickness,
-    )
-    # 4. Draw center point of person(s)
     center_radius = 8
     cv2.circle(
         visualization,
-        (original_center_x, height // 2),
         center_radius,
         (255, 255, 255),
         -1,
     )
     cv2.circle(
-        visualization, (original_center_x, height // 2), center_radius, (0, 0, 0), 2
     )
     cv2.putText(
         visualization,
         "Person Center",
-        (original_center_x + 10, height // 2),
         font,
         font_scale,
         (255, 255, 255),
@@ -852,8 +870,8 @@ def get_image_crop(cid=None, rsid=None, uid=None):
     Returns:
         gr.Gallery: Gallery of all generated images
     """
-    image_paths = get_sprite_firebase(cid, rsid, uid)
     # Lists to store all images
     all_images = []
     all_captions = []
@@ -908,14 +926,27 @@ def get_image_crop(cid=None, rsid=None, uid=None):
             mid_image, left_division, right_division
         )
-        # Create the visualization with all crops overlaid on original
-        all_crops_visualization = draw_all_crops_on_original(
-            mid_image, left_division, right_division
         )
-        # Start with the visualization showing all crops
-        all_images.append(all_crops_visualization)
-        all_captions.append(f"All Crops Visualization {all_crops_visualization.size}")
         # Add input and middle image to gallery
         all_images.append(input_image)

     Returns:
         tuple: (cropped_image_16_9, image_with_lines, cropped_image_9_16)
     """
+    yolo_model = model
     # Calculate division width and boundaries
     division_width = input_image.width / num_divisions
     left_boundary = (left_division - 1) * division_width
     )
     # Run YOLO on the 9:16 crop to get person bbox
+    bbox = (
+        yolo_model(cropped_image_9_16, classes=[0], conf=0.6)[0]
+        .boxes.xyxy.cpu()
+        .numpy()[0]
+    )
     x1, y1, x2, y2 = bbox
     # Calculate top boundary with head margin
         int: x-coordinate of the center point of all persons
     """
     # Detect persons (class 0 in COCO dataset)
+    results = model(image, classes=[0], conf=0.6)
     if not results or len(results[0].boxes) == 0:
         # If no persons detected, return center of image
     cutout_height = cutout_image.shape[0]
     # 2. Run YOLO on cutout to get person bounding box and center
+    results = model(cutout_image, classes=[0], conf=0.6)
     # Default center if no detection
     cutout_center_x = cutout_image.shape[1] // 2
     return layout_crops, cutout_pil, cutout_16_9_pil, cutout_9_16_pil
+def draw_crops_on_original(image, left_division, right_division, crop_types):
     """
+    Create a visualization showing selected crop regions overlaid on the original image.
     Each crop region is outlined with a different color and labeled.
     All crops are centered on the person's center point.
         image: PIL Image
         left_division: Left division index (1-20)
         right_division: Right division index (1-20)
+        crop_types: List of crop types to include in visualization (e.g., ["16:9", "9:16"])
     Returns:
+        PIL Image: Original image with selected crop regions visualized
     """
     # Convert PIL Image to cv2 format
     if isinstance(image, Image.Image):
     cutout_image = image_cv[:, left_boundary:right_boundary].copy()
     # Get YOLO detections for person bounding box
+    results = model(cutout_image, classes=[0], conf=0.6)
     # Default values
     cutout_center_x = cutout_image.shape[1] // 2
     font_scale = 0.8
     font_thickness = 2
+    # Draw cutout region (original divisions) if requested
+    if "cutout" in crop_types:
+        cv2.rectangle(
+            visualization,
+            (left_boundary, 0),
+            (right_boundary, height),
+            colors["cutout"],
+            thickness,
+        )
+        cv2.putText(
+            visualization,
+            "Cutout",
+            (left_boundary + 5, 30),
+            font,
+            font_scale,
+            colors["cutout"],
+            font_thickness,
+        )
+    # Create 16:9 version of the cutout if requested
+    if "16:9" in crop_types:
+        cutout_width = right_boundary - left_boundary
+        cutout_height = height
+        aspect_16_9 = 16 / 9
+        target_height_16_9 = int(cutout_width / aspect_16_9)
+        if target_height_16_9 <= height:
+            # Calculate 20% of person height for top margin
+            top_margin = int(original_person_height * 0.2)
+            # Start 20% above the person's top
+            y_start = int(max(0, original_person_top - top_margin))
+            # If this would make the crop exceed the bottom, adjust y_start
+            if y_start + target_height_16_9 > height:
+                y_start = int(max(0, height - target_height_16_9))
+            y_end = int(min(height, y_start + target_height_16_9))
+            cv2.rectangle(
+                visualization,
+                (left_boundary, y_start),
+                (right_boundary, y_end),
+                colors["16:9"],
+                thickness,
+            )
+            cv2.putText(
+                visualization,
+                "16:9",
+                (left_boundary + 5, y_start + 30),
+                font,
+                font_scale,
+                colors["16:9"],
+                font_thickness,
+            )
+    # Create 9:16 version if requested
+    if "9:16" in crop_types:
+        cutout_width = right_boundary - left_boundary
+        cutout_height = height
+        aspect_9_16 = 9 / 16
+        target_width_9_16 = int(cutout_height * aspect_9_16)
+        if target_width_9_16 <= cutout_width:
+            # Center horizontally around person
+            x_start = max(
+                0,
+                min(
+                    left_boundary + cutout_width - target_width_9_16,
+                    original_center_x - target_width_9_16 // 2,
+                ),
+            )
+            x_end = x_start + target_width_9_16
+            cv2.rectangle(
+                visualization, (x_start, 0), (x_end, height), colors["9:16"], thickness
+            )
+            cv2.putText(
+                visualization,
+                "9:16",
+                (x_start + 5, 60),
+                font,
+                font_scale,
+                colors["9:16"],
+                font_thickness,
+            )
+    # Draw centered half width layout if requested
+    if "half" in crop_types:
+        half_width = width // 2
+        half_left_x = max(
+            0, min(width - half_width, original_center_x - half_width // 2)
+        )
+        half_right_x = half_left_x + half_width
         cv2.rectangle(
             visualization,
+            (half_left_x, 0),
+            (half_right_x, height),
+            colors["half"],
             thickness,
         )
         cv2.putText(
             visualization,
+            "Half Width",
+            (half_left_x + 5, 90),
             font,
             font_scale,
+            colors["half"],
             font_thickness,
         )
+    # Draw centered third width layout if requested
+    if "third" in crop_types:
+        third_width = width // 3
+        third_left_x = max(
+            0, min(width - third_width, original_center_x - third_width // 2)
         )
+        third_right_x = third_left_x + third_width
         cv2.rectangle(
+            visualization,
+            (third_left_x, 0),
+            (third_right_x, height),
+            colors["third"],
+            thickness,
         )
         cv2.putText(
             visualization,
+            "Third Width",
+            (third_left_x + 5, 120),
             font,
             font_scale,
+            colors["third"],
             font_thickness,
         )
+    # Draw centered two-thirds width layout if requested
+    if "two_thirds" in crop_types:
+        two_thirds_width = (width * 2) // 3
+        two_thirds_left_x = max(
+            0, min(width - two_thirds_width, original_center_x - two_thirds_width // 2)
+        )
+        two_thirds_right_x = two_thirds_left_x + two_thirds_width
+        cv2.rectangle(
+            visualization,
+            (two_thirds_left_x, 0),
+            (two_thirds_right_x, height),
+            colors["two_thirds"],
+            thickness,
+        )
+        cv2.putText(
+            visualization,
+            "Two-Thirds Width",
+            (two_thirds_left_x + 5, 150),
+            font,
+            font_scale,
+            colors["two_thirds"],
+            font_thickness,
+        )
+    # Draw center point of person(s)
     center_radius = 8
     cv2.circle(
         visualization,
+        (original_center_x, original_center_y),
         center_radius,
         (255, 255, 255),
         -1,
     )
     cv2.circle(
+        visualization,
+        (original_center_x, original_center_y),
+        center_radius,
+        (0, 0, 0),
+        2,
     )
     cv2.putText(
         visualization,
         "Person Center",
+        (original_center_x + 10, original_center_y),
         font,
         font_scale,
         (255, 255, 255),
     Returns:
         gr.Gallery: Gallery of all generated images
     """
+    # image_paths = get_sprite_firebase(cid, rsid, uid)
+    image_paths = ["data/sprite1.jpg", "data/sprite2.jpg"]
     # Lists to store all images
     all_images = []
     all_captions = []
             mid_image, left_division, right_division
         )
+        # Create the first visualization with 16:9 and 9:16 crops only
+        aspect_ratio_visualization = draw_crops_on_original(
+            mid_image, left_division, right_division, ["16:9", "9:16"]
+        )
+        # Create the second visualization with the remaining layouts
+        other_layouts_visualization = draw_crops_on_original(
+            mid_image,
+            left_division,
+            right_division,
+            ["cutout", "half", "third", "two_thirds"],
+        )
+        # Add visualizations showing crops
+        all_images.append(aspect_ratio_visualization)
+        all_captions.append(
+            f"Aspect Ratio Crops (16:9 & 9:16) {aspect_ratio_visualization.size}"
         )
+        all_images.append(other_layouts_visualization)
+        all_captions.append(f"Other Layout Options {other_layouts_visualization.size}")
         # Add input and middle image to gallery
         all_images.append(input_image)