Spaces:

RollAI
/

ChatWithTranscript

Running

App Files Files Community

AhmadMustafa committed on Mar 3

Commit

0c6b4cf

1 Parent(s): 675f40a

add: 16:9 crops

Browse files

Files changed (2) hide show

crop_utils.py +603 -152
prompts.py +8 -2

crop_utils.py CHANGED Viewed

@@ -13,6 +13,8 @@ from ultralytics import YOLO
 from prompts import remove_unwanted_prompt
 def get_middle_thumbnail(input_image: Image, grid_size=(10, 10), padding=3):
     """
@@ -57,129 +59,6 @@ def get_middle_thumbnail(input_image: Image, grid_size=(10, 10), padding=3):
     return middle_thumb
def get_person_bbox(frame, model):
    """Return the bounding box of the largest detected person, or None.

    Args:
        frame: Image passed straight to ``model``.
        model: Callable detector; it is invoked as ``model(frame, classes=[0])``
            and its first result must expose ``boxes.xyxy``.

    Returns:
        The ``[x1, y1, x2, y2]`` row with the greatest area, or ``None`` when
        the detector returns nothing or no boxes.
    """
    # Class 0 is "person" in COCO.
    detections = model(frame, classes=[0])
    if not detections or len(detections[0].boxes) == 0:
        return None

    person_boxes = detections[0].boxes.xyxy.cpu().numpy()

    # Pick the box covering the most pixels.
    widths = person_boxes[:, 2] - person_boxes[:, 0]
    heights = person_boxes[:, 3] - person_boxes[:, 1]
    return person_boxes[np.argmax(widths * heights)]
_PERSON_MODEL = None


def _get_person_model():
    """Load the YOLO person detector once and reuse it on later calls."""
    global _PERSON_MODEL
    if _PERSON_MODEL is None:
        _PERSON_MODEL = YOLO("yolo11n.pt")
    return _PERSON_MODEL


def _fit_crop_size(wanted_width, aspect_ratio, frame_width, frame_height):
    """Return a (width, height) of ``aspect_ratio`` no wider than ``wanted_width`` that fits the frame."""
    width = min(frame_width, wanted_width)
    height = int(width / aspect_ratio)
    if height > frame_height:
        # Height is the limiting side: cap it and re-derive the width so the
        # aspect ratio is kept instead of being distorted by the later resize.
        height = frame_height
        width = int(height * aspect_ratio)
    return width, height


def _clamp_start(start, length, limit):
    """Shift a window of ``length`` pixels so that it lies inside ``[0, limit]``."""
    return max(0, min(start, limit - length))


def generate_crops(frame):
    """Generate a 16:9 and a 9:16 crop around the largest detected person.

    Args:
        frame: PIL image (converted to BGR) or a cv2/numpy image.

    Returns:
        tuple: ``(crop_16_9, crop_9_16)`` as cv2 images resized to 426x240 and
        240x426, or ``(None, None)`` when no person is detected or the
        detection is too small to crop.
    """
    # Convert PIL Image to cv2 format if needed
    if isinstance(frame, Image.Image):
        frame = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR)
    frame_height, frame_width = frame.shape[:2]

    bbox = get_person_bbox(frame, _get_person_model())
    if bbox is None:
        return None, None

    x1, y1, x2, y2 = map(int, bbox)
    person_height = y2 - y1
    person_center_x = (x1 + x2) // 2
    person_center_y = (y1 + y2) // 2

    # Both crop sizes are derived from the person's height.
    aspect_ratio_16_9 = 16 / 9
    aspect_ratio_9_16 = 9 / 16
    wide_w, wide_h = _fit_crop_size(
        int(person_height * aspect_ratio_16_9),
        aspect_ratio_16_9,
        frame_width,
        frame_height,
    )
    tall_w, tall_h = _fit_crop_size(
        int(person_height * aspect_ratio_9_16),
        aspect_ratio_9_16,
        frame_width,
        frame_height,
    )
    if min(wide_w, wide_h, tall_w, tall_h) < 1:
        # Degenerate detection: cv2.resize would fail on an empty crop.
        return None, None

    # 16:9: centred horizontally, top aligned with the person's top. The
    # window is shifted back inside the frame rather than truncated.
    wide_x = _clamp_start(person_center_x - wide_w // 2, wide_w, frame_width)
    wide_y = _clamp_start(y1, wide_h, frame_height)

    # 9:16: centred on the person in both directions.
    tall_x = _clamp_start(person_center_x - tall_w // 2, tall_w, frame_width)
    tall_y = _clamp_start(person_center_y - tall_h // 2, tall_h, frame_height)

    crop_16_9 = frame[wide_y:wide_y + wide_h, wide_x:wide_x + wide_w]
    crop_9_16 = frame[tall_y:tall_y + tall_h, tall_x:tall_x + tall_w]

    # Resize to standard dimensions
    crop_16_9 = cv2.resize(crop_16_9, (426, 240))  # 16:9 aspect ratio
    crop_9_16 = cv2.resize(crop_9_16, (240, 426))  # 9:16 aspect ratio
    return crop_16_9, crop_9_16
def visualize_crops(image, bbox, crops_info):
    """Draw the person box and both crop rectangles on a copy of ``image``.

    Args:
        image: cv2/numpy image; it is copied, not modified.
        bbox: ``[x1, y1, x2, y2]`` of the detected person.
        crops_info: dict with ``'crop_16_9'`` and ``'crop_9_16'`` entries, each
            a mapping holding ``x1``, ``y1``, ``x2`` and ``y2``.

    Returns:
        The annotated copy.
    """
    canvas = image.copy()

    person_rect = {"x1": bbox[0], "y1": bbox[1], "x2": bbox[2], "y2": bbox[3]}
    # (rectangle, BGR colour): person in blue, 16:9 in green, 9:16 in red.
    outlines = (
        (person_rect, (255, 0, 0)),
        (crops_info["crop_16_9"], (0, 255, 0)),
        (crops_info["crop_9_16"], (0, 0, 255)),
    )
    for rect, bgr in outlines:
        top_left = (int(rect["x1"]), int(rect["y1"]))
        bottom_right = (int(rect["x2"]), int(rect["y2"]))
        cv2.rectangle(canvas, top_left, bottom_right, bgr, 2)
    return canvas
 def encode_image_to_base64(image: Image.Image, format: str = "JPEG") -> str:
     """
     Convert a PIL image to a base64 string.
@@ -421,9 +300,15 @@ def analyze_image(numbered_input_image: Image, prompt, input_image):
         )
     except Exception as e:
         print(e)
-        return input_image, input_image, input_image
-    return cropped_image_16_9, image_with_lines, cropped_image_9_16
 def get_sprite_firebase(cid, rsid, uid):
@@ -450,26 +335,548 @@ def get_sprite_firebase(cid, rsid, uid):
     return data.val()
 def get_image_crop(cid=None, rsid=None, uid=None):
-    """Function that returns both 16:9 and 9:16 crops"""
-    image_paths = get_sprite_firebase(cid, rsid, uid)
-    input_images = []
-    mid_images = []
-    cropped_image_16_9s = []
-    images_with_lines = []
-    cropped_image_9_16s = []
-    for image_path in image_paths:
-        response = requests.get(image_path)
-        input_image = Image.open(BytesIO(response.content))
-        input_images.append(input_image)
         # Get the middle thumbnail
         mid_image = get_middle_thumbnail(input_image)
-        mid_images.append(mid_image)
         numbered_mid_image = add_top_numbers(
             input_image=mid_image,
             num_divisions=20,
@@ -478,19 +885,63 @@ def get_image_crop(cid=None, rsid=None, uid=None):
             dot_spacing=20,
         )
-        cropped_image_16_9, image_with_lines, cropped_image_9_16 = analyze_image(
-            numbered_mid_image, remove_unwanted_prompt(2), mid_image
         )
-        cropped_image_16_9s.append(cropped_image_16_9)
-        images_with_lines.append(image_with_lines)
-        cropped_image_9_16s.append(cropped_image_9_16)
-    return gr.Gallery(
-        [
-            *input_images,
-            *mid_images,
-            *cropped_image_16_9s,
-            *images_with_lines,
-            *cropped_image_9_16s,
-        ]
-    )

 from prompts import remove_unwanted_prompt
+model = YOLO("yolo11n.pt")
 def get_middle_thumbnail(input_image: Image, grid_size=(10, 10), padding=3):
     """
     return middle_thumb
 def encode_image_to_base64(image: Image.Image, format: str = "JPEG") -> str:
     """
     Convert a PIL image to a base64 string.
         )
     except Exception as e:
         print(e)
+        return input_image, input_image, input_image, 0, 20
+    return (
+        cropped_image_16_9,
+        image_with_lines,
+        cropped_image_9_16,
+        response_json["left_row"],
+        response_json["right_row"],
+    )
 def get_sprite_firebase(cid, rsid, uid):
     return data.val()
def find_persons_center(image):
    """Return the x-coordinate around which the detected persons are centred.

    With one detection this is the centre of its box; with several it is the
    centre of the box spanning all of them. When nothing is detected the
    horizontal centre of the image is returned.

    Args:
        image: CV2/numpy array image

    Returns:
        int: x-coordinate of the centre point of all persons
    """
    # Class 0 is "person" in the COCO dataset.
    detections = model(image, classes=[0])
    if not detections or len(detections[0].boxes) == 0:
        return image.shape[1] // 2

    xyxy = detections[0].boxes.xyxy.cpu().numpy()
    print(f"Detected {len(xyxy)} persons in the image")

    if len(xyxy) != 1:
        # Several persons: use the horizontal extent covering all boxes.
        left_x = xyxy[:, 0].min()
        right_x = xyxy[:, 2].max()
        merged_center_x = int((left_x + right_x) // 2)
        print(f"Multiple persons merged bounding box center x: {merged_center_x}")
        print(f"Merged bounds: left={left_x}, right={right_x}")
        return merged_center_x

    only_box = xyxy[0]
    center_x = int((only_box[0] + only_box[2]) // 2)
    print(f"Single person detected at center x: {center_x}")
    return center_x
def _layout_division_bounds(width, left_division, right_division, num_divisions):
    """Turn 1-based division indices into pixel columns, repairing invalid ranges."""
    left = max(1, left_division)
    right = min(num_divisions, right_division)
    if left > right:
        # An inverted range would give an empty cutout; use the full width.
        left, right = 1, num_divisions
    division_width = width / num_divisions
    left_boundary = int((left - 1) * division_width)
    right_boundary = int(right * division_width)
    if right_boundary <= left_boundary:
        # Image too narrow for the requested divisions.
        left_boundary, right_boundary = 0, width
    return left_boundary, right_boundary


def _layout_person_region(cutout):
    """Return (center_x, center_y, top, height) of the box spanning all detected persons."""
    cutout_height, cutout_width = cutout.shape[:2]
    results = model(cutout, classes=[0])  # class 0 is "person" in COCO
    if not results or len(results[0].boxes) == 0:
        # No detection: centre of the cutout, full-height "person".
        return cutout_width // 2, cutout_height // 2, 0.0, float(cutout_height)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    # With a single box the min/max are that box's own edges.
    left_x, top_y = boxes[:, 0].min(), boxes[:, 1].min()
    right_x, bottom_y = boxes[:, 2].max(), boxes[:, 3].max()
    return (
        int((left_x + right_x) // 2),
        int((top_y + bottom_y) // 2),
        top_y,
        bottom_y - top_y,
    )


def _layout_window_start(center, size, limit):
    """Return the start of a ``size``-wide window centred on ``center``, kept inside ``[0, limit]``."""
    return max(0, min(limit - size, center - size // 2))


def _layout_add_label(img, label):
    """Draw ``label`` in white on a black box in the top-left corner of ``img`` (in place)."""
    text_style = {
        "fontFace": cv2.FONT_HERSHEY_PLAIN,
        "fontScale": 1.0,
        "thickness": 2,
    }
    (text_width, text_height), _ = cv2.getTextSize(label, **text_style)
    cv2.rectangle(
        img,
        (10, 10),
        (10 + text_width + 10, 10 + text_height + 10),
        (0, 0, 0),
        -1,
    )  # Black background
    cv2.putText(
        img,
        label,
        (15, 15 + text_height),
        **text_style,
        color=(255, 255, 255),
        lineType=cv2.LINE_AA,
    )
    return img


def create_layouts(image, left_division, right_division, num_divisions=20):
    """Create labelled crop variations of ``image`` centred on detected persons.

    The columns between ``left_division`` and ``right_division`` form the
    cutout. From the cutout a 16:9 and a 9:16 crop are taken, and from the full
    image a half-, third- and two-thirds-width crop, all centred on the
    person(s) found inside the cutout.

    Args:
        image: PIL Image (a cv2/numpy BGR image is also accepted).
        left_division: Left division index (1..num_divisions).
        right_division: Right division index (1..num_divisions).
        num_divisions: Number of equal-width columns the image is divided into.

    Returns:
        tuple: (list of layout variations [half, third, two-thirds],
        cutout_image, cutout_16_9, cutout_9_16), all as PIL images.

    Out-of-range indices are clamped; an inverted range falls back to the
    full width.
    """
    # Convert PIL Image to cv2 format
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()
    height, width = image_cv.shape[:2]

    # 1. Cutout based on divisions
    left_boundary, right_boundary = _layout_division_bounds(
        width, left_division, right_division, num_divisions
    )
    cutout_image = image_cv[:, left_boundary:right_boundary].copy()
    cutout_width = right_boundary - left_boundary
    cutout_height = cutout_image.shape[0]

    # 2. Person position inside the cutout
    cutout_center_x, cutout_center_y, person_top, person_height = (
        _layout_person_region(cutout_image)
    )

    # 3. 16:9 crop: full cutout width, starting 20% of the person's height
    #    above the person's top.
    aspect_16_9 = 16 / 9
    aspect_9_16 = 9 / 16
    target_height_16_9 = int(cutout_width / aspect_16_9)
    if target_height_16_9 <= cutout_height:
        top_margin = int(person_height * 0.2)
        y_start = int(max(0, person_top - top_margin))
        # Shift up if the crop would run past the bottom edge.
        if y_start + target_height_16_9 > cutout_height:
            y_start = int(max(0, cutout_height - target_height_16_9))
        y_end = int(min(cutout_height, y_start + target_height_16_9))
        cutout_16_9 = cutout_image[y_start:y_end, :].copy()
    else:
        # Cutout is wider than 16:9: keep full height and trim the width.
        new_width = int(cutout_height * aspect_16_9)
        x_start = _layout_window_start(cutout_center_x, new_width, cutout_width)
        x_end = min(cutout_width, x_start + new_width)
        cutout_16_9 = cutout_image[:, x_start:x_end].copy()

    # 9:16 crop: full cutout height, centred horizontally on the person.
    target_width_9_16 = int(cutout_height * aspect_9_16)
    if target_width_9_16 <= cutout_width:
        x_start = int(
            _layout_window_start(cutout_center_x, target_width_9_16, cutout_width)
        )
        x_end = int(min(cutout_width, x_start + target_width_9_16))
        cutout_9_16 = cutout_image[:, x_start:x_end].copy()
    else:
        # Cutout is narrower than 9:16: keep full width and trim the height.
        new_height = int(cutout_width / aspect_9_16)
        y_start = int(
            _layout_window_start(cutout_center_y, new_height, cutout_height)
        )
        y_end = int(min(cutout_height, y_start + new_height))
        cutout_9_16 = cutout_image[y_start:y_end, :].copy()

    # 4. Person centre in original image coordinates
    original_center_x = left_boundary + cutout_center_x

    # 5. Full-height layouts on the original image, centred on the person(s)
    layout_images = []
    for label, layout_width in (
        ("Half Width", width // 2),
        ("Third Width", width // 3),
        ("Two-Thirds Width", (width * 2) // 3),
    ):
        layout_left = _layout_window_start(original_center_x, layout_width, width)
        layout = image_cv[:, layout_left:layout_left + layout_width].copy()
        layout_images.append(_layout_add_label(layout, label))

    # Every crop above is a copy, so labelling one does not mark the others.
    cutout_image = _layout_add_label(cutout_image, "Cutout")
    cutout_16_9 = _layout_add_label(cutout_16_9, "16:9")
    cutout_9_16 = _layout_add_label(cutout_9_16, "9:16")

    # Convert all output images to PIL format
    layout_crops = [
        Image.fromarray(cv2.cvtColor(layout, cv2.COLOR_BGR2RGB))
        for layout in layout_images
    ]
    cutout_pil = Image.fromarray(cv2.cvtColor(cutout_image, cv2.COLOR_BGR2RGB))
    cutout_16_9_pil = Image.fromarray(cv2.cvtColor(cutout_16_9, cv2.COLOR_BGR2RGB))
    cutout_9_16_pil = Image.fromarray(cv2.cvtColor(cutout_9_16, cv2.COLOR_BGR2RGB))
    return layout_crops, cutout_pil, cutout_16_9_pil, cutout_9_16_pil
def _overlay_division_bounds(width, left_division, right_division, num_divisions):
    """Turn 1-based division indices into pixel columns, repairing invalid ranges."""
    left = max(1, left_division)
    right = min(num_divisions, right_division)
    if left > right:
        # An inverted range would give an empty cutout; use the full width.
        left, right = 1, num_divisions
    division_width = width / num_divisions
    left_boundary = int((left - 1) * division_width)
    right_boundary = int(right * division_width)
    if right_boundary <= left_boundary:
        # Image too narrow for the requested divisions.
        left_boundary, right_boundary = 0, width
    return left_boundary, right_boundary


def _overlay_person_region(cutout):
    """Return (center_x, center_y, top, height) of the box spanning all detected persons."""
    cutout_height, cutout_width = cutout.shape[:2]
    results = model(cutout, classes=[0])  # class 0 is "person" in COCO
    if not results or len(results[0].boxes) == 0:
        # No detection: centre of the cutout, full-height "person".
        return cutout_width // 2, cutout_height // 2, 0.0, float(cutout_height)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    # With a single box the min/max are that box's own edges.
    left_x, top_y = boxes[:, 0].min(), boxes[:, 1].min()
    right_x, bottom_y = boxes[:, 2].max(), boxes[:, 3].max()
    return (
        int((left_x + right_x) // 2),
        int((top_y + bottom_y) // 2),
        top_y,
        bottom_y - top_y,
    )


def _overlay_window_start(center, size, limit):
    """Return the start of a ``size``-wide window centred on ``center``, kept inside ``[0, limit]``."""
    return max(0, min(limit - size, center - size // 2))


def draw_all_crops_on_original(image, left_division, right_division, num_divisions=20):
    """Overlay every crop region on a copy of the original image.

    Each region (cutout, 16:9, 9:16, half, third and two-thirds width) is
    outlined in its own colour and labelled, and the centre of the detected
    person(s) is marked with a dot.

    Args:
        image: PIL Image (a cv2/numpy BGR image is also accepted).
        left_division: Left division index (1..num_divisions).
        right_division: Right division index (1..num_divisions).
        num_divisions: Number of equal-width columns the image is divided into.

    Returns:
        PIL Image: Original image with all crop regions visualized.

    Out-of-range indices are clamped; an inverted range falls back to the
    full width.
    """
    # Convert PIL Image to cv2 format
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()
    visualization = image_cv.copy()
    height, width = image_cv.shape[:2]

    left_boundary, right_boundary = _overlay_division_bounds(
        width, left_division, right_division, num_divisions
    )
    cutout_width = right_boundary - left_boundary
    cutout_height = height  # the cutout is never cropped vertically

    # Locate the person(s) inside the cutout.
    cutout_image = image_cv[:, left_boundary:right_boundary].copy()
    cutout_center_x, cutout_center_y, person_top, person_height = (
        _overlay_person_region(cutout_image)
    )
    # Only x needs shifting back into original image coordinates.
    original_center_x = left_boundary + cutout_center_x
    original_center_y = cutout_center_y

    # Colors for the different crops (BGR format)
    colors = {
        "cutout": (0, 165, 255),  # Orange
        "16:9": (0, 255, 0),  # Green
        "9:16": (255, 0, 0),  # Blue
        "half": (255, 255, 0),  # Cyan
        "third": (255, 0, 255),  # Magenta
        "two_thirds": (0, 255, 255),  # Yellow
    }
    thickness = 3
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    font_thickness = 2

    def outline(top_left, bottom_right, color_key, text, text_origin):
        """Draw one labelled rectangle on the visualization."""
        cv2.rectangle(
            visualization, top_left, bottom_right, colors[color_key], thickness
        )
        cv2.putText(
            visualization,
            text,
            text_origin,
            font,
            font_scale,
            colors[color_key],
            font_thickness,
        )

    # 1. Cutout region (original divisions)
    outline(
        (left_boundary, 0),
        (right_boundary, height),
        "cutout",
        "Cutout",
        (left_boundary + 5, 30),
    )

    # 2. 16:9 crop: full cutout width, starting 20% of the person's height
    #    above the person's top.
    aspect_16_9 = 16 / 9
    target_height_16_9 = int(cutout_width / aspect_16_9)
    if target_height_16_9 <= cutout_height:
        top_margin = int(person_height * 0.2)
        y_start = int(max(0, person_top - top_margin))
        # Shift up if the crop would run past the bottom edge.
        if y_start + target_height_16_9 > height:
            y_start = int(max(0, height - target_height_16_9))
        y_end = int(min(height, y_start + target_height_16_9))
        outline(
            (left_boundary, y_start),
            (right_boundary, y_end),
            "16:9",
            "16:9",
            (left_boundary + 5, y_start + 30),
        )
    else:
        # Cutout is wider than 16:9: full height, width trimmed around the person.
        new_width = int(cutout_height * aspect_16_9)
        x_start = left_boundary + _overlay_window_start(
            cutout_center_x, new_width, cutout_width
        )
        x_end = min(right_boundary, x_start + new_width)
        outline((x_start, 0), (x_end, height), "16:9", "16:9", (x_start + 5, 30))

    # 9:16 crop: full height, centred horizontally on the person. The window
    # is clamped to the cutout, not to the whole image.
    aspect_9_16 = 9 / 16
    target_width_9_16 = int(cutout_height * aspect_9_16)
    if target_width_9_16 <= cutout_width:
        x_start = left_boundary + _overlay_window_start(
            cutout_center_x, target_width_9_16, cutout_width
        )
        x_end = x_start + target_width_9_16
        outline((x_start, 0), (x_end, height), "9:16", "9:16", (x_start + 5, 60))
    else:
        # Cutout is narrower than 9:16: full width, height trimmed around the person.
        new_height = int(cutout_width / aspect_9_16)
        y_start = _overlay_window_start(cutout_center_y, new_height, cutout_height)
        y_end = min(cutout_height, y_start + new_height)
        outline(
            (left_boundary, y_start),
            (right_boundary, y_end),
            "9:16",
            "9:16",
            (left_boundary + 5, y_start + 60),
        )

    # 3. Full-height layouts centred on the person(s); labels are staggered
    #    vertically so they do not overlap.
    for color_key, text, layout_width, text_y in (
        ("half", "Half Width", width // 2, 90),
        ("third", "Third Width", width // 3, 120),
        ("two_thirds", "Two-Thirds Width", (width * 2) // 3, 150),
    ):
        layout_left = _overlay_window_start(original_center_x, layout_width, width)
        outline(
            (layout_left, 0),
            (layout_left + layout_width, height),
            color_key,
            text,
            (layout_left + 5, text_y),
        )

    # 4. Centre point of the person(s): white dot with a black rim, placed at
    #    the detected centre rather than the vertical middle of the image.
    center_radius = 8
    person_center = (original_center_x, original_center_y)
    cv2.circle(visualization, person_center, center_radius, (255, 255, 255), -1)
    cv2.circle(visualization, person_center, center_radius, (0, 0, 0), 2)
    cv2.putText(
        visualization,
        "Person Center",
        (original_center_x + 10, original_center_y),
        font,
        font_scale,
        (255, 255, 255),
        font_thickness,
    )

    # Convert back to PIL format
    visualization_pil = Image.fromarray(cv2.cvtColor(visualization, cv2.COLOR_BGR2RGB))
    return visualization_pil
 def get_image_crop(cid=None, rsid=None, uid=None):
+    """
+    Function that returns both 16:9 and 9:16 crops and layout variations for visualization.
+    Returns:
+        gr.Gallery: Gallery of all generated images
+    """
+    # Uncomment this line when using Firebase
+    # image_paths = get_sprite_firebase(cid, rsid, uid)
+    # For testing, use a local image path
+    image_paths = ["sprite1.jpg", "sprite2.jpg"]
+    # Lists to store all images
+    all_images = []
+    all_captions = []
+    for image_path in image_paths:
+        # Load image (from local file or URL)
+        try:
+            if image_path.startswith(("http://", "https://")):
+                response = requests.get(image_path)
+                input_image = Image.open(BytesIO(response.content))
+            else:
+                input_image = Image.open(image_path)
+        except Exception as e:
+            print(f"Error loading image {image_path}: {e}")
+            continue
         # Get the middle thumbnail
         mid_image = get_middle_thumbnail(input_image)
+        # Add numbered divisions for GPT-4V analysis
         numbered_mid_image = add_top_numbers(
             input_image=mid_image,
             num_divisions=20,
             dot_spacing=20,
         )
+        # Analyze the image to get optimal crop divisions
+        # This uses GPT-4V to identify the optimal crop points
+        (
+            _,
+            _,
+            _,
+            left_division,
+            right_division,
+        ) = analyze_image(numbered_mid_image, remove_unwanted_prompt(2), mid_image)
+        # Safety check for divisions
+        if left_division <= 0:
+            left_division = 1
+        if right_division > 20:
+            right_division = 20
+        if left_division >= right_division:
+            left_division = 1
+            right_division = 20
+        print(f"Using divisions: left={left_division}, right={right_division}")
+        # Create layouts and cutouts
+        layouts, cutout_image, cutout_16_9, cutout_9_16 = create_layouts(
+            mid_image, left_division, right_division
         )
+        # Create the visualization with all crops overlaid on original
+        all_crops_visualization = draw_all_crops_on_original(
+            mid_image, left_division, right_division
+        )
+        # Start with the visualization showing all crops
+        all_images.append(all_crops_visualization)
+        all_captions.append(f"All Crops Visualization {all_crops_visualization.size}")
+        # Add input and middle image to gallery
+        all_images.append(input_image)
+        all_captions.append(f"Input Image {input_image.size}")
+        all_images.append(mid_image)
+        all_captions.append(f"Middle Thumbnail {mid_image.size}")
+        # Add cutout images to gallery
+        all_images.append(cutout_image)
+        all_captions.append(f"Cutout Image {cutout_image.size}")
+        all_images.append(cutout_16_9)
+        all_captions.append(f"16:9 Crop {cutout_16_9.size}")
+        all_images.append(cutout_9_16)
+        all_captions.append(f"9:16 Crop {cutout_9_16.size}")
+        # Add layout variations
+        for i, layout in enumerate(layouts):
+            label = ["Half Width", "Third Width", "Two-Thirds Width"][i]
+            all_images.append(layout)
+            all_captions.append(f"{label} {layout.size}")
+    # Return gallery with all images
+    return gr.Gallery(value=list(zip(all_images, all_captions)))

prompts.py CHANGED Viewed

@@ -153,5 +153,11 @@ If the user provides the correct call type, use the correct_call_type function t
 def remove_unwanted_prompt(number_of_speakers: int):
     if number_of_speakers == 2:
-        return """I want to crop this image such that no unwanted or Partial Object or Partial Human is in the image.
-Please analyze the image such that you tell me the row number on both the left and right sides of the image inside which there is the no unwanted partial object."""

 def remove_unwanted_prompt(number_of_speakers: int):
     if number_of_speakers == 2:
+        return """I want to crop this image only when absolutely necessary to remove partial objects or humans.
+Please analyze the image and tell me:
+1. The column number (1-20) on the left side where I should start the crop. Only suggest cropping (columns 1-4) if there are clear partial objects or humans that need removal. If no cropping is needed on the left, return 1.
+2. The column number (1-20) on the right side where I should end the crop. Only suggest cropping (columns 17-20) if there are clear partial objects or humans that need removal. If no cropping is needed on the right, return 20.
+I'm looking for minimal cropping - only cut when absolutely necessary to remove distracting partial elements."""