AhmadMustafa committed on
Commit
d7401be
·
1 Parent(s): 1cdfb96

add: separate visualization for 9:16 and 16:9 crops

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. crop_utils.py +172 -141
.gitignore CHANGED
@@ -1 +1,2 @@
1
- __pycache__
 
 
1
+ __pycache__
2
+ .DS_Store
crop_utils.py CHANGED
@@ -175,7 +175,7 @@ def crop_and_draw_divisions(
175
  Returns:
176
  tuple: (cropped_image_16_9, image_with_lines, cropped_image_9_16)
177
  """
178
- yolo_model = YOLO("yolo11n.pt")
179
  # Calculate division width and boundaries
180
  division_width = input_image.width / num_divisions
181
  left_boundary = (left_division - 1) * division_width
@@ -187,7 +187,11 @@ def crop_and_draw_divisions(
187
  )
188
 
189
  # Run YOLO on the 9:16 crop to get person bbox
190
- bbox = yolo_model(cropped_image_9_16, classes=[0])[0].boxes.xyxy.cpu().numpy()[0]
 
 
 
 
191
  x1, y1, x2, y2 = bbox
192
 
193
  # Calculate top boundary with head margin
@@ -348,7 +352,7 @@ def find_persons_center(image):
348
  int: x-coordinate of the center point of all persons
349
  """
350
  # Detect persons (class 0 in COCO dataset)
351
- results = model(image, classes=[0])
352
 
353
  if not results or len(results[0].boxes) == 0:
354
  # If no persons detected, return center of image
@@ -411,7 +415,7 @@ def create_layouts(image, left_division, right_division):
411
  cutout_height = cutout_image.shape[0]
412
 
413
  # 2. Run YOLO on cutout to get person bounding box and center
414
- results = model(cutout_image, classes=[0])
415
 
416
  # Default center if no detection
417
  cutout_center_x = cutout_image.shape[1] // 2
@@ -578,9 +582,9 @@ def create_layouts(image, left_division, right_division):
578
  return layout_crops, cutout_pil, cutout_16_9_pil, cutout_9_16_pil
579
 
580
 
581
- def draw_all_crops_on_original(image, left_division, right_division):
582
  """
583
- Create a visualization showing all crop regions overlaid on the original image.
584
  Each crop region is outlined with a different color and labeled.
585
  All crops are centered on the person's center point.
586
 
@@ -588,9 +592,10 @@ def draw_all_crops_on_original(image, left_division, right_division):
588
  image: PIL Image
589
  left_division: Left division index (1-20)
590
  right_division: Right division index (1-20)
 
591
 
592
  Returns:
593
- PIL Image: Original image with all crop regions visualized
594
  """
595
  # Convert PIL Image to cv2 format
596
  if isinstance(image, Image.Image):
@@ -613,7 +618,7 @@ def draw_all_crops_on_original(image, left_division, right_division):
613
  cutout_image = image_cv[:, left_boundary:right_boundary].copy()
614
 
615
  # Get YOLO detections for person bounding box
616
- results = model(cutout_image, classes=[0])
617
 
618
  # Default values
619
  cutout_center_x = cutout_image.shape[1] // 2
@@ -668,171 +673,184 @@ def draw_all_crops_on_original(image, left_division, right_division):
668
  font_scale = 0.8
669
  font_thickness = 2
670
 
671
- # 1. Draw cutout region (original divisions)
672
- cv2.rectangle(
673
- visualization,
674
- (left_boundary, 0),
675
- (right_boundary, height),
676
- colors["cutout"],
677
- thickness,
678
- )
679
- cv2.putText(
680
- visualization,
681
- "Cutout",
682
- (left_boundary + 5, 30),
683
- font,
684
- font_scale,
685
- colors["cutout"],
686
- font_thickness,
687
- )
 
688
 
689
- # 2. Create 16:9 and 9:16 versions of the cutout - CENTERED on person
690
- cutout_width = right_boundary - left_boundary
691
- cutout_height = height
 
 
 
692
 
693
- # For 16:9 version with 20% margin above person
694
- aspect_16_9 = 16 / 9
695
- target_height_16_9 = int(cutout_width / aspect_16_9)
696
- if target_height_16_9 <= height:
697
- # Calculate 20% of person height for top margin
698
- top_margin = int(original_person_height * 0.2)
699
 
700
- # Start 20% above the person's top
701
- y_start = int(max(0, original_person_top - top_margin))
702
 
703
- # If this would make the crop exceed the bottom, adjust y_start
704
- if y_start + target_height_16_9 > height:
705
- y_start = int(max(0, height - target_height_16_9))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
 
707
- y_end = int(min(height, y_start + target_height_16_9))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
 
 
 
 
 
 
 
 
709
  cv2.rectangle(
710
  visualization,
711
- (left_boundary, y_start),
712
- (right_boundary, y_end),
713
- colors["16:9"],
714
  thickness,
715
  )
716
  cv2.putText(
717
  visualization,
718
- "16:9",
719
- (left_boundary + 5, y_start + 30),
720
  font,
721
  font_scale,
722
- colors["16:9"],
723
  font_thickness,
724
  )
725
 
726
- # For 9:16 version centered on person
727
- aspect_9_16 = 9 / 16
728
- target_width_9_16 = int(cutout_height * aspect_9_16)
729
- if target_width_9_16 <= cutout_width:
730
- # Center horizontally around person
731
- x_start = max(
732
- 0,
733
- min(
734
- left_boundary + cutout_width - target_width_9_16,
735
- original_center_x - target_width_9_16 // 2,
736
- ),
737
  )
738
- x_end = x_start + target_width_9_16
739
  cv2.rectangle(
740
- visualization, (x_start, 0), (x_end, height), colors["9:16"], thickness
 
 
 
 
741
  )
742
  cv2.putText(
743
  visualization,
744
- "9:16",
745
- (x_start + 5, 60),
746
  font,
747
  font_scale,
748
- colors["9:16"],
749
  font_thickness,
750
  )
751
 
752
- # 3. Draw centered layout variations
753
- # Half width layout
754
- half_width = width // 2
755
- half_left_x = max(0, min(width - half_width, original_center_x - half_width // 2))
756
- half_right_x = half_left_x + half_width
757
- cv2.rectangle(
758
- visualization,
759
- (half_left_x, 0),
760
- (half_right_x, height),
761
- colors["half"],
762
- thickness,
763
- )
764
- cv2.putText(
765
- visualization,
766
- "Half Width",
767
- (half_left_x + 5, 90),
768
- font,
769
- font_scale,
770
- colors["half"],
771
- font_thickness,
772
- )
773
-
774
- # Third width layout
775
- third_width = width // 3
776
- third_left_x = max(
777
- 0, min(width - third_width, original_center_x - third_width // 2)
778
- )
779
- third_right_x = third_left_x + third_width
780
- cv2.rectangle(
781
- visualization,
782
- (third_left_x, 0),
783
- (third_right_x, height),
784
- colors["third"],
785
- thickness,
786
- )
787
- cv2.putText(
788
- visualization,
789
- "Third Width",
790
- (third_left_x + 5, 120),
791
- font,
792
- font_scale,
793
- colors["third"],
794
- font_thickness,
795
- )
796
-
797
- # Two-thirds width layout
798
- two_thirds_width = (width * 2) // 3
799
- two_thirds_left_x = max(
800
- 0, min(width - two_thirds_width, original_center_x - two_thirds_width // 2)
801
- )
802
- two_thirds_right_x = two_thirds_left_x + two_thirds_width
803
- cv2.rectangle(
804
- visualization,
805
- (two_thirds_left_x, 0),
806
- (two_thirds_right_x, height),
807
- colors["two_thirds"],
808
- thickness,
809
- )
810
- cv2.putText(
811
- visualization,
812
- "Two-Thirds Width",
813
- (two_thirds_left_x + 5, 150),
814
- font,
815
- font_scale,
816
- colors["two_thirds"],
817
- font_thickness,
818
- )
819
 
820
- # 4. Draw center point of person(s)
821
  center_radius = 8
822
  cv2.circle(
823
  visualization,
824
- (original_center_x, height // 2),
825
  center_radius,
826
  (255, 255, 255),
827
  -1,
828
  )
829
  cv2.circle(
830
- visualization, (original_center_x, height // 2), center_radius, (0, 0, 0), 2
 
 
 
 
831
  )
832
  cv2.putText(
833
  visualization,
834
  "Person Center",
835
- (original_center_x + 10, height // 2),
836
  font,
837
  font_scale,
838
  (255, 255, 255),
@@ -852,8 +870,8 @@ def get_image_crop(cid=None, rsid=None, uid=None):
852
  Returns:
853
  gr.Gallery: Gallery of all generated images
854
  """
855
- image_paths = get_sprite_firebase(cid, rsid, uid)
856
-
857
  # Lists to store all images
858
  all_images = []
859
  all_captions = []
@@ -908,14 +926,27 @@ def get_image_crop(cid=None, rsid=None, uid=None):
908
  mid_image, left_division, right_division
909
  )
910
 
911
- # Create the visualization with all crops overlaid on original
912
- all_crops_visualization = draw_all_crops_on_original(
913
- mid_image, left_division, right_division
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914
  )
915
 
916
- # Start with the visualization showing all crops
917
- all_images.append(all_crops_visualization)
918
- all_captions.append(f"All Crops Visualization {all_crops_visualization.size}")
919
 
920
  # Add input and middle image to gallery
921
  all_images.append(input_image)
 
175
  Returns:
176
  tuple: (cropped_image_16_9, image_with_lines, cropped_image_9_16)
177
  """
178
+ yolo_model = model
179
  # Calculate division width and boundaries
180
  division_width = input_image.width / num_divisions
181
  left_boundary = (left_division - 1) * division_width
 
187
  )
188
 
189
  # Run YOLO on the 9:16 crop to get person bbox
190
+ bbox = (
191
+ yolo_model(cropped_image_9_16, classes=[0], conf=0.6)[0]
192
+ .boxes.xyxy.cpu()
193
+ .numpy()[0]
194
+ )
195
  x1, y1, x2, y2 = bbox
196
 
197
  # Calculate top boundary with head margin
 
352
  int: x-coordinate of the center point of all persons
353
  """
354
  # Detect persons (class 0 in COCO dataset)
355
+ results = model(image, classes=[0], conf=0.6)
356
 
357
  if not results or len(results[0].boxes) == 0:
358
  # If no persons detected, return center of image
 
415
  cutout_height = cutout_image.shape[0]
416
 
417
  # 2. Run YOLO on cutout to get person bounding box and center
418
+ results = model(cutout_image, classes=[0], conf=0.6)
419
 
420
  # Default center if no detection
421
  cutout_center_x = cutout_image.shape[1] // 2
 
582
  return layout_crops, cutout_pil, cutout_16_9_pil, cutout_9_16_pil
583
 
584
 
585
+ def draw_crops_on_original(image, left_division, right_division, crop_types):
586
  """
587
+ Create a visualization showing selected crop regions overlaid on the original image.
588
  Each crop region is outlined with a different color and labeled.
589
  All crops are centered on the person's center point.
590
 
 
592
  image: PIL Image
593
  left_division: Left division index (1-20)
594
  right_division: Right division index (1-20)
595
+ crop_types: List of crop types to include in visualization (e.g., ["16:9", "9:16"])
596
 
597
  Returns:
598
+ PIL Image: Original image with selected crop regions visualized
599
  """
600
  # Convert PIL Image to cv2 format
601
  if isinstance(image, Image.Image):
 
618
  cutout_image = image_cv[:, left_boundary:right_boundary].copy()
619
 
620
  # Get YOLO detections for person bounding box
621
+ results = model(cutout_image, classes=[0], conf=0.6)
622
 
623
  # Default values
624
  cutout_center_x = cutout_image.shape[1] // 2
 
673
  font_scale = 0.8
674
  font_thickness = 2
675
 
676
+ # Draw cutout region (original divisions) if requested
677
+ if "cutout" in crop_types:
678
+ cv2.rectangle(
679
+ visualization,
680
+ (left_boundary, 0),
681
+ (right_boundary, height),
682
+ colors["cutout"],
683
+ thickness,
684
+ )
685
+ cv2.putText(
686
+ visualization,
687
+ "Cutout",
688
+ (left_boundary + 5, 30),
689
+ font,
690
+ font_scale,
691
+ colors["cutout"],
692
+ font_thickness,
693
+ )
694
 
695
+ # Create 16:9 version of the cutout if requested
696
+ if "16:9" in crop_types:
697
+ cutout_width = right_boundary - left_boundary
698
+ cutout_height = height
699
+ aspect_16_9 = 16 / 9
700
+ target_height_16_9 = int(cutout_width / aspect_16_9)
701
 
702
+ if target_height_16_9 <= height:
703
+ # Calculate 20% of person height for top margin
704
+ top_margin = int(original_person_height * 0.2)
 
 
 
705
 
706
+ # Start 20% above the person's top
707
+ y_start = int(max(0, original_person_top - top_margin))
708
 
709
+ # If this would make the crop exceed the bottom, adjust y_start
710
+ if y_start + target_height_16_9 > height:
711
+ y_start = int(max(0, height - target_height_16_9))
712
+
713
+ y_end = int(min(height, y_start + target_height_16_9))
714
+
715
+ cv2.rectangle(
716
+ visualization,
717
+ (left_boundary, y_start),
718
+ (right_boundary, y_end),
719
+ colors["16:9"],
720
+ thickness,
721
+ )
722
+ cv2.putText(
723
+ visualization,
724
+ "16:9",
725
+ (left_boundary + 5, y_start + 30),
726
+ font,
727
+ font_scale,
728
+ colors["16:9"],
729
+ font_thickness,
730
+ )
731
 
732
+ # Create 9:16 version if requested
733
+ if "9:16" in crop_types:
734
+ cutout_width = right_boundary - left_boundary
735
+ cutout_height = height
736
+ aspect_9_16 = 9 / 16
737
+ target_width_9_16 = int(cutout_height * aspect_9_16)
738
+
739
+ if target_width_9_16 <= cutout_width:
740
+ # Center horizontally around person
741
+ x_start = max(
742
+ 0,
743
+ min(
744
+ left_boundary + cutout_width - target_width_9_16,
745
+ original_center_x - target_width_9_16 // 2,
746
+ ),
747
+ )
748
+ x_end = x_start + target_width_9_16
749
+ cv2.rectangle(
750
+ visualization, (x_start, 0), (x_end, height), colors["9:16"], thickness
751
+ )
752
+ cv2.putText(
753
+ visualization,
754
+ "9:16",
755
+ (x_start + 5, 60),
756
+ font,
757
+ font_scale,
758
+ colors["9:16"],
759
+ font_thickness,
760
+ )
761
 
762
+ # Draw centered half width layout if requested
763
+ if "half" in crop_types:
764
+ half_width = width // 2
765
+ half_left_x = max(
766
+ 0, min(width - half_width, original_center_x - half_width // 2)
767
+ )
768
+ half_right_x = half_left_x + half_width
769
  cv2.rectangle(
770
  visualization,
771
+ (half_left_x, 0),
772
+ (half_right_x, height),
773
+ colors["half"],
774
  thickness,
775
  )
776
  cv2.putText(
777
  visualization,
778
+ "Half Width",
779
+ (half_left_x + 5, 90),
780
  font,
781
  font_scale,
782
+ colors["half"],
783
  font_thickness,
784
  )
785
 
786
+ # Draw centered third width layout if requested
787
+ if "third" in crop_types:
788
+ third_width = width // 3
789
+ third_left_x = max(
790
+ 0, min(width - third_width, original_center_x - third_width // 2)
 
 
 
 
 
 
791
  )
792
+ third_right_x = third_left_x + third_width
793
  cv2.rectangle(
794
+ visualization,
795
+ (third_left_x, 0),
796
+ (third_right_x, height),
797
+ colors["third"],
798
+ thickness,
799
  )
800
  cv2.putText(
801
  visualization,
802
+ "Third Width",
803
+ (third_left_x + 5, 120),
804
  font,
805
  font_scale,
806
+ colors["third"],
807
  font_thickness,
808
  )
809
 
810
+ # Draw centered two-thirds width layout if requested
811
+ if "two_thirds" in crop_types:
812
+ two_thirds_width = (width * 2) // 3
813
+ two_thirds_left_x = max(
814
+ 0, min(width - two_thirds_width, original_center_x - two_thirds_width // 2)
815
+ )
816
+ two_thirds_right_x = two_thirds_left_x + two_thirds_width
817
+ cv2.rectangle(
818
+ visualization,
819
+ (two_thirds_left_x, 0),
820
+ (two_thirds_right_x, height),
821
+ colors["two_thirds"],
822
+ thickness,
823
+ )
824
+ cv2.putText(
825
+ visualization,
826
+ "Two-Thirds Width",
827
+ (two_thirds_left_x + 5, 150),
828
+ font,
829
+ font_scale,
830
+ colors["two_thirds"],
831
+ font_thickness,
832
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
 
834
+ # Draw center point of person(s)
835
  center_radius = 8
836
  cv2.circle(
837
  visualization,
838
+ (original_center_x, original_center_y),
839
  center_radius,
840
  (255, 255, 255),
841
  -1,
842
  )
843
  cv2.circle(
844
+ visualization,
845
+ (original_center_x, original_center_y),
846
+ center_radius,
847
+ (0, 0, 0),
848
+ 2,
849
  )
850
  cv2.putText(
851
  visualization,
852
  "Person Center",
853
+ (original_center_x + 10, original_center_y),
854
  font,
855
  font_scale,
856
  (255, 255, 255),
 
870
  Returns:
871
  gr.Gallery: Gallery of all generated images
872
  """
873
+ # image_paths = get_sprite_firebase(cid, rsid, uid)
874
+ image_paths = ["data/sprite1.jpg", "data/sprite2.jpg"]
875
  # Lists to store all images
876
  all_images = []
877
  all_captions = []
 
926
  mid_image, left_division, right_division
927
  )
928
 
929
+ # Create the first visualization with 16:9 and 9:16 crops only
930
+ aspect_ratio_visualization = draw_crops_on_original(
931
+ mid_image, left_division, right_division, ["16:9", "9:16"]
932
+ )
933
+
934
+ # Create the second visualization with the remaining layouts
935
+ other_layouts_visualization = draw_crops_on_original(
936
+ mid_image,
937
+ left_division,
938
+ right_division,
939
+ ["cutout", "half", "third", "two_thirds"],
940
+ )
941
+
942
+ # Add visualizations showing crops
943
+ all_images.append(aspect_ratio_visualization)
944
+ all_captions.append(
945
+ f"Aspect Ratio Crops (16:9 & 9:16) {aspect_ratio_visualization.size}"
946
  )
947
 
948
+ all_images.append(other_layouts_visualization)
949
+ all_captions.append(f"Other Layout Options {other_layouts_visualization.size}")
 
950
 
951
  # Add input and middle image to gallery
952
  all_images.append(input_image)