import math
import random
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
import cv2
import numpy as np
from custom_albumentations.core.bbox_utils import union_of_bboxes
from ...core.transforms_interface import (
BoxInternalType,
DualTransform,
KeypointInternalType,
to_tuple,
)
from ..geometric import functional as FGeometric
from . import functional as F
__all__ = [
"RandomCrop",
"CenterCrop",
"Crop",
"CropNonEmptyMaskIfExists",
"RandomSizedCrop",
"RandomResizedCrop",
"RandomCropNearBBox",
"RandomSizedBBoxSafeCrop",
"CropAndPad",
"RandomCropFromBorders",
"BBoxSafeRandomCrop",
]
class RandomCrop(DualTransform):
"""Crop a random part of the input.
Args:
height (int): height of the crop.
width (int): width of the crop.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
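Examples:
    Illustrative sketch (assumes ``image`` is an HxWxC uint8 numpy array of at least
    256x256 pixels and that ``Compose`` is imported from this package):
    >>> aug = Compose([RandomCrop(height=256, width=256, p=1.0)])
    >>> cropped = aug(image=image)["image"]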
"""
def __init__(self, height, width, always_apply=False, p=1.0):
super().__init__(always_apply, p)
self.height = height
self.width = width
def apply(self, img, h_start=0, w_start=0, **params):
return F.random_crop(img, self.height, self.width, h_start, w_start)
def get_params(self):
return {"h_start": random.random(), "w_start": random.random()}
def apply_to_bbox(self, bbox, **params):
return F.bbox_random_crop(bbox, self.height, self.width, **params)
def apply_to_keypoint(self, keypoint, **params):
return F.keypoint_random_crop(keypoint, self.height, self.width, **params)
def get_transform_init_args_names(self):
return ("height", "width")
class CenterCrop(DualTransform):
"""Crop the central part of the input.
Args:
height (int): height of the crop.
width (int): width of the crop.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
Note:
It is recommended to use uint8 images as input.
Otherwise the operation will require internal conversion
float32 -> uint8 -> float32 that causes worse performance.
"""
def __init__(self, height, width, always_apply=False, p=1.0):
super(CenterCrop, self).__init__(always_apply, p)
self.height = height
self.width = width
def apply(self, img, **params):
return F.center_crop(img, self.height, self.width)
def apply_to_bbox(self, bbox, **params):
return F.bbox_center_crop(bbox, self.height, self.width, **params)
def apply_to_keypoint(self, keypoint, **params):
return F.keypoint_center_crop(keypoint, self.height, self.width, **params)
def get_transform_init_args_names(self):
return ("height", "width")
class Crop(DualTransform):
"""Crop region from image.
Args:
x_min (int): Minimum upper left x coordinate.
y_min (int): Minimum upper left y coordinate.
x_max (int): Maximum lower right x coordinate.
y_max (int): Maximum lower right y coordinate.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
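Examples:
    Illustrative sketch (assumes ``image`` is a numpy array of at least 512x512 pixels):
    >>> aug = Compose([Crop(x_min=0, y_min=0, x_max=512, y_max=512)])
    >>> cropped = aug(image=image)["image"]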
"""
def __init__(self, x_min=0, y_min=0, x_max=1024, y_max=1024, always_apply=False, p=1.0):
super(Crop, self).__init__(always_apply, p)
self.x_min = x_min
self.y_min = y_min
self.x_max = x_max
self.y_max = y_max
def apply(self, img, **params):
return F.crop(img, x_min=self.x_min, y_min=self.y_min, x_max=self.x_max, y_max=self.y_max)
def apply_to_bbox(self, bbox, **params):
return F.bbox_crop(bbox, x_min=self.x_min, y_min=self.y_min, x_max=self.x_max, y_max=self.y_max, **params)
def apply_to_keypoint(self, keypoint, **params):
return F.crop_keypoint_by_coords(keypoint, crop_coords=(self.x_min, self.y_min, self.x_max, self.y_max))
def get_transform_init_args_names(self):
return ("x_min", "y_min", "x_max", "y_max")
class CropNonEmptyMaskIfExists(DualTransform):
"""Crop area with mask if mask is non-empty, else make random crop.
Args:
height (int): vertical size of crop in pixels
width (int): horizontal size of crop in pixels
ignore_values (list of int): values to ignore in mask, `0` values are always ignored
(e.g. if background value is 5 set `ignore_values=[5]` to ignore)
ignore_channels (list of int): channels to ignore in mask
(e.g. if background is a first channel set `ignore_channels=[0]` to ignore)
p (float): probability of applying the transform. Default: 1.0.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
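Examples:
    Illustrative sketch (assumes ``image`` and ``mask`` are numpy arrays with the same height
    and width, both at least 128x128, and background encoded as ``0`` in the mask):
    >>> aug = Compose([CropNonEmptyMaskIfExists(height=128, width=128)])
    >>> result = aug(image=image, mask=mask)
    >>> cropped_image, cropped_mask = result["image"], result["mask"]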
"""
def __init__(self, height, width, ignore_values=None, ignore_channels=None, always_apply=False, p=1.0):
super(CropNonEmptyMaskIfExists, self).__init__(always_apply, p)
if ignore_values is not None and not isinstance(ignore_values, list):
raise ValueError("Expected `ignore_values` of type `list`, got `{}`".format(type(ignore_values)))
if ignore_channels is not None and not isinstance(ignore_channels, list):
raise ValueError("Expected `ignore_channels` of type `list`, got `{}`".format(type(ignore_channels)))
self.height = height
self.width = width
self.ignore_values = ignore_values
self.ignore_channels = ignore_channels
def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
return F.crop(img, x_min, y_min, x_max, y_max)
def apply_to_bbox(self, bbox, x_min=0, x_max=0, y_min=0, y_max=0, **params):
return F.bbox_crop(
bbox, x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, rows=params["rows"], cols=params["cols"]
)
def apply_to_keypoint(self, keypoint, x_min=0, x_max=0, y_min=0, y_max=0, **params):
return F.crop_keypoint_by_coords(keypoint, crop_coords=(x_min, y_min, x_max, y_max))
def _preprocess_mask(self, mask):
mask_height, mask_width = mask.shape[:2]
if self.ignore_values is not None:
ignore_values_np = np.array(self.ignore_values)
mask = np.where(np.isin(mask, ignore_values_np), 0, mask)
if mask.ndim == 3 and self.ignore_channels is not None:
target_channels = np.array([ch for ch in range(mask.shape[-1]) if ch not in self.ignore_channels])
mask = np.take(mask, target_channels, axis=-1)
if self.height > mask_height or self.width > mask_width:
raise ValueError(
"Crop size ({},{}) is larger than image ({},{})".format(
self.height, self.width, mask_height, mask_width
)
)
return mask
def update_params(self, params, **kwargs):
super().update_params(params, **kwargs)
if "mask" in kwargs:
mask = self._preprocess_mask(kwargs["mask"])
elif "masks" in kwargs and len(kwargs["masks"]):
masks = kwargs["masks"]
mask = self._preprocess_mask(np.copy(masks[0])) # need copy as we perform in-place mod afterwards
for m in masks[1:]:
mask |= self._preprocess_mask(m)
else:
raise RuntimeError("Can not find mask for CropNonEmptyMaskIfExists")
mask_height, mask_width = mask.shape[:2]
if mask.any():
mask = mask.sum(axis=-1) if mask.ndim == 3 else mask
non_zero_yx = np.argwhere(mask)
y, x = random.choice(non_zero_yx)
x_min = x - random.randint(0, self.width - 1)
y_min = y - random.randint(0, self.height - 1)
x_min = np.clip(x_min, 0, mask_width - self.width)
y_min = np.clip(y_min, 0, mask_height - self.height)
else:
x_min = random.randint(0, mask_width - self.width)
y_min = random.randint(0, mask_height - self.height)
x_max = x_min + self.width
y_max = y_min + self.height
params.update({"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max})
return params
def get_transform_init_args_names(self):
return ("height", "width", "ignore_values", "ignore_channels")
class _BaseRandomSizedCrop(DualTransform):
# Base class for RandomSizedCrop and RandomResizedCrop
def __init__(self, height, width, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1.0):
super(_BaseRandomSizedCrop, self).__init__(always_apply, p)
self.height = height
self.width = width
self.interpolation = interpolation
def apply(self, img, crop_height=0, crop_width=0, h_start=0, w_start=0, interpolation=cv2.INTER_LINEAR, **params):
crop = F.random_crop(img, crop_height, crop_width, h_start, w_start)
return FGeometric.resize(crop, self.height, self.width, interpolation)
def apply_to_bbox(self, bbox, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
return F.bbox_random_crop(bbox, crop_height, crop_width, h_start, w_start, rows, cols)
def apply_to_keypoint(self, keypoint, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
keypoint = F.keypoint_random_crop(keypoint, crop_height, crop_width, h_start, w_start, rows, cols)
scale_x = self.width / crop_width
scale_y = self.height / crop_height
keypoint = FGeometric.keypoint_scale(keypoint, scale_x, scale_y)
return keypoint
class RandomSizedCrop(_BaseRandomSizedCrop):
"""Crop a random part of the input and rescale it to some size.
Args:
min_max_height ((int, int)): crop size limits.
height (int): height after crop and resize.
width (int): width after crop and resize.
w2h_ratio (float): aspect ratio of crop.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
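Examples:
    Illustrative sketch (assumes ``image`` is a numpy array of at least 512x512 pixels):
    >>> aug = Compose([RandomSizedCrop(min_max_height=(256, 512), height=320, width=320)])
    >>> resized_crop = aug(image=image)["image"]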
"""
def __init__(
self, min_max_height, height, width, w2h_ratio=1.0, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1.0
):
super(RandomSizedCrop, self).__init__(
height=height, width=width, interpolation=interpolation, always_apply=always_apply, p=p
)
self.min_max_height = min_max_height
self.w2h_ratio = w2h_ratio
def get_params(self):
crop_height = random.randint(self.min_max_height[0], self.min_max_height[1])
return {
"h_start": random.random(),
"w_start": random.random(),
"crop_height": crop_height,
"crop_width": int(crop_height * self.w2h_ratio),
}
def get_transform_init_args_names(self):
return "min_max_height", "height", "width", "w2h_ratio", "interpolation"
class RandomResizedCrop(_BaseRandomSizedCrop):
"""Torchvision's variant of crop a random part of the input and rescale it to some size.
Args:
height (int): height after crop and resize.
width (int): width after crop and resize.
scale ((float, float)): range of the crop area, as a fraction of the original image area.
ratio ((float, float)): range of aspect ratios of the crop.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
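Examples:
    Illustrative sketch (assumes ``image`` is an HxWxC numpy array):
    >>> aug = Compose([RandomResizedCrop(height=224, width=224, scale=(0.5, 1.0), ratio=(0.75, 1.33))])
    >>> resized_crop = aug(image=image)["image"]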
"""
def __init__(
self,
height,
width,
scale=(0.08, 1.0),
ratio=(0.75, 1.3333333333333333),
interpolation=cv2.INTER_LINEAR,
always_apply=False,
p=1.0,
):
super(RandomResizedCrop, self).__init__(
height=height, width=width, interpolation=interpolation, always_apply=always_apply, p=p
)
self.scale = scale
self.ratio = ratio
def get_params_dependent_on_targets(self, params):
img = params["image"]
area = img.shape[0] * img.shape[1]
for _attempt in range(10):
target_area = random.uniform(*self.scale) * area
log_ratio = (math.log(self.ratio[0]), math.log(self.ratio[1]))
aspect_ratio = math.exp(random.uniform(*log_ratio))
w = int(round(math.sqrt(target_area * aspect_ratio))) # skipcq: PTC-W0028
h = int(round(math.sqrt(target_area / aspect_ratio))) # skipcq: PTC-W0028
if 0 < w <= img.shape[1] and 0 < h <= img.shape[0]:
i = random.randint(0, img.shape[0] - h)
j = random.randint(0, img.shape[1] - w)
return {
"crop_height": h,
"crop_width": w,
"h_start": i * 1.0 / (img.shape[0] - h + 1e-10),
"w_start": j * 1.0 / (img.shape[1] - w + 1e-10),
}
# Fallback to central crop
in_ratio = img.shape[1] / img.shape[0]
if in_ratio < min(self.ratio):
w = img.shape[1]
h = int(round(w / min(self.ratio)))
elif in_ratio > max(self.ratio):
h = img.shape[0]
w = int(round(h * max(self.ratio)))
else: # whole image
w = img.shape[1]
h = img.shape[0]
i = (img.shape[0] - h) // 2
j = (img.shape[1] - w) // 2
return {
"crop_height": h,
"crop_width": w,
"h_start": i * 1.0 / (img.shape[0] - h + 1e-10),
"w_start": j * 1.0 / (img.shape[1] - w + 1e-10),
}
def get_params(self):
return {}
@property
def targets_as_params(self):
return ["image"]
def get_transform_init_args_names(self):
return "height", "width", "scale", "ratio", "interpolation"
class RandomCropNearBBox(DualTransform):
"""Crop bbox from image with random shift by x,y coordinates
Args:
max_part_shift (float, (float, float)): Max shift in `height` and `width` dimensions relative
to `cropping_bbox` dimension.
If max_part_shift is a single float, the range will be (max_part_shift, max_part_shift).
Default (0.3, 0.3).
cropping_box_key (str): Additional target key for cropping box. Default `cropping_bbox`
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
Examples:
>>> aug = Compose([RandomCropNearBBox(max_part_shift=(0.1, 0.5), cropping_box_key='test_box')],
>>> bbox_params=BboxParams("pascal_voc"))
>>> result = aug(image=image, bboxes=bboxes, test_box=[0, 5, 10, 20])
"""
def __init__(
self,
max_part_shift: Union[float, Tuple[float, float]] = (0.3, 0.3),
cropping_box_key: str = "cropping_bbox",
always_apply: bool = False,
p: float = 1.0,
):
super(RandomCropNearBBox, self).__init__(always_apply, p)
self.max_part_shift = to_tuple(max_part_shift, low=max_part_shift)
self.cropping_bbox_key = cropping_box_key
if min(self.max_part_shift) < 0 or max(self.max_part_shift) > 1:
raise ValueError("Invalid max_part_shift. Got: {}".format(max_part_shift))
def apply(
self, img: np.ndarray, x_min: int = 0, x_max: int = 0, y_min: int = 0, y_max: int = 0, **params
) -> np.ndarray:
return F.clamping_crop(img, x_min, y_min, x_max, y_max)
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, int]:
bbox = params[self.cropping_bbox_key]
h_max_shift = round((bbox[3] - bbox[1]) * self.max_part_shift[0])
w_max_shift = round((bbox[2] - bbox[0]) * self.max_part_shift[1])
x_min = bbox[0] - random.randint(-w_max_shift, w_max_shift)
x_max = bbox[2] + random.randint(-w_max_shift, w_max_shift)
y_min = bbox[1] - random.randint(-h_max_shift, h_max_shift)
y_max = bbox[3] + random.randint(-h_max_shift, h_max_shift)
x_min = max(0, x_min)
y_min = max(0, y_min)
return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
return F.bbox_crop(bbox, **params)
def apply_to_keypoint(
self,
keypoint: Tuple[float, float, float, float],
x_min: int = 0,
x_max: int = 0,
y_min: int = 0,
y_max: int = 0,
**params
) -> Tuple[float, float, float, float]:
return F.crop_keypoint_by_coords(keypoint, crop_coords=(x_min, y_min, x_max, y_max))
@property
def targets_as_params(self) -> List[str]:
return [self.cropping_bbox_key]
def get_transform_init_args_names(self) -> Tuple[str]:
return ("max_part_shift",)
class BBoxSafeRandomCrop(DualTransform):
"""Crop a random part of the input without loss of bboxes.
Args:
erosion_rate (float): erosion rate applied on input image height before crop.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes
Image types:
uint8, float32
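Examples:
    Illustrative sketch (assumes ``image`` is a numpy array and ``bboxes``/``labels`` are
    placeholder names for boxes in ``pascal_voc`` format and their class labels):
    >>> aug = Compose([BBoxSafeRandomCrop(erosion_rate=0.2)],
    >>>               bbox_params=BboxParams("pascal_voc", label_fields=["labels"]))
    >>> result = aug(image=image, bboxes=bboxes, labels=labels)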
"""
def __init__(self, erosion_rate=0.0, always_apply=False, p=1.0):
super(BBoxSafeRandomCrop, self).__init__(always_apply, p)
self.erosion_rate = erosion_rate
def apply(self, img, crop_height=0, crop_width=0, h_start=0, w_start=0, **params):
return F.random_crop(img, crop_height, crop_width, h_start, w_start)
def get_params_dependent_on_targets(self, params):
img_h, img_w = params["image"].shape[:2]
if len(params["bboxes"]) == 0: # less likely, this class is for use with bboxes.
erosive_h = int(img_h * (1.0 - self.erosion_rate))
crop_height = img_h if erosive_h >= img_h else random.randint(erosive_h, img_h)
return {
"h_start": random.random(),
"w_start": random.random(),
"crop_height": crop_height,
"crop_width": int(crop_height * img_w / img_h),
}
# get union of all bboxes
x, y, x2, y2 = union_of_bboxes(
width=img_w, height=img_h, bboxes=params["bboxes"], erosion_rate=self.erosion_rate
)
# find bigger region
bx, by = x * random.random(), y * random.random()
bx2, by2 = x2 + (1 - x2) * random.random(), y2 + (1 - y2) * random.random()
bw, bh = bx2 - bx, by2 - by
crop_height = img_h if bh >= 1.0 else int(img_h * bh)
crop_width = img_w if bw >= 1.0 else int(img_w * bw)
h_start = np.clip(0.0 if bh >= 1.0 else by / (1.0 - bh), 0.0, 1.0)
w_start = np.clip(0.0 if bw >= 1.0 else bx / (1.0 - bw), 0.0, 1.0)
return {"h_start": h_start, "w_start": w_start, "crop_height": crop_height, "crop_width": crop_width}
def apply_to_bbox(self, bbox, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
return F.bbox_random_crop(bbox, crop_height, crop_width, h_start, w_start, rows, cols)
@property
def targets_as_params(self):
return ["image", "bboxes"]
def get_transform_init_args_names(self):
return ("erosion_rate",)
class RandomSizedBBoxSafeCrop(BBoxSafeRandomCrop):
"""Crop a random part of the input and rescale it to some size without loss of bboxes.
Args:
height (int): height after crop and resize.
width (int): width after crop and resize.
erosion_rate (float): erosion rate applied on input image height before crop.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes
Image types:
uint8, float32
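Examples:
    Illustrative sketch (assumes ``image``, ``bboxes`` and ``labels`` as in ``BBoxSafeRandomCrop`` above):
    >>> aug = Compose([RandomSizedBBoxSafeCrop(height=320, width=320, erosion_rate=0.2)],
    >>>               bbox_params=BboxParams("pascal_voc", label_fields=["labels"]))
    >>> result = aug(image=image, bboxes=bboxes, labels=labels)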
"""
def __init__(self, height, width, erosion_rate=0.0, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1.0):
super(RandomSizedBBoxSafeCrop, self).__init__(erosion_rate, always_apply, p)
self.height = height
self.width = width
self.interpolation = interpolation
def apply(self, img, crop_height=0, crop_width=0, h_start=0, w_start=0, interpolation=cv2.INTER_LINEAR, **params):
crop = F.random_crop(img, crop_height, crop_width, h_start, w_start)
return FGeometric.resize(crop, self.height, self.width, interpolation)
def get_transform_init_args_names(self):
return super().get_transform_init_args_names() + ("height", "width", "interpolation")
class CropAndPad(DualTransform):
"""Crop and pad images by pixel amounts or fractions of image sizes.
Cropping removes pixels at the sides (i.e. extracts a subimage from a given full image).
Padding adds pixels to the sides (e.g. black pixels).
This transformation will never crop images below a height or width of ``1``.
Note:
This transformation automatically resizes images back to their original size. To deactivate this, add the
parameter ``keep_size=False``.
Args:
px (int or tuple):
The number of pixels to crop (negative values) or pad (positive values)
on each side of the image. Either this or the parameter `percent` may
be set, not both at the same time.
* If ``None``, then pixel-based cropping/padding will not be used.
* If ``int``, then that exact number of pixels will always be cropped/padded.
* If a ``tuple`` of two ``int`` s with values ``a`` and ``b``,
then each side will be cropped/padded by a random amount sampled
uniformly per image and side from the interval ``[a, b]``. If
however `sample_independently` is set to ``False``, only one
value will be sampled per image and used for all sides.
* If a ``tuple`` of four entries, then the entries represent top,
right, bottom, left. Each entry may be a single ``int`` (always
crop/pad by exactly that value), a ``tuple`` of two ``int`` s
``a`` and ``b`` (crop/pad by an amount within ``[a, b]``), a
``list`` of ``int`` s (crop/pad by a random value that is
contained in the ``list``).
percent (float or tuple):
The number of pixels to crop (negative values) or pad (positive values)
on each side of the image given as a *fraction* of the image
height/width. E.g. if this is set to ``-0.1``, the transformation will
always crop away ``10%`` of the image's height at both the top and the
bottom (both ``10%`` each), as well as ``10%`` of the width at the
right and left.
Expected value range is ``(-1.0, inf)``.
Either this or the parameter `px` may be set, not both
at the same time.
* If ``None``, then fraction-based cropping/padding will not be
used.
* If ``float``, then that fraction will always be cropped/padded.
* If a ``tuple`` of two ``float`` s with values ``a`` and ``b``,
then each side will be cropped/padded by a random fraction
sampled uniformly per image and side from the interval
``[a, b]``. If however `sample_independently` is set to
``False``, only one value will be sampled per image and used for
all sides.
* If a ``tuple`` of four entries, then the entries represent top,
right, bottom, left. Each entry may be a single ``float``
(always crop/pad by exactly that percent value), a ``tuple`` of
two ``float`` s ``a`` and ``b`` (crop/pad by a fraction from
``[a, b]``), a ``list`` of ``float`` s (crop/pad by a random
value that is contained in the list).
pad_mode (int): OpenCV border mode.
pad_cval (number, Sequence[number]):
The constant value to use if the pad mode is ``BORDER_CONSTANT``.
* If ``number``, then that value will be used.
* If a ``tuple`` of two ``number`` s and at least one of them is
a ``float``, then a random number will be uniformly sampled per
image from the continuous interval ``[a, b]`` and used as the
value. If both ``number`` s are ``int`` s, the interval is
discrete.
* If a ``list`` of ``number``, then a random value will be chosen
from the elements of the ``list`` and used as the value.
pad_cval_mask (number, Sequence[number]): Same as pad_cval but only for masks.
keep_size (bool):
After cropping and padding, the result image will usually have a
different height/width compared to the original input image. If this
parameter is set to ``True``, then the cropped/padded image will be
resized to the input image's size, i.e. the output shape is always identical to the input shape.
sample_independently (bool):
If ``False`` *and* the values for `px`/`percent` result in exactly
*one* probability distribution for all image sides, only one single
value will be sampled from that probability distribution and used for
all sides. I.e. the crop/pad amount then is the same for all sides.
If ``True``, four values will be sampled independently, one per side.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
Targets:
image, mask, bboxes, keypoints
Image types:
any
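Examples:
    Illustrative sketch (assumes ``image`` is a numpy array; negative ``percent`` values crop,
    positive values pad with the constant ``pad_cval``):
    >>> aug = Compose([CropAndPad(percent=(-0.1, 0.1), pad_mode=cv2.BORDER_CONSTANT, pad_cval=0)])
    >>> transformed = aug(image=image)["image"]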
"""
def __init__(
self,
px: Optional[Union[int, Sequence[float], Sequence[Tuple]]] = None,
percent: Optional[Union[float, Sequence[float], Sequence[Tuple]]] = None,
pad_mode: int = cv2.BORDER_CONSTANT,
pad_cval: Union[float, Sequence[float]] = 0,
pad_cval_mask: Union[float, Sequence[float]] = 0,
keep_size: bool = True,
sample_independently: bool = True,
interpolation: int = cv2.INTER_LINEAR,
always_apply: bool = False,
p: float = 1.0,
):
super().__init__(always_apply, p)
if px is None and percent is None:
raise ValueError("px and percent are empty!")
if px is not None and percent is not None:
raise ValueError("Only px or percent may be set!")
self.px = px
self.percent = percent
self.pad_mode = pad_mode
self.pad_cval = pad_cval
self.pad_cval_mask = pad_cval_mask
self.keep_size = keep_size
self.sample_independently = sample_independently
self.interpolation = interpolation
def apply(
self,
img: np.ndarray,
crop_params: Sequence[int] = (),
pad_params: Sequence[int] = (),
pad_value: Union[int, float] = 0,
rows: int = 0,
cols: int = 0,
interpolation: int = cv2.INTER_LINEAR,
**params
) -> np.ndarray:
return F.crop_and_pad(
img, crop_params, pad_params, pad_value, rows, cols, interpolation, self.pad_mode, self.keep_size
)
def apply_to_mask(
self,
img: np.ndarray,
crop_params: Optional[Sequence[int]] = None,
pad_params: Optional[Sequence[int]] = None,
pad_value_mask: Optional[float] = None,
rows: int = 0,
cols: int = 0,
interpolation: int = cv2.INTER_NEAREST,
**params
) -> np.ndarray:
return F.crop_and_pad(
img, crop_params, pad_params, pad_value_mask, rows, cols, interpolation, self.pad_mode, self.keep_size
)
def apply_to_bbox(
self,
bbox: BoxInternalType,
crop_params: Optional[Sequence[int]] = None,
pad_params: Optional[Sequence[int]] = None,
rows: int = 0,
cols: int = 0,
result_rows: int = 0,
result_cols: int = 0,
**params
) -> BoxInternalType:
return F.crop_and_pad_bbox(bbox, crop_params, pad_params, rows, cols, result_rows, result_cols)
def apply_to_keypoint(
self,
keypoint: KeypointInternalType,
crop_params: Optional[Sequence[int]] = None,
pad_params: Optional[Sequence[int]] = None,
rows: int = 0,
cols: int = 0,
result_rows: int = 0,
result_cols: int = 0,
**params
) -> KeypointInternalType:
return F.crop_and_pad_keypoint(
keypoint, crop_params, pad_params, rows, cols, result_rows, result_cols, self.keep_size
)
@property
def targets_as_params(self) -> List[str]:
return ["image"]
@staticmethod
def __prevent_zero(val1: int, val2: int, max_val: int) -> Tuple[int, int]:
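# Reduce the two opposite-side crop amounts (val1, val2) by a total of abs(max_val) + 1,
# splitting the reduction between both sides, so the crop no longer consumes the entire
# dimension; any resulting negative amounts are clipped to zero by the caller.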
regain = abs(max_val) + 1
regain1 = regain // 2
regain2 = regain // 2
if regain1 + regain2 < regain:
regain1 += 1
if regain1 > val1:
diff = regain1 - val1
regain1 = val1
regain2 += diff
elif regain2 > val2:
diff = regain2 - val2
regain2 = val2
regain1 += diff
val1 = val1 - regain1
val2 = val2 - regain2
return val1, val2
@staticmethod
def _prevent_zero(crop_params: List[int], height: int, width: int) -> Sequence[int]:
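# Given per-side crop amounts (top, right, bottom, left), shrink them when they would remove
# the whole height or width, and clip negative amounts to zero.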
top, right, bottom, left = crop_params
remaining_height = height - (top + bottom)
remaining_width = width - (left + right)
if remaining_height < 1:
top, bottom = CropAndPad.__prevent_zero(top, bottom, height)
if remaining_width < 1:
left, right = CropAndPad.__prevent_zero(left, right, width)
return [max(top, 0), max(right, 0), max(bottom, 0), max(left, 0)]
def get_params_dependent_on_targets(self, params) -> dict:
height, width = params["image"].shape[:2]
if self.px is not None:
params = self._get_px_params()
else:
params = self._get_percent_params()
params[0] = int(params[0] * height)
params[1] = int(params[1] * width)
params[2] = int(params[2] * height)
params[3] = int(params[3] * width)
pad_params = [max(i, 0) for i in params]
crop_params = self._prevent_zero([-min(i, 0) for i in params], height, width)
top, right, bottom, left = crop_params
crop_params = [left, top, width - right, height - bottom]
result_rows = crop_params[3] - crop_params[1]
result_cols = crop_params[2] - crop_params[0]
if result_cols == width and result_rows == height:
crop_params = []
top, right, bottom, left = pad_params
pad_params = [top, bottom, left, right]
if any(pad_params):
result_rows += top + bottom
result_cols += left + right
else:
pad_params = []
return {
"crop_params": crop_params or None,
"pad_params": pad_params or None,
"pad_value": None if pad_params is None else self._get_pad_value(self.pad_cval),
"pad_value_mask": None if pad_params is None else self._get_pad_value(self.pad_cval_mask),
"result_rows": result_rows,
"result_cols": result_cols,
}
def _get_px_params(self) -> List[int]:
if self.px is None:
raise ValueError("px is not set")
if isinstance(self.px, int):
params = [self.px] * 4
elif len(self.px) == 2:
if self.sample_independently:
params = [random.randrange(*self.px) for _ in range(4)]
else:
px = random.randrange(*self.px)
params = [px] * 4
else:
params = [i if isinstance(i, int) else random.randrange(*i) for i in self.px] # type: ignore
return params # [top, right, bottom, left]
def _get_percent_params(self) -> List[float]:
if self.percent is None:
raise ValueError("percent is not set")
if isinstance(self.percent, float):
params = [self.percent] * 4
elif len(self.percent) == 2:
if self.sample_independently:
params = [random.uniform(*self.percent) for _ in range(4)]
else:
px = random.uniform(*self.percent)
params = [px] * 4
else:
params = [i if isinstance(i, (int, float)) else random.uniform(*i) for i in self.percent]
return params # params = [top, right, bottom, left]
@staticmethod
def _get_pad_value(pad_value: Union[float, Sequence[float]]) -> Union[int, float]:
if isinstance(pad_value, (int, float)):
return pad_value
if len(pad_value) == 2:
a, b = pad_value
if isinstance(a, int) and isinstance(b, int):
return random.randint(a, b)
return random.uniform(a, b)
return random.choice(pad_value)
def get_transform_init_args_names(self) -> Tuple[str, ...]:
return (
"px",
"percent",
"pad_mode",
"pad_cval",
"pad_cval_mask",
"keep_size",
"sample_independently",
"interpolation",
)
class RandomCropFromBorders(DualTransform):
"""Crop bbox from image randomly cut parts from borders without resize at the end
Args:
crop_left (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
from left side in range [0, crop_left * width)
crop_right (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
from right side in range [(1 - crop_right) * width, width)
crop_top (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
from top side in range [0, crop_top * height)
crop_bottom (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
from bottom side in range [(1 - crop_bottom) * height, height)
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
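Examples:
    Illustrative sketch (assumes ``image`` is a numpy array; up to 10% may be cut from each border):
    >>> aug = Compose([RandomCropFromBorders(crop_left=0.1, crop_right=0.1, crop_top=0.1, crop_bottom=0.1)])
    >>> cropped = aug(image=image)["image"]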
"""
def __init__(
self,
crop_left=0.1,
crop_right=0.1,
crop_top=0.1,
crop_bottom=0.1,
always_apply=False,
p=1.0,
):
super(RandomCropFromBorders, self).__init__(always_apply, p)
self.crop_left = crop_left
self.crop_right = crop_right
self.crop_top = crop_top
self.crop_bottom = crop_bottom
def get_params_dependent_on_targets(self, params):
img = params["image"]
x_min = random.randint(0, int(self.crop_left * img.shape[1]))
x_max = random.randint(max(x_min + 1, int((1 - self.crop_right) * img.shape[1])), img.shape[1])
y_min = random.randint(0, int(self.crop_top * img.shape[0]))
y_max = random.randint(max(y_min + 1, int((1 - self.crop_bottom) * img.shape[0])), img.shape[0])
return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
return F.clamping_crop(img, x_min, y_min, x_max, y_max)
def apply_to_mask(self, mask, x_min=0, x_max=0, y_min=0, y_max=0, **params):
return F.clamping_crop(mask, x_min, y_min, x_max, y_max)
def apply_to_bbox(self, bbox, x_min=0, x_max=0, y_min=0, y_max=0, **params):
rows, cols = params["rows"], params["cols"]
return F.bbox_crop(bbox, x_min, y_min, x_max, y_max, rows, cols)
def apply_to_keypoint(self, keypoint, x_min=0, x_max=0, y_min=0, y_max=0, **params):
return F.crop_keypoint_by_coords(keypoint, crop_coords=(x_min, y_min, x_max, y_max))
@property
def targets_as_params(self):
return ["image"]
def get_transform_init_args_names(self):
return "crop_left", "crop_right", "crop_top", "crop_bottom"