|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Any, Optional, Union |
|
|
|
import torch |
|
import torchvision.transforms.functional as transforms_F |
|
from PIL import Image |
|
|
|
|
|
def obtain_image_size(data_dict: dict, input_keys: list) -> tuple[int, int]: |
|
r"""Function for obtaining the image size from the data dict. |
|
|
|
Args: |
|
data_dict (dict): Input data dict |
|
input_keys (list): List of input keys |
|
Returns: |
|
width (int): Width of the input image |
|
height (int): Height of the input image |
|
""" |
|
|
|
data1 = data_dict[input_keys[0]] |
|
if isinstance(data1, Image.Image): |
|
width, height = data1.size |
|
elif isinstance(data1, torch.Tensor): |
|
height, width = data1.size()[-2:] |
|
else: |
|
raise ValueError("data to random crop should be PIL Image or tensor") |
|
|
|
return width, height |
|
|
|
|
|
def obtain_augmentation_size(data_dict: dict, augmentor_cfg: dict) -> Union[int, tuple]: |
|
r"""Function for obtaining size of the augmentation. |
|
When dealing with multi-aspect ratio dataloaders, we need to |
|
find the augmentation size from the aspect ratio of the data. |
|
|
|
Args: |
|
data_dict (dict): Input data dict |
|
augmentor_cfg (dict): Augmentor config |
|
Returns: |
|
aug_size (int): Size of augmentation |
|
""" |
|
aspect_ratio = data_dict["aspect_ratio"] |
|
aug_size = augmentor_cfg["size"][aspect_ratio] |
|
return aug_size |
|
|
|
|
|
class Augmentor: |
|
def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: |
|
r"""Base augmentor class |
|
|
|
Args: |
|
input_keys (list): List of input keys |
|
output_keys (list): List of output keys |
|
args (dict): Arguments associated with the augmentation |
|
""" |
|
self.input_keys = input_keys |
|
self.output_keys = output_keys |
|
self.args = args |
|
|
|
def __call__(self, *args: Any, **kwds: Any) -> Any: |
|
raise ValueError("Augmentor not implemented") |
|
|
|
|
|
class ResizeSmallestSideAspectPreserving(Augmentor): |
|
def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: |
|
super().__init__(input_keys, output_keys, args) |
|
|
|
def __call__(self, data_dict: dict) -> dict: |
|
r"""Performs aspect-ratio preserving resizing. |
|
Image is resized to the dimension which has the smaller ratio of (size / target_size). |
|
First we compute (w_img / w_target) and (h_img / h_target) and resize the image |
|
to the dimension that has the smaller of these ratios. |
|
|
|
Args: |
|
data_dict (dict): Input data dict |
|
Returns: |
|
data_dict (dict): Output dict where images are resized |
|
""" |
|
|
|
if self.output_keys is None: |
|
self.output_keys = self.input_keys |
|
assert self.args is not None, "Please specify args in augmentations" |
|
|
|
img_w, img_h = self.args["img_w"], self.args["img_h"] |
|
|
|
orig_w, orig_h = obtain_image_size(data_dict, self.input_keys) |
|
scaling_ratio = max((img_w / orig_w), (img_h / orig_h)) |
|
target_size = (int(scaling_ratio * orig_h + 0.5), int(scaling_ratio * orig_w + 0.5)) |
|
|
|
assert ( |
|
target_size[0] >= img_h and target_size[1] >= img_w |
|
), f"Resize error. orig {(orig_w, orig_h)} desire {(img_w, img_h)} compute {target_size}" |
|
|
|
for inp_key, out_key in zip(self.input_keys, self.output_keys): |
|
data_dict[out_key] = transforms_F.resize( |
|
data_dict[inp_key], |
|
size=target_size, |
|
interpolation=getattr(self.args, "interpolation", transforms_F.InterpolationMode.BICUBIC), |
|
antialias=True, |
|
) |
|
|
|
if out_key != inp_key: |
|
del data_dict[inp_key] |
|
return data_dict |
|
|
|
|
|
class CenterCrop(Augmentor): |
|
def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: |
|
super().__init__(input_keys, output_keys, args) |
|
|
|
def __call__(self, data_dict: dict) -> dict: |
|
r"""Performs center crop. |
|
|
|
Args: |
|
data_dict (dict): Input data dict |
|
Returns: |
|
data_dict (dict): Output dict where images are center cropped. |
|
We also save the cropping parameters in the aug_params dict |
|
so that it will be used by other transforms. |
|
""" |
|
assert ( |
|
(self.args is not None) and ("img_w" in self.args) and ("img_h" in self.args) |
|
), "Please specify size in args" |
|
|
|
img_w, img_h = self.args["img_w"], self.args["img_h"] |
|
|
|
orig_w, orig_h = obtain_image_size(data_dict, self.input_keys) |
|
for key in self.input_keys: |
|
data_dict[key] = transforms_F.center_crop(data_dict[key], [img_h, img_w]) |
|
|
|
|
|
crop_x0 = (orig_w - img_w) // 2 |
|
crop_y0 = (orig_h - img_h) // 2 |
|
cropping_params = { |
|
"resize_w": orig_w, |
|
"resize_h": orig_h, |
|
"crop_x0": crop_x0, |
|
"crop_y0": crop_y0, |
|
"crop_w": img_w, |
|
"crop_h": img_h, |
|
} |
|
|
|
if "aug_params" not in data_dict: |
|
data_dict["aug_params"] = dict() |
|
|
|
data_dict["aug_params"]["cropping"] = cropping_params |
|
data_dict["padding_mask"] = torch.zeros((1, cropping_params["crop_h"], cropping_params["crop_w"])) |
|
return data_dict |
|
|
|
|
|
class Normalize(Augmentor): |
|
def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: |
|
super().__init__(input_keys, output_keys, args) |
|
|
|
def __call__(self, data_dict: dict) -> dict: |
|
r"""Performs data normalization. |
|
|
|
Args: |
|
data_dict (dict): Input data dict |
|
Returns: |
|
data_dict (dict): Output dict where images are center cropped. |
|
""" |
|
assert self.args is not None, "Please specify args" |
|
|
|
mean = self.args["mean"] |
|
std = self.args["std"] |
|
|
|
for key in self.input_keys: |
|
if isinstance(data_dict[key], torch.Tensor): |
|
data_dict[key] = data_dict[key].to(dtype=torch.get_default_dtype()).div(255) |
|
else: |
|
data_dict[key] = transforms_F.to_tensor(data_dict[key]) |
|
|
|
data_dict[key] = transforms_F.normalize(tensor=data_dict[key], mean=mean, std=std) |
|
return data_dict |
|
|