Spaces:
Running
on
Zero
Running
on
Zero
""" | |
Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
Copyright(c) 2023 lyuwenyu. All Rights Reserved.
""" | |
from typing import Any, Dict, List, Optional | |
import PIL | |
import PIL.Image | |
import torch | |
import torch.nn as nn | |
import torchvision | |
import torchvision.transforms.v2 as T | |
import torchvision.transforms.v2.functional as F | |
from ...core import register | |
from .._misc import ( | |
BoundingBoxes, | |
Image, | |
Mask, | |
SanitizeBoundingBoxes, | |
Video, | |
_boxes_keys, | |
convert_to_tv_tensor, | |
) | |
# Turn off torchvision's beta-transforms warning for this module.
torchvision.disable_beta_transforms_warning()

# Stock torchvision v2 transforms, each passed through the project's
# ``register`` decorator and re-bound at module level under the same name.
RandomPhotometricDistort = register()(T.RandomPhotometricDistort)
RandomZoomOut = register()(T.RandomZoomOut)
RandomHorizontalFlip = register()(T.RandomHorizontalFlip)
Resize = register()(T.Resize)

# Registrations that are currently disabled:
# ToImageTensor = register()(T.ToImageTensor)
# ConvertDtype = register()(T.ConvertDtype)
# PILToTensor = register()(T.PILToTensor)

# SanitizeBoundingBoxes comes from the local ``_misc`` module rather than
# from ``T``, so it is registered with an explicit name.
SanitizeBoundingBoxes = register(name="SanitizeBoundingBoxes")(SanitizeBoundingBoxes)
RandomCrop = register()(T.RandomCrop)
Normalize = register()(T.Normalize)
class EmptyTransform(T.Transform):
    """Identity transform: hands its inputs back without touching them."""

    def __init__(
        self,
    ) -> None:
        super().__init__()

    def forward(self, *inputs):
        """Return the inputs unchanged.

        Several positional inputs come back as the tuple they were passed in;
        a single input comes back as the bare object rather than a 1-tuple.
        """
        if len(inputs) > 1:
            return inputs
        return inputs[0]
class PadToSize(T.Pad):
    """Pad the right and bottom edges of a sample up to a fixed target size.

    The padding amount is computed per sample as ``target - current`` and is
    applied as ``[0, 0, w, h]``, i.e. nothing on the first two sides.

    Args:
        size: Target size. An int is expanded to ``(size, size)``. Index 0 is
            compared against the second entry of the input's spatial size and
            index 1 against the first, so ``size`` is presumably
            ``(width, height)`` with torchvision reporting ``(height, width)``
            -- TODO confirm against callers.
        fill: Fill value forwarded to ``T.Pad``.
        padding_mode: Padding mode forwarded to ``T.Pad``.

    NOTE(review): inputs larger than ``size`` produce negative padding values;
    that case is passed to ``F.pad`` as-is, exactly like before.
    """

    _transformed_types = (
        PIL.Image.Image,
        Image,
        Video,
        Mask,
        BoundingBoxes,
    )

    def __init__(self, size, fill=0, padding_mode="constant") -> None:
        if isinstance(size, int):
            size = (size, size)
        self.size = size
        # The real padding is derived per sample in ``_get_params``; 0 is only
        # the placeholder required by ``T.Pad.__init__``.
        super().__init__(0, fill, padding_mode)

    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        """Compute the per-sample padding from the first flattened input."""
        # Prefer ``get_size`` when the installed torchvision provides it and
        # fall back to the older ``get_spatial_size`` name otherwise.
        get_size = getattr(F, "get_size", None) or F.get_spatial_size
        sp = get_size(flat_inputs[0])
        h, w = self.size[1] - sp[0], self.size[0] - sp[1]
        # Remembered on the instance so ``__call__`` can report it afterwards.
        self.padding = [0, 0, w, h]
        return dict(padding=self.padding)

    def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        """Public-name alias of ``_get_params``.

        Mirrors the ``transform`` -> ``_transform`` aliasing used by the other
        transforms in this file, so the per-sample padding is still computed
        when the base class dispatches to the public hook names.
        """
        return self._get_params(flat_inputs)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Pad one input with the padding computed for the current sample."""
        try:
            fill = self._fill[type(inpt)]
        except KeyError:
            # NOTE(review): presumably newer torchvision stores a scalar fill
            # under the "others" key instead of a per-type defaultdict --
            # verify against the installed version.
            fill = self._fill.get("others")
        padding = params["padding"]
        return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode)  # type: ignore[arg-type]

    def transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Public-name alias of ``_transform``."""
        return self._transform(inpt, params)

    def __call__(self, *inputs: Any) -> Any:
        """Run the padding and record the applied padding in the target dict.

        When the result is a sequence whose second element is a dict, the
        padding ``[0, 0, w, h]`` is stored in it under the ``"padding"`` key
        as a tensor.
        """
        outputs = super().forward(*inputs)
        # Only index into real sequences: a lone PIL image has no ``len`` and a
        # lone dict sample has no key ``1``, both of which used to raise here.
        if (
            isinstance(outputs, (tuple, list))
            and len(outputs) > 1
            and isinstance(outputs[1], dict)
        ):
            outputs[1]["padding"] = torch.tensor(self.padding)
        return outputs
class RandomIoUCrop(T.RandomIoUCrop):
    """``T.RandomIoUCrop`` that is applied only with probability ``p``.

    All arguments except ``p`` are forwarded positionally to the torchvision
    base class. ``p`` is the threshold a fresh ``torch.rand(1)`` draw is
    compared against on every call.
    """

    def __init__(
        self,
        min_scale: float = 0.3,
        max_scale: float = 1,
        min_aspect_ratio: float = 0.5,
        max_aspect_ratio: float = 2,
        sampler_options: Optional[List[float]] = None,
        trials: int = 40,
        p: float = 1.0,
    ):
        super().__init__(
            min_scale,
            max_scale,
            min_aspect_ratio,
            max_aspect_ratio,
            sampler_options,
            trials,
        )
        self.p = p

    def __call__(self, *inputs: Any) -> Any:
        """Crop the inputs, or return them untouched when the draw is >= ``p``."""
        skip_crop = torch.rand(1) >= self.p
        if not skip_crop:
            return super().forward(*inputs)
        # Skipped: mirror the calling convention (tuple for many, bare for one).
        if len(inputs) > 1:
            return inputs
        return inputs[0]
class ConvertBoxes(T.Transform):
    """Convert bounding boxes to another format and optionally normalise them.

    Args:
        fmt: Target box format name passed to ``torchvision.ops.box_convert``
            (lower-cased) and to ``convert_to_tv_tensor`` (upper-cased). An
            empty string skips the format conversion.
        normalize: When true, divide the boxes by the reversed spatial size
            repeated twice. Presumably the spatial size is ``(height, width)``,
            making the divisor ``[w, h, w, h]`` -- TODO confirm.
    """

    _transformed_types = (BoundingBoxes,)

    def __init__(self, fmt="", normalize=False) -> None:
        super().__init__()
        self.fmt = fmt
        self.normalize = normalize

    def transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Public-name alias of ``_transform``."""
        return self._transform(inpt, params)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Apply the configured format conversion and/or normalisation."""
        # Read the size before converting; the attribute name comes from
        # ``_boxes_keys`` in the local ``_misc`` module.
        spatial_size = getattr(inpt, _boxes_keys[1])
        if self.fmt:
            in_fmt = inpt.format.value.lower()
            inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.fmt.lower())
            # Re-wrap the converted boxes so they carry the new format.
            inpt = convert_to_tv_tensor(
                inpt, key="boxes", box_format=self.fmt.upper(), spatial_size=spatial_size
            )
        if self.normalize:
            # Build the divisor on the boxes' own device so the division does
            # not fail when the boxes are not on the default device.
            divisor = torch.tensor(spatial_size[::-1], device=inpt.device).tile(2)[None]
            inpt = inpt / divisor
        return inpt
class ConvertPILImage(T.Transform):
    """Turn a PIL image into an ``Image`` tensor.

    Args:
        dtype: Only the value ``"float32"`` has an effect: the tensor is cast
            with ``.float()``. Any other value skips the cast.
        scale: When true, the tensor is divided by ``255.0``.
    """

    _transformed_types = (PIL.Image.Image,)

    def __init__(self, dtype="float32", scale=True) -> None:
        super().__init__()
        self.dtype = dtype
        self.scale = scale

    def transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Public-name alias of ``_transform``."""
        return self._transform(inpt, params)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Convert one PIL image; ``params`` is unused."""
        tensor = F.pil_to_tensor(inpt)
        if self.dtype == "float32":
            tensor = tensor.float()
        if self.scale:
            # NOTE(review): this division presumably yields a floating tensor
            # even when the cast above was skipped -- confirm that is intended.
            tensor = tensor / 255.0
        return Image(tensor)