Spaces:
Configuration error
Configuration error
File size: 7,376 Bytes
0034848 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
import random
from typing import Dict, Sequence, Tuple, Union
import cv2
import numpy as np
from ...core.transforms_interface import (
BoxInternalType,
DualTransform,
KeypointInternalType,
to_tuple,
)
from . import functional as F
# Public names exported by this module (what `from <module> import *` exposes).
__all__ = ["RandomScale", "LongestMaxSize", "SmallestMaxSize", "Resize"]
class RandomScale(DualTransform):
    """Rescale the input by a randomly sampled factor; the output size differs from the input size.

    Args:
        scale_limit ((float, float) or float): range of the scaling factor. A single float value
            ``v`` stands for the range (-v, v). The range is biased by 1, so a tuple (low, high)
            means the factor is sampled from (1 + low, 1 + high). Default: (-0.1, 0.1).
        interpolation (OpenCV flag): interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        always_apply (bool): forwarded to the base class constructor. Default: False.
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def __init__(self, scale_limit=0.1, interpolation=cv2.INTER_LINEAR, always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        # Kept in biased form (+1), i.e. as the multiplicative factors that get sampled.
        self.scale_limit = to_tuple(scale_limit, bias=1.0)
        self.interpolation = interpolation

    def get_params(self):
        """Sample the scale factor for one call, uniformly between the two stored limits."""
        lower = self.scale_limit[0]
        upper = self.scale_limit[1]
        sampled = random.uniform(lower, upper)
        return {"scale": sampled}

    def apply(self, img, scale=0, interpolation=cv2.INTER_LINEAR, **params):
        """Scale the image by ``scale`` using the given interpolation flag."""
        scaled_img = F.scale(img, scale, interpolation)
        return scaled_img

    def apply_to_bbox(self, bbox, **params):
        """Return the bounding box unchanged."""
        # Bounding box coordinates are scale invariant
        return bbox

    def apply_to_keypoint(self, keypoint, scale=0, **params):
        """Scale the keypoint by the same factor along both axes."""
        return F.keypoint_scale(keypoint, scale, scale)

    def get_transform_init_args(self):
        """Return the constructor arguments of this instance as a dict."""
        # Apply the opposite bias (-1) so the reported limits match what was passed to __init__.
        unbiased_limit = to_tuple(self.scale_limit, bias=-1.0)
        return {"interpolation": self.interpolation, "scale_limit": unbiased_limit}
class LongestMaxSize(DualTransform):
    """Rescale an image so that maximum side is equal to max_size, keeping the aspect ratio of the initial image.

    Args:
        max_size (int, list of int): maximum size of the image after the transformation. When using a list,
            max size will be randomly selected from the values in the list. NumPy integer scalars are
            accepted as a single fixed size as well.
        interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
        always_apply (bool): forwarded to the base class constructor. Default: False.
        p (float): probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def __init__(
        self,
        max_size: Union[int, Sequence[int]] = 1024,
        interpolation: int = cv2.INTER_LINEAR,
        always_apply: bool = False,
        p: float = 1,
    ):
        super(LongestMaxSize, self).__init__(always_apply, p)
        self.interpolation = interpolation
        self.max_size = max_size

    def apply(
        self, img: np.ndarray, max_size: int = 1024, interpolation: int = cv2.INTER_LINEAR, **params
    ) -> np.ndarray:
        """Resize the image through ``F.longest_max_size`` with the selected ``max_size``."""
        return F.longest_max_size(img, max_size=max_size, interpolation=interpolation)

    def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
        """Return the bounding box unchanged."""
        # Bounding box coordinates are scale invariant
        return bbox

    def apply_to_keypoint(self, keypoint: KeypointInternalType, max_size: int = 1024, **params) -> KeypointInternalType:
        """Scale the keypoint by ``max_size`` divided by the longer of the two image sides.

        The image height and width are read from ``params["rows"]`` and ``params["cols"]``.
        """
        height = params["rows"]
        width = params["cols"]

        scale = max_size / max([height, width])
        return F.keypoint_scale(keypoint, scale, scale)

    def get_params(self) -> Dict[str, int]:
        """Pick the ``max_size`` for this call: the fixed value, or a random element of the sequence."""
        # NumPy integer scalars (e.g. np.int64) are not instances of the built-in int; treat them as
        # a fixed size too, instead of passing them to random.choice, which requires a sequence.
        if isinstance(self.max_size, (int, np.integer)):
            return {"max_size": self.max_size}
        return {"max_size": random.choice(self.max_size)}

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the names of the constructor arguments reported for this transform."""
        return ("max_size", "interpolation")
class SmallestMaxSize(DualTransform):
    """Rescale an image so that minimum side is equal to max_size, keeping the aspect ratio of the initial image.

    Args:
        max_size (int, list of int): maximum size of smallest side of the image after the transformation.
            When using a list, max size will be randomly selected from the values in the list. NumPy
            integer scalars are accepted as a single fixed size as well.
        interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
        always_apply (bool): forwarded to the base class constructor. Default: False.
        p (float): probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def __init__(
        self,
        max_size: Union[int, Sequence[int]] = 1024,
        interpolation: int = cv2.INTER_LINEAR,
        always_apply: bool = False,
        p: float = 1,
    ):
        super(SmallestMaxSize, self).__init__(always_apply, p)
        self.interpolation = interpolation
        self.max_size = max_size

    def apply(
        self, img: np.ndarray, max_size: int = 1024, interpolation: int = cv2.INTER_LINEAR, **params
    ) -> np.ndarray:
        """Resize the image through ``F.smallest_max_size`` with the selected ``max_size``."""
        return F.smallest_max_size(img, max_size=max_size, interpolation=interpolation)

    def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
        """Return the bounding box unchanged."""
        # Bounding box coordinates are scale invariant
        return bbox

    def apply_to_keypoint(self, keypoint: KeypointInternalType, max_size: int = 1024, **params) -> KeypointInternalType:
        """Scale the keypoint by ``max_size`` divided by the shorter of the two image sides.

        The image height and width are read from ``params["rows"]`` and ``params["cols"]``.
        """
        height = params["rows"]
        width = params["cols"]

        scale = max_size / min([height, width])
        return F.keypoint_scale(keypoint, scale, scale)

    def get_params(self) -> Dict[str, int]:
        """Pick the ``max_size`` for this call: the fixed value, or a random element of the sequence."""
        # NumPy integer scalars (e.g. np.int64) are not instances of the built-in int; treat them as
        # a fixed size too, instead of passing them to random.choice, which requires a sequence.
        if isinstance(self.max_size, (int, np.integer)):
            return {"max_size": self.max_size}
        return {"max_size": random.choice(self.max_size)}

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the names of the constructor arguments reported for this transform."""
        return ("max_size", "interpolation")
class Resize(DualTransform):
    """Resize the input to a fixed height and width.

    Args:
        height (int): desired height of the output.
        width (int): desired width of the output.
        interpolation (OpenCV flag): interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        always_apply (bool): forwarded to the base class constructor. Default: False.
        p (float): probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def __init__(self, height, width, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1):
        super().__init__(always_apply, p)
        self.height = height
        self.width = width
        self.interpolation = interpolation

    def apply(self, img, interpolation=cv2.INTER_LINEAR, **params):
        """Resize the image to the configured height and width."""
        resized = F.resize(img, height=self.height, width=self.width, interpolation=interpolation)
        return resized

    def apply_to_bbox(self, bbox, **params):
        """Return the bounding box unchanged."""
        # Bounding box coordinates are scale invariant
        return bbox

    def apply_to_keypoint(self, keypoint, **params):
        """Scale the keypoint by the per-axis ratio of target size to source size.

        The source height and width are read from ``params["rows"]`` and ``params["cols"]``.
        """
        src_height = params["rows"]
        src_width = params["cols"]
        # Horizontal and vertical factors differ whenever the aspect ratio changes.
        return F.keypoint_scale(keypoint, self.width / src_width, self.height / src_height)

    def get_transform_init_args_names(self):
        """Return the names of the constructor arguments reported for this transform."""
        return ("height", "width", "interpolation")
|