JasonSmithSO committed
Commit 0034848 · verified · 1 Parent(s): 74f4a06

Upload 777 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +2 -0
  2. __init__.py +1 -214
  3. custom_albumentations/LICENSE +21 -0
  4. custom_albumentations/__init__.py +15 -0
  5. custom_albumentations/augmentations/__init__.py +21 -0
  6. custom_albumentations/augmentations/blur/__init__.py +2 -0
  7. custom_albumentations/augmentations/blur/functional.py +106 -0
  8. custom_albumentations/augmentations/blur/transforms.py +486 -0
  9. custom_albumentations/augmentations/crops/__init__.py +2 -0
  10. custom_albumentations/augmentations/crops/functional.py +317 -0
  11. custom_albumentations/augmentations/crops/transforms.py +943 -0
  12. custom_albumentations/augmentations/domain_adaptation.py +337 -0
  13. custom_albumentations/augmentations/dropout/__init__.py +5 -0
  14. custom_albumentations/augmentations/dropout/channel_dropout.py +72 -0
  15. custom_albumentations/augmentations/dropout/coarse_dropout.py +187 -0
  16. custom_albumentations/augmentations/dropout/cutout.py +79 -0
  17. custom_albumentations/augmentations/dropout/functional.py +29 -0
  18. custom_albumentations/augmentations/dropout/grid_dropout.py +155 -0
  19. custom_albumentations/augmentations/dropout/mask_dropout.py +99 -0
  20. custom_albumentations/augmentations/functional.py +1380 -0
  21. custom_albumentations/augmentations/geometric/__init__.py +4 -0
  22. custom_albumentations/augmentations/geometric/functional.py +1300 -0
  23. custom_albumentations/augmentations/geometric/resize.py +198 -0
  24. custom_albumentations/augmentations/geometric/rotate.py +294 -0
  25. custom_albumentations/augmentations/geometric/transforms.py +1499 -0
  26. custom_albumentations/augmentations/transforms.py +2667 -0
  27. custom_albumentations/augmentations/utils.py +211 -0
  28. custom_albumentations/core/__init__.py +0 -0
  29. custom_albumentations/core/bbox_utils.py +522 -0
  30. custom_albumentations/core/composition.py +552 -0
  31. custom_albumentations/core/keypoints_utils.py +286 -0
  32. custom_albumentations/core/serialization.py +247 -0
  33. custom_albumentations/core/transforms_interface.py +293 -0
  34. custom_albumentations/core/utils.py +137 -0
  35. custom_albumentations/imgaug/__init__.py +0 -0
  36. custom_albumentations/imgaug/stubs.py +77 -0
  37. custom_albumentations/imgaug/transforms.py +391 -0
  38. custom_albumentations/pytorch/__init__.py +3 -0
  39. custom_albumentations/pytorch/functional.py +31 -0
  40. custom_albumentations/pytorch/transforms.py +104 -0
  41. custom_albumentations/random_utils.py +96 -0
  42. custom_controlnet_aux/__init__.py +1 -0
  43. custom_controlnet_aux/anime_face_segment/__init__.py +66 -0
  44. custom_controlnet_aux/anime_face_segment/anime_segmentation.py +58 -0
  45. custom_controlnet_aux/anime_face_segment/isnet.py +619 -0
  46. custom_controlnet_aux/anime_face_segment/network.py +100 -0
  47. custom_controlnet_aux/anime_face_segment/util.py +40 -0
  48. custom_controlnet_aux/binary/__init__.py +38 -0
  49. custom_controlnet_aux/canny/__init__.py +17 -0
  50. custom_controlnet_aux/color/__init__.py +37 -0
.gitattributes CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  comfyui_screenshot.png filter=lfs diff=lfs merge=lfs -text
  NotoSans-Regular.ttf filter=lfs diff=lfs merge=lfs -text
+ custom_controlnet_aux/mesh_graphormer/hand_landmarker.task filter=lfs diff=lfs merge=lfs -text
+ custom_controlnet_aux/tests/test_image.png filter=lfs diff=lfs merge=lfs -text
__init__.py CHANGED
@@ -1,214 +1 @@
- import sys, os
- from .utils import here, define_preprocessor_inputs, INPUT
- from pathlib import Path
- import traceback
- import importlib
- from .log import log, blue_text, cyan_text, get_summary, get_label
- from .hint_image_enchance import NODE_CLASS_MAPPINGS as HIE_NODE_CLASS_MAPPINGS
- from .hint_image_enchance import NODE_DISPLAY_NAME_MAPPINGS as HIE_NODE_DISPLAY_NAME_MAPPINGS
- #Ref: https://github.com/comfyanonymous/ComfyUI/blob/76d53c4622fc06372975ed2a43ad345935b8a551/nodes.py#L17
- sys.path.insert(0, str(Path(here, "src").resolve()))
- for pkg_name in ["custom_controlnet_aux", "custom_mmpkg"]:
-     sys.path.append(str(Path(here, "src", pkg_name).resolve()))
-
- #Enable CPU fallback for ops not being supported by MPS like upsample_bicubic2d.out
- #https://github.com/pytorch/pytorch/issues/77764
- #https://github.com/Fannovel16/comfyui_controlnet_aux/issues/2#issuecomment-1763579485
- os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = os.getenv("PYTORCH_ENABLE_MPS_FALLBACK", '1')
-
-
- def load_nodes():
-     shorted_errors = []
-     full_error_messages = []
-     node_class_mappings = {}
-     node_display_name_mappings = {}
-
-     for filename in (here / "node_wrappers").iterdir():
-         module_name = filename.stem
-         if module_name.startswith('.'): continue #Skip hidden files created by the OS (e.g. [.DS_Store](https://en.wikipedia.org/wiki/.DS_Store))
-         try:
-             module = importlib.import_module(
-                 f".node_wrappers.{module_name}", package=__package__
-             )
-             node_class_mappings.update(getattr(module, "NODE_CLASS_MAPPINGS"))
-             if hasattr(module, "NODE_DISPLAY_NAME_MAPPINGS"):
-                 node_display_name_mappings.update(getattr(module, "NODE_DISPLAY_NAME_MAPPINGS"))
-
-             log.debug(f"Imported {module_name} nodes")
-
-         except AttributeError:
-             pass  # wip nodes
-         except Exception:
-             error_message = traceback.format_exc()
-             full_error_messages.append(error_message)
-             error_message = error_message.splitlines()[-1]
-             shorted_errors.append(
-                 f"Failed to import module {module_name} because {error_message}"
-             )
-
-     if len(shorted_errors) > 0:
-         full_err_log = '\n\n'.join(full_error_messages)
-         print(f"\n\nFull error log from comfyui_controlnet_aux: \n{full_err_log}\n\n")
-         log.info(
-             f"Some nodes failed to load:\n\t"
-             + "\n\t".join(shorted_errors)
-             + "\n\n"
-             + "Check that you properly installed the dependencies.\n"
-             + "If you think this is a bug, please report it on the github page (https://github.com/Fannovel16/comfyui_controlnet_aux/issues)"
-         )
-     return node_class_mappings, node_display_name_mappings
-
- AUX_NODE_MAPPINGS, AUX_DISPLAY_NAME_MAPPINGS = load_nodes()
-
- #For nodes that don't map image to image or have special requirements
- AIO_NOT_SUPPORTED = ["InpaintPreprocessor", "MeshGraphormer+ImpactDetector-DepthMapPreprocessor", "DiffusionEdge_Preprocessor"]
- AIO_NOT_SUPPORTED += ["SavePoseKpsAsJsonFile", "FacialPartColoringFromPoseKps", "UpperBodyTrackingFromPoseKps", "RenderPeopleKps", "RenderAnimalKps"]
- AIO_NOT_SUPPORTED += ["Unimatch_OptFlowPreprocessor", "MaskOptFlow"]
-
- def preprocessor_options():
-     auxs = list(AUX_NODE_MAPPINGS.keys())
-     auxs.insert(0, "none")
-     for name in AIO_NOT_SUPPORTED:
-         if name in auxs:
-             auxs.remove(name)
-     return auxs
-
-
- PREPROCESSOR_OPTIONS = preprocessor_options()
-
- class AIO_Preprocessor:
-     @classmethod
-     def INPUT_TYPES(s):
-         return define_preprocessor_inputs(
-             preprocessor=INPUT.COMBO(PREPROCESSOR_OPTIONS, default="none"),
-             resolution=INPUT.RESOLUTION()
-         )
-
-     RETURN_TYPES = ("IMAGE",)
-     FUNCTION = "execute"
-
-     CATEGORY = "ControlNet Preprocessors"
-
-     def execute(self, preprocessor, image, resolution=512):
-         if preprocessor == "none":
-             return (image, )
-         else:
-             aux_class = AUX_NODE_MAPPINGS[preprocessor]
-             input_types = aux_class.INPUT_TYPES()
-             input_types = {
-                 **input_types["required"],
-                 **(input_types["optional"] if "optional" in input_types else {})
-             }
-             params = {}
-             for name, input_type in input_types.items():
-                 if name == "image":
-                     params[name] = image
-                     continue
-
-                 if name == "resolution":
-                     params[name] = resolution
-                     continue
-
-                 if len(input_type) == 2 and ("default" in input_type[1]):
-                     params[name] = input_type[1]["default"]
-                     continue
-
-                 default_values = { "INT": 0, "FLOAT": 0.0 }
-                 if input_type[0] in default_values:
-                     params[name] = default_values[input_type[0]]
-
-             return getattr(aux_class(), aux_class.FUNCTION)(**params)
-
- class ControlNetAuxSimpleAddText:
-     @classmethod
-     def INPUT_TYPES(s):
-         return dict(
-             required=dict(image=INPUT.IMAGE(), text=INPUT.STRING())
-         )
-
-     RETURN_TYPES = ("IMAGE",)
-     FUNCTION = "execute"
-     CATEGORY = "ControlNet Preprocessors"
-     def execute(self, image, text):
-         from PIL import Image, ImageDraw, ImageFont
-         import numpy as np
-         import torch
-
-         font = ImageFont.truetype(str((here / "NotoSans-Regular.ttf").resolve()), 40)
-         img = Image.fromarray(image[0].cpu().numpy().__mul__(255.).astype(np.uint8))
-         ImageDraw.Draw(img).text((0,0), text, fill=(0,255,0), font=font)
-         return (torch.from_numpy(np.array(img)).unsqueeze(0) / 255.,)
-
- class ExecuteAllControlNetPreprocessors:
-     @classmethod
-     def INPUT_TYPES(s):
-         return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
-     RETURN_TYPES = ("IMAGE",)
-     FUNCTION = "execute"
-
-     CATEGORY = "ControlNet Preprocessors"
-
-     def execute(self, image, resolution=512):
-         try:
-             from comfy_execution.graph_utils import GraphBuilder
-         except:
-             raise RuntimeError("ExecuteAllControlNetPreprocessor requires [Execution Model Inversion](https://github.com/comfyanonymous/ComfyUI/commit/5cfe38). Update ComfyUI/SwarmUI to get this feature")
-
-         graph = GraphBuilder()
-         curr_outputs = []
-         for preprocc in PREPROCESSOR_OPTIONS:
-             preprocc_node = graph.node("AIO_Preprocessor", preprocessor=preprocc, image=image, resolution=resolution)
-             hint_img = preprocc_node.out(0)
-             add_text_node = graph.node("ControlNetAuxSimpleAddText", image=hint_img, text=preprocc)
-             curr_outputs.append(add_text_node.out(0))
-
-         while len(curr_outputs) > 1:
-             _outputs = []
-             for i in range(0, len(curr_outputs), 2):
-                 if i+1 < len(curr_outputs):
-                     image_batch = graph.node("ImageBatch", image1=curr_outputs[i], image2=curr_outputs[i+1])
-                     _outputs.append(image_batch.out(0))
-                 else:
-                     _outputs.append(curr_outputs[i])
-             curr_outputs = _outputs
-
-         return {
-             "result": (curr_outputs[0],),
-             "expand": graph.finalize(),
-         }
-
- class ControlNetPreprocessorSelector:
-     @classmethod
-     def INPUT_TYPES(s):
-         return {
-             "required": {
-                 "preprocessor": (PREPROCESSOR_OPTIONS,),
-             }
-         }
-
-     RETURN_TYPES = (PREPROCESSOR_OPTIONS,)
-     RETURN_NAMES = ("preprocessor",)
-     FUNCTION = "get_preprocessor"
-
-     CATEGORY = "ControlNet Preprocessors"
-
-     def get_preprocessor(self, preprocessor: str):
-         return (preprocessor,)
-
-
- NODE_CLASS_MAPPINGS = {
-     **AUX_NODE_MAPPINGS,
-     "AIO_Preprocessor": AIO_Preprocessor,
-     "ControlNetPreprocessorSelector": ControlNetPreprocessorSelector,
-     **HIE_NODE_CLASS_MAPPINGS,
-     "ExecuteAllControlNetPreprocessors": ExecuteAllControlNetPreprocessors,
-     "ControlNetAuxSimpleAddText": ControlNetAuxSimpleAddText
- }
-
- NODE_DISPLAY_NAME_MAPPINGS = {
-     **AUX_DISPLAY_NAME_MAPPINGS,
-     "AIO_Preprocessor": "AIO Aux Preprocessor",
-     "ControlNetPreprocessorSelector": "Preprocessor Selector",
-     **HIE_NODE_DISPLAY_NAME_MAPPINGS,
-     "ExecuteAllControlNetPreprocessors": "Execute All ControlNet Preprocessors"
- }
+ #Dummy file ensuring this package will be recognized
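
For context, the deleted root __init__.py built its node registry by importing every module under node_wrappers/ and merging their NODE_CLASS_MAPPINGS; this commit reduces the root to a bare package marker. A minimal, self-contained sketch of that dynamic-import pattern (the package and directory names here are hypothetical, not from this repo):

import importlib
from pathlib import Path

def collect_node_mappings(package: str, wrappers_dir: Path) -> dict:
    # Import each wrapper module and merge its NODE_CLASS_MAPPINGS,
    # skipping OS-created hidden files such as .DS_Store.
    mappings = {}
    for path in sorted(wrappers_dir.glob("*.py")):
        if path.stem.startswith("."):
            continue
        module = importlib.import_module(f".{path.stem}", package=package)
        mappings.update(getattr(module, "NODE_CLASS_MAPPINGS", {}))
    return mappings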
custom_albumentations/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2017 Buslaev Alexander, Alexander Parinov, Vladimir Iglovikov
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
custom_albumentations/__init__.py ADDED
@@ -0,0 +1,15 @@
+ from __future__ import absolute_import
+
+ __version__ = "1.3.1"
+
+ from .augmentations import *
+ from .core.composition import *
+ from .core.serialization import *
+ from .core.transforms_interface import *
+
+ try:
+     from .imgaug.transforms import *  # type: ignore
+ except ImportError:
+     # imgaug is not installed by default, so we import stubs.
+     # Run `pip install -U albumentations[imgaug]` if you need augmentations from imgaug.
+     from .imgaug.stubs import *  # type: ignore
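
Since the library is vendored under its own name, downstream code imports custom_albumentations in place of albumentations. A minimal usage sketch (the pipeline below is illustrative, not taken from this commit):

import numpy as np
import custom_albumentations as A  # vendored drop-in for albumentations 1.3.1

# Compose, RandomCrop and Blur are all re-exported at the package top level
# by the star imports above.
transform = A.Compose([
    A.RandomCrop(height=256, width=256, p=1.0),
    A.Blur(blur_limit=7, p=0.5),
])
augmented = transform(image=np.zeros((512, 512, 3), dtype=np.uint8))["image"]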
custom_albumentations/augmentations/__init__.py ADDED
@@ -0,0 +1,21 @@
+ # Common classes
+ from .blur.functional import *
+ from .blur.transforms import *
+ from .crops.functional import *
+ from .crops.transforms import *
+
+ # New transformations go into the individual files listed below
+ from .domain_adaptation import *
+ from .dropout.channel_dropout import *
+ from .dropout.coarse_dropout import *
+ from .dropout.cutout import *
+ from .dropout.functional import *
+ from .dropout.grid_dropout import *
+ from .dropout.mask_dropout import *
+ from .functional import *
+ from .geometric.functional import *
+ from .geometric.resize import *
+ from .geometric.rotate import *
+ from .geometric.transforms import *
+ from .transforms import *
+ from .utils import *
custom_albumentations/augmentations/blur/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .functional import *
+ from .transforms import *
custom_albumentations/augmentations/blur/functional.py ADDED
@@ -0,0 +1,106 @@
+ from itertools import product
+ from math import ceil
+ from typing import Sequence, Union
+
+ import cv2
+ import numpy as np
+
+ from custom_albumentations.augmentations.functional import convolve
+ from custom_albumentations.augmentations.geometric.functional import scale
+ from custom_albumentations.augmentations.utils import (
+     _maybe_process_in_chunks,
+     clipped,
+     preserve_shape,
+ )
+
+ __all__ = ["blur", "median_blur", "gaussian_blur", "glass_blur"]
+
+
+ @preserve_shape
+ def blur(img: np.ndarray, ksize: int) -> np.ndarray:
+     blur_fn = _maybe_process_in_chunks(cv2.blur, ksize=(ksize, ksize))
+     return blur_fn(img)
+
+
+ @preserve_shape
+ def median_blur(img: np.ndarray, ksize: int) -> np.ndarray:
+     if img.dtype == np.float32 and ksize not in {3, 5}:
+         raise ValueError(f"Invalid ksize value {ksize}. For a float32 image the only valid ksize values are 3 and 5")
+
+     blur_fn = _maybe_process_in_chunks(cv2.medianBlur, ksize=ksize)
+     return blur_fn(img)
+
+
+ @preserve_shape
+ def gaussian_blur(img: np.ndarray, ksize: int, sigma: float = 0) -> np.ndarray:
+     # When sigma=0, it is computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`
+     blur_fn = _maybe_process_in_chunks(cv2.GaussianBlur, ksize=(ksize, ksize), sigmaX=sigma)
+     return blur_fn(img)
+
+
+ @preserve_shape
+ def glass_blur(
+     img: np.ndarray, sigma: float, max_delta: int, iterations: int, dxy: np.ndarray, mode: str
+ ) -> np.ndarray:
+     x = cv2.GaussianBlur(np.array(img), sigmaX=sigma, ksize=(0, 0))
+
+     if mode == "fast":
+         hs = np.arange(img.shape[0] - max_delta, max_delta, -1)
+         ws = np.arange(img.shape[1] - max_delta, max_delta, -1)
+         h: Union[int, np.ndarray] = np.tile(hs, ws.shape[0])
+         w: Union[int, np.ndarray] = np.repeat(ws, hs.shape[0])
+
+         for i in range(iterations):
+             dy = dxy[:, i, 0]
+             dx = dxy[:, i, 1]
+             x[h, w], x[h + dy, w + dx] = x[h + dy, w + dx], x[h, w]
+
+     elif mode == "exact":
+         for ind, (i, h, w) in enumerate(
+             product(
+                 range(iterations),
+                 range(img.shape[0] - max_delta, max_delta, -1),
+                 range(img.shape[1] - max_delta, max_delta, -1),
+             )
+         ):
+             ind = ind if ind < len(dxy) else ind % len(dxy)
+             dy = dxy[ind, i, 0]
+             dx = dxy[ind, i, 1]
+             x[h, w], x[h + dy, w + dx] = x[h + dy, w + dx], x[h, w]
+     else:
+         raise ValueError(f"Unsupported mode `{mode}`. Supports only `fast` and `exact`.")
+
+     return cv2.GaussianBlur(x, sigmaX=sigma, ksize=(0, 0))
+
+
+ def defocus(img: np.ndarray, radius: int, alias_blur: float) -> np.ndarray:
+     length = np.arange(-max(8, radius), max(8, radius) + 1)
+     ksize = 3 if radius <= 8 else 5
+
+     x, y = np.meshgrid(length, length)
+     aliased_disk = np.array((x**2 + y**2) <= radius**2, dtype=np.float32)
+     aliased_disk /= np.sum(aliased_disk)
+
+     kernel = gaussian_blur(aliased_disk, ksize, sigma=alias_blur)
+     return convolve(img, kernel=kernel)
+
+
+ def central_zoom(img: np.ndarray, zoom_factor: int) -> np.ndarray:
+     h, w = img.shape[:2]
+     h_ch, w_ch = ceil(h / zoom_factor), ceil(w / zoom_factor)
+     h_top, w_top = (h - h_ch) // 2, (w - w_ch) // 2
+
+     img = scale(img[h_top : h_top + h_ch, w_top : w_top + w_ch], zoom_factor, cv2.INTER_LINEAR)
+     h_trim_top, w_trim_top = (img.shape[0] - h) // 2, (img.shape[1] - w) // 2
+     return img[h_trim_top : h_trim_top + h, w_trim_top : w_trim_top + w]
+
+
+ @clipped
+ def zoom_blur(img: np.ndarray, zoom_factors: Union[np.ndarray, Sequence[int]]) -> np.ndarray:
+     out = np.zeros_like(img, dtype=np.float32)
+     for zoom_factor in zoom_factors:
+         out += central_zoom(img, zoom_factor)
+
+     img = ((img + out) / (len(zoom_factors) + 1)).astype(img.dtype)
+
+     return img
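
These helpers operate directly on NumPy arrays, so they can be exercised without building a transform pipeline. A quick sketch (array shapes and values are arbitrary, assuming the vendored package imports cleanly):

import numpy as np
from custom_albumentations.augmentations.blur.functional import blur, gaussian_blur

img = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)
box_blurred = blur(img, ksize=5)             # simple box filter via cv2.blur
gauss_blurred = gaussian_blur(img, ksize=5)  # sigma derived from ksize when sigma=0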
custom_albumentations/augmentations/blur/transforms.py ADDED
@@ -0,0 +1,486 @@
+ import random
+ import warnings
+ from typing import Any, Dict, List, Sequence, Tuple
+
+ import cv2
+ import numpy as np
+
+ from custom_albumentations import random_utils
+ from custom_albumentations.augmentations import functional as FMain
+ from custom_albumentations.augmentations.blur import functional as F
+ from custom_albumentations.core.transforms_interface import (
+     ImageOnlyTransform,
+     ScaleFloatType,
+     ScaleIntType,
+     to_tuple,
+ )
+
+ __all__ = ["Blur", "MotionBlur", "GaussianBlur", "GlassBlur", "AdvancedBlur", "MedianBlur", "Defocus", "ZoomBlur"]
+
+
+ class Blur(ImageOnlyTransform):
+     """Blur the input image using a random-sized kernel.
+
+     Args:
+         blur_limit (int, (int, int)): maximum kernel size for blurring the input image.
+             Should be in range [3, inf). Default: (3, 7).
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(self, blur_limit: ScaleIntType = 7, always_apply: bool = False, p: float = 0.5):
+         super().__init__(always_apply, p)
+         self.blur_limit = to_tuple(blur_limit, 3)
+
+     def apply(self, img: np.ndarray, ksize: int = 3, **params) -> np.ndarray:
+         return F.blur(img, ksize)
+
+     def get_params(self) -> Dict[str, Any]:
+         return {"ksize": int(random.choice(list(range(self.blur_limit[0], self.blur_limit[1] + 1, 2))))}
+
+     def get_transform_init_args_names(self) -> Tuple[str, ...]:
+         return ("blur_limit",)
+
+
+ class MotionBlur(Blur):
+     """Apply motion blur to the input image using a random-sized kernel.
+
+     Args:
+         blur_limit (int): maximum kernel size for blurring the input image.
+             Should be in range [3, inf). Default: (3, 7).
+         allow_shifted (bool): if set to false creates non-shifted kernels only,
+             otherwise creates randomly shifted kernels. Default: True.
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         blur_limit: ScaleIntType = 7,
+         allow_shifted: bool = True,
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super().__init__(blur_limit=blur_limit, always_apply=always_apply, p=p)
+         self.allow_shifted = allow_shifted
+
+         if not allow_shifted and (self.blur_limit[0] % 2 != 1 or self.blur_limit[1] % 2 != 1):
+             raise ValueError(f"Blur limit must be odd when centered=True. Got: {self.blur_limit}")
+
+     def get_transform_init_args_names(self) -> Tuple[str, ...]:
+         return super().get_transform_init_args_names() + ("allow_shifted",)
+
+     def apply(self, img: np.ndarray, kernel: np.ndarray = None, **params) -> np.ndarray:  # type: ignore
+         return FMain.convolve(img, kernel=kernel)
+
+     def get_params(self) -> Dict[str, Any]:
+         ksize = random.choice(list(range(self.blur_limit[0], self.blur_limit[1] + 1, 2)))
+         if ksize <= 2:
+             raise ValueError("ksize must be > 2. Got: {}".format(ksize))
+         kernel = np.zeros((ksize, ksize), dtype=np.uint8)
+         x1, x2 = random.randint(0, ksize - 1), random.randint(0, ksize - 1)
+         if x1 == x2:
+             y1, y2 = random.sample(range(ksize), 2)
+         else:
+             y1, y2 = random.randint(0, ksize - 1), random.randint(0, ksize - 1)
+
+         def make_odd_val(v1, v2):
+             len_v = abs(v1 - v2) + 1
+             if len_v % 2 != 1:
+                 if v2 > v1:
+                     v2 -= 1
+                 else:
+                     v1 -= 1
+             return v1, v2
+
+         if not self.allow_shifted:
+             x1, x2 = make_odd_val(x1, x2)
+             y1, y2 = make_odd_val(y1, y2)
+
+             xc = (x1 + x2) / 2
+             yc = (y1 + y2) / 2
+
+             center = ksize / 2 - 0.5
+             dx = xc - center
+             dy = yc - center
+             x1, x2 = [int(i - dx) for i in [x1, x2]]
+             y1, y2 = [int(i - dy) for i in [y1, y2]]
+
+         cv2.line(kernel, (x1, y1), (x2, y2), 1, thickness=1)
+
+         # Normalize kernel
+         return {"kernel": kernel.astype(np.float32) / np.sum(kernel)}
+
+
+ class MedianBlur(Blur):
+     """Blur the input image using a median filter with a random aperture linear size.
+
+     Args:
+         blur_limit (int): maximum aperture linear size for blurring the input image.
+             Must be odd and in range [3, inf). Default: (3, 7).
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(self, blur_limit: ScaleIntType = 7, always_apply: bool = False, p: float = 0.5):
+         super().__init__(blur_limit, always_apply, p)
+
+         if self.blur_limit[0] % 2 != 1 or self.blur_limit[1] % 2 != 1:
+             raise ValueError("MedianBlur supports only odd blur limits.")
+
+     def apply(self, img: np.ndarray, ksize: int = 3, **params) -> np.ndarray:
+         return F.median_blur(img, ksize)
+
+
+ class GaussianBlur(ImageOnlyTransform):
+     """Blur the input image using a Gaussian filter with a random kernel size.
+
+     Args:
+         blur_limit (int, (int, int)): maximum Gaussian kernel size for blurring the input image.
+             Must be zero or odd and in range [0, inf). If set to 0 it will be computed from sigma
+             as `round(sigma * (3 if img.dtype == np.uint8 else 4) * 2 + 1) + 1`.
+             If set single value `blur_limit` will be in range (0, blur_limit).
+             Default: (3, 7).
+         sigma_limit (float, (float, float)): Gaussian kernel standard deviation. Must be in range [0, inf).
+             If set single value `sigma_limit` will be in range (0, sigma_limit).
+             If set to 0 sigma will be computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. Default: 0.
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         blur_limit: ScaleIntType = (3, 7),
+         sigma_limit: ScaleFloatType = 0,
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super().__init__(always_apply, p)
+         self.blur_limit = to_tuple(blur_limit, 0)
+         self.sigma_limit = to_tuple(sigma_limit if sigma_limit is not None else 0, 0)
+
+         if self.blur_limit[0] == 0 and self.sigma_limit[0] == 0:
+             self.blur_limit = 3, max(3, self.blur_limit[1])
+             warnings.warn(
+                 "blur_limit and sigma_limit minimum value can not be both equal to 0. "
+                 "blur_limit minimum value changed to 3."
+             )
+
+         if (self.blur_limit[0] != 0 and self.blur_limit[0] % 2 != 1) or (
+             self.blur_limit[1] != 0 and self.blur_limit[1] % 2 != 1
+         ):
+             raise ValueError("GaussianBlur supports only odd blur limits.")
+
+     def apply(self, img: np.ndarray, ksize: int = 3, sigma: float = 0, **params) -> np.ndarray:
+         return F.gaussian_blur(img, ksize, sigma=sigma)
+
+     def get_params(self) -> Dict[str, float]:
+         ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1)
+         if ksize != 0 and ksize % 2 != 1:
+             ksize = (ksize + 1) % (self.blur_limit[1] + 1)
+
+         return {"ksize": ksize, "sigma": random.uniform(*self.sigma_limit)}
+
+     def get_transform_init_args_names(self) -> Tuple[str, str]:
+         return ("blur_limit", "sigma_limit")
+
+
+ class GlassBlur(Blur):
+     """Apply glass noise to the input image.
+
+     Args:
+         sigma (float): standard deviation for Gaussian kernel.
+         max_delta (int): max distance between pixels which are swapped.
+         iterations (int): number of repeats.
+             Should be in range [1, inf). Default: (2).
+         mode (str): mode of computation: fast or exact. Default: "fast".
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+
+     Reference:
+         | https://arxiv.org/abs/1903.12261
+         | https://github.com/hendrycks/robustness/blob/master/ImageNet-C/create_c/make_imagenet_c.py
+     """
+
+     def __init__(
+         self,
+         sigma: float = 0.7,
+         max_delta: int = 4,
+         iterations: int = 2,
+         always_apply: bool = False,
+         mode: str = "fast",
+         p: float = 0.5,
+     ):
+         super().__init__(always_apply=always_apply, p=p)
+         if iterations < 1:
+             raise ValueError(f"Iterations should be more or equal to 1, but we got {iterations}")
+
+         if mode not in ["fast", "exact"]:
+             raise ValueError(f"Mode should be 'fast' or 'exact', but we got {mode}")
+
+         self.sigma = sigma
+         self.max_delta = max_delta
+         self.iterations = iterations
+         self.mode = mode
+
+     def apply(self, img: np.ndarray, dxy: np.ndarray = None, **params) -> np.ndarray:  # type: ignore
+         assert dxy is not None
+         return F.glass_blur(img, self.sigma, self.max_delta, self.iterations, dxy, self.mode)
+
+     def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
+         img = params["image"]
+
+         # generate array containing all necessary values for transformations
+         width_pixels = img.shape[0] - self.max_delta * 2
+         height_pixels = img.shape[1] - self.max_delta * 2
+         total_pixels = width_pixels * height_pixels
+         dxy = random_utils.randint(-self.max_delta, self.max_delta, size=(total_pixels, self.iterations, 2))
+
+         return {"dxy": dxy}
+
+     def get_transform_init_args_names(self) -> Tuple[str, str, str]:
+         return ("sigma", "max_delta", "iterations")
+
+     @property
+     def targets_as_params(self) -> List[str]:
+         return ["image"]
+
+
+ class AdvancedBlur(ImageOnlyTransform):
+     """Blur the input image using a Generalized Normal filter with randomly selected parameters.
+     This transform also adds multiplicative noise to the generated kernel before convolution.
+
+     Args:
+         blur_limit: maximum Gaussian kernel size for blurring the input image.
+             Must be zero or odd and in range [0, inf). If set to 0 it will be computed from sigma
+             as `round(sigma * (3 if img.dtype == np.uint8 else 4) * 2 + 1) + 1`.
+             If set single value `blur_limit` will be in range (0, blur_limit).
+             Default: (3, 7).
+         sigmaX_limit: Gaussian kernel standard deviation. Must be in range [0, inf).
+             If set single value `sigmaX_limit` will be in range (0, sigma_limit).
+             If set to 0 sigma will be computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. Default: 0.
+         sigmaY_limit: Same as `sigmaX_limit` for the other dimension.
+         rotate_limit: Range from which a random angle used to rotate Gaussian kernel is picked.
+             If limit is a single int an angle is picked from (-rotate_limit, rotate_limit). Default: (-90, 90).
+         beta_limit: Distribution shape parameter, 1 is the normal distribution. Values below 1.0 make distribution
+             tails heavier than normal, values above 1.0 make it lighter than normal. Default: (0.5, 8.0).
+         noise_limit: Multiplicative factor that controls the strength of kernel noise. Must be positive and preferably
+             centered around 1.0. If set single value `noise_limit` will be in range (0, noise_limit).
+             Default: (0.75, 1.25).
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Reference:
+         https://arxiv.org/abs/2107.10833
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         blur_limit: ScaleIntType = (3, 7),
+         sigmaX_limit: ScaleFloatType = (0.2, 1.0),
+         sigmaY_limit: ScaleFloatType = (0.2, 1.0),
+         rotate_limit: ScaleIntType = 90,
+         beta_limit: ScaleFloatType = (0.5, 8.0),
+         noise_limit: ScaleFloatType = (0.9, 1.1),
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super().__init__(always_apply, p)
+         self.blur_limit = to_tuple(blur_limit, 3)
+         self.sigmaX_limit = self.__check_values(to_tuple(sigmaX_limit, 0.0), name="sigmaX_limit")
+         self.sigmaY_limit = self.__check_values(to_tuple(sigmaY_limit, 0.0), name="sigmaY_limit")
+         self.rotate_limit = to_tuple(rotate_limit)
+         self.beta_limit = to_tuple(beta_limit, low=0.0)
+         self.noise_limit = self.__check_values(to_tuple(noise_limit, 0.0), name="noise_limit")
+
+         if (self.blur_limit[0] != 0 and self.blur_limit[0] % 2 != 1) or (
+             self.blur_limit[1] != 0 and self.blur_limit[1] % 2 != 1
+         ):
+             raise ValueError("AdvancedBlur supports only odd blur limits.")
+
+         if self.sigmaX_limit[0] == 0 and self.sigmaY_limit[0] == 0:
+             raise ValueError("sigmaX_limit and sigmaY_limit minimum value can not be both equal to 0.")
+
+         if not (self.beta_limit[0] < 1.0 < self.beta_limit[1]):
+             raise ValueError("Beta limit is expected to include 1.0")
+
+     @staticmethod
+     def __check_values(
+         value: Sequence[float], name: str, bounds: Tuple[float, float] = (0, float("inf"))
+     ) -> Sequence[float]:
+         if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
+             raise ValueError(f"{name} values should be between {bounds}")
+         return value
+
+     def apply(self, img: np.ndarray, kernel: np.ndarray = np.array(None), **params) -> np.ndarray:
+         return FMain.convolve(img, kernel=kernel)
+
+     def get_params(self) -> Dict[str, np.ndarray]:
+         ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
+         sigmaX = random.uniform(*self.sigmaX_limit)
+         sigmaY = random.uniform(*self.sigmaY_limit)
+         angle = np.deg2rad(random.uniform(*self.rotate_limit))
+
+         # Split into 2 cases to avoid selection of narrow kernels (beta > 1) too often.
+         if random.random() < 0.5:
+             beta = random.uniform(self.beta_limit[0], 1)
+         else:
+             beta = random.uniform(1, self.beta_limit[1])
+
+         noise_matrix = random_utils.uniform(self.noise_limit[0], self.noise_limit[1], size=[ksize, ksize])
+
+         # Generate mesh grid centered at zero.
+         ax = np.arange(-ksize // 2 + 1.0, ksize // 2 + 1.0)
+         # Shape (ksize, ksize, 2)
+         grid = np.stack(np.meshgrid(ax, ax), axis=-1)
+
+         # Calculate rotated sigma matrix
+         d_matrix = np.array([[sigmaX**2, 0], [0, sigmaY**2]])
+         u_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
+         sigma_matrix = np.dot(u_matrix, np.dot(d_matrix, u_matrix.T))
+
+         inverse_sigma = np.linalg.inv(sigma_matrix)
+         # Described in "Parameter Estimation For Multivariate Generalized Gaussian Distributions"
+         kernel = np.exp(-0.5 * np.power(np.sum(np.dot(grid, inverse_sigma) * grid, 2), beta))
+         # Add noise
+         kernel = kernel * noise_matrix
+
+         # Normalize kernel
+         kernel = kernel.astype(np.float32) / np.sum(kernel)
+         return {"kernel": kernel}
+
+     def get_transform_init_args_names(self) -> Tuple[str, str, str, str, str, str]:
+         return (
+             "blur_limit",
+             "sigmaX_limit",
+             "sigmaY_limit",
+             "rotate_limit",
+             "beta_limit",
+             "noise_limit",
+         )
+
+
+ class Defocus(ImageOnlyTransform):
+     """
+     Apply defocus transform. See https://arxiv.org/abs/1903.12261.
+
+     Args:
+         radius ((int, int) or int): range for radius of defocusing.
+             If limit is a single int, the range will be [1, limit]. Default: (3, 10).
+         alias_blur ((float, float) or float): range for alias_blur of defocusing (sigma of gaussian blur).
+             If limit is a single float, the range will be (0, limit). Default: (0.1, 0.5).
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         Any
+     """
+
+     def __init__(
+         self,
+         radius: ScaleIntType = (3, 10),
+         alias_blur: ScaleFloatType = (0.1, 0.5),
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super().__init__(always_apply, p)
+         self.radius = to_tuple(radius, low=1)
+         self.alias_blur = to_tuple(alias_blur, low=0)
+
+         if self.radius[0] <= 0:
+             raise ValueError("Parameter radius must be positive")
+
+         if self.alias_blur[0] < 0:
+             raise ValueError("Parameter alias_blur must be non-negative")
+
+     def apply(self, img: np.ndarray, radius: int = 3, alias_blur: float = 0.5, **params) -> np.ndarray:
+         return F.defocus(img, radius, alias_blur)
+
+     def get_params(self) -> Dict[str, Any]:
+         return {
+             "radius": random_utils.randint(self.radius[0], self.radius[1] + 1),
+             "alias_blur": random_utils.uniform(self.alias_blur[0], self.alias_blur[1]),
+         }
+
+     def get_transform_init_args_names(self) -> Tuple[str, str]:
+         return ("radius", "alias_blur")
+
+
+ class ZoomBlur(ImageOnlyTransform):
+     """
+     Apply zoom blur transform. See https://arxiv.org/abs/1903.12261.
+
+     Args:
+         max_factor ((float, float) or float): range for max factor for blurring.
+             If max_factor is a single float, the range will be (1, limit). Default: (1, 1.31).
+             All max_factor values should be larger than 1.
+         step_factor ((float, float) or float): If single float will be used as step parameter for np.arange.
+             If tuple of float step_factor will be in range `[step_factor[0], step_factor[1])`. Default: (0.01, 0.03).
+             All step_factor values should be positive.
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         Any
+     """
+
+     def __init__(
+         self,
+         max_factor: ScaleFloatType = 1.31,
+         step_factor: ScaleFloatType = (0.01, 0.03),
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super().__init__(always_apply, p)
+         self.max_factor = to_tuple(max_factor, low=1.0)
+         self.step_factor = to_tuple(step_factor, step_factor)
+
+         if self.max_factor[0] < 1:
+             raise ValueError("Max factor must be larger or equal 1")
+         if self.step_factor[0] <= 0:
+             raise ValueError("Step factor must be positive")
+
+     def apply(self, img: np.ndarray, zoom_factors: np.ndarray = np.array(None), **params) -> np.ndarray:
+         assert zoom_factors is not None
+         return F.zoom_blur(img, zoom_factors)
+
+     def get_params(self) -> Dict[str, Any]:
+         max_factor = random.uniform(self.max_factor[0], self.max_factor[1])
+         step_factor = random.uniform(self.step_factor[0], self.step_factor[1])
+         return {"zoom_factors": np.arange(1.0, max_factor, step_factor)}
+
+     def get_transform_init_args_names(self) -> Tuple[str, str]:
+         return ("max_factor", "step_factor")
custom_albumentations/augmentations/crops/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .functional import *
+ from .transforms import *
custom_albumentations/augmentations/crops/functional.py ADDED
@@ -0,0 +1,317 @@
+ from typing import Optional, Sequence, Tuple
+
+ import cv2
+ import numpy as np
+
+ from custom_albumentations.augmentations.utils import (
+     _maybe_process_in_chunks,
+     preserve_channel_dim,
+ )
+
+ from ...core.bbox_utils import denormalize_bbox, normalize_bbox
+ from ...core.transforms_interface import BoxInternalType, KeypointInternalType
+ from ..geometric import functional as FGeometric
+
+ __all__ = [
+     "get_random_crop_coords",
+     "random_crop",
+     "crop_bbox_by_coords",
+     "bbox_random_crop",
+     "crop_keypoint_by_coords",
+     "keypoint_random_crop",
+     "get_center_crop_coords",
+     "center_crop",
+     "bbox_center_crop",
+     "keypoint_center_crop",
+     "crop",
+     "bbox_crop",
+     "clamping_crop",
+     "crop_and_pad",
+     "crop_and_pad_bbox",
+     "crop_and_pad_keypoint",
+ ]
+
+
+ def get_random_crop_coords(height: int, width: int, crop_height: int, crop_width: int, h_start: float, w_start: float):
+     # h_start is [0, 1) and should map to [0, (height - crop_height)] (note inclusive)
+     # This is conceptually equivalent to mapping onto `range(0, (height - crop_height + 1))`
+     # See: https://github.com/albumentations-team/albumentations/pull/1080
+     y1 = int((height - crop_height + 1) * h_start)
+     y2 = y1 + crop_height
+     x1 = int((width - crop_width + 1) * w_start)
+     x2 = x1 + crop_width
+     return x1, y1, x2, y2
+
+
+ def random_crop(img: np.ndarray, crop_height: int, crop_width: int, h_start: float, w_start: float):
+     height, width = img.shape[:2]
+     if height < crop_height or width < crop_width:
+         raise ValueError(
+             "Requested crop size ({crop_height}, {crop_width}) is "
+             "larger than the image size ({height}, {width})".format(
+                 crop_height=crop_height, crop_width=crop_width, height=height, width=width
+             )
+         )
+     x1, y1, x2, y2 = get_random_crop_coords(height, width, crop_height, crop_width, h_start, w_start)
+     img = img[y1:y2, x1:x2]
+     return img
+
+
+ def crop_bbox_by_coords(
+     bbox: BoxInternalType,
+     crop_coords: Tuple[int, int, int, int],
+     crop_height: int,
+     crop_width: int,
+     rows: int,
+     cols: int,
+ ):
+     """Crop a bounding box using the provided coordinates of top-left and bottom-right corners in pixels and the
+     required height and width of the crop.
+
+     Args:
+         bbox (tuple): A cropped box `(x_min, y_min, x_max, y_max)`.
+         crop_coords (tuple): Crop coordinates `(x1, y1, x2, y2)`.
+         crop_height (int):
+         crop_width (int):
+         rows (int): Image rows.
+         cols (int): Image cols.
+
+     Returns:
+         tuple: A cropped bounding box `(x_min, y_min, x_max, y_max)`.
+
+     """
+     bbox = denormalize_bbox(bbox, rows, cols)
+     x_min, y_min, x_max, y_max = bbox[:4]
+     x1, y1, _, _ = crop_coords
+     cropped_bbox = x_min - x1, y_min - y1, x_max - x1, y_max - y1
+     return normalize_bbox(cropped_bbox, crop_height, crop_width)
+
+
+ def bbox_random_crop(
+     bbox: BoxInternalType, crop_height: int, crop_width: int, h_start: float, w_start: float, rows: int, cols: int
+ ):
+     crop_coords = get_random_crop_coords(rows, cols, crop_height, crop_width, h_start, w_start)
+     return crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols)
+
+
+ def crop_keypoint_by_coords(
+     keypoint: KeypointInternalType, crop_coords: Tuple[int, int, int, int]
+ ):  # skipcq: PYL-W0613
+     """Crop a keypoint using the provided coordinates of top-left and bottom-right corners in pixels and the
+     required height and width of the crop.
+
+     Args:
+         keypoint (tuple): A keypoint `(x, y, angle, scale)`.
+         crop_coords (tuple): Crop box coords `(x1, y1, x2, y2)`.
+
+     Returns:
+         A keypoint `(x, y, angle, scale)`.
+
+     """
+     x, y, angle, scale = keypoint[:4]
+     x1, y1, _, _ = crop_coords
+     return x - x1, y - y1, angle, scale
+
+
+ def keypoint_random_crop(
+     keypoint: KeypointInternalType,
+     crop_height: int,
+     crop_width: int,
+     h_start: float,
+     w_start: float,
+     rows: int,
+     cols: int,
+ ):
+     """Keypoint random crop.
+
+     Args:
+         keypoint: (tuple): A keypoint `(x, y, angle, scale)`.
+         crop_height (int): Crop height.
+         crop_width (int): Crop width.
+         h_start (float): Crop height start.
+         w_start (float): Crop width start.
+         rows (int): Image height.
+         cols (int): Image width.
+
+     Returns:
+         A keypoint `(x, y, angle, scale)`.
+
+     """
+     crop_coords = get_random_crop_coords(rows, cols, crop_height, crop_width, h_start, w_start)
+     return crop_keypoint_by_coords(keypoint, crop_coords)
+
+
+ def get_center_crop_coords(height: int, width: int, crop_height: int, crop_width: int):
+     y1 = (height - crop_height) // 2
+     y2 = y1 + crop_height
+     x1 = (width - crop_width) // 2
+     x2 = x1 + crop_width
+     return x1, y1, x2, y2
+
+
+ def center_crop(img: np.ndarray, crop_height: int, crop_width: int):
+     height, width = img.shape[:2]
+     if height < crop_height or width < crop_width:
+         raise ValueError(
+             "Requested crop size ({crop_height}, {crop_width}) is "
+             "larger than the image size ({height}, {width})".format(
+                 crop_height=crop_height, crop_width=crop_width, height=height, width=width
+             )
+         )
+     x1, y1, x2, y2 = get_center_crop_coords(height, width, crop_height, crop_width)
+     img = img[y1:y2, x1:x2]
+     return img
+
+
+ def bbox_center_crop(bbox: BoxInternalType, crop_height: int, crop_width: int, rows: int, cols: int):
+     crop_coords = get_center_crop_coords(rows, cols, crop_height, crop_width)
+     return crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols)
+
+
+ def keypoint_center_crop(keypoint: KeypointInternalType, crop_height: int, crop_width: int, rows: int, cols: int):
+     """Keypoint center crop.
+
+     Args:
+         keypoint (tuple): A keypoint `(x, y, angle, scale)`.
+         crop_height (int): Crop height.
+         crop_width (int): Crop width.
+         rows (int): Image height.
+         cols (int): Image width.
+
+     Returns:
+         tuple: A keypoint `(x, y, angle, scale)`.
+
+     """
+     crop_coords = get_center_crop_coords(rows, cols, crop_height, crop_width)
+     return crop_keypoint_by_coords(keypoint, crop_coords)
+
+
+ def crop(img: np.ndarray, x_min: int, y_min: int, x_max: int, y_max: int):
+     height, width = img.shape[:2]
+     if x_max <= x_min or y_max <= y_min:
+         raise ValueError(
+             "We should have x_min < x_max and y_min < y_max. But we got"
+             " (x_min = {x_min}, y_min = {y_min}, x_max = {x_max}, y_max = {y_max})".format(
+                 x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max
+             )
+         )
+
+     if x_min < 0 or x_max > width or y_min < 0 or y_max > height:
+         raise ValueError(
+             "Values for crop should be non negative and equal or smaller than image sizes"
+             "(x_min = {x_min}, y_min = {y_min}, x_max = {x_max}, y_max = {y_max}, "
+             "height = {height}, width = {width})".format(
+                 x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, height=height, width=width
+             )
+         )
+
+     return img[y_min:y_max, x_min:x_max]
+
+
+ def bbox_crop(bbox: BoxInternalType, x_min: int, y_min: int, x_max: int, y_max: int, rows: int, cols: int):
+     """Crop a bounding box.
+
+     Args:
+         bbox (tuple): A bounding box `(x_min, y_min, x_max, y_max)`.
+         x_min (int):
+         y_min (int):
+         x_max (int):
+         y_max (int):
+         rows (int): Image rows.
+         cols (int): Image cols.
+
+     Returns:
+         tuple: A cropped bounding box `(x_min, y_min, x_max, y_max)`.
+
+     """
+     crop_coords = x_min, y_min, x_max, y_max
+     crop_height = y_max - y_min
+     crop_width = x_max - x_min
+     return crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols)
+
+
+ def clamping_crop(img: np.ndarray, x_min: int, y_min: int, x_max: int, y_max: int):
+     h, w = img.shape[:2]
+     if x_min < 0:
+         x_min = 0
+     if y_min < 0:
+         y_min = 0
+     if y_max >= h:
+         y_max = h - 1
+     if x_max >= w:
+         x_max = w - 1
+     return img[int(y_min) : int(y_max), int(x_min) : int(x_max)]
+
+
+ @preserve_channel_dim
+ def crop_and_pad(
+     img: np.ndarray,
+     crop_params: Optional[Sequence[int]],
+     pad_params: Optional[Sequence[int]],
+     pad_value: Optional[float],
+     rows: int,
+     cols: int,
+     interpolation: int,
+     pad_mode: int,
+     keep_size: bool,
+ ) -> np.ndarray:
+     if crop_params is not None and any(i != 0 for i in crop_params):
+         img = crop(img, *crop_params)
+     if pad_params is not None and any(i != 0 for i in pad_params):
+         img = FGeometric.pad_with_params(
+             img, pad_params[0], pad_params[1], pad_params[2], pad_params[3], border_mode=pad_mode, value=pad_value
+         )
+
+     if keep_size:
+         resize_fn = _maybe_process_in_chunks(cv2.resize, dsize=(cols, rows), interpolation=interpolation)
+         img = resize_fn(img)
+
+     return img
+
+
+ def crop_and_pad_bbox(
+     bbox: BoxInternalType,
+     crop_params: Optional[Sequence[int]],
+     pad_params: Optional[Sequence[int]],
+     rows,
+     cols,
+     result_rows,
+     result_cols,
+ ) -> BoxInternalType:
+     x1, y1, x2, y2 = denormalize_bbox(bbox, rows, cols)[:4]
+
+     if crop_params is not None:
+         crop_x, crop_y = crop_params[:2]
+         x1, y1, x2, y2 = x1 - crop_x, y1 - crop_y, x2 - crop_x, y2 - crop_y
+     if pad_params is not None:
+         top, bottom, left, right = pad_params
+         x1, y1, x2, y2 = x1 + left, y1 + top, x2 + left, y2 + top
+
+     return normalize_bbox((x1, y1, x2, y2), result_rows, result_cols)
+
+
+ def crop_and_pad_keypoint(
+     keypoint: KeypointInternalType,
+     crop_params: Optional[Sequence[int]],
+     pad_params: Optional[Sequence[int]],
+     rows: int,
+     cols: int,
+     result_rows: int,
+     result_cols: int,
+     keep_size: bool,
+ ) -> KeypointInternalType:
+     x, y, angle, scale = keypoint[:4]
+
+     if crop_params is not None:
+         crop_x1, crop_y1, crop_x2, crop_y2 = crop_params
+         x, y = x - crop_x1, y - crop_y1
+     if pad_params is not None:
+         top, bottom, left, right = pad_params
+         x, y = x + left, y + top
+
+     if keep_size and (result_cols != cols or result_rows != rows):
+         scale_x = cols / result_cols
+         scale_y = rows / result_rows
+         return FGeometric.keypoint_scale((x, y, angle, scale), scale_x, scale_y)
+
+     return x, y, angle, scale
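
Note how get_random_crop_coords() maps the uniform floats h_start/w_start in [0, 1) to an integer crop origin, so every valid origin from 0 through height - crop_height (inclusive) is reachable. A tiny sketch of the coordinate math:

from custom_albumentations.augmentations.crops.functional import get_random_crop_coords

# h_start=0.0 pins the crop to the top edge; w_start near 1.0 reaches the right edge.
coords = get_random_crop_coords(height=100, width=100, crop_height=60, crop_width=60,
                                h_start=0.0, w_start=0.999)
print(coords)  # (40, 0, 100, 60), i.e. (x1, y1, x2, y2)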
custom_albumentations/augmentations/crops/transforms.py ADDED
@@ -0,0 +1,943 @@
1
+ import math
2
+ import random
3
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
4
+
5
+ import cv2
6
+ import numpy as np
7
+
8
+ from custom_albumentations.core.bbox_utils import union_of_bboxes
9
+
10
+ from ...core.transforms_interface import (
11
+ BoxInternalType,
12
+ DualTransform,
13
+ KeypointInternalType,
14
+ to_tuple,
15
+ )
16
+ from ..geometric import functional as FGeometric
17
+ from . import functional as F
18
+
19
+ __all__ = [
20
+ "RandomCrop",
21
+ "CenterCrop",
22
+ "Crop",
23
+ "CropNonEmptyMaskIfExists",
24
+ "RandomSizedCrop",
25
+ "RandomResizedCrop",
26
+ "RandomCropNearBBox",
27
+ "RandomSizedBBoxSafeCrop",
28
+ "CropAndPad",
29
+ "RandomCropFromBorders",
30
+ "BBoxSafeRandomCrop",
31
+ ]
32
+
33
+
34
+ class RandomCrop(DualTransform):
35
+ """Crop a random part of the input.
36
+
37
+ Args:
38
+ height (int): height of the crop.
39
+ width (int): width of the crop.
40
+ p (float): probability of applying the transform. Default: 1.
41
+
42
+ Targets:
43
+ image, mask, bboxes, keypoints
44
+
45
+ Image types:
46
+ uint8, float32
47
+ """
48
+
49
+ def __init__(self, height, width, always_apply=False, p=1.0):
50
+ super().__init__(always_apply, p)
51
+ self.height = height
52
+ self.width = width
53
+
54
+ def apply(self, img, h_start=0, w_start=0, **params):
55
+ return F.random_crop(img, self.height, self.width, h_start, w_start)
56
+
57
+ def get_params(self):
58
+ return {"h_start": random.random(), "w_start": random.random()}
59
+
60
+ def apply_to_bbox(self, bbox, **params):
61
+ return F.bbox_random_crop(bbox, self.height, self.width, **params)
62
+
63
+ def apply_to_keypoint(self, keypoint, **params):
64
+ return F.keypoint_random_crop(keypoint, self.height, self.width, **params)
65
+
66
+ def get_transform_init_args_names(self):
67
+ return ("height", "width")
68
+
69
+
70
+ class CenterCrop(DualTransform):
71
+ """Crop the central part of the input.
72
+
73
+ Args:
74
+ height (int): height of the crop.
75
+ width (int): width of the crop.
76
+ p (float): probability of applying the transform. Default: 1.
77
+
78
+ Targets:
79
+ image, mask, bboxes, keypoints
80
+
81
+ Image types:
82
+ uint8, float32
83
+
84
+ Note:
85
+ It is recommended to use uint8 images as input.
86
+ Otherwise the operation will require internal conversion
87
+ float32 -> uint8 -> float32 that causes worse performance.
88
+ """
89
+
90
+ def __init__(self, height, width, always_apply=False, p=1.0):
91
+ super(CenterCrop, self).__init__(always_apply, p)
92
+ self.height = height
93
+ self.width = width
94
+
95
+ def apply(self, img, **params):
96
+ return F.center_crop(img, self.height, self.width)
97
+
98
+ def apply_to_bbox(self, bbox, **params):
99
+ return F.bbox_center_crop(bbox, self.height, self.width, **params)
100
+
101
+ def apply_to_keypoint(self, keypoint, **params):
102
+ return F.keypoint_center_crop(keypoint, self.height, self.width, **params)
103
+
104
+ def get_transform_init_args_names(self):
105
+ return ("height", "width")
106
+
107
+
108
+ class Crop(DualTransform):
109
+ """Crop region from image.
110
+
111
+ Args:
112
+ x_min (int): Minimum upper left x coordinate.
113
+ y_min (int): Minimum upper left y coordinate.
114
+ x_max (int): Maximum lower right x coordinate.
115
+ y_max (int): Maximum lower right y coordinate.
116
+
117
+ Targets:
118
+ image, mask, bboxes, keypoints
119
+
120
+ Image types:
121
+ uint8, float32
122
+ """
123
+
124
+ def __init__(self, x_min=0, y_min=0, x_max=1024, y_max=1024, always_apply=False, p=1.0):
125
+ super(Crop, self).__init__(always_apply, p)
126
+ self.x_min = x_min
127
+ self.y_min = y_min
128
+ self.x_max = x_max
129
+ self.y_max = y_max
130
+
131
+ def apply(self, img, **params):
132
+ return F.crop(img, x_min=self.x_min, y_min=self.y_min, x_max=self.x_max, y_max=self.y_max)
133
+
134
+ def apply_to_bbox(self, bbox, **params):
135
+ return F.bbox_crop(bbox, x_min=self.x_min, y_min=self.y_min, x_max=self.x_max, y_max=self.y_max, **params)
136
+
137
+ def apply_to_keypoint(self, keypoint, **params):
138
+ return F.crop_keypoint_by_coords(keypoint, crop_coords=(self.x_min, self.y_min, self.x_max, self.y_max))
139
+
140
+ def get_transform_init_args_names(self):
141
+ return ("x_min", "y_min", "x_max", "y_max")
142
+
143
+
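Crop is deterministic, so its effect is easy to check. A sketch under the same top-level-export assumption as above:

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.zeros((100, 100, 3), dtype=np.uint8)
>>> out = A.Crop(x_min=10, y_min=20, x_max=74, y_max=52, p=1.0)(image=image)["image"]
>>> out.shape  # (y_max - y_min, x_max - x_min, channels)
(32, 64, 3)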
144
+ class CropNonEmptyMaskIfExists(DualTransform):
145
+ """Crop area with mask if mask is non-empty, else make random crop.
146
+
147
+ Args:
148
+ height (int): vertical size of crop in pixels
149
+ width (int): horizontal size of crop in pixels
150
+ ignore_values (list of int): values to ignore in mask, `0` values are always ignored
151
+ (e.g. if background value is 5 set `ignore_values=[5]` to ignore)
152
+ ignore_channels (list of int): channels to ignore in mask
153
+ (e.g. if background is a first channel set `ignore_channels=[0]` to ignore)
154
+ p (float): probability of applying the transform. Default: 1.0.
155
+
156
+ Targets:
157
+ image, mask, bboxes, keypoints
158
+
159
+ Image types:
160
+ uint8, float32
161
+ """
162
+
163
+ def __init__(self, height, width, ignore_values=None, ignore_channels=None, always_apply=False, p=1.0):
164
+ super(CropNonEmptyMaskIfExists, self).__init__(always_apply, p)
165
+
166
+ if ignore_values is not None and not isinstance(ignore_values, list):
167
+ raise ValueError("Expected `ignore_values` of type `list`, got `{}`".format(type(ignore_values)))
168
+ if ignore_channels is not None and not isinstance(ignore_channels, list):
169
+ raise ValueError("Expected `ignore_channels` of type `list`, got `{}`".format(type(ignore_channels)))
170
+
171
+ self.height = height
172
+ self.width = width
173
+ self.ignore_values = ignore_values
174
+ self.ignore_channels = ignore_channels
175
+
176
+ def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
177
+ return F.crop(img, x_min, y_min, x_max, y_max)
178
+
179
+ def apply_to_bbox(self, bbox, x_min=0, x_max=0, y_min=0, y_max=0, **params):
180
+ return F.bbox_crop(
181
+ bbox, x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, rows=params["rows"], cols=params["cols"]
182
+ )
183
+
184
+ def apply_to_keypoint(self, keypoint, x_min=0, x_max=0, y_min=0, y_max=0, **params):
185
+ return F.crop_keypoint_by_coords(keypoint, crop_coords=(x_min, y_min, x_max, y_max))
186
+
187
+ def _preprocess_mask(self, mask):
188
+ mask_height, mask_width = mask.shape[:2]
189
+
190
+ if self.ignore_values is not None:
191
+ ignore_values_np = np.array(self.ignore_values)
192
+ mask = np.where(np.isin(mask, ignore_values_np), 0, mask)
193
+
194
+ if mask.ndim == 3 and self.ignore_channels is not None:
195
+ target_channels = np.array([ch for ch in range(mask.shape[-1]) if ch not in self.ignore_channels])
196
+ mask = np.take(mask, target_channels, axis=-1)
197
+
198
+ if self.height > mask_height or self.width > mask_width:
199
+ raise ValueError(
200
+ "Crop size ({},{}) is larger than image ({},{})".format(
201
+ self.height, self.width, mask_height, mask_width
202
+ )
203
+ )
204
+
205
+ return mask
206
+
207
+ def update_params(self, params, **kwargs):
208
+ super().update_params(params, **kwargs)
209
+ if "mask" in kwargs:
210
+ mask = self._preprocess_mask(kwargs["mask"])
211
+ elif "masks" in kwargs and len(kwargs["masks"]):
212
+ masks = kwargs["masks"]
213
+ mask = self._preprocess_mask(np.copy(masks[0]))  # copy needed: the mask is modified in place below
214
+ for m in masks[1:]:
215
+ mask |= self._preprocess_mask(m)
216
+ else:
217
+ raise RuntimeError("Can not find mask for CropNonEmptyMaskIfExists")
218
+
219
+ mask_height, mask_width = mask.shape[:2]
220
+
221
+ if mask.any():
222
+ mask = mask.sum(axis=-1) if mask.ndim == 3 else mask
223
+ non_zero_yx = np.argwhere(mask)
224
+ y, x = random.choice(non_zero_yx)
225
+ x_min = x - random.randint(0, self.width - 1)
226
+ y_min = y - random.randint(0, self.height - 1)
227
+ x_min = np.clip(x_min, 0, mask_width - self.width)
228
+ y_min = np.clip(y_min, 0, mask_height - self.height)
229
+ else:
230
+ x_min = random.randint(0, mask_width - self.width)
231
+ y_min = random.randint(0, mask_height - self.height)
232
+
233
+ x_max = x_min + self.width
234
+ y_max = y_min + self.height
235
+
236
+ params.update({"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max})
237
+ return params
238
+
239
+ def get_transform_init_args_names(self):
240
+ return ("height", "width", "ignore_values", "ignore_channels")
241
+
242
+
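A sketch of the mask-aware behaviour (illustrative only, assuming the same top-level exports): when the mask contains non-zero pixels, the sampled window is guaranteed to include at least one of them.

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
>>> mask = np.zeros((64, 64), dtype=np.uint8)
>>> mask[40:44, 40:44] = 1
>>> out = A.Compose([A.CropNonEmptyMaskIfExists(height=16, width=16, p=1.0)])(image=image, mask=mask)
>>> bool(out["mask"].any())
True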
243
+ class _BaseRandomSizedCrop(DualTransform):
244
+ # Base class for RandomSizedCrop and RandomResizedCrop
245
+
246
+ def __init__(self, height, width, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1.0):
247
+ super(_BaseRandomSizedCrop, self).__init__(always_apply, p)
248
+ self.height = height
249
+ self.width = width
250
+ self.interpolation = interpolation
251
+
252
+ def apply(self, img, crop_height=0, crop_width=0, h_start=0, w_start=0, interpolation=cv2.INTER_LINEAR, **params):
253
+ crop = F.random_crop(img, crop_height, crop_width, h_start, w_start)
254
+ return FGeometric.resize(crop, self.height, self.width, interpolation)
255
+
256
+ def apply_to_bbox(self, bbox, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
257
+ return F.bbox_random_crop(bbox, crop_height, crop_width, h_start, w_start, rows, cols)
258
+
259
+ def apply_to_keypoint(self, keypoint, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
260
+ keypoint = F.keypoint_random_crop(keypoint, crop_height, crop_width, h_start, w_start, rows, cols)
261
+ scale_x = self.width / crop_width
262
+ scale_y = self.height / crop_height
263
+ keypoint = FGeometric.keypoint_scale(keypoint, scale_x, scale_y)
264
+ return keypoint
265
+
266
+
267
+ class RandomSizedCrop(_BaseRandomSizedCrop):
268
+ """Crop a random part of the input and rescale it to some size.
269
+
270
+ Args:
271
+ min_max_height ((int, int)): crop size limits.
272
+ height (int): height after crop and resize.
273
+ width (int): width after crop and resize.
274
+ w2h_ratio (float): aspect ratio of crop.
275
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
276
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
277
+ Default: cv2.INTER_LINEAR.
278
+ p (float): probability of applying the transform. Default: 1.
279
+
280
+ Targets:
281
+ image, mask, bboxes, keypoints
282
+
283
+ Image types:
284
+ uint8, float32
285
+ """
286
+
287
+ def __init__(
288
+ self, min_max_height, height, width, w2h_ratio=1.0, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1.0
289
+ ):
290
+ super(RandomSizedCrop, self).__init__(
291
+ height=height, width=width, interpolation=interpolation, always_apply=always_apply, p=p
292
+ )
293
+ self.min_max_height = min_max_height
294
+ self.w2h_ratio = w2h_ratio
295
+
296
+ def get_params(self):
297
+ crop_height = random.randint(self.min_max_height[0], self.min_max_height[1])
298
+ return {
299
+ "h_start": random.random(),
300
+ "w_start": random.random(),
301
+ "crop_height": crop_height,
302
+ "crop_width": int(crop_height * self.w2h_ratio),
303
+ }
304
+
305
+ def get_transform_init_args_names(self):
306
+ return "min_max_height", "height", "width", "w2h_ratio", "interpolation"
307
+
308
+
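A hedged usage sketch: the crop height is sampled from `min_max_height` (the width follows via `w2h_ratio`), then the crop is resized to `(height, width)`, so the output size is fixed regardless of the sampled crop:

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
>>> aug = A.Compose([A.RandomSizedCrop(min_max_height=(32, 64), height=48, width=48, p=1.0)])
>>> aug(image=image)["image"].shape
(48, 48, 3)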
309
+ class RandomResizedCrop(_BaseRandomSizedCrop):
310
+ """Torchvision's variant of crop a random part of the input and rescale it to some size.
311
+
312
+ Args:
313
+ height (int): height after crop and resize.
314
+ width (int): width after crop and resize.
315
+ scale ((float, float)): range of the crop area, expressed as a fraction of the original image area.
316
+ ratio ((float, float)): range of aspect ratios of the random crop.
317
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
318
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
319
+ Default: cv2.INTER_LINEAR.
320
+ p (float): probability of applying the transform. Default: 1.
321
+
322
+ Targets:
323
+ image, mask, bboxes, keypoints
324
+
325
+ Image types:
326
+ uint8, float32
327
+ """
328
+
329
+ def __init__(
330
+ self,
331
+ height,
332
+ width,
333
+ scale=(0.08, 1.0),
334
+ ratio=(0.75, 1.3333333333333333),
335
+ interpolation=cv2.INTER_LINEAR,
336
+ always_apply=False,
337
+ p=1.0,
338
+ ):
339
+ super(RandomResizedCrop, self).__init__(
340
+ height=height, width=width, interpolation=interpolation, always_apply=always_apply, p=p
341
+ )
342
+ self.scale = scale
343
+ self.ratio = ratio
344
+
345
+ def get_params_dependent_on_targets(self, params):
346
+ img = params["image"]
347
+ area = img.shape[0] * img.shape[1]
348
+
349
+ for _attempt in range(10):
350
+ target_area = random.uniform(*self.scale) * area
351
+ log_ratio = (math.log(self.ratio[0]), math.log(self.ratio[1]))
352
+ aspect_ratio = math.exp(random.uniform(*log_ratio))
353
+
354
+ w = int(round(math.sqrt(target_area * aspect_ratio))) # skipcq: PTC-W0028
355
+ h = int(round(math.sqrt(target_area / aspect_ratio))) # skipcq: PTC-W0028
356
+
357
+ if 0 < w <= img.shape[1] and 0 < h <= img.shape[0]:
358
+ i = random.randint(0, img.shape[0] - h)
359
+ j = random.randint(0, img.shape[1] - w)
360
+ return {
361
+ "crop_height": h,
362
+ "crop_width": w,
363
+ "h_start": i * 1.0 / (img.shape[0] - h + 1e-10),
364
+ "w_start": j * 1.0 / (img.shape[1] - w + 1e-10),
365
+ }
366
+
367
+ # Fallback to central crop
368
+ in_ratio = img.shape[1] / img.shape[0]
369
+ if in_ratio < min(self.ratio):
370
+ w = img.shape[1]
371
+ h = int(round(w / min(self.ratio)))
372
+ elif in_ratio > max(self.ratio):
373
+ h = img.shape[0]
374
+ w = int(round(h * max(self.ratio)))
375
+ else: # whole image
376
+ w = img.shape[1]
377
+ h = img.shape[0]
378
+ i = (img.shape[0] - h) // 2
379
+ j = (img.shape[1] - w) // 2
380
+ return {
381
+ "crop_height": h,
382
+ "crop_width": w,
383
+ "h_start": i * 1.0 / (img.shape[0] - h + 1e-10),
384
+ "w_start": j * 1.0 / (img.shape[1] - w + 1e-10),
385
+ }
386
+
387
+ def get_params(self):
388
+ return {}
389
+
390
+ @property
391
+ def targets_as_params(self):
392
+ return ["image"]
393
+
394
+ def get_transform_init_args_names(self):
395
+ return "height", "width", "scale", "ratio", "interpolation"
396
+
397
+
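A sketch of the torchvision-style behaviour (illustrative, same export assumption): the crop area and aspect ratio are sampled from `scale` and `ratio`, and the result is always resized to `(height, width)`:

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
>>> aug = A.Compose([A.RandomResizedCrop(height=64, width=64, scale=(0.5, 1.0), p=1.0)])
>>> aug(image=image)["image"].shape
(64, 64, 3)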
398
+ class RandomCropNearBBox(DualTransform):
399
+ """Crop bbox from image with random shift by x,y coordinates
400
+
401
+ Args:
402
+ max_part_shift (float, (float, float)): Max shift in `height` and `width` dimensions relative
403
+ to `cropping_bbox` dimension.
404
+ If max_part_shift is a single float, the range will be (max_part_shift, max_part_shift).
405
+ Default (0.3, 0.3).
406
+ cropping_box_key (str): Additional target key for cropping box. Default `cropping_bbox`
407
+ p (float): probability of applying the transform. Default: 1.
408
+
409
+ Targets:
410
+ image, mask, bboxes, keypoints
411
+
412
+ Image types:
413
+ uint8, float32
414
+
415
+ Examples:
416
+ >>> aug = Compose([RandomCropNearBBox(max_part_shift=(0.1, 0.5), cropping_box_key='test_box')],
417
+ >>> bbox_params=BboxParams("pascal_voc"))
418
+ >>> result = aug(image=image, bboxes=bboxes, test_box=[0, 5, 10, 20])
419
+
420
+ """
421
+
422
+ def __init__(
423
+ self,
424
+ max_part_shift: Union[float, Tuple[float, float]] = (0.3, 0.3),
425
+ cropping_box_key: str = "cropping_bbox",
426
+ always_apply: bool = False,
427
+ p: float = 1.0,
428
+ ):
429
+ super(RandomCropNearBBox, self).__init__(always_apply, p)
430
+ self.max_part_shift = to_tuple(max_part_shift, low=max_part_shift)
431
+ self.cropping_bbox_key = cropping_box_key
432
+
433
+ if min(self.max_part_shift) < 0 or max(self.max_part_shift) > 1:
434
+ raise ValueError("Invalid max_part_shift. Got: {}".format(max_part_shift))
435
+
436
+ def apply(
437
+ self, img: np.ndarray, x_min: int = 0, x_max: int = 0, y_min: int = 0, y_max: int = 0, **params
438
+ ) -> np.ndarray:
439
+ return F.clamping_crop(img, x_min, y_min, x_max, y_max)
440
+
441
+ def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, int]:
442
+ bbox = params[self.cropping_bbox_key]
443
+ h_max_shift = round((bbox[3] - bbox[1]) * self.max_part_shift[0])
444
+ w_max_shift = round((bbox[2] - bbox[0]) * self.max_part_shift[1])
445
+
446
+ x_min = bbox[0] - random.randint(-w_max_shift, w_max_shift)
447
+ x_max = bbox[2] + random.randint(-w_max_shift, w_max_shift)
448
+
449
+ y_min = bbox[1] - random.randint(-h_max_shift, h_max_shift)
450
+ y_max = bbox[3] + random.randint(-h_max_shift, h_max_shift)
451
+
452
+ x_min = max(0, x_min)
453
+ y_min = max(0, y_min)
454
+
455
+ return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
456
+
457
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
458
+ return F.bbox_crop(bbox, **params)
459
+
460
+ def apply_to_keypoint(
461
+ self,
462
+ keypoint: Tuple[float, float, float, float],
463
+ x_min: int = 0,
464
+ x_max: int = 0,
465
+ y_min: int = 0,
466
+ y_max: int = 0,
467
+ **params
468
+ ) -> Tuple[float, float, float, float]:
469
+ return F.crop_keypoint_by_coords(keypoint, crop_coords=(x_min, y_min, x_max, y_max))
470
+
471
+ @property
472
+ def targets_as_params(self) -> List[str]:
473
+ return [self.cropping_bbox_key]
474
+
475
+ def get_transform_init_args_names(self) -> Tuple[str]:
476
+ return ("max_part_shift",)
477
+
478
+
479
+ class BBoxSafeRandomCrop(DualTransform):
480
+ """Crop a random part of the input without loss of bboxes.
481
+ Args:
482
+ erosion_rate (float): erosion rate applied on input image height before crop.
483
+ p (float): probability of applying the transform. Default: 1.
484
+ Targets:
485
+ image, mask, bboxes
486
+ Image types:
487
+ uint8, float32
488
+ """
489
+
490
+ def __init__(self, erosion_rate=0.0, always_apply=False, p=1.0):
491
+ super(BBoxSafeRandomCrop, self).__init__(always_apply, p)
492
+ self.erosion_rate = erosion_rate
493
+
494
+ def apply(self, img, crop_height=0, crop_width=0, h_start=0, w_start=0, **params):
495
+ return F.random_crop(img, crop_height, crop_width, h_start, w_start)
496
+
497
+ def get_params_dependent_on_targets(self, params):
498
+ img_h, img_w = params["image"].shape[:2]
499
+ if len(params["bboxes"]) == 0: # less likely, this class is for use with bboxes.
500
+ erosive_h = int(img_h * (1.0 - self.erosion_rate))
501
+ crop_height = img_h if erosive_h >= img_h else random.randint(erosive_h, img_h)
502
+ return {
503
+ "h_start": random.random(),
504
+ "w_start": random.random(),
505
+ "crop_height": crop_height,
506
+ "crop_width": int(crop_height * img_w / img_h),
507
+ }
508
+ # get union of all bboxes
509
+ x, y, x2, y2 = union_of_bboxes(
510
+ width=img_w, height=img_h, bboxes=params["bboxes"], erosion_rate=self.erosion_rate
511
+ )
512
+ # find bigger region
513
+ bx, by = x * random.random(), y * random.random()
514
+ bx2, by2 = x2 + (1 - x2) * random.random(), y2 + (1 - y2) * random.random()
515
+ bw, bh = bx2 - bx, by2 - by
516
+ crop_height = img_h if bh >= 1.0 else int(img_h * bh)
517
+ crop_width = img_w if bw >= 1.0 else int(img_w * bw)
518
+ h_start = np.clip(0.0 if bh >= 1.0 else by / (1.0 - bh), 0.0, 1.0)
519
+ w_start = np.clip(0.0 if bw >= 1.0 else bx / (1.0 - bw), 0.0, 1.0)
520
+ return {"h_start": h_start, "w_start": w_start, "crop_height": crop_height, "crop_width": crop_width}
521
+
522
+ def apply_to_bbox(self, bbox, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
523
+ return F.bbox_random_crop(bbox, crop_height, crop_width, h_start, w_start, rows, cols)
524
+
525
+ @property
526
+ def targets_as_params(self):
527
+ return ["image", "bboxes"]
528
+
529
+ def get_transform_init_args_names(self):
530
+ return ("erosion_rate",)
531
+
532
+
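An illustrative sketch (assuming `BboxParams` is also exported at the top level): with `erosion_rate=0.0` the crop window always contains the union of the boxes, so every input box survives:

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
>>> aug = A.Compose(
...     [A.BBoxSafeRandomCrop(erosion_rate=0.0, p=1.0)],
...     bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"]),
... )
>>> out = aug(image=image, bboxes=[(20, 20, 60, 60)], labels=[1])
>>> len(out["bboxes"])
1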
533
+ class RandomSizedBBoxSafeCrop(BBoxSafeRandomCrop):
534
+ """Crop a random part of the input and rescale it to some size without loss of bboxes.
535
+ Args:
536
+ height (int): height after crop and resize.
537
+ width (int): width after crop and resize.
538
+ erosion_rate (float): erosion rate applied on input image height before crop.
539
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
540
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
541
+ Default: cv2.INTER_LINEAR.
542
+ p (float): probability of applying the transform. Default: 1.
543
+ Targets:
544
+ image, mask, bboxes
545
+ Image types:
546
+ uint8, float32
547
+ """
548
+
549
+ def __init__(self, height, width, erosion_rate=0.0, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1.0):
550
+ super(RandomSizedBBoxSafeCrop, self).__init__(erosion_rate, always_apply, p)
551
+ self.height = height
552
+ self.width = width
553
+ self.interpolation = interpolation
554
+
555
+ def apply(self, img, crop_height=0, crop_width=0, h_start=0, w_start=0, interpolation=cv2.INTER_LINEAR, **params):
556
+ crop = F.random_crop(img, crop_height, crop_width, h_start, w_start)
557
+ return FGeometric.resize(crop, self.height, self.width, interpolation)
558
+
559
+ def get_transform_init_args_names(self):
560
+ return super().get_transform_init_args_names() + ("height", "width", "interpolation")
561
+
562
+
563
+ class CropAndPad(DualTransform):
564
+ """Crop and pad images by pixel amounts or fractions of image sizes.
565
+ Cropping removes pixels at the sides (i.e. extracts a subimage from a given full image).
566
+ Padding adds pixels to the sides (e.g. black pixels).
567
+ This transformation will never crop images below a height or width of ``1``.
568
+
569
+ Note:
570
+ This transformation automatically resizes images back to their original size. To deactivate this, add the
571
+ parameter ``keep_size=False``.
572
+
573
+ Args:
574
+ px (int or tuple):
575
+ The number of pixels to crop (negative values) or pad (positive values)
576
+ on each side of the image. Either this or the parameter `percent` may
577
+ be set, not both at the same time.
578
+ * If ``None``, then pixel-based cropping/padding will not be used.
579
+ * If ``int``, then that exact number of pixels will always be cropped/padded.
580
+ * If a ``tuple`` of two ``int`` s with values ``a`` and ``b``,
581
+ then each side will be cropped/padded by a random amount sampled
582
+ uniformly per image and side from the interval ``[a, b]``. If
583
+ however `sample_independently` is set to ``False``, only one
584
+ value will be sampled per image and used for all sides.
585
+ * If a ``tuple`` of four entries, then the entries represent top,
586
+ right, bottom, left. Each entry may be a single ``int`` (always
587
+ crop/pad by exactly that value), a ``tuple`` of two ``int`` s
588
+ ``a`` and ``b`` (crop/pad by an amount within ``[a, b]``), a
589
+ ``list`` of ``int`` s (crop/pad by a random value that is
590
+ contained in the ``list``).
591
+ percent (float or tuple):
592
+ The number of pixels to crop (negative values) or pad (positive values)
593
+ on each side of the image given as a *fraction* of the image
594
+ height/width. E.g. if this is set to ``-0.1``, the transformation will
595
+ always crop away ``10%`` of the image's height at both the top and the
596
+ bottom (both ``10%`` each), as well as ``10%`` of the width at the
597
+ right and left.
598
+ Expected value range is ``(-1.0, inf)``.
599
+ Either this or the parameter `px` may be set, not both
600
+ at the same time.
601
+ * If ``None``, then fraction-based cropping/padding will not be
602
+ used.
603
+ * If ``float``, then that fraction will always be cropped/padded.
604
+ * If a ``tuple`` of two ``float`` s with values ``a`` and ``b``,
605
+ then each side will be cropped/padded by a random fraction
606
+ sampled uniformly per image and side from the interval
607
+ ``[a, b]``. If however `sample_independently` is set to
608
+ ``False``, only one value will be sampled per image and used for
609
+ all sides.
610
+ * If a ``tuple`` of four entries, then the entries represent top,
611
+ right, bottom, left. Each entry may be a single ``float``
612
+ (always crop/pad by exactly that percent value), a ``tuple`` of
613
+ two ``float`` s ``a`` and ``b`` (crop/pad by a fraction from
614
+ ``[a, b]``), a ``list`` of ``float`` s (crop/pad by a random
615
+ value that is contained in the list).
616
+ pad_mode (int): OpenCV border mode.
617
+ pad_cval (number, Sequence[number]):
618
+ The constant value to use if the pad mode is ``BORDER_CONSTANT``.
619
+ * If ``number``, then that value will be used.
620
+ * If a ``tuple`` of two ``number`` s and at least one of them is
621
+ a ``float``, then a random number will be uniformly sampled per
622
+ image from the continuous interval ``[a, b]`` and used as the
623
+ value. If both ``number`` s are ``int`` s, the interval is
624
+ discrete.
625
+ * If a ``list`` of ``number``, then a random value will be chosen
626
+ from the elements of the ``list`` and used as the value.
627
+ pad_cval_mask (number, Sequence[number]): Same as pad_cval but only for masks.
628
+ keep_size (bool):
629
+ After cropping and padding, the result image will usually have a
630
+ different height/width compared to the original input image. If this
631
+ parameter is set to ``True``, then the cropped/padded image will be
632
+ resized to the input image's size, i.e. the output shape is always identical to the input shape.
633
+ sample_independently (bool):
634
+ If ``False`` *and* the values for `px`/`percent` result in exactly
635
+ *one* probability distribution for all image sides, only one single
636
+ value will be sampled from that probability distribution and used for
637
+ all sides. I.e. the crop/pad amount then is the same for all sides.
638
+ If ``True``, four values will be sampled independently, one per side.
639
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
640
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
641
+ Default: cv2.INTER_LINEAR.
642
+
643
+ Targets:
644
+ image, mask, bboxes, keypoints
645
+
646
+ Image types:
647
+ any
648
+ """
649
+
650
+ def __init__(
651
+ self,
652
+ px: Optional[Union[int, Sequence[float], Sequence[Tuple]]] = None,
653
+ percent: Optional[Union[float, Sequence[float], Sequence[Tuple]]] = None,
654
+ pad_mode: int = cv2.BORDER_CONSTANT,
655
+ pad_cval: Union[float, Sequence[float]] = 0,
656
+ pad_cval_mask: Union[float, Sequence[float]] = 0,
657
+ keep_size: bool = True,
658
+ sample_independently: bool = True,
659
+ interpolation: int = cv2.INTER_LINEAR,
660
+ always_apply: bool = False,
661
+ p: float = 1.0,
662
+ ):
663
+ super().__init__(always_apply, p)
664
+
665
+ if px is None and percent is None:
666
+ raise ValueError("px and percent are empty!")
667
+ if px is not None and percent is not None:
668
+ raise ValueError("Only px or percent may be set!")
669
+
670
+ self.px = px
671
+ self.percent = percent
672
+
673
+ self.pad_mode = pad_mode
674
+ self.pad_cval = pad_cval
675
+ self.pad_cval_mask = pad_cval_mask
676
+
677
+ self.keep_size = keep_size
678
+ self.sample_independently = sample_independently
679
+
680
+ self.interpolation = interpolation
681
+
682
+ def apply(
683
+ self,
684
+ img: np.ndarray,
685
+ crop_params: Sequence[int] = (),
686
+ pad_params: Sequence[int] = (),
687
+ pad_value: Union[int, float] = 0,
688
+ rows: int = 0,
689
+ cols: int = 0,
690
+ interpolation: int = cv2.INTER_LINEAR,
691
+ **params
692
+ ) -> np.ndarray:
693
+ return F.crop_and_pad(
694
+ img, crop_params, pad_params, pad_value, rows, cols, interpolation, self.pad_mode, self.keep_size
695
+ )
696
+
697
+ def apply_to_mask(
698
+ self,
699
+ img: np.ndarray,
700
+ crop_params: Optional[Sequence[int]] = None,
701
+ pad_params: Optional[Sequence[int]] = None,
702
+ pad_value_mask: Optional[float] = None,
703
+ rows: int = 0,
704
+ cols: int = 0,
705
+ interpolation: int = cv2.INTER_NEAREST,
706
+ **params
707
+ ) -> np.ndarray:
708
+ return F.crop_and_pad(
709
+ img, crop_params, pad_params, pad_value_mask, rows, cols, interpolation, self.pad_mode, self.keep_size
710
+ )
711
+
712
+ def apply_to_bbox(
713
+ self,
714
+ bbox: BoxInternalType,
715
+ crop_params: Optional[Sequence[int]] = None,
716
+ pad_params: Optional[Sequence[int]] = None,
717
+ rows: int = 0,
718
+ cols: int = 0,
719
+ result_rows: int = 0,
720
+ result_cols: int = 0,
721
+ **params
722
+ ) -> BoxInternalType:
723
+ return F.crop_and_pad_bbox(bbox, crop_params, pad_params, rows, cols, result_rows, result_cols)
724
+
725
+ def apply_to_keypoint(
726
+ self,
727
+ keypoint: KeypointInternalType,
728
+ crop_params: Optional[Sequence[int]] = None,
729
+ pad_params: Optional[Sequence[int]] = None,
730
+ rows: int = 0,
731
+ cols: int = 0,
732
+ result_rows: int = 0,
733
+ result_cols: int = 0,
734
+ **params
735
+ ) -> KeypointInternalType:
736
+ return F.crop_and_pad_keypoint(
737
+ keypoint, crop_params, pad_params, rows, cols, result_rows, result_cols, self.keep_size
738
+ )
739
+
740
+ @property
741
+ def targets_as_params(self) -> List[str]:
742
+ return ["image"]
743
+
744
+ @staticmethod
745
+ def __prevent_zero(val1: int, val2: int, max_val: int) -> Tuple[int, int]:
746
+ regain = abs(max_val) + 1
747
+ regain1 = regain // 2
748
+ regain2 = regain // 2
749
+ if regain1 + regain2 < regain:
750
+ regain1 += 1
751
+
752
+ if regain1 > val1:
753
+ diff = regain1 - val1
754
+ regain1 = val1
755
+ regain2 += diff
756
+ elif regain2 > val2:
757
+ diff = regain2 - val2
758
+ regain2 = val2
759
+ regain1 += diff
760
+
761
+ val1 = val1 - regain1
762
+ val2 = val2 - regain2
763
+
764
+ return val1, val2
765
+
766
+ @staticmethod
767
+ def _prevent_zero(crop_params: List[int], height: int, width: int) -> Sequence[int]:
768
+ top, right, bottom, left = crop_params
769
+
770
+ remaining_height = height - (top + bottom)
771
+ remaining_width = width - (left + right)
772
+
773
+ if remaining_height < 1:
774
+ top, bottom = CropAndPad.__prevent_zero(top, bottom, height)
775
+ if remaining_width < 1:
776
+ left, right = CropAndPad.__prevent_zero(left, right, width)
777
+
778
+ return [max(top, 0), max(right, 0), max(bottom, 0), max(left, 0)]
779
+
780
+ def get_params_dependent_on_targets(self, params) -> dict:
781
+ height, width = params["image"].shape[:2]
782
+
783
+ if self.px is not None:
784
+ params = self._get_px_params()
785
+ else:
786
+ params = self._get_percent_params()
787
+ params[0] = int(params[0] * height)
788
+ params[1] = int(params[1] * width)
789
+ params[2] = int(params[2] * height)
790
+ params[3] = int(params[3] * width)
791
+
792
+ pad_params = [max(i, 0) for i in params]
793
+
794
+ crop_params = self._prevent_zero([-min(i, 0) for i in params], height, width)
795
+
796
+ top, right, bottom, left = crop_params
797
+ crop_params = [left, top, width - right, height - bottom]
798
+ result_rows = crop_params[3] - crop_params[1]
799
+ result_cols = crop_params[2] - crop_params[0]
800
+ if result_cols == width and result_rows == height:
801
+ crop_params = []
802
+
803
+ top, right, bottom, left = pad_params
804
+ pad_params = [top, bottom, left, right]
805
+ if any(pad_params):
806
+ result_rows += top + bottom
807
+ result_cols += left + right
808
+ else:
809
+ pad_params = []
810
+
811
+ return {
812
+ "crop_params": crop_params or None,
813
+ "pad_params": pad_params or None,
814
+ "pad_value": None if pad_params is None else self._get_pad_value(self.pad_cval),
815
+ "pad_value_mask": None if pad_params is None else self._get_pad_value(self.pad_cval_mask),
816
+ "result_rows": result_rows,
817
+ "result_cols": result_cols,
818
+ }
819
+
820
+ def _get_px_params(self) -> List[int]:
821
+ if self.px is None:
822
+ raise ValueError("px is not set")
823
+
824
+ if isinstance(self.px, int):
825
+ params = [self.px] * 4
826
+ elif len(self.px) == 2:
827
+ if self.sample_independently:
828
+ params = [random.randrange(*self.px) for _ in range(4)]
829
+ else:
830
+ px = random.randrange(*self.px)
831
+ params = [px] * 4
832
+ else:
833
+ params = [i if isinstance(i, int) else random.randrange(*i) for i in self.px] # type: ignore
834
+
835
+ return params # [top, right, bottom, left]
836
+
837
+ def _get_percent_params(self) -> List[float]:
838
+ if self.percent is None:
839
+ raise ValueError("percent is not set")
840
+
841
+ if isinstance(self.percent, float):
842
+ params = [self.percent] * 4
843
+ elif len(self.percent) == 2:
844
+ if self.sample_independently:
845
+ params = [random.uniform(*self.percent) for _ in range(4)]
846
+ else:
847
+ px = random.uniform(*self.percent)
848
+ params = [px] * 4
849
+ else:
850
+ params = [i if isinstance(i, (int, float)) else random.uniform(*i) for i in self.percent]
851
+
852
+ return params # params = [top, right, bottom, left]
853
+
854
+ @staticmethod
855
+ def _get_pad_value(pad_value: Union[float, Sequence[float]]) -> Union[int, float]:
856
+ if isinstance(pad_value, (int, float)):
857
+ return pad_value
858
+
859
+ if len(pad_value) == 2:
860
+ a, b = pad_value
861
+ if isinstance(a, int) and isinstance(b, int):
862
+ return random.randint(a, b)
863
+
864
+ return random.uniform(a, b)
865
+
866
+ return random.choice(pad_value)
867
+
868
+ def get_transform_init_args_names(self) -> Tuple[str, ...]:
869
+ return (
870
+ "px",
871
+ "percent",
872
+ "pad_mode",
873
+ "pad_cval",
874
+ "pad_cval_mask",
875
+ "keep_size",
876
+ "sample_independently",
877
+ "interpolation",
878
+ )
879
+
880
+
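A sketch for CropAndPad (illustrative, same assumptions): with a symmetric `percent` range each side is independently cropped or padded by up to 10%, and `keep_size=True` (the default) resizes the result back to the input shape:

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
>>> aug = A.Compose([A.CropAndPad(percent=(-0.1, 0.1), p=1.0)])
>>> aug(image=image)["image"].shape
(100, 100, 3)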
881
+ class RandomCropFromBorders(DualTransform):
882
+ """Crop bbox from image randomly cut parts from borders without resize at the end
883
+
884
+ Args:
885
+ crop_left (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
886
+ from left side in range [0, crop_left * width)
887
+ crop_right (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
888
+ from right side in range [(1 - crop_right) * width, width)
889
+ crop_top (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
890
+ from top side in range [0, crop_top * height)
891
+ crop_bottom (float): single float value in (0.0, 1.0) range. Default 0.1. Image will be randomly cut
892
+ from bottom side in range [(1 - crop_bottom) * height, height)
893
+ p (float): probability of applying the transform. Default: 1.
894
+
895
+ Targets:
896
+ image, mask, bboxes, keypoints
897
+
898
+ Image types:
899
+ uint8, float32
900
+ """
901
+
902
+ def __init__(
903
+ self,
904
+ crop_left=0.1,
905
+ crop_right=0.1,
906
+ crop_top=0.1,
907
+ crop_bottom=0.1,
908
+ always_apply=False,
909
+ p=1.0,
910
+ ):
911
+ super(RandomCropFromBorders, self).__init__(always_apply, p)
912
+ self.crop_left = crop_left
913
+ self.crop_right = crop_right
914
+ self.crop_top = crop_top
915
+ self.crop_bottom = crop_bottom
916
+
917
+ def get_params_dependent_on_targets(self, params):
918
+ img = params["image"]
919
+ x_min = random.randint(0, int(self.crop_left * img.shape[1]))
920
+ x_max = random.randint(max(x_min + 1, int((1 - self.crop_right) * img.shape[1])), img.shape[1])
921
+ y_min = random.randint(0, int(self.crop_top * img.shape[0]))
922
+ y_max = random.randint(max(y_min + 1, int((1 - self.crop_bottom) * img.shape[0])), img.shape[0])
923
+ return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
924
+
925
+ def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
926
+ return F.clamping_crop(img, x_min, y_min, x_max, y_max)
927
+
928
+ def apply_to_mask(self, mask, x_min=0, x_max=0, y_min=0, y_max=0, **params):
929
+ return F.clamping_crop(mask, x_min, y_min, x_max, y_max)
930
+
931
+ def apply_to_bbox(self, bbox, x_min=0, x_max=0, y_min=0, y_max=0, **params):
932
+ rows, cols = params["rows"], params["cols"]
933
+ return F.bbox_crop(bbox, x_min, y_min, x_max, y_max, rows, cols)
934
+
935
+ def apply_to_keypoint(self, keypoint, x_min=0, x_max=0, y_min=0, y_max=0, **params):
936
+ return F.crop_keypoint_by_coords(keypoint, crop_coords=(x_min, y_min, x_max, y_max))
937
+
938
+ @property
939
+ def targets_as_params(self):
940
+ return ["image"]
941
+
942
+ def get_transform_init_args_names(self):
943
+ return "crop_left", "crop_right", "crop_top", "crop_bottom"
custom_albumentations/augmentations/domain_adaptation.py ADDED
@@ -0,0 +1,337 @@
1
+ import random
2
+ from typing import Any, Callable, Literal, Sequence, Tuple
3
+
4
+ import cv2
5
+ import numpy as np
6
+ from custom_qudida import DomainAdapter
7
+ from skimage.exposure import match_histograms
8
+ from sklearn.decomposition import PCA
9
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
10
+
11
+ from custom_albumentations.augmentations.utils import (
12
+ clipped,
13
+ get_opencv_dtype_from_numpy,
14
+ is_grayscale_image,
15
+ is_multispectral_image,
16
+ preserve_shape,
17
+ read_rgb_image,
18
+ )
19
+
20
+ from ..core.transforms_interface import ImageOnlyTransform, ScaleFloatType, to_tuple
21
+
22
+ __all__ = [
23
+ "HistogramMatching",
24
+ "FDA",
25
+ "PixelDistributionAdaptation",
26
+ "fourier_domain_adaptation",
27
+ "apply_histogram",
28
+ "adapt_pixel_distribution",
29
+ ]
30
+
31
+
32
+ @clipped
33
+ @preserve_shape
34
+ def fourier_domain_adaptation(img: np.ndarray, target_img: np.ndarray, beta: float) -> np.ndarray:
35
+ """
36
+ Fourier Domain Adaptation from https://github.com/YanchaoYang/FDA
37
+
38
+ Args:
39
+ img: source image
40
+ target_img: target image for domain adaptation
41
+ beta: coefficient from source paper
42
+
43
+ Returns:
44
+ transformed image
45
+
46
+ """
47
+
48
+ img = np.squeeze(img)
49
+ target_img = np.squeeze(target_img)
50
+
51
+ if target_img.shape != img.shape:
52
+ raise ValueError(
53
+ "The source and target images must have the same shape,"
54
+ " but got {} and {} respectively.".format(img.shape, target_img.shape)
55
+ )
56
+
57
+ # get fft of both source and target
58
+ fft_src = np.fft.fft2(img.astype(np.float32), axes=(0, 1))
59
+ fft_trg = np.fft.fft2(target_img.astype(np.float32), axes=(0, 1))
60
+
61
+ # extract amplitude and phase of both fft-s
62
+ amplitude_src, phase_src = np.abs(fft_src), np.angle(fft_src)
63
+ amplitude_trg = np.abs(fft_trg)
64
+
65
+ # mutate the amplitude part of source with target
66
+ amplitude_src = np.fft.fftshift(amplitude_src, axes=(0, 1))
67
+ amplitude_trg = np.fft.fftshift(amplitude_trg, axes=(0, 1))
68
+ height, width = amplitude_src.shape[:2]
69
+ border = np.floor(min(height, width) * beta).astype(int)
70
+ center_y, center_x = np.floor([height / 2.0, width / 2.0]).astype(int)
71
+
72
+ y1, y2 = center_y - border, center_y + border + 1
73
+ x1, x2 = center_x - border, center_x + border + 1
74
+
75
+ amplitude_src[y1:y2, x1:x2] = amplitude_trg[y1:y2, x1:x2]
76
+ amplitude_src = np.fft.ifftshift(amplitude_src, axes=(0, 1))
77
+
78
+ # get mutated image
79
+ src_image_transformed = np.fft.ifft2(amplitude_src * np.exp(1j * phase_src), axes=(0, 1))
80
+ src_image_transformed = np.real(src_image_transformed)
81
+
82
+ return src_image_transformed
83
+
84
+
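An illustrative direct call to the helper above; the source and target must share a shape, and `beta` sets the size of the low-frequency window whose amplitude is swapped in:

>>> import numpy as np
>>> src = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
>>> trg = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
>>> out = fourier_domain_adaptation(src, trg, beta=0.05)
>>> out.shape == src.shape
True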
85
+ @preserve_shape
86
+ def apply_histogram(img: np.ndarray, reference_image: np.ndarray, blend_ratio: float) -> np.ndarray:
87
+ if img.dtype != reference_image.dtype:
88
+ raise RuntimeError(
89
+ f"Dtype of image and reference image must be the same. Got {img.dtype} and {reference_image.dtype}"
90
+ )
91
+ if img.shape[:2] != reference_image.shape[:2]:
92
+ reference_image = cv2.resize(reference_image, dsize=(img.shape[1], img.shape[0]))
93
+
94
+ img, reference_image = np.squeeze(img), np.squeeze(reference_image)
95
+
96
+ try:
97
+ matched = match_histograms(img, reference_image, channel_axis=2 if len(img.shape) == 3 else None)
98
+ except TypeError:
99
+ matched = match_histograms(img, reference_image, multichannel=True) # case for scikit-image<0.19.1
100
+ img = cv2.addWeighted(
101
+ matched,
102
+ blend_ratio,
103
+ img,
104
+ 1 - blend_ratio,
105
+ 0,
106
+ dtype=get_opencv_dtype_from_numpy(img.dtype),
107
+ )
108
+ return img
109
+
110
+
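A quick sketch for the helper above (illustrative): the reference is resized internally when the spatial sizes differ, and `blend_ratio` mixes the matched image back into the original:

>>> import numpy as np
>>> img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
>>> ref = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
>>> apply_histogram(img, ref, blend_ratio=0.7).shape
(64, 64, 3)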
111
+ @preserve_shape
112
+ def adapt_pixel_distribution(
113
+ img: np.ndarray, ref: np.ndarray, transform_type: str = "pca", weight: float = 0.5
114
+ ) -> np.ndarray:
115
+ initial_type = img.dtype
116
+ transformer = {"pca": PCA, "standard": StandardScaler, "minmax": MinMaxScaler}[transform_type]()
117
+ adapter = DomainAdapter(transformer=transformer, ref_img=ref)
118
+ result = adapter(img).astype("float32")
119
+ blended = (img.astype("float32") * (1 - weight) + result * weight).astype(initial_type)
120
+ return blended
121
+
122
+
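A hedged sketch of a direct call (it relies on `DomainAdapter` from the bundled `custom_qudida` package, imported above):

>>> import numpy as np
>>> img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
>>> ref = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
>>> adapt_pixel_distribution(img, ref, transform_type="minmax", weight=0.5).shape
(64, 64, 3)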
123
+ class HistogramMatching(ImageOnlyTransform):
124
+ """
125
+ Apply histogram matching. It manipulates the pixels of an input image so that its histogram matches
126
+ the histogram of the reference image. If the images have multiple channels, the matching is done independently
127
+ for each channel, as long as the number of channels is equal in the input image and the reference.
128
+
129
+ Histogram matching can be used as a lightweight normalisation for image processing,
130
+ such as feature matching, especially in circumstances where the images have been taken from different
131
+ sources or under different conditions (e.g. lighting).
132
+
133
+ See:
134
+ https://scikit-image.org/docs/dev/auto_examples/color_exposure/plot_histogram_matching.html
135
+
136
+ Args:
137
+ reference_images (Sequence[Any]): Sequence of objects that will be converted to images by `read_fn`. By default,
138
+ it expects a sequence of paths to images.
139
+ blend_ratio (float, float): Tuple of min and max blend ratio. Matched image will be blended with original
140
+ with random blend factor for increased diversity of generated images.
141
+ read_fn (Callable): User-defined function to read image. Function should get an element of `reference_images`
142
+ and return numpy array of image pixels. Default: takes as input a path to an image and returns a numpy array.
143
+ p (float): probability of applying the transform. Default: 1.0.
144
+
145
+ Targets:
146
+ image
147
+
148
+ Image types:
149
+ uint8, uint16, float32
150
+ """
151
+
152
+ def __init__(
153
+ self,
154
+ reference_images: Sequence[Any],
155
+ blend_ratio: Tuple[float, float] = (0.5, 1.0),
156
+ read_fn: Callable[[Any], np.ndarray] = read_rgb_image,
157
+ always_apply: bool = False,
158
+ p: float = 0.5,
159
+ ):
160
+ super().__init__(always_apply=always_apply, p=p)
161
+ self.reference_images = reference_images
162
+ self.read_fn = read_fn
163
+ self.blend_ratio = blend_ratio
164
+
165
+ def apply(self, img, reference_image=None, blend_ratio=0.5, **params):
166
+ return apply_histogram(img, reference_image, blend_ratio)
167
+
168
+ def get_params(self):
169
+ return {
170
+ "reference_image": self.read_fn(random.choice(self.reference_images)),
171
+ "blend_ratio": random.uniform(self.blend_ratio[0], self.blend_ratio[1]),
172
+ }
173
+
174
+ def get_transform_init_args_names(self):
175
+ return ("reference_images", "blend_ratio", "read_fn")
176
+
177
+ def _to_dict(self):
178
+ raise NotImplementedError("HistogramMatching can not be serialized.")
179
+
180
+
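A usage sketch mirroring the FDA example below (illustrative; `read_fn=lambda x: x` lets in-memory arrays stand in for file paths):

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
>>> reference = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
>>> aug = A.Compose([A.HistogramMatching([reference], read_fn=lambda x: x, p=1)])
>>> result = aug(image=image)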
181
+ class FDA(ImageOnlyTransform):
182
+ """
183
+ Fourier Domain Adaptation from https://github.com/YanchaoYang/FDA
184
+ Simple "style transfer".
185
+
186
+ Args:
187
+ reference_images (Sequence[Any]): Sequence of objects that will be converted to images by `read_fn`. By default,
188
+ it expects a sequence of paths to images.
189
+ beta_limit (float or tuple of float): coefficient beta from the paper. Values below 0.3 are recommended.
190
+ read_fn (Callable): User-defined function to read image. Function should get an element of `reference_images`
191
+ and return numpy array of image pixels. Default: takes as input a path to an image and returns a numpy array.
192
+
193
+ Targets:
194
+ image
195
+
196
+ Image types:
197
+ uint8, float32
198
+
199
+ Reference:
200
+ https://github.com/YanchaoYang/FDA
201
+ https://openaccess.thecvf.com/content_CVPR_2020/papers/Yang_FDA_Fourier_Domain_Adaptation_for_Semantic_Segmentation_CVPR_2020_paper.pdf
202
+
203
+ Example:
204
+ >>> import numpy as np
205
+ >>> import custom_albumentations as A
206
+ >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
207
+ >>> target_image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
208
+ >>> aug = A.Compose([A.FDA([target_image], p=1, read_fn=lambda x: x)])
209
+ >>> result = aug(image=image)
210
+
211
+ """
212
+
213
+ def __init__(
214
+ self,
215
+ reference_images: Sequence[Any],
216
+ beta_limit: ScaleFloatType = 0.1,
217
+ read_fn: Callable[[Any], np.ndarray] = read_rgb_image,
218
+ always_apply: bool = False,
219
+ p: float = 0.5,
220
+ ):
221
+ super(FDA, self).__init__(always_apply=always_apply, p=p)
222
+ self.reference_images = reference_images
223
+ self.read_fn = read_fn
224
+ self.beta_limit = to_tuple(beta_limit, low=0)
225
+
226
+ def apply(self, img, target_image=None, beta=0.1, **params):
227
+ return fourier_domain_adaptation(img=img, target_img=target_image, beta=beta)
228
+
229
+ def get_params_dependent_on_targets(self, params):
230
+ img = params["image"]
231
+ target_img = self.read_fn(random.choice(self.reference_images))
232
+ target_img = cv2.resize(target_img, dsize=(img.shape[1], img.shape[0]))
233
+
234
+ return {"target_image": target_img}
235
+
236
+ def get_params(self):
237
+ return {"beta": random.uniform(self.beta_limit[0], self.beta_limit[1])}
238
+
239
+ @property
240
+ def targets_as_params(self):
241
+ return ["image"]
242
+
243
+ def get_transform_init_args_names(self):
244
+ return ("reference_images", "beta_limit", "read_fn")
245
+
246
+ def _to_dict(self):
247
+ raise NotImplementedError("FDA can not be serialized.")
248
+
249
+
250
+ class PixelDistributionAdaptation(ImageOnlyTransform):
251
+ """
252
+ A naive and quick pixel-level domain adaptation. It fits a simple transform (such as PCA, StandardScaler
253
+ or MinMaxScaler) on both the original and the reference image, transforms the original image with the transform
254
+ trained on it, and then applies the inverse transformation using the transform fitted on the reference image.
255
+
256
+ Args:
257
+ reference_images (Sequence[Any]): Sequence of objects that will be converted to images by `read_fn`. By default,
258
+ it expects a sequence of paths to images.
259
+ blend_ratio (float, float): Tuple of min and max blend ratio. Matched image will be blended with original
260
+ with random blend factor for increased diversity of generated images.
261
+ read_fn (Callable): User-defined function to read image. Function should get an element of `reference_images`
262
+ and return numpy array of image pixels. Default: takes as input a path to an image and returns a numpy array.
263
+ transform_type (str): type of transform; "pca", "standard", "minmax" are allowed.
264
+ p (float): probability of applying the transform. Default: 1.0.
265
+
266
+ Targets:
267
+ image
268
+
269
+ Image types:
270
+ uint8, float32
271
+
272
+ See also: https://github.com/arsenyinfo/qudida
273
+ """
274
+
275
+ def __init__(
276
+ self,
277
+ reference_images: Sequence[Any],
278
+ blend_ratio: Tuple[float, float] = (0.25, 1.0),
279
+ read_fn: Callable[[Any], np.ndarray] = read_rgb_image,
280
+ transform_type: Literal["pca", "standard", "minmax"] = "pca",
281
+ always_apply: bool = False,
282
+ p: float = 0.5,
283
+ ):
284
+ super().__init__(always_apply=always_apply, p=p)
285
+ self.reference_images = reference_images
286
+ self.read_fn = read_fn
287
+ self.blend_ratio = blend_ratio
288
+ expected_transformers = ("pca", "standard", "minmax")
289
+ if transform_type not in expected_transformers:
290
+ raise ValueError(f"Got unexpected transform_type {transform_type}. Expected one of {expected_transformers}")
291
+ self.transform_type = transform_type
292
+
293
+ @staticmethod
294
+ def _validate_shape(img: np.ndarray):
295
+ if is_grayscale_image(img) or is_multispectral_image(img):
296
+ raise ValueError(
297
+ f"Unexpected image shape: expected 3 dimensions, got {len(img.shape)}."
298
+ f"Is it a grayscale or multispectral image? It's not supported for now."
299
+ )
300
+
301
+ def ensure_uint8(self, img: np.ndarray) -> Tuple[np.ndarray, bool]:
302
+ if img.dtype == np.float32:
303
+ if img.min() < 0 or img.max() > 1:
304
+ message = (
305
+ "PixelDistributionAdaptation uses uint8 under the hood, so float32 should be converted,"
306
+ "Can not do it automatically when the image is out of [0..1] range."
307
+ )
308
+ raise TypeError(message)
309
+ return (img * 255).astype("uint8"), True
310
+ return img, False
311
+
312
+ def apply(self, img, reference_image, blend_ratio, **params):
313
+ self._validate_shape(img)
314
+ reference_image, _ = self.ensure_uint8(reference_image)
315
+ img, needs_reconvert = self.ensure_uint8(img)
316
+
317
+ adapted = adapt_pixel_distribution(
318
+ img=img,
319
+ ref=reference_image,
320
+ weight=blend_ratio,
321
+ transform_type=self.transform_type,
322
+ )
323
+ if needs_reconvert:
324
+ adapted = adapted.astype("float32") * (1 / 255)
325
+ return adapted
326
+
327
+ def get_params(self):
328
+ return {
329
+ "reference_image": self.read_fn(random.choice(self.reference_images)),
330
+ "blend_ratio": random.uniform(self.blend_ratio[0], self.blend_ratio[1]),
331
+ }
332
+
333
+ def get_transform_init_args_names(self):
334
+ return ("reference_images", "blend_ratio", "read_fn", "transform_type")
335
+
336
+ def _to_dict(self):
337
+ raise NotImplementedError("PixelDistributionAdaptation can not be serialized.")
custom_albumentations/augmentations/dropout/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .channel_dropout import *
2
+ from .coarse_dropout import *
3
+ from .cutout import *
4
+ from .grid_dropout import *
5
+ from .mask_dropout import *
custom_albumentations/augmentations/dropout/channel_dropout.py ADDED
@@ -0,0 +1,72 @@
1
+ import random
2
+ from typing import Any, Mapping, Tuple, Union
3
+
4
+ import numpy as np
5
+
6
+ from custom_albumentations.core.transforms_interface import ImageOnlyTransform
7
+
8
+ from .functional import channel_dropout
9
+
10
+ __all__ = ["ChannelDropout"]
11
+
12
+
13
+ class ChannelDropout(ImageOnlyTransform):
14
+ """Randomly Drop Channels in the input Image.
15
+
16
+ Args:
17
+ channel_drop_range (int, int): range from which we choose the number of channels to drop.
18
+ fill_value (int, float): pixel value for the dropped channel.
19
+ p (float): probability of applying the transform. Default: 0.5.
20
+
21
+ Targets:
22
+ image
23
+
24
+ Image types:
25
+ uint8, uint16, uint32, float32
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ channel_drop_range: Tuple[int, int] = (1, 1),
31
+ fill_value: Union[int, float] = 0,
32
+ always_apply: bool = False,
33
+ p: float = 0.5,
34
+ ):
35
+ super(ChannelDropout, self).__init__(always_apply, p)
36
+
37
+ self.channel_drop_range = channel_drop_range
38
+
39
+ self.min_channels = channel_drop_range[0]
40
+ self.max_channels = channel_drop_range[1]
41
+
42
+ if not 1 <= self.min_channels <= self.max_channels:
43
+ raise ValueError("Invalid channel_drop_range. Got: {}".format(channel_drop_range))
44
+
45
+ self.fill_value = fill_value
46
+
47
+ def apply(self, img: np.ndarray, channels_to_drop: Tuple[int, ...] = (0,), **params) -> np.ndarray:
48
+ return channel_dropout(img, channels_to_drop, self.fill_value)
49
+
50
+ def get_params_dependent_on_targets(self, params: Mapping[str, Any]):
51
+ img = params["image"]
52
+
53
+ num_channels = img.shape[-1]
54
+
55
+ if len(img.shape) == 2 or num_channels == 1:
56
+ raise NotImplementedError("Images has one channel. ChannelDropout is not defined.")
57
+
58
+ if self.max_channels >= num_channels:
59
+ raise ValueError("Can not drop all channels in ChannelDropout.")
60
+
61
+ num_drop_channels = random.randint(self.min_channels, self.max_channels)
62
+
63
+ channels_to_drop = random.sample(range(num_channels), k=num_drop_channels)
64
+
65
+ return {"channels_to_drop": channels_to_drop}
66
+
67
+ def get_transform_init_args_names(self) -> Tuple[str, ...]:
68
+ return "channel_drop_range", "fill_value"
69
+
70
+ @property
71
+ def targets_as_params(self):
72
+ return ["image"]
custom_albumentations/augmentations/dropout/coarse_dropout.py ADDED
@@ -0,0 +1,187 @@
1
+ import random
2
+ from typing import Iterable, List, Optional, Sequence, Tuple, Union
3
+
4
+ import numpy as np
5
+
6
+ from ...core.transforms_interface import DualTransform, KeypointType
7
+ from .functional import cutout
8
+
9
+ __all__ = ["CoarseDropout"]
10
+
11
+
12
+ class CoarseDropout(DualTransform):
13
+ """CoarseDropout of the rectangular regions in the image.
14
+
15
+ Args:
16
+ max_holes (int): Maximum number of regions to zero out.
17
+ max_height (int, float): Maximum height of the hole.
18
+ If float, it is calculated as a fraction of the image height.
19
+ max_width (int, float): Maximum width of the hole.
20
+ If float, it is calculated as a fraction of the image width.
21
+ min_holes (int): Minimum number of regions to zero out. If `None`,
22
+ `min_holes` is be set to `max_holes`. Default: `None`.
23
+ min_height (int, float): Minimum height of the hole. Default: None. If `None`,
24
+ `min_height` is set to `max_height`. Default: `None`.
25
+ If float, it is calculated as a fraction of the image height.
26
+ min_width (int, float): Minimum width of the hole. If `None`, `min_height` is
27
+ set to `max_width`. Default: `None`.
28
+ If float, it is calculated as a fraction of the image width.
29
+
30
+ fill_value (int, float, list of int, list of float): value for dropped pixels.
31
+ mask_fill_value (int, float, list of int, list of float): fill value for dropped pixels
32
+ in mask. If `None` - mask is not affected. Default: `None`.
33
+
34
+ Targets:
35
+ image, mask, keypoints
36
+
37
+ Image types:
38
+ uint8, float32
39
+
40
+ Reference:
41
+ | https://arxiv.org/abs/1708.04552
42
+ | https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
43
+ | https://github.com/aleju/imgaug/blob/master/imgaug/augmenters/arithmetic.py
44
+ """
45
+
46
+ def __init__(
47
+ self,
48
+ max_holes: int = 8,
49
+ max_height: int = 8,
50
+ max_width: int = 8,
51
+ min_holes: Optional[int] = None,
52
+ min_height: Optional[int] = None,
53
+ min_width: Optional[int] = None,
54
+ fill_value: int = 0,
55
+ mask_fill_value: Optional[int] = None,
56
+ always_apply: bool = False,
57
+ p: float = 0.5,
58
+ ):
59
+ super(CoarseDropout, self).__init__(always_apply, p)
60
+ self.max_holes = max_holes
61
+ self.max_height = max_height
62
+ self.max_width = max_width
63
+ self.min_holes = min_holes if min_holes is not None else max_holes
64
+ self.min_height = min_height if min_height is not None else max_height
65
+ self.min_width = min_width if min_width is not None else max_width
66
+ self.fill_value = fill_value
67
+ self.mask_fill_value = mask_fill_value
68
+ if not 0 < self.min_holes <= self.max_holes:
69
+ raise ValueError("Invalid combination of min_holes and max_holes. Got: {}".format([min_holes, max_holes]))
70
+
71
+ self.check_range(self.max_height)
72
+ self.check_range(self.min_height)
73
+ self.check_range(self.max_width)
74
+ self.check_range(self.min_width)
75
+
76
+ if not 0 < self.min_height <= self.max_height:
77
+ raise ValueError(
78
+ "Invalid combination of min_height and max_height. Got: {}".format([min_height, max_height])
79
+ )
80
+ if not 0 < self.min_width <= self.max_width:
81
+ raise ValueError("Invalid combination of min_width and max_width. Got: {}".format([min_width, max_width]))
82
+
83
+ def check_range(self, dimension):
84
+ if isinstance(dimension, float) and not 0 <= dimension < 1.0:
85
+ raise ValueError(
86
+ "Invalid value {}. If using floats, the value should be in the range [0.0, 1.0)".format(dimension)
87
+ )
88
+
89
+ def apply(
90
+ self,
91
+ img: np.ndarray,
92
+ fill_value: Union[int, float] = 0,
93
+ holes: Iterable[Tuple[int, int, int, int]] = (),
94
+ **params
95
+ ) -> np.ndarray:
96
+ return cutout(img, holes, fill_value)
97
+
98
+ def apply_to_mask(
99
+ self,
100
+ img: np.ndarray,
101
+ mask_fill_value: Union[int, float] = 0,
102
+ holes: Iterable[Tuple[int, int, int, int]] = (),
103
+ **params
104
+ ) -> np.ndarray:
105
+ if mask_fill_value is None:
106
+ return img
107
+ return cutout(img, holes, mask_fill_value)
108
+
109
+ def get_params_dependent_on_targets(self, params):
110
+ img = params["image"]
111
+ height, width = img.shape[:2]
112
+
113
+ holes = []
114
+ for _n in range(random.randint(self.min_holes, self.max_holes)):
115
+ if all(
116
+ [
117
+ isinstance(self.min_height, int),
118
+ isinstance(self.min_width, int),
119
+ isinstance(self.max_height, int),
120
+ isinstance(self.max_width, int),
121
+ ]
122
+ ):
123
+ hole_height = random.randint(self.min_height, self.max_height)
124
+ hole_width = random.randint(self.min_width, self.max_width)
125
+ elif all(
126
+ [
127
+ isinstance(self.min_height, float),
128
+ isinstance(self.min_width, float),
129
+ isinstance(self.max_height, float),
130
+ isinstance(self.max_width, float),
131
+ ]
132
+ ):
133
+ hole_height = int(height * random.uniform(self.min_height, self.max_height))
134
+ hole_width = int(width * random.uniform(self.min_width, self.max_width))
135
+ else:
136
+ raise ValueError(
137
+ "Min width, max width, \
138
+ min height and max height \
139
+ should all either be ints or floats. \
140
+ Got: {} respectively".format(
141
+ [
142
+ type(self.min_width),
143
+ type(self.max_width),
144
+ type(self.min_height),
145
+ type(self.max_height),
146
+ ]
147
+ )
148
+ )
149
+
150
+ y1 = random.randint(0, height - hole_height)
151
+ x1 = random.randint(0, width - hole_width)
152
+ y2 = y1 + hole_height
153
+ x2 = x1 + hole_width
154
+ holes.append((x1, y1, x2, y2))
155
+
156
+ return {"holes": holes}
157
+
158
+ @property
159
+ def targets_as_params(self):
160
+ return ["image"]
161
+
162
+ def _keypoint_in_hole(self, keypoint: KeypointType, hole: Tuple[int, int, int, int]) -> bool:
163
+ x1, y1, x2, y2 = hole
164
+ x, y = keypoint[:2]
165
+ return x1 <= x < x2 and y1 <= y < y2
166
+
167
+ def apply_to_keypoints(
168
+ self, keypoints: Sequence[KeypointType], holes: Iterable[Tuple[int, int, int, int]] = (), **params
169
+ ) -> List[KeypointType]:
170
+ result = set(keypoints)
171
+ for hole in holes:
172
+ for kp in keypoints:
173
+ if self._keypoint_in_hole(kp, hole):
174
+ result.discard(kp)
175
+ return list(result)
176
+
177
+ def get_transform_init_args_names(self):
178
+ return (
179
+ "max_holes",
180
+ "max_height",
181
+ "max_width",
182
+ "min_holes",
183
+ "min_height",
184
+ "min_width",
185
+ "fill_value",
186
+ "mask_fill_value",
187
+ )
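A usage sketch (illustrative, assuming top-level exports): up to `max_holes` rectangles of at most `max_height` x `max_width` pixels are filled with `fill_value`; passing `mask_fill_value` would apply the same holes to the mask:

>>> import numpy as np
>>> import custom_albumentations as A
>>> image = np.random.randint(1, 256, (64, 64, 3), dtype=np.uint8)
>>> aug = A.Compose([A.CoarseDropout(max_holes=4, max_height=8, max_width=8, fill_value=0, p=1.0)])
>>> aug(image=image)["image"].shape
(64, 64, 3)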
custom_albumentations/augmentations/dropout/cutout.py ADDED
@@ -0,0 +1,79 @@
+ import random
+ import warnings
+ from typing import Any, Dict, Tuple, Union
+
+ import numpy as np
+
+ from custom_albumentations.core.transforms_interface import ImageOnlyTransform
+
+ from .functional import cutout
+
+ __all__ = ["Cutout"]
+
+
+ class Cutout(ImageOnlyTransform):
+     """CoarseDropout of the square regions in the image.
+
+     Args:
+         num_holes (int): number of regions to zero out
+         max_h_size (int): maximum height of the hole
+         max_w_size (int): maximum width of the hole
+         fill_value (int, float, list of int, list of float): value for dropped pixels.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+
+     Reference:
+     |  https://arxiv.org/abs/1708.04552
+     |  https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
+     |  https://github.com/aleju/imgaug/blob/master/imgaug/augmenters/arithmetic.py
+     """
+
+     def __init__(
+         self,
+         num_holes: int = 8,
+         max_h_size: int = 8,
+         max_w_size: int = 8,
+         fill_value: Union[int, float] = 0,
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super(Cutout, self).__init__(always_apply, p)
+         self.num_holes = num_holes
+         self.max_h_size = max_h_size
+         self.max_w_size = max_w_size
+         self.fill_value = fill_value
+         warnings.warn(
+             f"{self.__class__.__name__} has been deprecated. Please use CoarseDropout",
+             FutureWarning,
+         )
+
+     def apply(self, img: np.ndarray, fill_value: Union[int, float] = 0, holes=(), **params):
+         return cutout(img, holes, fill_value)
+
+     def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
+         img = params["image"]
+         height, width = img.shape[:2]
+
+         holes = []
+         for _n in range(self.num_holes):
+             y = random.randint(0, height)
+             x = random.randint(0, width)
+
+             y1 = np.clip(y - self.max_h_size // 2, 0, height)
+             y2 = np.clip(y1 + self.max_h_size, 0, height)
+             x1 = np.clip(x - self.max_w_size // 2, 0, width)
+             x2 = np.clip(x1 + self.max_w_size, 0, width)
+             holes.append((x1, y1, x2, y2))
+
+         return {"holes": holes}
+
+     @property
+     def targets_as_params(self):
+         return ["image"]
+
+     def get_transform_init_args_names(self) -> Tuple[str, ...]:
+         return ("num_holes", "max_h_size", "max_w_size")
custom_albumentations/augmentations/dropout/functional.py ADDED
@@ -0,0 +1,29 @@
+ from typing import Iterable, List, Tuple, Union
+
+ import numpy as np
+
+ from custom_albumentations.augmentations.utils import preserve_shape
+
+ __all__ = ["cutout", "channel_dropout"]
+
+
+ @preserve_shape
+ def channel_dropout(
+     img: np.ndarray, channels_to_drop: Union[int, Tuple[int, ...], np.ndarray], fill_value: Union[int, float] = 0
+ ) -> np.ndarray:
+     if len(img.shape) == 2 or img.shape[2] == 1:
+         raise NotImplementedError("Only one channel. ChannelDropout is not defined.")
+
+     img = img.copy()
+     img[..., channels_to_drop] = fill_value
+     return img
+
+
+ def cutout(
+     img: np.ndarray, holes: Iterable[Tuple[int, int, int, int]], fill_value: Union[int, float] = 0
+ ) -> np.ndarray:
+     # Make a copy of the input image since we don't want to modify it directly
+     img = img.copy()
+     for x1, y1, x2, y2 in holes:
+         img[y1:y2, x1:x2] = fill_value
+     return img
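The functional helpers are easy to sanity-check in isolation; `cutout` takes holes as (x1, y1, x2, y2) corners and works on a copy of the input, as in this small sketch:

import numpy as np

from custom_albumentations.augmentations.dropout.functional import cutout

img = np.ones((4, 4), dtype=np.uint8)
holes = [(1, 0, 3, 2)]  # (x1, y1, x2, y2): columns 1-2, rows 0-1

out = cutout(img, holes, fill_value=0)
assert out[0:2, 1:3].sum() == 0  # the hole is zeroed out
assert img.sum() == 16           # the input array is untouched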
custom_albumentations/augmentations/dropout/grid_dropout.py ADDED
@@ -0,0 +1,155 @@
+ import random
+ from typing import Iterable, Optional, Tuple
+
+ import numpy as np
+
+ from ...core.transforms_interface import DualTransform
+ from . import functional as F
+
+ __all__ = ["GridDropout"]
+
+
+ class GridDropout(DualTransform):
+     """GridDropout, drops out rectangular regions of an image and the corresponding mask in a grid fashion.
+
+     Args:
+         ratio (float): the ratio of the mask holes to the unit_size (same for horizontal and vertical directions).
+             Must be between 0 and 1. Default: 0.5.
+         unit_size_min (int): minimum size of the grid unit. Must be between 2 and the image shorter edge.
+             If `None`, holes_number_x and holes_number_y are used to set up the grid. Default: `None`.
+         unit_size_max (int): maximum size of the grid unit. Must be between 2 and the image shorter edge.
+             If `None`, holes_number_x and holes_number_y are used to set up the grid. Default: `None`.
+         holes_number_x (int): the number of grid units in the x direction. Must be between 1 and image width//2.
+             If `None`, grid unit width is set as image_width//10. Default: `None`.
+         holes_number_y (int): the number of grid units in the y direction. Must be between 1 and image height//2.
+             If `None`, grid unit height is set equal to the grid unit width or image height, whichever is smaller.
+         shift_x (int): offset of the grid start in the x direction from the (0,0) coordinate.
+             Clipped between 0 and grid unit width - hole width. Default: 0.
+         shift_y (int): offset of the grid start in the y direction from the (0,0) coordinate.
+             Clipped between 0 and grid unit height - hole height. Default: 0.
+         random_offset (boolean): whether to offset the grid randomly between 0 and grid unit size - hole size.
+             If `True`, entered shift_x, shift_y are ignored and set randomly. Default: `False`.
+         fill_value (int): value for the dropped pixels. Default: 0.
+         mask_fill_value (int): value for the dropped pixels in mask.
+             If `None`, the transformation is not applied to the mask. Default: `None`.
+
+     Targets:
+         image, mask
+
+     Image types:
+         uint8, float32
+
+     References:
+         https://arxiv.org/abs/2001.04086
+
+     """
+
+     def __init__(
+         self,
+         ratio: float = 0.5,
+         unit_size_min: Optional[int] = None,
+         unit_size_max: Optional[int] = None,
+         holes_number_x: Optional[int] = None,
+         holes_number_y: Optional[int] = None,
+         shift_x: int = 0,
+         shift_y: int = 0,
+         random_offset: bool = False,
+         fill_value: int = 0,
+         mask_fill_value: Optional[int] = None,
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super(GridDropout, self).__init__(always_apply, p)
+         self.ratio = ratio
+         self.unit_size_min = unit_size_min
+         self.unit_size_max = unit_size_max
+         self.holes_number_x = holes_number_x
+         self.holes_number_y = holes_number_y
+         self.shift_x = shift_x
+         self.shift_y = shift_y
+         self.random_offset = random_offset
+         self.fill_value = fill_value
+         self.mask_fill_value = mask_fill_value
+         if not 0 < self.ratio <= 1:
+             raise ValueError("ratio must be between 0 and 1.")
+
+     def apply(self, img: np.ndarray, holes: Iterable[Tuple[int, int, int, int]] = (), **params) -> np.ndarray:
+         return F.cutout(img, holes, self.fill_value)
+
+     def apply_to_mask(self, img: np.ndarray, holes: Iterable[Tuple[int, int, int, int]] = (), **params) -> np.ndarray:
+         if self.mask_fill_value is None:
+             return img
+
+         return F.cutout(img, holes, self.mask_fill_value)
+
+     def get_params_dependent_on_targets(self, params):
+         img = params["image"]
+         height, width = img.shape[:2]
+         # set grid using unit size limits
+         if self.unit_size_min and self.unit_size_max:
+             if not 2 <= self.unit_size_min <= self.unit_size_max:
+                 raise ValueError("Max unit size should be >= min size, both at least 2 pixels.")
+             if self.unit_size_max > min(height, width):
+                 raise ValueError("Grid size limits must be within the shortest image edge.")
+             # random.randint is inclusive on both ends, so the max bound is used as-is
+             unit_width = random.randint(self.unit_size_min, self.unit_size_max)
+             unit_height = unit_width
+         else:
+             # set grid using holes numbers
+             if self.holes_number_x is None:
+                 unit_width = max(2, width // 10)
+             else:
+                 if not 1 <= self.holes_number_x <= width // 2:
+                     raise ValueError("The hole_number_x must be between 1 and image width//2.")
+                 unit_width = width // self.holes_number_x
+             if self.holes_number_y is None:
+                 unit_height = max(min(unit_width, height), 2)
+             else:
+                 if not 1 <= self.holes_number_y <= height // 2:
+                     raise ValueError("The hole_number_y must be between 1 and image height//2.")
+                 unit_height = height // self.holes_number_y
+
+         hole_width = int(unit_width * self.ratio)
+         hole_height = int(unit_height * self.ratio)
+         # min 1 pixel and max unit length - 1
+         hole_width = min(max(hole_width, 1), unit_width - 1)
+         hole_height = min(max(hole_height, 1), unit_height - 1)
+         # set offset of the grid
+         if self.shift_x is None:
+             shift_x = 0
+         else:
+             shift_x = min(max(0, self.shift_x), unit_width - hole_width)
+         if self.shift_y is None:
+             shift_y = 0
+         else:
+             shift_y = min(max(0, self.shift_y), unit_height - hole_height)
+         if self.random_offset:
+             shift_x = random.randint(0, unit_width - hole_width)
+             shift_y = random.randint(0, unit_height - hole_height)
+         holes = []
+         for i in range(width // unit_width + 1):
+             for j in range(height // unit_height + 1):
+                 x1 = min(shift_x + unit_width * i, width)
+                 y1 = min(shift_y + unit_height * j, height)
+                 x2 = min(x1 + hole_width, width)
+                 y2 = min(y1 + hole_height, height)
+                 holes.append((x1, y1, x2, y2))
+
+         return {"holes": holes}
+
+     @property
+     def targets_as_params(self):
+         return ["image"]
+
+     def get_transform_init_args_names(self):
+         return (
+             "ratio",
+             "unit_size_min",
+             "unit_size_max",
+             "holes_number_x",
+             "holes_number_y",
+             "shift_x",
+             "shift_y",
+             "random_offset",
+             "fill_value",
+             "mask_fill_value",
+         )
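A short sketch of GridDropout's geometry, assuming the standard image/mask call convention: with holes_number_x = holes_number_y = 10 on a 100x100 image, each grid unit is 10 px, so ratio=0.5 gives 5x5 holes and roughly ratio**2 = 25% of the pixels are dropped; mask_fill_value=0 applies the same holes to the mask.

import numpy as np

from custom_albumentations.augmentations.dropout.grid_dropout import GridDropout

image = np.full((100, 100, 3), 255, dtype=np.uint8)
mask = np.ones((100, 100), dtype=np.uint8)

aug = GridDropout(ratio=0.5, holes_number_x=10, holes_number_y=10, mask_fill_value=0, p=1.0)
out = aug(image=image, mask=mask)

dropped_fraction = (out["mask"] == 0).mean()  # close to 0.25 for this configuration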
custom_albumentations/augmentations/dropout/mask_dropout.py ADDED
@@ -0,0 +1,99 @@
+ import random
+ from typing import Any, Dict, Optional, Tuple, Union
+
+ import cv2
+ import numpy as np
+ from skimage.measure import label
+
+ from ...core.transforms_interface import DualTransform, to_tuple
+
+ __all__ = ["MaskDropout"]
+
+
+ class MaskDropout(DualTransform):
+     """
+     Image & mask augmentation that zeroes out mask and image regions corresponding
+     to randomly chosen object instances in the mask.
+
+     The mask must be a single-channel image; zero values are treated as background.
+     The image can have any number of channels.
+
+     Inspired by https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114254
+
+     Args:
+         max_objects: Maximum number of labels that can be zeroed out. Can be a tuple, in this case it's [min, max].
+         image_fill_value: Fill value to use when filling the image.
+             Can be 'inpaint' to apply inpainting (works only for 3-channel images).
+         mask_fill_value: Fill value to use when filling the mask.
+
+     Targets:
+         image, mask
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         max_objects: int = 1,
+         image_fill_value: Union[int, float, str] = 0,
+         mask_fill_value: Union[int, float] = 0,
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super(MaskDropout, self).__init__(always_apply, p)
+         self.max_objects = to_tuple(max_objects, 1)
+         self.image_fill_value = image_fill_value
+         self.mask_fill_value = mask_fill_value
+
+     @property
+     def targets_as_params(self):
+         return ["mask"]
+
+     def get_params_dependent_on_targets(self, params) -> Dict[str, Any]:
+         mask = params["mask"]
+
+         label_image, num_labels = label(mask, return_num=True)
+
+         if num_labels == 0:
+             dropout_mask = None
+         else:
+             objects_to_drop = random.randint(int(self.max_objects[0]), int(self.max_objects[1]))
+             objects_to_drop = min(num_labels, objects_to_drop)
+
+             if objects_to_drop == num_labels:
+                 dropout_mask = mask > 0
+             else:
+                 labels_index = random.sample(range(1, num_labels + 1), objects_to_drop)
+                 dropout_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=bool)
+                 for label_index in labels_index:
+                     dropout_mask |= label_image == label_index
+
+         params.update({"dropout_mask": dropout_mask})
+         return params
+
+     def apply(self, img: np.ndarray, dropout_mask: Optional[np.ndarray] = None, **params) -> np.ndarray:
+         if dropout_mask is None:
+             return img
+
+         if self.image_fill_value == "inpaint":
+             dropout_mask = dropout_mask.astype(np.uint8)
+             _, _, w, h = cv2.boundingRect(dropout_mask)
+             radius = min(3, max(w, h) // 2)
+             img = cv2.inpaint(img, dropout_mask, radius, cv2.INPAINT_NS)
+         else:
+             img = img.copy()
+             img[dropout_mask] = self.image_fill_value
+
+         return img
+
+     def apply_to_mask(self, img: np.ndarray, dropout_mask: Optional[np.ndarray] = None, **params) -> np.ndarray:
+         if dropout_mask is None:
+             return img
+
+         img = img.copy()
+         img[dropout_mask] = self.mask_fill_value
+         return img
+
+     def get_transform_init_args_names(self) -> Tuple[str, ...]:
+         return "max_objects", "image_fill_value", "mask_fill_value"
custom_albumentations/augmentations/functional.py ADDED
@@ -0,0 +1,1380 @@
1
+ from __future__ import division
2
+
3
+ from typing import Optional, Sequence, Union
4
+ from warnings import warn
5
+
6
+ import cv2
7
+ import numpy as np
8
+ import skimage
9
+
10
+ from custom_albumentations import random_utils
11
+ from custom_albumentations.augmentations.utils import (
12
+ MAX_VALUES_BY_DTYPE,
13
+ _maybe_process_in_chunks,
14
+ clip,
15
+ clipped,
16
+ ensure_contiguous,
17
+ is_grayscale_image,
18
+ is_rgb_image,
19
+ non_rgb_warning,
20
+ preserve_channel_dim,
21
+ preserve_shape,
22
+ )
23
+
24
+ __all__ = [
25
+ "add_fog",
26
+ "add_rain",
27
+ "add_shadow",
28
+ "add_gravel",
29
+ "add_snow",
30
+ "add_sun_flare",
31
+ "add_weighted",
32
+ "adjust_brightness_torchvision",
33
+ "adjust_contrast_torchvision",
34
+ "adjust_hue_torchvision",
35
+ "adjust_saturation_torchvision",
36
+ "brightness_contrast_adjust",
37
+ "channel_shuffle",
38
+ "clahe",
39
+ "convolve",
40
+ "downscale",
41
+ "equalize",
42
+ "fancy_pca",
43
+ "from_float",
44
+ "gamma_transform",
45
+ "gauss_noise",
46
+ "image_compression",
47
+ "invert",
48
+ "iso_noise",
49
+ "linear_transformation_rgb",
50
+ "move_tone_curve",
51
+ "multiply",
52
+ "noop",
53
+ "normalize",
54
+ "posterize",
55
+ "shift_hsv",
56
+ "shift_rgb",
57
+ "solarize",
58
+ "superpixels",
59
+ "swap_tiles_on_image",
60
+ "to_float",
61
+ "to_gray",
62
+ "gray_to_rgb",
63
+ "unsharp_mask",
64
+ ]
65
+
66
+
67
+ def normalize_cv2(img, mean, denominator):
68
+ if mean.shape and len(mean) != 4 and mean.shape != img.shape:
69
+ mean = np.array(mean.tolist() + [0] * (4 - len(mean)), dtype=np.float64)
70
+ if not denominator.shape:
71
+ denominator = np.array([denominator.tolist()] * 4, dtype=np.float64)
72
+ elif len(denominator) != 4 and denominator.shape != img.shape:
73
+ denominator = np.array(denominator.tolist() + [1] * (4 - len(denominator)), dtype=np.float64)
74
+
75
+ img = np.ascontiguousarray(img.astype("float32"))
76
+ cv2.subtract(img, mean.astype(np.float64), img)
77
+ cv2.multiply(img, denominator.astype(np.float64), img)
78
+ return img
79
+
80
+
81
+ def normalize_numpy(img, mean, denominator):
82
+ img = img.astype(np.float32)
83
+ img -= mean
84
+ img *= denominator
85
+ return img
86
+
87
+
88
+ def normalize(img, mean, std, max_pixel_value=255.0):
89
+ mean = np.array(mean, dtype=np.float32)
90
+ mean *= max_pixel_value
91
+
92
+ std = np.array(std, dtype=np.float32)
93
+ std *= max_pixel_value
94
+
95
+ denominator = np.reciprocal(std, dtype=np.float32)
96
+
97
+ if img.ndim == 3 and img.shape[-1] == 3:
98
+ return normalize_cv2(img, mean, denominator)
99
+ return normalize_numpy(img, mean, denominator)
100
+
101
+
102
+ def _shift_hsv_uint8(img, hue_shift, sat_shift, val_shift):
103
+ dtype = img.dtype
104
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
105
+ hue, sat, val = cv2.split(img)
106
+
107
+ if hue_shift != 0:
108
+ lut_hue = np.arange(0, 256, dtype=np.int16)
109
+ lut_hue = np.mod(lut_hue + hue_shift, 180).astype(dtype)
110
+ hue = cv2.LUT(hue, lut_hue)
111
+
112
+ if sat_shift != 0:
113
+ lut_sat = np.arange(0, 256, dtype=np.int16)
114
+ lut_sat = np.clip(lut_sat + sat_shift, 0, 255).astype(dtype)
115
+ sat = cv2.LUT(sat, lut_sat)
116
+
117
+ if val_shift != 0:
118
+ lut_val = np.arange(0, 256, dtype=np.int16)
119
+ lut_val = np.clip(lut_val + val_shift, 0, 255).astype(dtype)
120
+ val = cv2.LUT(val, lut_val)
121
+
122
+ img = cv2.merge((hue, sat, val)).astype(dtype)
123
+ img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
124
+ return img
125
+
126
+
127
+ def _shift_hsv_non_uint8(img, hue_shift, sat_shift, val_shift):
128
+ dtype = img.dtype
129
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
130
+ hue, sat, val = cv2.split(img)
131
+
132
+ if hue_shift != 0:
133
+ hue = cv2.add(hue, hue_shift)
134
+ hue = np.mod(hue, 360) # OpenCV fails with negative values
135
+
136
+ if sat_shift != 0:
137
+ sat = clip(cv2.add(sat, sat_shift), dtype, 1.0)
138
+
139
+ if val_shift != 0:
140
+ val = clip(cv2.add(val, val_shift), dtype, 1.0)
141
+
142
+ img = cv2.merge((hue, sat, val))
143
+ img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
144
+ return img
145
+
146
+
147
+ @preserve_shape
148
+ def shift_hsv(img, hue_shift, sat_shift, val_shift):
149
+ if hue_shift == 0 and sat_shift == 0 and val_shift == 0:
150
+ return img
151
+
152
+ is_gray = is_grayscale_image(img)
153
+ if is_gray:
154
+ if hue_shift != 0 or sat_shift != 0:
155
+ hue_shift = 0
156
+ sat_shift = 0
157
+ warn(
158
+ "HueSaturationValue: hue_shift and sat_shift are not applicable to grayscale image. "
159
+ "Set them to 0 or use RGB image"
160
+ )
161
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
162
+
163
+ if img.dtype == np.uint8:
164
+ img = _shift_hsv_uint8(img, hue_shift, sat_shift, val_shift)
165
+ else:
166
+ img = _shift_hsv_non_uint8(img, hue_shift, sat_shift, val_shift)
167
+
168
+ if is_gray:
169
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
170
+
171
+ return img
172
+
173
+
174
+ def solarize(img, threshold=128):
175
+ """Invert all pixel values above a threshold.
176
+
177
+ Args:
178
+ img (numpy.ndarray): The image to solarize.
179
+ threshold (int): All pixels above this greyscale level are inverted.
180
+
181
+ Returns:
182
+ numpy.ndarray: Solarized image.
183
+
184
+ """
185
+ dtype = img.dtype
186
+ max_val = MAX_VALUES_BY_DTYPE[dtype]
187
+
188
+ if dtype == np.dtype("uint8"):
189
+ lut = [(i if i < threshold else max_val - i) for i in range(max_val + 1)]
190
+
191
+ prev_shape = img.shape
192
+ img = cv2.LUT(img, np.array(lut, dtype=dtype))
193
+
194
+ if len(prev_shape) != len(img.shape):
195
+ img = np.expand_dims(img, -1)
196
+ return img
197
+
198
+ result_img = img.copy()
199
+ cond = img >= threshold
200
+ result_img[cond] = max_val - result_img[cond]
201
+ return result_img
202
+
203
+
204
+ @preserve_shape
205
+ def posterize(img, bits):
206
+ """Reduce the number of bits for each color channel.
207
+
208
+ Args:
209
+ img (numpy.ndarray): image to posterize.
210
+ bits (int): number of high bits. Must be in range [0, 8]
211
+
212
+ Returns:
213
+ numpy.ndarray: Image with reduced color channels.
214
+
215
+ """
216
+ bits = np.uint8(bits)
217
+
218
+ if img.dtype != np.uint8:
219
+ raise TypeError("Image must have uint8 channel type")
220
+ if np.any((bits < 0) | (bits > 8)):
221
+ raise ValueError("bits must be in range [0, 8]")
222
+
223
+ if not bits.shape or len(bits) == 1:
224
+ if bits == 0:
225
+ return np.zeros_like(img)
226
+ if bits == 8:
227
+ return img.copy()
228
+
229
+ lut = np.arange(0, 256, dtype=np.uint8)
230
+ mask = ~np.uint8(2 ** (8 - bits) - 1)
231
+ lut &= mask
232
+
233
+ return cv2.LUT(img, lut)
234
+
235
+ if not is_rgb_image(img):
236
+ raise TypeError("If bits is iterable image must be RGB")
237
+
238
+ result_img = np.empty_like(img)
239
+ for i, channel_bits in enumerate(bits):
240
+ if channel_bits == 0:
241
+ result_img[..., i] = np.zeros_like(img[..., i])
242
+ elif channel_bits == 8:
243
+ result_img[..., i] = img[..., i].copy()
244
+ else:
245
+ lut = np.arange(0, 256, dtype=np.uint8)
246
+ mask = ~np.uint8(2 ** (8 - channel_bits) - 1)
247
+ lut &= mask
248
+
249
+ result_img[..., i] = cv2.LUT(img[..., i], lut)
250
+
251
+ return result_img
252
+
253
+
254
+ def _equalize_pil(img, mask=None):
255
+ histogram = cv2.calcHist([img], [0], mask, [256], (0, 256)).ravel()
256
+ h = [_f for _f in histogram if _f]
257
+
258
+ if len(h) <= 1:
259
+ return img.copy()
260
+
261
+ step = np.sum(h[:-1]) // 255
262
+ if not step:
263
+ return img.copy()
264
+
265
+ lut = np.empty(256, dtype=np.uint8)
266
+ n = step // 2
267
+ for i in range(256):
268
+ lut[i] = min(n // step, 255)
269
+ n += histogram[i]
270
+
271
+ return cv2.LUT(img, np.array(lut))
272
+
273
+
274
+ def _equalize_cv(img, mask=None):
275
+ if mask is None:
276
+ return cv2.equalizeHist(img)
277
+
278
+ histogram = cv2.calcHist([img], [0], mask, [256], (0, 256)).ravel()
279
+ i = 0
280
+ for val in histogram:
281
+ if val > 0:
282
+ break
283
+ i += 1
284
+ i = min(i, 255)
285
+
286
+ total = np.sum(histogram)
287
+ if histogram[i] == total:
288
+ return np.full_like(img, i)
289
+
290
+ scale = 255.0 / (total - histogram[i])
291
+ _sum = 0
292
+
293
+ lut = np.zeros(256, dtype=np.uint8)
294
+ i += 1
295
+ for i in range(i, len(histogram)):
296
+ _sum += histogram[i]
297
+ lut[i] = clip(round(_sum * scale), np.dtype("uint8"), 255)
298
+
299
+ return cv2.LUT(img, lut)
300
+
301
+
302
+ @preserve_channel_dim
303
+ def equalize(img, mask=None, mode="cv", by_channels=True):
304
+ """Equalize the image histogram.
305
+
306
+ Args:
307
+ img (numpy.ndarray): RGB or grayscale image.
308
+ mask (numpy.ndarray): An optional mask. If given, only the pixels selected by
309
+ the mask are included in the analysis. Maybe 1 channel or 3 channel array.
310
+ mode (str): {'cv', 'pil'}. Use OpenCV or Pillow equalization method.
311
+ by_channels (bool): If True, use equalization by channels separately,
312
+ else convert image to YCbCr representation and use equalization by `Y` channel.
313
+
314
+ Returns:
315
+ numpy.ndarray: Equalized image.
316
+
317
+ """
318
+ if img.dtype != np.uint8:
319
+ raise TypeError("Image must have uint8 channel type")
320
+
321
+ modes = ["cv", "pil"]
322
+
323
+ if mode not in modes:
324
+ raise ValueError("Unsupported equalization mode. Supports: {}. " "Got: {}".format(modes, mode))
325
+ if mask is not None:
326
+ if is_rgb_image(mask) and is_grayscale_image(img):
327
+ raise ValueError("Wrong mask shape. Image shape: {}. " "Mask shape: {}".format(img.shape, mask.shape))
328
+ if not by_channels and not is_grayscale_image(mask):
329
+ raise ValueError(
330
+ "When by_channels=False only 1-channel mask supports. " "Mask shape: {}".format(mask.shape)
331
+ )
332
+
333
+ if mode == "pil":
334
+ function = _equalize_pil
335
+ else:
336
+ function = _equalize_cv
337
+
338
+ if mask is not None:
339
+ mask = mask.astype(np.uint8)
340
+
341
+ if is_grayscale_image(img):
342
+ return function(img, mask)
343
+
344
+ if not by_channels:
345
+ result_img = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
346
+ result_img[..., 0] = function(result_img[..., 0], mask)
347
+ return cv2.cvtColor(result_img, cv2.COLOR_YCrCb2RGB)
348
+
349
+ result_img = np.empty_like(img)
350
+ for i in range(3):
351
+ if mask is None:
352
+ _mask = None
353
+ elif is_grayscale_image(mask):
354
+ _mask = mask
355
+ else:
356
+ _mask = mask[..., i]
357
+
358
+ result_img[..., i] = function(img[..., i], _mask)
359
+
360
+ return result_img
361
+
362
+
363
+ @preserve_shape
364
+ def move_tone_curve(img, low_y, high_y):
365
+ """Rescales the relationship between bright and dark areas of the image by manipulating its tone curve.
366
+
367
+ Args:
368
+ img (numpy.ndarray): RGB or grayscale image.
369
+ low_y (float): y-position of a Bezier control point used
370
+ to adjust the tone curve, must be in range [0, 1]
371
+ high_y (float): y-position of a Bezier control point used
372
+ to adjust image tone curve, must be in range [0, 1]
373
+ """
374
+ input_dtype = img.dtype
375
+
376
+ if low_y < 0 or low_y > 1:
377
+ raise ValueError("low_shift must be in range [0, 1]")
378
+ if high_y < 0 or high_y > 1:
379
+ raise ValueError("high_shift must be in range [0, 1]")
380
+
381
+ if input_dtype != np.uint8:
382
+ raise ValueError("Unsupported image type {}".format(input_dtype))
383
+
384
+ t = np.linspace(0.0, 1.0, 256)
385
+
386
+ # Defines responze of a four-point bezier curve
387
+ def evaluate_bez(t):
388
+ return 3 * (1 - t) ** 2 * t * low_y + 3 * (1 - t) * t**2 * high_y + t**3
389
+
390
+ evaluate_bez = np.vectorize(evaluate_bez)
391
+ remapping = np.rint(evaluate_bez(t) * 255).astype(np.uint8)
392
+
393
+ lut_fn = _maybe_process_in_chunks(cv2.LUT, lut=remapping)
394
+ img = lut_fn(img)
395
+ return img
396
+
397
+
398
+ @clipped
399
+ def _shift_rgb_non_uint8(img, r_shift, g_shift, b_shift):
400
+ if r_shift == g_shift == b_shift:
401
+ return img + r_shift
402
+
403
+ result_img = np.empty_like(img)
404
+ shifts = [r_shift, g_shift, b_shift]
405
+ for i, shift in enumerate(shifts):
406
+ result_img[..., i] = img[..., i] + shift
407
+
408
+ return result_img
409
+
410
+
411
+ def _shift_image_uint8(img, value):
412
+ max_value = MAX_VALUES_BY_DTYPE[img.dtype]
413
+
414
+ lut = np.arange(0, max_value + 1).astype("float32")
415
+ lut += value
416
+
417
+ lut = np.clip(lut, 0, max_value).astype(img.dtype)
418
+ return cv2.LUT(img, lut)
419
+
420
+
421
+ @preserve_shape
422
+ def _shift_rgb_uint8(img, r_shift, g_shift, b_shift):
423
+ if r_shift == g_shift == b_shift:
424
+ h, w, c = img.shape
425
+ img = img.reshape([h, w * c])
426
+
427
+ return _shift_image_uint8(img, r_shift)
428
+
429
+ result_img = np.empty_like(img)
430
+ shifts = [r_shift, g_shift, b_shift]
431
+ for i, shift in enumerate(shifts):
432
+ result_img[..., i] = _shift_image_uint8(img[..., i], shift)
433
+
434
+ return result_img
435
+
436
+
437
+ def shift_rgb(img, r_shift, g_shift, b_shift):
438
+ if img.dtype == np.uint8:
439
+ return _shift_rgb_uint8(img, r_shift, g_shift, b_shift)
440
+
441
+ return _shift_rgb_non_uint8(img, r_shift, g_shift, b_shift)
442
+
443
+
444
+ @clipped
445
+ def linear_transformation_rgb(img, transformation_matrix):
446
+ result_img = cv2.transform(img, transformation_matrix)
447
+
448
+ return result_img
449
+
450
+
451
+ @preserve_channel_dim
452
+ def clahe(img, clip_limit=2.0, tile_grid_size=(8, 8)):
453
+ if img.dtype != np.uint8:
454
+ raise TypeError("clahe supports only uint8 inputs")
455
+
456
+ clahe_mat = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
457
+
458
+ if len(img.shape) == 2 or img.shape[2] == 1:
459
+ img = clahe_mat.apply(img)
460
+ else:
461
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
462
+ img[:, :, 0] = clahe_mat.apply(img[:, :, 0])
463
+ img = cv2.cvtColor(img, cv2.COLOR_LAB2RGB)
464
+
465
+ return img
466
+
467
+
468
+ @preserve_shape
469
+ def convolve(img, kernel):
470
+ conv_fn = _maybe_process_in_chunks(cv2.filter2D, ddepth=-1, kernel=kernel)
471
+ return conv_fn(img)
472
+
473
+
474
+ @preserve_shape
475
+ def image_compression(img, quality, image_type):
476
+ if image_type in [".jpeg", ".jpg"]:
477
+ quality_flag = cv2.IMWRITE_JPEG_QUALITY
478
+ elif image_type == ".webp":
479
+ quality_flag = cv2.IMWRITE_WEBP_QUALITY
480
+ else:
481
+ NotImplementedError("Only '.jpg' and '.webp' compression transforms are implemented. ")
482
+
483
+ input_dtype = img.dtype
484
+ needs_float = False
485
+
486
+ if input_dtype == np.float32:
487
+ warn(
488
+ "Image compression augmentation "
489
+ "is most effective with uint8 inputs, "
490
+ "{} is used as input.".format(input_dtype),
491
+ UserWarning,
492
+ )
493
+ img = from_float(img, dtype=np.dtype("uint8"))
494
+ needs_float = True
495
+ elif input_dtype not in (np.uint8, np.float32):
496
+ raise ValueError("Unexpected dtype {} for image augmentation".format(input_dtype))
497
+
498
+ _, encoded_img = cv2.imencode(image_type, img, (int(quality_flag), quality))
499
+ img = cv2.imdecode(encoded_img, cv2.IMREAD_UNCHANGED)
500
+
501
+ if needs_float:
502
+ img = to_float(img, max_value=255)
503
+ return img
504
+
505
+
506
+ @preserve_shape
507
+ def add_snow(img, snow_point, brightness_coeff):
508
+ """Bleaches out pixels, imitation snow.
509
+
510
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
511
+
512
+ Args:
513
+ img (numpy.ndarray): Image.
514
+ snow_point: Number of show points.
515
+ brightness_coeff: Brightness coefficient.
516
+
517
+ Returns:
518
+ numpy.ndarray: Image.
519
+
520
+ """
521
+ non_rgb_warning(img)
522
+
523
+ input_dtype = img.dtype
524
+ needs_float = False
525
+
526
+ snow_point *= 127.5 # = 255 / 2
527
+ snow_point += 85 # = 255 / 3
528
+
529
+ if input_dtype == np.float32:
530
+ img = from_float(img, dtype=np.dtype("uint8"))
531
+ needs_float = True
532
+ elif input_dtype not in (np.uint8, np.float32):
533
+ raise ValueError("Unexpected dtype {} for RandomSnow augmentation".format(input_dtype))
534
+
535
+ image_HLS = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
536
+ image_HLS = np.array(image_HLS, dtype=np.float32)
537
+
538
+ image_HLS[:, :, 1][image_HLS[:, :, 1] < snow_point] *= brightness_coeff
539
+
540
+ image_HLS[:, :, 1] = clip(image_HLS[:, :, 1], np.uint8, 255)
541
+
542
+ image_HLS = np.array(image_HLS, dtype=np.uint8)
543
+
544
+ image_RGB = cv2.cvtColor(image_HLS, cv2.COLOR_HLS2RGB)
545
+
546
+ if needs_float:
547
+ image_RGB = to_float(image_RGB, max_value=255)
548
+
549
+ return image_RGB
550
+
551
+
552
+ @preserve_shape
553
+ def add_rain(
554
+ img,
555
+ slant,
556
+ drop_length,
557
+ drop_width,
558
+ drop_color,
559
+ blur_value,
560
+ brightness_coefficient,
561
+ rain_drops,
562
+ ):
563
+ """
564
+
565
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
566
+
567
+ Args:
568
+ img (numpy.ndarray): Image.
569
+ slant (int):
570
+ drop_length:
571
+ drop_width:
572
+ drop_color:
573
+ blur_value (int): Rainy view are blurry.
574
+ brightness_coefficient (float): Rainy days are usually shady.
575
+ rain_drops:
576
+
577
+ Returns:
578
+ numpy.ndarray: Image.
579
+
580
+ """
581
+ non_rgb_warning(img)
582
+
583
+ input_dtype = img.dtype
584
+ needs_float = False
585
+
586
+ if input_dtype == np.float32:
587
+ img = from_float(img, dtype=np.dtype("uint8"))
588
+ needs_float = True
589
+ elif input_dtype not in (np.uint8, np.float32):
590
+ raise ValueError("Unexpected dtype {} for RandomRain augmentation".format(input_dtype))
591
+
592
+ image = img.copy()
593
+
594
+ for rain_drop_x0, rain_drop_y0 in rain_drops:
595
+ rain_drop_x1 = rain_drop_x0 + slant
596
+ rain_drop_y1 = rain_drop_y0 + drop_length
597
+
598
+ cv2.line(
599
+ image,
600
+ (rain_drop_x0, rain_drop_y0),
601
+ (rain_drop_x1, rain_drop_y1),
602
+ drop_color,
603
+ drop_width,
604
+ )
605
+
606
+ image = cv2.blur(image, (blur_value, blur_value)) # rainy view are blurry
607
+ image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype(np.float32)
608
+ image_hsv[:, :, 2] *= brightness_coefficient
609
+
610
+ image_rgb = cv2.cvtColor(image_hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)
611
+
612
+ if needs_float:
613
+ image_rgb = to_float(image_rgb, max_value=255)
614
+
615
+ return image_rgb
616
+
617
+
618
+ @preserve_shape
619
+ def add_fog(img, fog_coef, alpha_coef, haze_list):
620
+ """Add fog to the image.
621
+
622
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
623
+
624
+ Args:
625
+ img (numpy.ndarray): Image.
626
+ fog_coef (float): Fog coefficient.
627
+ alpha_coef (float): Alpha coefficient.
628
+ haze_list (list):
629
+
630
+ Returns:
631
+ numpy.ndarray: Image.
632
+
633
+ """
634
+ non_rgb_warning(img)
635
+
636
+ input_dtype = img.dtype
637
+ needs_float = False
638
+
639
+ if input_dtype == np.float32:
640
+ img = from_float(img, dtype=np.dtype("uint8"))
641
+ needs_float = True
642
+ elif input_dtype not in (np.uint8, np.float32):
643
+ raise ValueError("Unexpected dtype {} for RandomFog augmentation".format(input_dtype))
644
+
645
+ width = img.shape[1]
646
+
647
+ hw = max(int(width // 3 * fog_coef), 10)
648
+
649
+ for haze_points in haze_list:
650
+ x, y = haze_points
651
+ overlay = img.copy()
652
+ output = img.copy()
653
+ alpha = alpha_coef * fog_coef
654
+ rad = hw // 2
655
+ point = (x + hw // 2, y + hw // 2)
656
+ cv2.circle(overlay, point, int(rad), (255, 255, 255), -1)
657
+ cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)
658
+
659
+ img = output.copy()
660
+
661
+ image_rgb = cv2.blur(img, (hw // 10, hw // 10))
662
+
663
+ if needs_float:
664
+ image_rgb = to_float(image_rgb, max_value=255)
665
+
666
+ return image_rgb
667
+
668
+
669
+ @preserve_shape
670
+ def add_sun_flare(img, flare_center_x, flare_center_y, src_radius, src_color, circles):
671
+ """Add sun flare.
672
+
673
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
674
+
675
+ Args:
676
+ img (numpy.ndarray):
677
+ flare_center_x (float):
678
+ flare_center_y (float):
679
+ src_radius:
680
+ src_color (int, int, int):
681
+ circles (list):
682
+
683
+ Returns:
684
+ numpy.ndarray:
685
+
686
+ """
687
+ non_rgb_warning(img)
688
+
689
+ input_dtype = img.dtype
690
+ needs_float = False
691
+
692
+ if input_dtype == np.float32:
693
+ img = from_float(img, dtype=np.dtype("uint8"))
694
+ needs_float = True
695
+ elif input_dtype not in (np.uint8, np.float32):
696
+ raise ValueError("Unexpected dtype {} for RandomSunFlareaugmentation".format(input_dtype))
697
+
698
+ overlay = img.copy()
699
+ output = img.copy()
700
+
701
+ for alpha, (x, y), rad3, (r_color, g_color, b_color) in circles:
702
+ cv2.circle(overlay, (x, y), rad3, (r_color, g_color, b_color), -1)
703
+
704
+ cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)
705
+
706
+ point = (int(flare_center_x), int(flare_center_y))
707
+
708
+ overlay = output.copy()
709
+ num_times = src_radius // 10
710
+ alpha = np.linspace(0.0, 1, num=num_times)
711
+ rad = np.linspace(1, src_radius, num=num_times)
712
+ for i in range(num_times):
713
+ cv2.circle(overlay, point, int(rad[i]), src_color, -1)
714
+ alp = alpha[num_times - i - 1] * alpha[num_times - i - 1] * alpha[num_times - i - 1]
715
+ cv2.addWeighted(overlay, alp, output, 1 - alp, 0, output)
716
+
717
+ image_rgb = output
718
+
719
+ if needs_float:
720
+ image_rgb = to_float(image_rgb, max_value=255)
721
+
722
+ return image_rgb
723
+
724
+
725
+ @ensure_contiguous
726
+ @preserve_shape
727
+ def add_shadow(img, vertices_list):
728
+ """Add shadows to the image.
729
+
730
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
731
+
732
+ Args:
733
+ img (numpy.ndarray):
734
+ vertices_list (list):
735
+
736
+ Returns:
737
+ numpy.ndarray:
738
+
739
+ """
740
+ non_rgb_warning(img)
741
+ input_dtype = img.dtype
742
+ needs_float = False
743
+
744
+ if input_dtype == np.float32:
745
+ img = from_float(img, dtype=np.dtype("uint8"))
746
+ needs_float = True
747
+ elif input_dtype not in (np.uint8, np.float32):
748
+ raise ValueError("Unexpected dtype {} for RandomShadow augmentation".format(input_dtype))
749
+
750
+ image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
751
+ mask = np.zeros_like(img)
752
+
753
+ # adding all shadow polygons on empty mask, single 255 denotes only red channel
754
+ for vertices in vertices_list:
755
+ cv2.fillPoly(mask, vertices, 255)
756
+
757
+ # if red channel is hot, image's "Lightness" channel's brightness is lowered
758
+ red_max_value_ind = mask[:, :, 0] == 255
759
+ image_hls[:, :, 1][red_max_value_ind] = image_hls[:, :, 1][red_max_value_ind] * 0.5
760
+
761
+ image_rgb = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)
762
+
763
+ if needs_float:
764
+ image_rgb = to_float(image_rgb, max_value=255)
765
+
766
+ return image_rgb
767
+
768
+
769
+ @ensure_contiguous
770
+ @preserve_shape
771
+ def add_gravel(img: np.ndarray, gravels: list):
772
+ """Add gravel to the image.
773
+
774
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
775
+
776
+ Args:
777
+ img (numpy.ndarray): image to add gravel to
778
+ gravels (list): list of gravel parameters. (float, float, float, float):
779
+ (top-left x, top-left y, bottom-right x, bottom right y)
780
+
781
+ Returns:
782
+ numpy.ndarray:
783
+ """
784
+ non_rgb_warning(img)
785
+ input_dtype = img.dtype
786
+ needs_float = False
787
+
788
+ if input_dtype == np.float32:
789
+ img = from_float(img, dtype=np.dtype("uint8"))
790
+ needs_float = True
791
+ elif input_dtype not in (np.uint8, np.float32):
792
+ raise ValueError("Unexpected dtype {} for AddGravel augmentation".format(input_dtype))
793
+
794
+ image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
795
+
796
+ for gravel in gravels:
797
+ y1, y2, x1, x2, sat = gravel
798
+ image_hls[x1:x2, y1:y2, 1] = sat
799
+
800
+ image_rgb = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)
801
+
802
+ if needs_float:
803
+ image_rgb = to_float(image_rgb, max_value=255)
804
+
805
+ return image_rgb
806
+
807
+
808
+ def invert(img: np.ndarray) -> np.ndarray:
809
+ # Supports all the valid dtypes
810
+ # clips the img to avoid unexpected behaviour.
811
+ return MAX_VALUES_BY_DTYPE[img.dtype] - img
812
+
813
+
814
+ def channel_shuffle(img, channels_shuffled):
815
+ img = img[..., channels_shuffled]
816
+ return img
817
+
818
+
819
+ @preserve_shape
820
+ def gamma_transform(img, gamma):
821
+ if img.dtype == np.uint8:
822
+ table = (np.arange(0, 256.0 / 255, 1.0 / 255) ** gamma) * 255
823
+ img = cv2.LUT(img, table.astype(np.uint8))
824
+ else:
825
+ img = np.power(img, gamma)
826
+
827
+ return img
828
+
829
+
830
+ @clipped
831
+ def gauss_noise(image, gauss):
832
+ image = image.astype("float32")
833
+ return image + gauss
834
+
835
+
836
+ @clipped
837
+ def _brightness_contrast_adjust_non_uint(img, alpha=1, beta=0, beta_by_max=False):
838
+ dtype = img.dtype
839
+ img = img.astype("float32")
840
+
841
+ if alpha != 1:
842
+ img *= alpha
843
+ if beta != 0:
844
+ if beta_by_max:
845
+ max_value = MAX_VALUES_BY_DTYPE[dtype]
846
+ img += beta * max_value
847
+ else:
848
+ img += beta * np.mean(img)
849
+ return img
850
+
851
+
852
+ @preserve_shape
853
+ def _brightness_contrast_adjust_uint(img, alpha=1, beta=0, beta_by_max=False):
854
+ dtype = np.dtype("uint8")
855
+
856
+ max_value = MAX_VALUES_BY_DTYPE[dtype]
857
+
858
+ lut = np.arange(0, max_value + 1).astype("float32")
859
+
860
+ if alpha != 1:
861
+ lut *= alpha
862
+ if beta != 0:
863
+ if beta_by_max:
864
+ lut += beta * max_value
865
+ else:
866
+ lut += (alpha * beta) * np.mean(img)
867
+
868
+ lut = np.clip(lut, 0, max_value).astype(dtype)
869
+ img = cv2.LUT(img, lut)
870
+ return img
871
+
872
+
873
+ def brightness_contrast_adjust(img, alpha=1, beta=0, beta_by_max=False):
874
+ if img.dtype == np.uint8:
875
+ return _brightness_contrast_adjust_uint(img, alpha, beta, beta_by_max)
876
+
877
+ return _brightness_contrast_adjust_non_uint(img, alpha, beta, beta_by_max)
878
+
879
+
880
+ @clipped
881
+ def iso_noise(image, color_shift=0.05, intensity=0.5, random_state=None, **kwargs):
882
+ """
883
+ Apply poisson noise to image to simulate camera sensor noise.
884
+
885
+ Args:
886
+ image (numpy.ndarray): Input image, currently, only RGB, uint8 images are supported.
887
+ color_shift (float):
888
+ intensity (float): Multiplication factor for noise values. Values of ~0.5 are produce noticeable,
889
+ yet acceptable level of noise.
890
+ random_state:
891
+ **kwargs:
892
+
893
+ Returns:
894
+ numpy.ndarray: Noised image
895
+
896
+ """
897
+ if image.dtype != np.uint8:
898
+ raise TypeError("Image must have uint8 channel type")
899
+ if not is_rgb_image(image):
900
+ raise TypeError("Image must be RGB")
901
+
902
+ one_over_255 = float(1.0 / 255.0)
903
+ image = np.multiply(image, one_over_255, dtype=np.float32)
904
+ hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
905
+ _, stddev = cv2.meanStdDev(hls)
906
+
907
+ luminance_noise = random_utils.poisson(stddev[1] * intensity * 255, size=hls.shape[:2], random_state=random_state)
908
+ color_noise = random_utils.normal(0, color_shift * 360 * intensity, size=hls.shape[:2], random_state=random_state)
909
+
910
+ hue = hls[..., 0]
911
+ hue += color_noise
912
+ hue[hue < 0] += 360
913
+ hue[hue > 360] -= 360
914
+
915
+ luminance = hls[..., 1]
916
+ luminance += (luminance_noise / 255) * (1.0 - luminance)
917
+
918
+ image = cv2.cvtColor(hls, cv2.COLOR_HLS2RGB) * 255
919
+ return image.astype(np.uint8)
920
+
921
+
922
+ def to_gray(img):
923
+ gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
924
+ return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
925
+
926
+
927
+ def gray_to_rgb(img):
928
+ return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
929
+
930
+
931
+ @preserve_shape
932
+ def downscale(img, scale, down_interpolation=cv2.INTER_AREA, up_interpolation=cv2.INTER_LINEAR):
933
+ h, w = img.shape[:2]
934
+
935
+ need_cast = (
936
+ up_interpolation != cv2.INTER_NEAREST or down_interpolation != cv2.INTER_NEAREST
937
+ ) and img.dtype == np.uint8
938
+ if need_cast:
939
+ img = to_float(img)
940
+ downscaled = cv2.resize(img, None, fx=scale, fy=scale, interpolation=down_interpolation)
941
+ upscaled = cv2.resize(downscaled, (w, h), interpolation=up_interpolation)
942
+ if need_cast:
943
+ upscaled = from_float(np.clip(upscaled, 0, 1), dtype=np.dtype("uint8"))
944
+ return upscaled
945
+
946
+
947
+ def to_float(img, max_value=None):
948
+ if max_value is None:
949
+ try:
950
+ max_value = MAX_VALUES_BY_DTYPE[img.dtype]
951
+ except KeyError:
952
+ raise RuntimeError(
953
+ "Can't infer the maximum value for dtype {}. You need to specify the maximum value manually by "
954
+ "passing the max_value argument".format(img.dtype)
955
+ )
956
+ return img.astype("float32") / max_value
957
+
958
+
959
+ def from_float(img, dtype, max_value=None):
960
+ if max_value is None:
961
+ try:
962
+ max_value = MAX_VALUES_BY_DTYPE[dtype]
963
+ except KeyError:
964
+ raise RuntimeError(
965
+ "Can't infer the maximum value for dtype {}. You need to specify the maximum value manually by "
966
+ "passing the max_value argument".format(dtype)
967
+ )
968
+ return (img * max_value).astype(dtype)
969
+
970
+
971
+ def noop(input_obj, **params): # skipcq: PYL-W0613
972
+ return input_obj
973
+
974
+
975
+ def swap_tiles_on_image(image, tiles):
976
+ """
977
+ Swap tiles on image.
978
+
979
+ Args:
980
+ image (np.ndarray): Input image.
981
+ tiles (np.ndarray): array of tuples(
982
+ current_left_up_corner_row, current_left_up_corner_col,
983
+ old_left_up_corner_row, old_left_up_corner_col,
984
+ height_tile, width_tile)
985
+
986
+ Returns:
987
+ np.ndarray: Output image.
988
+
989
+ """
990
+ new_image = image.copy()
991
+
992
+ for tile in tiles:
993
+ new_image[tile[0] : tile[0] + tile[4], tile[1] : tile[1] + tile[5]] = image[
994
+ tile[2] : tile[2] + tile[4], tile[3] : tile[3] + tile[5]
995
+ ]
996
+
997
+ return new_image
998
+
999
+
1000
+ @clipped
1001
+ def _multiply_uint8(img, multiplier):
1002
+ img = img.astype(np.float32)
1003
+ return np.multiply(img, multiplier)
1004
+
1005
+
1006
+ @preserve_shape
1007
+ def _multiply_uint8_optimized(img, multiplier):
1008
+ if is_grayscale_image(img) or len(multiplier) == 1:
1009
+ multiplier = multiplier[0]
1010
+ lut = np.arange(0, 256, dtype=np.float32)
1011
+ lut *= multiplier
1012
+ lut = clip(lut, np.uint8, MAX_VALUES_BY_DTYPE[img.dtype])
1013
+ func = _maybe_process_in_chunks(cv2.LUT, lut=lut)
1014
+ return func(img)
1015
+
1016
+ channels = img.shape[-1]
1017
+ lut = [np.arange(0, 256, dtype=np.float32)] * channels
1018
+ lut = np.stack(lut, axis=-1)
1019
+
1020
+ lut *= multiplier
1021
+ lut = clip(lut, np.uint8, MAX_VALUES_BY_DTYPE[img.dtype])
1022
+
1023
+ images = []
1024
+ for i in range(channels):
1025
+ func = _maybe_process_in_chunks(cv2.LUT, lut=lut[:, i])
1026
+ images.append(func(img[:, :, i]))
1027
+ return np.stack(images, axis=-1)
1028
+
1029
+
1030
+ @clipped
1031
+ def _multiply_non_uint8(img, multiplier):
1032
+ return img * multiplier
1033
+
1034
+
1035
+ def multiply(img, multiplier):
1036
+ """
1037
+ Args:
1038
+ img (numpy.ndarray): Image.
1039
+ multiplier (numpy.ndarray): Multiplier coefficient.
1040
+
1041
+ Returns:
1042
+ numpy.ndarray: Image multiplied by `multiplier` coefficient.
1043
+
1044
+ """
1045
+ if img.dtype == np.uint8:
1046
+ if len(multiplier.shape) == 1:
1047
+ return _multiply_uint8_optimized(img, multiplier)
1048
+
1049
+ return _multiply_uint8(img, multiplier)
1050
+
1051
+ return _multiply_non_uint8(img, multiplier)
1052
+
1053
+
1054
+ def bbox_from_mask(mask):
1055
+ """Create bounding box from binary mask (fast version)
1056
+
1057
+ Args:
1058
+ mask (numpy.ndarray): binary mask.
1059
+
1060
+ Returns:
1061
+ tuple: A bounding box tuple `(x_min, y_min, x_max, y_max)`.
1062
+
1063
+ """
1064
+ rows = np.any(mask, axis=1)
1065
+ if not rows.any():
1066
+ return -1, -1, -1, -1
1067
+ cols = np.any(mask, axis=0)
1068
+ y_min, y_max = np.where(rows)[0][[0, -1]]
1069
+ x_min, x_max = np.where(cols)[0][[0, -1]]
1070
+ return x_min, y_min, x_max + 1, y_max + 1
1071
+
1072
+
1073
+ def mask_from_bbox(img, bbox):
1074
+ """Create binary mask from bounding box
1075
+
1076
+ Args:
1077
+ img (numpy.ndarray): input image
1078
+ bbox: A bounding box tuple `(x_min, y_min, x_max, y_max)`
1079
+
1080
+ Returns:
1081
+ mask (numpy.ndarray): binary mask
1082
+
1083
+ """
1084
+
1085
+ mask = np.zeros(img.shape[:2], dtype=np.uint8)
1086
+ x_min, y_min, x_max, y_max = bbox
1087
+ mask[y_min:y_max, x_min:x_max] = 1
1088
+ return mask
1089
+
1090
+
1091
+ def fancy_pca(img, alpha=0.1):
1092
+ """Perform 'Fancy PCA' augmentation from:
1093
+ http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
1094
+
1095
+ Args:
1096
+ img (numpy.ndarray): numpy array with (h, w, rgb) shape, as ints between 0-255
1097
+ alpha (float): how much to perturb/scale the eigen vecs and vals
1098
+ the paper used std=0.1
1099
+
1100
+ Returns:
1101
+ numpy.ndarray: numpy image-like array as uint8 range(0, 255)
1102
+
1103
+ """
1104
+ if not is_rgb_image(img) or img.dtype != np.uint8:
1105
+ raise TypeError("Image must be RGB image in uint8 format.")
1106
+
1107
+ orig_img = img.astype(float).copy()
1108
+
1109
+ img = img / 255.0 # rescale to 0 to 1 range
1110
+
1111
+ # flatten image to columns of RGB
1112
+ img_rs = img.reshape(-1, 3)
1113
+ # img_rs shape (640000, 3)
1114
+
1115
+ # center mean
1116
+ img_centered = img_rs - np.mean(img_rs, axis=0)
1117
+
1118
+ # paper says 3x3 covariance matrix
1119
+ img_cov = np.cov(img_centered, rowvar=False)
1120
+
1121
+ # eigen values and eigen vectors
1122
+ eig_vals, eig_vecs = np.linalg.eigh(img_cov)
1123
+
1124
+ # sort values and vector
1125
+ sort_perm = eig_vals[::-1].argsort()
1126
+ eig_vals[::-1].sort()
1127
+ eig_vecs = eig_vecs[:, sort_perm]
1128
+
1129
+ # get [p1, p2, p3]
1130
+ m1 = np.column_stack((eig_vecs))
1131
+
1132
+ # get 3x1 matrix of eigen values multiplied by random variable draw from normal
1133
+ # distribution with mean of 0 and standard deviation of 0.1
1134
+ m2 = np.zeros((3, 1))
1135
+ # according to the paper alpha should only be draw once per augmentation (not once per channel)
1136
+ # alpha = np.random.normal(0, alpha_std)
1137
+
1138
+ # broad cast to speed things up
1139
+ m2[:, 0] = alpha * eig_vals[:]
1140
+
1141
+ # this is the vector that we're going to add to each pixel in a moment
1142
+ add_vect = np.matrix(m1) * np.matrix(m2)
1143
+
1144
+ for idx in range(3): # RGB
1145
+ orig_img[..., idx] += add_vect[idx] * 255
1146
+
1147
+ # for image processing it was found that working with float 0.0 to 1.0
1148
+ # was easier than integers between 0-255
1149
+ # orig_img /= 255.0
1150
+ orig_img = np.clip(orig_img, 0.0, 255.0)
1151
+
1152
+ # orig_img *= 255
1153
+ orig_img = orig_img.astype(np.uint8)
1154
+
1155
+ return orig_img
1156
+
1157
+
1158
+ def _adjust_brightness_torchvision_uint8(img, factor):
1159
+ lut = np.arange(0, 256) * factor
1160
+ lut = np.clip(lut, 0, 255).astype(np.uint8)
1161
+ return cv2.LUT(img, lut)
1162
+
1163
+
1164
+ @preserve_shape
1165
+ def adjust_brightness_torchvision(img, factor):
1166
+ if factor == 0:
1167
+ return np.zeros_like(img)
1168
+ elif factor == 1:
1169
+ return img
1170
+
1171
+ if img.dtype == np.uint8:
1172
+ return _adjust_brightness_torchvision_uint8(img, factor)
1173
+
1174
+ return clip(img * factor, img.dtype, MAX_VALUES_BY_DTYPE[img.dtype])
1175
+
1176
+
1177
+ def _adjust_contrast_torchvision_uint8(img, factor, mean):
1178
+ lut = np.arange(0, 256) * factor
1179
+ lut = lut + mean * (1 - factor)
1180
+ lut = clip(lut, img.dtype, 255)
1181
+
1182
+ return cv2.LUT(img, lut)
1183
+
1184
+
1185
+ @preserve_shape
1186
+ def adjust_contrast_torchvision(img, factor):
1187
+ if factor == 1:
1188
+ return img
1189
+
1190
+ if is_grayscale_image(img):
1191
+ mean = img.mean()
1192
+ else:
1193
+ mean = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY).mean()
1194
+
1195
+ if factor == 0:
1196
+ if img.dtype != np.float32:
1197
+ mean = int(mean + 0.5)
1198
+ return np.full_like(img, mean, dtype=img.dtype)
1199
+
1200
+ if img.dtype == np.uint8:
1201
+ return _adjust_contrast_torchvision_uint8(img, factor, mean)
1202
+
1203
+ return clip(
1204
+ img.astype(np.float32) * factor + mean * (1 - factor),
1205
+ img.dtype,
1206
+ MAX_VALUES_BY_DTYPE[img.dtype],
1207
+ )
1208
+
1209
+
1210
+ @preserve_shape
1211
+ def adjust_saturation_torchvision(img, factor, gamma=0):
1212
+ if factor == 1:
1213
+ return img
1214
+
1215
+ if is_grayscale_image(img):
1216
+ gray = img
1217
+ return gray
1218
+ else:
1219
+ gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
1220
+ gray = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
1221
+
1222
+ if factor == 0:
1223
+ return gray
1224
+
1225
+ result = cv2.addWeighted(img, factor, gray, 1 - factor, gamma=gamma)
1226
+ if img.dtype == np.uint8:
1227
+ return result
1228
+
1229
+ # OpenCV does not clip values for float dtype
1230
+ return clip(result, img.dtype, MAX_VALUES_BY_DTYPE[img.dtype])
1231
+
1232
+
1233
+ def _adjust_hue_torchvision_uint8(img, factor):
1234
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
1235
+
1236
+ lut = np.arange(0, 256, dtype=np.int16)
1237
+ lut = np.mod(lut + 180 * factor, 180).astype(np.uint8)
1238
+ img[..., 0] = cv2.LUT(img[..., 0], lut)
1239
+
1240
+ return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
1241
+
1242
+
1243
+ def adjust_hue_torchvision(img, factor):
1244
+ if is_grayscale_image(img):
1245
+ return img
1246
+
1247
+ if factor == 0:
1248
+ return img
1249
+
1250
+ if img.dtype == np.uint8:
1251
+ return _adjust_hue_torchvision_uint8(img, factor)
1252
+
1253
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
1254
+ img[..., 0] = np.mod(img[..., 0] + factor * 360, 360)
1255
+ return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
1256
+
1257
+
1258
+ @preserve_shape
1259
+ def superpixels(
1260
+ image: np.ndarray, n_segments: int, replace_samples: Sequence[bool], max_size: Optional[int], interpolation: int
1261
+ ) -> np.ndarray:
1262
+ if not np.any(replace_samples):
1263
+ return image
1264
+
1265
+ orig_shape = image.shape
1266
+ if max_size is not None:
1267
+ size = max(image.shape[:2])
1268
+ if size > max_size:
1269
+ scale = max_size / size
1270
+ height, width = image.shape[:2]
1271
+ new_height, new_width = int(height * scale), int(width * scale)
1272
+ resize_fn = _maybe_process_in_chunks(cv2.resize, dsize=(new_width, new_height), interpolation=interpolation)
1273
+ image = resize_fn(image)
1274
+
1275
+ segments = skimage.segmentation.slic(
1276
+ image, n_segments=n_segments, compactness=10, channel_axis=-1 if image.ndim > 2 else None
1277
+ )
1278
+
1279
+ min_value = 0
1280
+ max_value = MAX_VALUES_BY_DTYPE[image.dtype]
1281
+ image = np.copy(image)
1282
+ if image.ndim == 2:
1283
+ image = image.reshape(*image.shape, 1)
1284
+ nb_channels = image.shape[2]
1285
+ for c in range(nb_channels):
1286
+ # segments+1 here because otherwise regionprops always misses the last label
1287
+ regions = skimage.measure.regionprops(segments + 1, intensity_image=image[..., c])
1288
+ for ridx, region in enumerate(regions):
1289
+ # with mod here, because slic can sometimes create more superpixel than requested.
1290
+ # replace_samples then does not have enough values, so we just start over with the first one again.
1291
+ if replace_samples[ridx % len(replace_samples)]:
1292
+ mean_intensity = region.mean_intensity
1293
+ image_sp_c = image[..., c]
1294
+
1295
+ if image_sp_c.dtype.kind in ["i", "u", "b"]:
1296
+ # After rounding the value can end up slightly outside of the value_range. Hence, we need to clip.
1297
+ # We do clip via min(max(...)) instead of np.clip because
1298
+ # the latter one does not seem to keep dtypes for dtypes with large itemsizes (e.g. uint64).
1299
+ value: Union[int, float]
1300
+ value = int(np.round(mean_intensity))
1301
+ value = min(max(value, min_value), max_value)
1302
+ else:
1303
+ value = mean_intensity
1304
+
1305
+ image_sp_c[segments == ridx] = value
1306
+
1307
+ if orig_shape != image.shape:
1308
+ resize_fn = _maybe_process_in_chunks(
1309
+ cv2.resize, dsize=(orig_shape[1], orig_shape[0]), interpolation=interpolation
1310
+ )
1311
+ image = resize_fn(image)
1312
+
1313
+ return image
1314
+
1315
+
1316
+ @clipped
1317
+ def add_weighted(img1, alpha, img2, beta):
1318
+ return img1.astype(float) * alpha + img2.astype(float) * beta
1319
+
1320
+
1321
+ @clipped
1322
+ @preserve_shape
1323
+ def unsharp_mask(image: np.ndarray, ksize: int, sigma: float = 0.0, alpha: float = 0.2, threshold: int = 10):
1324
+ blur_fn = _maybe_process_in_chunks(cv2.GaussianBlur, ksize=(ksize, ksize), sigmaX=sigma)
1325
+
1326
+ input_dtype = image.dtype
1327
+ if input_dtype == np.uint8:
1328
+ image = to_float(image)
1329
+ elif input_dtype not in (np.uint8, np.float32):
1330
+ raise ValueError("Unexpected dtype {} for UnsharpMask augmentation".format(input_dtype))
1331
+
1332
+ blur = blur_fn(image)
1333
+ residual = image - blur
1334
+
1335
+ # Do not sharpen noise
1336
+ mask = np.abs(residual) * 255 > threshold
1337
+ mask = mask.astype("float32")
1338
+
1339
+ sharp = image + alpha * residual
1340
+ # Avoid color noise artefacts.
1341
+ sharp = np.clip(sharp, 0, 1)
1342
+
1343
+ soft_mask = blur_fn(mask)
1344
+ output = soft_mask * sharp + (1 - soft_mask) * image
1345
+ return from_float(output, dtype=input_dtype)
1346
+
1347
+
1348
+ @preserve_shape
1349
+ def pixel_dropout(image: np.ndarray, drop_mask: np.ndarray, drop_value: Union[float, Sequence[float]]) -> np.ndarray:
1350
+ if isinstance(drop_value, (int, float)) and drop_value == 0:
1351
+ drop_values = np.zeros_like(image)
1352
+ else:
1353
+ drop_values = np.full_like(image, drop_value) # type: ignore
1354
+ return np.where(drop_mask, drop_values, image)
1355
+
1356
+
1357
+ @clipped
1358
+ @preserve_shape
1359
+ def spatter(
1360
+ img: np.ndarray,
1361
+ non_mud: Optional[np.ndarray],
1362
+ mud: Optional[np.ndarray],
1363
+ rain: Optional[np.ndarray],
1364
+ mode: str,
1365
+ ) -> np.ndarray:
1366
+ non_rgb_warning(img)
1367
+
1368
+ coef = MAX_VALUES_BY_DTYPE[img.dtype]
1369
+ img = img.astype(np.float32) * (1 / coef)
1370
+
1371
+ if mode == "rain":
1372
+ assert rain is not None
1373
+ img = img + rain
1374
+ elif mode == "mud":
1375
+ assert non_mud is not None and mud is not None
1376
+ img = img * non_mud + mud
1377
+ else:
1378
+ raise ValueError("Unsupported spatter mode: " + str(mode))
1379
+
1380
+ return img * 255
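
A quick usage sketch for the spatter helper above (illustrative only: the array
shapes and the rain layout are assumptions for the example, and the clipped
decorator is assumed to cast the result back to the input dtype, as elsewhere
in this library):

import numpy as np

img = np.full((64, 64, 3), 128, dtype=np.uint8)
rain = np.zeros((64, 64, 3), dtype=np.float32)
rain[10:12, :, :] = 0.4                                    # a faint horizontal streak
out = spatter(img, non_mud=None, mud=None, rain=rain, mode="rain")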
custom_albumentations/augmentations/geometric/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .functional import *
2
+ from .resize import *
3
+ from .rotate import *
4
+ from .transforms import *
custom_albumentations/augmentations/geometric/functional.py ADDED
@@ -0,0 +1,1300 @@
1
+ import math
2
+ from typing import List, Optional, Sequence, Tuple, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import skimage.transform
7
+ from scipy.ndimage import gaussian_filter
8
+
9
+ from custom_albumentations.augmentations.utils import (
10
+ _maybe_process_in_chunks,
11
+ angle_2pi_range,
12
+ clipped,
13
+ preserve_channel_dim,
14
+ preserve_shape,
15
+ )
16
+
17
+ from ... import random_utils
18
+ from ...core.bbox_utils import denormalize_bbox, normalize_bbox
19
+ from ...core.transforms_interface import (
20
+ BoxInternalType,
21
+ FillValueType,
22
+ ImageColorType,
23
+ KeypointInternalType,
24
+ )
25
+
26
+ __all__ = [
27
+ "optical_distortion",
28
+ "elastic_transform_approx",
29
+ "grid_distortion",
30
+ "pad",
31
+ "pad_with_params",
32
+ "bbox_rot90",
33
+ "keypoint_rot90",
34
+ "rotate",
35
+ "bbox_rotate",
36
+ "keypoint_rotate",
37
+ "shift_scale_rotate",
38
+ "keypoint_shift_scale_rotate",
39
+ "bbox_shift_scale_rotate",
40
+ "elastic_transform",
41
+ "resize",
42
+ "scale",
43
+ "keypoint_scale",
44
+ "py3round",
45
+ "_func_max_size",
46
+ "longest_max_size",
47
+ "smallest_max_size",
48
+ "perspective",
49
+ "perspective_bbox",
50
+ "rotation2DMatrixToEulerAngles",
51
+ "perspective_keypoint",
52
+ "_is_identity_matrix",
53
+ "warp_affine",
54
+ "keypoint_affine",
55
+ "bbox_affine",
56
+ "safe_rotate",
57
+ "bbox_safe_rotate",
58
+ "keypoint_safe_rotate",
59
+ "piecewise_affine",
60
+ "to_distance_maps",
61
+ "from_distance_maps",
62
+ "keypoint_piecewise_affine",
63
+ "bbox_piecewise_affine",
64
+ "bbox_flip",
65
+ "bbox_hflip",
66
+ "bbox_transpose",
67
+ "bbox_vflip",
68
+ "hflip",
69
+ "hflip_cv2",
70
+ "transpose",
71
+ "keypoint_flip",
72
+ "keypoint_hflip",
73
+ "keypoint_transpose",
74
+ "keypoint_vflip",
75
+ ]
76
+
77
+
78
+ def bbox_rot90(bbox: BoxInternalType, factor: int, rows: int, cols: int) -> BoxInternalType: # skipcq: PYL-W0613
79
+ """Rotates a bounding box by 90 degrees CCW (see np.rot90)
80
+
81
+ Args:
82
+ bbox: A bounding box tuple (x_min, y_min, x_max, y_max).
83
+ factor: Number of CCW rotations. Must be in set {0, 1, 2, 3}. See np.rot90.
84
+ rows: Image rows.
85
+ cols: Image cols.
86
+
87
+ Returns:
88
+ tuple: A bounding box tuple (x_min, y_min, x_max, y_max).
89
+
90
+ """
91
+ if factor not in {0, 1, 2, 3}:
92
+ raise ValueError("Parameter factor must be in set {0, 1, 2, 3}")
93
+ x_min, y_min, x_max, y_max = bbox[:4]
94
+ if factor == 1:
95
+ bbox = y_min, 1 - x_max, y_max, 1 - x_min
96
+ elif factor == 2:
97
+ bbox = 1 - x_max, 1 - y_max, 1 - x_min, 1 - y_min
98
+ elif factor == 3:
99
+ bbox = 1 - y_max, x_min, 1 - y_min, x_max
100
+ return bbox
101
+
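# Editor's note: worked example for the factor=1 branch above (rows/cols are
# unused because the box is in normalized coordinates):
assert bbox_rot90((0.2, 0.1, 0.4, 0.3), 1, rows=100, cols=100) == (0.1, 0.6, 0.3, 0.8)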
102
+
103
+ @angle_2pi_range
104
+ def keypoint_rot90(keypoint: KeypointInternalType, factor: int, rows: int, cols: int, **params) -> KeypointInternalType:
105
+ """Rotates a keypoint by 90 degrees CCW (see np.rot90)
106
+
107
+ Args:
108
+ keypoint: A keypoint `(x, y, angle, scale)`.
109
+ factor: Number of CCW rotations. Must be in set {0, 1, 2, 3}. See np.rot90.
110
+ rows: Image height.
111
+ cols: Image width.
112
+
113
+ Returns:
114
+ tuple: A keypoint `(x, y, angle, scale)`.
115
+
116
+ Raises:
117
+ ValueError: if factor not in set {0, 1, 2, 3}
118
+
119
+ """
120
+ x, y, angle, scale = keypoint[:4]
121
+
122
+ if factor not in {0, 1, 2, 3}:
123
+ raise ValueError("Parameter factor must be in set {0, 1, 2, 3}")
124
+
125
+ if factor == 1:
126
+ x, y, angle = y, (cols - 1) - x, angle - math.pi / 2
127
+ elif factor == 2:
128
+ x, y, angle = (cols - 1) - x, (rows - 1) - y, angle - math.pi
129
+ elif factor == 3:
130
+ x, y, angle = (rows - 1) - y, x, angle + math.pi / 2
131
+
132
+ return x, y, angle, scale
133
+
134
+
135
+ @preserve_channel_dim
136
+ def rotate(
137
+ img: np.ndarray,
138
+ angle: float,
139
+ interpolation: int = cv2.INTER_LINEAR,
140
+ border_mode: int = cv2.BORDER_REFLECT_101,
141
+ value: Optional[ImageColorType] = None,
142
+ ):
143
+ height, width = img.shape[:2]
144
+ # for images we use additional shifts of (0.5, 0.5) as otherwise
145
+ # we get an ugly black border for 90deg rotations
146
+ matrix = cv2.getRotationMatrix2D((width / 2 - 0.5, height / 2 - 0.5), angle, 1.0)
147
+
148
+ warp_fn = _maybe_process_in_chunks(
149
+ cv2.warpAffine, M=matrix, dsize=(width, height), flags=interpolation, borderMode=border_mode, borderValue=value
150
+ )
151
+ return warp_fn(img)
152
+
153
+
154
+ def bbox_rotate(bbox: BoxInternalType, angle: float, method: str, rows: int, cols: int) -> BoxInternalType:
155
+ """Rotates a bounding box by angle degrees.
156
+
157
+ Args:
158
+ bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
159
+ angle: Angle of rotation in degrees.
160
+ method: Rotation method used. Should be one of: "largest_box", "ellipse". Default: "largest_box".
161
+ rows: Image rows.
162
+ cols: Image cols.
163
+
164
+ Returns:
165
+ A bounding box `(x_min, y_min, x_max, y_max)`.
166
+
167
+ References:
168
+ https://arxiv.org/abs/2109.13488
169
+
170
+ """
171
+ x_min, y_min, x_max, y_max = bbox[:4]
172
+ scale = cols / float(rows)
173
+ if method == "largest_box":
174
+ x = np.array([x_min, x_max, x_max, x_min]) - 0.5
175
+ y = np.array([y_min, y_min, y_max, y_max]) - 0.5
176
+ elif method == "ellipse":
177
+ w = (x_max - x_min) / 2
178
+ h = (y_max - y_min) / 2
179
+ data = np.arange(0, 360, dtype=np.float32)
180
+ x = w * np.sin(np.radians(data)) + (w + x_min - 0.5)
181
+ y = h * np.cos(np.radians(data)) + (h + y_min - 0.5)
182
+ else:
183
+ raise ValueError(f"Method {method} is not a valid rotation method.")
184
+ angle = np.deg2rad(angle)
185
+ x_t = (np.cos(angle) * x * scale + np.sin(angle) * y) / scale
186
+ y_t = -np.sin(angle) * x * scale + np.cos(angle) * y
187
+ x_t = x_t + 0.5
188
+ y_t = y_t + 0.5
189
+
190
+ x_min, x_max = min(x_t), max(x_t)
191
+ y_min, y_max = min(y_t), max(y_t)
192
+
193
+ return x_min, y_min, x_max, y_max
194
+
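# Editor's note: a small sanity check for bbox_rotate above; the full-image box
# on a square image is invariant under a 90-degree rotation:
import math
rot = bbox_rotate((0.0, 0.0, 1.0, 1.0), 90, "largest_box", rows=100, cols=100)
assert all(math.isclose(a, b, abs_tol=1e-6) for a, b in zip(rot, (0.0, 0.0, 1.0, 1.0)))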
195
+
196
+ @angle_2pi_range
197
+ def keypoint_rotate(keypoint, angle, rows, cols, **params):
198
+ """Rotate a keypoint by angle.
199
+
200
+ Args:
201
+ keypoint (tuple): A keypoint `(x, y, angle, scale)`.
202
+ angle (float): Rotation angle.
203
+ rows (int): Image height.
204
+ cols (int): Image width.
205
+
206
+ Returns:
207
+ tuple: A keypoint `(x, y, angle, scale)`.
208
+
209
+ """
210
+ center = (cols - 1) * 0.5, (rows - 1) * 0.5
211
+ matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
212
+ x, y, a, s = keypoint[:4]
213
+ x, y = cv2.transform(np.array([[[x, y]]]), matrix).squeeze()
214
+ return x, y, a + math.radians(angle), s
215
+
216
+
217
+ @preserve_channel_dim
218
+ def shift_scale_rotate(
219
+ img, angle, scale, dx, dy, interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_REFLECT_101, value=None
220
+ ):
221
+ height, width = img.shape[:2]
222
+ # for images we use additional shifts of (0.5, 0.5) as otherwise
223
+ # we get an ugly black border for 90deg rotations
224
+ center = (width / 2 - 0.5, height / 2 - 0.5)
225
+ matrix = cv2.getRotationMatrix2D(center, angle, scale)
226
+ matrix[0, 2] += dx * width
227
+ matrix[1, 2] += dy * height
228
+
229
+ warp_affine_fn = _maybe_process_in_chunks(
230
+ cv2.warpAffine, M=matrix, dsize=(width, height), flags=interpolation, borderMode=border_mode, borderValue=value
231
+ )
232
+ return warp_affine_fn(img)
233
+
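# Editor's note: the matrix assembled above, spelled out for concrete numbers;
# dx/dy are fractions of width/height, not pixels. For a 100x200 (HxW) image
# with angle=30, scale=1.2, dx=0.1, dy=-0.05:
m = cv2.getRotationMatrix2D((200 / 2 - 0.5, 100 / 2 - 0.5), 30, 1.2)
m[0, 2] += 0.1 * 200     # +20 px along x
m[1, 2] += -0.05 * 100   # -5 px along y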
234
+
235
+ @angle_2pi_range
236
+ def keypoint_shift_scale_rotate(keypoint, angle, scale, dx, dy, rows, cols, **params):
237
+ (
238
+ x,
239
+ y,
240
+ a,
241
+ s,
242
+ ) = keypoint[:4]
243
+ height, width = rows, cols
244
+ center = (cols - 1) * 0.5, (rows - 1) * 0.5
245
+ matrix = cv2.getRotationMatrix2D(center, angle, scale)
246
+ matrix[0, 2] += dx * width
247
+ matrix[1, 2] += dy * height
248
+
249
+ x, y = cv2.transform(np.array([[[x, y]]]), matrix).squeeze()
250
+ angle = a + math.radians(angle)
251
+ scale = s * scale
252
+
253
+ return x, y, angle, scale
254
+
255
+
256
+ def bbox_shift_scale_rotate(bbox, angle, scale, dx, dy, rotate_method, rows, cols, **kwargs): # skipcq: PYL-W0613
257
+ """Rotates, shifts and scales a bounding box. Rotation is made by angle degrees,
258
+ scaling is made by scale factor and shifting is made by dx and dy.
259
+
260
+
261
+ Args:
262
+ bbox (tuple): A bounding box `(x_min, y_min, x_max, y_max)`.
263
+ angle (float): Angle of rotation in degrees.
264
+ scale (float): Scale factor.
265
+ dx (float): Shift along the x-axis, as a fraction of the image width.
266
+ dy (float): Shift along the y-axis, as a fraction of the image height.
267
+ rotate_method(str): Rotation method used. Should be one of: "largest_box", "ellipse".
268
+ Default: "largest_box".
269
+ rows (int): Image rows.
270
+ cols (int): Image cols.
271
+
272
+ Returns:
273
+ A bounding box `(x_min, y_min, x_max, y_max)`.
274
+
275
+ """
276
+ height, width = rows, cols
277
+ center = (width / 2, height / 2)
278
+ if rotate_method == "ellipse":
279
+ x_min, y_min, x_max, y_max = bbox_rotate(bbox, angle, rotate_method, rows, cols)
280
+ matrix = cv2.getRotationMatrix2D(center, 0, scale)
281
+ else:
282
+ x_min, y_min, x_max, y_max = bbox[:4]
283
+ matrix = cv2.getRotationMatrix2D(center, angle, scale)
284
+ matrix[0, 2] += dx * width
285
+ matrix[1, 2] += dy * height
286
+ x = np.array([x_min, x_max, x_max, x_min])
287
+ y = np.array([y_min, y_min, y_max, y_max])
288
+ ones = np.ones(shape=(len(x)))
289
+ points_ones = np.vstack([x, y, ones]).transpose()
290
+ points_ones[:, 0] *= width
291
+ points_ones[:, 1] *= height
292
+ tr_points = matrix.dot(points_ones.T).T
293
+ tr_points[:, 0] /= width
294
+ tr_points[:, 1] /= height
295
+
296
+ x_min, x_max = min(tr_points[:, 0]), max(tr_points[:, 0])
297
+ y_min, y_max = min(tr_points[:, 1]), max(tr_points[:, 1])
298
+
299
+ return x_min, y_min, x_max, y_max
300
+
301
+
302
+ @preserve_shape
303
+ def elastic_transform(
304
+ img: np.ndarray,
305
+ alpha: float,
306
+ sigma: float,
307
+ alpha_affine: float,
308
+ interpolation: int = cv2.INTER_LINEAR,
309
+ border_mode: int = cv2.BORDER_REFLECT_101,
310
+ value: Optional[ImageColorType] = None,
311
+ random_state: Optional[np.random.RandomState] = None,
312
+ approximate: bool = False,
313
+ same_dxdy: bool = False,
314
+ ):
315
+ """Elastic deformation of images as described in [Simard2003]_ (with modifications).
316
+ Based on https://gist.github.com/ernestum/601cdf56d2b424757de5
317
+
318
+ .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
319
+ Convolutional Neural Networks applied to Visual Document Analysis", in
320
+ Proc. of the International Conference on Document Analysis and
321
+ Recognition, 2003.
322
+ """
323
+ height, width = img.shape[:2]
324
+
325
+ # Random affine
326
+ center_square = np.array((height, width), dtype=np.float32) // 2
327
+ square_size = min((height, width)) // 3
328
+ alpha = float(alpha)
329
+ sigma = float(sigma)
330
+ alpha_affine = float(alpha_affine)
331
+
332
+ pts1 = np.array(
333
+ [
334
+ center_square + square_size,
335
+ [center_square[0] + square_size, center_square[1] - square_size],
336
+ center_square - square_size,
337
+ ],
338
+ dtype=np.float32,
339
+ )
340
+ pts2 = pts1 + random_utils.uniform(-alpha_affine, alpha_affine, size=pts1.shape, random_state=random_state).astype(
341
+ np.float32
342
+ )
343
+ matrix = cv2.getAffineTransform(pts1, pts2)
344
+
345
+ warp_fn = _maybe_process_in_chunks(
346
+ cv2.warpAffine, M=matrix, dsize=(width, height), flags=interpolation, borderMode=border_mode, borderValue=value
347
+ )
348
+ img = warp_fn(img)
349
+
350
+ if approximate:
351
+ # Approximate computation: smooth the displacement map with a large enough kernel.
352
+ # On large images (512+) this is approximately 2x faster.
353
+ dx = random_utils.rand(height, width, random_state=random_state).astype(np.float32) * 2 - 1
354
+ cv2.GaussianBlur(dx, (17, 17), sigma, dst=dx)
355
+ dx *= alpha
356
+ if same_dxdy:
357
+ # Speed up even more
358
+ dy = dx
359
+ else:
360
+ dy = random_utils.rand(height, width, random_state=random_state).astype(np.float32) * 2 - 1
361
+ cv2.GaussianBlur(dy, (17, 17), sigma, dst=dy)
362
+ dy *= alpha
363
+ else:
364
+ dx = np.float32(
365
+ gaussian_filter((random_utils.rand(height, width, random_state=random_state) * 2 - 1), sigma) * alpha
366
+ )
367
+ if same_dxdy:
368
+ # Speed up
369
+ dy = dx
370
+ else:
371
+ dy = np.float32(
372
+ gaussian_filter((random_utils.rand(height, width, random_state=random_state) * 2 - 1), sigma) * alpha
373
+ )
374
+
375
+ x, y = np.meshgrid(np.arange(width), np.arange(height))
376
+
377
+ map_x = np.float32(x + dx)
378
+ map_y = np.float32(y + dy)
379
+
380
+ remap_fn = _maybe_process_in_chunks(
381
+ cv2.remap, map1=map_x, map2=map_y, interpolation=interpolation, borderMode=border_mode, borderValue=value
382
+ )
383
+ return remap_fn(img)
384
+
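# Editor's note: a standalone sketch of the displacement-field idea used above
# (illustrative constants, not the library's exact parameters):
import cv2
import numpy as np

h, w = 64, 64
img = np.random.randint(0, 256, (h, w, 3), dtype=np.uint8)
rng = np.random.default_rng(0)
dx = cv2.GaussianBlur(rng.random((h, w), dtype=np.float32) * 2 - 1, (17, 17), 5) * 10
dy = cv2.GaussianBlur(rng.random((h, w), dtype=np.float32) * 2 - 1, (17, 17), 5) * 10
x, y = np.meshgrid(np.arange(w, dtype=np.float32), np.arange(h, dtype=np.float32))
warped = cv2.remap(img, x + dx, y + dy, interpolation=cv2.INTER_LINEAR,
                   borderMode=cv2.BORDER_REFLECT_101)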
385
+
386
+ @preserve_channel_dim
387
+ def resize(img, height, width, interpolation=cv2.INTER_LINEAR):
388
+ img_height, img_width = img.shape[:2]
389
+ if height == img_height and width == img_width:
390
+ return img
391
+ resize_fn = _maybe_process_in_chunks(cv2.resize, dsize=(width, height), interpolation=interpolation)
392
+ return resize_fn(img)
393
+
394
+
395
+ @preserve_channel_dim
396
+ def scale(img: np.ndarray, scale: float, interpolation: int = cv2.INTER_LINEAR) -> np.ndarray:
397
+ height, width = img.shape[:2]
398
+ new_height, new_width = int(height * scale), int(width * scale)
399
+ return resize(img, new_height, new_width, interpolation)
400
+
401
+
402
+ def keypoint_scale(keypoint: KeypointInternalType, scale_x: float, scale_y: float) -> KeypointInternalType:
403
+ """Scales a keypoint by scale_x and scale_y.
404
+
405
+ Args:
406
+ keypoint: A keypoint `(x, y, angle, scale)`.
407
+ scale_x: Scale coefficient x-axis.
408
+ scale_y: Scale coefficient y-axis.
409
+
410
+ Returns:
411
+ A keypoint `(x, y, angle, scale)`.
412
+
413
+ """
414
+ x, y, angle, scale = keypoint[:4]
415
+ return x * scale_x, y * scale_y, angle, scale * max(scale_x, scale_y)
416
+
417
+
418
+ def py3round(number):
419
+ """Unified rounding in all python versions."""
420
+ if abs(round(number) - number) == 0.5:
421
+ return int(2.0 * round(number / 2.0))
422
+
423
+ return int(round(number))
424
+
425
+
426
+ def _func_max_size(img, max_size, interpolation, func):
427
+ height, width = img.shape[:2]
428
+
429
+ scale = max_size / float(func(width, height))
430
+
431
+ if scale != 1.0:
432
+ new_height, new_width = tuple(py3round(dim * scale) for dim in (height, width))
433
+ img = resize(img, height=new_height, width=new_width, interpolation=interpolation)
434
+ return img
435
+
436
+
437
+ @preserve_channel_dim
438
+ def longest_max_size(img: np.ndarray, max_size: int, interpolation: int) -> np.ndarray:
439
+ return _func_max_size(img, max_size, interpolation, max)
440
+
441
+
442
+ @preserve_channel_dim
443
+ def smallest_max_size(img: np.ndarray, max_size: int, interpolation: int) -> np.ndarray:
444
+ return _func_max_size(img, max_size, interpolation, min)
445
+
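# Editor's note: concrete numbers for the two helpers above. For a 768x1024
# (HxW) image and max_size=512:
#   longest_max_size:  scale = 512 / 1024 = 0.5   -> resized to 384x512
#   smallest_max_size: scale = 512 / 768  ~ 0.667 -> resized to 512x683
# py3round rounds .5 ties to even in every Python version, e.g.
# py3round(2.5) == 2 and py3round(3.5) == 4.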
446
+
447
+ @preserve_channel_dim
448
+ def perspective(
449
+ img: np.ndarray,
450
+ matrix: np.ndarray,
451
+ max_width: int,
452
+ max_height: int,
453
+ border_val: Union[int, float, List[int], List[float], np.ndarray],
454
+ border_mode: int,
455
+ keep_size: bool,
456
+ interpolation: int,
457
+ ):
458
+ h, w = img.shape[:2]
459
+ perspective_func = _maybe_process_in_chunks(
460
+ cv2.warpPerspective,
461
+ M=matrix,
462
+ dsize=(max_width, max_height),
463
+ borderMode=border_mode,
464
+ borderValue=border_val,
465
+ flags=interpolation,
466
+ )
467
+ warped = perspective_func(img)
468
+
469
+ if keep_size:
470
+ return resize(warped, h, w, interpolation=interpolation)
471
+
472
+ return warped
473
+
474
+
475
+ def perspective_bbox(
476
+ bbox: BoxInternalType,
477
+ height: int,
478
+ width: int,
479
+ matrix: np.ndarray,
480
+ max_width: int,
481
+ max_height: int,
482
+ keep_size: bool,
483
+ ) -> BoxInternalType:
484
+ x1, y1, x2, y2 = denormalize_bbox(bbox, height, width)[:4]
485
+
486
+ points = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
487
+
488
+ x1, y1, x2, y2 = float("inf"), float("inf"), 0, 0
489
+ for pt in points:
490
+ pt = perspective_keypoint(pt.tolist() + [0, 0], height, width, matrix, max_width, max_height, keep_size)
491
+ x, y = pt[:2]
492
+ x1 = min(x1, x)
493
+ x2 = max(x2, x)
494
+ y1 = min(y1, y)
495
+ y2 = max(y2, y)
496
+
497
+ return normalize_bbox((x1, y1, x2, y2), height if keep_size else max_height, width if keep_size else max_width)
498
+
499
+
500
+ def rotation2DMatrixToEulerAngles(matrix: np.ndarray, y_up: bool = False) -> float:
501
+ """
502
+ Args:
503
+ matrix (np.ndarray): Rotation matrix
504
+ y_up (bool): is Y axis looks up or down
505
+ """
506
+ if y_up:
507
+ return np.arctan2(matrix[1, 0], matrix[0, 0])
508
+ return np.arctan2(-matrix[1, 0], matrix[0, 0])
509
+
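# Editor's note: quick check that the angle of an OpenCV rotation matrix is
# recovered by the helper above (the default y_up=False matches OpenCV's
# y-down image coordinates):
import math
m = cv2.getRotationMatrix2D((0, 0), 30, 1.0)
assert math.isclose(rotation2DMatrixToEulerAngles(m[:2, :2]), math.radians(30), abs_tol=1e-9)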
510
+
511
+ @angle_2pi_range
512
+ def perspective_keypoint(
513
+ keypoint: KeypointInternalType,
514
+ height: int,
515
+ width: int,
516
+ matrix: np.ndarray,
517
+ max_width: int,
518
+ max_height: int,
519
+ keep_size: bool,
520
+ ) -> KeypointInternalType:
521
+ x, y, angle, scale = keypoint
522
+
523
+ keypoint_vector = np.array([x, y], dtype=np.float32).reshape([1, 1, 2])
524
+
525
+ x, y = cv2.perspectiveTransform(keypoint_vector, matrix)[0, 0]
526
+ angle += rotation2DMatrixToEulerAngles(matrix[:2, :2], y_up=True)
527
+
528
+ scale_x = np.sign(matrix[0, 0]) * np.sqrt(matrix[0, 0] ** 2 + matrix[0, 1] ** 2)
529
+ scale_y = np.sign(matrix[1, 1]) * np.sqrt(matrix[1, 0] ** 2 + matrix[1, 1] ** 2)
530
+ scale *= max(scale_x, scale_y)
531
+
532
+ if keep_size:
533
+ scale_x = width / max_width
534
+ scale_y = height / max_height
535
+ return keypoint_scale((x, y, angle, scale), scale_x, scale_y)
536
+
537
+ return x, y, angle, scale
538
+
539
+
540
+ def _is_identity_matrix(matrix: skimage.transform.ProjectiveTransform) -> bool:
541
+ return np.allclose(matrix.params, np.eye(3, dtype=np.float32))
542
+
543
+
544
+ @preserve_channel_dim
545
+ def warp_affine(
546
+ image: np.ndarray,
547
+ matrix: skimage.transform.ProjectiveTransform,
548
+ interpolation: int,
549
+ cval: Union[int, float, Sequence[int], Sequence[float]],
550
+ mode: int,
551
+ output_shape: Sequence[int],
552
+ ) -> np.ndarray:
553
+ if _is_identity_matrix(matrix):
554
+ return image
555
+
556
+ dsize = int(np.round(output_shape[1])), int(np.round(output_shape[0]))
557
+ warp_fn = _maybe_process_in_chunks(
558
+ cv2.warpAffine, M=matrix.params[:2], dsize=dsize, flags=interpolation, borderMode=mode, borderValue=cval
559
+ )
560
+ tmp = warp_fn(image)
561
+ return tmp
562
+
563
+
564
+ @angle_2pi_range
565
+ def keypoint_affine(
566
+ keypoint: KeypointInternalType,
567
+ matrix: skimage.transform.ProjectiveTransform,
568
+ scale: dict,
569
+ ) -> KeypointInternalType:
570
+ if _is_identity_matrix(matrix):
571
+ return keypoint
572
+
573
+ x, y, a, s = keypoint[:4]
574
+ x, y = cv2.transform(np.array([[[x, y]]]), matrix.params[:2]).squeeze()
575
+ a += rotation2DMatrixToEulerAngles(matrix.params[:2])
576
+ s *= np.max([scale["x"], scale["y"]])
577
+ return x, y, a, s
578
+
579
+
580
+ def bbox_affine(
581
+ bbox: BoxInternalType,
582
+ matrix: skimage.transform.ProjectiveTransform,
583
+ rotate_method: str,
584
+ rows: int,
585
+ cols: int,
586
+ output_shape: Sequence[int],
587
+ ) -> BoxInternalType:
588
+ if _is_identity_matrix(matrix):
589
+ return bbox
590
+ x_min, y_min, x_max, y_max = denormalize_bbox(bbox, rows, cols)[:4]
591
+ if rotate_method == "largest_box":
592
+ points = np.array(
593
+ [
594
+ [x_min, y_min],
595
+ [x_max, y_min],
596
+ [x_max, y_max],
597
+ [x_min, y_max],
598
+ ]
599
+ )
600
+ elif rotate_method == "ellipse":
601
+ w = (x_max - x_min) / 2
602
+ h = (y_max - y_min) / 2
603
+ data = np.arange(0, 360, dtype=np.float32)
604
+ x = w * np.sin(np.radians(data)) + (w + x_min - 0.5)
605
+ y = h * np.cos(np.radians(data)) + (h + y_min - 0.5)
606
+ points = np.hstack([x.reshape(-1, 1), y.reshape(-1, 1)])
607
+ else:
608
+ raise ValueError(f"Method {rotate_method} is not a valid rotation method.")
609
+ points = skimage.transform.matrix_transform(points, matrix.params)
610
+ x_min = np.min(points[:, 0])
611
+ x_max = np.max(points[:, 0])
612
+ y_min = np.min(points[:, 1])
613
+ y_max = np.max(points[:, 1])
614
+
615
+ return normalize_bbox((x_min, y_min, x_max, y_max), output_shape[0], output_shape[1])
616
+
617
+
618
+ @preserve_channel_dim
619
+ def safe_rotate(
620
+ img: np.ndarray,
621
+ matrix: np.ndarray,
622
+ interpolation: int,
623
+ value: FillValueType = None,
624
+ border_mode: int = cv2.BORDER_REFLECT_101,
625
+ ) -> np.ndarray:
626
+ h, w = img.shape[:2]
627
+ warp_fn = _maybe_process_in_chunks(
628
+ cv2.warpAffine,
629
+ M=matrix,
630
+ dsize=(w, h),
631
+ flags=interpolation,
632
+ borderMode=border_mode,
633
+ borderValue=value,
634
+ )
635
+ return warp_fn(img)
636
+
637
+
638
+ def bbox_safe_rotate(bbox: BoxInternalType, matrix: np.ndarray, cols: int, rows: int) -> BoxInternalType:
639
+ x1, y1, x2, y2 = denormalize_bbox(bbox, rows, cols)[:4]
640
+ points = np.array(
641
+ [
642
+ [x1, y1, 1],
643
+ [x2, y1, 1],
644
+ [x2, y2, 1],
645
+ [x1, y2, 1],
646
+ ]
647
+ )
648
+ points = points @ matrix.T
649
+ x1 = points[:, 0].min()
650
+ x2 = points[:, 0].max()
651
+ y1 = points[:, 1].min()
652
+ y2 = points[:, 1].max()
653
+
654
+ def fix_point(pt1: float, pt2: float, max_val: float) -> Tuple[float, float]:
655
+ # These errors should be very small, on the order of 1-2 pixels.
656
+ if pt1 < 0:
657
+ return 0, pt2 + pt1
658
+ if pt2 > max_val:
659
+ return pt1 - (pt2 - max_val), max_val
660
+ return pt1, pt2
661
+
662
+ x1, x2 = fix_point(x1, x2, cols)
663
+ y1, y2 = fix_point(y1, y2, rows)
664
+
665
+ return normalize_bbox((x1, y1, x2, y2), rows, cols)
666
+
667
+
668
+ def keypoint_safe_rotate(
669
+ keypoint: KeypointInternalType,
670
+ matrix: np.ndarray,
671
+ angle: float,
672
+ scale_x: float,
673
+ scale_y: float,
674
+ cols: int,
675
+ rows: int,
676
+ ) -> KeypointInternalType:
677
+ x, y, a, s = keypoint[:4]
678
+ point = np.array([[x, y, 1]])
679
+ x, y = (point @ matrix.T)[0]
680
+
681
+ # To avoid problems with float errors
682
+ x = np.clip(x, 0, cols - 1)
683
+ y = np.clip(y, 0, rows - 1)
684
+
685
+ a += angle
686
+ s *= max(scale_x, scale_y)
687
+ return x, y, a, s
688
+
689
+
690
+ @clipped
691
+ def piecewise_affine(
692
+ img: np.ndarray,
693
+ matrix: Optional[skimage.transform.PiecewiseAffineTransform],
694
+ interpolation: int,
695
+ mode: str,
696
+ cval: float,
697
+ ) -> np.ndarray:
698
+ if matrix is None:
699
+ return img
700
+ return skimage.transform.warp(
701
+ img, matrix, order=interpolation, mode=mode, cval=cval, preserve_range=True, output_shape=img.shape
702
+ )
703
+
704
+
705
+ def to_distance_maps(
706
+ keypoints: Sequence[Tuple[float, float]], height: int, width: int, inverted: bool = False
707
+ ) -> np.ndarray:
708
+ """Generate a ``(H,W,N)`` array of distance maps for ``N`` keypoints.
709
+
710
+ The ``n``-th distance map contains at every location ``(y, x)`` the
711
+ euclidean distance to the ``n``-th keypoint.
712
+
713
+ This function can be used as a helper when augmenting keypoints with a
714
+ method that only supports the augmentation of images.
715
+
716
+ Args:
717
+ keypoints: keypoint coordinates
718
+ height: image height
719
+ width: image width
720
+ inverted (bool): If ``True``, inverted distance maps are returned where each
721
+ distance value d is replaced by ``d/(d+1)``, i.e. the distance
722
+ maps have values in the range ``(0.0, 1.0]`` with ``1.0`` denoting
723
+ exactly the position of the respective keypoint.
724
+
725
+ Returns:
726
+ (H, W, N) ndarray
727
+ A ``float32`` array containing ``N`` distance maps for ``N``
728
+ keypoints. Each location ``(y, x, n)`` in the array denotes the
729
+ euclidean distance at ``(y, x)`` to the ``n``-th keypoint.
730
+ If `inverted` is ``True``, the distance ``d`` is replaced
731
+ by ``d/(d+1)``. The height and width of the array match the
732
+ height and width in ``KeypointsOnImage.shape``.
733
+ """
734
+ distance_maps = np.zeros((height, width, len(keypoints)), dtype=np.float32)
735
+
736
+ yy = np.arange(0, height)
737
+ xx = np.arange(0, width)
738
+ grid_xx, grid_yy = np.meshgrid(xx, yy)
739
+
740
+ for i, (x, y) in enumerate(keypoints):
741
+ distance_maps[:, :, i] = (grid_xx - x) ** 2 + (grid_yy - y) ** 2
742
+
743
+ distance_maps = np.sqrt(distance_maps)
744
+ if inverted:
745
+ return 1 / (distance_maps + 1)
746
+ return distance_maps
747
+
748
+
749
+ def from_distance_maps(
750
+ distance_maps: np.ndarray,
751
+ inverted: bool,
752
+ if_not_found_coords: Optional[Union[Sequence[int], dict]],
753
+ threshold: Optional[float] = None,
754
+ ) -> List[Tuple[float, float]]:
755
+ """Convert outputs of ``to_distance_maps()`` to ``KeypointsOnImage``.
756
+ This is the inverse of `to_distance_maps`.
757
+
758
+ Args:
759
+ distance_maps (np.ndarray): The distance maps. ``N`` is the number of keypoints.
760
+ inverted (bool): Whether the given distance maps were generated in inverted mode
761
+ (i.e. :func:`KeypointsOnImage.to_distance_maps` was called with ``inverted=True``) or in non-inverted mode.
762
+ if_not_found_coords (tuple, list, dict or None, optional):
763
+ Coordinates to use for keypoints that cannot be found in `distance_maps`.
764
+
765
+ * If this is a ``list``/``tuple``, it must contain two ``int`` values.
766
+ * If it is a ``dict``, it must contain the keys ``x`` and ``y`` with each containing one ``int`` value.
767
+ * If this is ``None``, then the keypoint will not be added.
768
+ threshold (float): The search for keypoints works by searching for the
769
+ argmin (non-inverted) or argmax (inverted) in each channel. This
770
+ parameter contains the maximum (non-inverted) or minimum (inverted) value to accept in order to view a hit
771
+ as a keypoint. Use ``None`` to use no min/max.
772
775
+ """
776
+ if distance_maps.ndim != 3:
777
+ raise ValueError(
778
+ f"Expected three-dimensional input, "
779
+ f"got {distance_maps.ndim} dimensions and shape {distance_maps.shape}."
780
+ )
781
+ height, width, nb_keypoints = distance_maps.shape
782
+
783
+ drop_if_not_found = False
784
+ if if_not_found_coords is None:
785
+ drop_if_not_found = True
786
+ if_not_found_x = -1
787
+ if_not_found_y = -1
788
+ elif isinstance(if_not_found_coords, (tuple, list)):
789
+ if len(if_not_found_coords) != 2:
790
+ raise ValueError(
791
+ f"Expected tuple/list 'if_not_found_coords' to contain exactly two entries, "
792
+ f"got {len(if_not_found_coords)}."
793
+ )
794
+ if_not_found_x = if_not_found_coords[0]
795
+ if_not_found_y = if_not_found_coords[1]
796
+ elif isinstance(if_not_found_coords, dict):
797
+ if_not_found_x = if_not_found_coords["x"]
798
+ if_not_found_y = if_not_found_coords["y"]
799
+ else:
800
+ raise ValueError(
801
+ f"Expected if_not_found_coords to be None or tuple or list or dict, got {type(if_not_found_coords)}."
802
+ )
803
+
804
+ keypoints = []
805
+ for i in range(nb_keypoints):
806
+ if inverted:
807
+ hitidx_flat = np.argmax(distance_maps[..., i])
808
+ else:
809
+ hitidx_flat = np.argmin(distance_maps[..., i])
810
+ hitidx_ndim = np.unravel_index(hitidx_flat, (height, width))
811
+ if not inverted and threshold is not None:
812
+ found = distance_maps[hitidx_ndim[0], hitidx_ndim[1], i] < threshold
813
+ elif inverted and threshold is not None:
814
+ found = distance_maps[hitidx_ndim[0], hitidx_ndim[1], i] >= threshold
815
+ else:
816
+ found = True
817
+ if found:
818
+ keypoints.append((float(hitidx_ndim[1]), float(hitidx_ndim[0])))
819
+ else:
820
+ if not drop_if_not_found:
821
+ keypoints.append((if_not_found_x, if_not_found_y))
822
+
823
+ return keypoints
824
+
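# Editor's note: roundtrip sketch for the two helpers above. Keypoints at
# integer coordinates survive the map/unmap exactly:
kps = [(10.0, 20.0), (3.0, 7.0)]
maps = to_distance_maps(kps, height=32, width=32, inverted=True)
assert from_distance_maps(maps, inverted=True, if_not_found_coords=None) == kps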
825
+
826
+ def keypoint_piecewise_affine(
827
+ keypoint: KeypointInternalType,
828
+ matrix: Optional[skimage.transform.PiecewiseAffineTransform],
829
+ h: int,
830
+ w: int,
831
+ keypoints_threshold: float,
832
+ ) -> KeypointInternalType:
833
+ if matrix is None:
834
+ return keypoint
835
+ x, y, a, s = keypoint[:4]
836
+ dist_maps = to_distance_maps([(x, y)], h, w, True)
837
+ dist_maps = piecewise_affine(dist_maps, matrix, 0, "constant", 0)
838
+ x, y = from_distance_maps(dist_maps, True, {"x": -1, "y": -1}, keypoints_threshold)[0]
839
+ return x, y, a, s
840
+
841
+
842
+ def bbox_piecewise_affine(
843
+ bbox: BoxInternalType,
844
+ matrix: Optional[skimage.transform.PiecewiseAffineTransform],
845
+ h: int,
846
+ w: int,
847
+ keypoints_threshold: float,
848
+ ) -> BoxInternalType:
849
+ if matrix is None:
850
+ return bbox
851
+ x1, y1, x2, y2 = denormalize_bbox(bbox, h, w)[:4]
852
+ keypoints = [
853
+ (x1, y1),
854
+ (x2, y1),
855
+ (x2, y2),
856
+ (x1, y2),
857
+ ]
858
+ dist_maps = to_distance_maps(keypoints, h, w, True)
859
+ dist_maps = piecewise_affine(dist_maps, matrix, 0, "constant", 0)
860
+ keypoints = from_distance_maps(dist_maps, True, {"x": -1, "y": -1}, keypoints_threshold)
861
+ keypoints = [i for i in keypoints if 0 <= i[0] < w and 0 <= i[1] < h]
862
+ keypoints_arr = np.array(keypoints)
863
+ x1 = keypoints_arr[:, 0].min()
864
+ y1 = keypoints_arr[:, 1].min()
865
+ x2 = keypoints_arr[:, 0].max()
866
+ y2 = keypoints_arr[:, 1].max()
867
+ return normalize_bbox((x1, y1, x2, y2), h, w)
868
+
869
+
870
+ def vflip(img: np.ndarray) -> np.ndarray:
871
+ return np.ascontiguousarray(img[::-1, ...])
872
+
873
+
874
+ def hflip(img: np.ndarray) -> np.ndarray:
875
+ return np.ascontiguousarray(img[:, ::-1, ...])
876
+
877
+
878
+ def hflip_cv2(img: np.ndarray) -> np.ndarray:
879
+ return cv2.flip(img, 1)
880
+
881
+
882
+ @preserve_shape
883
+ def random_flip(img: np.ndarray, code: int) -> np.ndarray:
884
+ return cv2.flip(img, code)
885
+
886
+
887
+ def transpose(img: np.ndarray) -> np.ndarray:
888
+ return img.transpose(1, 0, 2) if len(img.shape) > 2 else img.transpose(1, 0)
889
+
890
+
891
+ def rot90(img: np.ndarray, factor: int) -> np.ndarray:
892
+ img = np.rot90(img, factor)
893
+ return np.ascontiguousarray(img)
894
+
895
+
896
+ def bbox_vflip(bbox: BoxInternalType, rows: int, cols: int) -> BoxInternalType: # skipcq: PYL-W0613
897
+ """Flip a bounding box vertically around the x-axis.
898
+
899
+ Args:
900
+ bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
901
+ rows: Image rows.
902
+ cols: Image cols.
903
+
904
+ Returns:
905
+ tuple: A bounding box `(x_min, y_min, x_max, y_max)`.
906
+
907
+ """
908
+ x_min, y_min, x_max, y_max = bbox[:4]
909
+ return x_min, 1 - y_max, x_max, 1 - y_min
910
+
911
+
912
+ def bbox_hflip(bbox: BoxInternalType, rows: int, cols: int) -> BoxInternalType: # skipcq: PYL-W0613
913
+ """Flip a bounding box horizontally around the y-axis.
914
+
915
+ Args:
916
+ bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
917
+ rows: Image rows.
918
+ cols: Image cols.
919
+
920
+ Returns:
921
+ A bounding box `(x_min, y_min, x_max, y_max)`.
922
+
923
+ """
924
+ x_min, y_min, x_max, y_max = bbox[:4]
925
+ return 1 - x_max, y_min, 1 - x_min, y_max
926
+
927
+
928
+ def bbox_flip(bbox: BoxInternalType, d: int, rows: int, cols: int) -> BoxInternalType:
929
+ """Flip a bounding box either vertically, horizontally or both depending on the value of `d`.
930
+
931
+ Args:
932
+ bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
933
+ d: dimension. 0 for vertical flip, 1 for horizontal, -1 for transpose
934
+ rows: Image rows.
935
+ cols: Image cols.
936
+
937
+ Returns:
938
+ A bounding box `(x_min, y_min, x_max, y_max)`.
939
+
940
+ Raises:
941
+ ValueError: if value of `d` is not -1, 0 or 1.
942
+
943
+ """
944
+ if d == 0:
945
+ bbox = bbox_vflip(bbox, rows, cols)
946
+ elif d == 1:
947
+ bbox = bbox_hflip(bbox, rows, cols)
948
+ elif d == -1:
949
+ bbox = bbox_hflip(bbox, rows, cols)
950
+ bbox = bbox_vflip(bbox, rows, cols)
951
+ else:
952
+ raise ValueError("Invalid d value {}. Valid values are -1, 0 and 1".format(d))
953
+ return bbox
954
+
955
+
956
+ def bbox_transpose(
957
+ bbox: KeypointInternalType, axis: int, rows: int, cols: int
958
+ ) -> KeypointInternalType: # skipcq: PYL-W0613
959
+ """Transposes a bounding box along given axis.
960
+
961
+ Args:
962
+ bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
963
+ axis: 0 - main axis, 1 - secondary axis.
964
+ rows: Image rows.
965
+ cols: Image cols.
966
+
967
+ Returns:
968
+ A bounding box tuple `(x_min, y_min, x_max, y_max)`.
969
+
970
+ Raises:
971
+ ValueError: If axis not equal to 0 or 1.
972
+
973
+ """
974
+ x_min, y_min, x_max, y_max = bbox[:4]
975
+ if axis not in {0, 1}:
976
+ raise ValueError("Axis must be either 0 or 1.")
977
+ if axis == 0:
978
+ bbox = (y_min, x_min, y_max, x_max)
979
+ if axis == 1:
980
+ bbox = (1 - y_max, 1 - x_max, 1 - y_min, 1 - x_min)
981
+ return bbox
982
+
983
+
984
+ @angle_2pi_range
985
+ def keypoint_vflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
986
+ """Flip a keypoint vertically around the x-axis.
987
+
988
+ Args:
989
+ keypoint: A keypoint `(x, y, angle, scale)`.
990
+ rows: Image height.
991
+ cols: Image width.
992
+
993
+ Returns:
994
+ tuple: A keypoint `(x, y, angle, scale)`.
995
+
996
+ """
997
+ x, y, angle, scale = keypoint[:4]
998
+ angle = -angle
999
+ return x, (rows - 1) - y, angle, scale
1000
+
1001
+
1002
+ @angle_2pi_range
1003
+ def keypoint_hflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
1004
+ """Flip a keypoint horizontally around the y-axis.
1005
+
1006
+ Args:
1007
+ keypoint: A keypoint `(x, y, angle, scale)`.
1008
+ rows: Image height.
1009
+ cols: Image width.
1010
+
1011
+ Returns:
1012
+ A keypoint `(x, y, angle, scale)`.
1013
+
1014
+ """
1015
+ x, y, angle, scale = keypoint[:4]
1016
+ angle = math.pi - angle
1017
+ return (cols - 1) - x, y, angle, scale
1018
+
1019
+
1020
+ def keypoint_flip(keypoint: KeypointInternalType, d: int, rows: int, cols: int) -> KeypointInternalType:
1021
+ """Flip a keypoint either vertically, horizontally or both depending on the value of `d`.
1022
+
1023
+ Args:
1024
+ keypoint: A keypoint `(x, y, angle, scale)`.
1025
+ d: Number of flip. Must be -1, 0 or 1:
1026
+ * 0 - vertical flip,
1027
+ * 1 - horizontal flip,
1028
+ * -1 - vertical and horizontal flip.
1029
+ rows: Image height.
1030
+ cols: Image width.
1031
+
1032
+ Returns:
1033
+ A keypoint `(x, y, angle, scale)`.
1034
+
1035
+ Raises:
1036
+ ValueError: if value of `d` is not -1, 0 or 1.
1037
+
1038
+ """
1039
+ if d == 0:
1040
+ keypoint = keypoint_vflip(keypoint, rows, cols)
1041
+ elif d == 1:
1042
+ keypoint = keypoint_hflip(keypoint, rows, cols)
1043
+ elif d == -1:
1044
+ keypoint = keypoint_hflip(keypoint, rows, cols)
1045
+ keypoint = keypoint_vflip(keypoint, rows, cols)
1046
+ else:
1047
+ raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")
1048
+ return keypoint
1049
+
1050
+
1051
+ def keypoint_transpose(keypoint: KeypointInternalType) -> KeypointInternalType:
1052
+ """Transpose a keypoint along the main diagonal by swapping x and y.
1053
+
1054
+ Args:
1055
+ keypoint: A keypoint `(x, y, angle, scale)`.
1056
+
1057
+ Returns:
1058
+ A keypoint `(x, y, angle, scale)`.
1059
+
1060
+ """
1061
+ x, y, angle, scale = keypoint[:4]
1062
+
1063
+ if angle <= np.pi:
1064
+ angle = np.pi - angle
1065
+ else:
1066
+ angle = 3 * np.pi - angle
1067
+
1068
+ return y, x, angle, scale
1069
+
1070
+
1071
+ @preserve_channel_dim
1072
+ def pad(
1073
+ img: np.ndarray,
1074
+ min_height: int,
1075
+ min_width: int,
1076
+ border_mode: int = cv2.BORDER_REFLECT_101,
1077
+ value: Optional[ImageColorType] = None,
1078
+ ) -> np.ndarray:
1079
+ height, width = img.shape[:2]
1080
+
1081
+ if height < min_height:
1082
+ h_pad_top = int((min_height - height) / 2.0)
1083
+ h_pad_bottom = min_height - height - h_pad_top
1084
+ else:
1085
+ h_pad_top = 0
1086
+ h_pad_bottom = 0
1087
+
1088
+ if width < min_width:
1089
+ w_pad_left = int((min_width - width) / 2.0)
1090
+ w_pad_right = min_width - width - w_pad_left
1091
+ else:
1092
+ w_pad_left = 0
1093
+ w_pad_right = 0
1094
+
1095
+ img = pad_with_params(img, h_pad_top, h_pad_bottom, w_pad_left, w_pad_right, border_mode, value)
1096
+
1097
+ if img.shape[:2] != (max(min_height, height), max(min_width, width)):
1098
+ raise RuntimeError(
1099
+ "Invalid result shape. Got: {}. Expected: {}".format(
1100
+ img.shape[:2], (max(min_height, height), max(min_width, width))
1101
+ )
1102
+ )
1103
+
1104
+ return img
1105
+
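# Editor's note: illustrative call of pad above. A 100x150 image padded to at
# least 128x128 gets 14 px on top, 14 px on the bottom, and nothing on the sides:
padded = pad(np.zeros((100, 150, 3), dtype=np.uint8), min_height=128, min_width=128)
assert padded.shape[:2] == (128, 150)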
1106
+
1107
+ @preserve_channel_dim
1108
+ def pad_with_params(
1109
+ img: np.ndarray,
1110
+ h_pad_top: int,
1111
+ h_pad_bottom: int,
1112
+ w_pad_left: int,
1113
+ w_pad_right: int,
1114
+ border_mode: int = cv2.BORDER_REFLECT_101,
1115
+ value: Optional[ImageColorType] = None,
1116
+ ) -> np.ndarray:
1117
+ pad_fn = _maybe_process_in_chunks(
1118
+ cv2.copyMakeBorder,
1119
+ top=h_pad_top,
1120
+ bottom=h_pad_bottom,
1121
+ left=w_pad_left,
1122
+ right=w_pad_right,
1123
+ borderType=border_mode,
1124
+ value=value,
1125
+ )
1126
+ return pad_fn(img)
1127
+
1128
+
1129
+ @preserve_shape
1130
+ def optical_distortion(
1131
+ img: np.ndarray,
1132
+ k: int = 0,
1133
+ dx: int = 0,
1134
+ dy: int = 0,
1135
+ interpolation: int = cv2.INTER_LINEAR,
1136
+ border_mode: int = cv2.BORDER_REFLECT_101,
1137
+ value: Optional[ImageColorType] = None,
1138
+ ) -> np.ndarray:
1139
+ """Barrel / pincushion distortion. An unconventional augmentation.
1140
+
1141
+ Reference:
1142
+ | https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion
1143
+ | https://stackoverflow.com/questions/10364201/image-transformation-in-opencv
1144
+ | https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically
1145
+ | http://www.coldvision.io/2017/03/02/advanced-lane-finding-using-opencv/
1146
+ """
1147
+ height, width = img.shape[:2]
1148
+
1149
+ fx = width
1150
+ fy = height
1151
+
1152
+ cx = width * 0.5 + dx
1153
+ cy = height * 0.5 + dy
1154
+
1155
+ camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
1156
+
1157
+ distortion = np.array([k, k, 0, 0, 0], dtype=np.float32)
1158
+ map1, map2 = cv2.initUndistortRectifyMap(
1159
+ camera_matrix, distortion, None, None, (width, height), cv2.CV_32FC1 # type: ignore[attr-defined]
1160
+ )
1161
+ return cv2.remap(img, map1, map2, interpolation=interpolation, borderMode=border_mode, borderValue=value)
1162
+
1163
+
1164
+ @preserve_shape
1165
+ def grid_distortion(
1166
+ img: np.ndarray,
1167
+ num_steps: int = 10,
1168
+ xsteps: Tuple = (),
1169
+ ysteps: Tuple = (),
1170
+ interpolation: int = cv2.INTER_LINEAR,
1171
+ border_mode: int = cv2.BORDER_REFLECT_101,
1172
+ value: Optional[ImageColorType] = None,
1173
+ ) -> np.ndarray:
1174
+ """Perform a grid distortion of an input image.
1175
+
1176
+ Reference:
1177
+ http://pythology.blogspot.sg/2014/03/interpolation-on-regular-distorted-grid.html
1178
+ """
1179
+ height, width = img.shape[:2]
1180
+
1181
+ x_step = width // num_steps
1182
+ xx = np.zeros(width, np.float32)
1183
+ prev = 0
1184
+ for idx in range(num_steps + 1):
1185
+ x = idx * x_step
1186
+ start = int(x)
1187
+ end = int(x) + x_step
1188
+ if end > width:
1189
+ end = width
1190
+ cur = width
1191
+ else:
1192
+ cur = prev + x_step * xsteps[idx]
1193
+
1194
+ xx[start:end] = np.linspace(prev, cur, end - start)
1195
+ prev = cur
1196
+
1197
+ y_step = height // num_steps
1198
+ yy = np.zeros(height, np.float32)
1199
+ prev = 0
1200
+ for idx in range(num_steps + 1):
1201
+ y = idx * y_step
1202
+ start = int(y)
1203
+ end = int(y) + y_step
1204
+ if end > height:
1205
+ end = height
1206
+ cur = height
1207
+ else:
1208
+ cur = prev + y_step * ysteps[idx]
1209
+
1210
+ yy[start:end] = np.linspace(prev, cur, end - start)
1211
+ prev = cur
1212
+
1213
+ map_x, map_y = np.meshgrid(xx, yy)
1214
+ map_x = map_x.astype(np.float32)
1215
+ map_y = map_y.astype(np.float32)
1216
+
1217
+ remap_fn = _maybe_process_in_chunks(
1218
+ cv2.remap,
1219
+ map1=map_x,
1220
+ map2=map_y,
1221
+ interpolation=interpolation,
1222
+ borderMode=border_mode,
1223
+ borderValue=value,
1224
+ )
1225
+ return remap_fn(img)
1226
+
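# Editor's note: how the x-axis map above is built for num_steps=2, width=8,
# xsteps=(1.0, 1.5, 1.0):
#   cell 0: pixels [0, 4) map linearly from 0 to 0 + 4 * 1.0 = 4   (unchanged)
#   cell 1: pixels [4, 8) map linearly from 4 to 4 + 4 * 1.5 = 10  (stretched)
#   the last grid line starts at the image border, so it contributes nothing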
1227
+
1228
+ @preserve_shape
1229
+ def elastic_transform_approx(
1230
+ img: np.ndarray,
1231
+ alpha: float,
1232
+ sigma: float,
1233
+ alpha_affine: float,
1234
+ interpolation: int = cv2.INTER_LINEAR,
1235
+ border_mode: int = cv2.BORDER_REFLECT_101,
1236
+ value: Optional[ImageColorType] = None,
1237
+ random_state: Optional[np.random.RandomState] = None,
1238
+ ) -> np.ndarray:
1239
+ """Elastic deformation of images as described in [Simard2003]_ (with modifications for speed).
1240
+ Based on https://gist.github.com/ernestum/601cdf56d2b424757de5
1241
+
1242
+ .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
1243
+ Convolutional Neural Networks applied to Visual Document Analysis", in
1244
+ Proc. of the International Conference on Document Analysis and
1245
+ Recognition, 2003.
1246
+ """
1247
+ height, width = img.shape[:2]
1248
+
1249
+ # Random affine
1250
+ center_square = np.array((height, width), dtype=np.float32) // 2
1251
+ square_size = min((height, width)) // 3
1252
+ alpha = float(alpha)
1253
+ sigma = float(sigma)
1254
+ alpha_affine = float(alpha_affine)
1255
+
1256
+ pts1 = np.array(
1257
+ [
1258
+ center_square + square_size,
1259
+ [center_square[0] + square_size, center_square[1] - square_size],
1260
+ center_square - square_size,
1261
+ ],
1262
+ dtype=np.float32,
1263
+ )
1264
+ pts2 = pts1 + random_utils.uniform(-alpha_affine, alpha_affine, size=pts1.shape, random_state=random_state).astype(
1265
+ np.float32
1266
+ )
1267
+ matrix = cv2.getAffineTransform(pts1, pts2)
1268
+
1269
+ warp_fn = _maybe_process_in_chunks(
1270
+ cv2.warpAffine,
1271
+ M=matrix,
1272
+ dsize=(width, height),
1273
+ flags=interpolation,
1274
+ borderMode=border_mode,
1275
+ borderValue=value,
1276
+ )
1277
+ img = warp_fn(img)
1278
+
1279
+ dx = random_utils.rand(height, width, random_state=random_state).astype(np.float32) * 2 - 1
1280
+ cv2.GaussianBlur(dx, (17, 17), sigma, dst=dx)
1281
+ dx *= alpha
1282
+
1283
+ dy = random_utils.rand(height, width, random_state=random_state).astype(np.float32) * 2 - 1
1284
+ cv2.GaussianBlur(dy, (17, 17), sigma, dst=dy)
1285
+ dy *= alpha
1286
+
1287
+ x, y = np.meshgrid(np.arange(width), np.arange(height))
1288
+
1289
+ map_x = np.float32(x + dx)
1290
+ map_y = np.float32(y + dy)
1291
+
1292
+ remap_fn = _maybe_process_in_chunks(
1293
+ cv2.remap,
1294
+ map1=map_x,
1295
+ map2=map_y,
1296
+ interpolation=interpolation,
1297
+ borderMode=border_mode,
1298
+ borderValue=value,
1299
+ )
1300
+ return remap_fn(img)
custom_albumentations/augmentations/geometric/resize.py ADDED
@@ -0,0 +1,198 @@
1
+ import random
2
+ from typing import Dict, Sequence, Tuple, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+
7
+ from ...core.transforms_interface import (
8
+ BoxInternalType,
9
+ DualTransform,
10
+ KeypointInternalType,
11
+ to_tuple,
12
+ )
13
+ from . import functional as F
14
+
15
+ __all__ = ["RandomScale", "LongestMaxSize", "SmallestMaxSize", "Resize"]
16
+
17
+
18
+ class RandomScale(DualTransform):
19
+ """Randomly resize the input. Output image size is different from the input image size.
20
+
21
+ Args:
22
+ scale_limit ((float, float) or float): scaling factor range. If scale_limit is a single float value, the
23
+ range will be (-scale_limit, scale_limit). Note that the scale_limit will be biased by 1.
24
+ If scale_limit is a tuple, like (low, high), sampling will be done from the range (1 + low, 1 + high).
25
+ Default: (-0.1, 0.1).
26
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
27
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
28
+ Default: cv2.INTER_LINEAR.
29
+ p (float): probability of applying the transform. Default: 0.5.
30
+
31
+ Targets:
32
+ image, mask, bboxes, keypoints
33
+
34
+ Image types:
35
+ uint8, float32
36
+ """
37
+
38
+ def __init__(self, scale_limit=0.1, interpolation=cv2.INTER_LINEAR, always_apply=False, p=0.5):
39
+ super(RandomScale, self).__init__(always_apply, p)
40
+ self.scale_limit = to_tuple(scale_limit, bias=1.0)
41
+ self.interpolation = interpolation
42
+
43
+ def get_params(self):
44
+ return {"scale": random.uniform(self.scale_limit[0], self.scale_limit[1])}
45
+
46
+ def apply(self, img, scale=0, interpolation=cv2.INTER_LINEAR, **params):
47
+ return F.scale(img, scale, interpolation)
48
+
49
+ def apply_to_bbox(self, bbox, **params):
50
+ # Bounding box coordinates are scale invariant
51
+ return bbox
52
+
53
+ def apply_to_keypoint(self, keypoint, scale=0, **params):
54
+ return F.keypoint_scale(keypoint, scale, scale)
55
+
56
+ def get_transform_init_args(self):
57
+ return {"interpolation": self.interpolation, "scale_limit": to_tuple(self.scale_limit, bias=-1.0)}
58
+
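# Editor's note: with the default scale_limit=0.1, to_tuple(0.1, bias=1.0)
# yields (0.9, 1.1), so the sampled scale factor lies in [0.9, 1.1];
# get_transform_init_args subtracts the bias back out for serialization.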
59
+
60
+ class LongestMaxSize(DualTransform):
61
+ """Rescale an image so that maximum side is equal to max_size, keeping the aspect ratio of the initial image.
62
+
63
+ Args:
64
+ max_size (int, list of int): maximum size of the image after the transformation. When using a list, max size
65
+ will be randomly selected from the values in the list.
66
+ interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
67
+ p (float): probability of applying the transform. Default: 1.
68
+
69
+ Targets:
70
+ image, mask, bboxes, keypoints
71
+
72
+ Image types:
73
+ uint8, float32
74
+ """
75
+
76
+ def __init__(
77
+ self,
78
+ max_size: Union[int, Sequence[int]] = 1024,
79
+ interpolation: int = cv2.INTER_LINEAR,
80
+ always_apply: bool = False,
81
+ p: float = 1,
82
+ ):
83
+ super(LongestMaxSize, self).__init__(always_apply, p)
84
+ self.interpolation = interpolation
85
+ self.max_size = max_size
86
+
87
+ def apply(
88
+ self, img: np.ndarray, max_size: int = 1024, interpolation: int = cv2.INTER_LINEAR, **params
89
+ ) -> np.ndarray:
90
+ return F.longest_max_size(img, max_size=max_size, interpolation=interpolation)
91
+
92
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
93
+ # Bounding box coordinates are scale invariant
94
+ return bbox
95
+
96
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, max_size: int = 1024, **params) -> KeypointInternalType:
97
+ height = params["rows"]
98
+ width = params["cols"]
99
+
100
+ scale = max_size / max([height, width])
101
+ return F.keypoint_scale(keypoint, scale, scale)
102
+
103
+ def get_params(self) -> Dict[str, int]:
104
+ return {"max_size": self.max_size if isinstance(self.max_size, int) else random.choice(self.max_size)}
105
+
106
+ def get_transform_init_args_names(self) -> Tuple[str, ...]:
107
+ return ("max_size", "interpolation")
108
+
109
+
110
+ class SmallestMaxSize(DualTransform):
111
+ """Rescale an image so that minimum side is equal to max_size, keeping the aspect ratio of the initial image.
112
+
113
+ Args:
114
+ max_size (int, list of int): maximum size of smallest side of the image after the transformation. When using a
115
+ list, max size will be randomly selected from the values in the list.
116
+ interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
117
+ p (float): probability of applying the transform. Default: 1.
118
+
119
+ Targets:
120
+ image, mask, bboxes, keypoints
121
+
122
+ Image types:
123
+ uint8, float32
124
+ """
125
+
126
+ def __init__(
127
+ self,
128
+ max_size: Union[int, Sequence[int]] = 1024,
129
+ interpolation: int = cv2.INTER_LINEAR,
130
+ always_apply: bool = False,
131
+ p: float = 1,
132
+ ):
133
+ super(SmallestMaxSize, self).__init__(always_apply, p)
134
+ self.interpolation = interpolation
135
+ self.max_size = max_size
136
+
137
+ def apply(
138
+ self, img: np.ndarray, max_size: int = 1024, interpolation: int = cv2.INTER_LINEAR, **params
139
+ ) -> np.ndarray:
140
+ return F.smallest_max_size(img, max_size=max_size, interpolation=interpolation)
141
+
142
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
143
+ return bbox
144
+
145
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, max_size: int = 1024, **params) -> KeypointInternalType:
146
+ height = params["rows"]
147
+ width = params["cols"]
148
+
149
+ scale = max_size / min([height, width])
150
+ return F.keypoint_scale(keypoint, scale, scale)
151
+
152
+ def get_params(self) -> Dict[str, int]:
153
+ return {"max_size": self.max_size if isinstance(self.max_size, int) else random.choice(self.max_size)}
154
+
155
+ def get_transform_init_args_names(self) -> Tuple[str, ...]:
156
+ return ("max_size", "interpolation")
157
+
158
+
159
+ class Resize(DualTransform):
160
+ """Resize the input to the given height and width.
161
+
162
+ Args:
163
+ height (int): desired height of the output.
164
+ width (int): desired width of the output.
165
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
166
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
167
+ Default: cv2.INTER_LINEAR.
168
+ p (float): probability of applying the transform. Default: 1.
169
+
170
+ Targets:
171
+ image, mask, bboxes, keypoints
172
+
173
+ Image types:
174
+ uint8, float32
175
+ """
176
+
177
+ def __init__(self, height, width, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1):
178
+ super(Resize, self).__init__(always_apply, p)
179
+ self.height = height
180
+ self.width = width
181
+ self.interpolation = interpolation
182
+
183
+ def apply(self, img, interpolation=cv2.INTER_LINEAR, **params):
184
+ return F.resize(img, height=self.height, width=self.width, interpolation=interpolation)
185
+
186
+ def apply_to_bbox(self, bbox, **params):
187
+ # Bounding box coordinates are scale invariant
188
+ return bbox
189
+
190
+ def apply_to_keypoint(self, keypoint, **params):
191
+ height = params["rows"]
192
+ width = params["cols"]
193
+ scale_x = self.width / width
194
+ scale_y = self.height / height
195
+ return F.keypoint_scale(keypoint, scale_x, scale_y)
196
+
197
+ def get_transform_init_args_names(self):
198
+ return ("height", "width", "interpolation")
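
A short usage sketch for the resize transforms defined in this file (the import
path follows the file location above; the call convention is the standard
albumentations one, which this vendored copy is assumed to keep):

import numpy as np
from custom_albumentations.augmentations.geometric.resize import LongestMaxSize, Resize

img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
resized = Resize(height=256, width=256)(image=img)["image"]   # exactly 256x256
capped = LongestMaxSize(max_size=512)(image=img)["image"]     # 384x512, aspect ratio kept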
custom_albumentations/augmentations/geometric/rotate.py ADDED
@@ -0,0 +1,294 @@
1
+ import math
2
+ import random
3
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
4
+
5
+ import cv2
6
+ import numpy as np
7
+
8
+ from ...core.transforms_interface import (
9
+ BoxInternalType,
10
+ DualTransform,
11
+ FillValueType,
12
+ KeypointInternalType,
13
+ to_tuple,
14
+ )
15
+ from ..crops import functional as FCrops
16
+ from . import functional as F
17
+
18
+ __all__ = ["Rotate", "RandomRotate90", "SafeRotate"]
19
+
20
+
21
+ class RandomRotate90(DualTransform):
22
+ """Randomly rotate the input by 90 degrees zero or more times.
23
+
24
+ Args:
25
+ p (float): probability of applying the transform. Default: 0.5.
26
+
27
+ Targets:
28
+ image, mask, bboxes, keypoints
29
+
30
+ Image types:
31
+ uint8, float32
32
+ """
33
+
34
+ def apply(self, img, factor=0, **params):
35
+ """
36
+ Args:
37
+ factor (int): number of times the input will be rotated by 90 degrees.
38
+ """
39
+ return np.ascontiguousarray(np.rot90(img, factor))
40
+
41
+ def get_params(self):
42
+ # Random int in the range [0, 3]
43
+ return {"factor": random.randint(0, 3)}
44
+
45
+ def apply_to_bbox(self, bbox, factor=0, **params):
46
+ return F.bbox_rot90(bbox, factor, **params)
47
+
48
+ def apply_to_keypoint(self, keypoint, factor=0, **params):
49
+ return F.keypoint_rot90(keypoint, factor, **params)
50
+
51
+ def get_transform_init_args_names(self):
52
+ return ()
53
+
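# Editor's note: what apply above does for a fixed factor, illustrated with a
# plain array (factor=2 is an upside-down flip):
img = np.arange(12, dtype=np.uint8).reshape(3, 4)
assert np.array_equal(np.ascontiguousarray(np.rot90(img, 2)), img[::-1, ::-1])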
54
+
+ class Rotate(DualTransform):
+     """Rotate the input by an angle selected randomly from the uniform distribution.
+
+     Args:
+         limit ((int, int) or int): range from which a random angle is picked. If limit is a single int
+             an angle is picked from (-limit, limit). Default: (-90, 90)
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
+             cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
+             Default: cv2.BORDER_REFLECT_101
+         value (int, float, list of ints, list of floats): padding value if border_mode is cv2.BORDER_CONSTANT.
+         mask_value (int, float, list of ints, list of floats): padding value if border_mode is
+             cv2.BORDER_CONSTANT applied for masks.
+         rotate_method (str): rotation method used for the bounding boxes. Should be one of "largest_box" or "ellipse".
+             Default: "largest_box"
+         crop_border (bool): If True, makes the largest possible crop within the rotated image. Default: False
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image, mask, bboxes, keypoints
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         limit=90,
+         interpolation=cv2.INTER_LINEAR,
+         border_mode=cv2.BORDER_REFLECT_101,
+         value=None,
+         mask_value=None,
+         rotate_method="largest_box",
+         crop_border=False,
+         always_apply=False,
+         p=0.5,
+     ):
+         super(Rotate, self).__init__(always_apply, p)
+         self.limit = to_tuple(limit)
+         self.interpolation = interpolation
+         self.border_mode = border_mode
+         self.value = value
+         self.mask_value = mask_value
+         self.rotate_method = rotate_method
+         self.crop_border = crop_border
+
+         if rotate_method not in ["largest_box", "ellipse"]:
+             raise ValueError(f"Rotation method {self.rotate_method} is not valid.")
+
+     def apply(
+         self, img, angle=0, interpolation=cv2.INTER_LINEAR, x_min=None, x_max=None, y_min=None, y_max=None, **params
+     ):
+         img_out = F.rotate(img, angle, interpolation, self.border_mode, self.value)
+         if self.crop_border:
+             img_out = FCrops.crop(img_out, x_min, y_min, x_max, y_max)
+         return img_out
+
+     def apply_to_mask(self, img, angle=0, x_min=None, x_max=None, y_min=None, y_max=None, **params):
+         img_out = F.rotate(img, angle, cv2.INTER_NEAREST, self.border_mode, self.mask_value)
+         if self.crop_border:
+             img_out = FCrops.crop(img_out, x_min, y_min, x_max, y_max)
+         return img_out
+
+     def apply_to_bbox(self, bbox, angle=0, x_min=None, x_max=None, y_min=None, y_max=None, cols=0, rows=0, **params):
+         bbox_out = F.bbox_rotate(bbox, angle, self.rotate_method, rows, cols)
+         if self.crop_border:
+             bbox_out = FCrops.bbox_crop(bbox_out, x_min, y_min, x_max, y_max, rows, cols)
+         return bbox_out
+
+     def apply_to_keypoint(
+         self, keypoint, angle=0, x_min=None, x_max=None, y_min=None, y_max=None, cols=0, rows=0, **params
+     ):
+         keypoint_out = F.keypoint_rotate(keypoint, angle, rows, cols, **params)
+         if self.crop_border:
+             keypoint_out = FCrops.crop_keypoint_by_coords(keypoint_out, (x_min, y_min, x_max, y_max))
+         return keypoint_out
+
+     @staticmethod
+     def _rotated_rect_with_max_area(h, w, angle):
+         """
+         Given a rectangle of size wxh that has been rotated by 'angle' (in
+         degrees), computes the width and height of the largest possible
+         axis-aligned rectangle (maximal area) within the rotated rectangle.
+
+         Code from: https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
+         """
+
+         angle = math.radians(angle)
+         width_is_longer = w >= h
+         side_long, side_short = (w, h) if width_is_longer else (h, w)
+
+         # since the solutions for angle, -angle and 180-angle are all the same,
+         # it is sufficient to look at the first quadrant and the absolute values of sin,cos:
+         sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
+         if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
+             # half constrained case: two crop corners touch the longer side,
+             # the other two corners are on the mid-line parallel to the longer line
+             x = 0.5 * side_short
+             wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
+         else:
+             # fully constrained case: crop touches all 4 sides
+             cos_2a = cos_a * cos_a - sin_a * sin_a
+             wr, hr = (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a
+
+         return dict(
+             x_min=max(0, int(w / 2 - wr / 2)),
+             x_max=min(w, int(w / 2 + wr / 2)),
+             y_min=max(0, int(h / 2 - hr / 2)),
+             y_max=min(h, int(h / 2 + hr / 2)),
+         )
+
+     @property
+     def targets_as_params(self) -> List[str]:
+         return ["image"]
+
+     def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
+         out_params = {"angle": random.uniform(self.limit[0], self.limit[1])}
+         if self.crop_border:
+             h, w = params["image"].shape[:2]
+             out_params.update(self._rotated_rect_with_max_area(h, w, out_params["angle"]))
+         return out_params
+
+     def get_transform_init_args_names(self):
+         return ("limit", "interpolation", "border_mode", "value", "mask_value", "rotate_method", "crop_border")
+
+
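A small sketch of the crop_border behaviour (editorial, not part of the diff): with crop_border=True, _rotated_rect_with_max_area supplies the crop window, so the output is the largest axis-aligned rectangle inside the rotated frame and is usually smaller than the input.

import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
rotate = A.Rotate(limit=(30, 30), crop_border=True, p=1.0)  # fixed 30-degree angle
out = rotate(image=image)["image"]
assert out.shape[0] <= 256 and out.shape[1] <= 256  # cropped, never padded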
+ class SafeRotate(DualTransform):
+     """Rotate the input inside the input's frame by an angle selected randomly from the uniform distribution.
+
+     The resulting image may have artifacts in it. After rotation, the image may have a different aspect ratio, and
+     after resizing, it returns to its original shape with the original aspect ratio of the image. For this reason we
+     may see some artifacts.
+
+     Args:
+         limit ((int, int) or int): range from which a random angle is picked. If limit is a single int
+             an angle is picked from (-limit, limit). Default: (-90, 90)
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
+             cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
+             Default: cv2.BORDER_REFLECT_101
+         value (int, float, list of ints, list of floats): padding value if border_mode is cv2.BORDER_CONSTANT.
+         mask_value (int, float, list of ints, list of floats): padding value if border_mode is
+             cv2.BORDER_CONSTANT applied for masks.
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image, mask, bboxes, keypoints
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         limit: Union[float, Tuple[float, float]] = 90,
+         interpolation: int = cv2.INTER_LINEAR,
+         border_mode: int = cv2.BORDER_REFLECT_101,
+         value: FillValueType = None,
+         mask_value: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None,
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super(SafeRotate, self).__init__(always_apply, p)
+         self.limit = to_tuple(limit)
+         self.interpolation = interpolation
+         self.border_mode = border_mode
+         self.value = value
+         self.mask_value = mask_value
+
+     def apply(self, img: np.ndarray, matrix: np.ndarray = np.array(None), **params) -> np.ndarray:
+         return F.safe_rotate(img, matrix, self.interpolation, self.value, self.border_mode)
+
+     def apply_to_mask(self, img: np.ndarray, matrix: np.ndarray = np.array(None), **params) -> np.ndarray:
+         return F.safe_rotate(img, matrix, cv2.INTER_NEAREST, self.mask_value, self.border_mode)
+
+     def apply_to_bbox(self, bbox: BoxInternalType, cols: int = 0, rows: int = 0, **params) -> BoxInternalType:
+         return F.bbox_safe_rotate(bbox, params["matrix"], cols, rows)
+
+     def apply_to_keypoint(
+         self,
+         keypoint: KeypointInternalType,
+         angle: float = 0,
+         scale_x: float = 0,
+         scale_y: float = 0,
+         cols: int = 0,
+         rows: int = 0,
+         **params
+     ) -> KeypointInternalType:
+         return F.keypoint_safe_rotate(keypoint, params["matrix"], angle, scale_x, scale_y, cols, rows)
+
+     @property
+     def targets_as_params(self) -> List[str]:
+         return ["image"]
+
+     def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
+         angle = random.uniform(self.limit[0], self.limit[1])
+
+         image = params["image"]
+         h, w = image.shape[:2]
+
+         # https://stackoverflow.com/questions/43892506/opencv-python-rotate-image-without-cropping-sides
+         image_center = (w / 2, h / 2)
+
+         # Rotation Matrix
+         rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
+
+         # rotation calculates the cos and sin, taking absolutes of those.
+         abs_cos = abs(rotation_mat[0, 0])
+         abs_sin = abs(rotation_mat[0, 1])
+
+         # find the new width and height bounds
+         new_w = math.ceil(h * abs_sin + w * abs_cos)
+         new_h = math.ceil(h * abs_cos + w * abs_sin)
+
+         scale_x = w / new_w
+         scale_y = h / new_h
+
+         # Shift the image to create padding
+         rotation_mat[0, 2] += new_w / 2 - image_center[0]
+         rotation_mat[1, 2] += new_h / 2 - image_center[1]
+
+         # Rescale to original size
+         scale_mat = np.diag(np.ones(3))
+         scale_mat[0, 0] *= scale_x
+         scale_mat[1, 1] *= scale_y
+         _tmp = np.diag(np.ones(3))
+         _tmp[:2] = rotation_mat
+         _tmp = scale_mat @ _tmp
+         rotation_mat = _tmp[:2]
+
+         return {"matrix": rotation_mat, "angle": angle, "scale_x": scale_x, "scale_y": scale_y}
+
+     def get_transform_init_args_names(self) -> Tuple[str, str, str, str, str]:
+         return ("limit", "interpolation", "border_mode", "value", "mask_value")
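A hedged sketch (editorial, not part of the diff): SafeRotate keeps the full frame, so the output shape always equals the input shape and the content is shrunk by the (scale_x, scale_y) factors computed above.

import cv2
import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (128, 256, 3), dtype=np.uint8)
safe = A.SafeRotate(limit=45, border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0)
out = safe(image=image)["image"]
assert out.shape == image.shape  # the frame is preserved, unlike Rotate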
custom_albumentations/augmentations/geometric/transforms.py ADDED
@@ -0,0 +1,1499 @@
+ import math
+ import random
+ from enum import Enum
+ from typing import Dict, Optional, Sequence, Tuple, Union
+
+ import cv2
+ import numpy as np
+ import skimage.transform
+
+ from custom_albumentations.core.bbox_utils import denormalize_bbox, normalize_bbox
+
+ from ... import random_utils
+ from ...core.transforms_interface import (
+     BoxInternalType,
+     DualTransform,
+     ImageColorType,
+     KeypointInternalType,
+     ScaleFloatType,
+     to_tuple,
+ )
+ from ..functional import bbox_from_mask
+ from . import functional as F
+
+ __all__ = [
+     "ShiftScaleRotate",
+     "ElasticTransform",
+     "Perspective",
+     "Affine",
+     "PiecewiseAffine",
+     "VerticalFlip",
+     "HorizontalFlip",
+     "Flip",
+     "Transpose",
+     "OpticalDistortion",
+     "GridDistortion",
+     "PadIfNeeded",
+ ]
+
+
+ class ShiftScaleRotate(DualTransform):
+     """Randomly apply affine transforms: translate, scale and rotate the input.
+
+     Args:
+         shift_limit ((float, float) or float): shift factor range for both height and width. If shift_limit
+             is a single float value, the range will be (-shift_limit, shift_limit). Absolute values for lower and
+             upper bounds should lie in range [0, 1]. Default: (-0.0625, 0.0625).
+         scale_limit ((float, float) or float): scaling factor range. If scale_limit is a single float value, the
+             range will be (-scale_limit, scale_limit). Note that the scale_limit will be biased by 1.
+             If scale_limit is a tuple, like (low, high), sampling will be done from the range (1 + low, 1 + high).
+             Default: (-0.1, 0.1).
+         rotate_limit ((int, int) or int): rotation range. If rotate_limit is a single int value, the
+             range will be (-rotate_limit, rotate_limit). Default: (-45, 45).
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
+             cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
+             Default: cv2.BORDER_REFLECT_101
+         value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
+         mask_value (int, float, list of int, list of float): padding value if border_mode is
+             cv2.BORDER_CONSTANT applied for masks.
+         shift_limit_x ((float, float) or float): shift factor range for width. If it is set then this value
+             instead of shift_limit will be used for shifting width. If shift_limit_x is a single float value,
+             the range will be (-shift_limit_x, shift_limit_x). Absolute values for lower and upper bounds should lie in
+             the range [0, 1]. Default: None.
+         shift_limit_y ((float, float) or float): shift factor range for height. If it is set then this value
+             instead of shift_limit will be used for shifting height. If shift_limit_y is a single float value,
+             the range will be (-shift_limit_y, shift_limit_y). Absolute values for lower and upper bounds should lie
+             in the range [0, 1]. Default: None.
+         rotate_method (str): rotation method used for the bounding boxes. Should be one of "largest_box" or "ellipse".
+             Default: "largest_box"
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image, mask, bboxes, keypoints
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         shift_limit=0.0625,
+         scale_limit=0.1,
+         rotate_limit=45,
+         interpolation=cv2.INTER_LINEAR,
+         border_mode=cv2.BORDER_REFLECT_101,
+         value=None,
+         mask_value=None,
+         shift_limit_x=None,
+         shift_limit_y=None,
+         rotate_method="largest_box",
+         always_apply=False,
+         p=0.5,
+     ):
+         super(ShiftScaleRotate, self).__init__(always_apply, p)
+         self.shift_limit_x = to_tuple(shift_limit_x if shift_limit_x is not None else shift_limit)
+         self.shift_limit_y = to_tuple(shift_limit_y if shift_limit_y is not None else shift_limit)
+         self.scale_limit = to_tuple(scale_limit, bias=1.0)
+         self.rotate_limit = to_tuple(rotate_limit)
+         self.interpolation = interpolation
+         self.border_mode = border_mode
+         self.value = value
+         self.mask_value = mask_value
+         self.rotate_method = rotate_method
+
+         if self.rotate_method not in ["largest_box", "ellipse"]:
+             raise ValueError(f"Rotation method {self.rotate_method} is not valid.")
+
+     def apply(self, img, angle=0, scale=0, dx=0, dy=0, interpolation=cv2.INTER_LINEAR, **params):
+         return F.shift_scale_rotate(img, angle, scale, dx, dy, interpolation, self.border_mode, self.value)
+
+     def apply_to_mask(self, img, angle=0, scale=0, dx=0, dy=0, **params):
+         return F.shift_scale_rotate(img, angle, scale, dx, dy, cv2.INTER_NEAREST, self.border_mode, self.mask_value)
+
+     def apply_to_keypoint(self, keypoint, angle=0, scale=0, dx=0, dy=0, rows=0, cols=0, **params):
+         return F.keypoint_shift_scale_rotate(keypoint, angle, scale, dx, dy, rows, cols)
+
+     def get_params(self):
+         return {
+             "angle": random.uniform(self.rotate_limit[0], self.rotate_limit[1]),
+             "scale": random.uniform(self.scale_limit[0], self.scale_limit[1]),
+             "dx": random.uniform(self.shift_limit_x[0], self.shift_limit_x[1]),
+             "dy": random.uniform(self.shift_limit_y[0], self.shift_limit_y[1]),
+         }
+
+     def apply_to_bbox(self, bbox, angle, scale, dx, dy, **params):
+         return F.bbox_shift_scale_rotate(bbox, angle, scale, dx, dy, self.rotate_method, **params)
+
+     def get_transform_init_args(self):
+         return {
+             "shift_limit_x": self.shift_limit_x,
+             "shift_limit_y": self.shift_limit_y,
+             "scale_limit": to_tuple(self.scale_limit, bias=-1.0),
+             "rotate_limit": self.rotate_limit,
+             "interpolation": self.interpolation,
+             "border_mode": self.border_mode,
+             "value": self.value,
+             "mask_value": self.mask_value,
+             "rotate_method": self.rotate_method,
+         }
+
+
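A usage sketch (editorial, not part of the diff): note that scale_limit=0.1 yields a sampled scale in [0.9, 1.1], because to_tuple(scale_limit, bias=1.0) shifts the range by 1 in __init__ above.

import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
ssr = A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, p=1.0)
out = ssr(image=image)["image"]
assert out.shape == image.shape  # the warp keeps the original canvas size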
+ class ElasticTransform(DualTransform):
+     """Elastic deformation of images as described in [Simard2003]_ (with modifications).
+     Based on https://gist.github.com/ernestum/601cdf56d2b424757de5
+
+     .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
+        Convolutional Neural Networks applied to Visual Document Analysis", in
+        Proc. of the International Conference on Document Analysis and
+        Recognition, 2003.
+
+     Args:
+         alpha (float): scaling factor for the random displacement field.
+         sigma (float): Gaussian filter parameter.
+         alpha_affine (float): The range will be (-alpha_affine, alpha_affine)
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
+             cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
+             Default: cv2.BORDER_REFLECT_101
+         value (int, float, list of ints, list of floats): padding value if border_mode is cv2.BORDER_CONSTANT.
+         mask_value (int, float, list of ints, list of floats): padding value if border_mode is
+             cv2.BORDER_CONSTANT applied for masks.
+         approximate (boolean): Whether to smooth the displacement map with a fixed kernel size.
+             Enabling this option gives ~2X speedup on large images.
+         same_dxdy (boolean): Whether to use the same randomly generated shift for x and y.
+             Enabling this option gives ~2X speedup.
+
+     Targets:
+         image, mask, bboxes
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         alpha=1,
+         sigma=50,
+         alpha_affine=50,
+         interpolation=cv2.INTER_LINEAR,
+         border_mode=cv2.BORDER_REFLECT_101,
+         value=None,
+         mask_value=None,
+         always_apply=False,
+         approximate=False,
+         same_dxdy=False,
+         p=0.5,
+     ):
+         super(ElasticTransform, self).__init__(always_apply, p)
+         self.alpha = alpha
+         self.alpha_affine = alpha_affine
+         self.sigma = sigma
+         self.interpolation = interpolation
+         self.border_mode = border_mode
+         self.value = value
+         self.mask_value = mask_value
+         self.approximate = approximate
+         self.same_dxdy = same_dxdy
+
+     def apply(self, img, random_state=None, interpolation=cv2.INTER_LINEAR, **params):
+         return F.elastic_transform(
+             img,
+             self.alpha,
+             self.sigma,
+             self.alpha_affine,
+             interpolation,
+             self.border_mode,
+             self.value,
+             np.random.RandomState(random_state),
+             self.approximate,
+             self.same_dxdy,
+         )
+
+     def apply_to_mask(self, img, random_state=None, **params):
+         return F.elastic_transform(
+             img,
+             self.alpha,
+             self.sigma,
+             self.alpha_affine,
+             cv2.INTER_NEAREST,
+             self.border_mode,
+             self.mask_value,
+             np.random.RandomState(random_state),
+             self.approximate,
+             self.same_dxdy,
+         )
+
+     def apply_to_bbox(self, bbox, random_state=None, **params):
+         rows, cols = params["rows"], params["cols"]
+         mask = np.zeros((rows, cols), dtype=np.uint8)
+         bbox_denorm = F.denormalize_bbox(bbox, rows, cols)
+         x_min, y_min, x_max, y_max = bbox_denorm[:4]
+         x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
+         mask[y_min:y_max, x_min:x_max] = 1
+         mask = F.elastic_transform(
+             mask,
+             self.alpha,
+             self.sigma,
+             self.alpha_affine,
+             cv2.INTER_NEAREST,
+             self.border_mode,
+             self.mask_value,
+             np.random.RandomState(random_state),
+             self.approximate,
+         )
+         bbox_returned = bbox_from_mask(mask)
+         bbox_returned = F.normalize_bbox(bbox_returned, rows, cols)
+         return bbox_returned
+
+     def get_params(self):
+         return {"random_state": random.randint(0, 10000)}
+
+     def get_transform_init_args_names(self):
+         return (
+             "alpha",
+             "sigma",
+             "alpha_affine",
+             "interpolation",
+             "border_mode",
+             "value",
+             "mask_value",
+             "approximate",
+             "same_dxdy",
+         )
+
+
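A hedged sketch (editorial, not part of the diff): the shared random_state from get_params drives both image and mask, which keeps them aligned; approximate=True trades a little fidelity for roughly 2x speed on large images.

import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
mask = (np.random.rand(512, 512) > 0.5).astype(np.uint8)
elastic = A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, approximate=True, p=1.0)
out = elastic(image=image, mask=mask)  # image and mask receive the same displacement field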
+ class Perspective(DualTransform):
+     """Perform a random four point perspective transform of the input.
+
+     Args:
+         scale (float or (float, float)): standard deviation of the normal distributions. These are used to sample
+             the random distances of the subimage's corners from the full image's corners.
+             If scale is a single float value, the range will be (0, scale). Default: (0.05, 0.1).
+         keep_size (bool): Whether to resize images back to their original size after applying the perspective
+             transform. If set to False, the resulting images may end up having different shapes
+             and will always be a list, never an array. Default: True
+         pad_mode (OpenCV flag): OpenCV border mode.
+         pad_val (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
+             Default: 0
+         mask_pad_val (int, float, list of int, list of float): padding value for mask
+             if border_mode is cv2.BORDER_CONSTANT. Default: 0
+         fit_output (bool): If True, the image plane size and position will be adjusted to still capture
+             the whole image after perspective transformation. (Followed by image resizing if keep_size is set to True.)
+             Otherwise, parts of the transformed image may be outside of the image plane.
+             This setting should not be set to True when using large scale values as it could lead to very large images.
+             Default: False
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm.
+             Default: cv2.INTER_LINEAR.
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image, mask, keypoints, bboxes
+
+     Image types:
+         uint8, float32
+     """
+
+     def __init__(
+         self,
+         scale=(0.05, 0.1),
+         keep_size=True,
+         pad_mode=cv2.BORDER_CONSTANT,
+         pad_val=0,
+         mask_pad_val=0,
+         fit_output=False,
+         interpolation=cv2.INTER_LINEAR,
+         always_apply=False,
+         p=0.5,
+     ):
+         super().__init__(always_apply, p)
+         self.scale = to_tuple(scale, 0)
+         self.keep_size = keep_size
+         self.pad_mode = pad_mode
+         self.pad_val = pad_val
+         self.mask_pad_val = mask_pad_val
+         self.fit_output = fit_output
+         self.interpolation = interpolation
+
+     def apply(self, img, matrix=None, max_height=None, max_width=None, **params):
+         return F.perspective(
+             img, matrix, max_width, max_height, self.pad_val, self.pad_mode, self.keep_size, params["interpolation"]
+         )
+
+     def apply_to_bbox(self, bbox, matrix=None, max_height=None, max_width=None, **params):
+         return F.perspective_bbox(bbox, params["rows"], params["cols"], matrix, max_width, max_height, self.keep_size)
+
+     def apply_to_keypoint(self, keypoint, matrix=None, max_height=None, max_width=None, **params):
+         return F.perspective_keypoint(
+             keypoint, params["rows"], params["cols"], matrix, max_width, max_height, self.keep_size
+         )
+
+     @property
+     def targets_as_params(self):
+         return ["image"]
+
+     def get_params_dependent_on_targets(self, params):
+         h, w = params["image"].shape[:2]
+
+         scale = random_utils.uniform(*self.scale)
+         points = random_utils.normal(0, scale, [4, 2])
+         points = np.mod(np.abs(points), 0.32)
+
+         # top left -- no changes needed, just use jitter
+         # top right
+         points[1, 0] = 1.0 - points[1, 0]  # w = 1.0 - jitter
+         # bottom right
+         points[2] = 1.0 - points[2]  # w = 1.0 - jitter
+         # bottom left
+         points[3, 1] = 1.0 - points[3, 1]  # h = 1.0 - jitter
+
+         points[:, 0] *= w
+         points[:, 1] *= h
+
+         # Obtain a consistent order of the points and unpack them individually.
+         # Warning: don't just do (tl, tr, br, bl) = _order_points(...) here,
+         # because the reordered points are used further below.
+         points = self._order_points(points)
+         tl, tr, br, bl = points
+
+         # compute the width of the new image, which will be the
+         # maximum distance between bottom-right and bottom-left
+         # x-coordinates or the top-right and top-left x-coordinates
+         min_width = None
+         max_width = None
+         while min_width is None or min_width < 2:
+             width_top = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+             width_bottom = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+             max_width = int(max(width_top, width_bottom))
+             min_width = int(min(width_top, width_bottom))
+             if min_width < 2:
+                 step_size = (2 - min_width) / 2
+                 tl[0] -= step_size
+                 tr[0] += step_size
+                 bl[0] -= step_size
+                 br[0] += step_size
+
+         # compute the height of the new image, which will be the maximum distance between the top-right
+         # and bottom-right y-coordinates or the top-left and bottom-left y-coordinates
+         min_height = None
+         max_height = None
+         while min_height is None or min_height < 2:
+             height_right = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+             height_left = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+             max_height = int(max(height_right, height_left))
+             min_height = int(min(height_right, height_left))
+             if min_height < 2:
+                 step_size = (2 - min_height) / 2
+                 tl[1] -= step_size
+                 tr[1] -= step_size
+                 bl[1] += step_size
+                 br[1] += step_size
+
+         # now that we have the dimensions of the new image, construct
+         # the set of destination points to obtain a "birds eye view",
+         # (i.e. top-down view) of the image, again specifying points
+         # in the top-left, top-right, bottom-right, and bottom-left order
+         # do not use width-1 or height-1 here, as for e.g. width=3, height=2
+         # the bottom right coordinate is at (3.0, 2.0) and not (2.0, 1.0)
+         dst = np.array([[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]], dtype=np.float32)
+
+         # compute the perspective transform matrix and then apply it
+         m = cv2.getPerspectiveTransform(points, dst)
+
+         if self.fit_output:
+             m, max_width, max_height = self._expand_transform(m, (h, w))
+
+         return {"matrix": m, "max_height": max_height, "max_width": max_width, "interpolation": self.interpolation}
+
+     @classmethod
+     def _expand_transform(cls, matrix, shape):
+         height, width = shape
+         # do not use width-1 or height-1 here, as for e.g. width=3, height=2
+         # the bottom right coordinate is at (3.0, 2.0) and not (2.0, 1.0)
+         rect = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)
+         dst = cv2.perspectiveTransform(np.array([rect]), matrix)[0]
+
+         # get min x, y over transformed 4 points
+         # then modify target points by subtracting these minima => shift to (0, 0)
+         dst -= dst.min(axis=0, keepdims=True)
+         dst = np.around(dst, decimals=0)
+
+         matrix_expanded = cv2.getPerspectiveTransform(rect, dst)
+         max_width, max_height = dst.max(axis=0)
+         return matrix_expanded, int(max_width), int(max_height)
+
+     @staticmethod
+     def _order_points(pts: np.ndarray) -> np.ndarray:
+         pts = np.array(sorted(pts, key=lambda x: x[0]))
+         left = pts[:2]  # points with smallest x coordinate - left points
+         right = pts[2:]  # points with greatest x coordinate - right points
+
+         if left[0][1] < left[1][1]:
+             tl, bl = left
+         else:
+             bl, tl = left
+
+         if right[0][1] < right[1][1]:
+             tr, br = right
+         else:
+             br, tr = right
+
+         return np.array([tl, tr, br, bl], dtype=np.float32)
+
+     def get_transform_init_args_names(self):
+         return "scale", "keep_size", "pad_mode", "pad_val", "mask_pad_val", "fit_output", "interpolation"
+
+
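A usage sketch (editorial, not part of the diff): with keep_size=True (the default) the warped image is resized back to the input shape; with keep_size=False the output shape depends on the sampled corner jitter.

import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)
out = A.Perspective(scale=(0.05, 0.1), keep_size=True, p=1.0)(image=image)["image"]
assert out.shape == image.shape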
+ class Affine(DualTransform):
+     """Augmentation to apply affine transformations to images.
+     This is mostly a wrapper around the corresponding classes and functions in OpenCV.
+
+     Affine transformations involve:
+
+     - Translation ("move" image on the x-/y-axis)
+     - Rotation
+     - Scaling ("zoom" in/out)
+     - Shear (move one side of the image, turning a square into a trapezoid)
+
+     All such transformations can create "new" pixels in the image without a defined content, e.g.
+     if the image is translated to the left, pixels are created on the right.
+     A method has to be defined to deal with these pixel values.
+     The parameters `cval` and `mode` of this class deal with this.
+
+     Some transformations involve interpolations between several pixels
+     of the input image to generate output pixel values. The parameters `interpolation` and
+     `mask_interpolation` deal with the method of interpolation used for this.
+
+     Args:
+         scale (number, tuple of number or dict): Scaling factor to use, where ``1.0`` denotes "no change" and
+             ``0.5`` is zoomed out to ``50`` percent of the original size.
+             * If a single number, then that value will be used for all images.
+             * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from the interval ``[a, b]``.
+               The same range will be used for both x- and y-axis. To keep the aspect ratio, set
+               ``keep_ratio=True``, then the same value will be used for both x- and y-axis.
+             * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
+               Each of these keys can have the same values as described above.
+               Using a dictionary allows setting different values for the two axes and sampling will then happen
+               *independently* per axis, resulting in samples that differ between the axes. Note that when
+               ``keep_ratio=True``, the x- and y-axis ranges should be the same.
+         translate_percent (None, number, tuple of number or dict): Translation as a fraction of the image height/width
+             (x-translation, y-translation), where ``0`` denotes "no change"
+             and ``0.5`` denotes "half of the axis size".
+             * If ``None`` then equivalent to ``0.0`` unless `translate_px` has a value other than ``None``.
+             * If a single number, then that value will be used for all images.
+             * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from the interval ``[a, b]``.
+               That sampled fraction value will be used identically for both x- and y-axis.
+             * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
+               Each of these keys can have the same values as described above.
+               Using a dictionary allows setting different values for the two axes and sampling will then happen
+               *independently* per axis, resulting in samples that differ between the axes.
+         translate_px (None, int, tuple of int or dict): Translation in pixels.
+             * If ``None`` then equivalent to ``0`` unless `translate_percent` has a value other than ``None``.
+             * If a single int, then that value will be used for all images.
+             * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from
+               the discrete interval ``[a..b]``. That number will be used identically for both x- and y-axis.
+             * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
+               Each of these keys can have the same values as described above.
+               Using a dictionary allows setting different values for the two axes and sampling will then happen
+               *independently* per axis, resulting in samples that differ between the axes.
+         rotate (number or tuple of number): Rotation in degrees (**NOT** radians), i.e. expected value range is
+             around ``[-360, 360]``. Rotation happens around the *center* of the image,
+             not the top left corner as in some other frameworks.
+             * If a number, then that value will be used for all images.
+             * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from the interval ``[a, b]``
+               and used as the rotation value.
+         shear (number, tuple of number or dict): Shear in degrees (**NOT** radians), i.e. expected value range is
+             around ``[-360, 360]``, with reasonable values being in the range of ``[-45, 45]``.
+             * If a number, then that value will be used for all images as
+               the shear on the x-axis (no shear on the y-axis will be done).
+             * If a tuple ``(a, b)``, then two values will be uniformly sampled per image
+               from the interval ``[a, b]`` and be used as the x- and y-shear value.
+             * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
+               Each of these keys can have the same values as described above.
+               Using a dictionary allows setting different values for the two axes and sampling will then happen
+               *independently* per axis, resulting in samples that differ between the axes.
+         interpolation (int): OpenCV interpolation flag.
+         mask_interpolation (int): OpenCV interpolation flag.
+         cval (number or sequence of number): The constant value to use when filling in newly created pixels.
+             (E.g. translating by 1px to the right will create a new 1px-wide column of pixels
+             on the left of the image).
+             The value is only used when `mode=constant`. The expected value range is ``[0, 255]`` for ``uint8`` images.
+         cval_mask (number or tuple of number): Same as cval but only for masks.
+         mode (int): OpenCV border flag.
+         fit_output (bool): If True, the image plane size and position will be adjusted to tightly capture
+             the whole image after affine transformation (`translate_percent` and `translate_px` are ignored).
+             Otherwise (``False``), parts of the transformed image may end up outside the image plane.
+             Fitting the output shape can be useful to avoid corners of the image being outside the image plane
+             after applying rotations. Default: False
+         keep_ratio (bool): When True, the original aspect ratio will be kept when the random scale is applied.
+             Default: False.
+         rotate_method (str): rotation method used for the bounding boxes. Should be one of "largest_box" or
+             "ellipse" [1].
+             Default: "largest_box"
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image, mask, keypoints, bboxes
+
+     Image types:
+         uint8, float32
+
+     Reference:
+         [1] https://arxiv.org/abs/2109.13488
+     """
+
+     def __init__(
+         self,
+         scale: Optional[Union[float, Sequence[float], dict]] = None,
+         translate_percent: Optional[Union[float, Sequence[float], dict]] = None,
+         translate_px: Optional[Union[int, Sequence[int], dict]] = None,
+         rotate: Optional[Union[float, Sequence[float]]] = None,
+         shear: Optional[Union[float, Sequence[float], dict]] = None,
+         interpolation: int = cv2.INTER_LINEAR,
+         mask_interpolation: int = cv2.INTER_NEAREST,
+         cval: Union[int, float, Sequence[int], Sequence[float]] = 0,
+         cval_mask: Union[int, float, Sequence[int], Sequence[float]] = 0,
+         mode: int = cv2.BORDER_CONSTANT,
+         fit_output: bool = False,
+         keep_ratio: bool = False,
+         rotate_method: str = "largest_box",
+         always_apply: bool = False,
+         p: float = 0.5,
+     ):
+         super().__init__(always_apply=always_apply, p=p)
+
+         params = [scale, translate_percent, translate_px, rotate, shear]
+         if all([p is None for p in params]):
+             scale = {"x": (0.9, 1.1), "y": (0.9, 1.1)}
+             translate_percent = {"x": (-0.1, 0.1), "y": (-0.1, 0.1)}
+             rotate = (-15, 15)
+             shear = {"x": (-10, 10), "y": (-10, 10)}
+         else:
+             scale = scale if scale is not None else 1.0
+             rotate = rotate if rotate is not None else 0.0
+             shear = shear if shear is not None else 0.0
+
+         self.interpolation = interpolation
+         self.mask_interpolation = mask_interpolation
+         self.cval = cval
+         self.cval_mask = cval_mask
+         self.mode = mode
+         self.scale = self._handle_dict_arg(scale, "scale")
+         self.translate_percent, self.translate_px = self._handle_translate_arg(translate_px, translate_percent)
+         self.rotate = to_tuple(rotate, rotate)
+         self.fit_output = fit_output
+         self.shear = self._handle_dict_arg(shear, "shear")
+         self.keep_ratio = keep_ratio
+         self.rotate_method = rotate_method
+
+         if self.keep_ratio and self.scale["x"] != self.scale["y"]:
+             raise ValueError(
+                 "When keep_ratio is True, the x and y scale range should be identical. got {}".format(self.scale)
+             )
+
+     def get_transform_init_args_names(self):
+         return (
+             "interpolation",
+             "mask_interpolation",
+             "cval",
+             "mode",
+             "scale",
+             "translate_percent",
+             "translate_px",
+             "rotate",
+             "fit_output",
+             "shear",
+             "cval_mask",
+             "keep_ratio",
+             "rotate_method",
+         )
+
+     @staticmethod
+     def _handle_dict_arg(val: Union[float, Sequence[float], dict], name: str, default: float = 1.0):
+         if isinstance(val, dict):
+             if "x" not in val and "y" not in val:
+                 raise ValueError(
+                     f'Expected {name} dictionary to contain at least key "x" or key "y". Found neither of them.'
+                 )
+             x = val.get("x", default)
+             y = val.get("y", default)
+             return {"x": to_tuple(x, x), "y": to_tuple(y, y)}
+         return {"x": to_tuple(val, val), "y": to_tuple(val, val)}
+
+     @classmethod
+     def _handle_translate_arg(
+         cls,
+         translate_px: Optional[Union[float, Sequence[float], dict]],
+         translate_percent: Optional[Union[float, Sequence[float], dict]],
+     ):
+         if translate_percent is None and translate_px is None:
+             translate_px = 0
+
+         if translate_percent is not None and translate_px is not None:
+             raise ValueError(
+                 "Expected either translate_percent or translate_px to be provided, but both of them were provided."
+             )
+
+         if translate_percent is not None:
+             # translate by percent
+             return cls._handle_dict_arg(translate_percent, "translate_percent", default=0.0), translate_px
+
+         if translate_px is None:
+             raise ValueError("translate_px is None.")
+         # translate by pixels
+         return translate_percent, cls._handle_dict_arg(translate_px, "translate_px")
+
+     def apply(
+         self,
+         img: np.ndarray,
+         matrix: skimage.transform.ProjectiveTransform = None,
+         output_shape: Sequence[int] = (),
+         **params
+     ) -> np.ndarray:
+         return F.warp_affine(
+             img,
+             matrix,
+             interpolation=self.interpolation,
+             cval=self.cval,
+             mode=self.mode,
+             output_shape=output_shape,
+         )
+
+     def apply_to_mask(
+         self,
+         img: np.ndarray,
+         matrix: skimage.transform.ProjectiveTransform = None,
+         output_shape: Sequence[int] = (),
+         **params
+     ) -> np.ndarray:
+         return F.warp_affine(
+             img,
+             matrix,
+             interpolation=self.mask_interpolation,
+             cval=self.cval_mask,
+             mode=self.mode,
+             output_shape=output_shape,
+         )
+
+     def apply_to_bbox(
+         self,
+         bbox: BoxInternalType,
+         matrix: skimage.transform.ProjectiveTransform = None,
+         rows: int = 0,
+         cols: int = 0,
+         output_shape: Sequence[int] = (),
+         **params
+     ) -> BoxInternalType:
+         return F.bbox_affine(bbox, matrix, self.rotate_method, rows, cols, output_shape)
+
+     def apply_to_keypoint(
+         self,
+         keypoint: KeypointInternalType,
+         matrix: Optional[skimage.transform.ProjectiveTransform] = None,
+         scale: Optional[dict] = None,
+         **params
+     ) -> KeypointInternalType:
+         assert scale is not None and matrix is not None
+         return F.keypoint_affine(keypoint, matrix=matrix, scale=scale)
+
+     @property
+     def targets_as_params(self):
+         return ["image"]
+
+     def get_params_dependent_on_targets(self, params: dict) -> dict:
+         h, w = params["image"].shape[:2]
+
+         translate: Dict[str, Union[int, float]]
+         if self.translate_px is not None:
+             translate = {key: random.randint(*value) for key, value in self.translate_px.items()}
+         elif self.translate_percent is not None:
+             translate = {key: random.uniform(*value) for key, value in self.translate_percent.items()}
+             translate["x"] = translate["x"] * w
+             translate["y"] = translate["y"] * h
+         else:
+             translate = {"x": 0, "y": 0}
+
+         # Look to issue https://github.com/albumentations-team/albumentations/issues/1079
+         shear = {key: -random.uniform(*value) for key, value in self.shear.items()}
+         scale = {key: random.uniform(*value) for key, value in self.scale.items()}
+         if self.keep_ratio:
+             scale["y"] = scale["x"]
+
+         # Look to issue https://github.com/albumentations-team/albumentations/issues/1079
+         rotate = -random.uniform(*self.rotate)
+
+         # for images we use additional shifts of (0.5, 0.5) as otherwise
+         # we get an ugly black border for 90deg rotations
+         shift_x = w / 2 - 0.5
+         shift_y = h / 2 - 0.5
+
+         matrix_to_topleft = skimage.transform.SimilarityTransform(translation=[-shift_x, -shift_y])
+         matrix_shear_y_rot = skimage.transform.AffineTransform(rotation=-np.pi / 2)
+         matrix_shear_y = skimage.transform.AffineTransform(shear=np.deg2rad(shear["y"]))
+         matrix_shear_y_rot_inv = skimage.transform.AffineTransform(rotation=np.pi / 2)
+         matrix_transforms = skimage.transform.AffineTransform(
+             scale=(scale["x"], scale["y"]),
+             translation=(translate["x"], translate["y"]),
+             rotation=np.deg2rad(rotate),
+             shear=np.deg2rad(shear["x"]),
+         )
+         matrix_to_center = skimage.transform.SimilarityTransform(translation=[shift_x, shift_y])
+         matrix = (
+             matrix_to_topleft
+             + matrix_shear_y_rot
+             + matrix_shear_y
+             + matrix_shear_y_rot_inv
+             + matrix_transforms
+             + matrix_to_center
+         )
+         if self.fit_output:
+             matrix, output_shape = self._compute_affine_warp_output_shape(matrix, params["image"].shape)
+         else:
+             output_shape = params["image"].shape
+
+         return {
+             "rotate": rotate,
+             "scale": scale,
+             "matrix": matrix,
+             "output_shape": output_shape,
+         }
+
+     @staticmethod
+     def _compute_affine_warp_output_shape(
+         matrix: skimage.transform.ProjectiveTransform, input_shape: Sequence[int]
+     ) -> Tuple[skimage.transform.ProjectiveTransform, Sequence[int]]:
+         height, width = input_shape[:2]
+
+         if height == 0 or width == 0:
+             return matrix, input_shape
+
+         # determine shape of output image
+         corners = np.array([[0, 0], [0, height - 1], [width - 1, height - 1], [width - 1, 0]])
+         corners = matrix(corners)
+         minc = corners[:, 0].min()
+         minr = corners[:, 1].min()
+         maxc = corners[:, 0].max()
+         maxr = corners[:, 1].max()
+         out_height = maxr - minr + 1
+         out_width = maxc - minc + 1
+         if len(input_shape) == 3:
+             output_shape = np.ceil((out_height, out_width, input_shape[2]))
+         else:
+             output_shape = np.ceil((out_height, out_width))
+         output_shape_tuple = tuple([int(v) for v in output_shape.tolist()])
+         # fit output image in new shape
+         translation = (-minc, -minr)
+         matrix_to_fit = skimage.transform.SimilarityTransform(translation=translation)
+         matrix = matrix + matrix_to_fit
+         return matrix, output_shape_tuple
+
+
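A hedged sketch (editorial, not part of the diff): per-axis dictionaries are sampled independently, and keep_ratio=True requires identical x/y scale ranges, after which the sampled x scale is reused for y (see get_params_dependent_on_targets above).

import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
affine = A.Affine(
    scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # identical ranges, required by keep_ratio=True
    translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
    rotate=(-15, 15),
    shear={"x": (-10, 10), "y": (-10, 10)},
    keep_ratio=True,
    p=1.0,
)
out = affine(image=image)["image"]
assert out.shape == image.shape  # fit_output=False keeps the input canvas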
+ class PiecewiseAffine(DualTransform):
+     """Apply affine transformations that differ between local neighbourhoods.
+     This augmentation places a regular grid of points on an image and randomly moves the neighbourhood of these
+     points around via affine transformations. This leads to local distortions.
+
+     This is mostly a wrapper around scikit-image's ``PiecewiseAffine``.
+     See also ``Affine`` for a similar technique.
+
+     Note:
+         This augmenter is very slow. Try to use ``ElasticTransform`` instead, which is at least 10x faster.
+
+     Note:
+         For coordinate-based inputs (keypoints, bounding boxes, polygons, ...),
+         this augmenter still has to perform an image-based augmentation,
+         which makes it significantly slower and less exact for such inputs than other transforms.
+
+     Args:
+         scale (float, tuple of float): Each point on the regular grid is moved around via a normal distribution.
+             This scale factor is equivalent to the normal distribution's sigma.
+             Note that the jitter (how far each point is moved in which direction) is multiplied by the height/width of
+             the image if ``absolute_scale=False`` (default), so this scale can be the same for different sized images.
+             Recommended values are in the range ``0.01`` to ``0.05`` (weak to strong augmentations).
+             * If a single ``float``, then that value will always be used as the scale.
+             * If a tuple ``(a, b)`` of ``float`` s, then a random value will
+               be uniformly sampled per image from the interval ``[a, b]``.
+         nb_rows (int, tuple of int): Number of rows of points that the regular grid should have.
+             Must be at least ``2``. For large images, you might want to pick a higher value than ``4``.
+             You might have to then adjust scale to lower values.
+             * If a single ``int``, then that value will always be used as the number of rows.
+             * If a tuple ``(a, b)``, then a value from the discrete interval
+               ``[a..b]`` will be uniformly sampled per image.
+         nb_cols (int, tuple of int): Number of columns. Analogous to `nb_rows`.
+         interpolation (int): The order of interpolation. The order has to be in the range 0-5:
+             - 0: Nearest-neighbor
+             - 1: Bi-linear (default)
+             - 2: Bi-quadratic
+             - 3: Bi-cubic
+             - 4: Bi-quartic
+             - 5: Bi-quintic
+         mask_interpolation (int): same as interpolation but for mask.
+         cval (number): The constant value to use when filling in newly created pixels.
+         cval_mask (number): Same as cval but only for masks.
+         mode (str): {'constant', 'edge', 'symmetric', 'reflect', 'wrap'}, optional
+             Points outside the boundaries of the input are filled according
+             to the given mode. Modes match the behaviour of `numpy.pad`.
+         absolute_scale (bool): Take `scale` as an absolute value rather than a relative value.
+         keypoints_threshold (float): Used as threshold in conversion from distance maps to keypoints.
+             The search for keypoints works by searching for the
+             argmin (non-inverted) or argmax (inverted) in each channel. This
+             parameter contains the maximum (non-inverted) or minimum (inverted) value to accept in order to view a hit
+             as a keypoint. Use ``None`` to use no min/max. Default: 0.01
+
+     Targets:
+         image, mask, keypoints, bboxes
+
+     Image types:
+         uint8, float32
+
+     """
+
+     def __init__(
+         self,
+         scale: ScaleFloatType = (0.03, 0.05),
+         nb_rows: Union[int, Sequence[int]] = 4,
+         nb_cols: Union[int, Sequence[int]] = 4,
+         interpolation: int = 1,
+         mask_interpolation: int = 0,
+         cval: int = 0,
+         cval_mask: int = 0,
+         mode: str = "constant",
+         absolute_scale: bool = False,
+         always_apply: bool = False,
+         keypoints_threshold: float = 0.01,
+         p: float = 0.5,
+     ):
+         super(PiecewiseAffine, self).__init__(always_apply, p)
+
+         self.scale = to_tuple(scale, scale)
+         self.nb_rows = to_tuple(nb_rows, nb_rows)
+         self.nb_cols = to_tuple(nb_cols, nb_cols)
+         self.interpolation = interpolation
+         self.mask_interpolation = mask_interpolation
+         self.cval = cval
+         self.cval_mask = cval_mask
+         self.mode = mode
+         self.absolute_scale = absolute_scale
+         self.keypoints_threshold = keypoints_threshold
+
+     def get_transform_init_args_names(self):
+         return (
+             "scale",
+             "nb_rows",
+             "nb_cols",
+             "interpolation",
+             "mask_interpolation",
+             "cval",
+             "cval_mask",
+             "mode",
+             "absolute_scale",
+             "keypoints_threshold",
+         )
+
+     @property
+     def targets_as_params(self):
+         return ["image"]
+
+     def get_params_dependent_on_targets(self, params) -> dict:
+         h, w = params["image"].shape[:2]
+
+         nb_rows = np.clip(random.randint(*self.nb_rows), 2, None)
+         nb_cols = np.clip(random.randint(*self.nb_cols), 2, None)
+         nb_cells = nb_cols * nb_rows
+         scale = random.uniform(*self.scale)
+
+         jitter: np.ndarray = random_utils.normal(0, scale, (nb_cells, 2))
+         if not np.any(jitter > 0):
+             for i in range(10):  # See: https://github.com/albumentations-team/albumentations/issues/1442
+                 jitter = random_utils.normal(0, scale, (nb_cells, 2))
+                 if np.any(jitter > 0):
+                     break
+             if not np.any(jitter > 0):
+                 return {"matrix": None}
+
+         y = np.linspace(0, h, nb_rows)
+         x = np.linspace(0, w, nb_cols)
+
+         # (H, W) and (H, W) for H=rows, W=cols
+         xx_src, yy_src = np.meshgrid(x, y)
+
+         # (1, HW, 2) => (HW, 2) for H=rows, W=cols
+         points_src = np.dstack([yy_src.flat, xx_src.flat])[0]
+
+         if self.absolute_scale:
+             jitter[:, 0] = jitter[:, 0] / h if h > 0 else 0.0
+             jitter[:, 1] = jitter[:, 1] / w if w > 0 else 0.0
+
+         jitter[:, 0] = jitter[:, 0] * h
+         jitter[:, 1] = jitter[:, 1] * w
+
+         points_dest = np.copy(points_src)
+         points_dest[:, 0] = points_dest[:, 0] + jitter[:, 0]
+         points_dest[:, 1] = points_dest[:, 1] + jitter[:, 1]
+
+         # Restrict all destination points to be inside the image plane.
+         # This is necessary, as otherwise keypoints could be augmented
+         # outside of the image plane and these would be replaced by
+         # (-1, -1), which would not conform with the behaviour of the other augmenters.
+         points_dest[:, 0] = np.clip(points_dest[:, 0], 0, h - 1)
+         points_dest[:, 1] = np.clip(points_dest[:, 1], 0, w - 1)
+
+         matrix = skimage.transform.PiecewiseAffineTransform()
+         matrix.estimate(points_src[:, ::-1], points_dest[:, ::-1])
+
+         return {
+             "matrix": matrix,
+         }
+
+     def apply(
+         self, img: np.ndarray, matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None, **params
+     ) -> np.ndarray:
+         return F.piecewise_affine(img, matrix, self.interpolation, self.mode, self.cval)
+
+     def apply_to_mask(
+         self, img: np.ndarray, matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None, **params
+     ) -> np.ndarray:
+         return F.piecewise_affine(img, matrix, self.mask_interpolation, self.mode, self.cval_mask)
+
+     def apply_to_bbox(
+         self,
+         bbox: BoxInternalType,
+         rows: int = 0,
+         cols: int = 0,
+         matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None,
+         **params
+     ) -> BoxInternalType:
+         return F.bbox_piecewise_affine(bbox, matrix, rows, cols, self.keypoints_threshold)
+
+     def apply_to_keypoint(
+         self,
+         keypoint: KeypointInternalType,
+         rows: int = 0,
+         cols: int = 0,
+         matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None,
+         **params
+     ):
+         return F.keypoint_piecewise_affine(keypoint, matrix, rows, cols, self.keypoints_threshold)
+
+
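A hedged sketch (editorial, not part of the diff): a coarse 4x4 control grid with scale around 0.03 gives mild local warps; as the note above says, this transform is slow, so ElasticTransform is usually the faster alternative.

import numpy as np
import custom_albumentations as A  # assumes top-level re-export

image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
pwa = A.PiecewiseAffine(scale=(0.03, 0.05), nb_rows=4, nb_cols=4, p=1.0)
out = pwa(image=image)["image"]
assert out.shape == image.shape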
+ class PadIfNeeded(DualTransform):
+     """Pad sides of the image if its size is less than the desired size.
+
+     Args:
+         min_height (int): minimal result image height.
+         min_width (int): minimal result image width.
+         pad_height_divisor (int): if not None, ensures image height is divisible by this value.
+         pad_width_divisor (int): if not None, ensures image width is divisible by this value.
+         position (Union[str, PositionType]): Position of the image. Should be one of PositionType.CENTER,
+             PositionType.TOP_LEFT, PositionType.TOP_RIGHT, PositionType.BOTTOM_LEFT, PositionType.BOTTOM_RIGHT,
+             or PositionType.RANDOM. Default: PositionType.CENTER.
+         border_mode (OpenCV flag): OpenCV border mode.
+         value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
+         mask_value (int, float, list of int, list of float): padding value for mask
+             if border_mode is cv2.BORDER_CONSTANT.
+         p (float): probability of applying the transform. Default: 1.0.
+
+     Targets:
+         image, mask, bboxes, keypoints
+
+     Image types:
+         uint8, float32
+     """
+
+     class PositionType(Enum):
+         CENTER = "center"
+         TOP_LEFT = "top_left"
+         TOP_RIGHT = "top_right"
+         BOTTOM_LEFT = "bottom_left"
+         BOTTOM_RIGHT = "bottom_right"
+         RANDOM = "random"
+
+     def __init__(
+         self,
+         min_height: Optional[int] = 1024,
+         min_width: Optional[int] = 1024,
+         pad_height_divisor: Optional[int] = None,
+         pad_width_divisor: Optional[int] = None,
+         position: Union[PositionType, str] = PositionType.CENTER,
+         border_mode: int = cv2.BORDER_REFLECT_101,
+         value: Optional[ImageColorType] = None,
+         mask_value: Optional[ImageColorType] = None,
+         always_apply: bool = False,
+         p: float = 1.0,
+     ):
+         if (min_height is None) == (pad_height_divisor is None):
+             raise ValueError("Only one of 'min_height' and 'pad_height_divisor' parameters must be set")
+
+         if (min_width is None) == (pad_width_divisor is None):
+             raise ValueError("Only one of 'min_width' and 'pad_width_divisor' parameters must be set")
+
+         super(PadIfNeeded, self).__init__(always_apply, p)
+         self.min_height = min_height
+         self.min_width = min_width
+         self.pad_width_divisor = pad_width_divisor
+         self.pad_height_divisor = pad_height_divisor
+         self.position = PadIfNeeded.PositionType(position)
+         self.border_mode = border_mode
+         self.value = value
+         self.mask_value = mask_value
+
+     def update_params(self, params, **kwargs):
+         params = super(PadIfNeeded, self).update_params(params, **kwargs)
+         rows = params["rows"]
+         cols = params["cols"]
+
+         if self.min_height is not None:
+             if rows < self.min_height:
+                 h_pad_top = int((self.min_height - rows) / 2.0)
+                 h_pad_bottom = self.min_height - rows - h_pad_top
+             else:
+                 h_pad_top = 0
+                 h_pad_bottom = 0
+         else:
+             pad_remained = rows % self.pad_height_divisor
+             pad_rows = self.pad_height_divisor - pad_remained if pad_remained > 0 else 0
+
+             h_pad_top = pad_rows // 2
+             h_pad_bottom = pad_rows - h_pad_top
+
+         if self.min_width is not None:
+             if cols < self.min_width:
+                 w_pad_left = int((self.min_width - cols) / 2.0)
+                 w_pad_right = self.min_width - cols - w_pad_left
+             else:
+                 w_pad_left = 0
+                 w_pad_right = 0
+         else:
+             pad_remainder = cols % self.pad_width_divisor
+             pad_cols = self.pad_width_divisor - pad_remainder if pad_remainder > 0 else 0
+
+             w_pad_left = pad_cols // 2
+             w_pad_right = pad_cols - w_pad_left
+
+         h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = self.__update_position_params(
+             h_top=h_pad_top, h_bottom=h_pad_bottom, w_left=w_pad_left, w_right=w_pad_right
+         )
+
+         params.update(
+             {
+                 "pad_top": h_pad_top,
+                 "pad_bottom": h_pad_bottom,
+                 "pad_left": w_pad_left,
+                 "pad_right": w_pad_right,
+             }
+         )
+         return params
+
+     def apply(
+         self, img: np.ndarray, pad_top: int = 0, pad_bottom: int = 0, pad_left: int = 0, pad_right: int = 0, **params
+     ) -> np.ndarray:
+         return F.pad_with_params(
+             img,
+             pad_top,
+             pad_bottom,
+             pad_left,
+             pad_right,
+             border_mode=self.border_mode,
+             value=self.value,
+         )
+
+     def apply_to_mask(
+         self, img: np.ndarray, pad_top: int = 0, pad_bottom: int = 0, pad_left: int = 0, pad_right: int = 0, **params
+     ) -> np.ndarray:
+         return F.pad_with_params(
+             img,
+             pad_top,
+             pad_bottom,
+             pad_left,
+             pad_right,
+             border_mode=self.border_mode,
+             value=self.mask_value,
+         )
+
+     def apply_to_bbox(
+         self,
+         bbox: BoxInternalType,
+         pad_top: int = 0,
+         pad_bottom: int = 0,
+         pad_left: int = 0,
+         pad_right: int = 0,
+         rows: int = 0,
+         cols: int = 0,
+         **params
+     ) -> BoxInternalType:
+         x_min, y_min, x_max, y_max = denormalize_bbox(bbox, rows, cols)[:4]
+         bbox = x_min + pad_left, y_min + pad_top, x_max + pad_left, y_max + pad_top
+         return normalize_bbox(bbox, rows + pad_top + pad_bottom, cols + pad_left + pad_right)
+
+     def apply_to_keypoint(
+         self,
+         keypoint: KeypointInternalType,
+         pad_top: int = 0,
+         pad_bottom: int = 0,
+         pad_left: int = 0,
+         pad_right: int = 0,
+         **params
1141
+ ) -> KeypointInternalType:
1142
+ x, y, angle, scale = keypoint[:4]
1143
+ return x + pad_left, y + pad_top, angle, scale
1144
+
1145
+ def get_transform_init_args_names(self):
1146
+ return (
1147
+ "min_height",
1148
+ "min_width",
1149
+ "pad_height_divisor",
1150
+ "pad_width_divisor",
1151
+ "border_mode",
1152
+ "value",
1153
+ "mask_value",
1154
+ )
1155
+
1156
+ def __update_position_params(
1157
+ self, h_top: int, h_bottom: int, w_left: int, w_right: int
1158
+ ) -> Tuple[int, int, int, int]:
1159
+ if self.position == PadIfNeeded.PositionType.TOP_LEFT:
1160
+ h_bottom += h_top
1161
+ w_right += w_left
1162
+ h_top = 0
1163
+ w_left = 0
1164
+
1165
+ elif self.position == PadIfNeeded.PositionType.TOP_RIGHT:
1166
+ h_bottom += h_top
1167
+ w_left += w_right
1168
+ h_top = 0
1169
+ w_right = 0
1170
+
1171
+ elif self.position == PadIfNeeded.PositionType.BOTTOM_LEFT:
1172
+ h_top += h_bottom
1173
+ w_right += w_left
1174
+ h_bottom = 0
1175
+ w_left = 0
1176
+
1177
+ elif self.position == PadIfNeeded.PositionType.BOTTOM_RIGHT:
1178
+ h_top += h_bottom
1179
+ w_left += w_right
1180
+ h_bottom = 0
1181
+ w_right = 0
1182
+
1183
+ elif self.position == PadIfNeeded.PositionType.RANDOM:
1184
+ h_pad = h_top + h_bottom
1185
+ w_pad = w_left + w_right
1186
+ h_top = random.randint(0, h_pad)
1187
+ h_bottom = h_pad - h_top
1188
+ w_left = random.randint(0, w_pad)
1189
+ w_right = w_pad - w_left
1190
+
1191
+ return h_top, h_bottom, w_left, w_right
1192
+
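As a usage sketch (assuming the standard albumentations call convention that these transforms inherit), the divisor arguments pad each side up to the next multiple:

import cv2
import numpy as np

pad = PadIfNeeded(
    min_height=None, min_width=None,              # exactly one of min_* / *_divisor may be set per axis
    pad_height_divisor=32, pad_width_divisor=32,
    border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0,
)
image = np.zeros((50, 70, 3), dtype=np.uint8)
padded = pad(image=image)["image"]                # assumed albumentations-style __call__
assert padded.shape[:2] == (64, 96)               # 50 -> 64 and 70 -> 96, multiples of 32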
1193
+
1194
+ class VerticalFlip(DualTransform):
1195
+ """Flip the input vertically around the x-axis.
1196
+
1197
+ Args:
1198
+ p (float): probability of applying the transform. Default: 0.5.
1199
+
1200
+ Targets:
1201
+ image, mask, bboxes, keypoints
1202
+
1203
+ Image types:
1204
+ uint8, float32
1205
+ """
1206
+
1207
+ def apply(self, img: np.ndarray, **params) -> np.ndarray:
1208
+ return F.vflip(img)
1209
+
1210
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
1211
+ return F.bbox_vflip(bbox, **params)
1212
+
1213
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
1214
+ return F.keypoint_vflip(keypoint, **params)
1215
+
1216
+ def get_transform_init_args_names(self):
1217
+ return ()
1218
+
1219
+
1220
+ class HorizontalFlip(DualTransform):
1221
+ """Flip the input horizontally around the y-axis.
1222
+
1223
+ Args:
1224
+ p (float): probability of applying the transform. Default: 0.5.
1225
+
1226
+ Targets:
1227
+ image, mask, bboxes, keypoints
1228
+
1229
+ Image types:
1230
+ uint8, float32
1231
+ """
1232
+
1233
+ def apply(self, img: np.ndarray, **params) -> np.ndarray:
1234
+ if img.ndim == 3 and img.shape[2] > 1 and img.dtype == np.uint8:
1235
+ # Opencv is faster than numpy only in case of
1236
+ # non-gray scale 8bits images
1237
+ return F.hflip_cv2(img)
1238
+
1239
+ return F.hflip(img)
1240
+
1241
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
1242
+ return F.bbox_hflip(bbox, **params)
1243
+
1244
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
1245
+ return F.keypoint_hflip(keypoint, **params)
1246
+
1247
+ def get_transform_init_args_names(self):
1248
+ return ()
1249
+
1250
+
1251
+ class Flip(DualTransform):
1252
+ """Flip the input either horizontally, vertically or both horizontally and vertically.
1253
+
1254
+ Args:
1255
+ p (float): probability of applying the transform. Default: 0.5.
1256
+
1257
+ Targets:
1258
+ image, mask, bboxes, keypoints
1259
+
1260
+ Image types:
1261
+ uint8, float32
1262
+ """
1263
+
1264
+ def apply(self, img: np.ndarray, d: int = 0, **params) -> np.ndarray:
1265
+ """Args:
1266
+ d (int): code that specifies how to flip the input. 0 for vertical flipping, 1 for horizontal flipping,
1267
+ -1 for both vertical and horizontal flipping (which can also be seen as rotating the input by
1268
+ 180 degrees).
1269
+ """
1270
+ return F.random_flip(img, d)
1271
+
1272
+ def get_params(self):
1273
+ # Random int in the range [-1, 1]
1274
+ return {"d": random.randint(-1, 1)}
1275
+
1276
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
1277
+ return F.bbox_flip(bbox, **params)
1278
+
1279
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
1280
+ return F.keypoint_flip(keypoint, **params)
1281
+
1282
+ def get_transform_init_args_names(self):
1283
+ return ()
1284
+
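The `d` code sampled in `get_params` matches OpenCV's `cv2.flip` convention, which is presumably what `F.random_flip` wraps; a standalone illustration:

import cv2
import numpy as np

img = np.arange(4, dtype=np.uint8).reshape(2, 2)
flipped_v = cv2.flip(img, 0)       # d = 0: vertical flip (around the x-axis)
flipped_h = cv2.flip(img, 1)       # d = 1: horizontal flip (around the y-axis)
flipped_both = cv2.flip(img, -1)   # d = -1: both axes, i.e. a 180-degree rotation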
1285
+
1286
+ class Transpose(DualTransform):
1287
+ """Transpose the input by swapping rows and columns.
1288
+
1289
+ Args:
1290
+ p (float): probability of applying the transform. Default: 0.5.
1291
+
1292
+ Targets:
1293
+ image, mask, bboxes, keypoints
1294
+
1295
+ Image types:
1296
+ uint8, float32
1297
+ """
1298
+
1299
+ def apply(self, img: np.ndarray, **params) -> np.ndarray:
1300
+ return F.transpose(img)
1301
+
1302
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
1303
+ return F.bbox_transpose(bbox, 0, **params)
1304
+
1305
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
1306
+ return F.keypoint_transpose(keypoint)
1307
+
1308
+ def get_transform_init_args_names(self):
1309
+ return ()
1310
+
1311
+
1312
+ class OpticalDistortion(DualTransform):
1313
+ """
1314
+ Args:
1315
+ distort_limit (float, (float, float)): If distort_limit is a single float, the range
1316
+ will be (-distort_limit, distort_limit). Default: (-0.05, 0.05).
1317
+ shift_limit (float, (float, float)): If shift_limit is a single float, the range
1318
+ will be (-shift_limit, shift_limit). Default: (-0.05, 0.05).
1319
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
1320
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
1321
+ Default: cv2.INTER_LINEAR.
1322
+ border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
1323
+ cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
1324
+ Default: cv2.BORDER_REFLECT_101
1325
+ value (int, float, list of ints, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
1326
+ mask_value (int, float,
1327
+ list of ints,
1328
+ list of float): padding value if border_mode is cv2.BORDER_CONSTANT applied for masks.
1329
+
1330
+ Targets:
1331
+ image, mask, bbox
1332
+
1333
+ Image types:
1334
+ uint8, float32
1335
+ """
1336
+
1337
+ def __init__(
1338
+ self,
1339
+ distort_limit: ScaleFloatType = 0.05,
1340
+ shift_limit: ScaleFloatType = 0.05,
1341
+ interpolation: int = cv2.INTER_LINEAR,
1342
+ border_mode: int = cv2.BORDER_REFLECT_101,
1343
+ value: Optional[ImageColorType] = None,
1344
+ mask_value: Optional[ImageColorType] = None,
1345
+ always_apply: bool = False,
1346
+ p: float = 0.5,
1347
+ ):
1348
+ super(OpticalDistortion, self).__init__(always_apply, p)
1349
+ self.shift_limit = to_tuple(shift_limit)
1350
+ self.distort_limit = to_tuple(distort_limit)
1351
+ self.interpolation = interpolation
1352
+ self.border_mode = border_mode
1353
+ self.value = value
1354
+ self.mask_value = mask_value
1355
+
1356
+ def apply(
1357
+ self, img: np.ndarray, k: int = 0, dx: int = 0, dy: int = 0, interpolation: int = cv2.INTER_LINEAR, **params
1358
+ ) -> np.ndarray:
1359
+ return F.optical_distortion(img, k, dx, dy, interpolation, self.border_mode, self.value)
1360
+
1361
+ def apply_to_mask(self, img: np.ndarray, k: int = 0, dx: int = 0, dy: int = 0, **params) -> np.ndarray:
1362
+ return F.optical_distortion(img, k, dx, dy, cv2.INTER_NEAREST, self.border_mode, self.mask_value)
1363
+
1364
+ def apply_to_bbox(self, bbox: BoxInternalType, k: int = 0, dx: int = 0, dy: int = 0, **params) -> BoxInternalType:
1365
+ rows, cols = params["rows"], params["cols"]
1366
+ mask = np.zeros((rows, cols), dtype=np.uint8)
1367
+ bbox_denorm = F.denormalize_bbox(bbox, rows, cols)
1368
+ x_min, y_min, x_max, y_max = bbox_denorm[:4]
1369
+ x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
1370
+ mask[y_min:y_max, x_min:x_max] = 1
1371
+ mask = F.optical_distortion(mask, k, dx, dy, cv2.INTER_NEAREST, self.border_mode, self.mask_value)
1372
+ bbox_returned = bbox_from_mask(mask)
1373
+ bbox_returned = F.normalize_bbox(bbox_returned, rows, cols)
1374
+ return bbox_returned
1375
+
1376
+ def get_params(self):
1377
+ return {
1378
+ "k": random.uniform(self.distort_limit[0], self.distort_limit[1]),
1379
+ "dx": round(random.uniform(self.shift_limit[0], self.shift_limit[1])),
1380
+ "dy": round(random.uniform(self.shift_limit[0], self.shift_limit[1])),
1381
+ }
1382
+
1383
+ def get_transform_init_args_names(self):
1384
+ return (
1385
+ "distort_limit",
1386
+ "shift_limit",
1387
+ "interpolation",
1388
+ "border_mode",
1389
+ "value",
1390
+ "mask_value",
1391
+ )
1392
+
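Note the rasterize-warp-refit trick in `apply_to_bbox` above: instead of warping the four corner points, the box is drawn into a binary mask, the mask goes through the same distortion as the image, and a new box is fitted to the surviving pixels. A generic sketch of the idea, with a plain horizontal shift standing in for the optical distortion:

import numpy as np

def bbox_through_warp(bbox, warp, rows, cols):
    """Track an (x_min, y_min, x_max, y_max) pixel box through an arbitrary image warp."""
    x_min, y_min, x_max, y_max = bbox
    mask = np.zeros((rows, cols), dtype=np.uint8)
    mask[y_min:y_max, x_min:x_max] = 1
    mask = warp(mask)                     # the same warp the image receives
    ys, xs = np.nonzero(mask)
    return xs.min(), ys.min(), xs.max() + 1, ys.max() + 1

shift_right = lambda m: np.roll(m, 5, axis=1)   # stand-in "distortion"
print(bbox_through_warp((10, 10, 20, 20), shift_right, 64, 64))  # box shifted right by 5: (15, 10, 25, 20)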
1393
+
1394
+ class GridDistortion(DualTransform):
1395
+ """
1396
+ Args:
1397
+ num_steps (int): count of grid cells on each side.
1398
+ distort_limit (float, (float, float)): If distort_limit is a single float, the range
1399
+ will be (-distort_limit, distort_limit). Default: (-0.3, 0.3).
1400
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
1401
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
1402
+ Default: cv2.INTER_LINEAR.
1403
+ border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
1404
+ cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
1405
+ Default: cv2.BORDER_REFLECT_101
1406
+ value (int, float, list of ints, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
1407
+ mask_value (int, float,
1408
+ list of ints,
1409
+ list of float): padding value if border_mode is cv2.BORDER_CONSTANT applied for masks.
1410
+ normalized (bool): if true, the distortion is normalized so that it does not go outside the image. Default: False.
1411
+ See https://github.com/albumentations-team/albumentations/pull/722 for more information.
1412
+
1413
+ Targets:
1414
+ image, mask, bbox
1415
+
1416
+ Image types:
1417
+ uint8, float32
1418
+ """
1419
+
1420
+ def __init__(
1421
+ self,
1422
+ num_steps: int = 5,
1423
+ distort_limit: ScaleFloatType = 0.3,
1424
+ interpolation: int = cv2.INTER_LINEAR,
1425
+ border_mode: int = cv2.BORDER_REFLECT_101,
1426
+ value: Optional[ImageColorType] = None,
1427
+ mask_value: Optional[ImageColorType] = None,
1428
+ normalized: bool = False,
1429
+ always_apply: bool = False,
1430
+ p: float = 0.5,
1431
+ ):
1432
+ super(GridDistortion, self).__init__(always_apply, p)
1433
+ self.num_steps = num_steps
1434
+ self.distort_limit = to_tuple(distort_limit)
1435
+ self.interpolation = interpolation
1436
+ self.border_mode = border_mode
1437
+ self.value = value
1438
+ self.mask_value = mask_value
1439
+ self.normalized = normalized
1440
+
1441
+ def apply(
1442
+ self, img: np.ndarray, stepsx: Tuple = (), stepsy: Tuple = (), interpolation: int = cv2.INTER_LINEAR, **params
1443
+ ) -> np.ndarray:
1444
+ return F.grid_distortion(img, self.num_steps, stepsx, stepsy, interpolation, self.border_mode, self.value)
1445
+
1446
+ def apply_to_mask(self, img: np.ndarray, stepsx: Tuple = (), stepsy: Tuple = (), **params) -> np.ndarray:
1447
+ return F.grid_distortion(
1448
+ img, self.num_steps, stepsx, stepsy, cv2.INTER_NEAREST, self.border_mode, self.mask_value
1449
+ )
1450
+
1451
+ def apply_to_bbox(self, bbox: BoxInternalType, stepsx: Tuple = (), stepsy: Tuple = (), **params) -> BoxInternalType:
1452
+ rows, cols = params["rows"], params["cols"]
1453
+ mask = np.zeros((rows, cols), dtype=np.uint8)
1454
+ bbox_denorm = F.denormalize_bbox(bbox, rows, cols)
1455
+ x_min, y_min, x_max, y_max = bbox_denorm[:4]
1456
+ x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
1457
+ mask[y_min:y_max, x_min:x_max] = 1
1458
+ mask = F.grid_distortion(
1459
+ mask, self.num_steps, stepsx, stepsy, cv2.INTER_NEAREST, self.border_mode, self.mask_value
1460
+ )
1461
+ bbox_returned = bbox_from_mask(mask)
1462
+ bbox_returned = F.normalize_bbox(bbox_returned, rows, cols)
1463
+ return bbox_returned
1464
+
1465
+ def _normalize(self, h, w, xsteps, ysteps):
1466
+ # compensate for smaller last steps in source image.
1467
+ x_step = w // self.num_steps
1468
+ last_x_step = min(w, ((self.num_steps + 1) * x_step)) - (self.num_steps * x_step)
1469
+ xsteps[-1] *= last_x_step / x_step
1470
+
1471
+ y_step = h // self.num_steps
1472
+ last_y_step = min(h, ((self.num_steps + 1) * y_step)) - (self.num_steps * y_step)
1473
+ ysteps[-1] *= last_y_step / y_step
1474
+
1475
+ # now normalize such that distortion never leaves image bounds.
1476
+ tx = w / math.floor(w / self.num_steps)
1477
+ ty = h / math.floor(h / self.num_steps)
1478
+ xsteps = np.array(xsteps) * (tx / np.sum(xsteps))
1479
+ ysteps = np.array(ysteps) * (ty / np.sum(ysteps))
1480
+
1481
+ return {"stepsx": xsteps, "stepsy": ysteps}
1482
+
1483
+ @property
1484
+ def targets_as_params(self):
1485
+ return ["image"]
1486
+
1487
+ def get_params_dependent_on_targets(self, params):
1488
+ h, w = params["image"].shape[:2]
1489
+
1490
+ stepsx = [1 + random.uniform(self.distort_limit[0], self.distort_limit[1]) for _ in range(self.num_steps + 1)]
1491
+ stepsy = [1 + random.uniform(self.distort_limit[0], self.distort_limit[1]) for _ in range(self.num_steps + 1)]
1492
+
1493
+ if self.normalized:
1494
+ return self._normalize(h, w, stepsx, stepsy)
1495
+
1496
+ return {"stepsx": stepsx, "stepsy": stepsy}
1497
+
1498
+ def get_transform_init_args_names(self):
1499
+ return "num_steps", "distort_limit", "interpolation", "border_mode", "value", "mask_value", "normalized"
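Each entry of `stepsx`/`stepsy` above is a multiplier of the nominal grid-cell size, sampled as 1 ± distort_limit; the `normalized=True` branch rescales the multipliers so the distorted grid cannot leave the image bounds. A toy check of that rescaling (values illustrative):

import numpy as np

num_steps, w = 5, 100
xsteps = 1 + np.random.uniform(-0.3, 0.3, num_steps + 1)  # per-cell width multipliers
tx = w / (w // num_steps)                                 # target sum, as in _normalize
xsteps = xsteps * (tx / xsteps.sum())
assert np.isclose(xsteps.sum(), tx)                       # multipliers sum back to the image-spanning total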
custom_albumentations/augmentations/transforms.py ADDED
@@ -0,0 +1,2667 @@
1
+ from __future__ import absolute_import, division
2
+
3
+ import math
4
+ import numbers
5
+ import random
6
+ import warnings
7
+ from enum import IntEnum
8
+ from types import LambdaType
9
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
10
+
11
+ import cv2
12
+ import numpy as np
13
+ from scipy import special
14
+ from scipy.ndimage import gaussian_filter
15
+
16
+ from custom_albumentations import random_utils
17
+ from custom_albumentations.augmentations.blur.functional import blur
18
+ from custom_albumentations.augmentations.utils import (
19
+ get_num_channels,
20
+ is_grayscale_image,
21
+ is_rgb_image,
22
+ )
23
+
24
+ from ..core.transforms_interface import (
25
+ DualTransform,
26
+ ImageOnlyTransform,
27
+ NoOp,
28
+ ScaleFloatType,
29
+ to_tuple,
30
+ )
31
+ from ..core.utils import format_args
32
+ from . import functional as F
33
+
34
+ __all__ = [
35
+ "Normalize",
36
+ "RandomGamma",
37
+ "RandomGridShuffle",
38
+ "HueSaturationValue",
39
+ "RGBShift",
40
+ "RandomBrightness",
41
+ "RandomContrast",
42
+ "GaussNoise",
43
+ "CLAHE",
44
+ "ChannelShuffle",
45
+ "InvertImg",
46
+ "ToGray",
47
+ "ToRGB",
48
+ "ToSepia",
49
+ "JpegCompression",
50
+ "ImageCompression",
51
+ "ToFloat",
52
+ "FromFloat",
53
+ "RandomBrightnessContrast",
54
+ "RandomSnow",
55
+ "RandomGravel",
56
+ "RandomRain",
57
+ "RandomFog",
58
+ "RandomSunFlare",
59
+ "RandomShadow",
60
+ "RandomToneCurve",
61
+ "Lambda",
62
+ "ISONoise",
63
+ "Solarize",
64
+ "Equalize",
65
+ "Posterize",
66
+ "Downscale",
67
+ "MultiplicativeNoise",
68
+ "FancyPCA",
69
+ "ColorJitter",
70
+ "Sharpen",
71
+ "Emboss",
72
+ "Superpixels",
73
+ "TemplateTransform",
74
+ "RingingOvershoot",
75
+ "UnsharpMask",
76
+ "PixelDropout",
77
+ "Spatter",
78
+ ]
79
+
80
+
81
+ class RandomGridShuffle(DualTransform):
82
+ """
83
+ Randomly shuffle the grid's cells on the image.
84
+
85
+ Args:
86
+ grid ((int, int)): size of grid for splitting image.
87
+
88
+ Targets:
89
+ image, mask, keypoints
90
+
91
+ Image types:
92
+ uint8, float32
93
+ """
94
+
95
+ def __init__(self, grid: Tuple[int, int] = (3, 3), always_apply: bool = False, p: float = 0.5):
96
+ super(RandomGridShuffle, self).__init__(always_apply, p)
97
+ self.grid = grid
98
+
99
+ def apply(self, img: np.ndarray, tiles: np.ndarray = np.array(None), **params):
100
+ return F.swap_tiles_on_image(img, tiles)
101
+
102
+ def apply_to_mask(self, img: np.ndarray, tiles: np.ndarray = np.array(None), **params):
103
+ return F.swap_tiles_on_image(img, tiles)
104
+
105
+ def apply_to_keypoint(
106
+ self, keypoint: Tuple[float, ...], tiles: np.ndarray = np.array(None), rows: int = 0, cols: int = 0, **params
107
+ ):
108
+ for (
109
+ current_left_up_corner_row,
110
+ current_left_up_corner_col,
111
+ old_left_up_corner_row,
112
+ old_left_up_corner_col,
113
+ height_tile,
114
+ width_tile,
115
+ ) in tiles:
116
+ x, y = keypoint[:2]
117
+
118
+ if (old_left_up_corner_row <= y < (old_left_up_corner_row + height_tile)) and (
119
+ old_left_up_corner_col <= x < (old_left_up_corner_col + width_tile)
120
+ ):
121
+ x = x - old_left_up_corner_col + current_left_up_corner_col
122
+ y = y - old_left_up_corner_row + current_left_up_corner_row
123
+ keypoint = (x, y) + tuple(keypoint[2:])
124
+ break
125
+
126
+ return keypoint
127
+
128
+ def get_params_dependent_on_targets(self, params):
129
+ height, width = params["image"].shape[:2]
130
+ n, m = self.grid
131
+
132
+ if n <= 0 or m <= 0:
133
+ raise ValueError("Grid's values must be positive. Current grid [%s, %s]" % (n, m))
134
+
135
+ if n > height // 2 or m > width // 2:
136
+ raise ValueError("Grid cells are too small: the requested grid would just shuffle individual pixels of the image")
137
+
138
+ height_split = np.linspace(0, height, n + 1, dtype=np.int32)
139
+ width_split = np.linspace(0, width, m + 1, dtype=np.int32)
140
+
141
+ height_matrix, width_matrix = np.meshgrid(height_split, width_split, indexing="ij")
142
+
143
+ index_height_matrix = height_matrix[:-1, :-1]
144
+ index_width_matrix = width_matrix[:-1, :-1]
145
+
146
+ shifted_index_height_matrix = height_matrix[1:, 1:]
147
+ shifted_index_width_matrix = width_matrix[1:, 1:]
148
+
149
+ height_tile_sizes = shifted_index_height_matrix - index_height_matrix
150
+ width_tile_sizes = shifted_index_width_matrix - index_width_matrix
151
+
152
+ tiles_sizes = np.stack((height_tile_sizes, width_tile_sizes), axis=2)
153
+
154
+ index_matrix = np.indices((n, m))
155
+ new_index_matrix = np.stack(index_matrix, axis=2)
156
+
157
+ for bbox_size in np.unique(tiles_sizes.reshape(-1, 2), axis=0):
158
+ eq_mat = np.all(tiles_sizes == bbox_size, axis=2)
159
+ new_index_matrix[eq_mat] = random_utils.permutation(new_index_matrix[eq_mat])
160
+
161
+ new_index_matrix = np.split(new_index_matrix, 2, axis=2)
162
+
163
+ old_x = index_height_matrix[new_index_matrix[0], new_index_matrix[1]].reshape(-1)
164
+ old_y = index_width_matrix[new_index_matrix[0], new_index_matrix[1]].reshape(-1)
165
+
166
+ shift_x = height_tile_sizes.reshape(-1)
167
+ shift_y = width_tile_sizes.reshape(-1)
168
+
169
+ curr_x = index_height_matrix.reshape(-1)
170
+ curr_y = index_width_matrix.reshape(-1)
171
+
172
+ tiles = np.stack([curr_x, curr_y, old_x, old_y, shift_x, shift_y], axis=1)
173
+
174
+ return {"tiles": tiles}
175
+
176
+ @property
177
+ def targets_as_params(self):
178
+ return ["image"]
179
+
180
+ def get_transform_init_args_names(self):
181
+ return ("grid",)
182
+
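A usage sketch (again assuming the standard albumentations call convention): the image and mask receive the same tile permutation, so their alignment is preserved.

import numpy as np

shuffle = RandomGridShuffle(grid=(2, 2), p=1.0)
image = np.arange(16, dtype=np.uint8).reshape(4, 4)
mask = (image > 7).astype(np.uint8)
out = shuffle(image=image, mask=mask)         # assumed albumentations-style __call__
shuffled_image, shuffled_mask = out["image"], out["mask"]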
183
+
184
+ class Normalize(ImageOnlyTransform):
185
+ """Normalization is applied by the formula: `img = (img - mean * max_pixel_value) / (std * max_pixel_value)`
186
+
187
+ Args:
188
+ mean (float, list of float): mean values
189
+ std (float, list of float): std values
190
+ max_pixel_value (float): maximum possible pixel value
191
+
192
+ Targets:
193
+ image
194
+
195
+ Image types:
196
+ uint8, float32
197
+ """
198
+
199
+ def __init__(
200
+ self,
201
+ mean=(0.485, 0.456, 0.406),
202
+ std=(0.229, 0.224, 0.225),
203
+ max_pixel_value=255.0,
204
+ always_apply=False,
205
+ p=1.0,
206
+ ):
207
+ super(Normalize, self).__init__(always_apply, p)
208
+ self.mean = mean
209
+ self.std = std
210
+ self.max_pixel_value = max_pixel_value
211
+
212
+ def apply(self, image, **params):
213
+ return F.normalize(image, self.mean, self.std, self.max_pixel_value)
214
+
215
+ def get_transform_init_args_names(self):
216
+ return ("mean", "std", "max_pixel_value")
217
+
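A worked instance of the formula in the docstring, for a uint8 pixel value of 128 with the ImageNet defaults:

import numpy as np

img = np.full((2, 2, 3), 128, dtype=np.uint8)
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
max_pixel_value = 255.0

out = (img - mean * max_pixel_value) / (std * max_pixel_value)
# red channel: (128 - 0.485 * 255) / (0.229 * 255) ≈ 0.074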
218
+
219
+ class ImageCompression(ImageOnlyTransform):
220
+ """Decreases image quality by Jpeg, WebP compression of an image.
221
+
222
+ Args:
223
+ quality_lower (float): lower bound on the image quality.
224
+ Should be in [0, 100] range for jpeg and [1, 100] for webp.
225
+ quality_upper (float): upper bound on the image quality.
226
+ Should be in [0, 100] range for jpeg and [1, 100] for webp.
227
+ compression_type (ImageCompressionType): should be ImageCompressionType.JPEG or ImageCompressionType.WEBP.
228
+ Default: ImageCompressionType.JPEG
229
+
230
+ Targets:
231
+ image
232
+
233
+ Image types:
234
+ uint8, float32
235
+ """
236
+
237
+ class ImageCompressionType(IntEnum):
238
+ JPEG = 0
239
+ WEBP = 1
240
+
241
+ def __init__(
242
+ self,
243
+ quality_lower=99,
244
+ quality_upper=100,
245
+ compression_type=ImageCompressionType.JPEG,
246
+ always_apply=False,
247
+ p=0.5,
248
+ ):
249
+ super(ImageCompression, self).__init__(always_apply, p)
250
+
251
+ self.compression_type = ImageCompression.ImageCompressionType(compression_type)
252
+ low_thresh_quality_assert = 0
253
+
254
+ if self.compression_type == ImageCompression.ImageCompressionType.WEBP:
255
+ low_thresh_quality_assert = 1
256
+
257
+ if not low_thresh_quality_assert <= quality_lower <= 100:
258
+ raise ValueError("Invalid quality_lower. Got: {}".format(quality_lower))
259
+ if not low_thresh_quality_assert <= quality_upper <= 100:
260
+ raise ValueError("Invalid quality_upper. Got: {}".format(quality_upper))
261
+
262
+ self.quality_lower = quality_lower
263
+ self.quality_upper = quality_upper
264
+
265
+ def apply(self, image, quality=100, image_type=".jpg", **params):
266
+ if not image.ndim == 2 and image.shape[-1] not in (1, 3, 4):
267
+ raise TypeError("ImageCompression transformation expects 1, 3 or 4 channel images.")
268
+ return F.image_compression(image, quality, image_type)
269
+
270
+ def get_params(self):
271
+ image_type = ".jpg"
272
+
273
+ if self.compression_type == ImageCompression.ImageCompressionType.WEBP:
274
+ image_type = ".webp"
275
+
276
+ return {
277
+ "quality": random.randint(self.quality_lower, self.quality_upper),
278
+ "image_type": image_type,
279
+ }
280
+
281
+ def get_transform_init_args(self):
282
+ return {
283
+ "quality_lower": self.quality_lower,
284
+ "quality_upper": self.quality_upper,
285
+ "compression_type": self.compression_type.value,
286
+ }
287
+
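The round-trip that `F.image_compression` most plausibly performs can be reproduced with OpenCV alone; a sketch (the quality value is illustrative):

import cv2
import numpy as np

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
ok, buf = cv2.imencode(".jpg", img, [int(cv2.IMWRITE_JPEG_QUALITY), 10])
assert ok
degraded = cv2.imdecode(buf, cv2.IMREAD_UNCHANGED)  # same shape back, JPEG artifacts added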
288
+
289
+ class JpegCompression(ImageCompression):
290
+ """Decreases image quality by Jpeg compression of an image.
291
+
292
+ Args:
293
+ quality_lower (float): lower bound on the jpeg quality. Should be in [0, 100] range
294
+ quality_upper (float): upper bound on the jpeg quality. Should be in [0, 100] range
295
+
296
+ Targets:
297
+ image
298
+
299
+ Image types:
300
+ uint8, float32
301
+ """
302
+
303
+ def __init__(self, quality_lower=99, quality_upper=100, always_apply=False, p=0.5):
304
+ super(JpegCompression, self).__init__(
305
+ quality_lower=quality_lower,
306
+ quality_upper=quality_upper,
307
+ compression_type=ImageCompression.ImageCompressionType.JPEG,
308
+ always_apply=always_apply,
309
+ p=p,
310
+ )
311
+ warnings.warn(
312
+ f"{self.__class__.__name__} has been deprecated. Please use ImageCompression",
313
+ FutureWarning,
314
+ )
315
+
316
+ def get_transform_init_args(self):
317
+ return {
318
+ "quality_lower": self.quality_lower,
319
+ "quality_upper": self.quality_upper,
320
+ }
321
+
322
+
323
+ class RandomSnow(ImageOnlyTransform):
324
+ """Bleach out some pixel values simulating snow.
325
+
326
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
327
+
328
+ Args:
329
+ snow_point_lower (float): lower bound of the amount of snow. Should be in [0, 1] range
330
+ snow_point_upper (float): upper bound of the amount of snow. Should be in [0, 1] range
331
+ brightness_coeff (float): a larger number leads to more snow on the image. Should be >= 0
332
+
333
+ Targets:
334
+ image
335
+
336
+ Image types:
337
+ uint8, float32
338
+ """
339
+
340
+ def __init__(
341
+ self,
342
+ snow_point_lower=0.1,
343
+ snow_point_upper=0.3,
344
+ brightness_coeff=2.5,
345
+ always_apply=False,
346
+ p=0.5,
347
+ ):
348
+ super(RandomSnow, self).__init__(always_apply, p)
349
+
350
+ if not 0 <= snow_point_lower <= snow_point_upper <= 1:
351
+ raise ValueError(
352
+ "Invalid combination of snow_point_lower and snow_point_upper. Got: {}".format(
353
+ (snow_point_lower, snow_point_upper)
354
+ )
355
+ )
356
+ if brightness_coeff < 0:
357
+ raise ValueError("brightness_coeff must be greater than or equal to 0. Got: {}".format(brightness_coeff))
358
+
359
+ self.snow_point_lower = snow_point_lower
360
+ self.snow_point_upper = snow_point_upper
361
+ self.brightness_coeff = brightness_coeff
362
+
363
+ def apply(self, image, snow_point=0.1, **params):
364
+ return F.add_snow(image, snow_point, self.brightness_coeff)
365
+
366
+ def get_params(self):
367
+ return {"snow_point": random.uniform(self.snow_point_lower, self.snow_point_upper)}
368
+
369
+ def get_transform_init_args_names(self):
370
+ return ("snow_point_lower", "snow_point_upper", "brightness_coeff")
371
+
372
+
373
+ class RandomGravel(ImageOnlyTransform):
374
+ """Add gravels.
375
+
376
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
377
+
378
+ Args:
379
+ gravel_roi (float, float, float, float): (top-left x, top-left y,
380
+ bottom-right x, bottom right y). Should be in [0, 1] range
381
+ number_of_patches (int): number of gravel patches required
382
+
383
+ Targets:
384
+ image
385
+
386
+ Image types:
387
+ uint8, float32
388
+ """
389
+
390
+ def __init__(
391
+ self,
392
+ gravel_roi: tuple = (0.1, 0.4, 0.9, 0.9),
393
+ number_of_patches: int = 2,
394
+ always_apply: bool = False,
395
+ p: float = 0.5,
396
+ ):
397
+ super(RandomGravel, self).__init__(always_apply, p)
398
+
399
+ (gravel_lower_x, gravel_lower_y, gravel_upper_x, gravel_upper_y) = gravel_roi
400
+
401
+ if not 0 <= gravel_lower_x < gravel_upper_x <= 1 or not 0 <= gravel_lower_y < gravel_upper_y <= 1:
402
+ raise ValueError("Invalid gravel_roi. Got: %s." % gravel_roi)
403
+ if number_of_patches < 1:
404
+ raise ValueError("Invalid gravel number_of_patches. Got: %s." % number_of_patches)
405
+
406
+ self.gravel_roi = gravel_roi
407
+ self.number_of_patches = number_of_patches
408
+
409
+ def generate_gravel_patch(self, rectangular_roi):
410
+ x1, y1, x2, y2 = rectangular_roi
412
+ area = abs((x2 - x1) * (y2 - y1))
413
+ count = area // 10
414
+ gravels = np.empty([count, 2], dtype=np.int64)
415
+ gravels[:, 0] = random_utils.randint(x1, x2, count)
416
+ gravels[:, 1] = random_utils.randint(y1, y2, count)
417
+ return gravels
418
+
419
+ def apply(self, image, gravels_infos=(), **params):
420
+ return F.add_gravel(image, gravels_infos)
421
+
422
+ @property
423
+ def targets_as_params(self):
424
+ return ["image"]
425
+
426
+ def get_params_dependent_on_targets(self, params):
427
+ img = params["image"]
428
+ height, width = img.shape[:2]
429
+
430
+ x_min, y_min, x_max, y_max = self.gravel_roi
431
+ x_min = int(x_min * width)
432
+ x_max = int(x_max * width)
433
+ y_min = int(y_min * height)
434
+ y_max = int(y_max * height)
435
+
436
+ max_height = 200
437
+ max_width = 30
438
+
439
+ rectangular_rois = np.zeros([self.number_of_patches, 4], dtype=np.int64)
440
+ xx1 = random_utils.randint(x_min + 1, x_max, self.number_of_patches) # xmax
441
+ xx2 = random_utils.randint(x_min, xx1) # xmin
442
+ yy1 = random_utils.randint(y_min + 1, y_max, self.number_of_patches) # ymax
443
+ yy2 = random_utils.randint(y_min, yy1) # ymin
444
+
445
+ rectangular_rois[:, 0] = xx2
446
+ rectangular_rois[:, 1] = yy2
447
+ rectangular_rois[:, 2] = [min(tup) for tup in zip(xx1, xx2 + max_height)]
448
+ rectangular_rois[:, 3] = [min(tup) for tup in zip(yy1, yy2 + max_width)]
449
+
450
+ minx = []
451
+ maxx = []
452
+ miny = []
453
+ maxy = []
454
+ val = []
455
+ for roi in rectangular_rois:
456
+ gravels = self.generate_gravel_patch(roi)
457
+ x = gravels[:, 0]
458
+ y = gravels[:, 1]
459
+ r = random_utils.randint(1, 4, len(gravels))
460
+ sat = random_utils.randint(0, 255, len(gravels))
461
+ miny.append(np.maximum(y - r, 0))
462
+ maxy.append(np.minimum(y + r, y))
463
+ minx.append(np.maximum(x - r, 0))
464
+ maxx.append(np.minimum(x + r, x))
465
+ val.append(sat)
466
+
467
+ return {
468
+ "gravels_infos": np.stack(
469
+ [
470
+ np.concatenate(miny),
471
+ np.concatenate(maxy),
472
+ np.concatenate(minx),
473
+ np.concatenate(maxx),
474
+ np.concatenate(val),
475
+ ],
476
+ 1,
477
+ )
478
+ }
479
+
480
+ def get_transform_init_args_names(self):
481
+ return ("gravel_roi", "number_of_patches")
482
+
483
+
484
+ class RandomRain(ImageOnlyTransform):
485
+ """Adds rain effects.
486
+
487
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
488
+
489
+ Args:
490
+ slant_lower: should be in range [-20, 20].
491
+ slant_upper: should be in range [-20, 20].
492
+ drop_length: should be in range [0, 100].
493
+ drop_width: should be in range [1, 5].
494
+ drop_color (list of (r, g, b)): color of the rain lines.
495
+ blur_value (int): rainy views are blurry, so the image is blurred by this amount.
496
+ brightness_coefficient (float): rainy days are usually shady. Should be in range [0, 1].
497
+ rain_type: One of [None, "drizzle", "heavy", "torrential"]
498
+
499
+ Targets:
500
+ image
501
+
502
+ Image types:
503
+ uint8, float32
504
+ """
505
+
506
+ def __init__(
507
+ self,
508
+ slant_lower=-10,
509
+ slant_upper=10,
510
+ drop_length=20,
511
+ drop_width=1,
512
+ drop_color=(200, 200, 200),
513
+ blur_value=7,
514
+ brightness_coefficient=0.7,
515
+ rain_type=None,
516
+ always_apply=False,
517
+ p=0.5,
518
+ ):
519
+ super(RandomRain, self).__init__(always_apply, p)
520
+
521
+ if rain_type not in ["drizzle", "heavy", "torrential", None]:
522
+ "rain_type must be one of ({}). Got: {}".format(["drizzle", "heavy", "torrential", None], rain_type)
523
+ "raint_type must be one of ({}). Got: {}".format(["drizzle", "heavy", "torrential", None], rain_type)
524
+ )
525
+ if not -20 <= slant_lower <= slant_upper <= 20:
526
+ raise ValueError(
527
+ "Invalid combination of slant_lower and slant_upper. Got: {}".format((slant_lower, slant_upper))
528
+ )
529
+ if not 1 <= drop_width <= 5:
530
+ raise ValueError("drop_width must be in range [1, 5]. Got: {}".format(drop_width))
531
+ if not 0 <= drop_length <= 100:
532
+ raise ValueError("drop_length must be in range [0, 100]. Got: {}".format(drop_length))
533
+ if not 0 <= brightness_coefficient <= 1:
534
+ raise ValueError("brightness_coefficient must be in range [0, 1]. Got: {}".format(brightness_coefficient))
535
+
536
+ self.slant_lower = slant_lower
537
+ self.slant_upper = slant_upper
538
+
539
+ self.drop_length = drop_length
540
+ self.drop_width = drop_width
541
+ self.drop_color = drop_color
542
+ self.blur_value = blur_value
543
+ self.brightness_coefficient = brightness_coefficient
544
+ self.rain_type = rain_type
545
+
546
+ def apply(self, image, slant=10, drop_length=20, rain_drops=(), **params):
547
+ return F.add_rain(
548
+ image,
549
+ slant,
550
+ drop_length,
551
+ self.drop_width,
552
+ self.drop_color,
553
+ self.blur_value,
554
+ self.brightness_coefficient,
555
+ rain_drops,
556
+ )
557
+
558
+ @property
559
+ def targets_as_params(self):
560
+ return ["image"]
561
+
562
+ def get_params_dependent_on_targets(self, params):
563
+ img = params["image"]
564
+ slant = int(random.uniform(self.slant_lower, self.slant_upper))
565
+
566
+ height, width = img.shape[:2]
567
+ area = height * width
568
+
569
+ if self.rain_type == "drizzle":
570
+ num_drops = area // 770
571
+ drop_length = 10
572
+ elif self.rain_type == "heavy":
573
+ num_drops = width * height // 600
574
+ drop_length = 30
575
+ elif self.rain_type == "torrential":
576
+ num_drops = area // 500
577
+ drop_length = 60
578
+ else:
579
+ drop_length = self.drop_length
580
+ num_drops = area // 600
581
+
582
+ rain_drops = []
583
+
584
+ for _i in range(num_drops):  # if you want heavier rain, try increasing num_drops
585
+ if slant < 0:
586
+ x = random.randint(slant, width)
587
+ else:
588
+ x = random.randint(0, width - slant)
589
+
590
+ y = random.randint(0, height - drop_length)
591
+
592
+ rain_drops.append((x, y))
593
+
594
+ return {"drop_length": drop_length, "slant": slant, "rain_drops": rain_drops}
595
+
596
+ def get_transform_init_args_names(self):
597
+ return (
598
+ "slant_lower",
599
+ "slant_upper",
600
+ "drop_length",
601
+ "drop_width",
602
+ "drop_color",
603
+ "blur_value",
604
+ "brightness_coefficient",
605
+ "rain_type",
606
+ )
607
+
608
+
609
+ class RandomFog(ImageOnlyTransform):
610
+ """Simulates fog for the image
611
+
612
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
613
+
614
+ Args:
615
+ fog_coef_lower (float): lower limit for fog intensity coefficient. Should be in [0, 1] range.
616
+ fog_coef_upper (float): upper limit for fog intensity coefficient. Should be in [0, 1] range.
617
+ alpha_coef (float): transparency of the fog circles. Should be in [0, 1] range.
618
+
619
+ Targets:
620
+ image
621
+
622
+ Image types:
623
+ uint8, float32
624
+ """
625
+
626
+ def __init__(
627
+ self,
628
+ fog_coef_lower=0.3,
629
+ fog_coef_upper=1,
630
+ alpha_coef=0.08,
631
+ always_apply=False,
632
+ p=0.5,
633
+ ):
634
+ super(RandomFog, self).__init__(always_apply, p)
635
+
636
+ if not 0 <= fog_coef_lower <= fog_coef_upper <= 1:
637
+ "Invalid combination of fog_coef_lower and fog_coef_upper. Got: {}".format(
638
+ "Invalid combination if fog_coef_lower and fog_coef_upper. Got: {}".format(
639
+ (fog_coef_lower, fog_coef_upper)
640
+ )
641
+ )
642
+ if not 0 <= alpha_coef <= 1:
643
+ raise ValueError("alpha_coef must be in range [0, 1]. Got: {}".format(alpha_coef))
644
+
645
+ self.fog_coef_lower = fog_coef_lower
646
+ self.fog_coef_upper = fog_coef_upper
647
+ self.alpha_coef = alpha_coef
648
+
649
+ def apply(self, image, fog_coef=0.1, haze_list=(), **params):
650
+ return F.add_fog(image, fog_coef, self.alpha_coef, haze_list)
651
+
652
+ @property
653
+ def targets_as_params(self):
654
+ return ["image"]
655
+
656
+ def get_params_dependent_on_targets(self, params):
657
+ img = params["image"]
658
+ fog_coef = random.uniform(self.fog_coef_lower, self.fog_coef_upper)
659
+
660
+ height, width = imshape = img.shape[:2]
661
+
662
+ hw = max(1, int(width // 3 * fog_coef))
663
+
664
+ haze_list = []
665
+ midx = width // 2 - 2 * hw
666
+ midy = height // 2 - hw
667
+ index = 1
668
+
669
+ while midx > -hw or midy > -hw:
670
+ for _i in range(hw // 10 * index):
671
+ x = random.randint(midx, width - midx - hw)
672
+ y = random.randint(midy, height - midy - hw)
673
+ haze_list.append((x, y))
674
+
675
+ midx -= 3 * hw * width // sum(imshape)
676
+ midy -= 3 * hw * height // sum(imshape)
677
+ index += 1
678
+
679
+ return {"haze_list": haze_list, "fog_coef": fog_coef}
680
+
681
+ def get_transform_init_args_names(self):
682
+ return ("fog_coef_lower", "fog_coef_upper", "alpha_coef")
683
+
684
+
685
+ class RandomSunFlare(ImageOnlyTransform):
686
+ """Simulates Sun Flare for the image
687
+
688
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
689
+
690
+ Args:
691
+ flare_roi (float, float, float, float): region of the image where flare will
692
+ appear (x_min, y_min, x_max, y_max). All values should be in range [0, 1].
693
+ angle_lower (float): should be in range [0, `angle_upper`].
694
+ angle_upper (float): should be in range [`angle_lower`, 1].
695
+ num_flare_circles_lower (int): lower limit for the number of flare circles.
696
+ Should be in range [0, `num_flare_circles_upper`].
697
+ num_flare_circles_upper (int): upper limit for the number of flare circles.
698
+ Should be in range [`num_flare_circles_lower`, inf].
699
+ src_radius (int): radius of the flare's source circle, in pixels.
700
+ src_color ((int, int, int)): color of the flare
701
+
702
+ Targets:
703
+ image
704
+
705
+ Image types:
706
+ uint8, float32
707
+ """
708
+
709
+ def __init__(
710
+ self,
711
+ flare_roi=(0, 0, 1, 0.5),
712
+ angle_lower=0,
713
+ angle_upper=1,
714
+ num_flare_circles_lower=6,
715
+ num_flare_circles_upper=10,
716
+ src_radius=400,
717
+ src_color=(255, 255, 255),
718
+ always_apply=False,
719
+ p=0.5,
720
+ ):
721
+ super(RandomSunFlare, self).__init__(always_apply, p)
722
+
723
+ (
724
+ flare_center_lower_x,
725
+ flare_center_lower_y,
726
+ flare_center_upper_x,
727
+ flare_center_upper_y,
728
+ ) = flare_roi
729
+
730
+ if (
731
+ not 0 <= flare_center_lower_x < flare_center_upper_x <= 1
732
+ or not 0 <= flare_center_lower_y < flare_center_upper_y <= 1
733
+ ):
734
+ raise ValueError("Invalid flare_roi. Got: {}".format(flare_roi))
735
+ if not 0 <= angle_lower < angle_upper <= 1:
736
+ raise ValueError(
737
+ "Invalid combination of angle_lower and angle_upper. Got: {}".format((angle_lower, angle_upper))
738
+ )
739
+ if not 0 <= num_flare_circles_lower < num_flare_circles_upper:
740
+ raise ValueError(
741
+ "Invalid combination of num_flare_circles_lower and num_flare_circles_upper. Got: {}".format(
742
+ (num_flare_circles_lower, num_flare_circles_upper)
743
+ )
744
+ )
745
+
746
+ self.flare_center_lower_x = flare_center_lower_x
747
+ self.flare_center_upper_x = flare_center_upper_x
748
+
749
+ self.flare_center_lower_y = flare_center_lower_y
750
+ self.flare_center_upper_y = flare_center_upper_y
751
+
752
+ self.angle_lower = angle_lower
753
+ self.angle_upper = angle_upper
754
+ self.num_flare_circles_lower = num_flare_circles_lower
755
+ self.num_flare_circles_upper = num_flare_circles_upper
756
+
757
+ self.src_radius = src_radius
758
+ self.src_color = src_color
759
+
760
+ def apply(self, image, flare_center_x=0.5, flare_center_y=0.5, circles=(), **params):
761
+ return F.add_sun_flare(
762
+ image,
763
+ flare_center_x,
764
+ flare_center_y,
765
+ self.src_radius,
766
+ self.src_color,
767
+ circles,
768
+ )
769
+
770
+ @property
771
+ def targets_as_params(self):
772
+ return ["image"]
773
+
774
+ def get_params_dependent_on_targets(self, params):
775
+ img = params["image"]
776
+ height, width = img.shape[:2]
777
+
778
+ angle = 2 * math.pi * random.uniform(self.angle_lower, self.angle_upper)
779
+
780
+ flare_center_x = random.uniform(self.flare_center_lower_x, self.flare_center_upper_x)
781
+ flare_center_y = random.uniform(self.flare_center_lower_y, self.flare_center_upper_y)
782
+
783
+ flare_center_x = int(width * flare_center_x)
784
+ flare_center_y = int(height * flare_center_y)
785
+
786
+ num_circles = random.randint(self.num_flare_circles_lower, self.num_flare_circles_upper)
787
+
788
+ circles = []
789
+
790
+ x = []
791
+ y = []
792
+
793
+ def line(t):
794
+ return (flare_center_x + t * math.cos(angle), flare_center_y + t * math.sin(angle))
795
+
796
+ for t_val in range(-flare_center_x, width - flare_center_x, 10):
797
+ rand_x, rand_y = line(t_val)
798
+ x.append(rand_x)
799
+ y.append(rand_y)
800
+
801
+ for _i in range(num_circles):
802
+ alpha = random.uniform(0.05, 0.2)
803
+ r = random.randint(0, len(x) - 1)
804
+ rad = random.randint(1, max(height // 100 - 2, 2))
805
+
806
+ r_color = random.randint(max(self.src_color[0] - 50, 0), self.src_color[0])
807
+ g_color = random.randint(max(self.src_color[1] - 50, 0), self.src_color[1])
808
+ b_color = random.randint(max(self.src_color[2] - 50, 0), self.src_color[2])
809
+
810
+ circles += [
811
+ (
812
+ alpha,
813
+ (int(x[r]), int(y[r])),
814
+ pow(rad, 3),
815
+ (r_color, g_color, b_color),
816
+ )
817
+ ]
818
+
819
+ return {
820
+ "circles": circles,
821
+ "flare_center_x": flare_center_x,
822
+ "flare_center_y": flare_center_y,
823
+ }
824
+
825
+ def get_transform_init_args(self):
826
+ return {
827
+ "flare_roi": (
828
+ self.flare_center_lower_x,
829
+ self.flare_center_lower_y,
830
+ self.flare_center_upper_x,
831
+ self.flare_center_upper_y,
832
+ ),
833
+ "angle_lower": self.angle_lower,
834
+ "angle_upper": self.angle_upper,
835
+ "num_flare_circles_lower": self.num_flare_circles_lower,
836
+ "num_flare_circles_upper": self.num_flare_circles_upper,
837
+ "src_radius": self.src_radius,
838
+ "src_color": self.src_color,
839
+ }
840
+
841
+
842
+ class RandomShadow(ImageOnlyTransform):
843
+ """Simulates shadows for the image
844
+
845
+ From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
846
+
847
+ Args:
848
+ shadow_roi (float, float, float, float): region of the image where shadows
849
+ will appear (x_min, y_min, x_max, y_max). All values should be in range [0, 1].
850
+ num_shadows_lower (int): Lower limit for the possible number of shadows.
851
+ Should be in range [0, `num_shadows_upper`].
852
+ num_shadows_upper (int): Upper limit for the possible number of shadows.
853
+ Should be in range [`num_shadows_lower`, inf].
854
+ shadow_dimension (int): number of edges in the shadow polygons
855
+
856
+ Targets:
857
+ image
858
+
859
+ Image types:
860
+ uint8, float32
861
+ """
862
+
863
+ def __init__(
864
+ self,
865
+ shadow_roi=(0, 0.5, 1, 1),
866
+ num_shadows_lower=1,
867
+ num_shadows_upper=2,
868
+ shadow_dimension=5,
869
+ always_apply=False,
870
+ p=0.5,
871
+ ):
872
+ super(RandomShadow, self).__init__(always_apply, p)
873
+
874
+ (shadow_lower_x, shadow_lower_y, shadow_upper_x, shadow_upper_y) = shadow_roi
875
+
876
+ if not 0 <= shadow_lower_x <= shadow_upper_x <= 1 or not 0 <= shadow_lower_y <= shadow_upper_y <= 1:
877
+ raise ValueError("Invalid shadow_roi. Got: {}".format(shadow_roi))
878
+ if not 0 <= num_shadows_lower <= num_shadows_upper:
879
+ raise ValueError(
880
+ "Invalid combination of num_shadows_lower and num_shadows_upper. Got: {}".format(
881
+ (num_shadows_lower, num_shadows_upper)
882
+ )
883
+ )
884
+
885
+ self.shadow_roi = shadow_roi
886
+
887
+ self.num_shadows_lower = num_shadows_lower
888
+ self.num_shadows_upper = num_shadows_upper
889
+
890
+ self.shadow_dimension = shadow_dimension
891
+
892
+ def apply(self, image, vertices_list=(), **params):
893
+ return F.add_shadow(image, vertices_list)
894
+
895
+ @property
896
+ def targets_as_params(self):
897
+ return ["image"]
898
+
899
+ def get_params_dependent_on_targets(self, params):
900
+ img = params["image"]
901
+ height, width = img.shape[:2]
902
+
903
+ num_shadows = random.randint(self.num_shadows_lower, self.num_shadows_upper)
904
+
905
+ x_min, y_min, x_max, y_max = self.shadow_roi
906
+
907
+ x_min = int(x_min * width)
908
+ x_max = int(x_max * width)
909
+ y_min = int(y_min * height)
910
+ y_max = int(y_max * height)
911
+
912
+ vertices_list = []
913
+
914
+ for _index in range(num_shadows):
915
+ vertex = []
916
+ for _dimension in range(self.shadow_dimension):
917
+ vertex.append((random.randint(x_min, x_max), random.randint(y_min, y_max)))
918
+
919
+ vertices = np.array([vertex], dtype=np.int32)
920
+ vertices_list.append(vertices)
921
+
922
+ return {"vertices_list": vertices_list}
923
+
924
+ def get_transform_init_args_names(self):
925
+ return (
926
+ "shadow_roi",
927
+ "num_shadows_lower",
928
+ "num_shadows_upper",
929
+ "shadow_dimension",
930
+ )
931
+
932
+
933
+ class RandomToneCurve(ImageOnlyTransform):
934
+ """Randomly change the relationship between bright and dark areas of the image by manipulating its tone curve.
935
+
936
+ Args:
937
+ scale (float): standard deviation of the normal distribution.
938
+ Used to sample random distances to move two control points that modify the image's curve.
939
+ Values should be in range [0, 1]. Default: 0.1
940
+
941
+
942
+ Targets:
943
+ image
944
+
945
+ Image types:
946
+ uint8
947
+ """
948
+
949
+ def __init__(
950
+ self,
951
+ scale=0.1,
952
+ always_apply=False,
953
+ p=0.5,
954
+ ):
955
+ super(RandomToneCurve, self).__init__(always_apply, p)
956
+ self.scale = scale
957
+
958
+ def apply(self, image, low_y, high_y, **params):
959
+ return F.move_tone_curve(image, low_y, high_y)
960
+
961
+ def get_params(self):
962
+ return {
963
+ "low_y": np.clip(random_utils.normal(loc=0.25, scale=self.scale), 0, 1),
964
+ "high_y": np.clip(random_utils.normal(loc=0.75, scale=self.scale), 0, 1),
965
+ }
966
+
967
+ def get_transform_init_args_names(self):
968
+ return ("scale",)
969
+
970
+
971
+ class HueSaturationValue(ImageOnlyTransform):
972
+ """Randomly change hue, saturation and value of the input image.
973
+
974
+ Args:
975
+ hue_shift_limit ((int, int) or int): range for changing hue. If hue_shift_limit is a single int, the range
976
+ will be (-hue_shift_limit, hue_shift_limit). Default: (-20, 20).
977
+ sat_shift_limit ((int, int) or int): range for changing saturation. If sat_shift_limit is a single int,
978
+ the range will be (-sat_shift_limit, sat_shift_limit). Default: (-30, 30).
979
+ val_shift_limit ((int, int) or int): range for changing value. If val_shift_limit is a single int, the range
980
+ will be (-val_shift_limit, val_shift_limit). Default: (-20, 20).
981
+ p (float): probability of applying the transform. Default: 0.5.
982
+
983
+ Targets:
984
+ image
985
+
986
+ Image types:
987
+ uint8, float32
988
+ """
989
+
990
+ def __init__(
991
+ self,
992
+ hue_shift_limit=20,
993
+ sat_shift_limit=30,
994
+ val_shift_limit=20,
995
+ always_apply=False,
996
+ p=0.5,
997
+ ):
998
+ super(HueSaturationValue, self).__init__(always_apply, p)
999
+ self.hue_shift_limit = to_tuple(hue_shift_limit)
1000
+ self.sat_shift_limit = to_tuple(sat_shift_limit)
1001
+ self.val_shift_limit = to_tuple(val_shift_limit)
1002
+
1003
+ def apply(self, image, hue_shift=0, sat_shift=0, val_shift=0, **params):
1004
+ if not is_rgb_image(image) and not is_grayscale_image(image):
1005
+ raise TypeError("HueSaturationValue transformation expects 1-channel or 3-channel images.")
1006
+ return F.shift_hsv(image, hue_shift, sat_shift, val_shift)
1007
+
1008
+ def get_params(self):
1009
+ return {
1010
+ "hue_shift": random.uniform(self.hue_shift_limit[0], self.hue_shift_limit[1]),
1011
+ "sat_shift": random.uniform(self.sat_shift_limit[0], self.sat_shift_limit[1]),
1012
+ "val_shift": random.uniform(self.val_shift_limit[0], self.val_shift_limit[1]),
1013
+ }
1014
+
1015
+ def get_transform_init_args_names(self):
1016
+ return ("hue_shift_limit", "sat_shift_limit", "val_shift_limit")
1017
+
1018
+
1019
+ class Solarize(ImageOnlyTransform):
1020
+ """Invert all pixel values above a threshold.
1021
+
1022
+ Args:
1023
+ threshold ((int, int) or int, or (float, float) or float): range for solarizing threshold.
1024
+ If threshold is a single value, the range will be [threshold, threshold]. Default: 128.
1025
+ p (float): probability of applying the transform. Default: 0.5.
1026
+
1027
+ Targets:
1028
+ image
1029
+
1030
+ Image types:
1031
+ any
1032
+ """
1033
+
1034
+ def __init__(self, threshold=128, always_apply=False, p=0.5):
1035
+ super(Solarize, self).__init__(always_apply, p)
1036
+
1037
+ if isinstance(threshold, (int, float)):
1038
+ self.threshold = to_tuple(threshold, low=threshold)
1039
+ else:
1040
+ self.threshold = to_tuple(threshold, low=0)
1041
+
1042
+ def apply(self, image, threshold=0, **params):
1043
+ return F.solarize(image, threshold)
1044
+
1045
+ def get_params(self):
1046
+ return {"threshold": random.uniform(self.threshold[0], self.threshold[1])}
1047
+
1048
+ def get_transform_init_args_names(self):
1049
+ return ("threshold",)
1050
+
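Solarization inverts every pixel at or above the threshold; for uint8 input that is a one-liner (a sketch, not necessarily the library's exact kernel):

import numpy as np

img = np.array([[0, 100, 128, 200, 255]], dtype=np.uint8)
threshold = 128
solarized = np.where(img >= threshold, 255 - img, img)
# -> [[0, 100, 127, 55, 0]]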
1051
+
1052
+ class Posterize(ImageOnlyTransform):
1053
+ """Reduce the number of bits for each color channel.
1054
+
1055
+ Args:
1056
+ num_bits ((int, int) or int,
1057
+ or list of ints [r, g, b],
1058
+ or list of ints [[r1, r1], [g1, g2], [b1, b2]]): number of high bits.
1059
+ If num_bits is a single value, the range will be [num_bits, num_bits].
1060
+ Must be in range [0, 8]. Default: 4.
1061
+ p (float): probability of applying the transform. Default: 0.5.
1062
+
1063
+ Targets:
1064
+ image
1065
+
1066
+ Image types:
1067
+ uint8
1068
+ """
1069
+
1070
+ def __init__(self, num_bits=4, always_apply=False, p=0.5):
1071
+ super(Posterize, self).__init__(always_apply, p)
1072
+
1073
+ if isinstance(num_bits, (list, tuple)):
1074
+ if len(num_bits) == 3:
1075
+ self.num_bits = [to_tuple(i, 0) for i in num_bits]
1076
+ else:
1077
+ self.num_bits = to_tuple(num_bits, 0)
1078
+ else:
1079
+ self.num_bits = to_tuple(num_bits, num_bits)
1080
+
1081
+ def apply(self, image, num_bits=1, **params):
1082
+ return F.posterize(image, num_bits)
1083
+
1084
+ def get_params(self):
1085
+ if len(self.num_bits) == 3:
1086
+ return {"num_bits": [random.randint(i[0], i[1]) for i in self.num_bits]}
1087
+ return {"num_bits": random.randint(self.num_bits[0], self.num_bits[1])}
1088
+
1089
+ def get_transform_init_args_names(self):
1090
+ return ("num_bits",)
1091
+
1092
+
1093
+ class Equalize(ImageOnlyTransform):
1094
+ """Equalize the image histogram.
1095
+
1096
+ Args:
1097
+ mode (str): {'cv', 'pil'}. Use OpenCV or Pillow equalization method.
1098
+ by_channels (bool): If True, use equalization by channels separately,
1099
+ else convert image to YCbCr representation and use equalization by `Y` channel.
1100
+ mask (np.ndarray, callable): If given, only the pixels selected by
1101
+ the mask are included in the analysis. May be a 1-channel or 3-channel array or a callable.
1102
+ Function signature must include `image` argument.
1103
+ mask_params (list of str): Params for mask function.
1104
+
1105
+ Targets:
1106
+ image
1107
+
1108
+ Image types:
1109
+ uint8
1110
+ """
1111
+
1112
+ def __init__(
1113
+ self,
1114
+ mode="cv",
1115
+ by_channels=True,
1116
+ mask=None,
1117
+ mask_params=(),
1118
+ always_apply=False,
1119
+ p=0.5,
1120
+ ):
1121
+ modes = ["cv", "pil"]
1122
+ if mode not in modes:
1123
+ raise ValueError("Unsupported equalization mode. Supports: {}. " "Got: {}".format(modes, mode))
1124
+
1125
+ super(Equalize, self).__init__(always_apply, p)
1126
+ self.mode = mode
1127
+ self.by_channels = by_channels
1128
+ self.mask = mask
1129
+ self.mask_params = mask_params
1130
+
1131
+ def apply(self, image, mask=None, **params):
1132
+ return F.equalize(image, mode=self.mode, by_channels=self.by_channels, mask=mask)
1133
+
1134
+ def get_params_dependent_on_targets(self, params):
1135
+ if not callable(self.mask):
1136
+ return {"mask": self.mask}
1137
+
1138
+ return {"mask": self.mask(**params)}
1139
+
1140
+ @property
1141
+ def targets_as_params(self):
1142
+ return ["image"] + list(self.mask_params)
1143
+
1144
+ def get_transform_init_args_names(self):
1145
+ return ("mode", "by_channels")
1146
+
1147
+
1148
+ class RGBShift(ImageOnlyTransform):
1149
+ """Randomly shift values for each channel of the input RGB image.
1150
+
1151
+ Args:
1152
+ r_shift_limit ((int, int) or int): range for changing values for the red channel. If r_shift_limit is a single
1153
+ int, the range will be (-r_shift_limit, r_shift_limit). Default: (-20, 20).
1154
+ g_shift_limit ((int, int) or int): range for changing values for the green channel. If g_shift_limit is a
1155
+ single int, the range will be (-g_shift_limit, g_shift_limit). Default: (-20, 20).
1156
+ b_shift_limit ((int, int) or int): range for changing values for the blue channel. If b_shift_limit is a single
1157
+ int, the range will be (-b_shift_limit, b_shift_limit). Default: (-20, 20).
1158
+ p (float): probability of applying the transform. Default: 0.5.
1159
+
1160
+ Targets:
1161
+ image
1162
+
1163
+ Image types:
1164
+ uint8, float32
1165
+ """
1166
+
1167
+ def __init__(
1168
+ self,
1169
+ r_shift_limit=20,
1170
+ g_shift_limit=20,
1171
+ b_shift_limit=20,
1172
+ always_apply=False,
1173
+ p=0.5,
1174
+ ):
1175
+ super(RGBShift, self).__init__(always_apply, p)
1176
+ self.r_shift_limit = to_tuple(r_shift_limit)
1177
+ self.g_shift_limit = to_tuple(g_shift_limit)
1178
+ self.b_shift_limit = to_tuple(b_shift_limit)
1179
+
1180
+ def apply(self, image, r_shift=0, g_shift=0, b_shift=0, **params):
1181
+ if not is_rgb_image(image):
1182
+ raise TypeError("RGBShift transformation expects 3-channel images.")
1183
+ return F.shift_rgb(image, r_shift, g_shift, b_shift)
1184
+
1185
+ def get_params(self):
1186
+ return {
1187
+ "r_shift": random.uniform(self.r_shift_limit[0], self.r_shift_limit[1]),
1188
+ "g_shift": random.uniform(self.g_shift_limit[0], self.g_shift_limit[1]),
1189
+ "b_shift": random.uniform(self.b_shift_limit[0], self.b_shift_limit[1]),
1190
+ }
1191
+
1192
+ def get_transform_init_args_names(self):
1193
+ return ("r_shift_limit", "g_shift_limit", "b_shift_limit")
1194
+
1195
+
1196
+ class RandomBrightnessContrast(ImageOnlyTransform):
1197
+ """Randomly change brightness and contrast of the input image.
1198
+
1199
+ Args:
1200
+ brightness_limit ((float, float) or float): factor range for changing brightness.
1201
+ If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).
1202
+ contrast_limit ((float, float) or float): factor range for changing contrast.
1203
+ If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).
1204
+ brightness_by_max (bool): If True, adjust brightness by the image dtype maximum,
1205
+ else adjust brightness by the image mean.
1206
+ p (float): probability of applying the transform. Default: 0.5.
1207
+
1208
+ Targets:
1209
+ image
1210
+
1211
+ Image types:
1212
+ uint8, float32
1213
+ """
1214
+
1215
+ def __init__(
1216
+ self,
1217
+ brightness_limit=0.2,
1218
+ contrast_limit=0.2,
1219
+ brightness_by_max=True,
1220
+ always_apply=False,
1221
+ p=0.5,
1222
+ ):
1223
+ super(RandomBrightnessContrast, self).__init__(always_apply, p)
1224
+ self.brightness_limit = to_tuple(brightness_limit)
1225
+ self.contrast_limit = to_tuple(contrast_limit)
1226
+ self.brightness_by_max = brightness_by_max
1227
+
1228
+ def apply(self, img, alpha=1.0, beta=0.0, **params):
1229
+ return F.brightness_contrast_adjust(img, alpha, beta, self.brightness_by_max)
1230
+
1231
+ def get_params(self):
1232
+ return {
1233
+ "alpha": 1.0 + random.uniform(self.contrast_limit[0], self.contrast_limit[1]),
1234
+ "beta": 0.0 + random.uniform(self.brightness_limit[0], self.brightness_limit[1]),
1235
+ }
1236
+
1237
+ def get_transform_init_args_names(self):
1238
+ return ("brightness_limit", "contrast_limit", "brightness_by_max")
1239
+
1240
+
1241
+ class RandomBrightness(RandomBrightnessContrast):
1242
+ """Randomly change brightness of the input image.
1243
+
1244
+ Args:
1245
+ limit ((float, float) or float): factor range for changing brightness.
1246
+ If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).
1247
+ p (float): probability of applying the transform. Default: 0.5.
1248
+
1249
+ Targets:
1250
+ image
1251
+
1252
+ Image types:
1253
+ uint8, float32
1254
+ """
1255
+
1256
+ def __init__(self, limit=0.2, always_apply=False, p=0.5):
1257
+ super(RandomBrightness, self).__init__(brightness_limit=limit, contrast_limit=0, always_apply=always_apply, p=p)
1258
+ warnings.warn(
1259
+ "This class has been deprecated. Please use RandomBrightnessContrast",
1260
+ FutureWarning,
1261
+ )
1262
+
1263
+ def get_transform_init_args(self):
1264
+ return {"limit": self.brightness_limit}
1265
+
1266
+
1267
+ class RandomContrast(RandomBrightnessContrast):
1268
+ """Randomly change contrast of the input image.
1269
+
1270
+ Args:
1271
+ limit ((float, float) or float): factor range for changing contrast.
1272
+ If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).
1273
+ p (float): probability of applying the transform. Default: 0.5.
1274
+
1275
+ Targets:
1276
+ image
1277
+
1278
+ Image types:
1279
+ uint8, float32
1280
+ """
1281
+
1282
+ def __init__(self, limit=0.2, always_apply=False, p=0.5):
1283
+ super(RandomContrast, self).__init__(brightness_limit=0, contrast_limit=limit, always_apply=always_apply, p=p)
1284
+ warnings.warn(
1285
+ f"{self.__class__.__name__} has been deprecated. Please use RandomBrightnessContrast",
1286
+ FutureWarning,
1287
+ )
1288
+
1289
+ def get_transform_init_args(self):
1290
+ return {"limit": self.contrast_limit}
1291
+
1292
+
1293
+ class GaussNoise(ImageOnlyTransform):
1294
+ """Apply gaussian noise to the input image.
1295
+
1296
+ Args:
1297
+ var_limit ((float, float) or float): variance range for noise. If var_limit is a single float, the range
1298
+ will be (0, var_limit). Default: (10.0, 50.0).
1299
+ mean (float): mean of the noise. Default: 0
1300
+ per_channel (bool): if set to True, noise will be sampled for each channel independently.
1301
+ Otherwise, the noise will be sampled once for all channels. Default: True
1302
+ p (float): probability of applying the transform. Default: 0.5.
1303
+
1304
+ Targets:
1305
+ image
1306
+
1307
+ Image types:
1308
+ uint8, float32
1309
+ """
1310
+
1311
+ def __init__(self, var_limit=(10.0, 50.0), mean=0, per_channel=True, always_apply=False, p=0.5):
1312
+ super(GaussNoise, self).__init__(always_apply, p)
1313
+ if isinstance(var_limit, (tuple, list)):
1314
+ if var_limit[0] < 0:
1315
+ raise ValueError("Lower var_limit should be non negative.")
1316
+ if var_limit[1] < 0:
1317
+ raise ValueError("Upper var_limit should be non negative.")
1318
+ self.var_limit = var_limit
1319
+ elif isinstance(var_limit, (int, float)):
1320
+ if var_limit < 0:
1321
+ raise ValueError("var_limit should be non negative.")
1322
+
1323
+ self.var_limit = (0, var_limit)
1324
+ else:
1325
+ raise TypeError(
1326
+ "Expected var_limit type to be one of (int, float, tuple, list), got {}".format(type(var_limit))
1327
+ )
1328
+
1329
+ self.mean = mean
1330
+ self.per_channel = per_channel
1331
+
1332
+ def apply(self, img, gauss=None, **params):
1333
+ return F.gauss_noise(img, gauss=gauss)
1334
+
1335
+ def get_params_dependent_on_targets(self, params):
1336
+ image = params["image"]
1337
+ var = random.uniform(self.var_limit[0], self.var_limit[1])
1338
+ sigma = var**0.5
1339
+
1340
+ if self.per_channel:
1341
+ gauss = random_utils.normal(self.mean, sigma, image.shape)
1342
+ else:
1343
+ gauss = random_utils.normal(self.mean, sigma, image.shape[:2])
1344
+ if len(image.shape) == 3:
1345
+ gauss = np.expand_dims(gauss, -1)
1346
+
1347
+ return {"gauss": gauss}
1348
+
1349
+ @property
1350
+ def targets_as_params(self):
1351
+ return ["image"]
1352
+
1353
+ def get_transform_init_args_names(self):
1354
+ return ("var_limit", "per_channel", "mean")
1355
+
1356
+
1357
+ class ISONoise(ImageOnlyTransform):
1358
+ """
1359
+ Apply camera sensor noise.
1360
+
1361
+ Args:
1362
+ color_shift (float, float): variance range for color hue change.
1363
+ Measured as a fraction of 360 degree Hue angle in HLS colorspace.
1364
+ intensity ((float, float)): Multiplicative factor that controls the strength
1365
+ of color and luminance noise.
1366
+ p (float): probability of applying the transform. Default: 0.5.
1367
+
1368
+ Targets:
1369
+ image
1370
+
1371
+ Image types:
1372
+ uint8
1373
+ """
1374
+
1375
+ def __init__(self, color_shift=(0.01, 0.05), intensity=(0.1, 0.5), always_apply=False, p=0.5):
1376
+ super(ISONoise, self).__init__(always_apply, p)
1377
+ self.intensity = intensity
1378
+ self.color_shift = color_shift
1379
+
1380
+ def apply(self, img, color_shift=0.05, intensity=1.0, random_state=None, **params):
1381
+ return F.iso_noise(img, color_shift, intensity, np.random.RandomState(random_state))
1382
+
1383
+ def get_params(self):
1384
+ return {
1385
+ "color_shift": random.uniform(self.color_shift[0], self.color_shift[1]),
1386
+ "intensity": random.uniform(self.intensity[0], self.intensity[1]),
1387
+ "random_state": random.randint(0, 65536),
1388
+ }
1389
+
1390
+ def get_transform_init_args_names(self):
1391
+ return ("intensity", "color_shift")
1392
+
1393
+
1394
+ class CLAHE(ImageOnlyTransform):
1395
+ """Apply Contrast Limited Adaptive Histogram Equalization to the input image.
1396
+
1397
+ Args:
1398
+ clip_limit (float or (float, float)): upper threshold value for contrast limiting.
1399
+ If clip_limit is a single float value, the range will be (1, clip_limit). Default: (1, 4).
1400
+ tile_grid_size ((int, int)): size of grid for histogram equalization. Default: (8, 8).
1401
+ p (float): probability of applying the transform. Default: 0.5.
1402
+
1403
+ Targets:
1404
+ image
1405
+
1406
+ Image types:
1407
+ uint8
1408
+ """
1409
+
1410
+ def __init__(self, clip_limit=4.0, tile_grid_size=(8, 8), always_apply=False, p=0.5):
1411
+ super(CLAHE, self).__init__(always_apply, p)
1412
+ self.clip_limit = to_tuple(clip_limit, 1)
1413
+ self.tile_grid_size = tuple(tile_grid_size)
1414
+
1415
+ def apply(self, img, clip_limit=2, **params):
1416
+ if not is_rgb_image(img) and not is_grayscale_image(img):
1417
+ raise TypeError("CLAHE transformation expects 1-channel or 3-channel images.")
1418
+
1419
+ return F.clahe(img, clip_limit, self.tile_grid_size)
1420
+
1421
+ def get_params(self):
1422
+ return {"clip_limit": random.uniform(self.clip_limit[0], self.clip_limit[1])}
1423
+
1424
+ def get_transform_init_args_names(self):
1425
+ return ("clip_limit", "tile_grid_size")
1426
+
1427
+
1428
+ class ChannelShuffle(ImageOnlyTransform):
1429
+ """Randomly rearrange channels of the input RGB image.
1430
+
1431
+ Args:
1432
+ p (float): probability of applying the transform. Default: 0.5.
1433
+
1434
+ Targets:
1435
+ image
1436
+
1437
+ Image types:
1438
+ uint8, float32
1439
+ """
1440
+
1441
+ @property
1442
+ def targets_as_params(self):
1443
+ return ["image"]
1444
+
1445
+ def apply(self, img, channels_shuffled=(0, 1, 2), **params):
1446
+ return F.channel_shuffle(img, channels_shuffled)
1447
+
1448
+ def get_params_dependent_on_targets(self, params):
1449
+ img = params["image"]
1450
+ ch_arr = list(range(img.shape[2]))
1451
+ random.shuffle(ch_arr)
1452
+ return {"channels_shuffled": ch_arr}
1453
+
1454
+ def get_transform_init_args_names(self):
1455
+ return ()
1456
+
1457
+
1458
+ class InvertImg(ImageOnlyTransform):
1459
+ """Invert the input image by subtracting pixel values from max values of the image types,
1460
+ i.e., 255 for uint8 and 1.0 for float32.
1461
+
1462
+ Args:
1463
+ p (float): probability of applying the transform. Default: 0.5.
1464
+
1465
+ Targets:
1466
+ image
1467
+
1468
+ Image types:
1469
+ uint8, float32
1470
+ """
1471
+
1472
+ def apply(self, img, **params):
1473
+ return F.invert(img)
1474
+
1475
+ def get_transform_init_args_names(self):
1476
+ return ()
1477
+
1478
+
1479
+ class RandomGamma(ImageOnlyTransform):
1480
+ """
1481
+ Args:
1482
+ gamma_limit (float or (float, float)): If gamma_limit is a single float value,
1483
+ the range will be (-gamma_limit, gamma_limit). Default: (80, 120).
1484
+ eps: Deprecated.
1485
+
1486
+ Targets:
1487
+ image
1488
+
1489
+ Image types:
1490
+ uint8, float32
1491
+ """
1492
+
1493
+ def __init__(self, gamma_limit=(80, 120), eps=None, always_apply=False, p=0.5):
1494
+ super(RandomGamma, self).__init__(always_apply, p)
1495
+ self.gamma_limit = to_tuple(gamma_limit)
1496
+ self.eps = eps
1497
+
1498
+ def apply(self, img, gamma=1, **params):
1499
+ return F.gamma_transform(img, gamma=gamma)
1500
+
1501
+ def get_params(self):
1502
+ return {"gamma": random.uniform(self.gamma_limit[0], self.gamma_limit[1]) / 100.0}
1503
+
1504
+ def get_transform_init_args_names(self):
1505
+ return ("gamma_limit", "eps")
1506
+
1507
+
1508
+ class ToGray(ImageOnlyTransform):
1509
+ """Convert the input RGB image to grayscale. If the mean pixel value for the resulting image is greater
1510
+ than 127, invert the resulting grayscale image.
1511
+
1512
+ Args:
1513
+ p (float): probability of applying the transform. Default: 0.5.
1514
+
1515
+ Targets:
1516
+ image
1517
+
1518
+ Image types:
1519
+ uint8, float32
1520
+ """
1521
+
1522
+ def apply(self, img, **params):
1523
+ if is_grayscale_image(img):
1524
+ warnings.warn("The image is already gray.")
1525
+ return img
1526
+ if not is_rgb_image(img):
1527
+ raise TypeError("ToGray transformation expects 3-channel images.")
1528
+
1529
+ return F.to_gray(img)
1530
+
1531
+ def get_transform_init_args_names(self):
1532
+ return ()
1533
+
1534
+
1535
+ class ToRGB(ImageOnlyTransform):
1536
+ """Convert the input grayscale image to RGB.
1537
+
1538
+ Args:
1539
+ p (float): probability of applying the transform. Default: 1.
1540
+
1541
+ Targets:
1542
+ image
1543
+
1544
+ Image types:
1545
+ uint8, float32
1546
+ """
1547
+
1548
+ def __init__(self, always_apply=True, p=1.0):
1549
+ super(ToRGB, self).__init__(always_apply=always_apply, p=p)
1550
+
1551
+ def apply(self, img, **params):
1552
+ if is_rgb_image(img):
1553
+ warnings.warn("The image is already an RGB.")
1554
+ return img
1555
+ if not is_grayscale_image(img):
1556
+ raise TypeError("ToRGB transformation expects 2-dim images or 3-dim with the last dimension equal to 1.")
1557
+
1558
+ return F.gray_to_rgb(img)
1559
+
1560
+ def get_transform_init_args_names(self):
1561
+ return ()
1562
+
1563
+
1564
+ class ToSepia(ImageOnlyTransform):
1565
+ """Applies sepia filter to the input RGB image
1566
+
1567
+ Args:
1568
+ p (float): probability of applying the transform. Default: 0.5.
1569
+
1570
+ Targets:
1571
+ image
1572
+
1573
+ Image types:
1574
+ uint8, float32
1575
+ """
1576
+
1577
+ def __init__(self, always_apply=False, p=0.5):
1578
+ super(ToSepia, self).__init__(always_apply, p)
1579
+ self.sepia_transformation_matrix = np.array(
1580
+ [[0.393, 0.769, 0.189], [0.349, 0.686, 0.168], [0.272, 0.534, 0.131]]
1581
+ )
1582
+
1583
+ def apply(self, image, **params):
1584
+ if not is_rgb_image(image):
1585
+ raise TypeError("ToSepia transformation expects 3-channel images.")
1586
+ return F.linear_transformation_rgb(image, self.sepia_transformation_matrix)
1587
+
1588
+ def get_transform_init_args_names(self):
1589
+ return ()
1590
+
1591
+
1592
+ class ToFloat(ImageOnlyTransform):
1593
+ """Divide pixel values by `max_value` to get a float32 output array where all values lie in the range [0, 1.0].
1594
+ If `max_value` is None the transform will try to infer the maximum value by inspecting the data type of the input
1595
+ image.
1596
+
1597
+ See Also:
1598
+ :class:`~albumentations.augmentations.transforms.FromFloat`
1599
+
1600
+ Args:
1601
+ max_value (float): maximum possible input value. Default: None.
1602
+ p (float): probability of applying the transform. Default: 1.0.
1603
+
1604
+ Targets:
1605
+ image
1606
+
1607
+ Image types:
1608
+ any type
1609
+
1610
+ """
1611
+
1612
+ def __init__(self, max_value=None, always_apply=False, p=1.0):
1613
+ super(ToFloat, self).__init__(always_apply, p)
1614
+ self.max_value = max_value
1615
+
1616
+ def apply(self, img, **params):
1617
+ return F.to_float(img, self.max_value)
1618
+
1619
+ def get_transform_init_args_names(self):
1620
+ return ("max_value",)
1621
+
1622
+
1623
+ class FromFloat(ImageOnlyTransform):
1624
+ """Take an input array where all values should lie in the range [0, 1.0], multiply them by `max_value` and then
1625
+ cast the resulted value to a type specified by `dtype`. If `max_value` is None the transform will try to infer
1626
+ the maximum value for the data type from the `dtype` argument.
1627
+
1628
+ This is the inverse transform for :class:`~albumentations.augmentations.transforms.ToFloat`.
1629
+
1630
+ Args:
1631
+ max_value (float): maximum possible input value. Default: None.
1632
+ dtype (string or numpy data type): data type of the output. See the `'Data types' page from the NumPy docs`_.
1633
+ Default: 'uint16'.
1634
+ p (float): probability of applying the transform. Default: 1.0.
1635
+
1636
+ Targets:
1637
+ image
1638
+
1639
+ Image types:
1640
+ float32
1641
+
1642
+ .. _'Data types' page from the NumPy docs:
1643
+ https://docs.scipy.org/doc/numpy/user/basics.types.html
1644
+ """
1645
+
1646
+ def __init__(self, dtype="uint16", max_value=None, always_apply=False, p=1.0):
1647
+ super(FromFloat, self).__init__(always_apply, p)
1648
+ self.dtype = np.dtype(dtype)
1649
+ self.max_value = max_value
1650
+
1651
+ def apply(self, img, **params):
1652
+ return F.from_float(img, self.dtype, self.max_value)
1653
+
1654
+ def get_transform_init_args(self):
1655
+ return {"dtype": self.dtype.name, "max_value": self.max_value}
1656
+
1657
+
1658
+ class Downscale(ImageOnlyTransform):
1659
+ """Decreases image quality by downscaling and upscaling back.
1660
+
1661
+ Args:
1662
+ scale_min (float): lower bound on the image scale. Should be < 1.
1663
+ scale_max (float): upper bound on the image scale. Should be < 1.
1664
+ interpolation: cv2 interpolation method. Could be:
1665
+ - single cv2 interpolation flag - selected method will be used for downscale and upscale.
1666
+ - dict(downscale=flag, upscale=flag)
1667
+ - Downscale.Interpolation(downscale=flag, upscale=flag).
1668
+ Default: Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST)
1669
+
1670
+ Targets:
1671
+ image
1672
+
1673
+ Image types:
1674
+ uint8, float32
1675
+ """
1676
+
1677
+ class Interpolation:
1678
+ def __init__(self, *, downscale: int = cv2.INTER_NEAREST, upscale: int = cv2.INTER_NEAREST):
1679
+ self.downscale = downscale
1680
+ self.upscale = upscale
1681
+
1682
+ def __init__(
1683
+ self,
1684
+ scale_min: float = 0.25,
1685
+ scale_max: float = 0.25,
1686
+ interpolation: Optional[Union[int, Interpolation, Dict[str, int]]] = None,
1687
+ always_apply: bool = False,
1688
+ p: float = 0.5,
1689
+ ):
1690
+ super(Downscale, self).__init__(always_apply, p)
1691
+ if interpolation is None:
1692
+ self.interpolation = self.Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST)
1693
+ warnings.warn(
1694
+ "Using default interpolation INTER_NEAREST, which is sub-optimal."
1695
+ "Please specify interpolation mode for downscale and upscale explicitly."
1696
+ "For additional information see this PR https://github.com/albumentations-team/albumentations/pull/584"
1697
+ )
1698
+ elif isinstance(interpolation, int):
1699
+ self.interpolation = self.Interpolation(downscale=interpolation, upscale=interpolation)
1700
+ elif isinstance(interpolation, self.Interpolation):
1701
+ self.interpolation = interpolation
1702
+ elif isinstance(interpolation, dict):
1703
+ self.interpolation = self.Interpolation(**interpolation)
1704
+ else:
1705
+ raise ValueError(
1706
+ "Wrong interpolation data type. Supported types: `Optional[Union[int, Interpolation, Dict[str, int]]]`."
1707
+ f" Got: {type(interpolation)}"
1708
+ )
1709
+
1710
+ if scale_min > scale_max:
1711
+ raise ValueError("Expected scale_min be less or equal scale_max, got {} {}".format(scale_min, scale_max))
1712
+ if scale_max >= 1:
1713
+ raise ValueError("Expected scale_max to be less than 1, got {}".format(scale_max))
1714
+ self.scale_min = scale_min
1715
+ self.scale_max = scale_max
1716
+
1717
+ def apply(self, img: np.ndarray, scale: Optional[float] = None, **params) -> np.ndarray:
1718
+ return F.downscale(
1719
+ img,
1720
+ scale=scale,
1721
+ down_interpolation=self.interpolation.downscale,
1722
+ up_interpolation=self.interpolation.upscale,
1723
+ )
1724
+
1725
+ def get_params(self) -> Dict[str, Any]:
1726
+ return {"scale": random.uniform(self.scale_min, self.scale_max)}
1727
+
1728
+ def get_transform_init_args_names(self) -> Tuple[str, str]:
1729
+ return "scale_min", "scale_max"
1730
+
1731
+ def _to_dict(self) -> Dict[str, Any]:
1732
+ result = super()._to_dict()
1733
+ result["interpolation"] = {"upscale": self.interpolation.upscale, "downscale": self.interpolation.downscale}
1734
+ return result
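+ 
+ # Usage sketch (illustration only): interpolation may be given as a plain dict,
+ # which the constructor above converts into the Interpolation helper.
+ def _example_downscale():
+     image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
+     aug = Downscale(
+         scale_min=0.25,
+         scale_max=0.5,
+         interpolation={"downscale": cv2.INTER_AREA, "upscale": cv2.INTER_LINEAR},
+         p=1.0,
+     )
+     return aug(image=image)["image"]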
1735
+
1736
+
1737
+ class Lambda(NoOp):
1738
+ """A flexible transformation class for using user-defined transformation functions per targets.
1739
+ Function signature must include **kwargs to accept optional arguments like interpolation method, image size, etc.:
1740
+
1741
+ Args:
1742
+ image (callable): Image transformation function.
1743
+ mask (callable): Mask transformation function.
1744
+ keypoint (callable): Keypoint transformation function.
1745
+ bbox (callable): BBox transformation function.
1746
+ always_apply (bool): Indicates whether this transformation should be always applied.
1747
+ p (float): probability of applying the transform. Default: 1.0.
1748
+
1749
+ Targets:
1750
+ image, mask, bboxes, keypoints
1751
+
1752
+ Image types:
1753
+ Any
1754
+ """
1755
+
1756
+ def __init__(
1757
+ self,
1758
+ image=None,
1759
+ mask=None,
1760
+ keypoint=None,
1761
+ bbox=None,
1762
+ name=None,
1763
+ always_apply=False,
1764
+ p=1.0,
1765
+ ):
1766
+ super(Lambda, self).__init__(always_apply, p)
1767
+
1768
+ self.name = name
1769
+ self.custom_apply_fns = {target_name: F.noop for target_name in ("image", "mask", "keypoint", "bbox")}
1770
+ for target_name, custom_apply_fn in {
1771
+ "image": image,
1772
+ "mask": mask,
1773
+ "keypoint": keypoint,
1774
+ "bbox": bbox,
1775
+ }.items():
1776
+ if custom_apply_fn is not None:
1777
+ if isinstance(custom_apply_fn, LambdaType) and custom_apply_fn.__name__ == "<lambda>":
1778
+ warnings.warn(
1779
+ "Using lambda is incompatible with multiprocessing. "
1780
+ "Consider using regular functions or partial()."
1781
+ )
1782
+
1783
+ self.custom_apply_fns[target_name] = custom_apply_fn
1784
+
1785
+ def apply(self, img, **params):
1786
+ fn = self.custom_apply_fns["image"]
1787
+ return fn(img, **params)
1788
+
1789
+ def apply_to_mask(self, mask, **params):
1790
+ fn = self.custom_apply_fns["mask"]
1791
+ return fn(mask, **params)
1792
+
1793
+ def apply_to_bbox(self, bbox, **params):
1794
+ fn = self.custom_apply_fns["bbox"]
1795
+ return fn(bbox, **params)
1796
+
1797
+ def apply_to_keypoint(self, keypoint, **params):
1798
+ fn = self.custom_apply_fns["keypoint"]
1799
+ return fn(keypoint, **params)
1800
+
1801
+ @classmethod
1802
+ def is_serializable(cls):
1803
+ return False
1804
+
1805
+ def _to_dict(self):
1806
+ if self.name is None:
1807
+ raise ValueError(
1808
+ "To make a Lambda transform serializable you should provide the `name` argument, "
1809
+ "e.g. `Lambda(name='my_transform', image=<some func>, ...)`."
1810
+ )
1811
+ return {"__class_fullname__": self.get_class_fullname(), "__name__": self.name}
1812
+
1813
+ def __repr__(self):
1814
+ state = {"name": self.name}
1815
+ state.update(self.custom_apply_fns.items())
1816
+ state.update(self.get_base_init_args())
1817
+ return "{name}({args})".format(name=self.__class__.__name__, args=format_args(state))
1818
+
1819
+
1820
+ class MultiplicativeNoise(ImageOnlyTransform):
1821
+ """Multiply image to random number or array of numbers.
1822
+
1823
+ Args:
1824
+ multiplier (float or tuple of floats): If a single float, the image will be multiplied by this number.
1825
+ If a tuple of floats, the multiplier will be sampled from the range `[multiplier[0], multiplier[1])`. Default: (0.9, 1.1).
1826
+ per_channel (bool): If `False`, the same values will be used for all channels.
1827
+ If `True`, sample values for each channel independently. Default: False.
1828
+ elementwise (bool): If `False`, multiply all pixels in the image by a single random value sampled once.
1829
+ If `True`, multiply image pixels by values that are randomly sampled per pixel. Default: False.
1830
+
1831
+ Targets:
1832
+ image
1833
+
1834
+ Image types:
1835
+ Any
1836
+ """
1837
+
1838
+ def __init__(
1839
+ self,
1840
+ multiplier=(0.9, 1.1),
1841
+ per_channel=False,
1842
+ elementwise=False,
1843
+ always_apply=False,
1844
+ p=0.5,
1845
+ ):
1846
+ super(MultiplicativeNoise, self).__init__(always_apply, p)
1847
+ self.multiplier = to_tuple(multiplier, multiplier)
1848
+ self.per_channel = per_channel
1849
+ self.elementwise = elementwise
1850
+
1851
+ def apply(self, img, multiplier=np.array([1]), **kwargs):
1852
+ return F.multiply(img, multiplier)
1853
+
1854
+ def get_params_dependent_on_targets(self, params):
1855
+ if self.multiplier[0] == self.multiplier[1]:
1856
+ return {"multiplier": np.array([self.multiplier[0]])}
1857
+
1858
+ img = params["image"]
1859
+
1860
+ h, w = img.shape[:2]
1861
+
1862
+ if self.per_channel:
1863
+ c = 1 if is_grayscale_image(img) else img.shape[-1]
1864
+ else:
1865
+ c = 1
1866
+
1867
+ if self.elementwise:
1868
+ shape = [h, w, c]
1869
+ else:
1870
+ shape = [c]
1871
+
1872
+ multiplier = random_utils.uniform(self.multiplier[0], self.multiplier[1], shape)
1873
+ if is_grayscale_image(img) and img.ndim == 2:
1874
+ multiplier = np.squeeze(multiplier)
1875
+
1876
+ return {"multiplier": multiplier}
1877
+
1878
+ @property
1879
+ def targets_as_params(self):
1880
+ return ["image"]
1881
+
1882
+ def get_transform_init_args_names(self):
1883
+ return "multiplier", "per_channel", "elementwise"
1884
+
1885
+
1886
+ class FancyPCA(ImageOnlyTransform):
1887
+ """Augment RGB image using FancyPCA from Krizhevsky's paper
1888
+ "ImageNet Classification with Deep Convolutional Neural Networks"
1889
+
1890
+ Args:
1891
+ alpha (float): how much to perturb/scale the eigenvectors and eigenvalues.
1892
+ The scale is sampled from a Gaussian distribution (mu=0, sigma=alpha).
1893
+
1894
+ Targets:
1895
+ image
1896
+
1897
+ Image types:
1898
+ 3-channel uint8 images only
1899
+
1900
+ Credit:
1901
+ http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
1902
+ https://deshanadesai.github.io/notes/Fancy-PCA-with-Scikit-Image
1903
+ https://pixelatedbrian.github.io/2018-04-29-fancy_pca/
1904
+ """
1905
+
1906
+ def __init__(self, alpha=0.1, always_apply=False, p=0.5):
1907
+ super(FancyPCA, self).__init__(always_apply=always_apply, p=p)
1908
+ self.alpha = alpha
1909
+
1910
+ def apply(self, img, alpha=0.1, **params):
1911
+ img = F.fancy_pca(img, alpha)
1912
+ return img
1913
+
1914
+ def get_params(self):
1915
+ return {"alpha": random.gauss(0, self.alpha)}
1916
+
1917
+ def get_transform_init_args_names(self):
1918
+ return ("alpha",)
1919
+
1920
+
1921
+ class ColorJitter(ImageOnlyTransform):
1922
+ """Randomly changes the brightness, contrast, and saturation of an image. Compared to ColorJitter from torchvision,
1923
+ this transform gives a little bit different results because Pillow (used in torchvision) and OpenCV (used in
1924
+ Albumentations) transform an image to HSV format by different formulas. Another difference - Pillow uses uint8
1925
+ overflow, but we use value saturation.
1926
+
1927
+ Args:
1928
+ brightness (float or tuple of float (min, max)): How much to jitter brightness.
1929
+ brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
1930
+ or the given [min, max]. Should be non negative numbers.
1931
+ contrast (float or tuple of float (min, max)): How much to jitter contrast.
1932
+ contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
1933
+ or the given [min, max]. Should be non negative numbers.
1934
+ saturation (float or tuple of float (min, max)): How much to jitter saturation.
1935
+ saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
1936
+ or the given [min, max]. Should be non negative numbers.
1937
+ hue (float or tuple of float (min, max)): How much to jitter hue.
1938
+ hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
1939
+ Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
1940
+ """
1941
+
1942
+ def __init__(
1943
+ self,
1944
+ brightness=0.2,
1945
+ contrast=0.2,
1946
+ saturation=0.2,
1947
+ hue=0.2,
1948
+ always_apply=False,
1949
+ p=0.5,
1950
+ ):
1951
+ super(ColorJitter, self).__init__(always_apply=always_apply, p=p)
1952
+
1953
+ self.brightness = self.__check_values(brightness, "brightness")
1954
+ self.contrast = self.__check_values(contrast, "contrast")
1955
+ self.saturation = self.__check_values(saturation, "saturation")
1956
+ self.hue = self.__check_values(hue, "hue", offset=0, bounds=[-0.5, 0.5], clip=False)
1957
+
1958
+ self.transforms = [
1959
+ F.adjust_brightness_torchvision,
1960
+ F.adjust_contrast_torchvision,
1961
+ F.adjust_saturation_torchvision,
1962
+ F.adjust_hue_torchvision,
1963
+ ]
1964
+
1965
+ @staticmethod
1966
+ def __check_values(value, name, offset=1, bounds=(0, float("inf")), clip=True):
1967
+ if isinstance(value, numbers.Number):
1968
+ if value < 0:
1969
+ raise ValueError("If {} is a single number, it must be non negative.".format(name))
1970
+ value = [offset - value, offset + value]
1971
+ if clip:
1972
+ value[0] = max(value[0], 0)
1973
+ elif isinstance(value, (tuple, list)) and len(value) == 2:
1974
+ if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
1975
+ raise ValueError("{} values should be between {}".format(name, bounds))
1976
+ else:
1977
+ raise TypeError("{} should be a single number or a list/tuple with length 2.".format(name))
1978
+
1979
+ return value
1980
+
1981
+ def get_params(self):
1982
+ brightness = random.uniform(self.brightness[0], self.brightness[1])
1983
+ contrast = random.uniform(self.contrast[0], self.contrast[1])
1984
+ saturation = random.uniform(self.saturation[0], self.saturation[1])
1985
+ hue = random.uniform(self.hue[0], self.hue[1])
1986
+
1987
+ order = [0, 1, 2, 3]
1988
+ random.shuffle(order)
1989
+
1990
+ return {
1991
+ "brightness": brightness,
1992
+ "contrast": contrast,
1993
+ "saturation": saturation,
1994
+ "hue": hue,
1995
+ "order": order,
1996
+ }
1997
+
1998
+ def apply(self, img, brightness=1.0, contrast=1.0, saturation=1.0, hue=0, order=(0, 1, 2, 3), **params):
1999
+ if not is_rgb_image(img) and not is_grayscale_image(img):
2000
+ raise TypeError("ColorJitter transformation expects 1-channel or 3-channel images.")
2001
+ params = [brightness, contrast, saturation, hue]
2002
+ for i in order:
2003
+ img = self.transforms[i](img, params[i])
2004
+ return img
2005
+
2006
+ def get_transform_init_args_names(self):
2007
+ return ("brightness", "contrast", "saturation", "hue")
2008
+
2009
+
2010
+ class Sharpen(ImageOnlyTransform):
2011
+ """Sharpen the input image and overlays the result with the original image.
2012
+
2013
+ Args:
2014
+ alpha ((float, float)): range to choose the visibility of the sharpened image. At 0, only the original image is
2015
+ visible, at 1.0 only its sharpened version is visible. Default: (0.2, 0.5).
2016
+ lightness ((float, float)): range to choose the lightness of the sharpened image. Default: (0.5, 1.0).
2017
+ p (float): probability of applying the transform. Default: 0.5.
2018
+
2019
+ Targets:
2020
+ image
2021
+ """
2022
+
2023
+ def __init__(self, alpha=(0.2, 0.5), lightness=(0.5, 1.0), always_apply=False, p=0.5):
2024
+ super(Sharpen, self).__init__(always_apply, p)
2025
+ self.alpha = self.__check_values(to_tuple(alpha, 0.0), name="alpha", bounds=(0.0, 1.0))
2026
+ self.lightness = self.__check_values(to_tuple(lightness, 0.0), name="lightness")
2027
+
2028
+ @staticmethod
2029
+ def __check_values(value, name, bounds=(0, float("inf"))):
2030
+ if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
2031
+ raise ValueError("{} values should be between {}".format(name, bounds))
2032
+ return value
2033
+
2034
+ @staticmethod
2035
+ def __generate_sharpening_matrix(alpha_sample, lightness_sample):
2036
+ matrix_nochange = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
2037
+ matrix_effect = np.array(
2038
+ [[-1, -1, -1], [-1, 8 + lightness_sample, -1], [-1, -1, -1]],
2039
+ dtype=np.float32,
2040
+ )
2041
+
2042
+ matrix = (1 - alpha_sample) * matrix_nochange + alpha_sample * matrix_effect
2043
+ return matrix
2044
+
2045
+ def get_params(self):
2046
+ alpha = random.uniform(*self.alpha)
2047
+ lightness = random.uniform(*self.lightness)
2048
+ sharpening_matrix = self.__generate_sharpening_matrix(alpha_sample=alpha, lightness_sample=lightness)
2049
+ return {"sharpening_matrix": sharpening_matrix}
2050
+
2051
+ def apply(self, img, sharpening_matrix=None, **params):
2052
+ return F.convolve(img, sharpening_matrix)
2053
+
2054
+ def get_transform_init_args_names(self):
2055
+ return ("alpha", "lightness")
2056
+
2057
+
2058
+ class Emboss(ImageOnlyTransform):
2059
+ """Emboss the input image and overlays the result with the original image.
2060
+
2061
+ Args:
2062
+ alpha ((float, float)): range to choose the visibility of the embossed image. At 0, only the original image is
2063
+ visible, at 1.0 only its embossed version is visible. Default: (0.2, 0.5).
2064
+ strength ((float, float)): strength range of the embossing. Default: (0.2, 0.7).
2065
+ p (float): probability of applying the transform. Default: 0.5.
2066
+
2067
+ Targets:
2068
+ image
2069
+ """
2070
+
2071
+ def __init__(self, alpha=(0.2, 0.5), strength=(0.2, 0.7), always_apply=False, p=0.5):
2072
+ super(Emboss, self).__init__(always_apply, p)
2073
+ self.alpha = self.__check_values(to_tuple(alpha, 0.0), name="alpha", bounds=(0.0, 1.0))
2074
+ self.strength = self.__check_values(to_tuple(strength, 0.0), name="strength")
2075
+
2076
+ @staticmethod
2077
+ def __check_values(value, name, bounds=(0, float("inf"))):
2078
+ if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
2079
+ raise ValueError("{} values should be between {}".format(name, bounds))
2080
+ return value
2081
+
2082
+ @staticmethod
2083
+ def __generate_emboss_matrix(alpha_sample, strength_sample):
2084
+ matrix_nochange = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
2085
+ matrix_effect = np.array(
2086
+ [
2087
+ [-1 - strength_sample, 0 - strength_sample, 0],
2088
+ [0 - strength_sample, 1, 0 + strength_sample],
2089
+ [0, 0 + strength_sample, 1 + strength_sample],
2090
+ ],
2091
+ dtype=np.float32,
2092
+ )
2093
+ matrix = (1 - alpha_sample) * matrix_nochange + alpha_sample * matrix_effect
2094
+ return matrix
2095
+
2096
+ def get_params(self):
2097
+ alpha = random.uniform(*self.alpha)
2098
+ strength = random.uniform(*self.strength)
2099
+ emboss_matrix = self.__generate_emboss_matrix(alpha_sample=alpha, strength_sample=strength)
2100
+ return {"emboss_matrix": emboss_matrix}
2101
+
2102
+ def apply(self, img, emboss_matrix=None, **params):
2103
+ return F.convolve(img, emboss_matrix)
2104
+
2105
+ def get_transform_init_args_names(self):
2106
+ return ("alpha", "strength")
2107
+
2108
+
2109
+ class Superpixels(ImageOnlyTransform):
2110
+ """Transform images partially/completely to their superpixel representation.
2111
+ This implementation uses skimage's version of the SLIC algorithm.
2112
+
2113
+ Args:
2114
+ p_replace (float or tuple of float): Defines for any segment the probability that the pixels within that
2115
+ segment are replaced by their average color (otherwise, the pixels are not changed).
2116
+ Examples:
2117
+ * A probability of ``0.0`` would mean, that the pixels in no
2118
+ segment are replaced by their average color (image is not
2119
+ changed at all).
2120
+ * A probability of ``0.5`` would mean, that around half of all
2121
+ segments are replaced by their average color.
2122
+ * A probability of ``1.0`` would mean, that all segments are
2123
+ replaced by their average color (resulting in a voronoi
2124
+ image).
2125
+ Behaviour based on chosen data types for this parameter:
2126
+ * If a ``float``, then that ``float`` will always be used.
2127
+ * If ``tuple`` ``(a, b)``, then a random probability will be
2128
+ sampled from the interval ``[a, b]`` per image.
2129
+ n_segments (int, or tuple of int): Rough target number of how many superpixels to generate (the algorithm
2130
+ may deviate from this number). Lower value will lead to coarser superpixels.
2131
+ Higher values are computationally more intensive and will hence lead to a slowdown.
2132
+ * If a single ``int``, then that value will always be used as the
2133
+ number of segments.
2134
+ * If a ``tuple`` ``(a, b)``, then a value from the discrete
2135
+ interval ``[a..b]`` will be sampled per image.
2136
+ max_size (int or None): Maximum image size at which the augmentation is performed.
2137
+ If the width or height of an image exceeds this value, it will be
2138
+ downscaled before the augmentation so that the longest side matches `max_size`.
2139
+ This is done to speed up the process. The final output image has the same size as the input image.
2140
+ Note that in case `p_replace` is below ``1.0``,
2141
+ the down-/upscaling will affect the not-replaced pixels too.
2142
+ Use ``None`` to apply no down-/upscaling.
2143
+ interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
2144
+ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
2145
+ Default: cv2.INTER_LINEAR.
2146
+ p (float): probability of applying the transform. Default: 0.5.
2147
+
2148
+ Targets:
2149
+ image
2150
+ """
2151
+
2152
+ def __init__(
2153
+ self,
2154
+ p_replace: Union[float, Sequence[float]] = 0.1,
2155
+ n_segments: Union[int, Sequence[int]] = 100,
2156
+ max_size: Optional[int] = 128,
2157
+ interpolation: int = cv2.INTER_LINEAR,
2158
+ always_apply: bool = False,
2159
+ p: float = 0.5,
2160
+ ):
2161
+ super().__init__(always_apply=always_apply, p=p)
2162
+ self.p_replace = to_tuple(p_replace, p_replace)
2163
+ self.n_segments = to_tuple(n_segments, n_segments)
2164
+ self.max_size = max_size
2165
+ self.interpolation = interpolation
2166
+
2167
+ if min(self.n_segments) < 1:
2168
+ raise ValueError(f"n_segments must be >= 1. Got: {n_segments}")
2169
+
2170
+ def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
2171
+ return ("p_replace", "n_segments", "max_size", "interpolation")
2172
+
2173
+ def get_params(self) -> dict:
2174
+ n_segments = random.randint(*self.n_segments)
2175
+ p = random.uniform(*self.p_replace)
2176
+ return {"replace_samples": random_utils.random(n_segments) < p, "n_segments": n_segments}
2177
+
2178
+ def apply(self, img: np.ndarray, replace_samples: Sequence[bool] = (False,), n_segments: int = 1, **kwargs):
2179
+ return F.superpixels(img, n_segments, replace_samples, self.max_size, self.interpolation)
2180
+
2181
+
2182
+ class TemplateTransform(ImageOnlyTransform):
2183
+ """
2184
+ Apply blending of the input image with the specified templates.
2185
+ Args:
2186
+ templates (numpy array or list of numpy arrays): Images as template for transform.
2187
+ img_weight ((float, float) or float): If single float will be used as weight for input image.
2188
+ If tuple of float img_weight will be in range `[img_weight[0], img_weight[1])`. Default: 0.5.
2189
+ template_weight ((float, float) or float): If single float will be used as weight for template.
2190
+ If tuple of float template_weight will be in range `[template_weight[0], template_weight[1])`.
2191
+ Default: 0.5.
2192
+ template_transform: transformation object which could be applied to template,
2193
+ must produce template the same size as input image.
2194
+ name (string): (Optional) Name of transform, used only for deserialization.
2195
+ p (float): probability of applying the transform. Default: 0.5.
2196
+ Targets:
2197
+ image
2198
+ Image types:
2199
+ uint8, float32
2200
+ """
2201
+
2202
+ def __init__(
2203
+ self,
2204
+ templates,
2205
+ img_weight=0.5,
2206
+ template_weight=0.5,
2207
+ template_transform=None,
2208
+ name=None,
2209
+ always_apply=False,
2210
+ p=0.5,
2211
+ ):
2212
+ super().__init__(always_apply, p)
2213
+
2214
+ self.templates = templates if isinstance(templates, (list, tuple)) else [templates]
2215
+ self.img_weight = to_tuple(img_weight, img_weight)
2216
+ self.template_weight = to_tuple(template_weight, template_weight)
2217
+ self.template_transform = template_transform
2218
+ self.name = name
2219
+
2220
+ def apply(self, img, template=None, img_weight=0.5, template_weight=0.5, **params):
2221
+ return F.add_weighted(img, img_weight, template, template_weight)
2222
+
2223
+ def get_params(self):
2224
+ return {
2225
+ "img_weight": random.uniform(self.img_weight[0], self.img_weight[1]),
2226
+ "template_weight": random.uniform(self.template_weight[0], self.template_weight[1]),
2227
+ }
2228
+
2229
+ def get_params_dependent_on_targets(self, params):
2230
+ img = params["image"]
2231
+ template = random.choice(self.templates)
2232
+
2233
+ if self.template_transform is not None:
2234
+ template = self.template_transform(image=template)["image"]
2235
+
2236
+ if get_num_channels(template) not in [1, get_num_channels(img)]:
2237
+ raise ValueError(
2238
+ "Template must be a single channel or "
2239
+ "has the same number of channels as input image ({}), got {}".format(
2240
+ get_num_channels(img), get_num_channels(template)
2241
+ )
2242
+ )
2243
+
2244
+ if template.dtype != img.dtype:
2245
+ raise ValueError("Image and template must be the same image type")
2246
+
2247
+ if img.shape[:2] != template.shape[:2]:
2248
+ raise ValueError(
2249
+ "Image and template must be the same size, got {} and {}".format(img.shape[:2], template.shape[:2])
2250
+ )
2251
+
2252
+ if get_num_channels(template) == 1 and get_num_channels(img) > 1:
2253
+ template = np.stack((template,) * get_num_channels(img), axis=-1)
2254
+
2255
+ # in order to support grayscale image with dummy dim
2256
+ template = template.reshape(img.shape)
2257
+
2258
+ return {"template": template}
2259
+
2260
+ @classmethod
2261
+ def is_serializable(cls):
2262
+ return False
2263
+
2264
+ @property
2265
+ def targets_as_params(self):
2266
+ return ["image"]
2267
+
2268
+ def _to_dict(self):
2269
+ if self.name is None:
2270
+ raise ValueError(
2271
+ "To make a TemplateTransform serializable you should provide the `name` argument, "
2272
+ "e.g. `TemplateTransform(name='my_transform', ...)`."
2273
+ )
2274
+ return {"__class_fullname__": self.get_class_fullname(), "__name__": self.name}
2275
+
2276
+
2277
+ class RingingOvershoot(ImageOnlyTransform):
2278
+ """Create ringing or overshoot artefacts by conlvolving image with 2D sinc filter.
2279
+
2280
+ Args:
2281
+ blur_limit (int, (int, int)): maximum kernel size for sinc filter.
2282
+ Should be in range [3, inf). Default: (7, 15).
2283
+ cutoff (float, (float, float)): range to choose the cutoff frequency in radians.
2284
+ Should be in range (0, np.pi)
2285
+ Default: (np.pi / 4, np.pi / 2).
2286
+ p (float): probability of applying the transform. Default: 0.5.
2287
+
2288
+ Reference:
2289
+ dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
2290
+ https://arxiv.org/abs/2107.10833
2291
+
2292
+ Targets:
2293
+ image
2294
+ """
2295
+
2296
+ def __init__(
2297
+ self,
2298
+ blur_limit: Union[int, Sequence[int]] = (7, 15),
2299
+ cutoff: Union[float, Sequence[float]] = (np.pi / 4, np.pi / 2),
2300
+ always_apply=False,
2301
+ p=0.5,
2302
+ ):
2303
+ super(RingingOvershoot, self).__init__(always_apply, p)
2304
+ self.blur_limit = to_tuple(blur_limit, 3)
2305
+ self.cutoff = self.__check_values(to_tuple(cutoff, np.pi / 2), name="cutoff", bounds=(0, np.pi))
2306
+
2307
+ @staticmethod
2308
+ def __check_values(value, name, bounds=(0, float("inf"))):
2309
+ if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
2310
+ raise ValueError(f"{name} values should be between {bounds}")
2311
+ return value
2312
+
2313
+ def get_params(self):
2314
+ ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
2315
+ if ksize % 2 == 0:
2316
+ raise ValueError(f"Kernel size must be odd. Got: {ksize}")
2317
+
2318
+ cutoff = random.uniform(*self.cutoff)
2319
+
2320
+ # From dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
2321
+ with np.errstate(divide="ignore", invalid="ignore"):
2322
+ kernel = np.fromfunction(
2323
+ lambda x, y: cutoff
2324
+ * special.j1(cutoff * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2))
2325
+ / (2 * np.pi * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2)),
2326
+ [ksize, ksize],
2327
+ )
2328
+ kernel[(ksize - 1) // 2, (ksize - 1) // 2] = cutoff**2 / (4 * np.pi)
2329
+
2330
+ # Normalize kernel
2331
+ kernel = kernel.astype(np.float32) / np.sum(kernel)
2332
+
2333
+ return {"kernel": kernel}
2334
+
2335
+ def apply(self, img, kernel=None, **params):
2336
+ return F.convolve(img, kernel)
2337
+
2338
+ def get_transform_init_args_names(self):
2339
+ return ("blur_limit", "cutoff")
2340
+
2341
+
2342
+ class UnsharpMask(ImageOnlyTransform):
2343
+ """
2344
+ Sharpen the input image using Unsharp Masking processing and overlays the result with the original image.
2345
+
2346
+ Args:
2347
+ blur_limit (int, (int, int)): maximum Gaussian kernel size for blurring the input image.
2348
+ Must be zero or odd and in range [0, inf). If set to 0 it will be computed from sigma
2349
+ as `round(sigma * (3 if img.dtype == np.uint8 else 4) * 2 + 1) + 1`.
2350
+ If set single value `blur_limit` will be in range (0, blur_limit).
2351
+ Default: (3, 7).
2352
+ sigma_limit (float, (float, float)): Gaussian kernel standard deviation. Must be in range [0, inf).
2353
+ If set single value `sigma_limit` will be in range (0, sigma_limit).
2354
+ If set to 0 sigma will be computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. Default: 0.
2355
+ alpha (float, (float, float)): range to choose the visibility of the sharpened image.
2356
+ At 0, only the original image is visible, at 1.0 only its sharpened version is visible.
2357
+ Default: (0.2, 0.5).
2358
+ threshold (int): Value to limit sharpening only for areas with high pixel difference between original image
2359
+ and its smoothed version. Higher threshold means less sharpening on flat areas.
2360
+ Must be in range [0, 255]. Default: 10.
2361
+ p (float): probability of applying the transform. Default: 0.5.
2362
+
2363
+ Reference:
2364
+ arxiv.org/pdf/2107.10833.pdf
2365
+
2366
+ Targets:
2367
+ image
2368
+ """
2369
+
2370
+ def __init__(
2371
+ self,
2372
+ blur_limit: Union[int, Sequence[int]] = (3, 7),
2373
+ sigma_limit: Union[float, Sequence[float]] = 0.0,
2374
+ alpha: Union[float, Sequence[float]] = (0.2, 0.5),
2375
+ threshold: int = 10,
2376
+ always_apply=False,
2377
+ p=0.5,
2378
+ ):
2379
+ super(UnsharpMask, self).__init__(always_apply, p)
2380
+ self.blur_limit = to_tuple(blur_limit, 3)
2381
+ self.sigma_limit = self.__check_values(to_tuple(sigma_limit, 0.0), name="sigma_limit")
2382
+ self.alpha = self.__check_values(to_tuple(alpha, 0.0), name="alpha", bounds=(0.0, 1.0))
2383
+ self.threshold = threshold
2384
+
2385
+ if self.blur_limit[0] == 0 and self.sigma_limit[0] == 0:
2386
+ self.blur_limit = 3, max(3, self.blur_limit[1])
2387
+ raise ValueError("blur_limit and sigma_limit minimum value can not be both equal to 0.")
2388
+
2389
+ if (self.blur_limit[0] != 0 and self.blur_limit[0] % 2 != 1) or (
2390
+ self.blur_limit[1] != 0 and self.blur_limit[1] % 2 != 1
2391
+ ):
2392
+ raise ValueError("UnsharpMask supports only odd blur limits.")
2393
+
2394
+ @staticmethod
2395
+ def __check_values(value, name, bounds=(0, float("inf"))):
2396
+ if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
2397
+ raise ValueError(f"{name} values should be between {bounds}")
2398
+ return value
2399
+
2400
+ def get_params(self):
2401
+ return {
2402
+ "ksize": random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2),
2403
+ "sigma": random.uniform(*self.sigma_limit),
2404
+ "alpha": random.uniform(*self.alpha),
2405
+ }
2406
+
2407
+ def apply(self, img, ksize=3, sigma=0, alpha=0.2, **params):
2408
+ return F.unsharp_mask(img, ksize, sigma=sigma, alpha=alpha, threshold=self.threshold)
2409
+
2410
+ def get_transform_init_args_names(self):
2411
+ return ("blur_limit", "sigma_limit", "alpha", "threshold")
2412
+
2413
+
2414
+ class PixelDropout(DualTransform):
2415
+ """Set pixels to 0 with some probability.
2416
+
2417
+ Args:
2418
+ dropout_prob (float): pixel drop probability. Default: 0.01
2419
+ per_channel (bool): if set to `True` the drop mask will be sampled for each channel,
2420
+ otherwise the same mask will be sampled for all channels. Default: False
2421
+ drop_value (number or sequence of numbers or None): Value that will be set in dropped place.
2422
+ If set to None value will be sampled randomly, default ranges will be used:
2423
+ - uint8 - [0, 255]
2424
+ - uint16 - [0, 65535]
2425
+ - uint32 - [0, 4294967295]
2426
+ - float, double - [0, 1]
2427
+ Default: 0
2428
+ mask_drop_value (number or sequence of numbers or None): Value that will be set in dropped place in masks.
2429
+ If set to None masks will be unchanged. Default: 0
2430
+ p (float): probability of applying the transform. Default: 0.5.
2431
+
2432
+ Targets:
2433
+ image, mask
2434
+ Image types:
2435
+ any
2436
+ """
2437
+
2438
+ def __init__(
2439
+ self,
2440
+ dropout_prob: float = 0.01,
2441
+ per_channel: bool = False,
2442
+ drop_value: Optional[Union[float, Sequence[float]]] = 0,
2443
+ mask_drop_value: Optional[Union[float, Sequence[float]]] = None,
2444
+ always_apply: bool = False,
2445
+ p: float = 0.5,
2446
+ ):
2447
+ super().__init__(always_apply, p)
2448
+ self.dropout_prob = dropout_prob
2449
+ self.per_channel = per_channel
2450
+ self.drop_value = drop_value
2451
+ self.mask_drop_value = mask_drop_value
2452
+
2453
+ if self.mask_drop_value is not None and self.per_channel:
2454
+ raise ValueError("PixelDropout supports mask only with per_channel=False")
2455
+
2456
+ def apply(
2457
+ self,
2458
+ img: np.ndarray,
2459
+ drop_mask: np.ndarray = np.array(None),
2460
+ drop_value: Union[float, Sequence[float]] = (),
2461
+ **params
2462
+ ) -> np.ndarray:
2463
+ return F.pixel_dropout(img, drop_mask, drop_value)
2464
+
2465
+ def apply_to_mask(self, img: np.ndarray, drop_mask: np.ndarray = np.array(None), **params) -> np.ndarray:
2466
+ if self.mask_drop_value is None:
2467
+ return img
2468
+
2469
+ if img.ndim == 2:
2470
+ drop_mask = np.squeeze(drop_mask)
2471
+
2472
+ return F.pixel_dropout(img, drop_mask, self.mask_drop_value)
2473
+
2474
+ def apply_to_bbox(self, bbox, **params):
2475
+ return bbox
2476
+
2477
+ def apply_to_keypoint(self, keypoint, **params):
2478
+ return keypoint
2479
+
2480
+ def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
2481
+ img = params["image"]
2482
+ shape = img.shape if self.per_channel else img.shape[:2]
2483
+
2484
+ rnd = np.random.RandomState(random.randint(0, 1 << 31))
2485
+ # Use choice to create boolean matrix, if we will use binomial after that we will need type conversion
2486
+ drop_mask = rnd.choice([True, False], shape, p=[self.dropout_prob, 1 - self.dropout_prob])
2487
+
2488
+ drop_value: Union[float, Sequence[float], np.ndarray]
2489
+ if drop_mask.ndim != img.ndim:
2490
+ drop_mask = np.expand_dims(drop_mask, -1)
2491
+ if self.drop_value is None:
2492
+ drop_shape = 1 if is_grayscale_image(img) else int(img.shape[-1])
2493
+
2494
+ if img.dtype in (np.uint8, np.uint16, np.uint32):
2495
+ drop_value = rnd.randint(0, int(F.MAX_VALUES_BY_DTYPE[img.dtype]), drop_shape, img.dtype)
2496
+ elif img.dtype in [np.float32, np.double]:
2497
+ drop_value = rnd.uniform(0, 1, drop_shape).astype(img.dtype)
2498
+ else:
2499
+ raise ValueError(f"Unsupported dtype: {img.dtype}")
2500
+ else:
2501
+ drop_value = self.drop_value
2502
+
2503
+ return {"drop_mask": drop_mask, "drop_value": drop_value}
2504
+
2505
+ @property
2506
+ def targets_as_params(self) -> List[str]:
2507
+ return ["image"]
2508
+
2509
+ def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
2510
+ return ("dropout_prob", "per_channel", "drop_value", "mask_drop_value")
2511
+
2512
+
2513
+ class Spatter(ImageOnlyTransform):
2514
+ """
2515
+ Apply spatter transform. It simulates corruption which can occlude a lens in the form of rain or mud.
2516
+
2517
+ Args:
2518
+ mean (float, or tuple of floats): Mean value of normal distribution for generating liquid layer.
2519
+ If single float it will be used as mean.
2520
+ If tuple of float mean will be sampled from range `[mean[0], mean[1])`. Default: (0.65).
2521
+ std (float, or tuple of floats): Standard deviation value of normal distribution for generating liquid layer.
2522
+ If single float it will be used as std.
2523
+ If tuple of float std will be sampled from range `[std[0], std[1])`. Default: (0.3).
2524
+ gauss_sigma (float, or tuple of floats): Sigma value for gaussian filtering of liquid layer.
2525
+ If single float it will be used as gauss_sigma.
2526
+ If tuple of float gauss_sigma will be sampled from range `[sigma[0], sigma[1])`. Default: (2).
2527
+ cutout_threshold (float, or tuple of floats): Threshold for filtering the liquid layer
2528
+ (determines the number of drops). If single float it will be used as cutout_threshold.
2529
+ If tuple of float cutout_threshold will be sampled from range `[cutout_threshold[0], cutout_threshold[1])`.
2530
+ Default: (0.68).
2531
+ intensity (float, or tuple of floats): Intensity of corruption.
2532
+ If single float it will be used as intensity.
2533
+ If tuple of float intensity will be sampled from range `[intensity[0], intensity[1])`. Default: (0.6).
2534
+ mode (string, or list of strings): Type of corruption. Currently, supported options are 'rain' and 'mud'.
2535
+ If a list is provided, the type of corruption will be sampled from that list. Default: ("rain").
2536
+ color (list of (r, g, b) or dict or None): Corruption elements color.
2537
+ If a list is provided, it is used as the color for the specified mode.
2538
+ If a dict is provided, it maps modes to colors; a color must be given for every specified mode.
2539
+ If None, default colors are used (rain: (238, 238, 175), mud: (20, 42, 63)).
2540
+ p (float): probability of applying the transform. Default: 0.5.
2541
+
2542
+ Targets:
2543
+ image
2544
+
2545
+ Image types:
2546
+ uint8, float32
2547
+
2548
+ Reference:
2549
+ | https://arxiv.org/pdf/1903.12261.pdf
2550
+ | https://github.com/hendrycks/robustness/blob/master/ImageNet-C/create_c/make_imagenet_c.py
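+
+ Example:
+ A minimal usage sketch (parameter values are illustrative; assumes Spatter is
+ exported at the package top level, as in upstream albumentations):
+
+ >>> import numpy as np
+ >>> import custom_albumentations as A
+ >>> aug = A.Compose([A.Spatter(mode="mud", p=1.0)])
+ >>> corrupted = aug(image=np.zeros((64, 64, 3), dtype=np.uint8))["image"]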
2551
+ """
2552
+
2553
+ def __init__(
2554
+ self,
2555
+ mean: ScaleFloatType = 0.65,
2556
+ std: ScaleFloatType = 0.3,
2557
+ gauss_sigma: ScaleFloatType = 2,
2558
+ cutout_threshold: ScaleFloatType = 0.68,
2559
+ intensity: ScaleFloatType = 0.6,
2560
+ mode: Union[str, Sequence[str]] = "rain",
2561
+ color: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None,
2562
+ always_apply: bool = False,
2563
+ p: float = 0.5,
2564
+ ):
2565
+ super().__init__(always_apply=always_apply, p=p)
2566
+
2567
+ self.mean = to_tuple(mean, mean)
2568
+ self.std = to_tuple(std, std)
2569
+ self.gauss_sigma = to_tuple(gauss_sigma, gauss_sigma)
2570
+ self.intensity = to_tuple(intensity, intensity)
2571
+ self.cutout_threshold = to_tuple(cutout_threshold, cutout_threshold)
2572
+ self.color = (
2573
+ color
2574
+ if color is not None
2575
+ else {
2576
+ "rain": [238, 238, 175],
2577
+ "mud": [20, 42, 63],
2578
+ }
2579
+ )
2580
+ self.mode = mode if isinstance(mode, (list, tuple)) else [mode]
2581
+
2582
+ if len(set(self.mode)) > 1 and not isinstance(self.color, dict):
2583
+ raise ValueError(f"Unsupported color: {self.color}. Please specify color for each mode (use dict for it).")
2584
+
2585
+ for i in self.mode:
2586
+ if i not in ["rain", "mud"]:
2587
+ raise ValueError(f"Unsupported color mode: {mode}. Transform supports only `rain` and `mud` mods.")
2588
+ if isinstance(self.color, dict):
2589
+ if i not in self.color:
2590
+ raise ValueError(f"Wrong color definition: {self.color}. Color for mode: {i} not specified.")
2591
+ if len(self.color[i]) != 3:
2592
+ raise ValueError(
2593
+ f"Unsupported color: {self.color[i]} for mode {i}. Color should be presented in RGB format."
2594
+ )
2595
+
2596
+ if isinstance(self.color, (list, tuple)):
2597
+ if len(self.color) != 3:
2598
+ raise ValueError(f"Unsupported color: {self.color}. Color should be presented in RGB format.")
2599
+ self.color = {self.mode[0]: self.color}
2600
+
2601
+ def apply(
2602
+ self,
2603
+ img: np.ndarray,
2604
+ non_mud: Optional[np.ndarray] = None,
2605
+ mud: Optional[np.ndarray] = None,
2606
+ drops: Optional[np.ndarray] = None,
2607
+ mode: str = "",
2608
+ **params
2609
+ ) -> np.ndarray:
2610
+ return F.spatter(img, non_mud, mud, drops, mode)
2611
+
2612
+ @property
2613
+ def targets_as_params(self) -> List[str]:
2614
+ return ["image"]
2615
+
2616
+ def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
2617
+ h, w = params["image"].shape[:2]
2618
+
2619
+ mean = random.uniform(self.mean[0], self.mean[1])
2620
+ std = random.uniform(self.std[0], self.std[1])
2621
+ cutout_threshold = random.uniform(self.cutout_threshold[0], self.cutout_threshold[1])
2622
+ sigma = random.uniform(self.gauss_sigma[0], self.gauss_sigma[1])
2623
+ mode = random.choice(self.mode)
2624
+ intensity = random.uniform(self.intensity[0], self.intensity[1])
2625
+ color = np.array(self.color[mode]) / 255.0
2626
+
2627
+ liquid_layer = random_utils.normal(size=(h, w), loc=mean, scale=std)
2628
+ liquid_layer = gaussian_filter(liquid_layer, sigma=sigma, mode="nearest")
2629
+ liquid_layer[liquid_layer < cutout_threshold] = 0
2630
+
2631
+ if mode == "rain":
2632
+ liquid_layer = (liquid_layer * 255).astype(np.uint8)
2633
+ dist = 255 - cv2.Canny(liquid_layer, 50, 150)
2634
+ dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5)
2635
+ _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC)
2636
+ dist = blur(dist, 3).astype(np.uint8)
2637
+ dist = F.equalize(dist)
2638
+
2639
+ ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])
2640
+ dist = F.convolve(dist, ker)
2641
+ dist = blur(dist, 3).astype(np.float32)
2642
+
2643
+ m = liquid_layer * dist
2644
+ m *= 1 / np.max(m, axis=(0, 1))
2645
+
2646
+ drops = m[:, :, None] * color * intensity
2647
+ mud = None
2648
+ non_mud = None
2649
+ else:
2650
+ m = np.where(liquid_layer > cutout_threshold, 1, 0)
2651
+ m = gaussian_filter(m.astype(np.float32), sigma=sigma, mode="nearest")
2652
+ m[m < 1.2 * cutout_threshold] = 0
2653
+ m = m[..., np.newaxis]
2654
+
2655
+ mud = m * color
2656
+ non_mud = 1 - m
2657
+ drops = None
2658
+
2659
+ return {
2660
+ "non_mud": non_mud,
2661
+ "mud": mud,
2662
+ "drops": drops,
2663
+ "mode": mode,
2664
+ }
2665
+
2666
+ def get_transform_init_args_names(self) -> Tuple[str, str, str, str, str, str, str]:
2667
+ return "mean", "std", "gauss_sigma", "intensity", "cutout_threshold", "mode", "color"
custom_albumentations/augmentations/utils.py ADDED
@@ -0,0 +1,211 @@
1
+ from functools import wraps
2
+ from typing import Callable, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ from typing_extensions import Concatenate, ParamSpec
7
+
8
+ from custom_albumentations.core.keypoints_utils import angle_to_2pi_range
9
+ from custom_albumentations.core.transforms_interface import KeypointInternalType
10
+
11
+ __all__ = [
12
+ "read_bgr_image",
13
+ "read_rgb_image",
14
+ "MAX_VALUES_BY_DTYPE",
15
+ "NPDTYPE_TO_OPENCV_DTYPE",
16
+ "clipped",
17
+ "get_opencv_dtype_from_numpy",
18
+ "angle_2pi_range",
19
+ "clip",
20
+ "preserve_shape",
21
+ "preserve_channel_dim",
22
+ "ensure_contiguous",
23
+ "is_rgb_image",
24
+ "is_grayscale_image",
25
+ "is_multispectral_image",
26
+ "get_num_channels",
27
+ "non_rgb_warning",
28
+ "_maybe_process_in_chunks",
29
+ ]
30
+
31
+ P = ParamSpec("P")
32
+
33
+ MAX_VALUES_BY_DTYPE = {
34
+ np.dtype("uint8"): 255,
35
+ np.dtype("uint16"): 65535,
36
+ np.dtype("uint32"): 4294967295,
37
+ np.dtype("float32"): 1.0,
38
+ }
39
+
40
+ NPDTYPE_TO_OPENCV_DTYPE = {
41
+ np.uint8: cv2.CV_8U, # type: ignore[attr-defined]
42
+ np.uint16: cv2.CV_16U, # type: ignore[attr-defined]
43
+ np.int32: cv2.CV_32S, # type: ignore[attr-defined]
44
+ np.float32: cv2.CV_32F, # type: ignore[attr-defined]
45
+ np.float64: cv2.CV_64F, # type: ignore[attr-defined]
46
+ np.dtype("uint8"): cv2.CV_8U, # type: ignore[attr-defined]
47
+ np.dtype("uint16"): cv2.CV_16U, # type: ignore[attr-defined]
48
+ np.dtype("int32"): cv2.CV_32S, # type: ignore[attr-defined]
49
+ np.dtype("float32"): cv2.CV_32F, # type: ignore[attr-defined]
50
+ np.dtype("float64"): cv2.CV_64F, # type: ignore[attr-defined]
51
+ }
52
+
53
+
54
+ def read_bgr_image(path):
55
+ return cv2.imread(path, cv2.IMREAD_COLOR)
56
+
57
+
58
+ def read_rgb_image(path):
59
+ image = cv2.imread(path, cv2.IMREAD_COLOR)
60
+ return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
61
+
62
+
63
+ def clipped(func: Callable[Concatenate[np.ndarray, P], np.ndarray]) -> Callable[Concatenate[np.ndarray, P], np.ndarray]:
64
+ @wraps(func)
65
+ def wrapped_function(img: np.ndarray, *args: P.args, **kwargs: P.kwargs) -> np.ndarray:
66
+ dtype = img.dtype
67
+ maxval = MAX_VALUES_BY_DTYPE.get(dtype, 1.0)
68
+ return clip(func(img, *args, **kwargs), dtype, maxval)
69
+
70
+ return wrapped_function
71
+
72
+
73
+ def clip(img: np.ndarray, dtype: np.dtype, maxval: float) -> np.ndarray:
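+ """Clip values to the range [0, maxval] and cast back to `dtype`.
+
+ A worked sketch (values are illustrative):
+
+ >>> clip(np.array([-1.0, 0.5, 300.0]), np.uint8, 255).tolist()
+ [0, 0, 255]
+ """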
74
+ return np.clip(img, 0, maxval).astype(dtype)
75
+
76
+
77
+ def get_opencv_dtype_from_numpy(value: Union[np.ndarray, int, np.dtype, object]) -> int:
78
+ """
79
+ Return the corresponding OpenCV dtype for a numpy dtype
80
+ :param value: Input dtype of numpy array
81
+ :return: Corresponding dtype for OpenCV
82
+ """
83
+ if isinstance(value, np.ndarray):
84
+ value = value.dtype
85
+ return NPDTYPE_TO_OPENCV_DTYPE[value]
86
+
87
+
88
+ def angle_2pi_range(
89
+ func: Callable[Concatenate[KeypointInternalType, P], KeypointInternalType]
90
+ ) -> Callable[Concatenate[KeypointInternalType, P], KeypointInternalType]:
91
+ @wraps(func)
92
+ def wrapped_function(keypoint: KeypointInternalType, *args: P.args, **kwargs: P.kwargs) -> KeypointInternalType:
93
+ (x, y, a, s) = func(keypoint, *args, **kwargs)[:4]
94
+ return (x, y, angle_to_2pi_range(a), s)
95
+
96
+ return wrapped_function
97
+
98
+
99
+ def preserve_shape(
100
+ func: Callable[Concatenate[np.ndarray, P], np.ndarray]
101
+ ) -> Callable[Concatenate[np.ndarray, P], np.ndarray]:
102
+ """Preserve shape of the image"""
103
+
104
+ @wraps(func)
105
+ def wrapped_function(img: np.ndarray, *args: P.args, **kwargs: P.kwargs) -> np.ndarray:
106
+ shape = img.shape
107
+ result = func(img, *args, **kwargs)
108
+ result = result.reshape(shape)
109
+ return result
110
+
111
+ return wrapped_function
112
+
113
+
114
+ def preserve_channel_dim(
115
+ func: Callable[Concatenate[np.ndarray, P], np.ndarray]
116
+ ) -> Callable[Concatenate[np.ndarray, P], np.ndarray]:
117
+ """Preserve dummy channel dim."""
118
+
119
+ @wraps(func)
120
+ def wrapped_function(img: np.ndarray, *args: P.args, **kwargs: P.kwargs) -> np.ndarray:
121
+ shape = img.shape
122
+ result = func(img, *args, **kwargs)
123
+ if len(shape) == 3 and shape[-1] == 1 and len(result.shape) == 2:
124
+ result = np.expand_dims(result, axis=-1)
125
+ return result
126
+
127
+ return wrapped_function
128
+
129
+
130
+ def ensure_contiguous(
131
+ func: Callable[Concatenate[np.ndarray, P], np.ndarray]
132
+ ) -> Callable[Concatenate[np.ndarray, P], np.ndarray]:
133
+ """Ensure that input img is contiguous."""
134
+
135
+ @wraps(func)
136
+ def wrapped_function(img: np.ndarray, *args: P.args, **kwargs: P.kwargs) -> np.ndarray:
137
+ img = np.require(img, requirements=["C_CONTIGUOUS"])
138
+ result = func(img, *args, **kwargs)
139
+ return result
140
+
141
+ return wrapped_function
142
+
143
+
144
+ def is_rgb_image(image: np.ndarray) -> bool:
145
+ return len(image.shape) == 3 and image.shape[-1] == 3
146
+
147
+
148
+ def is_grayscale_image(image: np.ndarray) -> bool:
149
+ return (len(image.shape) == 2) or (len(image.shape) == 3 and image.shape[-1] == 1)
150
+
151
+
152
+ def is_multispectral_image(image: np.ndarray) -> bool:
153
+ return len(image.shape) == 3 and image.shape[-1] not in [1, 3]
154
+
155
+
156
+ def get_num_channels(image: np.ndarray) -> int:
157
+ return image.shape[2] if len(image.shape) == 3 else 1
158
+
159
+
160
+ def non_rgb_warning(image: np.ndarray) -> None:
161
+ if not is_rgb_image(image):
162
+ message = "This transformation expects 3-channel images"
163
+ if is_grayscale_image(image):
164
+ message += "\nYou can convert your grayscale image to RGB using cv2.cvtColor(image, cv2.COLOR_GRAY2RGB))"
165
+ if is_multispectral_image(image): # Any image with a number of channels other than 1 and 3
166
+ message += "\nThis transformation cannot be applied to multi-spectral images"
167
+
168
+ raise ValueError(message)
169
+
170
+
171
+ def _maybe_process_in_chunks(
172
+ process_fn: Callable[Concatenate[np.ndarray, P], np.ndarray], **kwargs
173
+ ) -> Callable[[np.ndarray], np.ndarray]:
174
+ """
175
+ Wrap OpenCV function to enable processing images with more than 4 channels.
176
+
177
+ Limitations:
178
+ This wrapper requires the image to be the first argument; all other arguments must be passed by name.
179
+
180
+ Args:
181
+ process_fn: Transform function (e.g. cv2.resize).
182
+ kwargs: Additional parameters.
183
+
184
+ Returns:
185
+ Callable: A wrapped function that takes an image and returns the transformed image, processing images with more than 4 channels in chunks.
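+
+ Example:
+ A chunking sketch (shapes are illustrative): a 6-channel image is processed
+ in chunks of at most 4 channels and reassembled.
+
+ >>> resize_fn = _maybe_process_in_chunks(cv2.resize, dsize=(32, 32))
+ >>> resize_fn(np.zeros((64, 64, 6), dtype=np.uint8)).shape
+ (32, 32, 6)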
186
+
187
+ """
188
+
189
+ @wraps(process_fn)
190
+ def __process_fn(img: np.ndarray) -> np.ndarray:
191
+ num_channels = get_num_channels(img)
192
+ if num_channels > 4:
193
+ chunks = []
194
+ for index in range(0, num_channels, 4):
195
+ if num_channels - index == 2:
196
+ # Many OpenCV functions cannot work with 2-channel images
197
+ for i in range(2):
198
+ chunk = img[:, :, index + i : index + i + 1]
199
+ chunk = process_fn(chunk, **kwargs)
200
+ chunk = np.expand_dims(chunk, -1)
201
+ chunks.append(chunk)
202
+ else:
203
+ chunk = img[:, :, index : index + 4]
204
+ chunk = process_fn(chunk, **kwargs)
205
+ chunks.append(chunk)
206
+ img = np.dstack(chunks)
207
+ else:
208
+ img = process_fn(img, **kwargs)
209
+ return img
210
+
211
+ return __process_fn
custom_albumentations/core/__init__.py ADDED
File without changes
custom_albumentations/core/bbox_utils.py ADDED
@@ -0,0 +1,522 @@
1
+ from __future__ import division
2
+
3
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, cast
4
+
5
+ import numpy as np
6
+
7
+ from .transforms_interface import BoxInternalType, BoxType
8
+ from .utils import DataProcessor, Params
9
+
10
+ __all__ = [
11
+ "normalize_bbox",
12
+ "denormalize_bbox",
13
+ "normalize_bboxes",
14
+ "denormalize_bboxes",
15
+ "calculate_bbox_area",
16
+ "filter_bboxes_by_visibility",
17
+ "convert_bbox_to_albumentations",
18
+ "convert_bbox_from_albumentations",
19
+ "convert_bboxes_to_albumentations",
20
+ "convert_bboxes_from_albumentations",
21
+ "check_bbox",
22
+ "check_bboxes",
23
+ "filter_bboxes",
24
+ "union_of_bboxes",
25
+ "BboxProcessor",
26
+ "BboxParams",
27
+ ]
28
+
29
+ TBox = TypeVar("TBox", BoxType, BoxInternalType)
30
+
31
+
32
+ class BboxParams(Params):
33
+ """
34
+ Parameters of bounding boxes
35
+
36
+ Args:
37
+ format (str): format of bounding boxes. Should be 'coco', 'pascal_voc', 'albumentations' or 'yolo'.
38
+
39
+ The `coco` format
40
+ `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
41
+ The `pascal_voc` format
42
+ `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
43
+ The `albumentations` format
44
+ is like `pascal_voc`, but normalized,
45
+ in other words: `[x_min, y_min, x_max, y_max]`, e.g. [0.2, 0.3, 0.4, 0.5].
46
+ The `yolo` format
47
+ `[x, y, width, height]`, e.g. [0.1, 0.2, 0.3, 0.4];
48
+ `x`, `y` - normalized bbox center; `width`, `height` - normalized bbox width and height.
49
+ label_fields (list): list of fields that are joined with boxes, e.g labels.
50
+ Should be same type as boxes.
51
+ min_area (float): minimum area of a bounding box. All bounding boxes whose
52
+ visible area in pixels is less than this value will be removed. Default: 0.0.
53
+ min_visibility (float): minimum fraction of area for a bounding box
54
+ to remain this box in list. Default: 0.0.
55
+ min_width (float): Minimum width of a bounding box. All bounding boxes whose width is
56
+ less than this value will be removed. Default: 0.0.
57
+ min_height (float): Minimum height of a bounding box. All bounding boxes whose height is
58
+ less than this value will be removed. Default: 0.0.
59
+ check_each_transform (bool): if `True`, then bboxes will be checked after each dual transform.
60
+ Default: `True`
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ format: str,
66
+ label_fields: Optional[Sequence[str]] = None,
67
+ min_area: float = 0.0,
68
+ min_visibility: float = 0.0,
69
+ min_width: float = 0.0,
70
+ min_height: float = 0.0,
71
+ check_each_transform: bool = True,
72
+ ):
73
+ super(BboxParams, self).__init__(format, label_fields)
74
+ self.min_area = min_area
75
+ self.min_visibility = min_visibility
76
+ self.min_width = min_width
77
+ self.min_height = min_height
78
+ self.check_each_transform = check_each_transform
79
+
80
+ def _to_dict(self) -> Dict[str, Any]:
81
+ data = super(BboxParams, self)._to_dict()
82
+ data.update(
83
+ {
84
+ "min_area": self.min_area,
85
+ "min_visibility": self.min_visibility,
86
+ "min_width": self.min_width,
87
+ "min_height": self.min_height,
88
+ "check_each_transform": self.check_each_transform,
89
+ }
90
+ )
91
+ return data
92
+
93
+ @classmethod
94
+ def is_serializable(cls) -> bool:
95
+ return True
96
+
97
+ @classmethod
98
+ def get_class_fullname(cls) -> str:
99
+ return "BboxParams"
100
+
101
+
102
+ class BboxProcessor(DataProcessor):
103
+ def __init__(self, params: BboxParams, additional_targets: Optional[Dict[str, str]] = None):
104
+ super().__init__(params, additional_targets)
105
+
106
+ @property
107
+ def default_data_name(self) -> str:
108
+ return "bboxes"
109
+
110
+ def ensure_data_valid(self, data: Dict[str, Any]) -> None:
111
+ for data_name in self.data_fields:
112
+ data_exists = data_name in data and len(data[data_name])
113
+ if data_exists and len(data[data_name][0]) < 5:
114
+ if self.params.label_fields is None:
115
+ raise ValueError(
116
+ "Please specify 'label_fields' in 'bbox_params' or add labels to the end of bbox "
117
+ "because bboxes must have labels"
118
+ )
119
+ if self.params.label_fields:
120
+ if not all(i in data.keys() for i in self.params.label_fields):
121
+ raise ValueError("Your 'label_fields' are not valid - them must have same names as params in dict")
122
+
123
+ def filter(self, data: Sequence, rows: int, cols: int) -> List:
124
+ self.params: BboxParams
125
+ return filter_bboxes(
126
+ data,
127
+ rows,
128
+ cols,
129
+ min_area=self.params.min_area,
130
+ min_visibility=self.params.min_visibility,
131
+ min_width=self.params.min_width,
132
+ min_height=self.params.min_height,
133
+ )
134
+
135
+ def check(self, data: Sequence, rows: int, cols: int) -> None:
136
+ check_bboxes(data)
137
+
138
+ def convert_from_albumentations(self, data: Sequence, rows: int, cols: int) -> List[BoxType]:
139
+ return convert_bboxes_from_albumentations(data, self.params.format, rows, cols, check_validity=True)
140
+
141
+ def convert_to_albumentations(self, data: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
142
+ return convert_bboxes_to_albumentations(data, self.params.format, rows, cols, check_validity=True)
143
+
144
+
145
+ def normalize_bbox(bbox: TBox, rows: int, cols: int) -> TBox:
146
+ """Normalize coordinates of a bounding box. Divide x-coordinates by image width and y-coordinates
147
+ by image height.
148
+
149
+ Args:
150
+ bbox: Denormalized bounding box `(x_min, y_min, x_max, y_max)`.
151
+ rows: Image height.
152
+ cols: Image width.
153
+
154
+ Returns:
155
+ Normalized bounding box `(x_min, y_min, x_max, y_max)`.
156
+
157
+ Raises:
158
+ ValueError: If rows or cols is less or equal zero
159
+
160
+ """
161
+
162
+ if rows <= 0:
163
+ raise ValueError("Argument rows must be positive integer")
164
+ if cols <= 0:
165
+ raise ValueError("Argument cols must be positive integer")
166
+
167
+ tail: Tuple[Any, ...]
168
+ (x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
169
+
170
+ x_min, x_max = x_min / cols, x_max / cols
171
+ y_min, y_max = y_min / rows, y_max / rows
172
+
173
+ return cast(BoxType, (x_min, y_min, x_max, y_max) + tail) # type: ignore
174
+
175
+
176
+ def denormalize_bbox(bbox: TBox, rows: int, cols: int) -> TBox:
177
+ """Denormalize coordinates of a bounding box. Multiply x-coordinates by image width and y-coordinates
178
+ by image height. This is an inverse operation for :func:`~albumentations.augmentations.bbox.normalize_bbox`.
179
+
180
+ Args:
181
+ bbox: Normalized bounding box `(x_min, y_min, x_max, y_max)`.
182
+ rows: Image height.
183
+ cols: Image width.
184
+
185
+ Returns:
186
+ Denormalized bounding box `(x_min, y_min, x_max, y_max)`.
187
+
188
+ Raises:
189
+ ValueError: If rows or cols is less or equal zero
190
+
191
+ """
192
+ tail: Tuple[Any, ...]
193
+ (x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
194
+
195
+ if rows <= 0:
196
+ raise ValueError("Argument rows must be positive integer")
197
+ if cols <= 0:
198
+ raise ValueError("Argument cols must be positive integer")
199
+
200
+ x_min, x_max = x_min * cols, x_max * cols
201
+ y_min, y_max = y_min * rows, y_max * rows
202
+
203
+ return cast(BoxType, (x_min, y_min, x_max, y_max) + tail) # type: ignore
204
+
205
+
206
+ def normalize_bboxes(bboxes: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
207
+ """Normalize a list of bounding boxes.
208
+
209
+ Args:
210
+ bboxes: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
211
+ rows: Image height.
212
+ cols: Image width.
213
+
214
+ Returns:
215
+ Normalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
216
+
217
+ """
218
+ return [normalize_bbox(bbox, rows, cols) for bbox in bboxes]
219
+
220
+
221
+ def denormalize_bboxes(bboxes: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
222
+ """Denormalize a list of bounding boxes.
223
+
224
+ Args:
225
+ bboxes: Normalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
226
+ rows: Image height.
227
+ cols: Image width.
228
+
229
+ Returns:
230
+ List: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
231
+
232
+ """
233
+ return [denormalize_bbox(bbox, rows, cols) for bbox in bboxes]
234
+
235
+
236
+ def calculate_bbox_area(bbox: BoxType, rows: int, cols: int) -> float:
237
+ """Calculate the area of a bounding box in (fractional) pixels.
238
+
239
+ Args:
240
+ bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
241
+ rows: Image height.
242
+ cols: Image width.
243
+
244
+ Return:
245
+ Area in (fractional) pixels of the (denormalized) bounding box.
246
+
247
+ """
248
+ bbox = denormalize_bbox(bbox, rows, cols)
249
+ x_min, y_min, x_max, y_max = bbox[:4]
250
+ area = (x_max - x_min) * (y_max - y_min)
251
+ return area
252
+
253
+
254
+ def filter_bboxes_by_visibility(
255
+ original_shape: Sequence[int],
256
+ bboxes: Sequence[BoxType],
257
+ transformed_shape: Sequence[int],
258
+ transformed_bboxes: Sequence[BoxType],
259
+ threshold: float = 0.0,
260
+ min_area: float = 0.0,
261
+ ) -> List[BoxType]:
262
+ """Filter bounding boxes and return only those boxes whose visibility after transformation is above
263
+ the threshold and minimal area of bounding box in pixels is more then min_area.
264
+
265
+ Args:
266
+ original_shape: Original image shape `(height, width, ...)`.
267
+ bboxes: Original bounding boxes `[(x_min, y_min, x_max, y_max)]`.
268
+ transformed_shape: Transformed image shape `(height, width)`.
269
+ transformed_bboxes: Transformed bounding boxes `[(x_min, y_min, x_max, y_max)]`.
270
+ threshold: visibility threshold. Should be a value in the range [0.0, 1.0].
271
+ min_area: Minimal area threshold.
272
+
273
+ Returns:
274
+ Filtered bounding boxes `[(x_min, y_min, x_max, y_max)]`.
275
+
276
+ """
277
+ img_height, img_width = original_shape[:2]
278
+ transformed_img_height, transformed_img_width = transformed_shape[:2]
279
+
280
+ visible_bboxes = []
281
+ for bbox, transformed_bbox in zip(bboxes, transformed_bboxes):
282
+ if not all(0.0 <= value <= 1.0 for value in transformed_bbox[:4]):
283
+ continue
284
+ bbox_area = calculate_bbox_area(bbox, img_height, img_width)
285
+ transformed_bbox_area = calculate_bbox_area(transformed_bbox, transformed_img_height, transformed_img_width)
286
+ if transformed_bbox_area < min_area:
287
+ continue
288
+ visibility = transformed_bbox_area / bbox_area
289
+ if visibility >= threshold:
290
+ visible_bboxes.append(transformed_bbox)
291
+ return visible_bboxes
292
+
293
+
294
+ def convert_bbox_to_albumentations(
295
+ bbox: BoxType, source_format: str, rows: int, cols: int, check_validity: bool = False
296
+ ) -> BoxType:
297
+ """Convert a bounding box from a format specified in `source_format` to the format used by albumentations:
298
+ normalized coordinates of top-left and bottom-right corners of the bounding box in a form of
299
+ `(x_min, y_min, x_max, y_max)` e.g. `(0.15, 0.27, 0.67, 0.5)`.
300
+
301
+ Args:
302
+ bbox: A bounding box tuple.
303
+ source_format: format of the bounding box. Should be 'coco', 'pascal_voc', or 'yolo'.
304
+ check_validity: Check if all boxes are valid boxes.
305
+ rows: Image height.
306
+ cols: Image width.
307
+
308
+ Returns:
309
+ tuple: A bounding box `(x_min, y_min, x_max, y_max)`.
310
+
311
+ Note:
312
+ The `coco` format of a bounding box looks like `(x_min, y_min, width, height)`, e.g. (97, 12, 150, 200).
313
+ The `pascal_voc` format of a bounding box looks like `(x_min, y_min, x_max, y_max)`, e.g. (97, 12, 247, 212).
314
+ The `yolo` format of a bounding box looks like `(x, y, width, height)`, e.g. (0.3, 0.1, 0.05, 0.07);
315
+ where `x`, `y` coordinates of the center of the box, all values normalized to 1 by image height and width.
316
+
317
+ Raises:
318
+ ValueError: if `target_format` is not equal to `coco` or `pascal_voc`, or `yolo`.
319
+ ValueError: If in YOLO format all labels not in range (0, 1).
320
+
321
+ """
322
+ if source_format not in {"coco", "pascal_voc", "yolo"}:
323
+ raise ValueError(
324
+ f"Unknown source_format {source_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'"
325
+ )
326
+
327
+ if source_format == "coco":
328
+ (x_min, y_min, width, height), tail = bbox[:4], bbox[4:]
329
+ x_max = x_min + width
330
+ y_max = y_min + height
331
+ elif source_format == "yolo":
332
+ # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
333
+ _bbox = np.array(bbox[:4])
334
+ if check_validity and np.any((_bbox <= 0) | (_bbox > 1)):
335
+ raise ValueError("In YOLO format all coordinates must be float and in range (0, 1]")
336
+
337
+ (x, y, w, h), tail = bbox[:4], bbox[4:]
338
+
339
+ w_half, h_half = w / 2, h / 2
340
+ x_min = x - w_half
341
+ y_min = y - h_half
342
+ x_max = x_min + w
343
+ y_max = y_min + h
344
+ else:
345
+ (x_min, y_min, x_max, y_max), tail = bbox[:4], bbox[4:]
346
+
347
+ bbox = (x_min, y_min, x_max, y_max) + tuple(tail) # type: ignore
348
+
349
+ if source_format != "yolo":
350
+ bbox = normalize_bbox(bbox, rows, cols)
351
+ if check_validity:
352
+ check_bbox(bbox)
353
+ return bbox
354
+
355
+
356
+ def convert_bbox_from_albumentations(
357
+ bbox: BoxType, target_format: str, rows: int, cols: int, check_validity: bool = False
358
+ ) -> BoxType:
359
+ """Convert a bounding box from the format used by albumentations to a format, specified in `target_format`.
360
+
361
+ Args:
362
+ bbox: An albumentations bounding box `(x_min, y_min, x_max, y_max)`.
363
+ target_format: required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
364
+ rows: Image height.
365
+ cols: Image width.
366
+ check_validity: Check if all boxes are valid boxes.
367
+
368
+ Returns:
369
+ tuple: A bounding box.
370
+
371
+ Note:
372
+ The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
373
+ The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
374
+ The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07].
375
+
376
+ Raises:
377
+ ValueError: if `target_format` is not equal to `coco`, `pascal_voc` or `yolo`.
378
+
379
+ """
380
+ if target_format not in {"coco", "pascal_voc", "yolo"}:
381
+ raise ValueError(
382
+ f"Unknown target_format {target_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'"
383
+ )
384
+ if check_validity:
385
+ check_bbox(bbox)
386
+
387
+ if target_format != "yolo":
388
+ bbox = denormalize_bbox(bbox, rows, cols)
389
+ if target_format == "coco":
390
+ (x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
391
+ width = x_max - x_min
392
+ height = y_max - y_min
393
+ bbox = cast(BoxType, (x_min, y_min, width, height) + tail)
394
+ elif target_format == "yolo":
395
+ (x_min, y_min, x_max, y_max), tail = bbox[:4], bbox[4:]
396
+ x = (x_min + x_max) / 2.0
397
+ y = (y_min + y_max) / 2.0
398
+ w = x_max - x_min
399
+ h = y_max - y_min
400
+ bbox = cast(BoxType, (x, y, w, h) + tail)
401
+ return bbox
402
+
403
+
404
+ def convert_bboxes_to_albumentations(
405
+ bboxes: Sequence[BoxType], source_format, rows, cols, check_validity=False
406
+ ) -> List[BoxType]:
407
+ """Convert a list bounding boxes from a format specified in `source_format` to the format used by albumentations"""
408
+ return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
409
+
410
+
411
+ def convert_bboxes_from_albumentations(
412
+ bboxes: Sequence[BoxType], target_format: str, rows: int, cols: int, check_validity: bool = False
413
+ ) -> List[BoxType]:
414
+ """Convert a list of bounding boxes from the format used by albumentations to a format, specified
415
+ in `target_format`.
416
+
417
+ Args:
418
+ bboxes: List of albumentation bounding box `(x_min, y_min, x_max, y_max)`.
419
+ target_format: required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
420
+ rows: Image height.
421
+ cols: Image width.
422
+ check_validity: Check if all boxes are valid boxes.
423
+
424
+ Returns:
425
+ List of bounding boxes.
426
+
427
+ """
428
+ return [convert_bbox_from_albumentations(bbox, target_format, rows, cols, check_validity) for bbox in bboxes]
429
+
430
+
431
+ def check_bbox(bbox: BoxType) -> None:
432
+ """Check if bbox boundaries are in range 0, 1 and minimums are lesser then maximums"""
433
+ for name, value in zip(["x_min", "y_min", "x_max", "y_max"], bbox[:4]):
434
+ if not 0 <= value <= 1 and not np.isclose(value, 0) and not np.isclose(value, 1):
435
+ raise ValueError(f"Expected {name} for bbox {bbox} to be in the range [0.0, 1.0], got {value}.")
436
+ x_min, y_min, x_max, y_max = bbox[:4]
437
+ if x_max <= x_min:
438
+ raise ValueError(f"x_max is less than or equal to x_min for bbox {bbox}.")
439
+ if y_max <= y_min:
440
+ raise ValueError(f"y_max is less than or equal to y_min for bbox {bbox}.")
441
+
442
+
443
+ def check_bboxes(bboxes: Sequence[BoxType]) -> None:
444
+ """Check if bboxes boundaries are in range 0, 1 and minimums are lesser then maximums"""
445
+ for bbox in bboxes:
446
+ check_bbox(bbox)
447
+
448
+
449
+ def filter_bboxes(
450
+ bboxes: Sequence[BoxType],
451
+ rows: int,
452
+ cols: int,
453
+ min_area: float = 0.0,
454
+ min_visibility: float = 0.0,
455
+ min_width: float = 0.0,
456
+ min_height: float = 0.0,
457
+ ) -> List[BoxType]:
458
+ """Remove bounding boxes that either lie outside of the visible area by more then min_visibility
459
+ or whose area in pixels is under the threshold set by `min_area`. Also it crops boxes to final image size.
460
+
461
+ Args:
462
+ bboxes: List of albumentation bounding box `(x_min, y_min, x_max, y_max)`.
463
+ rows: Image height.
464
+ cols: Image width.
465
+ min_area: Minimum area of a bounding box. All bounding boxes whose visible area in pixels.
466
+ is less than this value will be removed. Default: 0.0.
467
+ min_visibility: Minimum fraction of area for a bounding box to remain this box in list. Default: 0.0.
468
+ min_width: Minimum width of a bounding box. All bounding boxes whose width is
469
+ less than this value will be removed. Default: 0.0.
470
+ min_height: Minimum height of a bounding box. All bounding boxes whose height is
471
+ less than this value will be removed. Default: 0.0.
472
+
473
+ Returns:
474
+ List of bounding boxes.
475
+
476
+ """
477
+ resulting_boxes: List[BoxType] = []
478
+ for bbox in bboxes:
479
+ # Calculate areas of bounding box before and after clipping.
480
+ transformed_box_area = calculate_bbox_area(bbox, rows, cols)
481
+ bbox, tail = cast(BoxType, tuple(np.clip(bbox[:4], 0, 1.0))), tuple(bbox[4:])
482
+ clipped_box_area = calculate_bbox_area(bbox, rows, cols)
483
+
484
+ # Calculate width and height of the clipped bounding box.
485
+ x_min, y_min, x_max, y_max = denormalize_bbox(bbox, rows, cols)[:4]
486
+ clipped_width, clipped_height = x_max - x_min, y_max - y_min
487
+
488
+ if (
489
+ clipped_box_area != 0 # to ensure transformed_box_area!=0 and to handle min_area=0 or min_visibility=0
490
+ and clipped_box_area >= min_area
491
+ and clipped_box_area / transformed_box_area >= min_visibility
492
+ and clipped_width >= min_width
493
+ and clipped_height >= min_height
494
+ ):
495
+ resulting_boxes.append(cast(BoxType, bbox + tail))
496
+ return resulting_boxes
497
+
498
+
499
+ def union_of_bboxes(height: int, width: int, bboxes: Sequence[BoxType], erosion_rate: float = 0.0) -> BoxType:
500
+ """Calculate union of bounding boxes.
501
+
502
+ Args:
503
+ height (float): Height of image or space.
504
+ width (float): Width of image or space.
505
+ bboxes (List[tuple]): List like bounding boxes. Format is `[(x_min, y_min, x_max, y_max)]`.
506
+ erosion_rate (float): How much each bounding box can be shrinked, useful for erosive cropping.
507
+ Set this in range [0, 1]. 0 will not be erosive at all, 1.0 can make any bbox to lose its volume.
508
+
509
+ Returns:
510
+ tuple: A bounding box `(x_min, y_min, x_max, y_max)`.
511
+
512
+ """
513
+ x1, y1 = width, height
514
+ x2, y2 = 0, 0
515
+ for bbox in bboxes:
516
+ x_min, y_min, x_max, y_max = bbox[:4]
517
+ w, h = x_max - x_min, y_max - y_min
518
+ lim_x1, lim_y1 = x_min + erosion_rate * w, y_min + erosion_rate * h
519
+ lim_x2, lim_y2 = x_max - erosion_rate * w, y_max - erosion_rate * h
520
+ x1, y1 = np.min([x1, lim_x1]), np.min([y1, lim_y1])
521
+ x2, y2 = np.max([x2, lim_x2]), np.max([y2, lim_y2])
522
+ return x1, y1, x2, y2
custom_albumentations/core/composition.py ADDED
@@ -0,0 +1,552 @@
1
+ from __future__ import division
2
+
3
+ import random
4
+ import typing
5
+ import warnings
6
+ from collections import defaultdict
7
+
8
+ import numpy as np
9
+
10
+ from .. import random_utils
11
+ from .bbox_utils import BboxParams, BboxProcessor
12
+ from .keypoints_utils import KeypointParams, KeypointsProcessor
13
+ from .serialization import (
14
+ SERIALIZABLE_REGISTRY,
15
+ Serializable,
16
+ get_shortest_class_fullname,
17
+ instantiate_nonserializable,
18
+ )
19
+ from .transforms_interface import BasicTransform
20
+ from .utils import format_args, get_shape
21
+
22
+ __all__ = [
23
+ "BaseCompose",
24
+ "Compose",
25
+ "SomeOf",
26
+ "OneOf",
27
+ "OneOrOther",
28
+ "BboxParams",
29
+ "KeypointParams",
30
+ "ReplayCompose",
31
+ "Sequential",
32
+ ]
33
+
34
+
35
+ REPR_INDENT_STEP = 2
36
+ TransformType = typing.Union[BasicTransform, "BaseCompose"]
37
+ TransformsSeqType = typing.Sequence[TransformType]
38
+
39
+
40
+ def get_always_apply(transforms: typing.Union["BaseCompose", TransformsSeqType]) -> TransformsSeqType:
41
+ new_transforms: typing.List[TransformType] = []
42
+ for transform in transforms: # type: ignore
43
+ if isinstance(transform, BaseCompose):
44
+ new_transforms.extend(get_always_apply(transform))
45
+ elif transform.always_apply:
46
+ new_transforms.append(transform)
47
+ return new_transforms
48
+
49
+
50
+ class BaseCompose(Serializable):
51
+ def __init__(self, transforms: TransformsSeqType, p: float):
52
+ if isinstance(transforms, (BaseCompose, BasicTransform)):
53
+ warnings.warn(
54
+ "transforms is single transform, but a sequence is expected! Transform will be wrapped into list."
55
+ )
56
+ transforms = [transforms]
57
+
58
+ self.transforms = transforms
59
+ self.p = p
60
+
61
+ self.replay_mode = False
62
+ self.applied_in_replay = False
63
+
64
+ def __len__(self) -> int:
65
+ return len(self.transforms)
66
+
67
+ def __call__(self, *args, **data) -> typing.Dict[str, typing.Any]:
68
+ raise NotImplementedError
69
+
70
+ def __getitem__(self, item: int) -> TransformType: # type: ignore
71
+ return self.transforms[item]
72
+
73
+ def __repr__(self) -> str:
74
+ return self.indented_repr()
75
+
76
+ def indented_repr(self, indent: int = REPR_INDENT_STEP) -> str:
77
+ args = {k: v for k, v in self._to_dict().items() if not (k.startswith("__") or k == "transforms")}
78
+ repr_string = self.__class__.__name__ + "(["
79
+ for t in self.transforms:
80
+ repr_string += "\n"
81
+ if hasattr(t, "indented_repr"):
82
+ t_repr = t.indented_repr(indent + REPR_INDENT_STEP) # type: ignore
83
+ else:
84
+ t_repr = repr(t)
85
+ repr_string += " " * indent + t_repr + ","
86
+ repr_string += "\n" + " " * (indent - REPR_INDENT_STEP) + "], {args})".format(args=format_args(args))
87
+ return repr_string
88
+
89
+ @classmethod
90
+ def get_class_fullname(cls) -> str:
91
+ return get_shortest_class_fullname(cls)
92
+
93
+ @classmethod
94
+ def is_serializable(cls) -> bool:
95
+ return True
96
+
97
+ def _to_dict(self) -> typing.Dict[str, typing.Any]:
98
+ return {
99
+ "__class_fullname__": self.get_class_fullname(),
100
+ "p": self.p,
101
+ "transforms": [t._to_dict() for t in self.transforms], # skipcq: PYL-W0212
102
+ }
103
+
104
+ def get_dict_with_id(self) -> typing.Dict[str, typing.Any]:
105
+ return {
106
+ "__class_fullname__": self.get_class_fullname(),
107
+ "id": id(self),
108
+ "params": None,
109
+ "transforms": [t.get_dict_with_id() for t in self.transforms],
110
+ }
111
+
112
+ def add_targets(self, additional_targets: typing.Optional[typing.Dict[str, str]]) -> None:
113
+ if additional_targets:
114
+ for t in self.transforms:
115
+ t.add_targets(additional_targets)
116
+
117
+ def set_deterministic(self, flag: bool, save_key: str = "replay") -> None:
118
+ for t in self.transforms:
119
+ t.set_deterministic(flag, save_key)
120
+
121
+
122
+ class Compose(BaseCompose):
123
+ """Compose transforms and handle all transformations regarding bounding boxes
124
+
125
+ Args:
126
+ transforms (list): list of transformations to compose.
127
+ bbox_params (BboxParams): Parameters for bounding boxes transforms
128
+ keypoint_params (KeypointParams): Parameters for keypoints transforms
129
+ additional_targets (dict): Dict with keys - new target name, values - old target name. ex: {'image2': 'image'}
130
+ p (float): probability of applying all list of transforms. Default: 1.0.
131
+ is_check_shapes (bool): If True, the shape consistency of images/mask/masks is checked on each call. If you
132
+ would like to disable this check, pass False (do it only if you are sure of your data consistency).
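+
+ Example:
+ A minimal pipeline sketch (the transform choice is illustrative):
+
+ >>> import numpy as np
+ >>> import custom_albumentations as A
+ >>> transform = A.Compose([A.HorizontalFlip(p=0.5)], p=1.0)
+ >>> augmented = transform(image=np.zeros((8, 8, 3), dtype=np.uint8))["image"]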
133
+ """
134
+
135
+ def __init__(
136
+ self,
137
+ transforms: TransformsSeqType,
138
+ bbox_params: typing.Optional[typing.Union[dict, "BboxParams"]] = None,
139
+ keypoint_params: typing.Optional[typing.Union[dict, "KeypointParams"]] = None,
140
+ additional_targets: typing.Optional[typing.Dict[str, str]] = None,
141
+ p: float = 1.0,
142
+ is_check_shapes: bool = True,
143
+ ):
144
+ super(Compose, self).__init__(transforms, p)
145
+
146
+ self.processors: typing.Dict[str, typing.Union[BboxProcessor, KeypointsProcessor]] = {}
147
+ if bbox_params:
148
+ if isinstance(bbox_params, dict):
149
+ b_params = BboxParams(**bbox_params)
150
+ elif isinstance(bbox_params, BboxParams):
151
+ b_params = bbox_params
152
+ else:
153
+ raise ValueError("unknown format of bbox_params, please use `dict` or `BboxParams`")
154
+ self.processors["bboxes"] = BboxProcessor(b_params, additional_targets)
155
+
156
+ if keypoint_params:
157
+ if isinstance(keypoint_params, dict):
158
+ k_params = KeypointParams(**keypoint_params)
159
+ elif isinstance(keypoint_params, KeypointParams):
160
+ k_params = keypoint_params
161
+ else:
162
+ raise ValueError("unknown format of keypoint_params, please use `dict` or `KeypointParams`")
163
+ self.processors["keypoints"] = KeypointsProcessor(k_params, additional_targets)
164
+
165
+ if additional_targets is None:
166
+ additional_targets = {}
167
+
168
+ self.additional_targets = additional_targets
169
+
170
+ for proc in self.processors.values():
171
+ proc.ensure_transforms_valid(self.transforms)
172
+
173
+ self.add_targets(additional_targets)
174
+
175
+ self.is_check_args = True
176
+ self._disable_check_args_for_transforms(self.transforms)
177
+
178
+ self.is_check_shapes = is_check_shapes
179
+
180
+ @staticmethod
181
+ def _disable_check_args_for_transforms(transforms: TransformsSeqType) -> None:
182
+ for transform in transforms:
183
+ if isinstance(transform, BaseCompose):
184
+ Compose._disable_check_args_for_transforms(transform.transforms)
185
+ if isinstance(transform, Compose):
186
+ transform._disable_check_args()
187
+
188
+ def _disable_check_args(self) -> None:
189
+ self.is_check_args = False
190
+
191
+ def __call__(self, *args, force_apply: bool = False, **data) -> typing.Dict[str, typing.Any]:
192
+ if args:
193
+ raise KeyError("You have to pass data to augmentations as named arguments, for example: aug(image=image)")
194
+ if self.is_check_args:
195
+ self._check_args(**data)
196
+ assert isinstance(force_apply, (bool, int)), "force_apply must have bool or int type"
197
+ need_to_run = force_apply or random.random() < self.p
198
+ for p in self.processors.values():
199
+ p.ensure_data_valid(data)
200
+ transforms = self.transforms if need_to_run else get_always_apply(self.transforms)
201
+
202
+ check_each_transform = any(
203
+ getattr(item.params, "check_each_transform", False) for item in self.processors.values()
204
+ )
205
+
206
+ for p in self.processors.values():
207
+ p.preprocess(data)
208
+
209
+ for idx, t in enumerate(transforms):
210
+ data = t(**data)
211
+
212
+ if check_each_transform:
213
+ data = self._check_data_post_transform(data)
214
+ data = Compose._make_targets_contiguous(data) # ensure output targets are contiguous
215
+
216
+ for p in self.processors.values():
217
+ p.postprocess(data)
218
+
219
+ return data
220
+
221
+ def _check_data_post_transform(self, data: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]:
222
+ rows, cols = get_shape(data["image"])
223
+
224
+ for p in self.processors.values():
225
+ if not getattr(p.params, "check_each_transform", False):
226
+ continue
227
+
228
+ for data_name in p.data_fields:
229
+ data[data_name] = p.filter(data[data_name], rows, cols)
230
+ return data
231
+
232
+ def _to_dict(self) -> typing.Dict[str, typing.Any]:
233
+ dictionary = super(Compose, self)._to_dict()
234
+ bbox_processor = self.processors.get("bboxes")
235
+ keypoints_processor = self.processors.get("keypoints")
236
+ dictionary.update(
237
+ {
238
+ "bbox_params": bbox_processor.params._to_dict() if bbox_processor else None, # skipcq: PYL-W0212
239
+ "keypoint_params": keypoints_processor.params._to_dict() # skipcq: PYL-W0212
240
+ if keypoints_processor
241
+ else None,
242
+ "additional_targets": self.additional_targets,
243
+ "is_check_shapes": self.is_check_shapes,
244
+ }
245
+ )
246
+ return dictionary
247
+
248
+ def get_dict_with_id(self) -> typing.Dict[str, typing.Any]:
249
+ dictionary = super().get_dict_with_id()
250
+ bbox_processor = self.processors.get("bboxes")
251
+ keypoints_processor = self.processors.get("keypoints")
252
+ dictionary.update(
253
+ {
254
+ "bbox_params": bbox_processor.params._to_dict() if bbox_processor else None, # skipcq: PYL-W0212
255
+ "keypoint_params": keypoints_processor.params._to_dict() # skipcq: PYL-W0212
256
+ if keypoints_processor
257
+ else None,
258
+ "additional_targets": self.additional_targets,
259
+ "params": None,
260
+ "is_check_shapes": self.is_check_shapes,
261
+ }
262
+ )
263
+ return dictionary
264
+
265
+ def _check_args(self, **kwargs) -> None:
266
+ checked_single = ["image", "mask"]
267
+ checked_multi = ["masks"]
268
+ check_bbox_param = ["bboxes"]
269
+ # ["bboxes", "keypoints"] could be almost any type, no need to check them
270
+ shapes = []
271
+ for data_name, data in kwargs.items():
272
+ internal_data_name = self.additional_targets.get(data_name, data_name)
273
+ if internal_data_name in checked_single:
274
+ if not isinstance(data, np.ndarray):
275
+ raise TypeError("{} must be numpy array type".format(data_name))
276
+ shapes.append(data.shape[:2])
277
+ if internal_data_name in checked_multi:
278
+ if data is not None and len(data):
279
+ if not isinstance(data[0], np.ndarray):
280
+ raise TypeError("{} must be list of numpy arrays".format(data_name))
281
+ shapes.append(data[0].shape[:2])
282
+ if internal_data_name in check_bbox_param and self.processors.get("bboxes") is None:
283
+ raise ValueError("bbox_params must be specified for bbox transformations")
284
+
285
+ if self.is_check_shapes and shapes and shapes.count(shapes[0]) != len(shapes):
286
+ raise ValueError(
287
+ "Height and Width of image, mask or masks should be equal. You can disable shapes check "
288
+ "by setting a parameter is_check_shapes=False of Compose class (do it only if you are sure "
289
+ "about your data consistency)."
290
+ )
291
+
292
+ @staticmethod
293
+ def _make_targets_contiguous(data: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]:
294
+ result = {}
295
+ for key, value in data.items():
296
+ if isinstance(value, np.ndarray):
297
+ value = np.ascontiguousarray(value)
298
+ result[key] = value
299
+ return result
300
+
301
+
302
+ class OneOf(BaseCompose):
303
+ """Select one of transforms to apply. Selected transform will be called with `force_apply=True`.
304
+ Transform probabilities will be normalized to sum to 1, so they effectively work as weights.
305
+
306
+ Args:
307
+ transforms (list): list of transformations to compose.
308
+ p (float): probability of applying selected transform. Default: 0.5.
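+
+ Example:
+ A weighting sketch (transform choice is illustrative): with inner p values of
+ 0.9 and 0.1, the first transform is chosen ~90% of the times OneOf fires.
+
+ >>> import custom_albumentations as A
+ >>> one_of = A.OneOf([A.Blur(p=0.9), A.ToGray(p=0.1)], p=0.5)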
309
+ """
310
+
311
+ def __init__(self, transforms: TransformsSeqType, p: float = 0.5):
312
+ super(OneOf, self).__init__(transforms, p)
313
+ transforms_ps = [t.p for t in self.transforms]
314
+ s = sum(transforms_ps)
315
+ self.transforms_ps = [t / s for t in transforms_ps]
316
+
317
+ def __call__(self, *args, force_apply: bool = False, **data) -> typing.Dict[str, typing.Any]:
318
+ if self.replay_mode:
319
+ for t in self.transforms:
320
+ data = t(**data)
321
+ return data
322
+
323
+ if self.transforms_ps and (force_apply or random.random() < self.p):
324
+ idx: int = random_utils.choice(len(self.transforms), p=self.transforms_ps)
325
+ t = self.transforms[idx]
326
+ data = t(force_apply=True, **data)
327
+ return data
328
+
329
+
330
+ class SomeOf(BaseCompose):
331
+ """Select N transforms to apply. Selected transforms will be called with `force_apply=True`.
332
+ Transform probabilities will be normalized to sum to 1, so they effectively work as weights.
333
+
334
+ Args:
335
+ transforms (list): list of transformations to compose.
336
+ n (int): number of transforms to apply.
337
+ replace (bool): Whether the sampled transforms are with or without replacement. Default: True.
338
+ p (float): probability of applying selected transform. Default: 1.
339
+ """
340
+
341
+ def __init__(self, transforms: TransformsSeqType, n: int, replace: bool = True, p: float = 1):
342
+ super(SomeOf, self).__init__(transforms, p)
343
+ self.n = n
344
+ self.replace = replace
345
+ transforms_ps = [t.p for t in self.transforms]
346
+ s = sum(transforms_ps)
347
+ self.transforms_ps = [t / s for t in transforms_ps]
348
+
349
+ def __call__(self, *args, force_apply: bool = False, **data) -> typing.Dict[str, typing.Any]:
350
+ if self.replay_mode:
351
+ for t in self.transforms:
352
+ data = t(**data)
353
+ return data
354
+
355
+ if self.transforms_ps and (force_apply or random.random() < self.p):
356
+ idx = random_utils.choice(len(self.transforms), size=self.n, replace=self.replace, p=self.transforms_ps)
357
+ for i in idx: # type: ignore
358
+ t = self.transforms[i]
359
+ data = t(force_apply=True, **data)
360
+ return data
361
+
362
+ def _to_dict(self) -> typing.Dict[str, typing.Any]:
363
+ dictionary = super(SomeOf, self)._to_dict()
364
+ dictionary.update({"n": self.n, "replace": self.replace})
365
+ return dictionary
366
+
367
+
368
+ class OneOrOther(BaseCompose):
369
+ """Select one or another transform to apply. Selected transform will be called with `force_apply=True`."""
370
+
371
+ def __init__(
372
+ self,
373
+ first: typing.Optional[TransformType] = None,
374
+ second: typing.Optional[TransformType] = None,
375
+ transforms: typing.Optional[TransformsSeqType] = None,
376
+ p: float = 0.5,
377
+ ):
378
+ if transforms is None:
379
+ if first is None or second is None:
380
+ raise ValueError("You must set both first and second or set transforms argument.")
381
+ transforms = [first, second]
382
+ super(OneOrOther, self).__init__(transforms, p)
383
+ if len(self.transforms) != 2:
384
+ warnings.warn("Length of transforms is not equal to 2.")
385
+
386
+ def __call__(self, *args, force_apply: bool = False, **data) -> typing.Dict[str, typing.Any]:
387
+ if self.replay_mode:
388
+ for t in self.transforms:
389
+ data = t(**data)
390
+ return data
391
+
392
+ if random.random() < self.p:
393
+ return self.transforms[0](force_apply=True, **data)
394
+
395
+ return self.transforms[-1](force_apply=True, **data)
396
+
397
+
398
+ class PerChannel(BaseCompose):
399
+ """Apply transformations per-channel
400
+
401
+ Args:
402
+ transforms (list): list of transformations to compose.
403
+ channels (sequence): channels to apply the transform to. Pass None to apply to all.
404
+ Default: None (apply to all)
405
+ p (float): probability of applying the transform. Default: 0.5.
406
+ """
407
+
408
+ def __init__(
409
+ self, transforms: TransformsSeqType, channels: typing.Optional[typing.Sequence[int]] = None, p: float = 0.5
410
+ ):
411
+ super(PerChannel, self).__init__(transforms, p)
412
+ self.channels = channels
413
+
414
+ def __call__(self, *args, force_apply: bool = False, **data) -> typing.Dict[str, typing.Any]:
415
+ if force_apply or random.random() < self.p:
416
+ image = data["image"]
417
+
418
+ # Expand mono images to have a single channel
419
+ if len(image.shape) == 2:
420
+ image = np.expand_dims(image, -1)
421
+
422
+ if self.channels is None:
423
+ self.channels = range(image.shape[2])
424
+
425
+ for c in self.channels:
426
+ for t in self.transforms:
427
+ image[:, :, c] = t(image=image[:, :, c])["image"]
428
+
429
+ data["image"] = image
430
+
431
+ return data
432
+
433
+
434
+ class ReplayCompose(Compose):
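+ """Compose variant that records the parameters of every applied transform under
+ `save_key` so the exact same augmentation can be replayed on other data.
+
+ Example:
+ A replay sketch (`image1` and `image2` stand in for numpy images):
+
+ >>> import custom_albumentations as A
+ >>> aug = A.ReplayCompose([A.HorizontalFlip(p=0.5)])
+ >>> first = aug(image=image1)
+ >>> second = A.ReplayCompose.replay(first["replay"], image=image2)
+ """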
435
+ def __init__(
436
+ self,
437
+ transforms: TransformsSeqType,
438
+ bbox_params: typing.Optional[typing.Union[dict, "BboxParams"]] = None,
439
+ keypoint_params: typing.Optional[typing.Union[dict, "KeypointParams"]] = None,
440
+ additional_targets: typing.Optional[typing.Dict[str, str]] = None,
441
+ p: float = 1.0,
442
+ is_check_shapes: bool = True,
443
+ save_key: str = "replay",
444
+ ):
445
+ super(ReplayCompose, self).__init__(
446
+ transforms, bbox_params, keypoint_params, additional_targets, p, is_check_shapes
447
+ )
448
+ self.set_deterministic(True, save_key=save_key)
449
+ self.save_key = save_key
450
+
451
+ def __call__(self, *args, force_apply: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
452
+ kwargs[self.save_key] = defaultdict(dict)
453
+ result = super(ReplayCompose, self).__call__(force_apply=force_apply, **kwargs)
454
+ serialized = self.get_dict_with_id()
455
+ self.fill_with_params(serialized, result[self.save_key])
456
+ self.fill_applied(serialized)
457
+ result[self.save_key] = serialized
458
+ return result
459
+
460
+ @staticmethod
461
+ def replay(saved_augmentations: typing.Dict[str, typing.Any], **kwargs) -> typing.Dict[str, typing.Any]:
462
+ augs = ReplayCompose._restore_for_replay(saved_augmentations)
463
+ return augs(force_apply=True, **kwargs)
464
+
465
+ @staticmethod
466
+ def _restore_for_replay(
467
+ transform_dict: typing.Dict[str, typing.Any], lambda_transforms: typing.Optional[dict] = None
468
+ ) -> TransformType:
469
+ """
470
+ Args:
471
+ lambda_transforms (dict): A dictionary that contains lambda transforms, that
472
+ is instances of the Lambda class.
473
+ This dictionary is required when you are restoring a pipeline that contains lambda transforms. Keys
474
+ in that dictionary should be named same as `name` arguments in respective lambda transforms from
475
+ a serialized pipeline.
476
+ """
477
+ applied = transform_dict["applied"]
478
+ params = transform_dict["params"]
479
+ lmbd = instantiate_nonserializable(transform_dict, lambda_transforms)
480
+ if lmbd:
481
+ transform = lmbd
482
+ else:
483
+ name = transform_dict["__class_fullname__"]
484
+ args = {k: v for k, v in transform_dict.items() if k not in ["__class_fullname__", "applied", "params"]}
485
+ cls = SERIALIZABLE_REGISTRY[name]
486
+ if "transforms" in args:
487
+ args["transforms"] = [
488
+ ReplayCompose._restore_for_replay(t, lambda_transforms=lambda_transforms)
489
+ for t in args["transforms"]
490
+ ]
491
+ transform = cls(**args)
492
+
493
+ transform = typing.cast(BasicTransform, transform)
494
+ if isinstance(transform, BasicTransform):
495
+ transform.params = params
496
+ transform.replay_mode = True
497
+ transform.applied_in_replay = applied
498
+ return transform
499
+
500
+ def fill_with_params(self, serialized: dict, all_params: dict) -> None:
501
+ params = all_params.get(serialized.get("id"))
502
+ serialized["params"] = params
503
+ del serialized["id"]
504
+ for transform in serialized.get("transforms", []):
505
+ self.fill_with_params(transform, all_params)
506
+
507
+ def fill_applied(self, serialized: typing.Dict[str, typing.Any]) -> bool:
508
+ if "transforms" in serialized:
509
+ applied = [self.fill_applied(t) for t in serialized["transforms"]]
510
+ serialized["applied"] = any(applied)
511
+ else:
512
+ serialized["applied"] = serialized.get("params") is not None
513
+ return serialized["applied"]
514
+
515
+ def _to_dict(self) -> typing.Dict[str, typing.Any]:
516
+ dictionary = super(ReplayCompose, self)._to_dict()
517
+ dictionary.update({"save_key": self.save_key})
518
+ return dictionary
519
+
520
+
521
+ class Sequential(BaseCompose):
522
+ """Sequentially applies all transforms to targets.
523
+
524
+ Note:
525
+ This transform is not intended to be a replacement for `Compose`. Instead, it should be used inside `Compose`
526
+ the same way `OneOf` or `OneOrOther` are used. For instance, you can combine `OneOf` with `Sequential` to
527
+ create an augmentation pipeline that contains multiple sequences of augmentations and applies one randomly
528
+ chosen sequence to the input data (see the `Example` section for an example definition of such a pipeline).
529
+
530
+ Example:
531
+ >>> import custom_albumentations as albumentations as A
532
+ >>> transform = A.Compose([
533
+ >>> A.OneOf([
534
+ >>> A.Sequential([
535
+ >>> A.HorizontalFlip(p=0.5),
536
+ >>> A.ShiftScaleRotate(p=0.5),
537
+ >>> ]),
538
+ >>> A.Sequential([
539
+ >>> A.VerticalFlip(p=0.5),
540
+ >>> A.RandomBrightnessContrast(p=0.5),
541
+ >>> ]),
542
+ >>> ], p=1)
543
+ >>> ])
544
+ """
545
+
546
+ def __init__(self, transforms: TransformsSeqType, p: float = 0.5):
547
+ super().__init__(transforms, p)
548
+
549
+ def __call__(self, *args, **data) -> typing.Dict[str, typing.Any]:
550
+ for t in self.transforms:
551
+ data = t(**data)
552
+ return data
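
To make the replay machinery above concrete, here is a minimal usage sketch (illustrative, and assuming this fork re-exports ReplayCompose and the basic transforms at package level, as upstream albumentations does). ReplayCompose records the sampled parameters of every transform under `save_key` ('replay' by default), and the static `replay` method rebuilds the pipeline via `_restore_for_replay` and re-applies it with `force_apply=True`:

import numpy as np
import custom_albumentations as A

aug = A.ReplayCompose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
])

image1 = np.zeros((100, 100, 3), dtype=np.uint8)
image2 = np.zeros((100, 100, 3), dtype=np.uint8)

first = aug(image=image1)    # samples parameters and records them
saved = first["replay"]      # serialized transforms plus their params
second = A.ReplayCompose.replay(saved, image=image2)  # same params, new image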
custom_albumentations/core/keypoints_utils.py ADDED
@@ -0,0 +1,286 @@
1
+ from __future__ import division
2
+
3
+ import math
4
+ import typing
5
+ import warnings
6
+ from typing import Any, Dict, List, Optional, Sequence, Tuple
7
+
8
+ from .utils import DataProcessor, Params
9
+
10
+ __all__ = [
11
+ "angle_to_2pi_range",
12
+ "check_keypoints",
13
+ "convert_keypoints_from_albumentations",
14
+ "convert_keypoints_to_albumentations",
15
+ "filter_keypoints",
16
+ "KeypointsProcessor",
17
+ "KeypointParams",
18
+ ]
19
+
20
+ keypoint_formats = {"xy", "yx", "xya", "xys", "xyas", "xysa"}
21
+
22
+
23
+ def angle_to_2pi_range(angle: float) -> float:
24
+ two_pi = 2 * math.pi
25
+ return angle % two_pi
26
+
27
+
28
+ class KeypointParams(Params):
29
+ """
30
+ Parameters of keypoints
31
+
32
+ Args:
33
+ format (str): format of keypoints. Should be 'xy', 'yx', 'xya', 'xys', 'xyas', 'xysa'.
34
+
35
+ x - X coordinate,
36
+
37
+ y - Y coordinate
38
+
39
+ s - Keypoint scale
40
+
41
+ a - Keypoint orientation in radians or degrees (depending on KeypointParams.angle_in_degrees)
42
+ label_fields (list): list of fields that are joined with keypoints, e.g labels.
43
+ Should be same type as keypoints.
44
+ remove_invisible (bool): to remove invisible points after transform or not
45
+ angle_in_degrees (bool): angle in degrees or radians in 'xya', 'xyas', 'xysa' keypoints
46
+ check_each_transform (bool): if `True`, then keypoints will be checked after each dual transform.
47
+ Default: `True`
48
+ """
49
+
50
+ def __init__(
51
+ self,
52
+ format: str, # skipcq: PYL-W0622
53
+ label_fields: Optional[Sequence[str]] = None,
54
+ remove_invisible: bool = True,
55
+ angle_in_degrees: bool = True,
56
+ check_each_transform: bool = True,
57
+ ):
58
+ super(KeypointParams, self).__init__(format, label_fields)
59
+ self.remove_invisible = remove_invisible
60
+ self.angle_in_degrees = angle_in_degrees
61
+ self.check_each_transform = check_each_transform
62
+
63
+ def _to_dict(self) -> Dict[str, Any]:
64
+ data = super(KeypointParams, self)._to_dict()
65
+ data.update(
66
+ {
67
+ "remove_invisible": self.remove_invisible,
68
+ "angle_in_degrees": self.angle_in_degrees,
69
+ "check_each_transform": self.check_each_transform,
70
+ }
71
+ )
72
+ return data
73
+
74
+ @classmethod
75
+ def is_serializable(cls) -> bool:
76
+ return True
77
+
78
+ @classmethod
79
+ def get_class_fullname(cls) -> str:
80
+ return "KeypointParams"
81
+
82
+
83
+ class KeypointsProcessor(DataProcessor):
84
+ def __init__(self, params: KeypointParams, additional_targets: Optional[Dict[str, str]] = None):
85
+ super().__init__(params, additional_targets)
86
+
87
+ @property
88
+ def default_data_name(self) -> str:
89
+ return "keypoints"
90
+
91
+ def ensure_data_valid(self, data: Dict[str, Any]) -> None:
92
+ if self.params.label_fields:
93
+ if not all(i in data.keys() for i in self.params.label_fields):
94
+ raise ValueError(
95
+ "Your 'label_fields' are not valid - they must have the same names as params in "
96
+ "'keypoint_params' dict"
97
+ )
98
+
99
+ def ensure_transforms_valid(self, transforms: Sequence[object]) -> None:
100
+ # IAA-based augmentations support only transformation of xy keypoints.
101
+ # If your keypoint format is other than 'xy' we emit a warning to let the user
102
+ # be aware that angle and size will not be modified.
103
+
104
+ try:
105
+ from custom_albumentations.imgaug.transforms import DualIAATransform
106
+ except ImportError:
107
+ # imgaug is not installed so we skip imgaug checks.
108
+ return
109
+
110
+ if self.params.format is not None and self.params.format != "xy":
111
+ for transform in transforms:
112
+ if isinstance(transform, DualIAATransform):
113
+ warnings.warn(
114
+ "{} transformation supports only 'xy' keypoints "
115
+ "augmentation. You have '{}' keypoints format. Scale "
116
+ "and angle WILL NOT BE transformed.".format(transform.__class__.__name__, self.params.format)
117
+ )
118
+ break
119
+
120
+ def filter(self, data: Sequence[Sequence], rows: int, cols: int) -> Sequence[Sequence]:
121
+ self.params: KeypointParams
122
+ return filter_keypoints(data, rows, cols, remove_invisible=self.params.remove_invisible)
123
+
124
+ def check(self, data: Sequence[Sequence], rows: int, cols: int) -> None:
125
+ check_keypoints(data, rows, cols)
126
+
127
+ def convert_from_albumentations(self, data: Sequence[Sequence], rows: int, cols: int) -> List[Tuple]:
128
+ params = self.params
129
+ return convert_keypoints_from_albumentations(
130
+ data,
131
+ params.format,
132
+ rows,
133
+ cols,
134
+ check_validity=params.remove_invisible,
135
+ angle_in_degrees=params.angle_in_degrees,
136
+ )
137
+
138
+ def convert_to_albumentations(self, data: Sequence[Sequence], rows: int, cols: int) -> List[Tuple]:
139
+ params = self.params
140
+ return convert_keypoints_to_albumentations(
141
+ data,
142
+ params.format,
143
+ rows,
144
+ cols,
145
+ check_validity=params.remove_invisible,
146
+ angle_in_degrees=params.angle_in_degrees,
147
+ )
148
+
149
+
150
+ def check_keypoint(kp: Sequence, rows: int, cols: int) -> None:
151
+ """Check that the keypoint coordinates are within the image bounds"""
152
+ for name, value, size in zip(["x", "y"], kp[:2], [cols, rows]):
153
+ if not 0 <= value < size:
154
+ raise ValueError(
155
+ "Expected {name} for keypoint {kp} "
156
+ "to be in the range [0, {size}), got {value}.".format(kp=kp, name=name, value=value, size=size)
157
+ )
158
+
159
+ angle = kp[2]
160
+ if not (0 <= angle < 2 * math.pi):
161
+ raise ValueError("Keypoint angle must be in range [0, 2 * PI). Got: {angle}".format(angle=angle))
162
+
163
+
164
+ def check_keypoints(keypoints: Sequence[Sequence], rows: int, cols: int) -> None:
165
+ """Check that all keypoints are within the image bounds"""
166
+ for kp in keypoints:
167
+ check_keypoint(kp, rows, cols)
168
+
169
+
170
+ def filter_keypoints(keypoints: Sequence[Sequence], rows: int, cols: int, remove_invisible: bool) -> Sequence[Sequence]:
171
+ if not remove_invisible:
172
+ return keypoints
173
+
174
+ resulting_keypoints = []
175
+ for kp in keypoints:
176
+ x, y = kp[:2]
177
+ if x < 0 or x >= cols:
178
+ continue
179
+ if y < 0 or y >= rows:
180
+ continue
181
+ resulting_keypoints.append(kp)
182
+ return resulting_keypoints
183
+
184
+
185
+ def convert_keypoint_to_albumentations(
186
+ keypoint: Sequence,
187
+ source_format: str,
188
+ rows: int,
189
+ cols: int,
190
+ check_validity: bool = False,
191
+ angle_in_degrees: bool = True,
192
+ ) -> Tuple:
193
+ if source_format not in keypoint_formats:
194
+ raise ValueError("Unknown source_format {}. Supported formats are: {}".format(source_format, keypoint_formats))
195
+
196
+ if source_format == "xy":
197
+ (x, y), tail = keypoint[:2], tuple(keypoint[2:])
198
+ a, s = 0.0, 0.0
199
+ elif source_format == "yx":
200
+ (y, x), tail = keypoint[:2], tuple(keypoint[2:])
201
+ a, s = 0.0, 0.0
202
+ elif source_format == "xya":
203
+ (x, y, a), tail = keypoint[:3], tuple(keypoint[3:])
204
+ s = 0.0
205
+ elif source_format == "xys":
206
+ (x, y, s), tail = keypoint[:3], tuple(keypoint[3:])
207
+ a = 0.0
208
+ elif source_format == "xyas":
209
+ (x, y, a, s), tail = keypoint[:4], tuple(keypoint[4:])
210
+ elif source_format == "xysa":
211
+ (x, y, s, a), tail = keypoint[:4], tuple(keypoint[4:])
212
+ else:
213
+ raise ValueError(f"Unsupported source format. Got {source_format}")
214
+
215
+ if angle_in_degrees:
216
+ a = math.radians(a)
217
+
218
+ keypoint = (x, y, angle_to_2pi_range(a), s) + tail
219
+ if check_validity:
220
+ check_keypoint(keypoint, rows, cols)
221
+ return keypoint
222
+
223
+
224
+ def convert_keypoint_from_albumentations(
225
+ keypoint: Sequence,
226
+ target_format: str,
227
+ rows: int,
228
+ cols: int,
229
+ check_validity: bool = False,
230
+ angle_in_degrees: bool = True,
231
+ ) -> Tuple:
232
+ if target_format not in keypoint_formats:
233
+ raise ValueError("Unknown target_format {}. Supported formats are: {}".format(target_format, keypoint_formats))
234
+
235
+ (x, y, angle, scale), tail = keypoint[:4], tuple(keypoint[4:])
236
+ angle = angle_to_2pi_range(angle)
237
+ if check_validity:
238
+ check_keypoint((x, y, angle, scale), rows, cols)
239
+ if angle_in_degrees:
240
+ angle = math.degrees(angle)
241
+
242
+ kp: Tuple
243
+ if target_format == "xy":
244
+ kp = (x, y)
245
+ elif target_format == "yx":
246
+ kp = (y, x)
247
+ elif target_format == "xya":
248
+ kp = (x, y, angle)
249
+ elif target_format == "xys":
250
+ kp = (x, y, scale)
251
+ elif target_format == "xyas":
252
+ kp = (x, y, angle, scale)
253
+ elif target_format == "xysa":
254
+ kp = (x, y, scale, angle)
255
+ else:
256
+ raise ValueError(f"Invalid target format. Got: {target_format}")
257
+
258
+ return kp + tail
259
+
260
+
261
+ def convert_keypoints_to_albumentations(
262
+ keypoints: Sequence[Sequence],
263
+ source_format: str,
264
+ rows: int,
265
+ cols: int,
266
+ check_validity: bool = False,
267
+ angle_in_degrees: bool = True,
268
+ ) -> List[Tuple]:
269
+ return [
270
+ convert_keypoint_to_albumentations(kp, source_format, rows, cols, check_validity, angle_in_degrees)
271
+ for kp in keypoints
272
+ ]
273
+
274
+
275
+ def convert_keypoints_from_albumentations(
276
+ keypoints: Sequence[Sequence],
277
+ target_format: str,
278
+ rows: int,
279
+ cols: int,
280
+ check_validity: bool = False,
281
+ angle_in_degrees: bool = True,
282
+ ) -> List[Tuple]:
283
+ return [
284
+ convert_keypoint_from_albumentations(kp, target_format, rows, cols, check_validity, angle_in_degrees)
285
+ for kp in keypoints
286
+ ]
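
A short sketch of how the converters above behave (the keypoint values are illustrative): an 'xyas' keypoint with its angle in degrees is normalized to the internal (x, y, angle in radians within [0, 2*pi), scale) layout and converted back losslessly.

from custom_albumentations.core.keypoints_utils import (
    convert_keypoint_from_albumentations,
    convert_keypoint_to_albumentations,
)

kp = (10.0, 20.0, 90.0, 2.0)  # x, y, angle in degrees, scale
internal = convert_keypoint_to_albumentations(kp, "xyas", rows=100, cols=100)
# internal == (10.0, 20.0, 1.5707963267948966, 2.0) -- angle now in radians

restored = convert_keypoint_from_albumentations(internal, "xyas", rows=100, cols=100)
# restored == (10.0, 20.0, 90.0, 2.0)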
custom_albumentations/core/serialization.py ADDED
@@ -0,0 +1,247 @@
1
+ from __future__ import absolute_import
2
+
3
+ import json
4
+ import typing
5
+ import warnings
6
+ from abc import ABC, ABCMeta, abstractmethod
7
+ from typing import IO, Any, Callable, Dict, Optional, Tuple, Type, Union
8
+
9
+ try:
10
+ import yaml
11
+
12
+ yaml_available = True
13
+ except ImportError:
14
+ yaml_available = False
15
+
16
+
17
+ from custom_albumentations import __version__
18
+
19
+ __all__ = ["to_dict", "from_dict", "save", "load"]
20
+
21
+
22
+ SERIALIZABLE_REGISTRY: Dict[str, "SerializableMeta"] = {}
23
+ NON_SERIALIZABLE_REGISTRY: Dict[str, "SerializableMeta"] = {}
24
+
25
+
26
+ def shorten_class_name(class_fullname: str) -> str:
27
+ splitted = class_fullname.split(".")
28
+ if len(splitted) == 1:
29
+ return class_fullname
30
+ top_module, *_, class_name = splitted
31
+ if top_module == "albumentations":
32
+ return class_name
33
+ return class_fullname
34
+
35
+
36
+ def get_shortest_class_fullname(cls: Type) -> str:
37
+ class_fullname = "{cls.__module__}.{cls.__name__}".format(cls=cls)
38
+ return shorten_class_name(class_fullname)
39
+
40
+
41
+ class SerializableMeta(ABCMeta):
42
+ """
43
+ A metaclass that is used to register classes in `SERIALIZABLE_REGISTRY` or `NON_SERIALIZABLE_REGISTRY`
44
+ so they can be found later when deserializing a transformation pipeline using the classes' full names.
45
+ """
46
+
47
+ def __new__(mcs, name: str, bases: Tuple[type, ...], *args, **kwargs) -> "SerializableMeta":
48
+ cls_obj = super().__new__(mcs, name, bases, *args, **kwargs)
49
+ if name != "Serializable" and ABC not in bases:
50
+ if cls_obj.is_serializable():
51
+ SERIALIZABLE_REGISTRY[cls_obj.get_class_fullname()] = cls_obj
52
+ else:
53
+ NON_SERIALIZABLE_REGISTRY[cls_obj.get_class_fullname()] = cls_obj
54
+ return cls_obj
55
+
56
+ @classmethod
57
+ def is_serializable(mcs) -> bool:
58
+ return False
59
+
60
+ @classmethod
61
+ def get_class_fullname(mcs) -> str:
62
+ return get_shortest_class_fullname(mcs)
63
+
64
+ @classmethod
65
+ def _to_dict(mcs) -> Dict[str, Any]:
66
+ return {}
67
+
68
+
69
+ class Serializable(metaclass=SerializableMeta):
70
+ @classmethod
71
+ @abstractmethod
72
+ def is_serializable(cls) -> bool:
73
+ raise NotImplementedError
74
+
75
+ @classmethod
76
+ @abstractmethod
77
+ def get_class_fullname(cls) -> str:
78
+ raise NotImplementedError
79
+
80
+ @abstractmethod
81
+ def _to_dict(self) -> Dict[str, Any]:
82
+ raise NotImplementedError
83
+
84
+ def to_dict(self, on_not_implemented_error: str = "raise") -> Dict[str, Any]:
85
+ """
86
+ Take a transform pipeline and convert it to a serializable representation that uses only standard
87
+ python data types: dictionaries, lists, strings, integers, and floats.
88
+
89
+ Args:
90
+ self: A transform that should be serialized. If the transform doesn't implement the `to_dict`
91
+ method and `on_not_implemented_error` equals 'raise' then `NotImplementedError` is raised.
92
+ If `on_not_implemented_error` equals 'warn' then `NotImplementedError` will be ignored
93
+ but no transform parameters will be serialized.
94
+ on_not_implemented_error (str): `raise` or `warn`.
95
+ """
96
+ if on_not_implemented_error not in {"raise", "warn"}:
97
+ raise ValueError(
98
+ "Unknown on_not_implemented_error value: {}. Supported values are: 'raise' and 'warn'".format(
99
+ on_not_implemented_error
100
+ )
101
+ )
102
+ try:
103
+ transform_dict = self._to_dict()
104
+ except NotImplementedError as e:
105
+ if on_not_implemented_error == "raise":
106
+ raise e
107
+
108
+ transform_dict = {}
109
+ warnings.warn(
110
+ "Got NotImplementedError while trying to serialize {obj}. Object arguments are not preserved. "
111
+ "Implement either '{cls_name}.get_transform_init_args_names' or '{cls_name}.get_transform_init_args' "
112
+ "method to make the transform serializable".format(obj=self, cls_name=self.__class__.__name__)
113
+ )
114
+ return {"__version__": __version__, "transform": transform_dict}
115
+
116
+
117
+ def to_dict(transform: Serializable, on_not_implemented_error: str = "raise") -> Dict[str, Any]:
118
+ """
119
+ Take a transform pipeline and convert it to a serializable representation that uses only standard
120
+ python data types: dictionaries, lists, strings, integers, and floats.
121
+
122
+ Args:
123
+ transform: A transform that should be serialized. If the transform doesn't implement the `to_dict`
124
+ method and `on_not_implemented_error` equals 'raise' then `NotImplementedError` is raised.
125
+ If `on_not_implemented_error` equals 'warn' then `NotImplementedError` will be ignored
126
+ but no transform parameters will be serialized.
127
+ on_not_implemented_error (str): `raise` or `warn`.
128
+ """
129
+ return transform.to_dict(on_not_implemented_error)
130
+
131
+
132
+ def instantiate_nonserializable(
133
+ transform: Dict[str, Any], nonserializable: Optional[Dict[str, Any]] = None
134
+ ) -> Optional[Serializable]:
135
+ if transform.get("__class_fullname__") in NON_SERIALIZABLE_REGISTRY:
136
+ name = transform["__name__"]
137
+ if nonserializable is None:
138
+ raise ValueError(
139
+ "To deserialize a non-serializable transform with name {name} you need to pass a dict with "
140
+ "this transform as the `nonserializable` argument".format(name=name)
141
+ )
142
+ result_transform = nonserializable.get(name)
143
+ if result_transform is None:
144
+ raise ValueError(
145
+ "Non-serializable transform with name {name} was not found in `nonserializable`".format(name=name)
146
+ )
147
+ return result_transform
148
+ return None
149
+
150
+
151
+ def from_dict(
152
+ transform_dict: Dict[str, Any],
153
+ nonserializable: Optional[Dict[str, Any]] = None,
154
+ lambda_transforms: Union[Optional[Dict[str, Any]], str] = "deprecated",
155
+ ) -> Optional[Serializable]:
156
+ """
157
+ Args:
158
+ transform_dict (dict): A dictionary with serialized transform pipeline.
159
+ nonserializable (dict): A dictionary that contains non-serializable transforms.
160
+ This dictionary is required when you are restoring a pipeline that contains non-serializable transforms.
161
+ Keys in that dictionary should be named the same as the `name` arguments in the respective transforms from
162
+ a serialized pipeline.
163
+ lambda_transforms (dict): Deprecated. Use 'nonserializable' instead.
164
+ """
165
+ if lambda_transforms != "deprecated":
166
+ warnings.warn("lambda_transforms argument is deprecated, please use 'nonserializable'", DeprecationWarning)
167
+ nonserializable = typing.cast(Optional[Dict[str, Any]], lambda_transforms)
168
+
169
+ register_additional_transforms()
170
+ transform = transform_dict["transform"]
171
+ lmbd = instantiate_nonserializable(transform, nonserializable)
172
+ if lmbd:
173
+ return lmbd
174
+ name = transform["__class_fullname__"]
175
+ args = {k: v for k, v in transform.items() if k != "__class_fullname__"}
176
+ cls = SERIALIZABLE_REGISTRY[shorten_class_name(name)]
177
+ if "transforms" in args:
178
+ args["transforms"] = [from_dict({"transform": t}, nonserializable=nonserializable) for t in args["transforms"]]
179
+ return cls(**args)
180
+
181
+
182
+ def check_data_format(data_format: str) -> None:
183
+ if data_format not in {"json", "yaml"}:
184
+ raise ValueError("Unknown data_format {}. Supported formats are: 'json' and 'yaml'".format(data_format))
185
+
186
+
187
+ def save(
188
+ transform: Serializable, filepath: str, data_format: str = "json", on_not_implemented_error: str = "raise"
189
+ ) -> None:
190
+ """
191
+ Take a transform pipeline, serialize it and save a serialized version to a file
192
+ using either json or yaml format.
193
+
194
+ Args:
195
+ transform (obj): Transform to serialize.
196
+ filepath (str): Filepath to write to.
197
+ data_format (str): Serialization format. Should be either 'json' or 'yaml'.
198
+ on_not_implemented_error (str): Parameter that describes what to do if a transform doesn't implement
199
+ the `to_dict` method. If 'raise' then `NotImplementedError` is raised, if `warn` then the exception will be
200
+ ignored and no transform arguments will be saved.
201
+ """
202
+ check_data_format(data_format)
203
+ transform_dict = transform.to_dict(on_not_implemented_error=on_not_implemented_error)
204
+ dump_fn = json.dump if data_format == "json" else yaml.safe_dump
205
+ with open(filepath, "w") as f:
206
+ dump_fn(transform_dict, f) # type: ignore
207
+
208
+
209
+ def load(
210
+ filepath: str,
211
+ data_format: str = "json",
212
+ nonserializable: Optional[Dict[str, Any]] = None,
213
+ lambda_transforms: Union[Optional[Dict[str, Any]], str] = "deprecated",
214
+ ) -> object:
215
+ """
216
+ Load a serialized pipeline from a json or yaml file and construct a transform pipeline.
217
+
218
+ Args:
219
+ filepath (str): Filepath to read from.
220
+ data_format (str): Serialization format. Should be either 'json' or 'yaml'.
221
+ nonserializable (dict): A dictionary that contains non-serializable transforms.
222
+ This dictionary is required when you are restoring a pipeline that contains non-serializable transforms.
223
+ Keys in that dictionary should be named the same as the `name` arguments in the respective transforms from
224
+ a serialized pipeline.
225
+ lambda_transforms (dict): Deprecated. Use 'nonserializable' instead.
226
+ """
227
+ if lambda_transforms != "deprecated":
228
+ warnings.warn("lambda_transforms argument is deprecated, please use 'nonserializable'", DeprecationWarning)
229
+ nonserializable = typing.cast(Optional[Dict[str, Any]], lambda_transforms)
230
+
231
+ check_data_format(data_format)
232
+ load_fn = json.load if data_format == "json" else yaml.safe_load
233
+ with open(filepath) as f:
234
+ transform_dict = load_fn(f) # type: ignore
235
+
236
+ return from_dict(transform_dict, nonserializable=nonserializable)
237
+
238
+
239
+ def register_additional_transforms() -> None:
240
+ """
241
+ Register transforms that are not imported directly into the `albumentations` module.
242
+ """
243
+ try:
244
+ # This import will result in ImportError if `torch` is not installed
245
+ import custom_albumentations.pytorch
246
+ except ImportError:
247
+ pass
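
A minimal round trip through the helpers above (the pipeline and file path are illustrative): save serializes a pipeline to JSON or YAML, and load reconstructs it through from_dict and the SERIALIZABLE_REGISTRY.

import custom_albumentations as A
from custom_albumentations.core.serialization import load, save

transform = A.Compose([A.HorizontalFlip(p=0.5)])
save(transform, "/tmp/pipeline.json", data_format="json")
restored = load("/tmp/pipeline.json", data_format="json")
# `restored` behaves the same as `transform`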
custom_albumentations/core/transforms_interface.py ADDED
@@ -0,0 +1,293 @@
1
+ from __future__ import absolute_import
2
+
3
+ import random
4
+ from copy import deepcopy
5
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast
6
+ from warnings import warn
7
+
8
+ import cv2
9
+ import numpy as np
10
+
11
+ from .serialization import Serializable, get_shortest_class_fullname
12
+ from .utils import format_args
13
+
14
+ __all__ = [
15
+ "to_tuple",
16
+ "BasicTransform",
17
+ "DualTransform",
18
+ "ImageOnlyTransform",
19
+ "NoOp",
20
+ "BoxType",
21
+ "KeypointType",
22
+ "ImageColorType",
23
+ "ScaleFloatType",
24
+ "ScaleIntType",
25
+ "ImageColorType",
26
+ ]
27
+
28
+ NumType = Union[int, float, np.ndarray]
29
+ BoxInternalType = Tuple[float, float, float, float]
30
+ BoxType = Union[BoxInternalType, Tuple[float, float, float, float, Any]]
31
+ KeypointInternalType = Tuple[float, float, float, float]
32
+ KeypointType = Union[KeypointInternalType, Tuple[float, float, float, float, Any]]
33
+ ImageColorType = Union[float, Sequence[float]]
34
+
35
+ ScaleFloatType = Union[float, Tuple[float, float]]
36
+ ScaleIntType = Union[int, Tuple[int, int]]
37
+
38
+ FillValueType = Optional[Union[int, float, Sequence[int], Sequence[float]]]
39
+
40
+
41
+ def to_tuple(param, low=None, bias=None):
42
+ """Convert input argument to min-max tuple
43
+ Args:
44
+ param (scalar, tuple or list of two elements): Input value.
45
+ If the value is a scalar, the return value is (-value, value), offset by `bias` if it is given.
46
+ If the value is a tuple, `bias` is added to each element (broadcast).
47
+ low: If param is a scalar, build the sorted tuple (low, param) instead of (-param, param).
48
+ bias: An offset added to each element. Mutually exclusive with `low`.
49
+ """
50
+ if low is not None and bias is not None:
51
+ raise ValueError("Arguments low and bias are mutually exclusive")
52
+
53
+ if param is None:
54
+ return param
55
+
56
+ if isinstance(param, (int, float)):
57
+ if low is None:
58
+ param = -param, +param
59
+ else:
60
+ param = (low, param) if low < param else (param, low)
61
+ elif isinstance(param, Sequence):
62
+ if len(param) != 2:
63
+ raise ValueError("to_tuple expects 1 or 2 values")
64
+ param = tuple(param)
65
+ else:
66
+ raise ValueError("Argument param must be either scalar (int, float) or tuple")
67
+
68
+ if bias is not None:
69
+ return tuple(bias + x for x in param)
70
+
71
+ return tuple(param)
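
Concrete values for the branches above (a quick sketch):

to_tuple(0.1)            # -> (-0.1, 0.1)
to_tuple(10, low=2)      # -> (2, 10), sorted
to_tuple((0.5, 1.5))     # -> (0.5, 1.5)
to_tuple(0.1, bias=1.0)  # -> (0.9, 1.1)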
72
+
73
+
74
+ class BasicTransform(Serializable):
75
+ call_backup = None
76
+ interpolation: Any
77
+ fill_value: Any
78
+ mask_fill_value: Any
79
+
80
+ def __init__(self, always_apply: bool = False, p: float = 0.5):
81
+ self.p = p
82
+ self.always_apply = always_apply
83
+ self._additional_targets: Dict[str, str] = {}
84
+
85
+ # replay mode params
86
+ self.deterministic = False
87
+ self.save_key = "replay"
88
+ self.params: Dict[Any, Any] = {}
89
+ self.replay_mode = False
90
+ self.applied_in_replay = False
91
+
92
+ def __call__(self, *args, force_apply: bool = False, **kwargs) -> Dict[str, Any]:
93
+ if args:
94
+ raise KeyError("You have to pass data to augmentations as named arguments, for example: aug(image=image)")
95
+ if self.replay_mode:
96
+ if self.applied_in_replay:
97
+ return self.apply_with_params(self.params, **kwargs)
98
+
99
+ return kwargs
100
+
101
+ if (random.random() < self.p) or self.always_apply or force_apply:
102
+ params = self.get_params()
103
+
104
+ if self.targets_as_params:
105
+ assert all(key in kwargs for key in self.targets_as_params), "{} requires {}".format(
106
+ self.__class__.__name__, self.targets_as_params
107
+ )
108
+ targets_as_params = {k: kwargs[k] for k in self.targets_as_params}
109
+ params_dependent_on_targets = self.get_params_dependent_on_targets(targets_as_params)
110
+ params.update(params_dependent_on_targets)
111
+ if self.deterministic:
112
+ if self.targets_as_params:
113
+ warn(
114
+ self.get_class_fullname() + " could work incorrectly in ReplayMode for other input data"
115
+ " because its params depend on targets."
116
+ )
117
+ kwargs[self.save_key][id(self)] = deepcopy(params)
118
+ return self.apply_with_params(params, **kwargs)
119
+
120
+ return kwargs
121
+
122
+ def apply_with_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: # skipcq: PYL-W0613
123
+ if params is None:
124
+ return kwargs
125
+ params = self.update_params(params, **kwargs)
126
+ res = {}
127
+ for key, arg in kwargs.items():
128
+ if arg is not None:
129
+ target_function = self._get_target_function(key)
130
+ target_dependencies = {k: kwargs[k] for k in self.target_dependence.get(key, [])}
131
+ res[key] = target_function(arg, **dict(params, **target_dependencies))
132
+ else:
133
+ res[key] = None
134
+ return res
135
+
136
+ def set_deterministic(self, flag: bool, save_key: str = "replay") -> "BasicTransform":
137
+ assert save_key != "params", "params save_key is reserved"
138
+ self.deterministic = flag
139
+ self.save_key = save_key
140
+ return self
141
+
142
+ def __repr__(self) -> str:
143
+ state = self.get_base_init_args()
144
+ state.update(self.get_transform_init_args())
145
+ return "{name}({args})".format(name=self.__class__.__name__, args=format_args(state))
146
+
147
+ def _get_target_function(self, key: str) -> Callable:
148
+ transform_key = key
149
+ if key in self._additional_targets:
150
+ transform_key = self._additional_targets.get(key, key)
151
+
152
+ target_function = self.targets.get(transform_key, lambda x, **p: x)
153
+ return target_function
154
+
155
+ def apply(self, img: np.ndarray, **params) -> np.ndarray:
156
+ raise NotImplementedError
157
+
158
+ def get_params(self) -> Dict:
159
+ return {}
160
+
161
+ @property
162
+ def targets(self) -> Dict[str, Callable]:
163
+ # you must specify targets in subclass
164
+ # for example: ('image', 'mask')
165
+ # ('image', 'boxes')
166
+ raise NotImplementedError
167
+
168
+ def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]:
169
+ if hasattr(self, "interpolation"):
170
+ params["interpolation"] = self.interpolation
171
+ if hasattr(self, "fill_value"):
172
+ params["fill_value"] = self.fill_value
173
+ if hasattr(self, "mask_fill_value"):
174
+ params["mask_fill_value"] = self.mask_fill_value
175
+ params.update({"cols": kwargs["image"].shape[1], "rows": kwargs["image"].shape[0]})
176
+ return params
177
+
178
+ @property
179
+ def target_dependence(self) -> Dict:
180
+ return {}
181
+
182
+ def add_targets(self, additional_targets: Dict[str, str]):
183
+ """Add targets to transform them the same way as one of the existing targets,
184
+ e.g. {'target_image': 'image'}
185
+ or {'obj1_mask': 'mask', 'obj2_mask': 'mask'}.
186
+ Note that you must still pass at least one target with the key 'image'.
187
+
188
+ Args:
189
+ additional_targets (dict): keys - new target name, values - old target name. ex: {'image2': 'image'}
190
+ """
191
+ self._additional_targets = additional_targets
192
+
193
+ @property
194
+ def targets_as_params(self) -> List[str]:
195
+ return []
196
+
197
+ def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
198
+ raise NotImplementedError(
199
+ "Method get_params_dependent_on_targets is not implemented in class " + self.__class__.__name__
200
+ )
201
+
202
+ @classmethod
203
+ def get_class_fullname(cls) -> str:
204
+ return get_shortest_class_fullname(cls)
205
+
206
+ @classmethod
207
+ def is_serializable(cls):
208
+ return True
209
+
210
+ def get_transform_init_args_names(self) -> Tuple[str, ...]:
211
+ raise NotImplementedError(
212
+ "Class {name} is not serializable because the `get_transform_init_args_names` method is not "
213
+ "implemented".format(name=self.get_class_fullname())
214
+ )
215
+
216
+ def get_base_init_args(self) -> Dict[str, Any]:
217
+ return {"always_apply": self.always_apply, "p": self.p}
218
+
219
+ def get_transform_init_args(self) -> Dict[str, Any]:
220
+ return {k: getattr(self, k) for k in self.get_transform_init_args_names()}
221
+
222
+ def _to_dict(self) -> Dict[str, Any]:
223
+ state = {"__class_fullname__": self.get_class_fullname()}
224
+ state.update(self.get_base_init_args())
225
+ state.update(self.get_transform_init_args())
226
+ return state
227
+
228
+ def get_dict_with_id(self) -> Dict[str, Any]:
229
+ d = self._to_dict()
230
+ d["id"] = id(self)
231
+ return d
232
+
233
+
234
+ class DualTransform(BasicTransform):
235
+ """Transform for segmentation task."""
236
+
237
+ @property
238
+ def targets(self) -> Dict[str, Callable]:
239
+ return {
240
+ "image": self.apply,
241
+ "mask": self.apply_to_mask,
242
+ "masks": self.apply_to_masks,
243
+ "bboxes": self.apply_to_bboxes,
244
+ "keypoints": self.apply_to_keypoints,
245
+ }
246
+
247
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
248
+ raise NotImplementedError("Method apply_to_bbox is not implemented in class " + self.__class__.__name__)
249
+
250
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
251
+ raise NotImplementedError("Method apply_to_keypoint is not implemented in class " + self.__class__.__name__)
252
+
253
+ def apply_to_bboxes(self, bboxes: Sequence[BoxType], **params) -> List[BoxType]:
254
+ return [self.apply_to_bbox(tuple(bbox[:4]), **params) + tuple(bbox[4:]) for bbox in bboxes] # type: ignore
255
+
256
+ def apply_to_keypoints(self, keypoints: Sequence[KeypointType], **params) -> List[KeypointType]:
257
+ return [ # type: ignore
258
+ self.apply_to_keypoint(tuple(keypoint[:4]), **params) + tuple(keypoint[4:]) # type: ignore
259
+ for keypoint in keypoints
260
+ ]
261
+
262
+ def apply_to_mask(self, img: np.ndarray, **params) -> np.ndarray:
263
+ return self.apply(img, **{k: cv2.INTER_NEAREST if k == "interpolation" else v for k, v in params.items()})
264
+
265
+ def apply_to_masks(self, masks: Sequence[np.ndarray], **params) -> List[np.ndarray]:
266
+ return [self.apply_to_mask(mask, **params) for mask in masks]
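
The `targets` mapping above is what routes each named argument ('image', 'mask', 'bboxes', 'keypoints') to its handler. A minimal custom transform written against that interface (the class and its flip formulas are an illustrative sketch, not part of the library):

import math
import numpy as np

class IllustrativeHFlip(DualTransform):
    def apply(self, img: np.ndarray, **params) -> np.ndarray:
        return np.ascontiguousarray(img[:, ::-1, ...])

    def apply_to_bbox(self, bbox, **params):
        x_min, y_min, x_max, y_max = bbox  # normalized albumentations format
        return 1 - x_max, y_min, 1 - x_min, y_max

    def apply_to_keypoint(self, keypoint, **params):
        x, y, angle, scale = keypoint      # internal (x, y, angle, scale) format
        return (params["cols"] - 1) - x, y, math.pi - angle, scale

    def get_transform_init_args_names(self):
        return ()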
267
+
268
+
269
+ class ImageOnlyTransform(BasicTransform):
270
+ """Transform applied to image only."""
271
+
272
+ @property
273
+ def targets(self) -> Dict[str, Callable]:
274
+ return {"image": self.apply}
275
+
276
+
277
+ class NoOp(DualTransform):
278
+ """Does nothing"""
279
+
280
+ def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
281
+ return keypoint
282
+
283
+ def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
284
+ return bbox
285
+
286
+ def apply(self, img: np.ndarray, **params) -> np.ndarray:
287
+ return img
288
+
289
+ def apply_to_mask(self, img: np.ndarray, **params) -> np.ndarray:
290
+ return img
291
+
292
+ def get_transform_init_args_names(self) -> Tuple:
293
+ return ()
custom_albumentations/core/utils.py ADDED
@@ -0,0 +1,137 @@
1
+ from __future__ import absolute_import
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Dict, Optional, Sequence, Tuple
5
+
6
+ import numpy as np
7
+
8
+ from .serialization import Serializable
9
+
10
+
11
+ def get_shape(img: Any) -> Tuple[int, int]:
12
+ if isinstance(img, np.ndarray):
13
+ rows, cols = img.shape[:2]
14
+ return rows, cols
15
+
16
+ try:
17
+ import torch
18
+
19
+ if torch.is_tensor(img):
20
+ rows, cols = img.shape[-2:]
21
+ return rows, cols
22
+ except ImportError:
23
+ pass
24
+
25
+ raise RuntimeError(
26
+ f"Albumentations supports only numpy.ndarray and torch.Tensor data type for image. Got: {type(img)}"
27
+ )
28
+
29
+
30
+ def format_args(args_dict: Dict):
31
+ formatted_args = []
32
+ for k, v in args_dict.items():
33
+ if isinstance(v, str):
34
+ v = f"'{v}'"
35
+ formatted_args.append(f"{k}={v}")
36
+ return ", ".join(formatted_args)
37
+
38
+
39
+ class Params(Serializable, ABC):
40
+ def __init__(self, format: str, label_fields: Optional[Sequence[str]] = None):
41
+ self.format = format
42
+ self.label_fields = label_fields
43
+
44
+ def _to_dict(self) -> Dict[str, Any]:
45
+ return {"format": self.format, "label_fields": self.label_fields}
46
+
47
+
48
+ class DataProcessor(ABC):
49
+ def __init__(self, params: Params, additional_targets: Optional[Dict[str, str]] = None):
50
+ self.params = params
51
+ self.data_fields = [self.default_data_name]
52
+ if additional_targets is not None:
53
+ for k, v in additional_targets.items():
54
+ if v == self.default_data_name:
55
+ self.data_fields.append(k)
56
+
57
+ @property
58
+ @abstractmethod
59
+ def default_data_name(self) -> str:
60
+ raise NotImplementedError
61
+
62
+ def ensure_data_valid(self, data: Dict[str, Any]) -> None:
63
+ pass
64
+
65
+ def ensure_transforms_valid(self, transforms: Sequence[object]) -> None:
66
+ pass
67
+
68
+ def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
69
+ rows, cols = get_shape(data["image"])
70
+
71
+ for data_name in self.data_fields:
72
+ data[data_name] = self.filter(data[data_name], rows, cols)
73
+ data[data_name] = self.check_and_convert(data[data_name], rows, cols, direction="from")
74
+
75
+ data = self.remove_label_fields_from_data(data)
76
+ return data
77
+
78
+ def preprocess(self, data: Dict[str, Any]) -> None:
79
+ data = self.add_label_fields_to_data(data)
80
+
81
+ rows, cols = data["image"].shape[:2]
82
+ for data_name in self.data_fields:
83
+ data[data_name] = self.check_and_convert(data[data_name], rows, cols, direction="to")
84
+
85
+ def check_and_convert(self, data: Sequence, rows: int, cols: int, direction: str = "to") -> Sequence:
86
+ if self.params.format == "albumentations":
87
+ self.check(data, rows, cols)
88
+ return data
89
+
90
+ if direction == "to":
91
+ return self.convert_to_albumentations(data, rows, cols)
92
+ elif direction == "from":
93
+ return self.convert_from_albumentations(data, rows, cols)
94
+ else:
95
+ raise ValueError(f"Invalid direction. Must be `to` or `from`. Got `{direction}`")
96
+
97
+ @abstractmethod
98
+ def filter(self, data: Sequence, rows: int, cols: int) -> Sequence:
99
+ pass
100
+
101
+ @abstractmethod
102
+ def check(self, data: Sequence, rows: int, cols: int) -> None:
103
+ pass
104
+
105
+ @abstractmethod
106
+ def convert_to_albumentations(self, data: Sequence, rows: int, cols: int) -> Sequence:
107
+ pass
108
+
109
+ @abstractmethod
110
+ def convert_from_albumentations(self, data: Sequence, rows: int, cols: int) -> Sequence:
111
+ pass
112
+
113
+ def add_label_fields_to_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
114
+ if self.params.label_fields is None:
115
+ return data
116
+ for data_name in self.data_fields:
117
+ for field in self.params.label_fields:
118
+ assert len(data[data_name]) == len(data[field])
119
+ data_with_added_field = []
120
+ for d, field_value in zip(data[data_name], data[field]):
121
+ data_with_added_field.append(list(d) + [field_value])
122
+ data[data_name] = data_with_added_field
123
+ return data
124
+
125
+ def remove_label_fields_from_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
126
+ if self.params.label_fields is None:
127
+ return data
128
+ for data_name in self.data_fields:
129
+ label_fields_len = len(self.params.label_fields)
130
+ for idx, field in enumerate(self.params.label_fields):
131
+ field_values = []
132
+ for bbox in data[data_name]:
133
+ field_values.append(bbox[-label_fields_len + idx])
134
+ data[field] = field_values
135
+ if label_fields_len:
136
+ data[data_name] = [d[:-label_fields_len] for d in data[data_name]]
137
+ return data
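
A sketch of the label-field bookkeeping above (the image, boxes, and labels are illustrative, and it assumes this fork re-exports Compose and BboxParams at package level like upstream albumentations): add_label_fields_to_data zips each label onto its item before the transforms run, and remove_label_fields_from_data splits the labels back out afterwards.

import numpy as np
import custom_albumentations as A

image = np.zeros((100, 100, 3), dtype=np.uint8)
transform = A.Compose(
    [A.HorizontalFlip(p=1.0)],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["class_labels"]),
)
out = transform(image=image, bboxes=[(10, 10, 50, 50)], class_labels=["cat"])
# while the transforms run, each bbox is carried as (10, 10, 50, 50, "cat");
# afterwards out["bboxes"] and out["class_labels"] are separate again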
custom_albumentations/imgaug/__init__.py ADDED
File without changes
custom_albumentations/imgaug/stubs.py ADDED
@@ -0,0 +1,77 @@
1
+ __all__ = [
2
+ "IAAEmboss",
3
+ "IAASuperpixels",
4
+ "IAASharpen",
5
+ "IAAAdditiveGaussianNoise",
6
+ "IAACropAndPad",
7
+ "IAAFliplr",
8
+ "IAAFlipud",
9
+ "IAAAffine",
10
+ "IAAPiecewiseAffine",
11
+ "IAAPerspective",
12
+ ]
13
+
14
+
15
+ class IAAStub:
16
+ def __init__(self, *args, **kwargs):
17
+ cls_name = self.__class__.__name__
18
+ doc_link = "https://albumentations.ai/docs/api_reference/augmentations" + self.doc_link
19
+ raise RuntimeError(
20
+ f"You are trying to use a deprecated augmentation '{cls_name}' which depends on the imgaug library, "
21
+ f"but imgaug is not installed.\n\n"
22
+ "There are two options to fix this error:\n"
23
+ "1. [Recommended]. Switch to the Albumentations' implementation of the augmentation with the same API: "
24
+ f"{self.alternative} - {doc_link}\n"
25
+ "2. Install a version of Albumentations that contains imgaug by running "
26
+ "'pip install -U albumentations[imgaug]'."
27
+ )
28
+
29
+
30
+ class IAACropAndPad(IAAStub):
31
+ alternative = "CropAndPad"
32
+ doc_link = "/crops/transforms/#albumentations.augmentations.crops.transforms.CropAndPad"
33
+
34
+
35
+ class IAAFliplr(IAAStub):
36
+ alternative = "HorizontalFlip"
37
+ doc_link = "/transforms/#albumentations.augmentations.transforms.HorizontalFlip"
38
+
39
+
40
+ class IAAFlipud(IAAStub):
41
+ alternative = "VerticalFlip"
42
+ doc_link = "/transforms/#albumentations.augmentations.transforms.VerticalFlip"
43
+
44
+
45
+ class IAAEmboss(IAAStub):
46
+ alternative = "Emboss"
47
+ doc_link = "/transforms/#albumentations.augmentations.transforms.Emboss"
48
+
49
+
50
+ class IAASuperpixels(IAAStub):
51
+ alternative = "Superpixels"
52
+ doc_link = "/transforms/#albumentations.augmentations.transforms.Superpixels"
53
+
54
+
55
+ class IAASharpen(IAAStub):
56
+ alternative = "Sharpen"
57
+ doc_link = "/transforms/#albumentations.augmentations.transforms.Sharpen"
58
+
59
+
60
+ class IAAAdditiveGaussianNoise(IAAStub):
61
+ alternative = "GaussNoise"
62
+ doc_link = "/transforms/#albumentations.augmentations.transforms.GaussNoise"
63
+
64
+
65
+ class IAAPiecewiseAffine(IAAStub):
66
+ alternative = "PiecewiseAffine"
67
+ doc_link = "/geometric/transforms/#albumentations.augmentations.geometric.transforms.PiecewiseAffine"
68
+
69
+
70
+ class IAAAffine(IAAStub):
71
+ alternative = "Affine"
72
+ doc_link = "/geometric/transforms/#albumentations.augmentations.geometric.transforms.Affine"
73
+
74
+
75
+ class IAAPerspective(IAAStub):
76
+ alternative = "Perspective"
77
+ doc_link = "/geometric/transforms/#albumentations.augmentations.geometric.transforms.Perspective"
custom_albumentations/imgaug/transforms.py ADDED
@@ -0,0 +1,391 @@
1
+ try:
2
+ import imgaug as ia
3
+ except ImportError as e:
4
+ raise ImportError(
5
+ "You are trying to import an augmentation that depends on the imgaug library, but imgaug is not installed. To "
6
+ "install a version of Albumentations that contains imgaug please run 'pip install -U albumentations[imgaug]'"
7
+ ) from e
8
+
9
+ try:
10
+ from imgaug import augmenters as iaa
11
+ except ImportError:
12
+ import imgaug.imgaug.augmenters as iaa
13
+
14
+ import warnings
15
+
16
+ from custom_albumentations.core.bbox_utils import (
17
+ convert_bboxes_from_albumentations,
18
+ convert_bboxes_to_albumentations,
19
+ )
20
+ from custom_albumentations.core.keypoints_utils import (
21
+ convert_keypoints_from_albumentations,
22
+ convert_keypoints_to_albumentations,
23
+ )
24
+
25
+ from ..augmentations import Perspective
26
+ from ..core.transforms_interface import (
27
+ BasicTransform,
28
+ DualTransform,
29
+ ImageOnlyTransform,
30
+ to_tuple,
31
+ )
32
+
33
+ __all__ = [
34
+ "BasicIAATransform",
35
+ "DualIAATransform",
36
+ "ImageOnlyIAATransform",
37
+ "IAAEmboss",
38
+ "IAASuperpixels",
39
+ "IAASharpen",
40
+ "IAAAdditiveGaussianNoise",
41
+ "IAACropAndPad",
42
+ "IAAFliplr",
43
+ "IAAFlipud",
44
+ "IAAAffine",
45
+ "IAAPiecewiseAffine",
46
+ "IAAPerspective",
47
+ ]
48
+
49
+
50
+ class BasicIAATransform(BasicTransform):
51
+ def __init__(self, always_apply=False, p=0.5):
52
+ super(BasicIAATransform, self).__init__(always_apply, p)
53
+
54
+ @property
55
+ def processor(self):
56
+ return iaa.Noop()
57
+
58
+ def update_params(self, params, **kwargs):
59
+ params = super(BasicIAATransform, self).update_params(params, **kwargs)
60
+ params["deterministic_processor"] = self.processor.to_deterministic()
61
+ return params
62
+
63
+ def apply(self, img, deterministic_processor=None, **params):
64
+ return deterministic_processor.augment_image(img)
65
+
66
+
67
+ class DualIAATransform(DualTransform, BasicIAATransform):
68
+ def apply_to_bboxes(self, bboxes, deterministic_processor=None, rows=0, cols=0, **params):
69
+ if len(bboxes) > 0:
70
+ bboxes = convert_bboxes_from_albumentations(bboxes, "pascal_voc", rows=rows, cols=cols)
71
+
72
+ bboxes_t = ia.BoundingBoxesOnImage([ia.BoundingBox(*bbox[:4]) for bbox in bboxes], (rows, cols))
73
+ bboxes_t = deterministic_processor.augment_bounding_boxes([bboxes_t])[0].bounding_boxes
74
+ bboxes_t = [
75
+ [bbox.x1, bbox.y1, bbox.x2, bbox.y2] + list(bbox_orig[4:])
76
+ for (bbox, bbox_orig) in zip(bboxes_t, bboxes)
77
+ ]
78
+
79
+ bboxes = convert_bboxes_to_albumentations(bboxes_t, "pascal_voc", rows=rows, cols=cols)
80
+ return bboxes
81
+
82
+ def apply_to_keypoints(self, keypoints, deterministic_processor=None, rows=0, cols=0, **params):
+ """Applies the transformation to keypoints.
+ Notes:
+ Since IAA supports only xy keypoints, scale and orientation will remain unchanged.
+ TODO:
+ Emit a warning message if child classes of DualIAATransform are instantiated
+ inside Compose with a keypoints format other than 'xy'.
+ """
91
+ if len(keypoints) > 0:
92
+ keypoints = convert_keypoints_from_albumentations(keypoints, "xy", rows=rows, cols=cols)
93
+ keypoints_t = ia.KeypointsOnImage([ia.Keypoint(*kp[:2]) for kp in keypoints], (rows, cols))
94
+ keypoints_t = deterministic_processor.augment_keypoints([keypoints_t])[0].keypoints
95
+
96
+ keypoints_t = [[kp.x, kp.y] + list(kp_orig[2:]) for (kp, kp_orig) in zip(keypoints_t, keypoints)]
97
+
98
+ keypoints = convert_keypoints_to_albumentations(keypoints_t, "xy", rows=rows, cols=cols)
99
+ return keypoints
100
+
101
+
102
+ class ImageOnlyIAATransform(ImageOnlyTransform, BasicIAATransform):
103
+ pass
104
+
105
+
106
+ class IAACropAndPad(DualIAATransform):
107
+ """This augmentation is deprecated. Please use CropAndPad instead."""
108
+
109
+ def __init__(self, px=None, percent=None, pad_mode="constant", pad_cval=0, keep_size=True, always_apply=False, p=1):
110
+ super(IAACropAndPad, self).__init__(always_apply, p)
111
+ self.px = px
112
+ self.percent = percent
113
+ self.pad_mode = pad_mode
114
+ self.pad_cval = pad_cval
115
+ self.keep_size = keep_size
116
+ warnings.warn("IAACropAndPad is deprecated. Please use CropAndPad instead", FutureWarning)
117
+
118
+ @property
119
+ def processor(self):
120
+ return iaa.CropAndPad(self.px, self.percent, self.pad_mode, self.pad_cval, self.keep_size)
121
+
122
+ def get_transform_init_args_names(self):
123
+ return ("px", "percent", "pad_mode", "pad_cval", "keep_size")
124
+
125
+
126
+ class IAAFliplr(DualIAATransform):
127
+ """This augmentation is deprecated. Please use HorizontalFlip instead."""
128
+
129
+ def __init__(self, always_apply=False, p=0.5):
130
+ super().__init__(always_apply, p)
131
+ warnings.warn("IAAFliplr is deprecated. Please use HorizontalFlip instead.", FutureWarning)
132
+
133
+ @property
134
+ def processor(self):
135
+ return iaa.Fliplr(1)
136
+
137
+ def get_transform_init_args_names(self):
138
+ return ()
139
+
140
+
141
+ class IAAFlipud(DualIAATransform):
142
+ """This augmentation is deprecated. Please use VerticalFlip instead."""
143
+
144
+ def __init__(self, always_apply=False, p=0.5):
145
+ super().__init__(always_apply, p)
146
+ warnings.warn("IAAFlipud is deprecated. Please use VerticalFlip instead.", FutureWarning)
147
+
148
+ @property
149
+ def processor(self):
150
+ return iaa.Flipud(1)
151
+
152
+ def get_transform_init_args_names(self):
153
+ return ()
154
+
155
+
156
+ class IAAEmboss(ImageOnlyIAATransform):
157
+ """Emboss the input image and overlay the result with the original image.
158
+ This augmentation is deprecated. Please use Emboss instead.
159
+
160
+ Args:
161
+ alpha ((float, float)): range to choose the visibility of the embossed image. At 0, only the original image is
162
+ visible, at 1.0 only its embossed version is visible. Default: (0.2, 0.5).
163
+ strength ((float, float)): strength range of the embossing. Default: (0.2, 0.7).
164
+ p (float): probability of applying the transform. Default: 0.5.
165
+
166
+ Targets:
167
+ image
168
+ """
169
+
170
+ def __init__(self, alpha=(0.2, 0.5), strength=(0.2, 0.7), always_apply=False, p=0.5):
171
+ super(IAAEmboss, self).__init__(always_apply, p)
172
+ self.alpha = to_tuple(alpha, 0.0)
173
+ self.strength = to_tuple(strength, 0.0)
174
+ warnings.warn("This augmentation is deprecated. Please use Emboss instead", FutureWarning)
175
+
176
+ @property
177
+ def processor(self):
178
+ return iaa.Emboss(self.alpha, self.strength)
179
+
180
+ def get_transform_init_args_names(self):
181
+ return ("alpha", "strength")
182
+
183
+
184
+ class IAASuperpixels(ImageOnlyIAATransform):
185
+ """Completely or partially transform the input image to its superpixel representation. Uses skimage's version
186
+ of the SLIC algorithm. May be slow.
187
+
188
+ This augmentation is deprecated. Please use Superpixels instead.
189
+
190
+ Args:
191
+ p_replace (float): defines the probability of any superpixel area being replaced by the superpixel, i.e. by
192
+ the average pixel color within its area. Default: 0.1.
193
+ n_segments (int): target number of superpixels to generate. Default: 100.
194
+ p (float): probability of applying the transform. Default: 0.5.
195
+
196
+ Targets:
197
+ image
198
+ """
199
+
200
+ def __init__(self, p_replace=0.1, n_segments=100, always_apply=False, p=0.5):
201
+ super(IAASuperpixels, self).__init__(always_apply, p)
202
+ self.p_replace = p_replace
203
+ self.n_segments = n_segments
204
+ warnings.warn("IAASuperpixels is deprecated. Please use Superpixels instead.", FutureWarning)
205
+
206
+ @property
207
+ def processor(self):
208
+ return iaa.Superpixels(p_replace=self.p_replace, n_segments=self.n_segments)
209
+
210
+ def get_transform_init_args_names(self):
211
+ return ("p_replace", "n_segments")
212
+
213
+
214
+ class IAASharpen(ImageOnlyIAATransform):
215
+ """Sharpen the input image and overlay the result with the original image.
216
+ This augmentation is deprecated. Please use Sharpen instead
217
+ Args:
218
+ alpha ((float, float)): range to choose the visibility of the sharpened image. At 0, only the original image is
219
+ visible, at 1.0 only its sharpened version is visible. Default: (0.2, 0.5).
220
+ lightness ((float, float)): range to choose the lightness of the sharpened image. Default: (0.5, 1.0).
221
+ p (float): probability of applying the transform. Default: 0.5.
222
+
223
+ Targets:
224
+ image
225
+ """
226
+
227
+ def __init__(self, alpha=(0.2, 0.5), lightness=(0.5, 1.0), always_apply=False, p=0.5):
228
+ super(IAASharpen, self).__init__(always_apply, p)
229
+ self.alpha = to_tuple(alpha, 0)
230
+ self.lightness = to_tuple(lightness, 0)
231
+ warnings.warn("IAASharpen is deprecated. Please use Sharpen instead", FutureWarning)
232
+
233
+ @property
234
+ def processor(self):
235
+ return iaa.Sharpen(self.alpha, self.lightness)
236
+
237
+ def get_transform_init_args_names(self):
238
+ return ("alpha", "lightness")
239
+
240
+
241
+ class IAAAdditiveGaussianNoise(ImageOnlyIAATransform):
242
+ """Add gaussian noise to the input image.
243
+
244
+ This augmentation is deprecated. Please use GaussNoise instead.
245
+
246
+ Args:
247
+ loc (int): mean of the normal distribution that generates the noise. Default: 0.
248
+ scale ((float, float)): standard deviation of the normal distribution that generates the noise.
249
+ Default: (0.01 * 255, 0.05 * 255).
250
+ p (float): probability of applying the transform. Default: 0.5.
251
+
252
+ Targets:
253
+ image
254
+ """
255
+
256
+ def __init__(self, loc=0, scale=(0.01 * 255, 0.05 * 255), per_channel=False, always_apply=False, p=0.5):
257
+ super(IAAAdditiveGaussianNoise, self).__init__(always_apply, p)
258
+ self.loc = loc
259
+ self.scale = to_tuple(scale, 0.0)
260
+ self.per_channel = per_channel
261
+ warnings.warn("IAAAdditiveGaussianNoise is deprecated. Please use GaussNoise instead", FutureWarning)
262
+
263
+ @property
264
+ def processor(self):
265
+ return iaa.AdditiveGaussianNoise(self.loc, self.scale, self.per_channel)
266
+
267
+ def get_transform_init_args_names(self):
268
+ return ("loc", "scale", "per_channel")
269
+
270
+
271
+ class IAAPiecewiseAffine(DualIAATransform):
272
+ """Place a regular grid of points on the input and randomly move the neighbourhood of these points around
273
+ via affine transformations.
274
+
275
+ This augmentation is deprecated. Please use PiecewiseAffine instead.
276
+
277
+ Note: This class introduces interpolation artifacts to a mask if it has values other than {0;1}
278
+
279
+ Args:
280
+ scale ((float, float)): factor range that determines how far each point is moved. Default: (0.03, 0.05).
281
+ nb_rows (int): number of rows of points that the regular grid should have. Default: 4.
282
+ nb_cols (int): number of columns of points that the regular grid should have. Default: 4.
283
+ p (float): probability of applying the transform. Default: 0.5.
284
+
285
+ Targets:
286
+ image, mask
287
+ """
288
+
289
+ def __init__(
290
+ self, scale=(0.03, 0.05), nb_rows=4, nb_cols=4, order=1, cval=0, mode="constant", always_apply=False, p=0.5
291
+ ):
292
+ super(IAAPiecewiseAffine, self).__init__(always_apply, p)
293
+ self.scale = to_tuple(scale, 0.0)
294
+ self.nb_rows = nb_rows
295
+ self.nb_cols = nb_cols
296
+ self.order = order
297
+ self.cval = cval
298
+ self.mode = mode
299
+ warnings.warn("IAAPiecewiseAffine is deprecated. Please use PiecewiseAffine instead", FutureWarning)
300
+
301
+ @property
302
+ def processor(self):
303
+ return iaa.PiecewiseAffine(self.scale, self.nb_rows, self.nb_cols, self.order, self.cval, self.mode)
304
+
305
+ def get_transform_init_args_names(self):
306
+ return ("scale", "nb_rows", "nb_cols", "order", "cval", "mode")
307
+
308
+
309
+ class IAAAffine(DualIAATransform):
310
+ """Apply affine transformations to the input: scaling, translation,
311
+ rotation, and shearing.
312
+
313
+ This augmentation is deprecated. Please use Affine instead.
314
+
315
+ Note: This class introduces interpolation artifacts to a mask if it has values other than {0;1}
316
+
317
+ Args:
318
+ p (float): probability of applying the transform. Default: 0.5.
319
+
320
+ Targets:
321
+ image, mask
322
+ """
323
+
324
+ def __init__(
325
+ self,
326
+ scale=1.0,
327
+ translate_percent=None,
328
+ translate_px=None,
329
+ rotate=0.0,
330
+ shear=0.0,
331
+ order=1,
332
+ cval=0,
333
+ mode="reflect",
334
+ always_apply=False,
335
+ p=0.5,
336
+ ):
337
+ super(IAAAffine, self).__init__(always_apply, p)
338
+ self.scale = to_tuple(scale, 1.0)
339
+ self.translate_percent = to_tuple(translate_percent, 0)
340
+ self.translate_px = to_tuple(translate_px, 0)
341
+ self.rotate = to_tuple(rotate)
342
+ self.shear = to_tuple(shear)
343
+ self.order = order
344
+ self.cval = cval
345
+ self.mode = mode
346
+ warnings.warn("IAAAffine is deprecated. Please use Affine instead", FutureWarning)
347
+
348
+ @property
349
+ def processor(self):
350
+ return iaa.Affine(
351
+ self.scale,
352
+ self.translate_percent,
353
+ self.translate_px,
354
+ self.rotate,
355
+ self.shear,
356
+ self.order,
357
+ self.cval,
358
+ self.mode,
359
+ )
360
+
361
+ def get_transform_init_args_names(self):
362
+ return ("scale", "translate_percent", "translate_px", "rotate", "shear", "order", "cval", "mode")
363
+
364
+
365
+ class IAAPerspective(Perspective):
366
+ """Perform a random four point perspective transform of the input.
367
+ This augmentation is deprecated. Please use Perspective instead.
368
+
369
+ Note: This class introduces interpolation artifacts to a mask if it has values other than {0;1}
370
+
371
+ Args:
372
+ scale ((float, float)): standard deviation of the normal distributions. These are used to sample
373
+ the random distances of the subimage's corners from the full image's corners. Default: (0.05, 0.1).
374
+ p (float): probability of applying the transform. Default: 0.5.
375
+
376
+ Targets:
377
+ image, mask
378
+ """
379
+
380
+ def __init__(self, scale=(0.05, 0.1), keep_size=True, always_apply=False, p=0.5):
381
+ super(IAAPerspective, self).__init__(always_apply, p)
382
+ self.scale = to_tuple(scale, 1.0)
383
+ self.keep_size = keep_size
384
+ warnings.warn("IAAPerspective is deprecated. Please use Perspective instead", FutureWarning)
385
+
386
+ @property
387
+ def processor(self):
388
+ return iaa.PerspectiveTransform(self.scale, keep_size=self.keep_size)
389
+
390
+ def get_transform_init_args_names(self):
391
+ return ("scale", "keep_size")
custom_albumentations/pytorch/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from __future__ import absolute_import
2
+
3
+ from .transforms import *
custom_albumentations/pytorch/functional.py ADDED
@@ -0,0 +1,31 @@
1
+ from __future__ import division
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torchvision.transforms.functional as F
6
+
7
+
8
+ def img_to_tensor(im, normalize=None):
9
+ tensor = torch.from_numpy(np.moveaxis(im / (255.0 if im.dtype == np.uint8 else 1), -1, 0).astype(np.float32))
10
+ if normalize is not None:
11
+ return F.normalize(tensor, **normalize)
12
+ return tensor
13
+
14
+
15
+ def mask_to_tensor(mask, num_classes, sigmoid):
16
+ if num_classes > 1:
17
+ if not sigmoid:
18
+ # softmax
19
+ long_mask = np.zeros((mask.shape[:2]), dtype=np.int64)
20
+ if len(mask.shape) == 3:
21
+ for c in range(mask.shape[2]):
22
+ long_mask[mask[..., c] > 0] = c
23
+ else:
24
+ long_mask[mask > 127] = 1
25
+ long_mask[mask == 0] = 0
26
+ mask = long_mask
27
+ else:
28
+ mask = np.moveaxis(mask / (255.0 if mask.dtype == np.uint8 else 1), -1, 0).astype(np.float32)
29
+ else:
30
+ mask = np.expand_dims(mask / (255.0 if mask.dtype == np.uint8 else 1), 0).astype(np.float32)
31
+ return torch.from_numpy(mask)
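
A quick sketch of the HWC-to-CHW conversion these helpers perform. The `normalize` dict mirrors torchvision's mean/std convention (the ImageNet values below are illustrative, not part of this file):

import numpy as np
from custom_albumentations.pytorch.functional import img_to_tensor, mask_to_tensor

image = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
tensor = img_to_tensor(image, normalize={"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]})
print(tensor.shape)  # torch.Size([3, 64, 64]); uint8 input is scaled from [0, 255] to [0, 1]

mask = np.random.randint(0, 2, (64, 64), dtype=np.uint8) * 255
print(mask_to_tensor(mask, num_classes=1, sigmoid=True).shape)  # torch.Size([1, 64, 64])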
custom_albumentations/pytorch/transforms.py ADDED
@@ -0,0 +1,104 @@
+ from __future__ import absolute_import
+ 
+ import warnings
+ 
+ import numpy as np
+ import torch
+ from torchvision.transforms import functional as F
+ 
+ from ..core.transforms_interface import BasicTransform
+ 
+ __all__ = ["ToTensorV2"]
+ 
+ 
+ def img_to_tensor(im, normalize=None):
+     tensor = torch.from_numpy(np.moveaxis(im / (255.0 if im.dtype == np.uint8 else 1), -1, 0).astype(np.float32))
+     if normalize is not None:
+         return F.normalize(tensor, **normalize)
+     return tensor
+ 
+ 
+ def mask_to_tensor(mask, num_classes, sigmoid):
+     if num_classes > 1:
+         if not sigmoid:
+             # softmax
+             long_mask = np.zeros((mask.shape[:2]), dtype=np.int64)
+             if len(mask.shape) == 3:
+                 for c in range(mask.shape[2]):
+                     long_mask[mask[..., c] > 0] = c
+             else:
+                 long_mask[mask > 127] = 1
+                 long_mask[mask == 0] = 0
+             mask = long_mask
+         else:
+             mask = np.moveaxis(mask / (255.0 if mask.dtype == np.uint8 else 1), -1, 0).astype(np.float32)
+     else:
+         mask = np.expand_dims(mask / (255.0 if mask.dtype == np.uint8 else 1), 0).astype(np.float32)
+     return torch.from_numpy(mask)
+ 
+ 
+ class ToTensor(BasicTransform):
+     """Convert image and mask to `torch.Tensor` and divide by 255 if the image or mask is of `uint8` type.
+     This transform is now removed from custom_albumentations. If you need it, downgrade the library to version 0.5.2.
+ 
+     Args:
+         num_classes (int): only for segmentation
+         sigmoid (bool, optional): only for segmentation, transform mask to LongTensor or not.
+         normalize (dict, optional): dict with keys [mean, std] to pass into torchvision normalize
+ 
+     """
+ 
+     def __init__(self, num_classes=1, sigmoid=True, normalize=None):
+         raise RuntimeError(
+             "`ToTensor` is obsolete and it was removed from custom_albumentations. Please use `ToTensorV2` instead - "
+             "https://albumentations.ai/docs/api_reference/pytorch/transforms/"
+             "#albumentations.pytorch.transforms.ToTensorV2. "
+             "\n\nIf you need `ToTensor` downgrade Albumentations to version 0.5.2."
+         )
+ 
+ 
+ class ToTensorV2(BasicTransform):
+     """Convert image and mask to `torch.Tensor`. The numpy `HWC` image is converted to a pytorch `CHW` tensor.
+     If the image is in `HW` format (grayscale image), it will be converted to a pytorch `HW` tensor.
+     This is a simplified and improved version of the old `ToTensor` transform (`ToTensor` was deprecated and
+     is no longer present in Albumentations; use `ToTensorV2` instead).
+ 
+     Args:
+         transpose_mask (bool): If True and an input mask has three dimensions, this transform will transpose dimensions
+             so the shape `[height, width, num_channels]` becomes `[num_channels, height, width]`. The latter format is a
+             standard format for PyTorch Tensors. Default: False.
+         always_apply (bool): Indicates whether this transformation should be always applied. Default: True.
+         p (float): Probability of applying the transform. Default: 1.0.
+     """
+ 
+     def __init__(self, transpose_mask=False, always_apply=True, p=1.0):
+         super(ToTensorV2, self).__init__(always_apply=always_apply, p=p)
+         self.transpose_mask = transpose_mask
+ 
+     @property
+     def targets(self):
+         return {"image": self.apply, "mask": self.apply_to_mask, "masks": self.apply_to_masks}
+ 
+     def apply(self, img, **params):  # skipcq: PYL-W0613
+         if len(img.shape) not in [2, 3]:
+             raise ValueError("Albumentations only supports images in HW or HWC format")
+ 
+         if len(img.shape) == 2:
+             img = np.expand_dims(img, 2)
+ 
+         return torch.from_numpy(img.transpose(2, 0, 1))
+ 
+     def apply_to_mask(self, mask, **params):  # skipcq: PYL-W0613
+         if self.transpose_mask and mask.ndim == 3:
+             mask = mask.transpose(2, 0, 1)
+         return torch.from_numpy(mask)
+ 
+     def apply_to_masks(self, masks, **params):
+         return [self.apply_to_mask(mask, **params) for mask in masks]
+ 
+     def get_transform_init_args_names(self):
+         return ("transpose_mask",)
+ 
+     def get_params_dependent_on_targets(self, params):
+         return {}
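
A minimal pipeline sketch for `ToTensorV2`, assuming the package re-exports `Compose` and `Resize` at the top level as upstream albumentations does. Note that, unlike the removed `ToTensor`, `ToTensorV2` does not divide by 255; it only changes layout and wraps the arrays:

import numpy as np
import custom_albumentations as A  # assumes Compose/Resize at the root
from custom_albumentations.pytorch import ToTensorV2

transform = A.Compose([A.Resize(256, 256), ToTensorV2(transpose_mask=True)])
image = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)
mask = np.random.randint(0, 2, (300, 400, 1), dtype=np.uint8)
out = transform(image=image, mask=mask)
print(out["image"].shape, out["mask"].shape)  # torch.Size([3, 256, 256]) torch.Size([1, 256, 256])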
custom_albumentations/random_utils.py ADDED
@@ -0,0 +1,96 @@
+ # Use `Any` as the return type to avoid mypy problems with Union data types,
+ # because numpy can return single number and ndarray
+ 
+ import random as py_random
+ from typing import Any, Optional, Sequence, Type, Union
+ 
+ import numpy as np
+ 
+ from .core.transforms_interface import NumType
+ 
+ IntNumType = Union[int, np.ndarray]
+ Size = Union[int, Sequence[int]]
+ 
+ 
+ def get_random_state() -> np.random.RandomState:
+     return np.random.RandomState(py_random.randint(0, (1 << 32) - 1))
+ 
+ 
+ def uniform(
+     low: NumType = 0.0,
+     high: NumType = 1.0,
+     size: Optional[Size] = None,
+     random_state: Optional[np.random.RandomState] = None,
+ ) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.uniform(low, high, size)
+ 
+ 
+ def rand(d0: NumType, d1: NumType, *more, random_state: Optional[np.random.RandomState] = None, **kwargs) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.rand(d0, d1, *more, **kwargs)  # type: ignore
+ 
+ 
+ def randn(d0: NumType, d1: NumType, *more, random_state: Optional[np.random.RandomState] = None, **kwargs) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.randn(d0, d1, *more, **kwargs)  # type: ignore
+ 
+ 
+ def normal(
+     loc: NumType = 0.0,
+     scale: NumType = 1.0,
+     size: Optional[Size] = None,
+     random_state: Optional[np.random.RandomState] = None,
+ ) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.normal(loc, scale, size)
+ 
+ 
+ def poisson(
+     lam: NumType = 1.0, size: Optional[Size] = None, random_state: Optional[np.random.RandomState] = None
+ ) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.poisson(lam, size)
+ 
+ 
+ def permutation(
+     x: Union[int, Sequence[float], np.ndarray], random_state: Optional[np.random.RandomState] = None
+ ) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.permutation(x)
+ 
+ 
+ def randint(
+     low: IntNumType,
+     high: Optional[IntNumType] = None,
+     size: Optional[Size] = None,
+     dtype: Type = np.int32,
+     random_state: Optional[np.random.RandomState] = None,
+ ) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.randint(low, high, size, dtype)
+ 
+ 
+ def random(size: Optional[NumType] = None, random_state: Optional[np.random.RandomState] = None) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.random(size)  # type: ignore
+ 
+ 
+ def choice(
+     a: NumType,
+     size: Optional[Size] = None,
+     replace: bool = True,
+     p: Optional[Union[Sequence[float], np.ndarray]] = None,
+     random_state: Optional[np.random.RandomState] = None,
+ ) -> Any:
+     if random_state is None:
+         random_state = get_random_state()
+     return random_state.choice(a, size, replace, p)  # type: ignore
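
The point of these wrappers is that each numpy draw derives its `RandomState` from Python's `random` module, so seeding `random` makes the numpy sampling reproducible. A small sketch:

import random
from custom_albumentations import random_utils

random.seed(42)
a = random_utils.uniform(0.0, 1.0, size=3)
random.seed(42)
b = random_utils.uniform(0.0, 1.0, size=3)
assert (a == b).all()  # same Python-level seed, same numpy samples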
custom_controlnet_aux/__init__.py ADDED
@@ -0,0 +1 @@
+ # Dummy file ensuring this package will be recognized
custom_controlnet_aux/anime_face_segment/__init__.py ADDED
@@ -0,0 +1,66 @@
+ from .network import UNet
+ from .util import seg2img
+ import torch
+ import os
+ import cv2
+ from custom_controlnet_aux.util import HWC3, resize_image_with_pad, common_input_validate, custom_hf_download, BDS_MODEL_NAME
+ from huggingface_hub import hf_hub_download
+ from PIL import Image
+ from einops import rearrange
+ from .anime_segmentation import AnimeSegmentation
+ import numpy as np
+ 
+ class AnimeFaceSegmentor:
+     def __init__(self, model, seg_model):
+         self.model = model
+         self.seg_model = seg_model
+         self.device = "cpu"
+ 
+     @classmethod
+     def from_pretrained(cls, pretrained_model_or_path=BDS_MODEL_NAME, filename="UNet.pth", seg_filename="isnetis.ckpt"):
+         model_path = custom_hf_download(pretrained_model_or_path, filename, subfolder="Annotators")
+         seg_model_path = custom_hf_download("skytnt/anime-seg", seg_filename)
+ 
+         model = UNet()
+         ckpt = torch.load(model_path, map_location="cpu")
+         model.load_state_dict(ckpt)
+         model.eval()
+ 
+         seg_model = AnimeSegmentation(seg_model_path)
+         seg_model.net.eval()
+         return cls(model, seg_model)
+ 
+     def to(self, device):
+         self.model.to(device)
+         self.seg_model.net.to(device)
+         self.device = device
+         return self
+ 
+     def __call__(self, input_image, detect_resolution=512, output_type="pil", upscale_method="INTER_CUBIC", remove_background=True, **kwargs):
+         input_image, output_type = common_input_validate(input_image, output_type, **kwargs)
+         input_image, remove_pad = resize_image_with_pad(input_image, detect_resolution, upscale_method)
+ 
+         with torch.no_grad():
+             if remove_background:
+                 print(input_image.shape)
+                 mask, input_image = self.seg_model(input_image, 0)  # Don't resize again; the image is already resized
+             image_feed = torch.from_numpy(input_image).float().to(self.device)
+             image_feed = rearrange(image_feed, 'h w c -> 1 c h w')
+             image_feed = image_feed / 255
+             seg = self.model(image_feed).squeeze(dim=0)
+             result = seg2img(seg.cpu().detach().numpy())
+ 
+         detected_map = HWC3(result)
+         detected_map = remove_pad(detected_map)
+         if remove_background:
+             mask = remove_pad(mask)
+             H, W, C = detected_map.shape
+             tmp = np.zeros([H, W, C + 1])
+             tmp[:, :, :C] = detected_map
+             tmp[:, :, 3:] = mask
+             detected_map = tmp
+ 
+         if output_type == "pil":
+             detected_map = Image.fromarray(detected_map[..., :3])
+ 
+         return detected_map
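
A minimal usage sketch for the detector above. It relies only on the defaults defined in this file (`from_pretrained` downloads the weights via `custom_hf_download`); the input file name is hypothetical:

from PIL import Image
from custom_controlnet_aux.anime_face_segment import AnimeFaceSegmentor

segmentor = AnimeFaceSegmentor.from_pretrained()  # optionally .to("cuda")
image = Image.open("anime_face.png").convert("RGB")  # hypothetical input
seg_map = segmentor(image, detect_resolution=512, remove_background=False)
seg_map.save("anime_face_seg.png")  # per-pixel palette colors from util.seg2img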
custom_controlnet_aux/anime_face_segment/anime_segmentation.py ADDED
@@ -0,0 +1,58 @@
+ # https://github.com/SkyTNT/anime-segmentation/tree/main
+ # Only adapts isnet_is (https://huggingface.co/skytnt/anime-seg/blob/main/isnetis.ckpt)
+ import torch.nn as nn
+ import torch
+ from .isnet import ISNetDIS
+ import numpy as np
+ import cv2
+ from comfy.model_management import get_torch_device
+ DEVICE = get_torch_device()
+ 
+ class AnimeSegmentation:
+     def __init__(self, ckpt_path):
+         super().__init__()
+         sd = torch.load(ckpt_path, map_location="cpu")
+         self.net = ISNetDIS()
+         # gt_encoder isn't used during inference
+         self.net.load_state_dict({k.replace("net.", ''): v for k, v in sd.items() if k.startswith("net.")})
+         self.net = self.net.to(DEVICE)
+         self.net.eval()
+ 
+     def get_mask(self, input_img, s=640):
+         input_img = (input_img / 255).astype(np.float32)
+         if s == 0:
+             img_input = np.transpose(input_img, (2, 0, 1))
+             img_input = img_input[np.newaxis, :]
+             tmpImg = torch.from_numpy(img_input).float().to(DEVICE)
+             with torch.no_grad():
+                 pred = self.net(tmpImg)[0][0].sigmoid()  # https://github.com/SkyTNT/anime-segmentation/blob/main/train.py#L92C20-L92C47
+                 pred = pred.cpu().numpy()[0]
+                 pred = np.transpose(pred, (1, 2, 0))
+                 # pred = pred[:, :, np.newaxis]
+                 return pred
+ 
+         h, w = h0, w0 = input_img.shape[:-1]
+         h, w = (s, int(s * w / h)) if h > w else (int(s * h / w), s)
+         ph, pw = s - h, s - w
+         img_input = np.zeros([s, s, 3], dtype=np.float32)
+         img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(input_img, (w, h))
+         img_input = np.transpose(img_input, (2, 0, 1))
+         img_input = img_input[np.newaxis, :]
+         tmpImg = torch.from_numpy(img_input).float().to(DEVICE)
+         with torch.no_grad():
+             pred = self.net(tmpImg)[0][0].sigmoid()  # https://github.com/SkyTNT/anime-segmentation/blob/main/train.py#L92C20-L92C47
+             pred = pred.cpu().numpy()[0]
+             pred = np.transpose(pred, (1, 2, 0))
+             pred = pred[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w]
+             # pred = cv2.resize(pred, (w0, h0))[:, :, np.newaxis]
+             pred = cv2.resize(pred, (w0, h0))
+             return pred
+ 
+     def __call__(self, np_img, img_size):
+         mask = self.get_mask(np_img, int(img_size))
+         np_img = (mask * np_img + 255 * (1 - mask)).astype(np.uint8)
+         mask = (mask * 255).astype(np.uint8)
+         # np_img = np.concatenate([np_img, mask], axis=2, dtype=np.uint8)
+         # mask = mask.repeat(3, axis=2)
+         return mask, np_img
+ 
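
A toy check of the letterbox arithmetic in `get_mask` (taken when `s != 0`): the long side is scaled to `s` and the short side is centered inside a square canvas with zero padding.

s = 640
h0, w0 = 480, 720  # landscape input, so w0 > h0
h, w = (s, int(s * w0 / h0)) if h0 > w0 else (int(s * h0 / w0), s)
ph, pw = s - h, s - w
print(h, w)    # 426 640 -> size of the resized content inside the 640x640 canvas
print(ph, pw)  # 214 0   -> vertical padding, split evenly above and below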
custom_controlnet_aux/anime_face_segment/isnet.py ADDED
@@ -0,0 +1,619 @@
+ # Codes are borrowed from
+ # https://github.com/xuebinqin/DIS/blob/main/IS-Net/models/isnet.py
+ 
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torchvision import models
+ 
+ bce_loss = nn.BCEWithLogitsLoss(reduction="mean")
+ 
+ 
+ def muti_loss_fusion(preds, target):
+     loss0 = 0.0
+     loss = 0.0
+ 
+     for i in range(0, len(preds)):
+         if preds[i].shape[2] != target.shape[2] or preds[i].shape[3] != target.shape[3]:
+             tmp_target = F.interpolate(
+                 target, size=preds[i].size()[2:], mode="bilinear", align_corners=True
+             )
+             loss = loss + bce_loss(preds[i], tmp_target)
+         else:
+             loss = loss + bce_loss(preds[i], target)
+         if i == 0:
+             loss0 = loss
+     return loss0, loss
+ 
+ 
+ fea_loss = nn.MSELoss(reduction="mean")
+ kl_loss = nn.KLDivLoss(reduction="mean")
+ l1_loss = nn.L1Loss(reduction="mean")
+ smooth_l1_loss = nn.SmoothL1Loss(reduction="mean")
+ 
+ 
+ def muti_loss_fusion_kl(preds, target, dfs, fs, mode="MSE"):
+     loss0 = 0.0
+     loss = 0.0
+ 
+     for i in range(0, len(preds)):
+         if preds[i].shape[2] != target.shape[2] or preds[i].shape[3] != target.shape[3]:
+             tmp_target = F.interpolate(
+                 target, size=preds[i].size()[2:], mode="bilinear", align_corners=True
+             )
+             loss = loss + bce_loss(preds[i], tmp_target)
+         else:
+             loss = loss + bce_loss(preds[i], target)
+         if i == 0:
+             loss0 = loss
+ 
+     for i in range(0, len(dfs)):
+         df = dfs[i]
+         fs_i = fs[i]
+         if mode == "MSE":
+             loss = loss + fea_loss(
+                 df, fs_i
+             )  ### add the mse loss of features as additional constraints
+         elif mode == "KL":
+             loss = loss + kl_loss(F.log_softmax(df, dim=1), F.softmax(fs_i, dim=1))
+         elif mode == "MAE":
+             loss = loss + l1_loss(df, fs_i)
+         elif mode == "SmoothL1":
+             loss = loss + smooth_l1_loss(df, fs_i)
+ 
+     return loss0, loss
+ 
+ 
+ class REBNCONV(nn.Module):
+     def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
+         super(REBNCONV, self).__init__()
+ 
+         self.conv_s1 = nn.Conv2d(
+             in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
+         )
+         self.bn_s1 = nn.BatchNorm2d(out_ch)
+         self.relu_s1 = nn.ReLU(inplace=True)
+ 
+     def forward(self, x):
+         hx = x
+         xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
+ 
+         return xout
+ 
+ 
+ ## upsample tensor 'src' to have the same spatial size with tensor 'tar'
+ def _upsample_like(src, tar):
+     src = F.interpolate(src, size=tar.shape[2:], mode="bilinear", align_corners=False)
+ 
+     return src
+ 
+ 
+ ### RSU-7 ###
+ class RSU7(nn.Module):
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
+         super(RSU7, self).__init__()
+ 
+         self.in_ch = in_ch
+         self.mid_ch = mid_ch
+         self.out_ch = out_ch
+ 
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)  ## 1 -> 1/2
+ 
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
+ 
+         self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
+ 
+         self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+ 
+     def forward(self, x):
+         b, c, h, w = x.shape
+ 
+         hx = x
+         hxin = self.rebnconvin(hx)
+ 
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+ 
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+ 
+         hx3 = self.rebnconv3(hx)
+         hx = self.pool3(hx3)
+ 
+         hx4 = self.rebnconv4(hx)
+         hx = self.pool4(hx4)
+ 
+         hx5 = self.rebnconv5(hx)
+         hx = self.pool5(hx5)
+ 
+         hx6 = self.rebnconv6(hx)
+ 
+         hx7 = self.rebnconv7(hx6)
+ 
+         hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
+         hx6dup = _upsample_like(hx6d, hx5)
+ 
+         hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
+         hx5dup = _upsample_like(hx5d, hx4)
+ 
+         hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+ 
+         hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+ 
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+ 
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+ 
+         return hx1d + hxin
+ 
+ 
+ ### RSU-6 ###
+ class RSU6(nn.Module):
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU6, self).__init__()
+ 
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+ 
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+ 
+         self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
+ 
+         self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+ 
+     def forward(self, x):
+         hx = x
+ 
+         hxin = self.rebnconvin(hx)
+ 
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+ 
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+ 
+         hx3 = self.rebnconv3(hx)
+         hx = self.pool3(hx3)
+ 
+         hx4 = self.rebnconv4(hx)
+         hx = self.pool4(hx4)
+ 
+         hx5 = self.rebnconv5(hx)
+ 
+         hx6 = self.rebnconv6(hx5)
+ 
+         hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
+         hx5dup = _upsample_like(hx5d, hx4)
+ 
+         hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+ 
+         hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+ 
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+ 
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+ 
+         return hx1d + hxin
+ 
+ 
+ ### RSU-5 ###
+ class RSU5(nn.Module):
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU5, self).__init__()
+ 
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+ 
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+ 
+         self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
+ 
+         self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+ 
+     def forward(self, x):
+         hx = x
+ 
+         hxin = self.rebnconvin(hx)
+ 
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+ 
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+ 
+         hx3 = self.rebnconv3(hx)
+         hx = self.pool3(hx3)
+ 
+         hx4 = self.rebnconv4(hx)
+ 
+         hx5 = self.rebnconv5(hx4)
+ 
+         hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+ 
+         hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+ 
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+ 
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+ 
+         return hx1d + hxin
+ 
+ 
+ ### RSU-4 ###
+ class RSU4(nn.Module):
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU4, self).__init__()
+ 
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+ 
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+         self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+ 
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
+ 
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+ 
+     def forward(self, x):
+         hx = x
+ 
+         hxin = self.rebnconvin(hx)
+ 
+         hx1 = self.rebnconv1(hxin)
+         hx = self.pool1(hx1)
+ 
+         hx2 = self.rebnconv2(hx)
+         hx = self.pool2(hx2)
+ 
+         hx3 = self.rebnconv3(hx)
+ 
+         hx4 = self.rebnconv4(hx3)
+ 
+         hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+ 
+         hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+ 
+         hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+ 
+         return hx1d + hxin
+ 
+ 
+ ### RSU-4F ###
+ class RSU4F(nn.Module):
+     def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+         super(RSU4F, self).__init__()
+ 
+         self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+ 
+         self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+         self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
+         self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
+ 
+         self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
+ 
+         self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
+         self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
+         self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+ 
+     def forward(self, x):
+         hx = x
+ 
+         hxin = self.rebnconvin(hx)
+ 
+         hx1 = self.rebnconv1(hxin)
+         hx2 = self.rebnconv2(hx1)
+         hx3 = self.rebnconv3(hx2)
+ 
+         hx4 = self.rebnconv4(hx3)
+ 
+         hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+         hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
+         hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
+ 
+         return hx1d + hxin
+ 
+ 
+ class myrebnconv(nn.Module):
+     def __init__(
+         self,
+         in_ch=3,
+         out_ch=1,
+         kernel_size=3,
+         stride=1,
+         padding=1,
+         dilation=1,
+         groups=1,
+     ):
+         super(myrebnconv, self).__init__()
+ 
+         self.conv = nn.Conv2d(
+             in_ch,
+             out_ch,
+             kernel_size=kernel_size,
+             stride=stride,
+             padding=padding,
+             dilation=dilation,
+             groups=groups,
+         )
+         self.bn = nn.BatchNorm2d(out_ch)
+         self.rl = nn.ReLU(inplace=True)
+ 
+     def forward(self, x):
+         return self.rl(self.bn(self.conv(x)))
+ 
+ 
+ class ISNetGTEncoder(nn.Module):
+     def __init__(self, in_ch=1, out_ch=1):
+         super(ISNetGTEncoder, self).__init__()
+ 
+         self.conv_in = myrebnconv(
+             in_ch, 16, 3, stride=2, padding=1
+         )  # nn.Conv2d(in_ch,64,3,stride=2,padding=1)
+ 
+         self.stage1 = RSU7(16, 16, 64)
+         self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage2 = RSU6(64, 16, 64)
+         self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage3 = RSU5(64, 32, 128)
+         self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage4 = RSU4(128, 32, 256)
+         self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage5 = RSU4F(256, 64, 512)
+         self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage6 = RSU4F(512, 64, 512)
+ 
+         self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+         self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+         self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+         self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+ 
+     @staticmethod
+     def compute_loss(args):
+         preds, targets = args
+         return muti_loss_fusion(preds, targets)
+ 
+     def forward(self, x):
+         hx = x
+ 
+         hxin = self.conv_in(hx)
+         # hx = self.pool_in(hxin)
+ 
+         # stage 1
+         hx1 = self.stage1(hxin)
+         hx = self.pool12(hx1)
+ 
+         # stage 2
+         hx2 = self.stage2(hx)
+         hx = self.pool23(hx2)
+ 
+         # stage 3
+         hx3 = self.stage3(hx)
+         hx = self.pool34(hx3)
+ 
+         # stage 4
+         hx4 = self.stage4(hx)
+         hx = self.pool45(hx4)
+ 
+         # stage 5
+         hx5 = self.stage5(hx)
+         hx = self.pool56(hx5)
+ 
+         # stage 6
+         hx6 = self.stage6(hx)
+ 
+         # side output
+         d1 = self.side1(hx1)
+         d1 = _upsample_like(d1, x)
+ 
+         d2 = self.side2(hx2)
+         d2 = _upsample_like(d2, x)
+ 
+         d3 = self.side3(hx3)
+         d3 = _upsample_like(d3, x)
+ 
+         d4 = self.side4(hx4)
+         d4 = _upsample_like(d4, x)
+ 
+         d5 = self.side5(hx5)
+         d5 = _upsample_like(d5, x)
+ 
+         d6 = self.side6(hx6)
+         d6 = _upsample_like(d6, x)
+ 
+         # d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))
+ 
+         # return [torch.sigmoid(d1), torch.sigmoid(d2), torch.sigmoid(d3), torch.sigmoid(d4), torch.sigmoid(d5), torch.sigmoid(d6)], [hx1, hx2, hx3, hx4, hx5, hx6]
+         return [d1, d2, d3, d4, d5, d6], [hx1, hx2, hx3, hx4, hx5, hx6]
+ 
+ 
+ class ISNetDIS(nn.Module):
+     def __init__(self, in_ch=3, out_ch=1):
+         super(ISNetDIS, self).__init__()
+ 
+         self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
+         self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage1 = RSU7(64, 32, 64)
+         self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage2 = RSU6(64, 32, 128)
+         self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage3 = RSU5(128, 64, 256)
+         self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage4 = RSU4(256, 128, 512)
+         self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage5 = RSU4F(512, 256, 512)
+         self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+ 
+         self.stage6 = RSU4F(512, 256, 512)
+ 
+         # decoder
+         self.stage5d = RSU4F(1024, 256, 512)
+         self.stage4d = RSU4(1024, 128, 256)
+         self.stage3d = RSU5(512, 64, 128)
+         self.stage2d = RSU6(256, 32, 64)
+         self.stage1d = RSU7(128, 16, 64)
+ 
+         self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+         self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+         self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+         self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+         self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+ 
+         # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
+ 
+     @staticmethod
+     def compute_loss_kl(preds, targets, dfs, fs, mode="MSE"):
+         return muti_loss_fusion_kl(preds, targets, dfs, fs, mode=mode)
+ 
+     @staticmethod
+     def compute_loss(args):
+         if len(args) == 3:
+             ds, dfs, labels = args
+             return muti_loss_fusion(ds, labels)
+         else:
+             ds, dfs, labels, fs = args
+             return muti_loss_fusion_kl(ds, labels, dfs, fs, mode="MSE")
+ 
+     def forward(self, x):
+         hx = x
+ 
+         hxin = self.conv_in(hx)
+         hx = self.pool_in(hxin)
+ 
+         # stage 1
+         hx1 = self.stage1(hxin)
+         hx = self.pool12(hx1)
+ 
+         # stage 2
+         hx2 = self.stage2(hx)
+         hx = self.pool23(hx2)
+ 
+         # stage 3
+         hx3 = self.stage3(hx)
+         hx = self.pool34(hx3)
+ 
+         # stage 4
+         hx4 = self.stage4(hx)
+         hx = self.pool45(hx4)
+ 
+         # stage 5
+         hx5 = self.stage5(hx)
+         hx = self.pool56(hx5)
+ 
+         # stage 6
+         hx6 = self.stage6(hx)
+         hx6up = _upsample_like(hx6, hx5)
+ 
+         # -------------------- decoder --------------------
+         hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
+         hx5dup = _upsample_like(hx5d, hx4)
+ 
+         hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
+         hx4dup = _upsample_like(hx4d, hx3)
+ 
+         hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
+         hx3dup = _upsample_like(hx3d, hx2)
+ 
+         hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
+         hx2dup = _upsample_like(hx2d, hx1)
+ 
+         hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
+ 
+         # side output
+         d1 = self.side1(hx1d)
+         d1 = _upsample_like(d1, x)
+ 
+         d2 = self.side2(hx2d)
+         d2 = _upsample_like(d2, x)
+ 
+         d3 = self.side3(hx3d)
+         d3 = _upsample_like(d3, x)
+ 
+         d4 = self.side4(hx4d)
+         d4 = _upsample_like(d4, x)
+ 
+         d5 = self.side5(hx5d)
+         d5 = _upsample_like(d5, x)
+ 
+         d6 = self.side6(hx6)
+         d6 = _upsample_like(d6, x)
+ 
+         # d0 = self.outconv(torch.cat((d1,d2,d3,d4,d5,d6),1))
+ 
+         # return [torch.sigmoid(d1), torch.sigmoid(d2), torch.sigmoid(d3), torch.sigmoid(d4), torch.sigmoid(d5), torch.sigmoid(d6)], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]
+         return [d1, d2, d3, d4, d5, d6], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]
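
A quick shape sanity check (sketch, randomly initialized weights): `ISNetDIS` returns six side-output logits, each upsampled back to the input resolution, plus six intermediate feature maps. The `[0][0].sigmoid()` indexing in anime_segmentation.py picks the finest side output `d1`:

import torch
from custom_controlnet_aux.anime_face_segment.isnet import ISNetDIS

net = ISNetDIS(in_ch=3, out_ch=1).eval()
with torch.no_grad():
    sides, feats = net(torch.randn(1, 3, 256, 256))
print(len(sides), sides[0].shape)  # 6 torch.Size([1, 1, 256, 256])
print(len(feats))                  # 6 decoder/bridge feature maps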
custom_controlnet_aux/anime_face_segment/network.py ADDED
@@ -0,0 +1,100 @@
+ # https://github.com/siyeong0/Anime-Face-Segmentation/blob/main/network.py
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torchvision
+ 
+ from custom_controlnet_aux.util import custom_torch_download
+ 
+ class UNet(nn.Module):
+     def __init__(self):
+         super(UNet, self).__init__()
+         self.NUM_SEG_CLASSES = 7  # Background, hair, face, eye, mouth, skin, clothes
+ 
+         mobilenet_v2 = torchvision.models.mobilenet_v2(pretrained=False)
+         mobilenet_v2.load_state_dict(torch.load(custom_torch_download(filename="mobilenet_v2-b0353104.pth")), strict=True)
+         mob_blocks = mobilenet_v2.features
+ 
+         # Encoder
+         self.en_block0 = nn.Sequential(  # in_ch=3 out_ch=16
+             mob_blocks[0],
+             mob_blocks[1]
+         )
+         self.en_block1 = nn.Sequential(  # in_ch=16 out_ch=24
+             mob_blocks[2],
+             mob_blocks[3],
+         )
+         self.en_block2 = nn.Sequential(  # in_ch=24 out_ch=32
+             mob_blocks[4],
+             mob_blocks[5],
+             mob_blocks[6],
+         )
+         self.en_block3 = nn.Sequential(  # in_ch=32 out_ch=96
+             mob_blocks[7],
+             mob_blocks[8],
+             mob_blocks[9],
+             mob_blocks[10],
+             mob_blocks[11],
+             mob_blocks[12],
+             mob_blocks[13],
+         )
+         self.en_block4 = nn.Sequential(  # in_ch=96 out_ch=160
+             mob_blocks[14],
+             mob_blocks[15],
+             mob_blocks[16],
+         )
+ 
+         # Decoder
+         self.de_block4 = nn.Sequential(  # in_ch=160 out_ch=96
+             nn.UpsamplingNearest2d(scale_factor=2),
+             nn.Conv2d(160, 96, kernel_size=3, padding=1),
+             nn.InstanceNorm2d(96),
+             nn.LeakyReLU(0.1),
+             nn.Dropout(p=0.2)
+         )
+         self.de_block3 = nn.Sequential(  # in_ch=96x2 out_ch=32
+             nn.UpsamplingNearest2d(scale_factor=2),
+             nn.Conv2d(96 * 2, 32, kernel_size=3, padding=1),
+             nn.InstanceNorm2d(32),
+             nn.LeakyReLU(0.1),
+             nn.Dropout(p=0.2)
+         )
+         self.de_block2 = nn.Sequential(  # in_ch=32x2 out_ch=24
+             nn.UpsamplingNearest2d(scale_factor=2),
+             nn.Conv2d(32 * 2, 24, kernel_size=3, padding=1),
+             nn.InstanceNorm2d(24),
+             nn.LeakyReLU(0.1),
+             nn.Dropout(p=0.2)
+         )
+         self.de_block1 = nn.Sequential(  # in_ch=24x2 out_ch=16
+             nn.UpsamplingNearest2d(scale_factor=2),
+             nn.Conv2d(24 * 2, 16, kernel_size=3, padding=1),
+             nn.InstanceNorm2d(16),
+             nn.LeakyReLU(0.1),
+             nn.Dropout(p=0.2)
+         )
+ 
+         self.de_block0 = nn.Sequential(  # in_ch=16x2 out_ch=7
+             nn.UpsamplingNearest2d(scale_factor=2),
+             nn.Conv2d(16 * 2, self.NUM_SEG_CLASSES, kernel_size=3, padding=1),
+             nn.Softmax2d()
+         )
+ 
+     def forward(self, x):
+         e0 = self.en_block0(x)
+         e1 = self.en_block1(e0)
+         e2 = self.en_block2(e1)
+         e3 = self.en_block3(e2)
+         e4 = self.en_block4(e3)
+ 
+         d4 = self.de_block4(e4)
+         c4 = torch.cat((d4, e3), 1)
+         d3 = self.de_block3(c4)
+         c3 = torch.cat((d3, e2), 1)
+         d2 = self.de_block2(c3)
+         c2 = torch.cat((d2, e1), 1)
+         d1 = self.de_block1(c2)
+         c1 = torch.cat((d1, e0), 1)
+         y = self.de_block0(c1)
+ 
+         return y
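
A shape walk-through sketch of the encoder/decoder pairing: each MobileNetV2 encoder block halves the resolution (512 -> 256 -> 128 -> 64 -> 32 -> 16) and the decoder mirrors it with nearest-neighbour upsampling plus skip concatenation. Note that constructing `UNet()` fetches the MobileNetV2 weights via `custom_torch_download`:

import torch
from custom_controlnet_aux.anime_face_segment.network import UNet

net = UNet().eval()  # downloads mobilenet_v2-b0353104.pth on first use
with torch.no_grad():
    y = net(torch.randn(1, 3, 512, 512))
print(y.shape)  # torch.Size([1, 7, 512, 512]) -> per-pixel class probabilities
# The final Softmax2d makes the 7 class channels sum to ~1 at every pixel.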
custom_controlnet_aux/anime_face_segment/util.py ADDED
@@ -0,0 +1,40 @@
+ # https://github.com/siyeong0/Anime-Face-Segmentation/blob/main/util.py
+ # The color palette is changed according to https://github.com/Mikubill/sd-webui-controlnet/blob/91f67ddcc7bc47537a6285864abfc12590f46c3f/annotator/anime_face_segment/__init__.py
+ import cv2 as cv
+ import glob
+ import numpy as np
+ import os
+ 
+ """
+ Original palette:
+ COLOR_BACKGROUND = (0,255,255)
+ COLOR_HAIR = (255,0,0)
+ COLOR_EYE = (0,0,255)
+ COLOR_MOUTH = (255,255,255)
+ COLOR_FACE = (0,255,0)
+ COLOR_SKIN = (255,255,0)
+ COLOR_CLOTHES = (255,0,255)
+ """
+ COLOR_BACKGROUND = (255, 255, 0)
+ COLOR_HAIR = (0, 0, 255)
+ COLOR_EYE = (255, 0, 0)
+ COLOR_MOUTH = (255, 255, 255)
+ COLOR_FACE = (0, 255, 0)
+ COLOR_SKIN = (0, 255, 255)
+ COLOR_CLOTHES = (255, 0, 255)
+ PALETTE = [COLOR_BACKGROUND, COLOR_HAIR, COLOR_EYE, COLOR_MOUTH, COLOR_FACE, COLOR_SKIN, COLOR_CLOTHES]
+ 
+ def img2seg(path):
+     src = cv.imread(path)
+     src = src.reshape(-1, 3)
+     seg_list = []
+     for color in PALETTE:
+         seg_list.append(np.where(np.all(src == color, axis=1), 1.0, 0.0))
+     dst = np.stack(seg_list, axis=1).reshape(512, 512, 7)
+ 
+     return dst.astype(np.float32)
+ 
+ def seg2img(src):
+     src = np.moveaxis(src, 0, 2)
+     dst = [[PALETTE[np.argmax(val)] for val in buf] for buf in src]
+ 
+     return np.array(dst).astype(np.uint8)
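
Sketch: `seg2img` takes the network's `(7, H, W)` class scores and paints each pixel with the palette color of its argmax class.

import numpy as np
from custom_controlnet_aux.anime_face_segment.util import seg2img, PALETTE

scores = np.zeros((7, 2, 2), dtype=np.float32)
scores[1, 0, 0] = 1.0  # top-left pixel -> class 1 (hair)
rgb = seg2img(scores)
print(rgb[0, 0])  # [  0   0 255] == PALETTE[1]
print(rgb[1, 1])  # argmax of an all-zero column is class 0 -> background color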
custom_controlnet_aux/binary/__init__.py ADDED
@@ -0,0 +1,38 @@
+ import warnings
+ import cv2
+ import numpy as np
+ from PIL import Image
+ from custom_controlnet_aux.util import HWC3, resize_image_with_pad
+ 
+ class BinaryDetector:
+     def __call__(self, input_image=None, bin_threshold=0, detect_resolution=512, output_type=None, upscale_method="INTER_CUBIC", **kwargs):
+         if "img" in kwargs:
+             warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
+             input_image = kwargs.pop("img")
+ 
+         if input_image is None:
+             raise ValueError("input_image must be defined.")
+ 
+         if not isinstance(input_image, np.ndarray):
+             input_image = np.array(input_image, dtype=np.uint8)
+             output_type = output_type or "pil"
+         else:
+             output_type = output_type or "np"
+ 
+         detected_map, remove_pad = resize_image_with_pad(input_image, detect_resolution, upscale_method)
+ 
+         img_gray = cv2.cvtColor(detected_map, cv2.COLOR_RGB2GRAY)
+         if bin_threshold == 0 or bin_threshold == 255:
+             # Otsu's threshold
+             otsu_threshold, img_bin = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+             print("Otsu threshold:", otsu_threshold)
+         else:
+             _, img_bin = cv2.threshold(img_gray, bin_threshold, 255, cv2.THRESH_BINARY_INV)
+ 
+         detected_map = cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB)
+         detected_map = HWC3(remove_pad(255 - detected_map))
+ 
+         if output_type == "pil":
+             detected_map = Image.fromarray(detected_map)
+ 
+         return detected_map
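
Usage sketch: passing `bin_threshold=0` (or `255`) switches to Otsu's automatic threshold selection; any other value is used as a fixed grayscale cutoff.

import numpy as np
from custom_controlnet_aux.binary import BinaryDetector

detector = BinaryDetector()
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
out = detector(input_image=image, bin_threshold=0, detect_resolution=512)  # Otsu
print(out.shape, out.dtype)  # HWC uint8 black-and-white map near detect_resolution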
custom_controlnet_aux/canny/__init__.py ADDED
@@ -0,0 +1,17 @@
+ import warnings
+ import cv2
+ import numpy as np
+ from PIL import Image
+ from custom_controlnet_aux.util import resize_image_with_pad, common_input_validate, HWC3
+ 
+ class CannyDetector:
+     def __call__(self, input_image=None, low_threshold=100, high_threshold=200, detect_resolution=512, output_type=None, upscale_method="INTER_CUBIC", **kwargs):
+         input_image, output_type = common_input_validate(input_image, output_type, **kwargs)
+         detected_map, remove_pad = resize_image_with_pad(input_image, detect_resolution, upscale_method)
+         detected_map = cv2.Canny(detected_map, low_threshold, high_threshold)
+         detected_map = HWC3(remove_pad(detected_map))
+ 
+         if output_type == "pil":
+             detected_map = Image.fromarray(detected_map)
+ 
+         return detected_map
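
Usage sketch of the two-threshold Canny call this detector wraps: edges with gradient above `high_threshold` are kept, and edges between the two thresholds survive only if connected to a strong edge.

import numpy as np
from custom_controlnet_aux.canny import CannyDetector

detector = CannyDetector()
image = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
edges = detector(input_image=image, low_threshold=100, high_threshold=200)
print(edges.shape)  # HWC uint8 edge map, edges replicated across 3 channels by HWC3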
custom_controlnet_aux/color/__init__.py ADDED
@@ -0,0 +1,37 @@
+ import warnings
+ import cv2
+ import numpy as np
+ from PIL import Image
+ from custom_controlnet_aux.util import HWC3, safer_memory, common_input_validate
+ 
+ def cv2_resize_shortest_edge(image, size):
+     h, w = image.shape[:2]
+     if h < w:
+         new_h = size
+         new_w = int(round(w / h * size))
+     else:
+         new_w = size
+         new_h = int(round(h / w * size))
+     resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
+     return resized_image
+ 
+ def apply_color(img, res=512):
+     img = cv2_resize_shortest_edge(img, res)
+     h, w = img.shape[:2]
+ 
+     input_img_color = cv2.resize(img, (w // 64, h // 64), interpolation=cv2.INTER_CUBIC)
+     input_img_color = cv2.resize(input_img_color, (w, h), interpolation=cv2.INTER_NEAREST)
+     return input_img_color
+ 
+ # Color T2I-Adapter-style multiples-of-64 color grid; the upscale methods are fixed.
+ class ColorDetector:
+     def __call__(self, input_image=None, detect_resolution=512, output_type=None, **kwargs):
+         input_image, output_type = common_input_validate(input_image, output_type, **kwargs)
+         input_image = HWC3(input_image)
+         detected_map = HWC3(apply_color(input_image, detect_resolution))
+ 
+         if output_type == "pil":
+             detected_map = Image.fromarray(detected_map)
+ 
+         return detected_map
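
Sketch of the grid arithmetic in `apply_color`: at `res=512` the image is shrunk to an 8x8 color grid (512 // 64) and blown back up with nearest-neighbour interpolation, yielding flat 64x64-pixel color cells.

import numpy as np
from custom_controlnet_aux.color import ColorDetector

image = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
grid = ColorDetector()(input_image=image, detect_resolution=512)
# Every 64x64 cell is a single flat color:
assert (grid[:64, :64] == grid[0, 0]).all()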