Spaces:
Configuration error
Configuration error
| from __future__ import division | |
| from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, cast | |
| import numpy as np | |
| from .transforms_interface import BoxInternalType, BoxType | |
| from .utils import DataProcessor, Params | |
# Public API of this module, in the order the names are exported.
__all__ = [
    # Scaling between pixel and normalized coordinates.
    "normalize_bbox",
    "denormalize_bbox",
    "normalize_bboxes",
    "denormalize_bboxes",
    # Area and visibility helpers.
    "calculate_bbox_area",
    "filter_bboxes_by_visibility",
    # Conversion between external formats and the internal one.
    "convert_bbox_to_albumentations",
    "convert_bbox_from_albumentations",
    "convert_bboxes_to_albumentations",
    "convert_bboxes_from_albumentations",
    # Validation.
    "check_bbox",
    "check_bboxes",
    # Filtering and aggregation.
    "filter_bboxes",
    "union_of_bboxes",
    # Pipeline integration classes.
    "BboxProcessor",
    "BboxParams",
]

# Constrained type variable: a function annotated with it returns the same
# box flavour (BoxType or BoxInternalType) that it was given.
TBox = TypeVar("TBox", BoxType, BoxInternalType)
class BboxParams(Params):
    """Parameters of bounding boxes.

    Args:
        format (str): format of bounding boxes. Should be 'coco', 'pascal_voc', 'albumentations' or 'yolo'.

            The `coco` format
                `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
            The `pascal_voc` format
                `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
            The `albumentations` format
                is like `pascal_voc`, but normalized,
                in other words: `[x_min, y_min, x_max, y_max]`, e.g. [0.2, 0.3, 0.4, 0.5].
            The `yolo` format
                `[x, y, width, height]`, e.g. [0.1, 0.2, 0.3, 0.4];
                `x`, `y` - normalized bbox center; `width`, `height` - normalized bbox width and height.
        label_fields (list): list of fields that are joined with boxes, e.g. labels.
            Should be same type as boxes.
        min_area (float): minimum area of a bounding box. All bounding boxes whose
            visible area in pixels is less than this value will be removed. Default: 0.0.
        min_visibility (float): minimum fraction of area for a bounding box
            to remain in the list. Default: 0.0.
        min_width (float): Minimum width of a bounding box. All bounding boxes whose width is
            less than this value will be removed. Default: 0.0.
        min_height (float): Minimum height of a bounding box. All bounding boxes whose height is
            less than this value will be removed. Default: 0.0.
        check_each_transform (bool): if `True`, then bboxes will be checked after each dual transform.
            Default: `True`
    """

    def __init__(
        self,
        format: str,
        label_fields: Optional[Sequence[str]] = None,
        min_area: float = 0.0,
        min_visibility: float = 0.0,
        min_width: float = 0.0,
        min_height: float = 0.0,
        check_each_transform: bool = True,
    ):
        # `format` and `label_fields` are handled by the base class.
        super().__init__(format, label_fields)
        self.min_area = min_area
        self.min_visibility = min_visibility
        self.min_width = min_width
        self.min_height = min_height
        self.check_each_transform = check_each_transform

    def _to_dict(self) -> Dict[str, Any]:
        """Return the base-class dictionary extended with the bbox-specific thresholds."""
        data = super()._to_dict()
        data.update(
            {
                "min_area": self.min_area,
                "min_visibility": self.min_visibility,
                "min_width": self.min_width,
                "min_height": self.min_height,
                "check_each_transform": self.check_each_transform,
            }
        )
        return data

    # Both methods below take `cls`, so they must be class methods: without the
    # decorator, `BboxParams.is_serializable()` fails with a missing-argument
    # TypeError. Calls made through an instance keep working.
    @classmethod
    def is_serializable(cls) -> bool:
        """Report that these parameters can be serialized."""
        return True

    @classmethod
    def get_class_fullname(cls) -> str:
        """Return the name under which this class is identified."""
        return "BboxParams"
class BboxProcessor(DataProcessor):
    """Data processor that validates, filters and converts bounding boxes.

    Args:
        params: Bounding box parameters (format, label fields, filter thresholds).
        additional_targets: Optional mapping forwarded unchanged to the base class.
    """

    def __init__(self, params: BboxParams, additional_targets: Optional[Dict[str, str]] = None):
        super().__init__(params, additional_targets)

    # Exposed as a property so that attribute-style reads yield the string
    # "bboxes" rather than a bound method.
    # NOTE(review): the base class lives in `.utils` and is not visible here;
    # presumably it reads `self.default_data_name` as a plain attribute - confirm.
    @property
    def default_data_name(self) -> str:
        """Name of the data field that holds bounding boxes by default."""
        return "bboxes"

    def ensure_data_valid(self, data: Dict[str, Any]) -> None:
        """Verify that every bounding box can be associated with a label.

        Args:
            data: Dictionary of targets keyed by field name.

        Raises:
            ValueError: If a non-empty bbox field holds boxes with fewer than five elements
                while `label_fields` is not set, or if a configured label field is missing
                from `data`.
        """
        for data_name in self.data_fields:
            data_exists = data_name in data and len(data[data_name])
            # Only the first box is inspected: four coordinates without a fifth
            # element means the label has to come from `label_fields`.
            if data_exists and len(data[data_name][0]) < 5:
                if self.params.label_fields is None:
                    raise ValueError(
                        "Please specify 'label_fields' in 'bbox_params' or add labels to the end of bbox "
                        "because bboxes must have labels"
                    )
        if self.params.label_fields:
            if not all(i in data for i in self.params.label_fields):
                raise ValueError("Your 'label_fields' are not valid - them must have same names as params in dict")

    def filter(self, data: Sequence, rows: int, cols: int) -> List:
        """Drop boxes that fail the area, visibility, width or height thresholds.

        Args:
            data: Bounding boxes to filter.
            rows: Image height.
            cols: Image width.

        Returns:
            The bounding boxes kept by `filter_bboxes`.
        """
        self.params: BboxParams
        return filter_bboxes(
            data,
            rows,
            cols,
            min_area=self.params.min_area,
            min_visibility=self.params.min_visibility,
            min_width=self.params.min_width,
            min_height=self.params.min_height,
        )

    def check(self, data: Sequence, rows: int, cols: int) -> None:
        """Validate every bounding box; `rows` and `cols` are accepted but not used."""
        check_bboxes(data)

    def convert_from_albumentations(self, data: Sequence, rows: int, cols: int) -> List[BoxType]:
        """Convert boxes from the internal format to `self.params.format`, validating each box."""
        return convert_bboxes_from_albumentations(data, self.params.format, rows, cols, check_validity=True)

    def convert_to_albumentations(self, data: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
        """Convert boxes from `self.params.format` to the internal format, validating each box."""
        return convert_bboxes_to_albumentations(data, self.params.format, rows, cols, check_validity=True)
def normalize_bbox(bbox: TBox, rows: int, cols: int) -> TBox:
    """Scale a pixel-space bounding box into the unit square.

    The x-coordinates are divided by the image width and the y-coordinates by the
    image height. Any elements after the first four are carried over unchanged.

    Args:
        bbox: Denormalized bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image height.
        cols: Image width.

    Returns:
        Normalized bounding box `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: If rows or cols is less or equal zero

    """
    if rows <= 0:
        raise ValueError("Argument rows must be positive integer")
    if cols <= 0:
        raise ValueError("Argument cols must be positive integer")

    left, top, right, bottom = bbox[:4]
    extras: Tuple[Any, ...] = tuple(bbox[4:])
    scaled = (left / cols, top / rows, right / cols, bottom / rows)
    return cast("BoxType", scaled + extras)  # type: ignore
def denormalize_bbox(bbox: TBox, rows: int, cols: int) -> TBox:
    """Scale a normalized bounding box back to pixel coordinates.

    The x-coordinates are multiplied by the image width and the y-coordinates by the
    image height, which inverts :func:`~albumentations.augmentations.bbox.normalize_bbox`.
    Any elements after the first four are carried over unchanged.

    Args:
        bbox: Normalized bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image height.
        cols: Image width.

    Returns:
        Denormalized bounding box `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: If rows or cols is less or equal zero

    """
    # The box is unpacked before the size checks, so a malformed box is
    # reported ahead of a bad image size.
    left, top, right, bottom = bbox[:4]
    extras: Tuple[Any, ...] = tuple(bbox[4:])

    if rows <= 0:
        raise ValueError("Argument rows must be positive integer")
    if cols <= 0:
        raise ValueError("Argument cols must be positive integer")

    scaled = (left * cols, top * rows, right * cols, bottom * rows)
    return cast("BoxType", scaled + extras)  # type: ignore
def normalize_bboxes(bboxes: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
    """Apply `normalize_bbox` to every box of a sequence.

    Args:
        bboxes: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
        rows: Image height.
        cols: Image width.

    Returns:
        Normalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.

    """
    normalized: List[BoxType] = []
    for box in bboxes:
        normalized.append(normalize_bbox(box, rows, cols))
    return normalized
def denormalize_bboxes(bboxes: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
    """Apply `denormalize_bbox` to every box of a sequence.

    Args:
        bboxes: Normalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
        rows: Image height.
        cols: Image width.

    Returns:
        List: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.

    """
    denormalized: List[BoxType] = []
    for box in bboxes:
        denormalized.append(denormalize_bbox(box, rows, cols))
    return denormalized
def calculate_bbox_area(bbox: BoxType, rows: int, cols: int) -> float:
    """Compute the pixel area covered by a normalized bounding box.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image height.
        cols: Image width.

    Return:
        Area in (fractional) pixels of the (denormalized) bounding box.

    """
    # Convert to pixel coordinates first so the product is expressed in pixels.
    left, top, right, bottom = denormalize_bbox(bbox, rows, cols)[:4]
    return (right - left) * (bottom - top)
def filter_bboxes_by_visibility(
    original_shape: Sequence[int],
    bboxes: Sequence[BoxType],
    transformed_shape: Sequence[int],
    transformed_bboxes: Sequence[BoxType],
    threshold: float = 0.0,
    min_area: float = 0.0,
) -> List[BoxType]:
    """Filter bounding boxes and return only those boxes whose visibility after transformation is above
    the threshold and whose area in pixels is not less than min_area.

    Visibility is the ratio of the transformed box area to the original box area, both in pixels.
    Boxes are paired positionally; a transformed box with any coordinate outside `[0.0, 1.0]` is dropped,
    and so is a pair whose original box has zero area, because its visibility ratio is undefined.

    Args:
        original_shape: Original image shape `(height, width, ...)`.
        bboxes: Original bounding boxes `[(x_min, y_min, x_max, y_max)]`.
        transformed_shape: Transformed image shape `(height, width)`.
        transformed_bboxes: Transformed bounding boxes `[(x_min, y_min, x_max, y_max)]`.
        threshold: visibility threshold. Should be a value in the range [0.0, 1.0].
        min_area: Minimal area threshold.

    Returns:
        Filtered bounding boxes `[(x_min, y_min, x_max, y_max)]`.

    """
    img_height, img_width = original_shape[:2]
    transformed_img_height, transformed_img_width = transformed_shape[:2]

    visible_bboxes = []
    for bbox, transformed_bbox in zip(bboxes, transformed_bboxes):
        if not all(0.0 <= value <= 1.0 for value in transformed_bbox[:4]):
            continue
        bbox_area = calculate_bbox_area(bbox, img_height, img_width)
        if bbox_area == 0:
            # Degenerate original box: dividing by its area would raise
            # ZeroDivisionError, so it is treated as not visible.
            continue
        transformed_bbox_area = calculate_bbox_area(transformed_bbox, transformed_img_height, transformed_img_width)
        if transformed_bbox_area < min_area:
            continue
        visibility = transformed_bbox_area / bbox_area
        if visibility >= threshold:
            visible_bboxes.append(transformed_bbox)
    return visible_bboxes
def convert_bbox_to_albumentations(
    bbox: BoxType, source_format: str, rows: int, cols: int, check_validity: bool = False
) -> BoxType:
    """Translate a bounding box from `source_format` into the albumentations representation.

    The albumentations representation holds the normalized top-left and bottom-right corners,
    `(x_min, y_min, x_max, y_max)`, e.g. `(0.15, 0.27, 0.67, 0.5)`. Elements after the first
    four are appended to the result unchanged.

    Args:
        bbox: A bounding box tuple.
        source_format: format of the bounding box. Should be 'coco', 'pascal_voc', or 'yolo'.
        check_validity: Check if all boxes are valid boxes.
        rows: Image height.
        cols: Image width.

    Returns:
        tuple: A bounding box `(x_min, y_min, x_max, y_max)`.

    Note:
        The `coco` format of a bounding box looks like `(x_min, y_min, width, height)`, e.g. (97, 12, 150, 200).
        The `pascal_voc` format of a bounding box looks like `(x_min, y_min, x_max, y_max)`, e.g. (97, 12, 247, 212).
        The `yolo` format of a bounding box looks like `(x, y, width, height)`, e.g. (0.3, 0.1, 0.05, 0.07);
        where `x`, `y` coordinates of the center of the box, all values normalized to 1 by image height and width.

    Raises:
        ValueError: if `source_format` is not equal to `coco` or `pascal_voc`, or `yolo`.
        ValueError: If `check_validity` is set and a YOLO coordinate is outside the range (0, 1].

    """
    if source_format not in {"coco", "pascal_voc", "yolo"}:
        raise ValueError(
            f"Unknown source_format {source_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'"
        )

    if source_format == "yolo":
        # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
        yolo_values = np.array(bbox[:4])
        if check_validity and np.any((yolo_values <= 0) | (yolo_values > 1)):
            raise ValueError("In YOLO format all coordinates must be float and in range (0, 1]")

        # YOLO stores the box center, so step back by half the size to reach the corner.
        center_x, center_y, box_w, box_h = bbox[:4]
        x_min = center_x - box_w / 2
        y_min = center_y - box_h / 2
        x_max = x_min + box_w
        y_max = y_min + box_h
    elif source_format == "coco":
        x_min, y_min, box_w, box_h = bbox[:4]
        x_max = x_min + box_w
        y_max = y_min + box_h
    else:
        x_min, y_min, x_max, y_max = bbox[:4]

    converted = (x_min, y_min, x_max, y_max) + tuple(bbox[4:])  # type: ignore

    # YOLO values are already normalized; the other formats are in pixels.
    if source_format != "yolo":
        converted = normalize_bbox(converted, rows, cols)
    if check_validity:
        check_bbox(converted)
    return converted
def convert_bbox_from_albumentations(
    bbox: BoxType, target_format: str, rows: int, cols: int, check_validity: bool = False
) -> BoxType:
    """Convert a bounding box from the format used by albumentations to a format, specified in `target_format`.

    Elements after the first four are appended to the result unchanged. For the `yolo` target the
    coordinates stay normalized; for `coco` and `pascal_voc` they are denormalized to pixels.

    Args:
        bbox: An albumentations bounding box `(x_min, y_min, x_max, y_max)`.
        target_format: required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
        rows: Image height.
        cols: Image width.
        check_validity: Check if all boxes are valid boxes.

    Returns:
        tuple: A bounding box.

    Note:
        The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
        The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
        The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07].

    Raises:
        ValueError: if `target_format` is not equal to `coco`, `pascal_voc` or `yolo`.

    """
    if target_format not in {"coco", "pascal_voc", "yolo"}:
        raise ValueError(
            f"Unknown target_format {target_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'"
        )
    if check_validity:
        check_bbox(bbox)

    if target_format != "yolo":
        bbox = denormalize_bbox(bbox, rows, cols)

    if target_format == "coco":
        (x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
        width = x_max - x_min
        height = y_max - y_min
        bbox = cast("BoxType", (x_min, y_min, width, height) + tail)
    elif target_format == "yolo":
        # The yolo path skips denormalize_bbox, so `bbox` may still be a list or
        # an array here; the tail must be turned into a tuple explicitly or the
        # concatenation below fails (list) or broadcasts (array).
        (x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
        x = (x_min + x_max) / 2.0
        y = (y_min + y_max) / 2.0
        w = x_max - x_min
        h = y_max - y_min
        bbox = cast("BoxType", (x, y, w, h) + tail)
    return bbox
def convert_bboxes_to_albumentations(
    bboxes: Sequence[BoxType], source_format: str, rows: int, cols: int, check_validity: bool = False
) -> List[BoxType]:
    """Convert a list of bounding boxes from a format specified in `source_format` to the format used
    by albumentations.

    Args:
        bboxes: List of bounding boxes in `source_format`.
        source_format: format of the bounding boxes. Should be 'coco', 'pascal_voc' or 'yolo'.
        rows: Image height.
        cols: Image width.
        check_validity: Check if all boxes are valid boxes.

    Returns:
        List of albumentations bounding boxes `(x_min, y_min, x_max, y_max)`.

    """
    return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
def convert_bboxes_from_albumentations(
    bboxes: Sequence[BoxType], target_format: str, rows: int, cols: int, check_validity: bool = False
) -> List[BoxType]:
    """Apply `convert_bbox_from_albumentations` to every box of a sequence.

    Args:
        bboxes: List of albumentation bounding box `(x_min, y_min, x_max, y_max)`.
        target_format: required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
        rows: Image height.
        cols: Image width.
        check_validity: Check if all boxes are valid boxes.

    Returns:
        List of bounding boxes.

    """
    converted: List[BoxType] = []
    for box in bboxes:
        converted.append(convert_bbox_from_albumentations(box, target_format, rows, cols, check_validity))
    return converted
def check_bbox(bbox: BoxType) -> None:
    """Validate that a box lies inside the unit square and has positive width and height.

    Each of the first four values must be within `[0, 1]` or numerically close to one
    of the two bounds, and each maximum must be strictly greater than its minimum.

    Raises:
        ValueError: If a coordinate is out of range, or a maximum is not greater than its minimum.

    """
    corners = bbox[:4]
    for name, value in zip(("x_min", "y_min", "x_max", "y_max"), corners):
        # Values a hair outside the bounds are tolerated to absorb rounding noise.
        if 0 <= value <= 1 or np.isclose(value, 0) or np.isclose(value, 1):
            continue
        raise ValueError(f"Expected {name} for bbox {bbox} to be in the range [0.0, 1.0], got {value}.")

    left, top, right, bottom = corners
    if right <= left:
        raise ValueError(f"x_max is less than or equal to x_min for bbox {bbox}.")
    if bottom <= top:
        raise ValueError(f"y_max is less than or equal to y_min for bbox {bbox}.")
def check_bboxes(bboxes: Sequence[BoxType]) -> None:
    """Run `check_bbox` on every box, stopping at the first one that raises."""
    for box in bboxes:
        check_bbox(box)
def filter_bboxes(
    bboxes: Sequence[BoxType],
    rows: int,
    cols: int,
    min_area: float = 0.0,
    min_visibility: float = 0.0,
    min_width: float = 0.0,
    min_height: float = 0.0,
) -> List[BoxType]:
    """Clip boxes to the image and drop those that end up too small or too hidden.

    Every box is clipped to the `[0, 1]` range. A clipped box is kept only when its area is
    non-zero and at least `min_area`, the ratio of clipped to unclipped area is at least
    `min_visibility`, and its width and height in pixels reach `min_width` and `min_height`.

    Args:
        bboxes: List of albumentation bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image height.
        cols: Image width.
        min_area: Minimum area of a bounding box. All bounding boxes whose visible area in pixels
            is less than this value will be removed. Default: 0.0.
        min_visibility: Minimum fraction of area for a bounding box to remain in the list. Default: 0.0.
        min_width: Minimum width of a bounding box. All bounding boxes whose width is
            less than this value will be removed. Default: 0.0.
        min_height: Minimum height of a bounding box. All bounding boxes whose height is
            less than this value will be removed. Default: 0.0.

    Returns:
        List of bounding boxes.

    """
    kept: List[BoxType] = []
    for box in bboxes:
        # Area of the box as given, before it is limited to the image.
        area_unclipped = calculate_bbox_area(box, rows, cols)

        clipped = cast("BoxType", tuple(np.clip(box[:4], 0, 1.0)))
        extras = tuple(box[4:])
        area_clipped = calculate_bbox_area(clipped, rows, cols)

        # Pixel dimensions of the clipped box.
        left, top, right, bottom = denormalize_bbox(clipped, rows, cols)[:4]
        width_px = right - left
        height_px = bottom - top

        # A zero clipped area is rejected first; this also keeps the division
        # below away from a zero unclipped area.
        if area_clipped == 0:
            continue

        passes_thresholds = (
            area_clipped >= min_area
            and area_clipped / area_unclipped >= min_visibility
            and width_px >= min_width
            and height_px >= min_height
        )
        if passes_thresholds:
            kept.append(cast("BoxType", clipped + extras))
    return kept
def union_of_bboxes(height: int, width: int, bboxes: Sequence[BoxType], erosion_rate: float = 0.0) -> BoxType:
    """Compute the smallest box that encloses all given boxes, optionally eroded.

    Args:
        height (float): Height of image or space.
        width (float): Width of image or space.
        bboxes (List[tuple]): List like bounding boxes. Format is `[(x_min, y_min, x_max, y_max)]`.
        erosion_rate (float): How much each bounding box can be shrunk, useful for erosive cropping.
            Set this in range [0, 1]. 0 will not be erosive at all, 1.0 can make any bbox lose its volume.

    Returns:
        tuple: A bounding box `(x_min, y_min, x_max, y_max)`. With no input boxes this is
        `(width, height, 0, 0)`, the untouched starting values.

    """
    # Start from an inverted box so the first real box always replaces it.
    left, top = width, height
    right, bottom = 0, 0
    for box in bboxes:
        x_min, y_min, x_max, y_max = box[:4]
        # Each side is pulled inwards by a fraction of the box size.
        shrink_x = erosion_rate * (x_max - x_min)
        shrink_y = erosion_rate * (y_max - y_min)
        left = np.min([left, x_min + shrink_x])
        top = np.min([top, y_min + shrink_y])
        right = np.max([right, x_max - shrink_x])
        bottom = np.max([bottom, y_max - shrink_y])
    return left, top, right, bottom