JasonSmithSO's picture
Upload 777 files
0034848 verified
from typing import Tuple
import math
import numpy as np
from enum import IntEnum
from typing import List, Tuple, Union
import torch
from torch.nn import functional as F
import logging
import cv2
Image = np.ndarray
Boxes = torch.Tensor
ImageSizeType = Tuple[int, int]
_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray]
IntTupleBox = Tuple[int, int, int, int]
class BoxMode(IntEnum):
"""
Enum of different ways to represent a box.
"""
XYXY_ABS = 0
"""
(x0, y0, x1, y1) in absolute floating points coordinates.
The coordinates in range [0, width or height].
"""
XYWH_ABS = 1
"""
(x0, y0, w, h) in absolute floating points coordinates.
"""
XYXY_REL = 2
"""
Not yet supported!
(x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image.
"""
XYWH_REL = 3
"""
Not yet supported!
(x0, y0, w, h) in range [0, 1]. They are relative to the size of the image.
"""
XYWHA_ABS = 4
"""
(xc, yc, w, h, a) in absolute floating points coordinates.
(xc, yc) is the center of the rotated box, and the angle a is in degrees ccw.
"""
@staticmethod
def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType:
"""
Args:
box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5
from_mode, to_mode (BoxMode)
Returns:
The converted box of the same type.
"""
if from_mode == to_mode:
return box
original_type = type(box)
is_numpy = isinstance(box, np.ndarray)
single_box = isinstance(box, (list, tuple))
if single_box:
assert len(box) == 4 or len(box) == 5, (
"BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor,"
" where k == 4 or 5"
)
arr = torch.tensor(box)[None, :]
else:
# avoid modifying the input box
if is_numpy:
arr = torch.from_numpy(np.asarray(box)).clone()
else:
arr = box.clone()
assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [
BoxMode.XYXY_REL,
BoxMode.XYWH_REL,
], "Relative mode not yet supported!"
if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS:
assert (
arr.shape[-1] == 5
), "The last dimension of input shape must be 5 for XYWHA format"
original_dtype = arr.dtype
arr = arr.double()
w = arr[:, 2]
h = arr[:, 3]
a = arr[:, 4]
c = torch.abs(torch.cos(a * math.pi / 180.0))
s = torch.abs(torch.sin(a * math.pi / 180.0))
# This basically computes the horizontal bounding rectangle of the rotated box
new_w = c * w + s * h
new_h = c * h + s * w
# convert center to top-left corner
arr[:, 0] -= new_w / 2.0
arr[:, 1] -= new_h / 2.0
# bottom-right corner
arr[:, 2] = arr[:, 0] + new_w
arr[:, 3] = arr[:, 1] + new_h
arr = arr[:, :4].to(dtype=original_dtype)
elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS:
original_dtype = arr.dtype
arr = arr.double()
arr[:, 0] += arr[:, 2] / 2.0
arr[:, 1] += arr[:, 3] / 2.0
angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype)
arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype)
else:
if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS:
arr[:, 2] += arr[:, 0]
arr[:, 3] += arr[:, 1]
elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS:
arr[:, 2] -= arr[:, 0]
arr[:, 3] -= arr[:, 1]
else:
raise NotImplementedError(
"Conversion from BoxMode {} to {} is not supported yet".format(
from_mode, to_mode
)
)
if single_box:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
else:
return arr
class MatrixVisualizer:
"""
Base visualizer for matrix data
"""
def __init__(
self,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
val_scale=1.0,
alpha=0.7,
interp_method_matrix=cv2.INTER_LINEAR,
interp_method_mask=cv2.INTER_NEAREST,
):
self.inplace = inplace
self.cmap = cmap
self.val_scale = val_scale
self.alpha = alpha
self.interp_method_matrix = interp_method_matrix
self.interp_method_mask = interp_method_mask
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
self._check_image(image_bgr)
self._check_mask_matrix(mask, matrix)
if self.inplace:
image_target_bgr = image_bgr
else:
image_target_bgr = image_bgr * 0
x, y, w, h = [int(v) for v in bbox_xywh]
if w <= 0 or h <= 0:
return image_bgr
mask, matrix = self._resize(mask, matrix, w, h)
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
matrix_scaled = matrix.astype(np.float32) * self.val_scale
_EPSILON = 1e-6
if np.any(matrix_scaled > 255 + _EPSILON):
logger = logging.getLogger(__name__)
logger.warning(
f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
)
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
image_target_bgr[y : y + h, x : x + w, :] = (
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
)
return image_target_bgr.astype(np.uint8)
def _resize(self, mask, matrix, w, h):
if (w != mask.shape[1]) or (h != mask.shape[0]):
mask = cv2.resize(mask, (w, h), self.interp_method_mask)
if (w != matrix.shape[1]) or (h != matrix.shape[0]):
matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix)
return mask, matrix
def _check_image(self, image_rgb):
assert len(image_rgb.shape) == 3
assert image_rgb.shape[2] == 3
assert image_rgb.dtype == np.uint8
def _check_mask_matrix(self, mask, matrix):
assert len(matrix.shape) == 2
assert len(mask.shape) == 2
assert mask.dtype == np.uint8
class DensePoseResultsVisualizer:
def visualize(
self,
image_bgr: Image,
results,
) -> Image:
context = self.create_visualization_context(image_bgr)
for i, result in enumerate(results):
boxes_xywh, labels, uv = result
iuv_array = torch.cat(
(labels[None].type(torch.float32), uv * 255.0)
).type(torch.uint8)
self.visualize_iuv_arr(context, iuv_array.cpu().numpy(), boxes_xywh)
image_bgr = self.context_to_image_bgr(context)
return image_bgr
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
pass
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
def __init__(
self,
data_extractor,
segm_extractor,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
alpha=0.7,
val_scale=1.0,
**kwargs,
):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
)
self.data_extractor = data_extractor
self.segm_extractor = segm_extractor
def context_to_image_bgr(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
image_bgr = self.get_image_bgr_from_context(context)
matrix = self.data_extractor(iuv_arr)
segm = self.segm_extractor(iuv_arr)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[segm > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
def _extract_i_from_iuvarr(iuv_arr):
return iuv_arr[0, :, :]
def _extract_u_from_iuvarr(iuv_arr):
return iuv_arr[1, :, :]
def _extract_v_from_iuvarr(iuv_arr):
return iuv_arr[2, :, :]
def make_int_box(box: torch.Tensor) -> IntTupleBox:
int_box = [0, 0, 0, 0]
int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist())
return int_box[0], int_box[1], int_box[2], int_box[3]
def densepose_chart_predictor_output_to_result_with_confidences(
boxes: Boxes,
coarse_segm,
fine_segm,
u, v
):
boxes_xyxy_abs = boxes.clone()
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
box_xywh = make_int_box(boxes_xywh_abs[0])
labels = resample_fine_and_coarse_segm_tensors_to_bbox(fine_segm, coarse_segm, box_xywh).squeeze(0)
uv = resample_uv_tensors_to_bbox(u, v, labels, box_xywh)
confidences = []
return box_xywh, labels, uv
def resample_fine_and_coarse_segm_tensors_to_bbox(
fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
"""
Resample fine and coarse segmentation tensors to the given
bounding box and derive labels for each pixel of the bounding box
Args:
fine_segm: float tensor of shape [1, C, Hout, Wout]
coarse_segm: float tensor of shape [1, K, Hout, Wout]
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
corner coordinates, width (W) and height (H)
Return:
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
"""
x, y, w, h = box_xywh_abs
w = max(int(w), 1)
h = max(int(h), 1)
# coarse segmentation
coarse_segm_bbox = F.interpolate(
coarse_segm,
(h, w),
mode="bilinear",
align_corners=False,
).argmax(dim=1)
# combined coarse and fine segmentation
labels = (
F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
* (coarse_segm_bbox > 0).long()
)
return labels
def resample_uv_tensors_to_bbox(
u: torch.Tensor,
v: torch.Tensor,
labels: torch.Tensor,
box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
"""
Resamples U and V coordinate estimates for the given bounding box
Args:
u (tensor [1, C, H, W] of float): U coordinates
v (tensor [1, C, H, W] of float): V coordinates
labels (tensor [H, W] of long): labels obtained by resampling segmentation
outputs for the given bounding box
box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
Return:
Resampled U and V coordinates - a tensor [2, H, W] of float
"""
x, y, w, h = box_xywh_abs
w = max(int(w), 1)
h = max(int(h), 1)
u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
for part_id in range(1, u_bbox.size(1)):
uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
return uv