| 
							 | 
						 | 
					
					
						
						| 
							 | 
						import logging | 
					
					
						
						| 
							 | 
						from typing import Callable, Dict, List, Optional, Tuple, Union | 
					
					
						
						| 
							 | 
						import torch | 
					
					
						
						| 
							 | 
						from torch import nn | 
					
					
						
						| 
							 | 
						from torch.nn import functional as F | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						from detectron2.config import configurable | 
					
					
						
						| 
							 | 
						from detectron2.data.detection_utils import get_fed_loss_cls_weights | 
					
					
						
						| 
							 | 
						from detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple | 
					
					
						
						| 
							 | 
						from detectron2.modeling.box_regression import Box2BoxTransform, _dense_box_regression_loss | 
					
					
						
						| 
							 | 
						from detectron2.structures import Boxes, Instances | 
					
					
						
						| 
							 | 
						from detectron2.utils.events import get_event_storage | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
# Names exported by `from <module> import *`; the single-image helper and the
# logging helper defined below are intentionally not listed here.
__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"]


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						""" | 
					
					
						
						| 
							 | 
						Shape shorthand in this module: | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    N: number of images in the minibatch | 
					
					
						
						| 
							 | 
						    R: number of ROIs, combined over all images, in the minibatch | 
					
					
						
						| 
							 | 
						    Ri: number of ROIs in image i | 
					
					
						
						| 
							 | 
    K: number of foreground classes. E.g., there are 80 foreground classes in COCO.
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						Naming convention: | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box | 
					
					
						
						| 
							 | 
						    transform (see :class:`box_regression.Box2BoxTransform`). | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    pred_class_logits: predicted class scores in [-inf, +inf]; use | 
					
					
						
						| 
							 | 
						        softmax(pred_class_logits) to estimate P(class). | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    gt_classes: ground-truth classification labels in [0, K], where [0, K) represent | 
					
					
						
						| 
							 | 
						        foreground object classes and K represents the background class. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    pred_proposal_deltas: predicted box2box transform deltas for transforming proposals | 
					
					
						
						| 
							 | 
						        to detection box predictions. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    gt_proposal_deltas: ground-truth box2box transform deltas | 
					
					
						
						| 
							 | 
						""" | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						def fast_rcnn_inference( | 
					
					
						
						| 
							 | 
						    boxes: List[torch.Tensor], | 
					
					
						
						| 
							 | 
						    scores: List[torch.Tensor], | 
					
					
						
						| 
							 | 
						    image_shapes: List[Tuple[int, int]], | 
					
					
						
						| 
							 | 
						    score_thresh: float, | 
					
					
						
						| 
							 | 
						    nms_thresh: float, | 
					
					
						
						| 
							 | 
						    topk_per_image: int, | 
					
					
						
						| 
							 | 
						): | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    Call `fast_rcnn_inference_single_image` for all images. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    Args: | 
					
					
						
						| 
							 | 
						        boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic | 
					
					
						
						| 
							 | 
						            boxes for each image. Element i has shape (Ri, K * 4) if doing | 
					
					
						
						| 
							 | 
						            class-specific regression, or (Ri, 4) if doing class-agnostic | 
					
					
						
						| 
							 | 
						            regression, where Ri is the number of predicted objects for image i. | 
					
					
						
						| 
							 | 
						            This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. | 
					
					
						
						| 
							 | 
						        scores (list[Tensor]): A list of Tensors of predicted class scores for each image. | 
					
					
						
						| 
							 | 
						            Element i has shape (Ri, K + 1), where Ri is the number of predicted objects | 
					
					
						
						| 
							 | 
						            for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. | 
					
					
						
						| 
							 | 
						        image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. | 
					
					
						
						| 
							 | 
						        score_thresh (float): Only return detections with a confidence score exceeding this | 
					
					
						
						| 
							 | 
						            threshold. | 
					
					
						
						| 
							 | 
						        nms_thresh (float):  The threshold to use for box non-maximum suppression. Value in [0, 1]. | 
					
					
						
						| 
							 | 
						        topk_per_image (int): The number of top scoring detections to return. Set < 0 to return | 
					
					
						
						| 
							 | 
						            all detections. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    Returns: | 
					
					
						
						| 
							 | 
						        instances: (list[Instances]): A list of N instances, one for each image in the batch, | 
					
					
						
						| 
							 | 
						            that stores the topk most confidence detections. | 
					
					
						
						| 
							 | 
						        kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates | 
					
					
						
						| 
							 | 
						            the corresponding boxes/scores index in [0, Ri) from the input, for image i. | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    result_per_image = [ | 
					
					
						
						| 
							 | 
						        fast_rcnn_inference_single_image( | 
					
					
						
						| 
							 | 
						            boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image | 
					
					
						
						| 
							 | 
						        ) | 
					
					
						
						| 
							 | 
						        for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) | 
					
					
						
						| 
							 | 
						    ] | 
					
					
						
						| 
							 | 
						    return [x[0] for x in result_per_image], [x[1] for x in result_per_image] | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						def _log_classification_stats(pred_logits, gt_classes, prefix="fast_rcnn"): | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    Log the classification metrics to EventStorage. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    Args: | 
					
					
						
						| 
							 | 
						        pred_logits: Rx(K+1) logits. The last column is for background class. | 
					
					
						
						| 
							 | 
						        gt_classes: R labels | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    num_instances = gt_classes.numel() | 
					
					
						
						| 
							 | 
						    if num_instances == 0: | 
					
					
						
						| 
							 | 
						        return | 
					
					
						
						| 
							 | 
						    pred_classes = pred_logits.argmax(dim=1) | 
					
					
						
						| 
							 | 
						    bg_class_ind = pred_logits.shape[1] - 1 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    fg_inds = (gt_classes >= 0) & (gt_classes < bg_class_ind) | 
					
					
						
						| 
							 | 
						    num_fg = fg_inds.nonzero().numel() | 
					
					
						
						| 
							 | 
						    fg_gt_classes = gt_classes[fg_inds] | 
					
					
						
						| 
							 | 
						    fg_pred_classes = pred_classes[fg_inds] | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() | 
					
					
						
						| 
							 | 
						    num_accurate = (pred_classes == gt_classes).nonzero().numel() | 
					
					
						
						| 
							 | 
						    fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    storage = get_event_storage() | 
					
					
						
						| 
							 | 
						    storage.put_scalar(f"{prefix}/cls_accuracy", num_accurate / num_instances) | 
					
					
						
						| 
							 | 
						    if num_fg > 0: | 
					
					
						
						| 
							 | 
						        storage.put_scalar(f"{prefix}/fg_cls_accuracy", fg_num_accurate / num_fg) | 
					
					
						
						| 
							 | 
						        storage.put_scalar(f"{prefix}/false_negative", num_false_negative / num_fg) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						def fast_rcnn_inference_single_image( | 
					
					
						
						| 
							 | 
						    boxes, | 
					
					
						
						| 
							 | 
						    scores, | 
					
					
						
						| 
							 | 
						    image_shape: Tuple[int, int], | 
					
					
						
						| 
							 | 
						    score_thresh: float, | 
					
					
						
						| 
							 | 
						    nms_thresh: float, | 
					
					
						
						| 
							 | 
						    topk_per_image: int, | 
					
					
						
						| 
							 | 
						): | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    Single-image inference. Return bounding-box detection results by thresholding | 
					
					
						
						| 
							 | 
						    on scores and applying non-maximum suppression (NMS). | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    Args: | 
					
					
						
						| 
							 | 
						        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes | 
					
					
						
						| 
							 | 
						        per image. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    Returns: | 
					
					
						
						| 
							 | 
						        Same as `fast_rcnn_inference`, but for only one image. | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) | 
					
					
						
						| 
							 | 
						    if not valid_mask.all(): | 
					
					
						
						| 
							 | 
						        boxes = boxes[valid_mask] | 
					
					
						
						| 
							 | 
						        scores = scores[valid_mask] | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    scores = scores[:, :-1] | 
					
					
						
						| 
							 | 
						    num_bbox_reg_classes = boxes.shape[1] // 4 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    boxes = Boxes(boxes.reshape(-1, 4)) | 
					
					
						
						| 
							 | 
						    boxes.clip(image_shape) | 
					
					
						
						| 
							 | 
						    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)   | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    filter_mask = scores > score_thresh   | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    filter_inds = filter_mask.nonzero() | 
					
					
						
						| 
							 | 
						    if num_bbox_reg_classes == 1: | 
					
					
						
						| 
							 | 
						        boxes = boxes[filter_inds[:, 0], 0] | 
					
					
						
						| 
							 | 
						    else: | 
					
					
						
						| 
							 | 
						        boxes = boxes[filter_mask] | 
					
					
						
						| 
							 | 
						    scores = scores[filter_mask] | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) | 
					
					
						
						| 
							 | 
						    if topk_per_image >= 0: | 
					
					
						
						| 
							 | 
						        keep = keep[:topk_per_image] | 
					
					
						
						| 
							 | 
						    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    result = Instances(image_shape) | 
					
					
						
						| 
							 | 
						    result.pred_boxes = Boxes(boxes) | 
					
					
						
						| 
							 | 
						    result.scores = scores | 
					
					
						
						| 
							 | 
						    result.pred_classes = filter_inds[:, 1] | 
					
					
						
						| 
							 | 
						    return result, filter_inds[:, 0] | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						class FastRCNNOutputLayers(nn.Module): | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						    Two linear layers for predicting Fast R-CNN outputs: | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    1. proposal-to-detection box regression deltas | 
					
					
						
						| 
							 | 
						    2. classification scores | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    @configurable | 
					
					
						
						| 
							 | 
						    def __init__( | 
					
					
						
						| 
							 | 
						        self, | 
					
					
						
						| 
							 | 
						        input_shape: ShapeSpec, | 
					
					
						
						| 
							 | 
						        *, | 
					
					
						
						| 
							 | 
						        box2box_transform, | 
					
					
						
						| 
							 | 
						        num_classes: int, | 
					
					
						
						| 
							 | 
						        test_score_thresh: float = 0.0, | 
					
					
						
						| 
							 | 
						        test_nms_thresh: float = 0.5, | 
					
					
						
						| 
							 | 
						        test_topk_per_image: int = 100, | 
					
					
						
						| 
							 | 
						        cls_agnostic_bbox_reg: bool = False, | 
					
					
						
						| 
							 | 
						        smooth_l1_beta: float = 0.0, | 
					
					
						
						| 
							 | 
						        box_reg_loss_type: str = "smooth_l1", | 
					
					
						
						| 
							 | 
						        loss_weight: Union[float, Dict[str, float]] = 1.0, | 
					
					
						
						| 
							 | 
						        use_fed_loss: bool = False, | 
					
					
						
						| 
							 | 
						        use_sigmoid_ce: bool = False, | 
					
					
						
						| 
							 | 
						        get_fed_loss_cls_weights: Optional[Callable] = None, | 
					
					
						
						| 
							 | 
						        fed_loss_num_classes: int = 50, | 
					
					
						
						| 
							 | 
						    ): | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        NOTE: this interface is experimental. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        Args: | 
					
					
						
						| 
							 | 
						            input_shape (ShapeSpec): shape of the input feature to this module | 
					
					
						
						| 
							 | 
						            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): | 
					
					
						
						| 
							 | 
						            num_classes (int): number of foreground classes | 
					
					
						
						| 
							 | 
						            test_score_thresh (float): threshold to filter predictions results. | 
					
					
						
						| 
							 | 
						            test_nms_thresh (float): NMS threshold for prediction results. | 
					
					
						
						| 
							 | 
						            test_topk_per_image (int): number of top predictions to produce per image. | 
					
					
						
						| 
							 | 
						            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression | 
					
					
						
						| 
							 | 
						            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if | 
					
					
						
						| 
							 | 
						                `box_reg_loss_type` is "smooth_l1" | 
					
					
						
						| 
							 | 
						            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou", | 
					
					
						
						| 
							 | 
						                "diou", "ciou" | 
					
					
						
						| 
							 | 
						            loss_weight (float|dict): weights to use for losses. Can be single float for weighting | 
					
					
						
						| 
							 | 
						                all losses, or a dict of individual weightings. Valid dict keys are: | 
					
					
						
						| 
							 | 
						                    * "loss_cls": applied to classification loss | 
					
					
						
						| 
							 | 
						                    * "loss_box_reg": applied to box regression loss | 
					
					
						
						| 
							 | 
						            use_fed_loss (bool): whether to use federated loss which samples additional negative | 
					
					
						
						| 
							 | 
						                classes to calculate the loss | 
					
					
						
						| 
							 | 
						            use_sigmoid_ce (bool): whether to calculate the loss using weighted average of binary | 
					
					
						
						| 
							 | 
						                cross entropy with logits. This could be used together with federated loss | 
					
					
						
						| 
							 | 
						            get_fed_loss_cls_weights (Callable): a callable which takes dataset name and frequency | 
					
					
						
						| 
							 | 
						                weight power, and returns the probabilities to sample negative classes for | 
					
					
						
						| 
							 | 
						                federated loss. The implementation can be found in | 
					
					
						
						| 
							 | 
						                detectron2/data/detection_utils.py | 
					
					
						
						| 
							 | 
						            fed_loss_num_classes (int): number of federated classes to keep in total | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        super().__init__() | 
					
					
						
						| 
							 | 
						        if isinstance(input_shape, int):   | 
					
					
						
						| 
							 | 
						            input_shape = ShapeSpec(channels=input_shape) | 
					
					
						
						| 
							 | 
						        self.num_classes = num_classes | 
					
					
						
						| 
							 | 
						        input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) | 
					
					
						
						| 
							 | 
						         | 
					
					
						
						| 
							 | 
						        self.cls_score = nn.Linear(input_size, num_classes + 1) | 
					
					
						
						| 
							 | 
						        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes | 
					
					
						
						| 
							 | 
						        box_dim = len(box2box_transform.weights) | 
					
					
						
						| 
							 | 
						        self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						        nn.init.normal_(self.cls_score.weight, std=0.01) | 
					
					
						
						| 
							 | 
						        nn.init.normal_(self.bbox_pred.weight, std=0.001) | 
					
					
						
						| 
							 | 
						        for l in [self.cls_score, self.bbox_pred]: | 
					
					
						
						| 
							 | 
						            nn.init.constant_(l.bias, 0) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						        self.box2box_transform = box2box_transform | 
					
					
						
						| 
							 | 
						        self.smooth_l1_beta = smooth_l1_beta | 
					
					
						
						| 
							 | 
						        self.test_score_thresh = test_score_thresh | 
					
					
						
						| 
							 | 
						        self.test_nms_thresh = test_nms_thresh | 
					
					
						
						| 
							 | 
						        self.test_topk_per_image = test_topk_per_image | 
					
					
						
						| 
							 | 
						        self.box_reg_loss_type = box_reg_loss_type | 
					
					
						
						| 
							 | 
						        if isinstance(loss_weight, float): | 
					
					
						
						| 
							 | 
						            loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight} | 
					
					
						
						| 
							 | 
						        self.loss_weight = loss_weight | 
					
					
						
						| 
							 | 
						        self.use_fed_loss = use_fed_loss | 
					
					
						
						| 
							 | 
						        self.use_sigmoid_ce = use_sigmoid_ce | 
					
					
						
						| 
							 | 
						        self.fed_loss_num_classes = fed_loss_num_classes | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						        if self.use_fed_loss: | 
					
					
						
						| 
							 | 
						            assert self.use_sigmoid_ce, "Please use sigmoid cross entropy loss with federated loss" | 
					
					
						
						| 
							 | 
						            fed_loss_cls_weights = get_fed_loss_cls_weights() | 
					
					
						
						| 
							 | 
						            assert ( | 
					
					
						
						| 
							 | 
						                len(fed_loss_cls_weights) == self.num_classes | 
					
					
						
						| 
							 | 
						            ), "Please check the provided fed_loss_cls_weights. Their size should match num_classes" | 
					
					
						
						| 
							 | 
						            self.register_buffer("fed_loss_cls_weights", fed_loss_cls_weights) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    @classmethod | 
					
					
						
						| 
							 | 
						    def from_config(cls, cfg, input_shape): | 
					
					
						
						| 
							 | 
						        return { | 
					
					
						
						| 
							 | 
						            "input_shape": input_shape, | 
					
					
						
						| 
							 | 
						            "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						            "num_classes"               : cfg.MODEL.ROI_HEADS.NUM_CLASSES, | 
					
					
						
						| 
							 | 
						            "cls_agnostic_bbox_reg"     : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, | 
					
					
						
						| 
							 | 
						            "smooth_l1_beta"            : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, | 
					
					
						
						| 
							 | 
						            "test_score_thresh"         : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, | 
					
					
						
						| 
							 | 
						            "test_nms_thresh"           : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, | 
					
					
						
						| 
							 | 
						            "test_topk_per_image"       : cfg.TEST.DETECTIONS_PER_IMAGE, | 
					
					
						
						| 
							 | 
						            "box_reg_loss_type"         : cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE, | 
					
					
						
						| 
							 | 
						            "loss_weight"               : {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT},   | 
					
					
						
						| 
							 | 
						            "use_fed_loss"              : cfg.MODEL.ROI_BOX_HEAD.USE_FED_LOSS, | 
					
					
						
						| 
							 | 
						            "use_sigmoid_ce"            : cfg.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE, | 
					
					
						
						| 
							 | 
						            "get_fed_loss_cls_weights"  : lambda: get_fed_loss_cls_weights(dataset_names=cfg.DATASETS.TRAIN, freq_weight_power=cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER),   | 
					
					
						
						| 
							 | 
						            "fed_loss_num_classes"      : cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES, | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def forward(self, x): | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        Args: | 
					
					
						
						| 
							 | 
						            x: per-region features of shape (N, ...) for N bounding boxes to predict. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        Returns: | 
					
					
						
						| 
							 | 
						            (Tensor, Tensor): | 
					
					
						
						| 
							 | 
						            First tensor: shape (N,K+1), scores for each of the N box. Each row contains the | 
					
					
						
						| 
							 | 
						            scores for K object categories and 1 background class. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						            Second tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), | 
					
					
						
						| 
							 | 
						            or (N,4) for class-agnostic regression. | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        if x.dim() > 2: | 
					
					
						
						| 
							 | 
						            x = torch.flatten(x, start_dim=1) | 
					
					
						
						| 
							 | 
						        scores = self.cls_score(x) | 
					
					
						
						| 
							 | 
						        proposal_deltas = self.bbox_pred(x) | 
					
					
						
						| 
							 | 
						        return scores, proposal_deltas | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def losses(self, predictions, proposals): | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        Args: | 
					
					
						
						| 
							 | 
						            predictions: return values of :meth:`forward()`. | 
					
					
						
						| 
							 | 
						            proposals (list[Instances]): proposals that match the features that were used | 
					
					
						
						| 
							 | 
						                to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``, | 
					
					
						
						| 
							 | 
						                ``gt_classes`` are expected. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        Returns: | 
					
					
						
						| 
							 | 
						            Dict[str, Tensor]: dict of losses | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        scores, proposal_deltas = predictions | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						         | 
					
					
						
						| 
							 | 
						        gt_classes = ( | 
					
					
						
						| 
							 | 
						            cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0) | 
					
					
						
						| 
							 | 
						        ) | 
					
					
						
						| 
							 | 
						        _log_classification_stats(scores, gt_classes) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						         | 
					
					
						
						| 
							 | 
						        if len(proposals): | 
					
					
						
						| 
							 | 
						            proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)   | 
					
					
						
						| 
							 | 
						            assert not proposal_boxes.requires_grad, "Proposals should not require gradients!" | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						            gt_boxes = cat( | 
					
					
						
						| 
							 | 
						                [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals], | 
					
					
						
						| 
							 | 
						                dim=0, | 
					
					
						
						| 
							 | 
						            ) | 
					
					
						
						| 
							 | 
						        else: | 
					
					
						
						| 
							 | 
						            proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						        if self.use_sigmoid_ce: | 
					
					
						
						| 
							 | 
						            loss_cls = self.sigmoid_cross_entropy_loss(scores, gt_classes) | 
					
					
						
						| 
							 | 
						        else: | 
					
					
						
						| 
							 | 
						            loss_cls = cross_entropy(scores, gt_classes, reduction="mean") | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						        losses = { | 
					
					
						
						| 
							 | 
						            "loss_cls": loss_cls, | 
					
					
						
						| 
							 | 
						            "loss_box_reg": self.box_reg_loss( | 
					
					
						
						| 
							 | 
						                proposal_boxes, gt_boxes, proposal_deltas, gt_classes | 
					
					
						
						| 
							 | 
						            ), | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						        return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    def get_fed_loss_classes(self, gt_classes, num_fed_loss_classes, num_classes, weight):
						        """
						        Choose the classes that take part in the federated loss.

						        Every distinct label found in ``gt_classes`` is always kept. When fewer than
						        ``num_fed_loss_classes`` distinct labels are present, the shortfall is drawn
						        without replacement with ``torch.multinomial``.

						        Args:
						            gt_classes: a long tensor of shape R that contains the gt class label of each proposal.
						            num_fed_loss_classes: minimum number of classes to keep when calculating federated loss.
						                Negative classes are sampled if the number of unique gt_classes is smaller
						                than this value.
						            num_classes: number of foreground classes
						            weight: probabilities used to sample negative classes; written into the first
						                ``num_classes`` sampling slots.

						        Returns:
						            Tensor:
						                classes to keep when calculating the federated loss: the unique gt classes
						                first, followed by any sampled negative classes.
						        """
						        present = torch.unique(gt_classes)
						        shortfall = num_fed_loss_classes - len(present)
						        if shortfall <= 0:
						            # Enough distinct labels already; nothing to sample.
						            return present

						        # One sampling slot per foreground class plus one extra slot at index
						        # ``num_classes`` (presumably the background label) that is never sampled.
						        sampling_prob = present.new_ones(num_classes + 1).float()
						        sampling_prob[-1] = 0
						        sampling_prob[:num_classes] = weight.float().clone()
						        # Labels that are already kept must not be drawn a second time.
						        sampling_prob[present] = 0
						        negatives = torch.multinomial(sampling_prob, shortfall, replacement=False)
						        return torch.cat([present, negatives])
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    def sigmoid_cross_entropy_loss(self, pred_class_logits, gt_classes):
						        """
						        Per-class binary cross entropy on the foreground logits, summed over all
						        entries and divided by the number of boxes.

						        Args:
						            pred_class_logits: shape (N, K+1), scores for each of the N boxes. Each row
						                contains the scores for K object categories and 1 background class
						            gt_classes: a long tensor of shape R that contains the gt class label of each proposal.

						        Returns:
						            A scalar tensor. It is zero when ``pred_class_logits`` has no elements.
						        """
						        if pred_class_logits.numel() == 0:
						            return pred_class_logits.new_zeros([1])[0]

						        num_boxes = pred_class_logits.shape[0]
						        num_fg_classes = pred_class_logits.shape[1] - 1

						        # One-hot over all K+1 columns first so that the last label has a slot to
						        # land in, then drop that last column: such rows end up all-zero.
						        one_hot = pred_class_logits.new_zeros(num_boxes, num_fg_classes + 1)
						        one_hot[range(len(gt_classes)), gt_classes] = 1
						        one_hot = one_hot[:, :num_fg_classes]

						        elementwise = F.binary_cross_entropy_with_logits(
						            pred_class_logits[:, :-1], one_hot, reduction="none"
						        )

						        if not self.use_fed_loss:
						            class_weight = 1
						        else:
						            kept_classes = self.get_fed_loss_classes(
						                gt_classes,
						                num_fed_loss_classes=self.fed_loss_num_classes,
						                num_classes=num_fg_classes,
						                weight=self.fed_loss_cls_weights,
						            )
						            # 0/1 mask over the K+1 columns, trimmed to the K foreground columns
						            # and broadcast to every box.
						            keep_mask = kept_classes.new_zeros(num_fg_classes + 1)
						            keep_mask[kept_classes] = 1
						            keep_mask = keep_mask[:num_fg_classes]
						            class_weight = (
						                keep_mask.view(1, num_fg_classes).expand(num_boxes, num_fg_classes).float()
						            )

						        return torch.sum(elementwise * class_weight) / num_boxes
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes):
						        """
						        Box regression loss over the foreground proposals.

						        Args:
						            proposal_boxes/gt_boxes are tensors with the same shape (R, 4 or 5).
						            pred_deltas has shape (R, 4 or 5), or (R, num_classes * (4 or 5)).
						            gt_classes is a long tensor of shape R, the gt class label of each proposal.
						            R shall be the number of proposals.

						        Returns:
						            The value returned by ``_dense_box_regression_loss`` for the foreground
						            rows, divided by ``max(gt_classes.numel(), 1.0)``.
						        """
						        box_dim = proposal_boxes.shape[1]

						        # Foreground rows are those labelled with a class in [0, num_classes).
						        is_foreground = (gt_classes >= 0) & (gt_classes < self.num_classes)
						        fg_inds = nonzero_tuple(is_foreground)[0]

						        class_agnostic = pred_deltas.shape[1] == box_dim
						        if class_agnostic:
						            fg_pred_deltas = pred_deltas[fg_inds]
						        else:
						            # One delta set per class: keep, for each foreground row, the set
						            # that belongs to its gt class.
						            per_class_deltas = pred_deltas.view(-1, self.num_classes, box_dim)
						            fg_pred_deltas = per_class_deltas[fg_inds, gt_classes[fg_inds]]

						        # NOTE(review): the literal Ellipsis is passed as the fifth positional
						        # argument; presumably it selects every row that is handed in -- confirm
						        # against ``_dense_box_regression_loss``.
						        unnormalized_loss = _dense_box_regression_loss(
						            [proposal_boxes[fg_inds]],
						            self.box2box_transform,
						            [fg_pred_deltas.unsqueeze(0)],
						            [gt_boxes[fg_inds]],
						            ...,
						            self.box_reg_loss_type,
						            self.smooth_l1_beta,
						        )

						        # The divisor is the number of entries in gt_classes (all proposals),
						        # not the number of foreground rows; the floor of 1.0 avoids dividing
						        # by zero when there are no proposals.
						        normalizer = max(gt_classes.numel(), 1.0)
						        return unnormalized_loss / normalizer
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]):
						        """
						        Decode boxes and class probabilities, then hand them to
						        `fast_rcnn_inference` together with this layer's test-time thresholds.

						        Args:
						            predictions: return values of :meth:`forward()`.
						            proposals (list[Instances]): proposals that match the features that were
						                used to compute predictions. The ``proposal_boxes`` field is expected.

						        Returns:
						            list[Instances]: same as `fast_rcnn_inference`.
						            list[Tensor]: same as `fast_rcnn_inference`.
						        """
						        per_image_boxes = self.predict_boxes(predictions, proposals)
						        per_image_scores = self.predict_probs(predictions, proposals)
						        per_image_sizes = [inst.image_size for inst in proposals]
						        results = fast_rcnn_inference(
						            per_image_boxes,
						            per_image_scores,
						            per_image_sizes,
						            self.test_score_thresh,
						            self.test_nms_thresh,
						            self.test_topk_per_image,
						        )
						        return results
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def predict_boxes_for_gt_classes(self, predictions, proposals):
						        """
						        Decode the predicted boxes and, when several box sets are predicted per
						        proposal, keep only the one indexed by each proposal's gt class.

						        Args:
						            predictions: return values of :meth:`forward()`.
						            proposals (list[Instances]): proposals that match the features that were used
						                to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected.

						        Returns:
						            list[Tensor]:
						                A list of Tensors of predicted boxes for GT classes in case of
						                class-specific box head. Element i of the list has shape (Ri, B), where Ri is
						                the number of proposals for image i and B is the box dimension (4 or 5)
						        """
						        if len(proposals) == 0:
						            return []

						        _, proposal_deltas = predictions
						        all_rois = cat([inst.proposal_boxes.tensor for inst in proposals], dim=0)
						        num_rois, box_dim = all_rois.shape
						        decoded = self.box2box_transform.apply_deltas(proposal_deltas, all_rois)

						        # Number of box sets predicted per proposal; 1 means class-agnostic.
						        num_box_sets = decoded.shape[1] // box_dim
						        if num_box_sets > 1:
						            labels = torch.cat([inst.gt_classes for inst in proposals], dim=0)
						            # Labels outside [0, num_box_sets - 1] cannot be used as an index,
						            # so they are clamped into range before the gather below.
						            labels = labels.clamp_(0, num_box_sets - 1)

						            row_index = torch.arange(num_rois, dtype=torch.long, device=decoded.device)
						            decoded = decoded.view(num_rois, num_box_sets, box_dim)[row_index, labels]

						        rois_per_image = [len(inst) for inst in proposals]
						        return decoded.split(rois_per_image)
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def predict_boxes(
						        self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]
						    ):
						        """
						        Apply the predicted deltas to the proposal boxes and split the result
						        back into one tensor per image.

						        Args:
						            predictions: return values of :meth:`forward()`.
						            proposals (list[Instances]): proposals that match the features that were
						                used to compute predictions. The ``proposal_boxes`` field is expected.

						        Returns:
						            list[Tensor]:
						                A list of Tensors of predicted class-specific or class-agnostic boxes
						                for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is
						                the number of proposals for image i and B is the box dimension (4 or 5)
						        """
						        if len(proposals) == 0:
						            return []

						        _, deltas = predictions
						        rois_per_image = [len(inst) for inst in proposals]
						        all_rois = cat([inst.proposal_boxes.tensor for inst in proposals], dim=0)
						        decoded = self.box2box_transform.apply_deltas(deltas, all_rois)
						        return decoded.split(rois_per_image)
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    def predict_probs( | 
					
					
						
						| 
							 | 
						        self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] | 
					
					
						
						| 
							 | 
						    ): | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        Args: | 
					
					
						
						| 
							 | 
						            predictions: return values of :meth:`forward()`. | 
					
					
						
						| 
							 | 
						            proposals (list[Instances]): proposals that match the features that were | 
					
					
						
						| 
							 | 
						                used to compute predictions. | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        Returns: | 
					
					
						
						| 
							 | 
						            list[Tensor]: | 
					
					
						
						| 
							 | 
						                A list of Tensors of predicted class probabilities for each image. | 
					
					
						
						| 
							 | 
						                Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i. | 
					
					
						
						| 
							 | 
						        """ | 
					
					
						
						| 
							 | 
						        scores, _ = predictions | 
					
					
						
						| 
							 | 
						        num_inst_per_image = [len(p) for p in proposals] | 
					
					
						
						| 
							 | 
						        if self.use_sigmoid_ce: | 
					
					
						
						| 
							 | 
						            probs = scores.sigmoid() | 
					
					
						
						| 
							 | 
						        else: | 
					
					
						
						| 
							 | 
						            probs = F.softmax(scores, dim=-1) | 
					
					
						
						| 
							 | 
						        return probs.split(num_inst_per_image, dim=0) | 
					
					
						
						| 
							 | 
						
 |