import torch
import torch.nn.functional as F
from fvcore.nn import sigmoid_focal_loss_jit
from torch import nn
import torch.distributed as dist
from torch.distributed import get_world_size
from torchvision import ops
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_fed_loss_classes(gt_classes, num_fed_loss_classes, num_classes, weight):
"""
Args:
gt_classes: a long tensor of shape R that contains the gt class label of each proposal.
num_fed_loss_classes: minimum number of classes to keep when calculating federated loss.
Will sample negative classes if number of unique gt_classes is smaller than this value.
num_classes: number of foreground classes
weight: probabilities used to sample negative classes
Returns:
Tensor:
classes to keep when calculating the federated loss, including both unique gt
classes and sampled negative classes.
"""
unique_gt_classes = torch.unique(gt_classes)
prob = unique_gt_classes.new_ones(num_classes + 1).float()
prob[-1] = 0
if len(unique_gt_classes) < num_fed_loss_classes:
prob[:num_classes] = weight.float().clone()
prob[unique_gt_classes] = 0
sampled_negative_classes = torch.multinomial(
prob, num_fed_loss_classes - len(unique_gt_classes), replacement=False
)
fed_loss_classes = torch.cat([unique_gt_classes, sampled_negative_classes])
else:
fed_loss_classes = unique_gt_classes
return fed_loss_classes
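# Illustrative usage of get_fed_loss_classes (a minimal sketch; the tensors below are
# made up for demonstration and are not part of the training pipeline):
#
#   gt_classes = torch.tensor([3, 3, 17])            # labels of the matched proposals
#   weights = torch.ones(80)                         # uniform negative-sampling weights (assumption)
#   keep = get_fed_loss_classes(gt_classes, num_fed_loss_classes=50,
#                               num_classes=80, weight=weights)
#   # `keep` contains the unique gt classes {3, 17} plus 48 sampled negative class ids;
#   # the background index (80) is never sampled because its probability is zeroed out.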
class CriterionDynamicK(nn.Module):
""" This class computes the loss for DiffusionDet.
The process happens in two steps:
1) we compute hungarian assignment between ground truth boxes and the outputs of the model
2) we supervise each pair of matched ground-truth / prediction (supervise class and box)
"""
def __init__(self, config, num_classes, weight_dict):
""" Create the criterion.
Parameters:
num_classes: number of object categories, omitting the special no-object category
weight_dict: dict containing as key the names of the losses and as values their relative weight.
"""
super().__init__()
self.config = config
self.num_classes = num_classes
self.matcher = HungarianMatcherDynamicK(config)
self.weight_dict = weight_dict
self.eos_coef = config.no_object_weight
self.use_focal = config.use_focal
self.use_fed_loss = config.use_fed_loss
        if self.use_focal:
            self.focal_loss_alpha = config.alpha
            self.focal_loss_gamma = config.gamma
        if self.use_fed_loss:
            # NOTE: loss_labels expects these two attributes; they are read here under
            # assumed config field names (adjust to match your config object).
            self.fed_loss_num_classes = getattr(config, 'fed_loss_num_classes', 50)
            self.fed_loss_cls_weights = getattr(config, 'fed_loss_cls_weights', None)
    # The federated-loss logic below is adapted from
    # https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/roi_heads/fast_rcnn.py#L356
def loss_labels(self, outputs, targets, indices):
"""Classification loss (NLL)
targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
"""
assert 'pred_logits' in outputs
src_logits = outputs['pred_logits']
batch_size = len(targets)
# idx = self._get_src_permutation_idx(indices)
# target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
target_classes = torch.full(src_logits.shape[:2], self.num_classes,
dtype=torch.int64, device=src_logits.device)
src_logits_list = []
target_classes_o_list = []
# target_classes[idx] = target_classes_o
for batch_idx in range(batch_size):
valid_query = indices[batch_idx][0]
gt_multi_idx = indices[batch_idx][1]
if len(gt_multi_idx) == 0:
continue
bz_src_logits = src_logits[batch_idx]
target_classes_o = targets[batch_idx]["labels"]
target_classes[batch_idx, valid_query] = target_classes_o[gt_multi_idx]
src_logits_list.append(bz_src_logits[valid_query])
target_classes_o_list.append(target_classes_o[gt_multi_idx])
if self.use_focal or self.use_fed_loss:
num_boxes = torch.cat(target_classes_o_list).shape[0] if len(target_classes_o_list) != 0 else 1
target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], self.num_classes + 1],
dtype=src_logits.dtype, layout=src_logits.layout,
device=src_logits.device)
target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1)
gt_classes = torch.argmax(target_classes_onehot, dim=-1)
target_classes_onehot = target_classes_onehot[:, :, :-1]
src_logits = src_logits.flatten(0, 1)
target_classes_onehot = target_classes_onehot.flatten(0, 1)
if self.use_focal:
cls_loss = sigmoid_focal_loss_jit(src_logits, target_classes_onehot, alpha=self.focal_loss_alpha,
gamma=self.focal_loss_gamma, reduction="none")
else:
cls_loss = F.binary_cross_entropy_with_logits(src_logits, target_classes_onehot, reduction="none")
if self.use_fed_loss:
K = self.num_classes
N = src_logits.shape[0]
fed_loss_classes = get_fed_loss_classes(
gt_classes,
num_fed_loss_classes=self.fed_loss_num_classes,
num_classes=K,
weight=self.fed_loss_cls_weights,
)
fed_loss_classes_mask = fed_loss_classes.new_zeros(K + 1)
fed_loss_classes_mask[fed_loss_classes] = 1
fed_loss_classes_mask = fed_loss_classes_mask[:K]
weight = fed_loss_classes_mask.view(1, K).expand(N, K).float()
loss_ce = torch.sum(cls_loss * weight) / num_boxes
else:
loss_ce = torch.sum(cls_loss) / num_boxes
losses = {'loss_ce': loss_ce}
else:
raise NotImplementedError
return losses
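    # Shape walk-through for loss_labels (illustrative): with batch_size B = 2,
    # num_proposals N = 300 and num_classes C = 80,
    #   src_logits:            [2, 300, 80] -> flattened to [600, 80]
    #   target_classes_onehot: [600, 80], with all-zero rows for unmatched proposals
    # The per-element focal/BCE loss is summed and divided by the number of matched
    # boxes (num_boxes), not by the number of proposals.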
def loss_boxes(self, outputs, targets, indices):
"""Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
"""
assert 'pred_boxes' in outputs
# idx = self._get_src_permutation_idx(indices)
src_boxes = outputs['pred_boxes']
batch_size = len(targets)
pred_box_list = []
pred_norm_box_list = []
tgt_box_list = []
tgt_box_xyxy_list = []
for batch_idx in range(batch_size):
valid_query = indices[batch_idx][0]
gt_multi_idx = indices[batch_idx][1]
if len(gt_multi_idx) == 0:
continue
bz_image_whwh = targets[batch_idx]['image_size_xyxy']
bz_src_boxes = src_boxes[batch_idx]
bz_target_boxes = targets[batch_idx]["boxes"] # normalized (cx, cy, w, h)
bz_target_boxes_xyxy = targets[batch_idx]["boxes_xyxy"] # absolute (x1, y1, x2, y2)
pred_box_list.append(bz_src_boxes[valid_query])
pred_norm_box_list.append(bz_src_boxes[valid_query] / bz_image_whwh) # normalize (x1, y1, x2, y2)
tgt_box_list.append(bz_target_boxes[gt_multi_idx])
tgt_box_xyxy_list.append(bz_target_boxes_xyxy[gt_multi_idx])
if len(pred_box_list) != 0:
src_boxes = torch.cat(pred_box_list)
src_boxes_norm = torch.cat(pred_norm_box_list) # normalized (x1, y1, x2, y2)
target_boxes = torch.cat(tgt_box_list)
target_boxes_abs_xyxy = torch.cat(tgt_box_xyxy_list)
num_boxes = src_boxes.shape[0]
losses = {}
# require normalized (x1, y1, x2, y2)
loss_bbox = F.l1_loss(src_boxes_norm, ops.box_convert(target_boxes, 'cxcywh', 'xyxy'), reduction='none')
losses['loss_bbox'] = loss_bbox.sum() / num_boxes
# loss_giou = giou_loss(box_ops.box_cxcywh_to_xyxy(src_boxes), box_ops.box_cxcywh_to_xyxy(target_boxes))
loss_giou = 1 - torch.diag(ops.generalized_box_iou(src_boxes, target_boxes_abs_xyxy))
losses['loss_giou'] = loss_giou.sum() / num_boxes
else:
losses = {'loss_bbox': outputs['pred_boxes'].sum() * 0,
'loss_giou': outputs['pred_boxes'].sum() * 0}
return losses
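    # GIoU sanity sketch (illustrative): ops.generalized_box_iou returns an
    # [num_pred, num_gt] matrix, so taking the diagonal pairs the i-th matched
    # prediction with the i-th matched target, e.g.
    #   a = torch.tensor([[0., 0., 10., 10.]])
    #   b = torch.tensor([[0., 0., 10., 10.]])
    #   (1 - torch.diag(ops.generalized_box_iou(a, b))).item()  # -> 0.0 for a perfect match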
def get_loss(self, loss, outputs, targets, indices):
loss_map = {
'labels': self.loss_labels,
'boxes': self.loss_boxes,
}
assert loss in loss_map, f'do you really want to compute {loss} loss?'
return loss_map[loss](outputs, targets, indices)
def forward(self, outputs, targets):
""" This performs the loss computation.
Parameters:
outputs: dict of tensors, see the output specification of the model for the format
targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied, see each loss' doc
"""
outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}
# Retrieve the matching between the outputs of the last layer and the targets
indices, _ = self.matcher(outputs_without_aux, targets)
# Compute all the requested losses
losses = {}
for loss in ["labels", "boxes"]:
losses.update(self.get_loss(loss, outputs, targets, indices))
# In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
if 'aux_outputs' in outputs:
for i, aux_outputs in enumerate(outputs['aux_outputs']):
indices, _ = self.matcher(aux_outputs, targets)
for loss in ["labels", "boxes"]:
if loss == 'masks':
# Intermediate masks losses are too costly to compute, we ignore them.
continue
l_dict = self.get_loss(loss, aux_outputs, targets, indices)
l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
losses.update(l_dict)
return losses
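# Construction and call sketch for CriterionDynamicK (illustrative; the config field
# names mirror how `config` is read in this file, the values are assumptions):
#
#   from types import SimpleNamespace
#   cfg = SimpleNamespace(no_object_weight=0.1, use_focal=True, use_fed_loss=False,
#                         alpha=0.25, gamma=2.0, class_weight=2.0, giou_weight=2.0,
#                         l1_weight=5.0, ota_k=5)
#   criterion = CriterionDynamicK(cfg, num_classes=80,
#                                 weight_dict={'loss_ce': 2.0, 'loss_bbox': 5.0, 'loss_giou': 2.0})
#   # outputs: {'pred_logits': [B, N, 80], 'pred_boxes': [B, N, 4] absolute (x1, y1, x2, y2)}
#   # targets: list of B dicts with 'labels', 'boxes' (normalized cxcywh), 'boxes_xyxy',
#   #          'image_size_xyxy' and 'image_size_xyxy_tgt'
#   # losses = criterion(outputs, targets)  # {'loss_ce': ..., 'loss_bbox': ..., 'loss_giou': ...}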
def get_in_boxes_info(boxes, target_gts):
    """Flag which proposal centers fall inside a gt box and/or a box-size-scaled center region.
    Args:
        boxes: [num_proposals, 4] absolute (cx, cy, w, h) proposal boxes.
        target_gts: [num_gt, 4] absolute (cx, cy, w, h) ground-truth boxes.
    Returns:
        is_in_boxes_anchor: [num_proposals] bool, center inside any gt box or center region.
        is_in_boxes_and_center: [num_proposals, num_gt] bool, center inside both for that gt.
    """
    xy_target_gts = ops.box_convert(target_gts, 'cxcywh', 'xyxy')  # (x1, y1, x2, y2)
anchor_center_x = boxes[:, 0].unsqueeze(1)
anchor_center_y = boxes[:, 1].unsqueeze(1)
# whether the center of each anchor is inside a gt box
b_l = anchor_center_x > xy_target_gts[:, 0].unsqueeze(0)
b_r = anchor_center_x < xy_target_gts[:, 2].unsqueeze(0)
b_t = anchor_center_y > xy_target_gts[:, 1].unsqueeze(0)
b_b = anchor_center_y < xy_target_gts[:, 3].unsqueeze(0)
    # a proposal center lies inside a gt box iff all four inequalities hold; shape [num_proposals, num_gt]
    is_in_boxes = ((b_l.long() + b_r.long() + b_t.long() + b_b.long()) == 4)
is_in_boxes_all = is_in_boxes.sum(1) > 0 # [num_query]
# in fixed center
center_radius = 2.5
# Modified to self-adapted sampling --- the center size depends on the size of the gt boxes
# https://github.com/dulucas/UVO_Challenge/blob/main/Track1/detection/mmdet/core/bbox/assigners/rpn_sim_ota_assigner.py#L212
b_l = anchor_center_x > (
target_gts[:, 0] - (center_radius * (xy_target_gts[:, 2] - xy_target_gts[:, 0]))).unsqueeze(0)
b_r = anchor_center_x < (
target_gts[:, 0] + (center_radius * (xy_target_gts[:, 2] - xy_target_gts[:, 0]))).unsqueeze(0)
b_t = anchor_center_y > (
target_gts[:, 1] - (center_radius * (xy_target_gts[:, 3] - xy_target_gts[:, 1]))).unsqueeze(0)
b_b = anchor_center_y < (
target_gts[:, 1] + (center_radius * (xy_target_gts[:, 3] - xy_target_gts[:, 1]))).unsqueeze(0)
is_in_centers = ((b_l.long() + b_r.long() + b_t.long() + b_b.long()) == 4)
is_in_centers_all = is_in_centers.sum(1) > 0
is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all
is_in_boxes_and_center = (is_in_boxes & is_in_centers)
return is_in_boxes_anchor, is_in_boxes_and_center
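# Toy example for get_in_boxes_info (illustrative):
#   gts = torch.tensor([[50., 50., 20., 20.]])       # one gt, absolute (cx, cy, w, h)
#   props = torch.tensor([[55., 52., 30., 30.],      # center inside the gt box
#                         [200., 200., 30., 30.]])   # center far outside
#   fg_mask, in_both = get_in_boxes_info(props, gts)
#   # fg_mask -> tensor([True, False]); in_both -> tensor([[True], [False]])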
class HungarianMatcherDynamicK(nn.Module):
"""This class computes an assignment between the targets and the predictions of the network
For efficiency reasons, the targets don't include the no_object. Because of this, in general,
there are more predictions than targets. In this case, we do a 1-to-k (dynamic) matching of the best predictions,
while the others are un-matched (and thus treated as non-objects).
"""
def __init__(self, config):
super().__init__()
self.use_focal = config.use_focal
self.use_fed_loss = config.use_fed_loss
self.cost_class = config.class_weight
self.cost_giou = config.giou_weight
self.cost_bbox = config.l1_weight
self.ota_k = config.ota_k
if self.use_focal:
self.focal_loss_alpha = config.alpha
self.focal_loss_gamma = config.gamma
        assert self.cost_class != 0 or self.cost_bbox != 0 or self.cost_giou != 0, "all costs can't be 0"
def forward(self, outputs, targets):
""" simOTA for detr"""
with torch.no_grad():
bs, num_queries = outputs["pred_logits"].shape[:2]
# We flatten to compute the cost matrices in a batch
if self.use_focal or self.use_fed_loss:
out_prob = outputs["pred_logits"].sigmoid() # [batch_size, num_queries, num_classes]
out_bbox = outputs["pred_boxes"] # [batch_size, num_queries, 4]
else:
out_prob = outputs["pred_logits"].softmax(-1) # [batch_size, num_queries, num_classes]
out_bbox = outputs["pred_boxes"] # [batch_size, num_queries, 4]
indices = []
matched_ids = []
assert bs == len(targets)
for batch_idx in range(bs):
bz_boxes = out_bbox[batch_idx] # [num_proposals, 4]
bz_out_prob = out_prob[batch_idx]
bz_tgt_ids = targets[batch_idx]["labels"]
num_insts = len(bz_tgt_ids)
                if num_insts == 0:  # no ground-truth objects in this image
non_valid = torch.zeros(bz_out_prob.shape[0]).to(bz_out_prob) > 0
indices_batchi = (non_valid, torch.arange(0, 0).to(bz_out_prob))
matched_qidx = torch.arange(0, 0).to(bz_out_prob)
indices.append(indices_batchi)
matched_ids.append(matched_qidx)
continue
                bz_gtboxs = targets[batch_idx]['boxes']  # [num_gt, 4] normalized (cx, cy, w, h)
bz_gtboxs_abs_xyxy = targets[batch_idx]['boxes_xyxy']
fg_mask, is_in_boxes_and_center = get_in_boxes_info(
ops.box_convert(bz_boxes, 'xyxy', 'cxcywh'), # absolute (cx, cy, w, h)
ops.box_convert(bz_gtboxs_abs_xyxy, 'xyxy', 'cxcywh') # absolute (cx, cy, w, h)
)
pair_wise_ious = ops.box_iou(bz_boxes, bz_gtboxs_abs_xyxy)
# Compute the classification cost.
if self.use_focal:
alpha = self.focal_loss_alpha
gamma = self.focal_loss_gamma
neg_cost_class = (1 - alpha) * (bz_out_prob ** gamma) * (-(1 - bz_out_prob + 1e-8).log())
pos_cost_class = alpha * ((1 - bz_out_prob) ** gamma) * (-(bz_out_prob + 1e-8).log())
cost_class = pos_cost_class[:, bz_tgt_ids] - neg_cost_class[:, bz_tgt_ids]
elif self.use_fed_loss:
                    # without the focal modulation this reduces to a plain cross-entropy cost
neg_cost_class = (-(1 - bz_out_prob + 1e-8).log())
pos_cost_class = (-(bz_out_prob + 1e-8).log())
cost_class = pos_cost_class[:, bz_tgt_ids] - neg_cost_class[:, bz_tgt_ids]
else:
cost_class = -bz_out_prob[:, bz_tgt_ids]
# Compute the L1 cost between boxes
# image_size_out = torch.cat([v["image_size_xyxy"].unsqueeze(0) for v in targets])
# image_size_out = image_size_out.unsqueeze(1).repeat(1, num_queries, 1).flatten(0, 1)
# image_size_tgt = torch.cat([v["image_size_xyxy_tgt"] for v in targets])
bz_image_size_out = targets[batch_idx]['image_size_xyxy']
bz_image_size_tgt = targets[batch_idx]['image_size_xyxy_tgt']
bz_out_bbox_ = bz_boxes / bz_image_size_out # normalize (x1, y1, x2, y2)
bz_tgt_bbox_ = bz_gtboxs_abs_xyxy / bz_image_size_tgt # normalize (x1, y1, x2, y2)
cost_bbox = torch.cdist(bz_out_bbox_, bz_tgt_bbox_, p=1)
cost_giou = -ops.generalized_box_iou(bz_boxes, bz_gtboxs_abs_xyxy)
# Final cost matrix
cost = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + 100.0 * (
~is_in_boxes_and_center)
# cost = (cost_class + 3.0 * cost_giou + 100.0 * (~is_in_boxes_and_center)) # [num_query,num_gt]
cost[~fg_mask] = cost[~fg_mask] + 10000.0
# if bz_gtboxs.shape[0]>0:
indices_batchi, matched_qidx = self.dynamic_k_matching(cost, pair_wise_ious, bz_gtboxs.shape[0])
indices.append(indices_batchi)
matched_ids.append(matched_qidx)
return indices, matched_ids
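    # Output format of forward (illustrative): for each image b,
    #   indices[b] = (selected_query, gt_indices), where `selected_query` is a
    #   [num_proposals] bool mask of matched proposals and `gt_indices` gives,
    #   for each selected proposal, the index of its assigned gt box;
    #   matched_ids[b] holds, per gt, the query with the lowest matching cost.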
def dynamic_k_matching(self, cost, pair_wise_ious, num_gt):
matching_matrix = torch.zeros_like(cost) # [300,num_gt]
ious_in_boxes_matrix = pair_wise_ious
n_candidate_k = self.ota_k
        # For each gt, sum its top ota_k IoUs over the proposals; that (clamped) sum is the gt's dynamic k.
topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=0)
dynamic_ks = torch.clamp(topk_ious.sum(0).int(), min=1)
for gt_idx in range(num_gt):
_, pos_idx = torch.topk(cost[:, gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)
matching_matrix[:, gt_idx][pos_idx] = 1.0
del topk_ious, dynamic_ks, pos_idx
anchor_matching_gt = matching_matrix.sum(1)
if (anchor_matching_gt > 1).sum() > 0:
_, cost_argmin = torch.min(cost[anchor_matching_gt > 1], dim=1)
matching_matrix[anchor_matching_gt > 1] *= 0
            matching_matrix[anchor_matching_gt > 1, cost_argmin] = 1
        while (matching_matrix.sum(0) == 0).any():
            matched_query_id = matching_matrix.sum(1) > 0
            cost[matched_query_id] += 100000.0
            unmatch_id = torch.nonzero(matching_matrix.sum(0) == 0, as_tuple=False).squeeze(1)
            for gt_idx in unmatch_id:
                pos_idx = torch.argmin(cost[:, gt_idx])
                matching_matrix[:, gt_idx][pos_idx] = 1.0
            if (matching_matrix.sum(1) > 1).sum() > 0:  # if a query now matches more than one gt
                anchor_matching_gt = matching_matrix.sum(1)  # refresh the counts before re-assigning
                _, cost_argmin = torch.min(cost[anchor_matching_gt > 1],
                                           dim=1)  # find the minimal-cost gt for these queries
                matching_matrix[anchor_matching_gt > 1] *= 0  # reset their assignments
                matching_matrix[anchor_matching_gt > 1, cost_argmin] = 1  # keep only the minimal-cost gt
assert not (matching_matrix.sum(0) == 0).any()
selected_query = matching_matrix.sum(1) > 0
gt_indices = matching_matrix[selected_query].max(1)[1]
assert selected_query.sum() == len(gt_indices)
cost[matching_matrix == 0] = cost[matching_matrix == 0] + float('inf')
matched_query_id = torch.min(cost, dim=0)[1]
return (selected_query, gt_indices), matched_query_id
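if __name__ == "__main__":
    # Minimal smoke test for the dynamic-k matcher (illustrative only; the config
    # below is a stand-in whose field names mirror how `config` is read above).
    from types import SimpleNamespace

    cfg = SimpleNamespace(use_focal=True, use_fed_loss=False, alpha=0.25, gamma=2.0,
                          class_weight=2.0, giou_weight=2.0, l1_weight=5.0, ota_k=5)
    matcher = HungarianMatcherDynamicK(cfg)
    cost = torch.rand(300, 3)   # [num_proposals, num_gt] assignment cost (random stand-in)
    ious = torch.rand(300, 3)   # pairwise IoUs that determine each gt's dynamic k
    (selected_query, gt_indices), matched_query_id = matcher.dynamic_k_matching(cost, ious, num_gt=3)
    assert selected_query.sum() == len(gt_indices)
    assert matched_query_id.shape == (3,)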