sapiens-pose

Paused

App Files Files Community

sapiens-pose / external /det /mmdet /models /necks /yolo_neck.py

rawalkhirodkar

Add initial commit

28c256d over 1 year ago

raw

history blame contribute delete

5.99 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	# Copyright (c) 2019 Western Digital Corporation or its affiliates.
	from typing import List, Tuple

	import torch
	import torch.nn.functional as F
	from mmcv.cnn import ConvModule
	from mmengine.model import BaseModule
	from torch import Tensor

	from mmdet.registry import MODELS
	from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig


	class DetectionBlock(BaseModule):
	    """Stack of five alternating 1x1 / 3x3 conv layers used by the YOLO neck.

	    With ``out_channels = n`` the block builds, in order::

	        conv1: 1x1, in_channels -> n
	        conv2: 3x3, n -> 2n (padding=1)
	        conv3: 1x1, 2n -> n
	        conv4: 3x3, n -> 2n (padding=1)
	        conv5: 1x1, 2n -> n

	    ``forward`` applies them sequentially and returns the output of
	    ``conv5``, which therefore has ``out_channels`` channels. The number
	    of input channels is arbitrary.

	    NOTE(review): descriptions of the YOLOv3 detection block usually list
	    a further 3x3 conv, a 1x1 prediction conv and a YOLO layer. None of
	    them is created here; presumably they belong to the detection head --
	    confirm against the head implementation.

	    Args:
	        in_channels (int): The number of input channels.
	        out_channels (int): The number of output channels.
	        conv_cfg (dict): Config dict for convolution layer. Default: None.
	        norm_cfg (dict): Dictionary to construct and config norm layer.
	            Default: dict(type='BN', requires_grad=True)
	        act_cfg (dict): Config dict for activation layer.
	            Default: dict(type='LeakyReLU', negative_slope=0.1).
	        init_cfg (dict or list[dict], optional): Initialization config
	            dict. Default: None
	    """

	    def __init__(self,
	                 in_channels: int,
	                 out_channels: int,
	                 conv_cfg: OptConfigType = None,
	                 norm_cfg: ConfigType = dict(type='BN', requires_grad=True),
	                 act_cfg: ConfigType = dict(
	                     type='LeakyReLU', negative_slope=0.1),
	                 init_cfg: OptMultiConfig = None) -> None:
	        super().__init__(init_cfg)
	        narrow = out_channels
	        wide = out_channels * 2

	        # Every layer shares the same conv / norm / activation settings.
	        shared = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

	        # Attribute names and creation order are kept as conv1..conv5 so
	        # parameter names and ordering stay exactly as before.
	        self.conv1 = ConvModule(in_channels, narrow, 1, **shared)
	        self.conv2 = ConvModule(narrow, wide, 3, padding=1, **shared)
	        self.conv3 = ConvModule(wide, narrow, 1, **shared)
	        self.conv4 = ConvModule(narrow, wide, 3, padding=1, **shared)
	        self.conv5 = ConvModule(wide, narrow, 1, **shared)

	    def forward(self, x: Tensor) -> Tensor:
	        """Run ``x`` through conv1..conv5 in order and return the result."""
	        feat = x
	        for layer in (self.conv1, self.conv2, self.conv3, self.conv4,
	                      self.conv5):
	            feat = layer(feat)
	        return feat


	@MODELS.register_module()
	class YOLOV3Neck(BaseModule):
	    """The neck of YOLOV3.

	    It can be treated as a simplified version of FPN. It takes the
	    multi-scale features from the backbone, and for every scale after the
	    first one it reduces the previous output with a 1x1 conv, upsamples it
	    by a factor of 2, concatenates it with the next input feature along
	    the channel dimension and feeds the result to a ``DetectionBlock``.
	    The outputs are feature maps, one per scale.

	    Note:
	        ``in_channels`` and ``out_channels`` are listed in *processing*
	        order, which is the reverse of the order of ``feats``:
	        ``in_channels[0]`` describes ``feats[-1]`` (processed first),
	        ``in_channels[1]`` describes ``feats[-2]``, and so on.
	        Because each intermediate result is upsampled by 2 before the
	        concatenation, ``feats[-1 - i]`` must have twice the spatial size
	        of ``feats[-i]``.

	    Args:
	        num_scales (int): The number of scales / stages.
	        in_channels (List[int]): The number of input channels per scale.
	        out_channels (List[int]): The number of output channels per scale.
	        conv_cfg (dict, optional): Config dict for convolution layer.
	            Default: None.
	        norm_cfg (dict, optional): Dictionary to construct and config norm
	            layer. Default: dict(type='BN', requires_grad=True)
	        act_cfg (dict, optional): Config dict for activation layer.
	            Default: dict(type='LeakyReLU', negative_slope=0.1).
	        init_cfg (dict or list[dict], optional): Initialization config
	            dict. Default: None
	    """

	    def __init__(self,
	                 num_scales: int,
	                 in_channels: List[int],
	                 out_channels: List[int],
	                 conv_cfg: OptConfigType = None,
	                 norm_cfg: ConfigType = dict(type='BN', requires_grad=True),
	                 act_cfg: ConfigType = dict(
	                     type='LeakyReLU', negative_slope=0.1),
	                 init_cfg: OptMultiConfig = None) -> None:
	        super(YOLOV3Neck, self).__init__(init_cfg)
	        assert (num_scales == len(in_channels) == len(out_channels)), (
	            'num_scales must equal len(in_channels) and len(out_channels)')
	        self.num_scales = num_scales
	        self.in_channels = in_channels
	        self.out_channels = out_channels

	        # shortcut
	        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

	        # Modules are registered by name (detect1, conv1, detect2, ...) so
	        # that an arbitrary number of scales is supported; forward() looks
	        # them up again with getattr.
	        self.detect1 = DetectionBlock(in_channels[0], out_channels[0], **cfg)
	        for i in range(1, self.num_scales):
	            in_c, out_c = self.in_channels[i], self.out_channels[i]
	            # The 1x1 conv consumes the output of the previous stage.
	            inter_c = out_channels[i - 1]
	            self.add_module(f'conv{i}', ConvModule(inter_c, out_c, 1, **cfg))
	            # in_c + out_c : High-lvl feats will be cat with low-lvl feats
	            self.add_module(f'detect{i+1}',
	                            DetectionBlock(in_c + out_c, out_c, **cfg))

	    def forward(self, feats: Tuple[Tensor, ...]) -> Tuple[Tensor, ...]:
	        """Fuse the multi-scale features, starting from ``feats[-1]``.

	        Args:
	            feats (tuple[Tensor]): ``num_scales`` feature maps. The last
	                one is processed first; see the class note for the
	                expected channel and spatial-size relationship.

	        Returns:
	            tuple[Tensor]: ``num_scales`` feature maps in processing order,
	            i.e. ``outs[0]`` is derived from ``feats[-1]`` and ``outs[-1]``
	            from ``feats[0]``.
	        """
	        assert len(feats) == self.num_scales, (
	            'the number of input feature maps must equal num_scales')

	        # processed from bottom (high-lvl) to top (low-lvl)
	        outs = []
	        out = self.detect1(feats[-1])
	        outs.append(out)

	        for i, x in enumerate(reversed(feats[:-1])):
	            conv = getattr(self, f'conv{i+1}')
	            tmp = conv(out)

	            # Upsample x2 so the spatial size matches ``x``, then cat with
	            # the low-lvl feats along the channel dimension.
	            tmp = F.interpolate(tmp, scale_factor=2)
	            tmp = torch.cat((tmp, x), 1)

	            detect = getattr(self, f'detect{i+2}')
	            out = detect(tmp)
	            outs.append(out)

	        return tuple(outs)