from transformers import PretrainedConfig
from transformers.models.auto import CONFIG_MAPPING
from transformers.utils.backbone_utils import verify_backbone_config_arguments
from transformers.utils import logging, PushToHubMixin
logger = logging.get_logger(__name__)
class DiffusionDetConfig(PretrainedConfig):
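    """
    Configuration class for the DiffusionDet object-detection model.

    The defaults mirror the values that were previously hard-coded in the model. The arguments fall into the
    groups used below: backbone selection (a timm ResNet-50 by default, or a `backbone_config` for another
    Transformers backbone), FPN / ROI pooling settings, the RCNN head, loss weights, focal-loss parameters,
    dynamic-k matching, diffusion sampling (`snr_scale`, `sample_step`), inference (`use_nms`), Swin backbone
    options, and optimizer settings. Remaining keyword arguments are forwarded to `PretrainedConfig`.
    """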
    model_type = "diffusiondet"

    def __init__(
        self,
        use_timm_backbone=True,
        backbone_config=None,
        num_channels=3,
        pixel_mean=(123.675, 116.280, 103.530),
        pixel_std=(58.395, 57.120, 57.375),
        resnet_out_features=("res2", "res3", "res4", "res5"),
        resnet_in_features=("res2", "res3", "res4", "res5"),
        roi_head_in_features=("p2", "p3", "p4", "p5"),
        fpn_out_channels=256,
        pooler_resolution=7,
        sampling_ratio=2,
        num_proposals=300,
        num_attn_heads=8,
        dropout=0.0,
        dim_feedforward=2048,
        activation="relu",
        hidden_dim=256,
        num_cls=1,
        num_reg=3,
        num_heads=6,
        num_dynamic=2,
        dim_dynamic=64,
        class_weight=2.0,
        giou_weight=2.0,
        l1_weight=5.0,
        deep_supervision=True,
        no_object_weight=0.1,
        use_focal=True,
        use_fed_loss=False,
        alpha=0.25,
        gamma=2.0,
        prior_prob=0.01,
        ota_k=5,
        snr_scale=2.0,
        sample_step=1,
        use_nms=True,
        swin_size="B",
        use_swin_checkpoint=False,
        swin_out_features=(0, 1, 2, 3),
        optimizer="ADAMW",
        backbone_multiplier=1.0,
        backbone="resnet50",
        use_pretrained_backbone=True,
        backbone_kwargs=None,
        dilation=False,
        **kwargs,
    ):
        # We default to values which were previously hard-coded in the model. This enables configurability of the
        # config while keeping the default behavior the same.
        if use_timm_backbone and backbone_kwargs is None:
            backbone_kwargs = {}
            if dilation:
                backbone_kwargs["output_stride"] = 16
            backbone_kwargs["out_indices"] = [1, 2, 3, 4]
            backbone_kwargs["in_chans"] = num_channels
        # Backwards compatibility
        elif not use_timm_backbone and backbone in (None, "resnet50"):
            if backbone_config is None:
                logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.")
                backbone_config = CONFIG_MAPPING["resnet"](out_features=["stage4"])
            elif isinstance(backbone_config, dict):
                backbone_model_type = backbone_config.get("model_type")
                config_class = CONFIG_MAPPING[backbone_model_type]
                backbone_config = config_class.from_dict(backbone_config)
            backbone = None
            # set timm attributes to None
            dilation = None

        verify_backbone_config_arguments(
            use_timm_backbone=use_timm_backbone,
            use_pretrained_backbone=use_pretrained_backbone,
            backbone=backbone,
            backbone_config=backbone_config,
            backbone_kwargs=backbone_kwargs,
        )
        # Auto mapping, so that loading with `trust_remote_code=True` resolves to the custom config and model classes.
        self.auto_map = {
            "AutoConfig": "configuration_diffusiondet.DiffusionDetConfig",
            "AutoModelForObjectDetection": "modeling_diffusiondet.DiffusionDet",
        }
        # Backbone.
        self.use_timm_backbone = use_timm_backbone
        self.backbone_config = backbone_config
        self.num_channels = num_channels
        self.backbone = backbone
        self.use_pretrained_backbone = use_pretrained_backbone
        self.backbone_kwargs = backbone_kwargs
        self.dilation = dilation
        self.fpn_out_channels = fpn_out_channels

        # Model.
        self.pixel_mean = pixel_mean
        self.pixel_std = pixel_std
        self.resnet_out_features = resnet_out_features
        self.resnet_in_features = resnet_in_features
        self.roi_head_in_features = roi_head_in_features
        self.pooler_resolution = pooler_resolution
        self.sampling_ratio = sampling_ratio
        self.num_proposals = num_proposals

        # RCNN Head.
        self.num_attn_heads = num_attn_heads
        self.dropout = dropout
        self.dim_feedforward = dim_feedforward
        self.activation = activation
        self.hidden_dim = hidden_dim
        self.num_cls = num_cls
        self.num_reg = num_reg
        self.num_heads = num_heads

        # Dynamic Conv.
        self.num_dynamic = num_dynamic
        self.dim_dynamic = dim_dynamic

        # Loss.
        self.class_weight = class_weight
        self.giou_weight = giou_weight
        self.l1_weight = l1_weight
        self.deep_supervision = deep_supervision
        self.no_object_weight = no_object_weight

        # Focal Loss.
        self.use_focal = use_focal
        self.use_fed_loss = use_fed_loss
        self.alpha = alpha
        self.gamma = gamma
        self.prior_prob = prior_prob

        # Dynamic K.
        self.ota_k = ota_k

        # Diffusion.
        self.snr_scale = snr_scale
        self.sample_step = sample_step

        # Inference.
        self.use_nms = use_nms

        # Swin Backbones.
        self.swin_size = swin_size
        self.use_swin_checkpoint = use_swin_checkpoint
        self.swin_out_features = swin_out_features

        # Optimizer.
        self.optimizer = optimizer
        self.backbone_multiplier = backbone_multiplier
        # Default number of detection classes (the 80-category COCO label set).
        self.num_labels = 80

        super().__init__(**kwargs)
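

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): build a config
# for the default timm ResNet-50 backbone and round-trip it through the
# standard `PretrainedConfig` save/load helpers. The directory name below is
# an arbitrary example path.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    config = DiffusionDetConfig(num_proposals=300, sample_step=1)
    config.save_pretrained("diffusiondet-config-example")  # writes config.json
    reloaded = DiffusionDetConfig.from_pretrained("diffusiondet-config-example")
    assert reloaded.num_proposals == config.num_proposals
    print(reloaded.model_type, reloaded.num_labels)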