# DETR detector definition: a ResNet-50 backbone whose last stage feeds a
# transformer head (6 encoder layers, 6 decoder layers, 256-dim embeddings).
# `bbox_head.num_classes` is 256, matching the 256 `cluster_*` class names
# used by the training dataset in this file.
model = dict(
    type='DETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        # Only the last stage's feature map is passed to the head.
        out_indices=(3, ),
        # frozen_stages equals num_stages, and BN runs in eval mode with
        # requires_grad=False; presumably the whole backbone stays fixed
        # (the optimizer in this file also sets lr_mult=0 for `backbone`).
        frozen_stages=4,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    bbox_head=dict(
        type='DETRHead',
        num_classes=256,
        in_channels=2048,
        transformer=dict(
            type='Transformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1)
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DetrTransformerDecoder',
                return_intermediate=True,
                num_layers=6,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=dict(
                        type='MultiheadAttention',
                        embed_dims=256,
                        num_heads=8,
                        dropout=0.1),
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding', num_feats=128, normalize=True),
        # Loss weights (cls 1.0 / L1 5.0 / GIoU 2.0) use the same values as
        # the matching-cost weights in train_cfg.assigner below.
        loss_cls=dict(
            type='CrossEntropyLoss',
            bg_cls_weight=0.1,
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='ClassificationCost', weight=1.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
    test_cfg=dict(max_per_img=100))
# Dataset class name and root directory; the `data` section of this file
# uses the same dataset type and paths under this root for val/test.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Per-channel mean/std (and BGR->RGB flag); the same numbers appear in every
# `Normalize` step of the pipelines in this file.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: load image + boxes, random horizontal flip, then one of
# two AutoAugment policies, followed by normalisation, padding and packing.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='AutoAugment',
        policies=[
            # Policy 1: a single multi-scale resize (one scale picked from
            # the listed values, aspect ratio kept).
            [
                dict(
                    type='Resize',
                    img_scale=[(480, 1333), (512, 1333), (544, 1333),
                               (576, 1333), (608, 1333), (640, 1333),
                               (672, 1333), (704, 1333), (736, 1333),
                               (768, 1333), (800, 1333)],
                    multiscale_mode='value',
                    keep_ratio=True),
            ],
            # Policy 2: coarse resize -> random crop -> the same multi-scale
            # resize as policy 1 (with override=True on the second resize).
            [
                dict(
                    type='Resize',
                    img_scale=[(400, 1333), (500, 1333), (600, 1333)],
                    multiscale_mode='value',
                    keep_ratio=True),
                dict(
                    type='RandomCrop',
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type='Resize',
                    img_scale=[(480, 1333), (512, 1333), (544, 1333),
                               (576, 1333), (608, 1333), (640, 1333),
                               (672, 1333), (704, 1333), (736, 1333),
                               (768, 1333), (800, 1333)],
                    multiscale_mode='value',
                    override=True,
                    keep_ratio=True),
            ],
        ]),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=1),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
# Note that padding here uses size_divisor=32, whereas the training pipeline
# in this file pads with size_divisor=1.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
# Dataloader / dataset settings.
#
# * train: CocoDataset over train2017 images with 256 pseudo-classes
#   ('cluster_1' .. 'cluster_256').
# * val / test: identical settings, both reading instances_val2017.json.
#
# The pipelines are written out inline and carry the same values as the
# module-level `train_pipeline` / `test_pipeline`; keep them in sync when
# editing either copy.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CocoDataset',
        # NOTE(review): the '[email protected]' fragment looks like an
        # e-mail-obfuscation artifact that replaced part of the real file
        # name (something containing '@'). The original path cannot be
        # recovered from this file; restore it before training.
        ann_file='[email protected]_cluster-id-as-class.json',
        img_prefix='data/coco/train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='AutoAugment',
                policies=[
                    # Policy 1: a single multi-scale resize.
                    [
                        dict(
                            type='Resize',
                            img_scale=[(480, 1333), (512, 1333), (544, 1333),
                                       (576, 1333), (608, 1333), (640, 1333),
                                       (672, 1333), (704, 1333), (736, 1333),
                                       (768, 1333), (800, 1333)],
                            multiscale_mode='value',
                            keep_ratio=True),
                    ],
                    # Policy 2: coarse resize -> random crop -> multi-scale
                    # resize.
                    [
                        dict(
                            type='Resize',
                            img_scale=[(400, 1333), (500, 1333),
                                       (600, 1333)],
                            multiscale_mode='value',
                            keep_ratio=True),
                        dict(
                            type='RandomCrop',
                            crop_type='absolute_range',
                            crop_size=(384, 600),
                            allow_negative_crop=True),
                        dict(
                            type='Resize',
                            img_scale=[(480, 1333), (512, 1333), (544, 1333),
                                       (576, 1333), (608, 1333), (640, 1333),
                                       (672, 1333), (704, 1333), (736, 1333),
                                       (768, 1333), (800, 1333)],
                            multiscale_mode='value',
                            override=True,
                            keep_ratio=True),
                    ],
                ]),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=1),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ],
        # 'cluster_1' .. 'cluster_256' (256 names, 1-based), generated
        # instead of spelled out by hand.
        classes=['cluster_{}'.format(index) for index in range(1, 257)]),
    # NOTE(review): val/test do not set `classes`, while the model head is
    # configured for 256 classes - confirm this is intended.
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
# Evaluation settings. NOTE(review): interval=65535 is far larger than the
# runner's max_epochs (50) set in this file, so evaluation presumably never
# triggers during training - confirm this is intended.
evaluation = dict(
    interval=65535, metric='bbox', save_best='auto', gpu_collect=True)
# Checkpointing and logging cadence.
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
# Extra hooks: class-count consistency check and Weights & Biases logging
# (no evaluation images and no checkpoints are uploaded).
custom_hooks = [
    dict(type='NumClassCheckHook'),
    dict(
        type='MMDetWandbHook',
        init_kwargs=dict(project='I2B', group='finetune'),
        interval=50,
        num_eval_images=0,
        log_checkpoint=False)
]
# Distributed / runtime environment.
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
# base_batch_size=64 equals 32 GPUs (see `gpu_ids`) x samples_per_gpu=2.
auto_scale_lr = dict(enable=True, base_batch_size=64)
# Modules imported when the config is loaded, so that the custom components
# they define become available. With allow_failed_imports=False a module
# that cannot be imported is treated as an error.
# NOTE(review): the first entry lives under `mmselfsup`, every other entry
# under `selfsup` - confirm both packages are expected to be installed.
custom_imports = dict(
    imports=[
        'mmselfsup.datasets.pipelines',
        'selfsup.core.hook.momentum_update_hook',
        'selfsup.datasets.pipelines.selfsup_pipelines',
        'selfsup.datasets.pipelines.rand_aug',
        'selfsup.datasets.single_view_coco',
        'selfsup.datasets.multi_view_coco',
        'selfsup.models.losses.contrastive_loss',
        'selfsup.models.dense_heads.fcos_head',
        'selfsup.models.dense_heads.retina_head',
        'selfsup.models.dense_heads.detr_head',
        'selfsup.models.dense_heads.deformable_detr_head',
        'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
        'selfsup.models.roi_heads.standard_roi_head',
        'selfsup.models.detectors.selfsup_detector',
        'selfsup.models.detectors.selfsup_fcos',
        'selfsup.models.detectors.selfsup_detr',
        'selfsup.models.detectors.selfsup_deformable_detr',
        'selfsup.models.detectors.selfsup_retinanet',
        'selfsup.models.detectors.selfsup_mask_rcnn',
        'selfsup.core.bbox.assigners.hungarian_assigner',
        'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
        'selfsup.core.bbox.match_costs.match_cost'
    ],
    allow_failed_imports=False)
# Names of the 256 pseudo-classes: 'cluster_1', 'cluster_2', ...,
# 'cluster_256' (1-based, in ascending order). Generated rather than written
# out by hand so the list cannot drift or pick up typos.
classes = ['cluster_{}'.format(index) for index in range(1, 257)]
# Optimiser: AdamW; parameters matched by the `backbone` key get
# lr_mult=0 and decay_mult=0.
optimizer = dict(
    type='AdamW',
    lr=0.0002,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        custom_keys=dict(backbone=dict(lr_mult=0, decay_mult=0))))
# Gradient clipping by L2 norm with max_norm=0.1.
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# Step LR schedule with a single step at 40, within a 50-epoch run.
lr_config = dict(policy='step', step=[40])
runner = dict(type='EpochBasedRunner', max_epochs=50)
work_dir = 'work_dirs/selfsup_detr_cluster-ids-as-pseudo-labels'
auto_resume = False
# 32 GPU ids (0..31).
gpu_ids = range(0, 32)