# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# The code is from SimMIM.
# (https://github.com/microsoft/SimMIM)
# ------------------------------------------------------------------------------
import json

from mmcv.runner import (OPTIMIZER_BUILDERS, DefaultOptimizerConstructor,
                         build_optimizer, get_dist_info)

def get_num_layer_for_swin(var_name, num_max_layer, layers_per_stage):
    """Map a Swin parameter name to its layer index for layer-wise lr decay."""
    var_name = var_name.replace('encoder', 'backbone') if var_name.startswith('encoder') else var_name
    if var_name in ("backbone.cls_token", "backbone.mask_token",
                    "backbone.pos_embed", "backbone.absolute_pos_embed"):
        return 0
    elif var_name.startswith("backbone.patch_embed"):
        return 0
    elif var_name.startswith("backbone.layers"):
        if var_name.split('.')[3] == "blocks":
            stage_id = int(var_name.split('.')[2])
            layer_id = int(var_name.split('.')[4]) \
                + sum(layers_per_stage[:stage_id])
            return layer_id + 1
        elif var_name.split('.')[3] == "downsample":
            stage_id = int(var_name.split('.')[2])
            layer_id = sum(layers_per_stage[:stage_id + 1])
            return layer_id
    else:
        return num_max_layer - 1
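
# Illustrative example (not in the original file): with layers_per_stage =
# [2, 2, 6, 2] and num_max_layer = sum(layers_per_stage) + 2 = 14, the call
#   get_num_layer_for_swin('backbone.layers.2.blocks.3.attn.qkv.weight', 14, [2, 2, 6, 2])
# returns 3 + (2 + 2) + 1 = 8: block index 3 within stage 2, offset by the
# blocks of the earlier stages, plus one slot reserved for patch_embed (layer 0).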

# Registered so mmcv configs can select this constructor by name.
@OPTIMIZER_BUILDERS.register_module()
class LDMOptimizerConstructor(DefaultOptimizerConstructor):
    """Optimizer constructor that places the pretrained LDM components (unet,
    cond_stage_model, encoder_vq, clip_model) in a param group trained at
    0.01x the base lr, and disables weight decay for 1-D/bias parameters."""

    def add_params(self, params, module, prefix='', is_dcn_module=None):
        """Add all parameters of module to the params list.

        The parameters of the given module will be added to the list of param
        groups, with specific rules defined by paramwise_cfg.

        Args:
            params (list[dict]): A list of param groups, it will be modified
                in place.
            module (nn.Module): The module to be added.
            prefix (str): The prefix of the module.
            is_dcn_module (int|float|None): If the current module is a
                submodule of DCN, `is_dcn_module` will be passed to
                control conv_offset layer's learning rate. Defaults to None.
        """
        parameter_groups = {}
        no_decay_names = self.paramwise_cfg.get('no_decay_names', [])
        print("Build LDMOptimizerConstructor")
        weight_decay = self.base_wd

        for name, param in module.named_parameters():
            if not param.requires_grad:
                continue  # frozen weights
            # 1-D tensors, biases and the absolute position embedding get no weight decay.
            if len(param.shape) == 1 or name.endswith(".bias") or name in ('absolute_pos_embed',):
                group_name = "no_decay"
                this_weight_decay = 0.
            else:
                group_name = "decay"
                this_weight_decay = weight_decay
            # Names matched by paramwise_cfg['no_decay_names'] also skip weight decay.
            for nd_name in no_decay_names:
                if nd_name in name:
                    group_name = "no_decay"
                    this_weight_decay = 0.
                    break

            # Pretrained LDM components go into layer 0 (lr scaled by 0.01);
            # everything else goes into layer 1 at the base lr.
            if 'unet' in name or 'cond_stage_model' in name or 'encoder_vq' in name or 'clip_model' in name:
                layer_id = 0
            else:
                layer_id = 1
            group_name = "layer_%d_%s" % (layer_id, group_name)

            if group_name not in parameter_groups:
                scale = 0.01 if layer_id == 0 else 1.0
                parameter_groups[group_name] = {
                    "weight_decay": this_weight_decay,
                    "params": [],
                    "param_names": [],
                    "lr_scale": scale,
                    "group_name": group_name,
                    "lr": scale * self.base_lr,
                }
            parameter_groups[group_name]["params"].append(param)
            parameter_groups[group_name]["param_names"].append(name)

        rank, _ = get_dist_info()
        if rank == 0:
            to_display = {}
            for key in parameter_groups:
                to_display[key] = {
                    "param_names": parameter_groups[key]["param_names"],
                    "lr_scale": parameter_groups[key]["lr_scale"],
                    "lr": parameter_groups[key]["lr"],
                    "weight_decay": parameter_groups[key]["weight_decay"],
                }
            print("Param groups = %s" % json.dumps(to_display, indent=2))

        params.extend(parameter_groups.values())
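
# ------------------------------------------------------------------------------
# Config sketch (illustrative, not from the original repo): an mmcv optimizer
# config that routes through this constructor. The AdamW hyper-parameters and
# the `no_decay_names` entries below are assumptions for demonstration only.
# ------------------------------------------------------------------------------
# optimizer = dict(
#     type='AdamW',
#     lr=1e-4,
#     weight_decay=1e-2,
#     constructor='LDMOptimizerConstructor',
#     paramwise_cfg=dict(no_decay_names=['norm', 'relative_position_bias_table']),
# )
# With this config, parameters whose names contain 'unet', 'cond_stage_model',
# 'encoder_vq' or 'clip_model' train at lr * 0.01, everything else at lr.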

def build_optimizers(model, cfgs):
    """Build multiple optimizers from configs.

    If `cfgs` contains several dicts for optimizers, then a dict of the
    constructed optimizers will be returned.
    If `cfgs` only contains one optimizer config, the constructed optimizer
    itself will be returned.

    For example,

    1) Multiple optimizer configs:

    .. code-block:: python

        optimizer_cfg = dict(
            model1=dict(type='SGD', lr=lr),
            model2=dict(type='SGD', lr=lr))

    The return dict is
    ``dict('model1': torch.optim.Optimizer, 'model2': torch.optim.Optimizer)``

    2) Single optimizer config:

    .. code-block:: python

        optimizer_cfg = dict(type='SGD', lr=lr)

    The return is ``torch.optim.Optimizer``.

    Args:
        model (:obj:`nn.Module`): The model with parameters to be optimized.
        cfgs (dict): The config dict of the optimizer.

    Returns:
        dict[:obj:`torch.optim.Optimizer`] | :obj:`torch.optim.Optimizer`:
            The initialized optimizers.
    """
    optimizers = {}
    if hasattr(model, 'module'):
        model = model.module
    # determine whether 'cfgs' has several dicts for optimizers
    if all(isinstance(v, dict) for v in cfgs.values()):
        for key, cfg in cfgs.items():
            cfg_ = cfg.copy()
            module = getattr(model, key)
            optimizers[key] = build_optimizer(module, cfg_)
        return optimizers

    return build_optimizer(model, cfgs)
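
# ------------------------------------------------------------------------------
# Usage sketch (illustrative, not from the original repo): `build_optimizers`
# returns a dict of optimizers when every value of `cfgs` is itself a config
# dict keyed by a submodule name, otherwise a single optimizer. The submodule
# names 'backbone' and 'decode_head' below are assumptions.
# ------------------------------------------------------------------------------
# multi_cfg = dict(
#     backbone=dict(type='AdamW', lr=1e-4, weight_decay=1e-2),
#     decode_head=dict(type='AdamW', lr=1e-3, weight_decay=1e-2),
# )
# opts = build_optimizers(model, multi_cfg)     # {'backbone': ..., 'decode_head': ...}
#
# single_cfg = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
# opt = build_optimizers(model, single_cfg)     # torch.optim.AdamW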