🔨 [Add] Warm up in training process
- yolo/config/config.py +1 -0
- yolo/config/hyper/default.yaml +11 -7
- yolo/tools/model_helper.py +23 -5
- yolo/tools/trainer.py +3 -4
yolo/config/config.py

```diff
@@ -53,6 +53,7 @@ class SchedulerArgs:
 class SchedulerConfig:
     type: str
     args: SchedulerArgs
+    warmup: Dict[str, Union[str, int, float]]
 
 
 @dataclass
```
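For reference, a minimal sketch of how the new `warmup` field gets populated from the scheduler block in `default.yaml`. The `SchedulerArgs` body below is a placeholder for illustration, not the real dataclass; in the project the config is an OmegaConf object, so the warmup mapping also supports attribute access (`schedule_cfg.warmup.epochs`) as used later in `get_scheduler`.

```python
from dataclasses import dataclass
from typing import Dict, Union

@dataclass
class SchedulerArgs:            # placeholder body; the real fields live in config.py
    total_iters: int = 500      # mirrors ${hyper.train.epoch} after interpolation
    start_factor: float = 1.0
    end_factor: float = 0.01

@dataclass
class SchedulerConfig:
    type: str
    args: SchedulerArgs
    warmup: Dict[str, Union[str, int, float]]

cfg = SchedulerConfig(type="LinearLR", args=SchedulerArgs(), warmup={"epochs": 3.0})
print(cfg.warmup["epochs"])  # 3.0 -> read later as schedule_cfg.warmup.epochs
```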
yolo/config/hyper/default.yaml

```diff
@@ -15,12 +15,13 @@ data:
   shuffle: True
   pin_memory: True
 train:
-  epoch:
+  epoch: 500
   optimizer:
-    type:
+    type: SGD
     args:
-      lr: 0.
-      weight_decay: 0.
+      lr: 0.01
+      weight_decay: 0.0005
+      momentum: 0.937
   loss:
     objective:
       BCELoss: 0.5
@@ -35,10 +36,13 @@ train:
     iou: 6.0
     cls: 0.5
   scheduler:
-    type:
+    type: LinearLR
+    warmup:
+      epochs: 3.0
     args:
-
-
+      total_iters: ${hyper.train.epoch}
+      start_factor: 1
+      end_factor: 0.01
   ema:
     enabled: true
     decay: 0.995
```
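The `${hyper.train.epoch}` value is an OmegaConf-style interpolation, so it only resolves once this file is composed under a top-level `hyper` key (which the interpolation path suggests, though the exact mount point is an assumption here). A quick standalone check, as a sketch:

```python
from omegaconf import OmegaConf

# Assumption: the file ends up under a `hyper` group in the composed config;
# adjust the key if the project mounts it elsewhere.
hyper = OmegaConf.load("yolo/config/hyper/default.yaml")
cfg = OmegaConf.create({"hyper": hyper})

print(cfg.hyper.train.scheduler.type)              # LinearLR
print(cfg.hyper.train.scheduler.warmup.epochs)     # 3.0
print(cfg.hyper.train.scheduler.args.total_iters)  # 500, resolved from train.epoch
```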
yolo/tools/model_helper.py

```diff
@@ -2,9 +2,10 @@ from typing import Any, Dict, Type
 
 import torch
 from torch.optim import Optimizer
-from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler
 
 from yolo.config.config import OptimizerConfig, SchedulerConfig
+from yolo.model.yolo import YOLO
 
 
 class EMA:
@@ -31,21 +32,38 @@ class EMA:
             self.shadow[name].copy_(param.data)
 
 
-def get_optimizer(
+def get_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     """Create an optimizer for the given model parameters based on the configuration.
 
     Returns:
         An instance of the optimizer configured according to the provided settings.
     """
     optimizer_class: Type[Optimizer] = getattr(torch.optim, optim_cfg.type)
+
+    bias_params = [p for name, p in model.named_parameters() if "bias" in name]
+    norm_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" in name]
+    conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]
+
+    model_parameters = [
+        {"params": bias_params, "nestrov": True, "momentum": 0.937},
+        {"params": conv_params, "weight_decay": 0.0},
+        {"params": norm_params, "weight_decay": 1e-5},
+    ]
     return optimizer_class(model_parameters, **optim_cfg.args)
 
 
-def get_scheduler(optimizer: Optimizer,
+def get_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LRScheduler:
     """Create a learning rate scheduler for the given optimizer based on the configuration.
 
     Returns:
         An instance of the scheduler configured according to the provided settings.
     """
-    scheduler_class: Type[_LRScheduler] = getattr(torch.optim.lr_scheduler,
-
+    scheduler_class: Type[_LRScheduler] = getattr(torch.optim.lr_scheduler, schedule_cfg.type)
+    schedule = scheduler_class(optimizer, **schedule_cfg.args)
+    if hasattr(schedule_cfg, "warmup"):
+        wepoch = schedule_cfg.warmup.epochs
+        lambda1 = lambda epoch: 0.1 + 0.9 * (epoch + 1 / wepoch) if epoch < wepoch else 1
+        lambda2 = lambda epoch: 10 - 9 * (epoch + 1 / wepoch) if epoch < wepoch else 1
+        warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda1, lambda2, lambda1])
+        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[2])
+    return schedule
```
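The list of lambdas passed to `LambdaLR` lines up one-to-one with the three parameter groups built in `get_optimizer` (bias, conv weights, norm weights). Below is a self-contained sketch of the same pattern, a per-group `LambdaLR` warmup handed over to the main `LinearLR` decay by `SequentialLR`; the toy model, two-group split, and ramp factors are illustrative stand-ins, not the project's exact values.

```python
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR, LinearLR, SequentialLR

# Toy model and an illustrative bias/weight group split.
model = torch.nn.Conv2d(3, 16, 3)
param_groups = [
    {"params": [model.bias]},                         # bias-style group
    {"params": [model.weight], "weight_decay": 0.0},  # conv-weight-style group
]
optimizer = SGD(param_groups, lr=0.01, momentum=0.937)

warmup_epochs = 3
# One lambda per param group: ramp from 10% to 100% of the base LR over warmup.
ramp = lambda e: min(1.0, 0.1 + 0.9 * (e + 1) / warmup_epochs)
warmup = LambdaLR(optimizer, lr_lambda=[ramp] * len(param_groups))
decay = LinearLR(optimizer, start_factor=1.0, end_factor=0.01, total_iters=500)
scheduler = SequentialLR(optimizer, schedulers=[warmup, decay],
                         milestones=[warmup_epochs])

for epoch in range(6):
    # optimizer.step() over the epoch's batches would happen here
    scheduler.step()
    print(epoch, [round(g["lr"], 5) for g in optimizer.param_groups])
```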
yolo/tools/trainer.py

```diff
@@ -19,7 +19,7 @@ class Trainer:
 
         self.model = model.to(device)
         self.device = device
-        self.optimizer = get_optimizer(model
+        self.optimizer = get_optimizer(model, train_cfg.optimizer)
         self.scheduler = get_scheduler(self.optimizer, train_cfg.scheduler)
         self.loss_fn = get_loss_function(cfg)
         self.progress = CustomProgress(cfg, save_path, use_wandb=True)
@@ -47,7 +47,6 @@ class Trainer:
     def train_one_epoch(self, dataloader):
         self.model.train()
         total_loss = 0
-        self.progress.start_batch(len(dataloader))
 
         for data, targets in dataloader:
             loss, loss_each = self.train_one_batch(data, targets)
@@ -58,7 +57,6 @@ class Trainer:
         if self.scheduler:
             self.scheduler.step()
 
-        self.progress.finish_batch()
         return total_loss / len(dataloader)
 
     def save_checkpoint(self, epoch: int, filename="checkpoint.pt"):
@@ -80,8 +78,9 @@ class Trainer:
         self.progress.start_train(num_epochs)
         for epoch in range(num_epochs):
 
+            self.progress.start_one_epoch(len(dataloader), self.optimizer, epoch)
            epoch_loss = self.train_one_epoch(dataloader)
-            self.progress.
+            self.progress.finish_one_epoch()
 
             logger.info(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
             if (epoch + 1) % 5 == 0:
```