import math


class WarmupLR:
""" |
|
Linear Warmup learning rate scheduler. After warmup, learning rate is |
|
constant. |
|
|
|
Args: |
|
optimizer (torch.optim.Optimizer): optimizer |
|
warmup_steps (int): number of warmup steps |
|
|
|
""" |
|
|
|
    def __init__(self, optimizer, warmup_steps):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        # Base learning rates are captured lazily on the first call to step().
        self.base_lr = None

    def get_lr(self, lr, step):
        # Scale the base learning rate linearly during warmup, then keep it constant.
        return lr * min(step / max(self.warmup_steps, 1), 1.0)

    def step(self, step):
        # Record the base learning rate of each param group on the first call.
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        # Serialize everything except the optimizer reference.
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        self.__dict__.update(state_dict)
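

# Minimal usage sketch for WarmupLR (illustrative only; assumes torch is
# installed, and the toy model, optimizer, and helper name below are
# hypothetical placeholders, not part of this module's API).
def _demo_warmup_lr():
    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = WarmupLR(optimizer, warmup_steps=100)
    for step in range(1, 301):
        # ... forward/backward and optimizer.step() would go here ...
        scheduler.step(step)
    # After 100 warmup steps the learning rate stays at the base value (0.1).
    print(optimizer.param_groups[0]["lr"])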


class WarmupCosineDecayLR:
    """
    Linear warmup learning rate scheduler with cosine decay. The learning rate
    increases linearly from 0 to the optimizer's base value over warmup_steps
    steps and then follows a cosine decay until total_steps.

    Args:
        optimizer (torch.optim.Optimizer): wrapped optimizer
        warmup_steps (int): number of warmup steps
        total_steps (int): total number of steps
        rate (float): cosine decay rate; 1.0 decays the learning rate to zero
            at total_steps, smaller values stop earlier on the cosine curve and
            leave a non-zero final learning rate
    """

    def __init__(self, optimizer, warmup_steps, total_steps, rate=1.0):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        # Base learning rates are captured lazily on the first call to step().
        self.base_lr = None
        self.total_steps = total_steps
        self.rate = rate

    def get_lr(self, lr, step):
        if step < self.warmup_steps:
            # Linear warmup from 0 up to the base learning rate.
            return lr * min(step / max(self.warmup_steps, 1), 1.0)
        # Cosine decay over the remaining steps; the guard avoids division by
        # zero when total_steps == warmup_steps.
        progress = (step - self.warmup_steps) / max(
            self.total_steps - self.warmup_steps, 1
        )
        return 0.5 * lr * (1 + math.cos(self.rate * math.pi * progress))

    def step(self, step):
        # Record the base learning rate of each param group on the first call.
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        # Serialize everything except the optimizer reference.
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        self.__dict__.update(state_dict)
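

# Minimal usage sketch for WarmupCosineDecayLR (illustrative only; assumes
# torch is installed, and the toy model and optimizer below are hypothetical
# placeholders, not part of this module's API).
if __name__ == "__main__":
    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = WarmupCosineDecayLR(
        optimizer, warmup_steps=100, total_steps=1000, rate=1.0
    )
    for step in range(1, 1001):
        # ... forward/backward and optimizer.step() would go here ...
        scheduler.step(step)
        if step in (50, 100, 500, 1000):
            # The learning rate ramps up during warmup, peaks at the base
            # value, then decays along the cosine curve towards zero.
            print(f"step {step}: lr = {optimizer.param_groups[0]['lr']:.5f}")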