| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| """STFT-based Loss modules.""" |
|
|
| import torch |
| import torch.nn.functional as F |
|
|
|
|
def stft(x, fft_size, hop_size, win_length, window):
    """Perform STFT and convert to magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor or None): Window tensor handed to ``torch.stft``
            (callers in this file pass a tensor of length ``win_length``).

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).

    """
    # torch.stft needs the window on the same device as the signal; the
    # caller may have built the window on CPU and moved only the signal.
    if window is not None and window.device != x.device:
        window = window.to(x.device)

    # Current PyTorch refuses real inputs unless return_complex is given,
    # so request the complex result and take the magnitude from it.
    x_stft = torch.stft(
        x, fft_size, hop_size, win_length, window, return_complex=True
    )
    power = x_stft.real ** 2 + x_stft.imag ** 2

    # Clamp before the square root so the result is never exactly zero.
    # (B, #freq_bins, #frames) -> (B, #frames, #freq_bins)
    return torch.sqrt(torch.clamp(power, min=1e-7)).transpose(2, 1)
|
|
|
|
class SpectralConvergengeLoss(torch.nn.Module):
    """Spectral convergence loss between two magnitude spectrograms."""

    def __init__(self):
        """Initialize the spectral convergence loss module (no parameters)."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Compute the spectral convergence loss.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal
                (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal
                (B, #frames, #freq_bins).

        Returns:
            Tensor: Frobenius norm of the difference divided by the
                Frobenius norm of the groundtruth.

        """
        residual_norm = torch.norm(y_mag - x_mag, p="fro")
        reference_norm = torch.norm(y_mag, p="fro")
        return residual_norm / reference_norm
|
|
|
|
class LogSTFTMagnitudeLoss(torch.nn.Module):
    """L1 loss between log-magnitude spectrograms."""

    def __init__(self):
        """Initialize the log STFT magnitude loss module (no parameters)."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Compute the log STFT magnitude loss.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal
                (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal
                (B, #frames, #freq_bins).

        Returns:
            Tensor: Mean absolute difference of the natural-log magnitudes.

        """
        log_target = torch.log(y_mag)
        log_predicted = torch.log(x_mag)
        return F.l1_loss(log_target, log_predicted)
|
|
|
|
class STFTLoss(torch.nn.Module):
    """STFT loss module (single resolution)."""

    def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window"):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size between frames.
            win_length (int): Window length.
            window (str): Name of a ``torch`` window factory
                (looked up with ``getattr(torch, window)``).

        """
        super().__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        # Register the window as a buffer so that ``module.to(device)`` /
        # ``.cuda()`` moves it together with the module; a plain tensor
        # attribute would stay behind and mismatch the input device.
        # Non-persistent keeps the state_dict keys unchanged.
        self.register_buffer(
            "window", getattr(torch, window)(win_length), persistent=False
        )
        self.spectral_convergenge_loss = SpectralConvergengeLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        """
        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)
        sc_loss = self.spectral_convergenge_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)

        return sc_loss, mag_loss
|
|
|
|
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module."""

    def __init__(self,
                 fft_sizes=(1024, 2048, 512),
                 hop_sizes=(120, 240, 50),
                 win_lengths=(600, 1200, 240),
                 window="hann_window", factor_sc=0.1, factor_mag=0.1):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (sequence of int): FFT sizes, one per resolution.
            hop_sizes (sequence of int): Hop sizes, one per resolution.
            win_lengths (sequence of int): Window lengths, one per resolution.
            window (str): Window function type.
            factor_sc (float): Weight applied to the averaged spectral
                convergence loss.
            factor_mag (float): Weight applied to the averaged log STFT
                magnitude loss.

        """
        super().__init__()
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths), \
            "fft_sizes, hop_sizes and win_lengths must have the same length"
        # One STFTLoss per (fft size, hop size, window length) triple.
        self.stft_losses = torch.nn.ModuleList([
            STFTLoss(fs, ss, wl, window)
            for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths)
        ])
        self.factor_sc = factor_sc
        self.factor_mag = factor_mag

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value,
                averaged over resolutions and scaled by ``factor_sc``.
            Tensor: Multi resolution log STFT magnitude loss value,
                averaged over resolutions and scaled by ``factor_mag``.

        """
        sc_loss = 0.0
        mag_loss = 0.0
        for stft_loss in self.stft_losses:
            sc_l, mag_l = stft_loss(x, y)
            # Out-of-place accumulation: never modify a tensor returned by
            # a sub-loss in place.
            sc_loss = sc_loss + sc_l
            mag_loss = mag_loss + mag_l

        num_resolutions = len(self.stft_losses)
        sc_loss = sc_loss / num_resolutions
        mag_loss = mag_loss / num_resolutions

        return self.factor_sc * sc_loss, self.factor_mag * mag_loss
|
|