Spaces:
Running
Running
| # Copyright (c) 2023 Amphion. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import torch | |
| import torch.nn.functional as F | |
| import torch.nn as nn | |
| from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm | |
| from torch import nn | |
LRELU_SLOPE = 0.1


# This code is a refined MRD adapted from BigVGAN under the MIT License
# https://github.com/NVIDIA/BigVGAN
class DiscriminatorR(nn.Module):
    """Discriminator that scores a waveform from one STFT magnitude view.

    The input is turned into a magnitude spectrogram at a single
    ``(n_fft, hop_length, win_length)`` resolution and passed through a
    stack of normalised 2-D convolutions.

    Args:
        cfg: Configuration object. Reads ``cfg.model.mrd.use_spectral_norm``,
            ``cfg.model.mrd.discriminator_channel_mult_factor`` and
            ``cfg.model.mrd.mrd_override``. When ``mrd_override`` is truthy,
            ``cfg.model.mrd.mrd_use_spectral_norm`` and
            ``cfg.model.mrd.mrd_channel_mult`` replace the first two values.
        resolution: Sequence of exactly three values, unpacked as
            ``(n_fft, hop_length, win_length)``.

    Raises:
        AssertionError: If ``resolution`` does not have length 3.
    """

    def __init__(self, cfg, resolution):
        super().__init__()
        self.resolution = resolution
        assert (
            len(self.resolution) == 3
        ), "MRD layer requires list with len=3, got {}".format(self.resolution)
        self.lrelu_slope = LRELU_SLOPE

        mrd_cfg = cfg.model.mrd
        norm_f = self._select_norm(mrd_cfg.use_spectral_norm)
        self.d_mult = mrd_cfg.discriminator_channel_mult_factor
        if mrd_cfg.mrd_override:
            # MRD-specific settings take precedence over the shared ones.
            print(
                "INFO: overriding MRD use_spectral_norm as {}".format(
                    mrd_cfg.mrd_use_spectral_norm
                )
            )
            norm_f = self._select_norm(mrd_cfg.mrd_use_spectral_norm)
            print(
                "INFO: overriding mrd channel multiplier as {}".format(
                    mrd_cfg.mrd_channel_mult
                )
            )
            self.d_mult = mrd_cfg.mrd_channel_mult

        channels = int(32 * self.d_mult)
        # Layers are created strictly in forward order so that parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.convs = nn.ModuleList(
            [
                norm_f(nn.Conv2d(1, channels, (3, 9), padding=(1, 4))),
                *[
                    norm_f(
                        nn.Conv2d(
                            channels,
                            channels,
                            (3, 9),
                            stride=(1, 2),
                            padding=(1, 4),
                        )
                    )
                    for _ in range(3)
                ],
                norm_f(nn.Conv2d(channels, channels, (3, 3), padding=(1, 1))),
            ]
        )
        self.conv_post = norm_f(nn.Conv2d(channels, 1, (3, 3), padding=(1, 1)))

    @staticmethod
    def _select_norm(use_spectral_norm):
        """Return ``weight_norm`` when the flag equals False, else ``spectral_norm``."""
        # Deliberately an equality test (not ``not flag``): only a value that
        # compares equal to False selects weight norm, exactly as before.
        return weight_norm if use_spectral_norm == False else spectral_norm  # noqa: E712

    def forward(self, x):
        """Score ``x`` and collect the intermediate feature maps.

        Args:
            x: Waveform tensor accepted by :meth:`spectrogram`.

        Returns:
            Tuple ``(score, fmap)``. ``score`` is the output of the final
            convolution flattened from dimension 1 onwards. ``fmap`` is a list
            holding the activation after every layer in ``self.convs``
            followed by the un-flattened final convolution output.
        """
        fmap = []
        # Add a singleton channel dimension for the Conv2d stack.
        x = self.spectrogram(x).unsqueeze(1)
        for conv in self.convs:
            x = F.leaky_relu(conv(x), self.lrelu_slope)
            fmap.append(x)
        x = self.conv_post(x)
        fmap.append(x)
        return torch.flatten(x, 1, -1), fmap

    def spectrogram(self, x):
        """Return the STFT magnitude of ``x`` at this discriminator's resolution.

        Args:
            x: Waveform tensor. Dimension 1 is squeezed before the STFT, so
                the input is presumably ``[B, 1, T]`` (confirm against the
                caller).

        Returns:
            Magnitude tensor of shape ``[B, F, TT]``.
        """
        n_fft, hop_length, win_length = self.resolution
        # Manual reflect padding replaces ``center=True`` in the STFT below.
        pad = int((n_fft - hop_length) / 2)
        x = F.pad(x, (pad, pad), mode="reflect")
        x = x.squeeze(1)
        # ``torch.stft`` falls back to a rectangular window when none is
        # given, and recent PyTorch versions warn about the missing argument.
        # Passing the all-ones window explicitly keeps the same numerics and
        # makes the choice visible.
        window = torch.ones(win_length, dtype=x.dtype, device=x.device)
        x = torch.stft(
            x,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            window=window,
            center=False,
            return_complex=True,
        )
        x = torch.view_as_real(x)  # [B, F, TT, 2]
        mag = torch.norm(x, p=2, dim=-1)  # [B, F, TT]
        return mag
class MultiResolutionDiscriminator(nn.Module):
    """Bank of ``DiscriminatorR`` modules, one per configured resolution.

    Args:
        cfg: Configuration object. ``cfg.model.mrd.resolutions`` must hold
            exactly three entries; each entry is handed, together with
            ``cfg``, to ``DiscriminatorR``.
        debug: Accepted for interface compatibility; not read by this class.

    Raises:
        AssertionError: If ``cfg.model.mrd.resolutions`` does not have
            length 3.
    """

    def __init__(self, cfg, debug=False):
        super().__init__()
        self.resolutions = cfg.model.mrd.resolutions
        n_resolutions = len(self.resolutions)
        assert (
            n_resolutions == 3
        ), "MRD requires list of list with len=3, each element having a list with len=3. got {}".format(
            self.resolutions
        )

        sub_discriminators = []
        for res in self.resolutions:
            sub_discriminators.append(DiscriminatorR(cfg, res))
        self.discriminators = nn.ModuleList(sub_discriminators)

    def forward(self, y, y_hat):
        """Run every sub-discriminator on the real and generated inputs.

        Args:
            y: Reference input, passed to each sub-discriminator as ``x``.
            y_hat: Generated input, passed to each sub-discriminator as ``x``.

        Returns:
            Tuple ``(y_d_rs, y_d_gs, fmap_rs, fmap_gs)`` of four lists with
            one entry per sub-discriminator: scores for ``y``, scores for
            ``y_hat``, feature maps for ``y`` and feature maps for ``y_hat``.
        """
        # Each discriminator sees the real input first, then the generated one.
        paired = [(disc(x=y), disc(x=y_hat)) for disc in self.discriminators]

        real_scores = [score for (score, _), _ in paired]
        fake_scores = [score for _, (score, _) in paired]
        real_fmaps = [feats for (_, feats), _ in paired]
        fake_fmaps = [feats for _, (_, feats) in paired]
        return real_scores, fake_scores, real_fmaps, fake_fmaps