Spaces:

Yehor
/

radtts-uk-demo

Build error

App Files Files Community

radtts-uk-demo / hifigan_models.py

Yehor

Add the code

4304c2f 3 months ago

raw

history blame contribute delete

16.9 kB

	# Original source taken from https://github.com/jik876/hifi-gan/
	# MIT License
	#
	# Copyright (c) 2020 Jungil Kong
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.
	import math
	import torch
	import torch.nn.functional as F
	import torch.nn as nn
	from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
	from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
	from hifigan_utils import init_weights, get_padding

	# Negative slope passed to F.leaky_relu by the residual blocks, the generator
	# and the discriminators defined in this file.
	LRELU_SLOPE = 0.1


	class GaussianBlurAugmentation(nn.Module):
	    """Randomly blur the input with one of several precomputed Gaussian kernels.

	    Args:
	        kernel_size: sequence with the kernel extent along each blurred axis
	            (``forward`` uses the first two entries for a 2-D convolution).
	        sigmas: iterable of standard deviations; one kernel is built per sigma
	            and the same sigma is used along every axis.
	        p_blurring: blurring is applied when a uniform sample in ``[0, 1)`` is
	            not greater than this value.
	    """

	    def __init__(self, kernel_size, sigmas, p_blurring):
	        super(GaussianBlurAugmentation, self).__init__()
	        self.kernel_size = kernel_size
	        self.sigmas = sigmas
	        kernels = self.initialize_kernels(kernel_size, sigmas)
	        # Registered as a buffer so the kernels follow the module across devices.
	        self.register_buffer("kernels", kernels)
	        self.p_blurring = p_blurring
	        self.conv = F.conv2d

	    def initialize_kernels(self, kernel_size, sigmas):
	        """Build one normalized Gaussian kernel per sigma.

	        Returns:
	            Tensor of shape ``(len(sigmas), 1, 1, *kernel_size)``; each kernel
	            sums to 1.
	        """
	        # "ij" is the historical default; passing it explicitly keeps the
	        # layout stable and silences the deprecation warning.
	        mesh_grids = torch.meshgrid(
	            [torch.arange(size, dtype=torch.float32) for size in kernel_size],
	            indexing="ij",
	        )
	        kernels = []
	        for sigma in sigmas:
	            kernel = 1
	            stds = [sigma] * len(kernel_size)
	            # The N-d Gaussian is the product of the 1-d Gaussians per axis.
	            for size, std, mgrid in zip(kernel_size, stds, mesh_grids):
	                mean = (size - 1) / 2
	                kernel = kernel * (
	                    1
	                    / (std * math.sqrt(2 * math.pi))
	                    * torch.exp(-(((mgrid - mean) / std) ** 2) / 2)
	                )

	            # Make sure sum of values in gaussian kernel equals 1.
	            kernel = kernel / torch.sum(kernel)

	            # Reshape to depthwise convolutional weight
	            kernel = kernel.view(1, 1, *kernel.size())
	            # Fix: the original line was missing the ``*`` operators and tried
	            # to call a list, which raised a TypeError.
	            kernel = kernel.repeat(1, *[1] * (kernel.dim() - 1))
	            kernels.append(kernel[None])

	        kernels = torch.cat(kernels)
	        return kernels

	    def forward(self, x):
	        """Return ``x`` unchanged or blurred with a randomly chosen kernel.

	        A channel axis is inserted at dim 1 before the 2-D convolution and
	        removed afterwards, so the output has the same shape as ``x``.
	        """
	        if torch.rand(1)[0] > self.p_blurring:
	            return x

	        i = torch.randint(len(self.kernels), (1,))[0]
	        kernel = self.kernels[i]
	        # Pad each axis by half of its own kernel extent so non-square kernels
	        # also preserve the input shape; F.pad lists the last dimension first.
	        pad_h = (self.kernel_size[0] - 1) // 2
	        pad_w = (self.kernel_size[1] - 1) // 2
	        x = F.pad(x[:, None], (pad_w, pad_w, pad_h, pad_h), mode="reflect")
	        x = self.conv(x, weight=kernel)[:, 0]
	        return x


	class ResBlock1(torch.nn.Module):
	    """Residual block made of three (dilated conv, plain conv) pairs.

	    Every pair is applied as ``x = conv2(lrelu(conv1(lrelu(x)))) + x``. The
	    first conv of each pair uses the matching entry of ``dilation``; the second
	    conv always uses dilation 1. All convolutions are weight-normalized.
	    """

	    __constants__ = ["lrelu_slope"]

	    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
	        super(ResBlock1, self).__init__()
	        self.h = h
	        self.lrelu_slope = LRELU_SLOPE

	        def make_conv(rate):
	            # Channel-preserving, stride-1 convolution wrapped in weight norm.
	            return weight_norm(
	                Conv1d(
	                    channels,
	                    channels,
	                    kernel_size,
	                    1,
	                    dilation=rate,
	                    padding=get_padding(kernel_size, rate),
	                )
	            )

	        # Only the first three dilation entries are used.
	        first_rates = (dilation[0], dilation[1], dilation[2])
	        self.convs1 = nn.ModuleList([make_conv(rate) for rate in first_rates])
	        self.convs1.apply(init_weights)

	        self.convs2 = nn.ModuleList([make_conv(1) for _ in range(3)])
	        self.convs2.apply(init_weights)

	    def forward(self, x):
	        """Run the input through every conv pair, adding a skip connection each time."""
	        for dilated_conv, plain_conv in zip(self.convs1, self.convs2):
	            branch = dilated_conv(F.leaky_relu(x, self.lrelu_slope))
	            branch = plain_conv(F.leaky_relu(branch, self.lrelu_slope))
	            x = branch + x
	        return x

	    def remove_weight_norm(self):
	        """Strip the weight-norm reparameterization from all convolutions."""
	        for layer in list(self.convs1) + list(self.convs2):
	            remove_weight_norm(layer)


	class ResBlock2(torch.nn.Module):
	    """Residual block with two weight-normalized dilated convolutions.

	    Each convolution is applied as ``x = conv(lrelu(x)) + x`` using the first
	    two entries of ``dilation``.
	    """

	    __constants__ = ["lrelu_slope"]

	    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
	        super(ResBlock2, self).__init__()
	        self.h = h

	        # Only the first two dilation entries are used.
	        rates = (dilation[0], dilation[1])
	        layers = []
	        for rate in rates:
	            conv = Conv1d(
	                channels,
	                channels,
	                kernel_size,
	                1,
	                dilation=rate,
	                padding=get_padding(kernel_size, rate),
	            )
	            layers.append(weight_norm(conv))
	        self.convs = nn.ModuleList(layers)
	        self.convs.apply(init_weights)
	        self.lrelu_slope = LRELU_SLOPE

	    def forward(self, x):
	        """Apply each convolution with a leaky-ReLU pre-activation and a skip connection."""
	        for conv in self.convs:
	            x = conv(F.leaky_relu(x, self.lrelu_slope)) + x
	        return x

	    def remove_weight_norm(self):
	        """Strip the weight-norm reparameterization from both convolutions."""
	        for layer in self.convs:
	            remove_weight_norm(layer)


	class Generator(torch.nn.Module):
	    """HiFi-GAN generator: 80-channel input features to a single-channel signal.

	    The configuration object ``h`` must provide ``resblock``,
	    ``resblock_kernel_sizes``, ``resblock_dilation_sizes``, ``upsample_rates``,
	    ``upsample_kernel_sizes``, ``upsample_initial_channel`` and a
	    ``gaussian_blur`` mapping with the keys ``"p_blurring"``, ``"kernel_size"``
	    and ``"sigmas"``.
	    """

	    __constants__ = ["lrelu_slope", "num_kernels", "num_upsamples", "p_blur"]

	    def __init__(self, h):
	        super(Generator, self).__init__()
	        self.num_kernels = len(h.resblock_kernel_sizes)
	        self.num_upsamples = len(h.upsample_rates)
	        self.conv_pre = weight_norm(
	            Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3)
	        )
	        self.p_blur = h.gaussian_blur["p_blurring"]
	        if self.p_blur > 0.0:
	            self.gaussian_blur_fn = GaussianBlurAugmentation(
	                h.gaussian_blur["kernel_size"], h.gaussian_blur["sigmas"], self.p_blur
	            )
	        else:
	            self.gaussian_blur_fn = nn.Identity()
	        self.lrelu_slope = LRELU_SLOPE

	        resblock = ResBlock1 if h.resblock == "1" else ResBlock2

	        # Each upsampling stage halves the channel count.
	        self.ups = nn.ModuleList()
	        for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
	            self.ups.append(
	                weight_norm(
	                    ConvTranspose1d(
	                        h.upsample_initial_channel // (2**i),
	                        h.upsample_initial_channel // (2 ** (i + 1)),
	                        k,
	                        u,
	                        padding=(k - u) // 2,
	                    )
	                )
	            )

	        # One group of residual blocks per upsampling stage, one block per
	        # (kernel size, dilation set) pair.
	        self.resblocks = nn.ModuleList()
	        for i in range(len(self.ups)):
	            resblock_list = nn.ModuleList()
	            ch = h.upsample_initial_channel // (2 ** (i + 1))
	            for k, d in zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes):
	                resblock_list.append(resblock(h, ch, k, d))
	            self.resblocks.append(resblock_list)

	        # Channel count after the last stage, computed explicitly so it is
	        # defined even when there are no upsampling stages.
	        ch = h.upsample_initial_channel // (2 ** len(self.ups))
	        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
	        self.ups.apply(init_weights)
	        self.conv_post.apply(init_weights)

	    def load_state_dict(self, state_dict, strict=True):
	        """Load weights, remapping keys of the older flat ``resblocks`` layout.

	        Older checkpoints store residual blocks in one flat list
	        (``resblocks.<idx>.<...>``, five dot-separated parts); this module
	        nests them as ``resblocks.<stage>.<block>.<...>``.

	        Args:
	            state_dict: mapping from parameter names to tensors.
	            strict: forwarded to ``torch.nn.Module.load_state_dict``.

	        Returns:
	            Whatever ``torch.nn.Module.load_state_dict`` returns.
	        """
	        new_state_dict = {}
	        for k, v in state_dict.items():
	            new_k = k
	            if "resblocks" in k:
	                parts = k.split(".")
	                # Only do this if the checkpoint type is older.
	                if len(parts) == 5:
	                    # Flat index = stage * num_kernels + block.
	                    stage, block = divmod(int(parts[1]), self.num_kernels)
	                    new_k = f"resblocks.{stage}.{block}.{'.'.join(parts[2:])}"
	            new_state_dict[new_k] = v
	        return super().load_state_dict(new_state_dict, strict=strict)

	    def forward(self, x):
	        """Run the generator; the output passes through ``tanh`` and has one channel."""
	        if self.p_blur > 0.0:
	            x = self.gaussian_blur_fn(x)
	        x = self.conv_pre(x)
	        for upsample_layer, resblock_group in zip(self.ups, self.resblocks):
	            x = F.leaky_relu(x, self.lrelu_slope)
	            x = upsample_layer(x)
	            # Average the outputs of all residual blocks in this stage.
	            xs = torch.zeros(x.shape, dtype=x.dtype, device=x.device)
	            for resblock in resblock_group:
	                xs += resblock(x)
	            x = xs / self.num_kernels
	        # NOTE: this call uses F.leaky_relu's default slope, not lrelu_slope;
	        # kept as is because changing it would alter the network's outputs.
	        x = F.leaky_relu(x)
	        x = self.conv_post(x)
	        x = torch.tanh(x)

	        return x

	    def remove_weight_norm(self):
	        """Strip the weight-norm reparameterization from every layer."""
	        print("Removing weight norm...")
	        for l in self.ups:
	            remove_weight_norm(l)
	        for group in self.resblocks:
	            for block in group:
	                block.remove_weight_norm()
	        remove_weight_norm(self.conv_pre)
	        remove_weight_norm(self.conv_post)


	class DiscriminatorP(torch.nn.Module):
	    """Period discriminator: folds the signal by ``period`` and applies 2-D convs.

	    ``forward`` returns the flattened output of the last convolution together
	    with the list of intermediate feature maps.
	    """

	    __constants__ = ["LRELU_SLOPE"]

	    def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
	        super(DiscriminatorP, self).__init__()
	        self.period = period
	        # Equality (not truthiness) is kept on purpose to match the original check.
	        if use_spectral_norm == False:  # noqa: E712
	            norm_f = weight_norm
	        else:
	            norm_f = spectral_norm

	        # (in_channels, out_channels) of the four strided convolutions.
	        channel_plan = ((1, 32), (32, 128), (128, 512), (512, 1024))
	        strided_convs = [
	            norm_f(
	                Conv2d(
	                    c_in,
	                    c_out,
	                    (kernel_size, 1),
	                    (stride, 1),
	                    padding=(get_padding(5, 1), 0),
	                )
	            )
	            for c_in, c_out in channel_plan
	        ]
	        last_conv = norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0)))
	        self.convs = nn.ModuleList(strided_convs + [last_conv])
	        self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))

	    def forward(self, x):
	        """Score a ``(batch, channels, time)`` input.

	        Returns:
	            Tuple ``(scores, fmap)``: the output flattened from dim 1 onward and
	            the list of activations after every convolution.
	        """
	        fmap = []

	        # 1d to 2d: right-pad the time axis up to a multiple of the period.
	        b, c, t = x.shape
	        remainder = t % self.period
	        if remainder != 0:
	            n_pad = self.period - remainder
	            x = F.pad(x, (0, n_pad), "reflect")
	            t = t + n_pad
	        x = x.view(b, c, t // self.period, self.period)

	        for conv in self.convs:
	            x = F.leaky_relu(conv(x), LRELU_SLOPE)
	            fmap.append(x)
	        x = self.conv_post(x)
	        fmap.append(x)

	        return torch.flatten(x, 1, -1), fmap


	class MultiPeriodDiscriminator(torch.nn.Module):
	    """Bundle of period discriminators with periods 2, 3, 5, 7 and 11."""

	    def __init__(self):
	        super(MultiPeriodDiscriminator, self).__init__()
	        periods = (2, 3, 5, 7, 11)
	        self.discriminators = nn.ModuleList(
	            [DiscriminatorP(period) for period in periods]
	        )

	    def forward(self, y, y_hat):
	        """Run every sub-discriminator on ``y`` and then on ``y_hat``.

	        Returns:
	            Four lists with one entry per sub-discriminator: outputs for ``y``,
	            outputs for ``y_hat``, feature maps for ``y``, feature maps for
	            ``y_hat``.
	        """
	        real_scores, fake_scores = [], []
	        real_fmaps, fake_fmaps = [], []
	        for disc in self.discriminators:
	            score_real, feats_real = disc(y)
	            score_fake, feats_fake = disc(y_hat)
	            real_scores.append(score_real)
	            real_fmaps.append(feats_real)
	            fake_scores.append(score_fake)
	            fake_fmaps.append(feats_fake)

	        return real_scores, fake_scores, real_fmaps, fake_fmaps


	class DiscriminatorS(torch.nn.Module):
	    """Scale discriminator: a stack of 1-D (mostly grouped) convolutions.

	    ``forward`` returns the flattened output of the last convolution together
	    with the list of intermediate feature maps.
	    """

	    __constants__ = ["LRELU_SLOPE"]

	    def __init__(self, use_spectral_norm=False):
	        super(DiscriminatorS, self).__init__()
	        # Equality (not truthiness) is kept on purpose to match the original check.
	        if use_spectral_norm == False:  # noqa: E712
	            norm_f = weight_norm
	        else:
	            norm_f = spectral_norm

	        first = norm_f(Conv1d(1, 128, 15, 1, padding=7))
	        # (in_channels, out_channels, stride, groups) for the kernel-41 layers.
	        grouped_plan = (
	            (128, 128, 2, 4),
	            (128, 256, 2, 16),
	            (256, 512, 4, 16),
	            (512, 1024, 4, 16),
	            (1024, 1024, 1, 16),
	        )
	        grouped = [
	            norm_f(Conv1d(c_in, c_out, 41, step, groups=n_groups, padding=20))
	            for c_in, c_out, step, n_groups in grouped_plan
	        ]
	        last = norm_f(Conv1d(1024, 1024, 5, 1, padding=2))
	        self.convs = nn.ModuleList([first] + grouped + [last])
	        self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))

	    def forward(self, x):
	        """Score the input.

	        Returns:
	            Tuple ``(scores, fmap)``: the output flattened from dim 1 onward and
	            the list of activations after every convolution.
	        """
	        fmap = []
	        for conv in self.convs:
	            x = F.leaky_relu(conv(x), LRELU_SLOPE)
	            fmap.append(x)
	        x = self.conv_post(x)
	        fmap.append(x)

	        return torch.flatten(x, 1, -1), fmap


	class MultiScaleDiscriminator(torch.nn.Module):
	    """Three scale discriminators fed with progressively average-pooled inputs.

	    The first discriminator uses spectral norm; the other two use the default
	    normalization of ``DiscriminatorS``.
	    """

	    def __init__(self):
	        super(MultiScaleDiscriminator, self).__init__()
	        self.discriminators = nn.ModuleList(
	            [DiscriminatorS(use_spectral_norm=True)]
	            + [DiscriminatorS() for _ in range(2)]
	        )
	        self.meanpools = nn.ModuleList(
	            [AvgPool1d(4, 2, padding=2) for _ in range(2)]
	        )

	    def forward(self, y, y_hat):
	        """Run every sub-discriminator on ``y`` and then on ``y_hat``.

	        Before every discriminator except the first, both inputs are pooled
	        once more, so the pooling accumulates across scales.

	        Returns:
	            Four lists with one entry per sub-discriminator: outputs for ``y``,
	            outputs for ``y_hat``, feature maps for ``y``, feature maps for
	            ``y_hat``.
	        """
	        real_scores, fake_scores = [], []
	        real_fmaps, fake_fmaps = [], []
	        for idx, disc in enumerate(self.discriminators):
	            if idx > 0:
	                pool = self.meanpools[idx - 1]
	                y = pool(y)
	                y_hat = pool(y_hat)
	            score_real, feats_real = disc(y)
	            score_fake, feats_fake = disc(y_hat)
	            real_scores.append(score_real)
	            real_fmaps.append(feats_real)
	            fake_scores.append(score_fake)
	            fake_fmaps.append(feats_fake)

	        return real_scores, fake_scores, real_fmaps, fake_fmaps


	def feature_loss(fmap_r, fmap_g):
	    """Return twice the summed mean absolute difference between feature maps.

	    ``fmap_r`` and ``fmap_g`` are parallel lists (one entry per discriminator)
	    of lists of tensors (one per layer).
	    """
	    total = sum(
	        torch.mean(torch.abs(real_feat - gen_feat))
	        for real_layers, gen_layers in zip(fmap_r, fmap_g)
	        for real_feat, gen_feat in zip(real_layers, gen_layers)
	    )
	    return total * 2


	def discriminator_loss(disc_real_outputs, disc_generated_outputs):
	    """Least-squares discriminator loss summed over all discriminators.

	    Returns:
	        Tuple ``(loss, r_losses, g_losses)``: the total loss, and the per-
	        discriminator real and generated terms as Python floats.
	    """
	    total = 0
	    real_terms = []
	    fake_terms = []
	    for real_out, fake_out in zip(disc_real_outputs, disc_generated_outputs):
	        real_term = torch.mean((1 - real_out) ** 2)  # real outputs pushed to 1
	        fake_term = torch.mean(fake_out**2)  # generated outputs pushed to 0
	        total = total + (real_term + fake_term)
	        real_terms.append(real_term.item())
	        fake_terms.append(fake_term.item())

	    return total, real_terms, fake_terms


	def generator_loss(disc_outputs):
	    """Least-squares generator loss summed over all discriminator outputs.

	    Returns:
	        Tuple ``(loss, gen_losses)``: the total loss and the list of
	        per-discriminator loss tensors.
	    """
	    # Each term pushes the discriminator output for generated audio towards 1.
	    per_disc = [torch.mean((1 - out) ** 2) for out in disc_outputs]
	    return sum(per_disc), per_disc