RVC-GUI

Running

App Files Files Community

RVC-GUI / main /library /algorithm /stftpitchshift.py

AnhP

Upload 82 files

e4d8df5 verified 5 months ago

raw

history blame

8 kB

	import numpy as np

	from numpy.lib.stride_tricks import sliding_window_view

	def istft(frames, framesize, hopsize):
	frames = np.atleast_2d(frames)
	assert frames.ndim == 2

	analysis_window_size = np.ravel(framesize)[0]
	synthesis_window_size = np.ravel(framesize)[-1]

	assert analysis_window_size >= synthesis_window_size

	A = asymmetric_analysis_window(analysis_window_size, synthesis_window_size) if analysis_window_size != synthesis_window_size else symmetric_window(analysis_window_size)
	S = asymmetric_synthesis_window(analysis_window_size, synthesis_window_size) if analysis_window_size != synthesis_window_size else symmetric_window(synthesis_window_size)

	W = S * hopsize / np.sum(A * S)
	N = frames.shape[0] * hopsize + analysis_window_size

	y = np.zeros((N), float)

	frames[:, 0] = 0
	frames[:, -1] = 0
	frames0 = sliding_window_view(y, analysis_window_size, writeable=True)[::hopsize]
	frames1 = np.fft.irfft(frames, axis=-1, norm='forward') * W

	for i in range(min(len(frames0), len(frames1))):
	frames0[i] += frames1[i]

	return y

	def asymmetric_synthesis_window(analysis_window_size, synthesis_window_size):
	n = analysis_window_size
	m = synthesis_window_size // 2

	right = symmetric_window(2 * m)
	window = np.zeros(n)

	window[n-m-m:n-m] = np.square(right[:m]) / symmetric_window(2 * n - 2 * m)[n-m-m:n-m]
	window[-m:] = right[-m:]

	return window

	def asymmetric_analysis_window(analysis_window_size, synthesis_window_size):
	n = analysis_window_size
	m = synthesis_window_size // 2

	window = np.zeros(n)
	window[:n-m] = symmetric_window(2 * n - 2 * m)[:n-m]
	window[-m:] = symmetric_window(2 * m)[-m:]

	return window

	def symmetric_window(symmetric_window_size):
	n = symmetric_window_size
	window = 0.5 - 0.5 * np.cos(2 * np.pi * np.arange(n) / n)

	return window

	def stft(x, framesize, hopsize):
	x = np.atleast_1d(x)
	assert x.ndim == 1

	analysis_window_size = np.ravel(framesize)[0]
	synthesis_window_size = np.ravel(framesize)[-1]

	assert analysis_window_size >= synthesis_window_size

	W = asymmetric_analysis_window(analysis_window_size, synthesis_window_size) if analysis_window_size != synthesis_window_size else symmetric_window(analysis_window_size)

	frames0 = sliding_window_view(x, analysis_window_size, writeable=False)[::hopsize]
	frames1 = np.fft.rfft(frames0 * W, axis=-1, norm='forward')

	return frames1

	def normalize(frames, frames0):
	for i in range(len(frames)):
	a = np.real(frames0[i])
	b = np.real(frames[i])
	a = np.dot(a, a)
	b = np.dot(b, b)

	if b == 0: continue
	frames[i] = np.real(frames[i]) * np.sqrt(a / b) + 1j * np.imag(frames[i])

	return frames

	def lowpass(cepstrum, quefrency):
	cepstrum[1:quefrency] *= 2
	cepstrum[quefrency+1:] = 0

	return cepstrum

	def lifter(frames, quefrency):
	envelopes = np.zeros(frames.shape)

	for i, frame in enumerate(frames):
	with np.errstate(divide='ignore', invalid='ignore'):
	spectrum = np.log10(np.real(frame))

	envelopes[i] = np.power(10, np.real(np.fft.rfft(lowpass(np.fft.irfft(spectrum, norm='forward'), quefrency), norm='forward')))

	return envelopes

	def resample(x, factor):
	if factor == 1: return x.copy()
	y = np.zeros(x.shape, dtype=x.dtype)

	n = len(x)
	m = int(n * factor)

	i = np.arange(min(n, m))
	k = i * (n / m)

	j = np.trunc(k).astype(int)
	k = k - j

	ok = (0 <= j) & (j < n - 1)
	y[i[ok]] = k[ok] * x[j[ok] + 1] + (1 - k[ok]) * x[j[ok]]

	return y

	def shiftpitch(frames, factors, samplerate):
	for i in range(len(frames)):
	magnitudes = np.vstack([resample(np.real(frames[i]), factor) for factor in factors])
	frequencies = np.vstack([resample(np.imag(frames[i]), factor) * factor for factor in factors])

	magnitudes[(frequencies <= 0) \| (frequencies >= samplerate / 2)] = 0
	mask = np.argmax(magnitudes, axis=0)

	magnitudes = np.take_along_axis(magnitudes, mask[None,:], axis=0)
	frequencies = np.take_along_axis(frequencies, mask[None,:], axis=0)

	frames[i] = magnitudes + 1j * frequencies

	return frames

	def wrap(x):
	return (x + np.pi) % (2 * np.pi) - np.pi

	def encode(frames, framesize, hopsize, samplerate):
	M, N = frames.shape
	analysis_framesize = np.ravel(framesize)[0]

	freqinc = samplerate / analysis_framesize
	phaseinc = 2 * np.pi * hopsize / analysis_framesize

	buffer = np.zeros(N)
	data = np.zeros((M, N), complex)

	for m, frame in enumerate(frames):
	arg = np.angle(frame)
	buffer = arg

	i = np.arange(N)
	freq = (i + (wrap((arg - buffer) - i * phaseinc) / phaseinc)) * freqinc

	data[m] = np.abs(frame) + 1j * freq

	return data

	def decode(frames, framesize, hopsize, samplerate):
	M, N = frames.shape
	analysis_framesize = np.ravel(framesize)[0]
	synthesis_framesize = np.ravel(framesize)[-1]

	freqinc = samplerate / analysis_framesize
	phaseinc = 2 * np.pi * hopsize / analysis_framesize
	timeshift = 2 * np.pi * synthesis_framesize * np.arange(N) / N if synthesis_framesize != analysis_framesize else 0

	buffer = np.zeros(N)
	data = np.zeros((M, N), complex)

	for m, frame in enumerate(frames):
	i = np.arange(N)
	delta = (i + ((np.imag(frame) - i * freqinc) / freqinc)) * phaseinc
	buffer += delta
	arg = buffer.copy()
	arg -= timeshift
	data[m] = np.real(frame) * np.exp(1j * arg)

	return data

	class StftPitchShift:
	def __init__(self, framesize, hopsize, samplerate):
	self.framesize = framesize
	self.hopsize = hopsize
	self.samplerate = samplerate

	def shiftpitch(self, input, factors = 1, quefrency = 0, distortion = 1, normalization = False):
	input = np.atleast_1d(input)
	dtype = input.dtype
	shape = input.shape

	input = np.squeeze(input)
	if input.ndim != 1: raise ValueError('input.ndim != 1')

	if np.issubdtype(dtype, np.integer):
	a, b = np.iinfo(dtype).min, np.iinfo(dtype).max
	input = ((input.astype(float) - a) / (b - a)) * 2 - 1
	elif not np.issubdtype(dtype, np.floating): raise TypeError('not np.issubdtype(dtype, np.floating)')

	def isnotnormal(x):
	return (np.isinf(x)) \| (np.isnan(x)) \| (abs(x) < np.finfo(x.dtype).tiny)

	framesize = self.framesize
	hopsize = self.hopsize
	samplerate = self.samplerate

	factors = np.asarray(factors).flatten()
	quefrency = int(quefrency * samplerate)

	frames = encode(stft(input, framesize, hopsize), framesize, hopsize, samplerate)

	if normalization: frames0 = frames.copy()

	if quefrency:
	envelopes = lifter(frames, quefrency)
	mask = isnotnormal(envelopes)

	frames.real /= envelopes
	frames.real[mask] = 0

	if distortion != 1:
	envelopes[mask] = 0

	for i in range(len(envelopes)):
	envelopes[i] = resample(envelopes[i], distortion)

	mask = isnotnormal(envelopes)

	frames = shiftpitch(frames, factors, samplerate)
	frames.real *= envelopes
	frames.real[mask] = 0
	else: frames = shiftpitch(frames, factors, samplerate)

	if normalization: frames = normalize(frames, frames0)

	output = istft(decode(frames, framesize, hopsize, samplerate), framesize, hopsize)
	output.resize(shape, refcheck=False)

	if np.issubdtype(dtype, np.integer):
	a, b = np.iinfo(dtype).min, np.iinfo(dtype).max
	output = (((output + 1) / 2) * (b - a) + a).clip(a, b).astype(dtype)
	elif output.dtype != dtype: output = output.astype(dtype)

	assert output.dtype == dtype
	assert output.shape == shape

	return output