Spaces:

0qwpifs
/

VoiceReplacer

Running

App Files Files Community

VoiceReplacer / app.py

0qwpifs

Update app.py

2a1bc84 verified 8 months ago

raw

history blame

3.28 kB

	import gradio as gr
	import torchaudio
	import torch
	import os
	from speechbrain.inference import SpeakerRecognition

	# Path to the reference recording of the user's own voice. It is relative,
	# so it is resolved against the process's working directory.
	user_voice_path = "voice_recording.wav"

	# Load the pretrained speaker model once at import time so every request
	# reuses it. `source` names the model repository and `savedir` is the local
	# directory passed to SpeechBrain for the fetched files.
	model = SpeakerRecognition.from_hparams(
	    savedir="pretrained_models/spkrec-ecapa-voxceleb",
	    source="speechbrain/spkrec-ecapa-voxceleb",
	)

	def _prepare_for_encoder(waveform, sample_rate, encoder_sr=16000):
	    """Return a mono copy of *waveform* resampled to *encoder_sr*.

	    The ECAPA speaker encoder is trained on 16 kHz single-channel audio and
	    ``encode_batch`` does not normalise its input. It also treats dim 0 as
	    the batch axis, so a stereo ``[2, time]`` tensor would otherwise be
	    encoded as two separate utterances.
	    """
	    if waveform.dim() == 2 and waveform.shape[0] > 1:
	        waveform = waveform.mean(dim=0, keepdim=True)
	    if sample_rate != encoder_sr:
	        waveform = torchaudio.functional.resample(waveform, sample_rate, encoder_sr)
	    return waveform


	def process_audio(input_audio, pitch_shift=0):
	    """Convert the voice in an uploaded clip towards the reference voice.

	    Args:
	        input_audio: Path to the uploaded audio file (what
	            ``gr.Audio(type="filepath")`` passes), or a tuple whose first
	            element is that path.
	        pitch_shift: Pitch change in semitones applied to the uploaded clip
	            before encoding; ``0`` skips the pitch-shift step.

	    Returns:
	        The path of the written WAV file, or ``None`` if any step failed
	        (the error message is printed to stdout).
	    """
	    try:
	        # NOTE(review): Gradio's tuple form (type="numpy") is
	        # (sample_rate, ndarray), not (path, sample_rate); this branch is
	        # kept for backward compatibility - confirm it is reachable.
	        if isinstance(input_audio, tuple):
	            input_audio_path = input_audio[0]
	        elif isinstance(input_audio, str):
	            input_audio_path = input_audio
	        else:
	            raise ValueError(f"Неподдерживаемый формат входных данных: {type(input_audio)}")

	        # The reference recording must be present next to the app.
	        if not os.path.exists(user_voice_path):
	            raise FileNotFoundError(f"Файл голосового образца не найден по пути: {user_voice_path}")

	        user_waveform, user_sr = torchaudio.load(user_voice_path)
	        target_waveform, target_sr = torchaudio.load(input_audio_path)

	        # Bring the uploaded clip to the reference sample rate; the output
	        # file is written at user_sr.
	        if user_sr != target_sr:
	            target_waveform = torchaudio.functional.resample(target_waveform, target_sr, user_sr)

	        if pitch_shift != 0:
	            target_waveform = torchaudio.functional.pitch_shift(
	                waveform=target_waveform,
	                sample_rate=user_sr,
	                n_steps=pitch_shift,
	            )

	        # Inference only, so no autograd graph is needed.
	        with torch.no_grad():
	            # Encode mono 16 kHz copies; the originals are kept so the
	            # requested output length and sample rate stay unchanged.
	            embeddings_user = model.encode_batch(
	                _prepare_for_encoder(user_waveform, user_sr)
	            )
	            embeddings_target = model.encode_batch(
	                _prepare_for_encoder(target_waveform, user_sr)
	            )
	            # Midpoint between the two speaker embeddings.
	            converted_embeddings = embeddings_user + (embeddings_target - embeddings_user) * 0.5

	            # A speaker-recognition model only produces embeddings. Fail
	            # with a clear message instead of an opaque AttributeError when
	            # no synthesiser is attached to the loaded model.
	            synth_model = getattr(model, "synth_model", None)
	            if synth_model is None:
	                raise NotImplementedError(
	                    "Загруженная модель не содержит synth_model и не может "
	                    "синтезировать аудио из эмбеддингов"
	                )

	            converted_waveform = synth_model.generate(
	                converted_embeddings,
	                length=target_waveform.shape[-1],
	            )

	        # NOTE: fixed output name, so each run overwrites the previous result.
	        output_path = "converted_audio.wav"
	        torchaudio.save(output_path, converted_waveform.cpu(), user_sr)

	        return output_path
	    except Exception as e:
	        # UI callback boundary: report the failure and return no audio
	        # rather than propagating the exception to the caller.
	        print(f"Ошибка: {str(e)}")
	        return None

	# Gradio UI: an audio upload (handed to process_audio as a file path) and a
	# semitone slider feed process_audio; its return value drives the audio output.
	demo = gr.Interface(
	    fn=process_audio,
	    inputs=[
	        gr.Audio(
	            type="filepath",
	            label="Загрузите аудиофайл с голосом для замены",
	        ),
	        gr.Slider(
	            minimum=-24,
	            maximum=24,
	            value=0,
	            step=1,
	            label="Изменение тона (в полутонов)",
	        ),
	    ],
	    outputs=gr.Audio(label="Обработанный аудиофайл"),
	    title="VoiceReplacer Pro",
	    description="Замените голос в аудиофайле на ваш собственный голос с возможностью изменения тона",
	)

	# Start the web server for the interface.
	demo.launch()