Spaces:
Configuration error
Configuration error
import os | |
import sys | |
import json | |
import argparse | |
import subprocess | |
from functools import lru_cache | |
from distutils.util import strtobool | |
# Interpreter running this script; reused below to launch helper scripts.
python = sys.executable

# The "logs" folder sits next to this script (symlinks resolved via realpath).
current_script_directory = os.path.dirname(os.path.realpath(__file__))
logs_path = os.path.join(current_script_directory, "logs")

# Add the launch directory to the import path.
now_dir = os.getcwd()
sys.path.append(now_dir)

# Kept after the sys.path change above -- presumably that is what lets the
# project-local `rvc` package resolve; confirm before moving this import.
from rvc.lib.tools.prerequisites_download import prequisites_download_pipeline
# Get TTS Voices -> https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4
# Cache only one result since the file is static
@lru_cache(maxsize=1)
def load_voices_data():
    """Load the TTS voice catalogue from ``tts_voices.json``.

    The file is looked up at ``rvc/lib/tools/tts_voices.json`` relative to the
    current working directory and decoded as UTF-8 JSON.

    The result is memoized: the file is static, so it is read from disk once
    and every later call returns the very same object. Treat the returned
    data as read-only, since all callers share it.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    voices_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
    with open(voices_path, "r", encoding="utf-8") as file:
        return json.load(file)
# Load the voice catalogue when this module is imported.
voices_data = load_voices_data()

# Distinct "ShortName" values of all voices; going through a set removes
# duplicates, so the order of the resulting list is not guaranteed.
locales = list(set(voice["ShortName"] for voice in voices_data))
def import_voice_converter():
    """Create and return a new ``VoiceConverter`` instance.

    ``rvc.infer.infer`` is imported inside the function, so that module is
    only loaded when a converter is actually requested rather than when this
    file is imported.
    """
    from rvc.infer.infer import VoiceConverter as converter_class

    converter = converter_class()
    return converter
def get_config():
    """Create and return a new ``Config`` instance.

    ``rvc.configs.config`` is imported inside the function, so that module is
    only loaded when a configuration object is actually requested.
    """
    from rvc.configs.config import Config as config_class

    config = config_class()
    return config
# Infer (single audio file)
def run_infer_script(
    pitch: int,
    filter_radius: int,
    index_rate: float,
    volume_envelope: int,
    protect: float,
    hop_length: int,
    f0_method: str,
    input_path: str,
    output_path: str,
    pth_path: str,
    index_path: str,
    split_audio: bool,
    f0_autotune: bool,
    f0_autotune_strength: float,
    clean_audio: bool,
    clean_strength: float,
    export_format: str,
    f0_file: str,
    embedder_model: str,
    embedder_model_custom: str = None,
    formant_shifting: bool = False,
    formant_qfrency: float = 1.0,
    formant_timbre: float = 1.0,
    post_process: bool = False,
    reverb: bool = False,
    pitch_shift: bool = False,
    limiter: bool = False,
    gain: bool = False,
    distortion: bool = False,
    chorus: bool = False,
    bitcrush: bool = False,
    clipping: bool = False,
    compressor: bool = False,
    delay: bool = False,
    reverb_room_size: float = 0.5,
    reverb_damping: float = 0.5,
    reverb_wet_gain: float = 0.5,
    reverb_dry_gain: float = 0.5,
    reverb_width: float = 0.5,
    reverb_freeze_mode: float = 0.5,
    pitch_shift_semitones: float = 0.0,
    limiter_threshold: float = -6,
    limiter_release_time: float = 0.01,
    gain_db: float = 0.0,
    distortion_gain: float = 25,
    chorus_rate: float = 1.0,
    chorus_depth: float = 0.25,
    chorus_center_delay: float = 7,
    chorus_feedback: float = 0.0,
    chorus_mix: float = 0.5,
    bitcrush_bit_depth: int = 8,
    clipping_threshold: float = -6,
    compressor_threshold: float = 0,
    compressor_ratio: float = 1,
    compressor_attack: float = 1.0,
    compressor_release: float = 100,
    delay_seconds: float = 0.5,
    delay_feedback: float = 0.0,
    delay_mix: float = 0.5,
    sid: int = 0,
):
    """Convert one audio file by delegating to ``VoiceConverter.convert_audio``.

    Every argument is forwarded as a keyword argument. Most keep their name;
    these are renamed on the way through:

    * ``input_path`` -> ``audio_input_path``
    * ``output_path`` -> ``audio_output_path``
    * ``pth_path`` -> ``model_path`` (and also passed unchanged as ``pth_path``)
    * ``reverb_wet_gain`` -> ``reverb_wet_level``
    * ``reverb_dry_gain`` -> ``reverb_dry_level``
    * ``limiter_release_time`` -> ``limiter_release``
    * ``chorus_center_delay`` -> ``chorus_delay``

    The meaning and valid range of each setting are defined by the converter,
    not by this wrapper. Nothing is returned.
    """
    converter = import_voice_converter()
    converter.convert_audio(
        # Input, output, model and index locations.
        audio_input_path=input_path,
        audio_output_path=output_path,
        model_path=pth_path,
        index_path=index_path,
        # Core conversion settings.
        pitch=pitch,
        filter_radius=filter_radius,
        index_rate=index_rate,
        volume_envelope=volume_envelope,
        protect=protect,
        hop_length=hop_length,
        f0_method=f0_method,
        pth_path=pth_path,
        split_audio=split_audio,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        clean_audio=clean_audio,
        clean_strength=clean_strength,
        export_format=export_format,
        f0_file=f0_file,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_model_custom,
        # Post-processing and formant options.
        post_process=post_process,
        formant_shifting=formant_shifting,
        formant_qfrency=formant_qfrency,
        formant_timbre=formant_timbre,
        # Effect on/off switches.
        reverb=reverb,
        pitch_shift=pitch_shift,
        limiter=limiter,
        gain=gain,
        distortion=distortion,
        chorus=chorus,
        bitcrush=bitcrush,
        clipping=clipping,
        compressor=compressor,
        delay=delay,
        # Per-effect settings.
        reverb_room_size=reverb_room_size,
        reverb_damping=reverb_damping,
        reverb_wet_level=reverb_wet_gain,
        reverb_dry_level=reverb_dry_gain,
        reverb_width=reverb_width,
        reverb_freeze_mode=reverb_freeze_mode,
        pitch_shift_semitones=pitch_shift_semitones,
        limiter_threshold=limiter_threshold,
        limiter_release=limiter_release_time,
        gain_db=gain_db,
        distortion_gain=distortion_gain,
        chorus_rate=chorus_rate,
        chorus_depth=chorus_depth,
        chorus_delay=chorus_center_delay,
        chorus_feedback=chorus_feedback,
        chorus_mix=chorus_mix,
        bitcrush_bit_depth=bitcrush_bit_depth,
        clipping_threshold=clipping_threshold,
        compressor_threshold=compressor_threshold,
        compressor_ratio=compressor_ratio,
        compressor_attack=compressor_attack,
        compressor_release=compressor_release,
        delay_seconds=delay_seconds,
        delay_feedback=delay_feedback,
        delay_mix=delay_mix,
        sid=sid,
    )
# Batch infer (folder of audio files)
def run_batch_infer_script(
    pitch: int,
    filter_radius: int,
    index_rate: float,
    volume_envelope: int,
    protect: float,
    hop_length: int,
    f0_method: str,
    input_folder: str,
    output_folder: str,
    pth_path: str,
    index_path: str,
    split_audio: bool,
    f0_autotune: bool,
    f0_autotune_strength: float,
    clean_audio: bool,
    clean_strength: float,
    export_format: str,
    f0_file: str,
    embedder_model: str,
    embedder_model_custom: str = None,
    formant_shifting: bool = False,
    formant_qfrency: float = 1.0,
    formant_timbre: float = 1.0,
    post_process: bool = False,
    reverb: bool = False,
    pitch_shift: bool = False,
    limiter: bool = False,
    gain: bool = False,
    distortion: bool = False,
    chorus: bool = False,
    bitcrush: bool = False,
    clipping: bool = False,
    compressor: bool = False,
    delay: bool = False,
    reverb_room_size: float = 0.5,
    reverb_damping: float = 0.5,
    reverb_wet_gain: float = 0.5,
    reverb_dry_gain: float = 0.5,
    reverb_width: float = 0.5,
    reverb_freeze_mode: float = 0.5,
    pitch_shift_semitones: float = 0.0,
    limiter_threshold: float = -6,
    limiter_release_time: float = 0.01,
    gain_db: float = 0.0,
    distortion_gain: float = 25,
    chorus_rate: float = 1.0,
    chorus_depth: float = 0.25,
    chorus_center_delay: float = 7,
    chorus_feedback: float = 0.0,
    chorus_mix: float = 0.5,
    bitcrush_bit_depth: int = 8,
    clipping_threshold: float = -6,
    compressor_threshold: float = 0,
    compressor_ratio: float = 1,
    compressor_attack: float = 1.0,
    compressor_release: float = 100,
    delay_seconds: float = 0.5,
    delay_feedback: float = 0.0,
    delay_mix: float = 0.5,
    sid: int = 0,
):
    """Convert a folder of audio via ``VoiceConverter.convert_audio_batch``.

    Every argument is forwarded as a keyword argument. Most keep their name;
    these are renamed on the way through:

    * ``input_folder`` -> ``audio_input_paths``
    * ``output_folder`` -> ``audio_output_path``
    * ``pth_path`` -> ``model_path`` (and also passed unchanged as ``pth_path``)
    * ``reverb_wet_gain`` -> ``reverb_wet_level``
    * ``reverb_dry_gain`` -> ``reverb_dry_level``
    * ``limiter_release_time`` -> ``limiter_release``
    * ``chorus_center_delay`` -> ``chorus_delay``

    The meaning and valid range of each setting are defined by the converter,
    not by this wrapper.

    Returns:
        A status message naming ``input_folder``. It is built after the batch
        call returns; the call's own result is not inspected.
    """
    converter = import_voice_converter()
    converter.convert_audio_batch(
        # Input, output, model and index locations.
        audio_input_paths=input_folder,
        audio_output_path=output_folder,
        model_path=pth_path,
        index_path=index_path,
        # Core conversion settings.
        pitch=pitch,
        filter_radius=filter_radius,
        index_rate=index_rate,
        volume_envelope=volume_envelope,
        protect=protect,
        hop_length=hop_length,
        f0_method=f0_method,
        pth_path=pth_path,
        split_audio=split_audio,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        clean_audio=clean_audio,
        clean_strength=clean_strength,
        export_format=export_format,
        f0_file=f0_file,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_model_custom,
        # Post-processing and formant options.
        post_process=post_process,
        formant_shifting=formant_shifting,
        formant_qfrency=formant_qfrency,
        formant_timbre=formant_timbre,
        # Effect on/off switches.
        reverb=reverb,
        pitch_shift=pitch_shift,
        limiter=limiter,
        gain=gain,
        distortion=distortion,
        chorus=chorus,
        bitcrush=bitcrush,
        clipping=clipping,
        compressor=compressor,
        delay=delay,
        # Per-effect settings.
        reverb_room_size=reverb_room_size,
        reverb_damping=reverb_damping,
        reverb_wet_level=reverb_wet_gain,
        reverb_dry_level=reverb_dry_gain,
        reverb_width=reverb_width,
        reverb_freeze_mode=reverb_freeze_mode,
        pitch_shift_semitones=pitch_shift_semitones,
        limiter_threshold=limiter_threshold,
        limiter_release=limiter_release_time,
        gain_db=gain_db,
        distortion_gain=distortion_gain,
        chorus_rate=chorus_rate,
        chorus_depth=chorus_depth,
        chorus_delay=chorus_center_delay,
        chorus_feedback=chorus_feedback,
        chorus_mix=chorus_mix,
        bitcrush_bit_depth=bitcrush_bit_depth,
        clipping_threshold=clipping_threshold,
        compressor_threshold=compressor_threshold,
        compressor_ratio=compressor_ratio,
        compressor_attack=compressor_attack,
        compressor_release=compressor_release,
        delay_seconds=delay_seconds,
        delay_feedback=delay_feedback,
        delay_mix=delay_mix,
        sid=sid,
    )

    status_message = f"Files from {input_folder} inferred successfully."
    return status_message
# TTS (text-to-speech, then voice conversion of the synthesized audio)
def run_tts_script(
    tts_file: str,
    tts_text: str,
    tts_voice: str,
    tts_rate: int,
    pitch: int,
    filter_radius: int,
    index_rate: float,
    volume_envelope: int,
    protect: float,
    hop_length: int,
    f0_method: str,
    output_tts_path: str,
    output_rvc_path: str,
    pth_path: str,
    index_path: str,
    split_audio: bool,
    f0_autotune: bool,
    f0_autotune_strength: float,
    clean_audio: bool,
    clean_strength: float,
    export_format: str,
    f0_file: str,
    embedder_model: str,
    embedder_model_custom: str = None,
    sid: int = 0,
):
    """Synthesize speech with the TTS helper script, then voice-convert it.

    Steps:

    1. Remove any file already present at ``output_tts_path``.
    2. Run ``rvc/lib/tools/tts.py`` (path relative to the current working
       directory) with the current interpreter. Its command-line arguments
       are, in order: ``tts_file``, ``tts_text``, ``tts_voice``, ``tts_rate``
       and ``output_tts_path``, each converted with ``str``.
    3. Pass ``output_tts_path`` as the input of
       ``VoiceConverter.convert_audio``, writing to ``output_rvc_path``.
       ``pth_path`` is forwarded as ``model_path``. All formant,
       post-processing and effect options, plus ``sliders``, are passed as
       ``None``.

    Nothing is returned.

    Raises:
        subprocess.CalledProcessError: If the TTS helper exits with a
            non-zero status. Conversion is not attempted in that case.
    """
    tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py")

    # EAFP: remove the previous output if there is one, without the
    # check-then-delete race of an os.path.exists() test.
    try:
        os.remove(output_tts_path)
    except FileNotFoundError:
        pass

    command_tts = [
        str(argument)
        for argument in (
            python,
            tts_script_path,
            tts_file,
            tts_text,
            tts_voice,
            tts_rate,
            output_tts_path,
        )
    ]
    # check=True: stop here if synthesis failed. The old output was just
    # deleted, so continuing would hand the converter a missing input file.
    subprocess.run(command_tts, check=True)

    infer_pipeline = import_voice_converter()
    infer_pipeline.convert_audio(
        pitch=pitch,
        filter_radius=filter_radius,
        index_rate=index_rate,
        volume_envelope=volume_envelope,
        protect=protect,
        hop_length=hop_length,
        f0_method=f0_method,
        audio_input_path=output_tts_path,
        audio_output_path=output_rvc_path,
        model_path=pth_path,
        index_path=index_path,
        split_audio=split_audio,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        clean_audio=clean_audio,
        clean_strength=clean_strength,
        export_format=export_format,
        f0_file=f0_file,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_model_custom,
        sid=sid,
        # Formant, post-processing and effect options are passed as None here.
        formant_shifting=None,
        formant_qfrency=None,
        formant_timbre=None,
        post_process=None,
        reverb=None,
        pitch_shift=None,
        limiter=None,
        gain=None,
        distortion=None,
        chorus=None,
        bitcrush=None,
        clipping=None,
        compressor=None,
        delay=None,
        sliders=None,
    )
# Prerequisites
def run_prerequisites_script(
    models: bool,
    exe: bool,
):
    """Run the prerequisites download pipeline and report success.

    Both flags are handed positionally, in this order, to
    ``prequisites_download_pipeline``; what each one selects is decided there.

    Returns:
        The fixed message ``"Prerequisites installed successfully."``.
    """
    prequisites_download_pipeline(models, exe)
    success_message = "Prerequisites installed successfully."
    return success_message