# pipeline.py
import json
import os
import time
import traceback

import librosa
import numpy as np
import torch

from utils import logger, remove_directory_contents, create_directories
from inference import run_mdx, run_mdx_beta, convert_to_stereo_and_wav, get_hash, random_sleep
from effects import add_vocal_effects, add_instrumental_effects
def _run_mdx_with_fallback(*args, **kwargs):
    """Call ``run_mdx`` and, if it raises, retry once with ``run_mdx_beta``.

    Both callables receive exactly the same positional and keyword arguments.
    An exception raised by ``run_mdx_beta`` propagates to the caller.
    """
    try:
        return run_mdx(*args, **kwargs)
    except Exception as error:
        # Best-effort fallback: keep going with the alternative runner, but
        # record why the primary one failed instead of hiding it.
        logger.warning(f"run_mdx failed ({error}); retrying with run_mdx_beta")
        return run_mdx_beta(*args, **kwargs)


def process_uvr_task(
    orig_song_path: str,
    main_vocals: bool = False,
    dereverb: bool = True,
    song_id: str = "mdx",
    only_voiceless: bool = False,
    remove_files_output_dir: bool = False,
    mdx_models_dir: str = "mdx_models",
    output_dir: str = "clean_song_output",
):
    """Run the MDX separation stages on one song.

    Args:
        orig_song_path: Path of the input audio; it is first passed through
            ``convert_to_stereo_and_wav`` and the returned path is used.
        main_vocals: When true, additionally run the ``UVR_MDXNET_KARA_2``
            model on the vocals to get "Backup" and "Main" outputs.
        dereverb: When true, additionally run the ``Reverb_HQ_By_FoxJoy``
            model on the (main) vocals.
        song_id: Name of the sub-directory of ``output_dir`` that receives
            the stage outputs.
        only_voiceless: When true, run only the ``UVR-MDX-NET-Inst_HQ_4``
            model and return its result directly.
        remove_files_output_dir: When true, call
            ``remove_directory_contents(output_dir)`` before processing.
        mdx_models_dir: Directory holding the ``.onnx`` models and
            ``data.json`` (the model parameters).
        output_dir: Root directory for converted and separated audio.

    Returns:
        If ``only_voiceless`` is true: whatever ``run_mdx`` returns for the
        voiceless model. Otherwise the 5-tuple ``(vocals_path,
        instrumentals_path, backup_vocals_path, main_vocals_path,
        vocals_dereverb_path)``; ``backup_vocals_path`` is ``None`` when
        ``main_vocals`` is false, and ``vocals_dereverb_path`` equals
        ``main_vocals_path`` when ``dereverb`` is false.
    """
    device_base = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Device: {device_base}")

    if remove_files_output_dir:
        remove_directory_contents(output_dir)

    with open(os.path.join(mdx_models_dir, "data.json"), encoding="utf-8") as infile:
        mdx_model_params = json.load(infile)

    song_output_dir = os.path.join(output_dir, song_id)
    create_directories(song_output_dir)
    orig_song_path = convert_to_stereo_and_wav(orig_song_path, output_dir)

    # The original referenced an undefined ``ort`` name here. The value is only
    # used for this diagnostic line, so resolve it locally and never let a
    # missing module abort the separation itself.
    try:
        import onnxruntime as ort
    except ImportError:
        logger.info("ONNX Runtime Device >> unavailable (onnxruntime not importable)")
    else:
        logger.info(f"ONNX Runtime Device >> {ort.get_device()}")

    if only_voiceless:
        logger.info("Voiceless Track Separation...")
        return run_mdx(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
            orig_song_path,
            suffix="Voiceless",
            denoise=False,
            keep_orig=True,
            exclude_inversion=True,
            device_base=device_base,
        )

    logger.info("Vocal Track Isolation...")
    vocals_path, instrumentals_path = run_mdx(
        mdx_model_params,
        song_output_dir,
        os.path.join(mdx_models_dir, "UVR-MDX-NET-Voc_FT.onnx"),
        orig_song_path,
        denoise=True,
        keep_orig=True,
        device_base=device_base,
    )

    # Without the optional main/backup split, the full vocal stem stands in
    # for the "main" vocals in the later stages.
    backup_vocals_path, main_vocals_path = None, vocals_path
    if main_vocals:
        random_sleep()
        backup_vocals_path, main_vocals_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR_MDXNET_KARA_2.onnx"),
            vocals_path,
            suffix="Backup",
            invert_suffix="Main",
            denoise=True,
            device_base=device_base,
        )

    vocals_dereverb_path = main_vocals_path
    if dereverb:
        random_sleep()
        _, vocals_dereverb_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
            main_vocals_path,
            invert_suffix="DeReverb",
            exclude_main=True,
            denoise=True,
            device_base=device_base,
        )

    return (
        vocals_path,
        instrumentals_path,
        backup_vocals_path,
        main_vocals_path,
        vocals_dereverb_path,
    )
def _effects_output_path(audio_path):
    """Return the absolute form of *audio_path* with ``_effects`` before its extension."""
    root, extension = os.path.splitext(os.path.abspath(audio_path))
    return f"{root}_effects{extension}"


def sound_separate(media_file, stem, main, dereverb,
                   vocal_effects=True, background_effects=True,
                   vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8, vocal_reverb_wet_level=0.35,
                   vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
                   vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5,
                   vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
                   vocal_gain_db=4,
                   background_highpass_freq=120, background_lowpass_freq=11000,
                   background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
                   background_compressor_threshold_db=-20, background_compressor_ratio=2.5,
                   background_compressor_attack_ms=15, background_compressor_release_ms=80,
                   background_gain_db=3):
    """Separate one stem from an audio file and optionally post-process it.

    Args:
        media_file: Path of the input audio. Must be truthy.
        stem: ``"vocal"`` or ``"background"``. Must be truthy; any other
            value produces no output and ends in the final error.
        main: Forwarded to ``process_uvr_task`` as ``main_vocals``
            (vocal stem only).
        dereverb: Forwarded to ``process_uvr_task`` as ``dereverb``
            (vocal stem only).
        vocal_effects: When true, run ``add_vocal_effects`` on the vocal
            result and return the processed file instead.
        background_effects: When true, run ``add_instrumental_effects`` on
            the background result and return the processed file instead.
        vocal_*: Forwarded to ``add_vocal_effects``.
        background_*: Forwarded to ``add_instrumental_effects``.

    Returns:
        A list containing the path of the produced audio file.

    Raises:
        ValueError: If ``media_file`` or ``stem`` is empty.
        Exception: If no output was produced. When the vocal branch failed,
            the original error is attached as ``__cause__``. Errors in the
            background branch propagate unchanged.
    """
    if not media_file:
        raise ValueError("The audio path is missing.")
    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    hash_audio = str(get_hash(media_file))
    outputs = []
    failure = None
    start_time = time.time()

    # Readability probe only: the duration itself is not used, and a failure
    # here is reported but does not stop the separation.
    # NOTE(review): newer librosa releases rename ``filename`` to ``path`` --
    # confirm against the pinned librosa version.
    try:
        librosa.get_duration(filename=media_file)
    except Exception as e:
        logger.warning(str(e))

    if stem == "vocal":
        try:
            _, _, _, _, vocal_audio = process_uvr_task(
                orig_song_path=media_file,
                song_id=hash_audio + "mdx",
                main_vocals=main,
                dereverb=dereverb,
                remove_files_output_dir=False,
            )
            if vocal_effects:
                # The effects file is written next to the separated stem.
                out_effects_path = _effects_output_path(vocal_audio)
                add_vocal_effects(
                    vocal_audio,
                    out_effects_path,
                    reverb_room_size=vocal_reverb_room_size,
                    reverb_damping=vocal_reverb_damping,
                    vocal_reverb_dryness=vocal_reverb_dryness,
                    reverb_wet_level=vocal_reverb_wet_level,
                    delay_seconds=vocal_delay_seconds,
                    delay_mix=vocal_delay_mix,
                    compressor_threshold_db=vocal_compressor_threshold_db,
                    compressor_ratio=vocal_compressor_ratio,
                    compressor_attack_ms=vocal_compressor_attack_ms,
                    compressor_release_ms=vocal_compressor_release_ms,
                    gain_db=vocal_gain_db,
                )
                vocal_audio = out_effects_path
            outputs.append(vocal_audio)
        except Exception as error:
            logger.error(str(error))
            traceback.print_exc()
            # Remember the real cause so the final error can carry it.
            failure = error

    if stem == "background":
        background_audio, _ = process_uvr_task(
            orig_song_path=media_file,
            song_id=hash_audio + "voiceless",
            only_voiceless=True,
            remove_files_output_dir=False,
        )
        if background_effects:
            # The effects file is written next to the separated stem.
            out_effects_path = _effects_output_path(background_audio)
            add_instrumental_effects(
                background_audio,
                out_effects_path,
                highpass_freq=background_highpass_freq,
                lowpass_freq=background_lowpass_freq,
                reverb_room_size=background_reverb_room_size,
                reverb_damping=background_reverb_damping,
                reverb_wet_level=background_reverb_wet_level,
                compressor_threshold_db=background_compressor_threshold_db,
                compressor_ratio=background_compressor_ratio,
                compressor_attack_ms=background_compressor_attack_ms,
                compressor_release_ms=background_compressor_release_ms,
                gain_db=background_gain_db,
            )
            background_audio = out_effects_path
        outputs.append(background_audio)

    logger.info(f"Execution time: {time.time() - start_time:.2f} seconds")

    if not outputs:
        raise Exception("Error in sound separation.") from failure
    return outputs