# pipeline.py
"""Stem-separation pipeline built on the MDX-Net ONNX models.

``process_uvr_task`` runs the separation models over one song and
``sound_separate`` wraps it with optional post-processing effects.
"""
import json
import os
import time
import traceback

import librosa
import torch
import numpy as np

try:
    # Only used for a diagnostic log line; the pipeline must not crash at
    # import time if the package cannot be imported under this name.
    import onnxruntime as ort
except ImportError:  # pragma: no cover - depends on the environment
    ort = None

from utils import logger, remove_directory_contents, create_directories
from inference import run_mdx, run_mdx_beta, convert_to_stereo_and_wav, get_hash, random_sleep
from effects import add_vocal_effects, add_instrumental_effects


def _run_mdx_with_fallback(*args, **kwargs):
    """Call ``run_mdx`` and retry once with ``run_mdx_beta`` if it fails.

    Both callables receive exactly the same arguments. The result is
    unpacked into two values inside the ``try`` so that a malformed result
    from ``run_mdx`` also triggers the fallback, as the inline code did.
    """
    try:
        first, second = run_mdx(*args, **kwargs)
    except Exception:
        # The fallback used to be silent; record why the primary path failed.
        logger.error(
            f"run_mdx failed, retrying with run_mdx_beta:\n{traceback.format_exc()}"
        )
        first, second = run_mdx_beta(*args, **kwargs)
    return first, second


def _effects_output_path(audio_path):
    """Return the path of the ``_effects`` sibling of ``audio_path``.

    The result is the absolute path of ``audio_path`` with ``_effects``
    inserted before the extension, so the processed file is written next to
    the separated stem.
    """
    base_path, extension = os.path.splitext(os.path.abspath(audio_path))
    return f"{base_path}_effects{extension}"


def process_uvr_task(
    orig_song_path: str,
    main_vocals: bool = False,
    dereverb: bool = True,
    song_id: str = "mdx",
    only_voiceless: bool = False,
    remove_files_output_dir: bool = False,
    mdx_models_dir: str = "mdx_models",
    output_dir: str = "clean_song_output",
):
    """Separate a song into stems with the MDX-Net models.

    Args:
        orig_song_path: Path of the input audio; it is first passed through
            ``convert_to_stereo_and_wav``.
        main_vocals: When true, additionally split the vocals into backup
            and main vocals with the ``UVR_MDXNET_KARA_2`` model.
        dereverb: When true, run the ``Reverb_HQ_By_FoxJoy`` model on the
            (main) vocals.
        song_id: Name of the sub-directory of ``output_dir`` that receives
            the results.
        only_voiceless: When true, only run the instrumental model and
            return its result directly.
        remove_files_output_dir: When true, empty ``output_dir`` first.
        mdx_models_dir: Directory holding the ``.onnx`` models and the
            ``data.json`` parameter file.
        output_dir: Root directory for all outputs.

    Returns:
        With ``only_voiceless`` the raw result of ``run_mdx`` (callers in
        this file unpack it as two values). Otherwise the 5-tuple
        ``(vocals_path, instrumentals_path, backup_vocals_path,
        main_vocals_path, vocals_dereverb_path)``; ``backup_vocals_path`` is
        ``None`` unless ``main_vocals`` is set, and the last two fall back
        to the preceding stage's path when their stage is skipped.
    """
    device_base = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Device: {device_base}")

    if remove_files_output_dir:
        remove_directory_contents(output_dir)

    with open(os.path.join(mdx_models_dir, "data.json"), encoding="utf-8") as infile:
        mdx_model_params = json.load(infile)

    song_output_dir = os.path.join(output_dir, song_id)
    create_directories(song_output_dir)
    orig_song_path = convert_to_stereo_and_wav(orig_song_path, output_dir)

    if ort is not None:
        logger.info(f"ONNX Runtime Device >> {ort.get_device()}")
    else:
        logger.info("ONNX Runtime Device >> unknown (onnxruntime not importable)")

    if only_voiceless:
        logger.info("Voiceless Track Separation...")
        return run_mdx(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
            orig_song_path,
            suffix="Voiceless",
            denoise=False,
            keep_orig=True,
            exclude_inversion=True,
            device_base=device_base,
        )

    logger.info("Vocal Track Isolation...")
    vocals_path, instrumentals_path = run_mdx(
        mdx_model_params,
        song_output_dir,
        os.path.join(mdx_models_dir, "UVR-MDX-NET-Voc_FT.onnx"),
        orig_song_path,
        denoise=True,
        keep_orig=True,
        device_base=device_base,
    )

    # Without the optional split, the full vocal track is the "main" vocal.
    backup_vocals_path, main_vocals_path = None, vocals_path
    if main_vocals:
        random_sleep()
        backup_vocals_path, main_vocals_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR_MDXNET_KARA_2.onnx"),
            vocals_path,
            suffix="Backup",
            invert_suffix="Main",
            denoise=True,
            device_base=device_base,
        )

    # Without de-reverb, the main vocal is returned unchanged in its place.
    vocals_dereverb_path = main_vocals_path
    if dereverb:
        random_sleep()
        _, vocals_dereverb_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
            main_vocals_path,
            invert_suffix="DeReverb",
            exclude_main=True,
            denoise=True,
            device_base=device_base,
        )

    return (
        vocals_path,
        instrumentals_path,
        backup_vocals_path,
        main_vocals_path,
        vocals_dereverb_path,
    )


def sound_separate(
    media_file,
    stem,
    main,
    dereverb,
    vocal_effects=True,
    background_effects=True,
    vocal_reverb_room_size=0.6,
    vocal_reverb_damping=0.6,
    vocal_reverb_dryness=0.8,
    vocal_reverb_wet_level=0.35,
    vocal_delay_seconds=0.4,
    vocal_delay_mix=0.25,
    vocal_compressor_threshold_db=-25,
    vocal_compressor_ratio=3.5,
    vocal_compressor_attack_ms=10,
    vocal_compressor_release_ms=60,
    vocal_gain_db=4,
    background_highpass_freq=120,
    background_lowpass_freq=11000,
    background_reverb_room_size=0.5,
    background_reverb_damping=0.5,
    background_reverb_wet_level=0.25,
    background_compressor_threshold_db=-20,
    background_compressor_ratio=2.5,
    background_compressor_attack_ms=15,
    background_compressor_release_ms=80,
    background_gain_db=3,
):
    """Extract the requested stem from ``media_file`` and optionally add effects.

    Args:
        media_file: Path of the input audio.
        stem: ``"vocal"`` or ``"background"``.
        main: Forwarded to ``process_uvr_task`` as ``main_vocals``.
        dereverb: Forwarded to ``process_uvr_task`` as ``dereverb``.
        vocal_effects: Apply ``add_vocal_effects`` to the vocal stem.
        background_effects: Apply ``add_instrumental_effects`` to the
            background stem.
        vocal_*: Settings forwarded to ``add_vocal_effects``.
        background_*: Settings forwarded to ``add_instrumental_effects``.

    Returns:
        A list with the path of the produced audio file.

    Raises:
        ValueError: If ``media_file`` or ``stem`` is empty.
        Exception: If no output was produced (vocal separation failed or
            ``stem`` is not one of the two supported values). Errors during
            background separation propagate unchanged.
    """
    if not media_file:
        raise ValueError("The audio path is missing.")
    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    hash_audio = str(get_hash(media_file))
    outputs = []
    start_time = time.time()

    # Best-effort probe of the input; the duration itself is not used and a
    # failure here is only reported, never fatal.
    # NOTE(review): newer librosa releases deprecate ``filename=`` in favour
    # of ``path=`` - confirm against the pinned librosa version.
    try:
        librosa.get_duration(filename=media_file)
    except Exception as e:
        logger.error(f"Could not read audio duration: {e}")

    if stem == "vocal":
        # Failures of the vocal path are logged and surface below as the
        # generic "no outputs" exception.
        try:
            _, _, _, _, vocal_audio = process_uvr_task(
                orig_song_path=media_file,
                song_id=hash_audio + "mdx",
                main_vocals=main,
                dereverb=dereverb,
                remove_files_output_dir=False,
            )
            if vocal_effects:
                out_effects_path = _effects_output_path(vocal_audio)
                add_vocal_effects(
                    vocal_audio,
                    out_effects_path,
                    reverb_room_size=vocal_reverb_room_size,
                    reverb_damping=vocal_reverb_damping,
                    vocal_reverb_dryness=vocal_reverb_dryness,
                    reverb_wet_level=vocal_reverb_wet_level,
                    delay_seconds=vocal_delay_seconds,
                    delay_mix=vocal_delay_mix,
                    compressor_threshold_db=vocal_compressor_threshold_db,
                    compressor_ratio=vocal_compressor_ratio,
                    compressor_attack_ms=vocal_compressor_attack_ms,
                    compressor_release_ms=vocal_compressor_release_ms,
                    gain_db=vocal_gain_db,
                )
                vocal_audio = out_effects_path
            outputs.append(vocal_audio)
        except Exception as error:
            logger.error(str(error))
            traceback.print_exc()

    if stem == "background":
        background_audio, _ = process_uvr_task(
            orig_song_path=media_file,
            song_id=hash_audio + "voiceless",
            only_voiceless=True,
            remove_files_output_dir=False,
        )
        if background_effects:
            out_effects_path = _effects_output_path(background_audio)
            add_instrumental_effects(
                background_audio,
                out_effects_path,
                highpass_freq=background_highpass_freq,
                lowpass_freq=background_lowpass_freq,
                reverb_room_size=background_reverb_room_size,
                reverb_damping=background_reverb_damping,
                reverb_wet_level=background_reverb_wet_level,
                compressor_threshold_db=background_compressor_threshold_db,
                compressor_ratio=background_compressor_ratio,
                compressor_attack_ms=background_compressor_attack_ms,
                compressor_release_ms=background_compressor_release_ms,
                gain_db=background_gain_db,
            )
            background_audio = out_effects_path
        outputs.append(background_audio)

    logger.info(f"Execution time: {time.time() - start_time:.2f} seconds")

    if not outputs:
        raise Exception("Error in sound separation.")
    return outputs