Tune-Splitter / pipeline.py
CCockrum's picture
Update pipeline.py
52685e3 verified
raw
history blame
8.41 kB
# pipeline.py
import json
import os
import time
import traceback

import librosa
import numpy as np
import torch

from effects import add_vocal_effects, add_instrumental_effects
from inference import run_mdx, run_mdx_beta, convert_to_stereo_and_wav, get_hash, random_sleep
from utils import logger, remove_directory_contents, create_directories
def _run_mdx_with_fallback(*args, **kwargs):
    """Call ``run_mdx`` and, if it raises, retry once with ``run_mdx_beta``.

    Both callables receive exactly the same positional and keyword
    arguments; the result of whichever call succeeds is returned unchanged.
    An exception raised by ``run_mdx_beta`` propagates to the caller.
    """
    try:
        return run_mdx(*args, **kwargs)
    except Exception as error:
        # Record why the primary path failed so the switch to the fallback
        # implementation is visible in the logs instead of being silent.
        logger.warning(f"run_mdx failed ({error}); retrying with run_mdx_beta")
        return run_mdx_beta(*args, **kwargs)


def _log_onnx_device():
    """Log the device reported by ONNX Runtime when the package is importable."""
    try:
        # Imported here because the module never imports onnxruntime at the
        # top level; the value is only needed for this diagnostic line.
        import onnxruntime as ort
    except ImportError:
        logger.info("ONNX Runtime Device >> unavailable (onnxruntime is not importable)")
        return
    logger.info(f"ONNX Runtime Device >> {ort.get_device()}")


def process_uvr_task(
    orig_song_path: str,
    main_vocals: bool = False,
    dereverb: bool = True,
    song_id: str = "mdx",
    only_voiceless: bool = False,
    remove_files_output_dir: bool = False,
    mdx_models_dir: str = "mdx_models",
    output_dir: str = "clean_song_output",
):
    """Run the MDX separation steps on one song.

    Steps, in order:

    1. Optionally empty ``output_dir`` (``remove_files_output_dir``).
    2. Load the model parameters from ``<mdx_models_dir>/data.json``.
    3. Create ``<output_dir>/<song_id>`` and pass the input through
       ``convert_to_stereo_and_wav``.
    4. If ``only_voiceless`` is set, run ``UVR-MDX-NET-Inst_HQ_4.onnx`` and
       return immediately.
    5. Otherwise run ``UVR-MDX-NET-Voc_FT.onnx``; then, when requested, run
       ``UVR_MDXNET_KARA_2.onnx`` (``main_vocals``) and
       ``Reverb_HQ_By_FoxJoy.onnx`` (``dereverb``) on the vocal result. These
       two optional steps fall back to ``run_mdx_beta`` if ``run_mdx`` raises.

    Args:
        orig_song_path: Path of the audio file to process.
        main_vocals: Run the backup/main vocal split.
        dereverb: Run the de-reverb step on the (main) vocals.
        song_id: Name of the sub-directory of ``output_dir`` used for outputs.
        only_voiceless: Run only the "Voiceless" separation and return early.
        remove_files_output_dir: Empty ``output_dir`` before processing.
        mdx_models_dir: Directory holding ``data.json`` and the ``.onnx`` files.
        output_dir: Root directory for converted input and outputs.

    Returns:
        When ``only_voiceless`` is true, the return value of ``run_mdx``
        unchanged (``sound_separate`` unpacks it as two values). Otherwise the
        tuple ``(vocals_path, instrumentals_path, backup_vocals_path,
        main_vocals_path, vocals_dereverb_path)``. ``backup_vocals_path`` is
        ``None`` and ``main_vocals_path`` equals ``vocals_path`` when
        ``main_vocals`` is false; ``vocals_dereverb_path`` equals
        ``main_vocals_path`` when ``dereverb`` is false.
    """
    device_base = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Device: {device_base}")

    if remove_files_output_dir:
        remove_directory_contents(output_dir)

    with open(os.path.join(mdx_models_dir, "data.json"), encoding="utf-8") as infile:
        mdx_model_params = json.load(infile)

    song_output_dir = os.path.join(output_dir, song_id)
    create_directories(song_output_dir)
    orig_song_path = convert_to_stereo_and_wav(orig_song_path, output_dir)

    _log_onnx_device()

    if only_voiceless:
        logger.info("Voiceless Track Separation...")
        return run_mdx(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
            orig_song_path,
            suffix="Voiceless",
            denoise=False,
            keep_orig=True,
            exclude_inversion=True,
            device_base=device_base,
        )

    logger.info("Vocal Track Isolation...")
    vocals_path, instrumentals_path = run_mdx(
        mdx_model_params,
        song_output_dir,
        os.path.join(mdx_models_dir, "UVR-MDX-NET-Voc_FT.onnx"),
        orig_song_path,
        denoise=True,
        keep_orig=True,
        device_base=device_base,
    )

    # Defaults used when the optional main/backup split is skipped.
    backup_vocals_path, main_vocals_path = None, vocals_path
    if main_vocals:
        random_sleep()
        backup_vocals_path, main_vocals_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR_MDXNET_KARA_2.onnx"),
            vocals_path,
            suffix="Backup",
            invert_suffix="Main",
            denoise=True,
            device_base=device_base,
        )

    # Default used when the optional de-reverb step is skipped.
    vocals_dereverb_path = main_vocals_path
    if dereverb:
        random_sleep()
        _, vocals_dereverb_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
            main_vocals_path,
            invert_suffix="DeReverb",
            exclude_main=True,
            denoise=True,
            device_base=device_base,
        )

    return vocals_path, instrumentals_path, backup_vocals_path, main_vocals_path, vocals_dereverb_path
def _effects_output_path(audio_path):
    """Return the absolute form of ``audio_path`` with ``_effects`` before the extension."""
    base_path, extension = os.path.splitext(os.path.abspath(audio_path))
    return f"{base_path}_effects{extension}"


def _log_input_duration(media_file):
    """Best-effort probe of the input with librosa; never raises."""
    try:
        try:
            duration = librosa.get_duration(path=media_file)
        except TypeError:
            # Older librosa releases only accept the ``filename`` keyword.
            duration = librosa.get_duration(filename=media_file)
        logger.info(f"Input duration: {duration:.2f} seconds")
    except Exception as error:
        # The probe is informational only; a failure must not stop the run.
        logger.warning(f"Could not read the duration of {media_file}: {error}")


def sound_separate(media_file, stem, main, dereverb,
                   vocal_effects=True, background_effects=True,
                   vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8, vocal_reverb_wet_level=0.35,
                   vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
                   vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5,
                   vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
                   vocal_gain_db=4,
                   background_highpass_freq=120, background_lowpass_freq=11000,
                   background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
                   background_compressor_threshold_db=-20, background_compressor_ratio=2.5,
                   background_compressor_attack_ms=15, background_compressor_release_ms=80,
                   background_gain_db=3):
    """Separate one stem from ``media_file`` and optionally post-process it.

    Args:
        media_file: Path of the input audio; must be non-empty.
        stem: ``"vocal"`` or ``"background"``.
        main: Forwarded to ``process_uvr_task`` as ``main_vocals``
            (vocal stem only).
        dereverb: Forwarded to ``process_uvr_task`` as ``dereverb``
            (vocal stem only).
        vocal_effects: Run ``add_vocal_effects`` on the vocal result.
        background_effects: Run ``add_instrumental_effects`` on the
            background result.
        vocal_*: Forwarded to ``add_vocal_effects``.
        background_*: Forwarded to ``add_instrumental_effects``.

    Returns:
        A list holding one path: the separated stem, or the ``_effects`` file
        written next to it when effects are enabled for that stem.

    Raises:
        ValueError: ``media_file`` is empty or ``stem`` is not one of the two
            supported values.
        Exception: The vocal stem could not be produced; the underlying error
            is attached as ``__cause__``. Errors raised while producing the
            background stem propagate unchanged.
    """
    if not media_file:
        raise ValueError("The audio path is missing.")
    # Reject unknown stems before any work is done; previously they ran the
    # setup and then failed with the generic separation error.
    if stem not in ("vocal", "background"):
        raise ValueError("Please select 'vocal' or 'background' stem.")

    hash_audio = str(get_hash(media_file))
    outputs = []
    failure = None
    start_time = time.time()

    _log_input_duration(media_file)

    if stem == "vocal":
        try:
            _, _, _, _, vocal_audio = process_uvr_task(
                orig_song_path=media_file,
                song_id=hash_audio + "mdx",
                main_vocals=main,
                dereverb=dereverb,
                remove_files_output_dir=False,
            )
            if vocal_effects:
                # The effects file is written next to the separated stem.
                out_effects_path = _effects_output_path(vocal_audio)
                add_vocal_effects(
                    vocal_audio,
                    out_effects_path,
                    reverb_room_size=vocal_reverb_room_size,
                    reverb_damping=vocal_reverb_damping,
                    vocal_reverb_dryness=vocal_reverb_dryness,
                    reverb_wet_level=vocal_reverb_wet_level,
                    delay_seconds=vocal_delay_seconds,
                    delay_mix=vocal_delay_mix,
                    compressor_threshold_db=vocal_compressor_threshold_db,
                    compressor_ratio=vocal_compressor_ratio,
                    compressor_attack_ms=vocal_compressor_attack_ms,
                    compressor_release_ms=vocal_compressor_release_ms,
                    gain_db=vocal_gain_db,
                )
                vocal_audio = out_effects_path
            outputs.append(vocal_audio)
        except Exception as error:
            logger.error(str(error))
            traceback.print_exc()
            # Kept so the final error can carry the real cause.
            failure = error
    else:  # stem == "background"
        background_audio, _ = process_uvr_task(
            orig_song_path=media_file,
            song_id=hash_audio + "voiceless",
            only_voiceless=True,
            remove_files_output_dir=False,
        )
        if background_effects:
            # The effects file is written next to the separated stem.
            out_effects_path = _effects_output_path(background_audio)
            add_instrumental_effects(
                background_audio,
                out_effects_path,
                highpass_freq=background_highpass_freq,
                lowpass_freq=background_lowpass_freq,
                reverb_room_size=background_reverb_room_size,
                reverb_damping=background_reverb_damping,
                reverb_wet_level=background_reverb_wet_level,
                compressor_threshold_db=background_compressor_threshold_db,
                compressor_ratio=background_compressor_ratio,
                compressor_attack_ms=background_compressor_attack_ms,
                compressor_release_ms=background_compressor_release_ms,
                gain_db=background_gain_db,
            )
            background_audio = out_effects_path
        outputs.append(background_audio)

    logger.info(f"Execution time: {time.time() - start_time:.2f} seconds")

    if not outputs:
        raise Exception("Error in sound separation.") from failure
    return outputs