Spaces:

CCockrum
/

Tune-Splitter

Sleeping

App Files Files Community

Tune-Splitter / pipeline.py

CCockrum

Update pipeline.py

52685e3 verified 5 months ago

raw

history blame

8.41 kB

	# pipeline.py

	import os
	import time
	import traceback
	import librosa
	import torch
	import numpy as np
	from utils import logger, remove_directory_contents, create_directories
	from inference import run_mdx, run_mdx_beta, convert_to_stereo_and_wav, get_hash, random_sleep
	from effects import add_vocal_effects, add_instrumental_effects


	def _run_mdx_with_fallback(*args, **kwargs):
	    """Run ``run_mdx`` and retry once with ``run_mdx_beta`` if it raises.

	    Both functions are called with exactly the same arguments. The failure
	    of the primary attempt is logged instead of being silently discarded.
	    """
	    try:
	        return run_mdx(*args, **kwargs)
	    except Exception as error:
	        logger.error(f"run_mdx failed ({error}); retrying with run_mdx_beta")
	        return run_mdx_beta(*args, **kwargs)


	def process_uvr_task(
	    orig_song_path: str,
	    main_vocals: bool = False,
	    dereverb: bool = True,
	    song_id: str = "mdx",
	    only_voiceless: bool = False,
	    remove_files_output_dir: bool = False,
	    mdx_models_dir: str = "mdx_models",
	    output_dir: str = "clean_song_output",
	):
	    """Separate a song into stems using the MDX ONNX models.

	    Args:
	        orig_song_path: Path of the input audio; it is first passed through
	            ``convert_to_stereo_and_wav``.
	        main_vocals: When true, additionally split the vocal stem into
	            backup and main vocals with the ``UVR_MDXNET_KARA_2`` model.
	        dereverb: When true, run the ``Reverb_HQ_By_FoxJoy`` model on the
	            (main) vocals.
	        song_id: Name of the sub-directory of ``output_dir`` that receives
	            the separated files.
	        only_voiceless: When true, only the ``UVR-MDX-NET-Inst_HQ_4`` model
	            is run and its result is returned directly.
	        remove_files_output_dir: When true, empty ``output_dir`` first.
	        mdx_models_dir: Directory holding the ``.onnx`` models and
	            ``data.json`` (the model parameters).
	        output_dir: Root directory for all generated files.

	    Returns:
	        If ``only_voiceless`` is true, whatever ``run_mdx`` returns for the
	        instrumental model (callers in this file unpack it as a pair).
	        Otherwise the 5-tuple ``(vocals_path, instrumentals_path,
	        backup_vocals_path, main_vocals_path, vocals_dereverb_path)``;
	        ``backup_vocals_path`` is ``None`` unless ``main_vocals`` is set, and
	        the last two fall back to the previous stage's path when their
	        stage is disabled.
	    """
	    # Imported locally because the module's import block does not provide
	    # ``json``; the original code raised NameError here.
	    import json

	    device_base = "cuda" if torch.cuda.is_available() else "cpu"
	    logger.info(f"Device: {device_base}")

	    if remove_files_output_dir:
	        remove_directory_contents(output_dir)

	    params_path = os.path.join(mdx_models_dir, "data.json")
	    with open(params_path, encoding="utf-8") as infile:
	        mdx_model_params = json.load(infile)

	    song_output_dir = os.path.join(output_dir, song_id)
	    create_directories(song_output_dir)
	    orig_song_path = convert_to_stereo_and_wav(orig_song_path, output_dir)

	    # ``ort`` was never bound in this module (NameError). The device report
	    # is purely informational, so a missing onnxruntime must not abort the
	    # separation. NOTE(review): assumes ``ort`` meant ``onnxruntime``.
	    try:
	        import onnxruntime as ort
	    except ImportError:
	        logger.info("ONNX Runtime Device >> unavailable (onnxruntime not importable)")
	    else:
	        logger.info(f"ONNX Runtime Device >> {ort.get_device()}")

	    if only_voiceless:
	        logger.info("Voiceless Track Separation...")
	        return run_mdx(
	            mdx_model_params,
	            song_output_dir,
	            os.path.join(mdx_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
	            orig_song_path,
	            suffix="Voiceless",
	            denoise=False,
	            keep_orig=True,
	            exclude_inversion=True,
	            device_base=device_base,
	        )

	    logger.info("Vocal Track Isolation...")
	    vocals_path, instrumentals_path = run_mdx(
	        mdx_model_params,
	        song_output_dir,
	        os.path.join(mdx_models_dir, "UVR-MDX-NET-Voc_FT.onnx"),
	        orig_song_path,
	        denoise=True,
	        keep_orig=True,
	        device_base=device_base,
	    )

	    # Without the optional main/backup split, the full vocal stem is
	    # treated as the main vocals.
	    backup_vocals_path, main_vocals_path = None, vocals_path

	    if main_vocals:
	        random_sleep()
	        backup_vocals_path, main_vocals_path = _run_mdx_with_fallback(
	            mdx_model_params,
	            song_output_dir,
	            os.path.join(mdx_models_dir, "UVR_MDXNET_KARA_2.onnx"),
	            vocals_path,
	            suffix="Backup",
	            invert_suffix="Main",
	            denoise=True,
	            device_base=device_base,
	        )

	    # Without de-reverb, the main vocals are returned in its place.
	    vocals_dereverb_path = main_vocals_path
	    if dereverb:
	        random_sleep()
	        _, vocals_dereverb_path = _run_mdx_with_fallback(
	            mdx_model_params,
	            song_output_dir,
	            os.path.join(mdx_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
	            main_vocals_path,
	            invert_suffix="DeReverb",
	            exclude_main=True,
	            denoise=True,
	            device_base=device_base,
	        )

	    return vocals_path, instrumentals_path, backup_vocals_path, main_vocals_path, vocals_dereverb_path


	def sound_separate(media_file, stem, main, dereverb,
	                   vocal_effects=True, background_effects=True,
	                   vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8, vocal_reverb_wet_level=0.35,
	                   vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
	                   vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5,
	                   vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
	                   vocal_gain_db=4,
	                   background_highpass_freq=120, background_lowpass_freq=11000,
	                   background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
	                   background_compressor_threshold_db=-20, background_compressor_ratio=2.5,
	                   background_compressor_attack_ms=15, background_compressor_release_ms=80,
	                   background_gain_db=3):
	    """Separate one stem from ``media_file`` and optionally apply effects.

	    Args:
	        media_file: Path of the input audio file.
	        stem: ``"vocal"`` or ``"background"``.
	        main: Forwarded to ``process_uvr_task`` as ``main_vocals``
	            (vocal stem only).
	        dereverb: Forwarded to ``process_uvr_task`` as ``dereverb``
	            (vocal stem only).
	        vocal_effects: Run ``add_vocal_effects`` on the vocal stem.
	        background_effects: Run ``add_instrumental_effects`` on the
	            background stem.
	        vocal_*: Passed through to ``add_vocal_effects``.
	        background_*: Passed through to ``add_instrumental_effects``.

	    Returns:
	        A one-element list holding the path of the produced audio file.
	        When effects are applied, this is the ``*_effects`` file written
	        next to the separated stem.

	    Raises:
	        ValueError: If ``media_file`` is empty or ``stem`` is empty or not
	            one of the two supported values.
	        Exception: ``"Error in sound separation."`` if the vocal branch
	            failed; the underlying error is attached as ``__cause__``.
	            Errors in the background branch propagate unchanged.
	    """
	    if not media_file:
	        raise ValueError("The audio path is missing.")
	    if not stem:
	        raise ValueError("Please select 'vocal' or 'background' stem.")
	    # Reject unknown stems up front instead of hashing the file and then
	    # failing with the generic "Error in sound separation." message.
	    if stem not in ("vocal", "background"):
	        raise ValueError("Please select 'vocal' or 'background' stem.")

	    hash_audio = str(get_hash(media_file))
	    outputs = []
	    failure = None

	    start_time = time.time()

	    # Best-effort probe only: the duration is discarded and a failure must
	    # not abort the separation.
	    # NOTE(review): newer librosa releases deprecate ``filename`` in favour
	    # of ``path`` - confirm against the pinned librosa version.
	    try:
	        librosa.get_duration(filename=media_file)
	    except Exception as e:
	        logger.error(f"Could not read audio duration: {e}")

	    if stem == "vocal":
	        try:
	            _, _, _, _, vocal_audio = process_uvr_task(
	                orig_song_path=media_file,
	                song_id=hash_audio + "mdx",
	                main_vocals=main,
	                dereverb=dereverb,
	                remove_files_output_dir=False,
	            )

	            if vocal_effects:
	                # The root is absolute, so the effects file lands next to
	                # the separated stem. The original joined it onto the input
	                # file's directory, which os.path.join ignores for an
	                # absolute second component.
	                root, extension = os.path.splitext(os.path.abspath(vocal_audio))
	                out_effects_path = f"{root}_effects{extension}"
	                add_vocal_effects(
	                    vocal_audio, out_effects_path,
	                    reverb_room_size=vocal_reverb_room_size,
	                    reverb_damping=vocal_reverb_damping,
	                    vocal_reverb_dryness=vocal_reverb_dryness,
	                    reverb_wet_level=vocal_reverb_wet_level,
	                    delay_seconds=vocal_delay_seconds,
	                    delay_mix=vocal_delay_mix,
	                    compressor_threshold_db=vocal_compressor_threshold_db,
	                    compressor_ratio=vocal_compressor_ratio,
	                    compressor_attack_ms=vocal_compressor_attack_ms,
	                    compressor_release_ms=vocal_compressor_release_ms,
	                    gain_db=vocal_gain_db,
	                )
	                vocal_audio = out_effects_path

	            outputs.append(vocal_audio)

	        except Exception as error:
	            logger.error(str(error))
	            traceback.print_exc()
	            # Kept so the final error can carry the real cause.
	            failure = error

	    else:  # stem == "background" (validated above)
	        background_audio, _ = process_uvr_task(
	            orig_song_path=media_file,
	            song_id=hash_audio + "voiceless",
	            only_voiceless=True,
	            remove_files_output_dir=False,
	        )

	        if background_effects:
	            root, extension = os.path.splitext(os.path.abspath(background_audio))
	            out_effects_path = f"{root}_effects{extension}"
	            add_instrumental_effects(
	                background_audio, out_effects_path,
	                highpass_freq=background_highpass_freq,
	                lowpass_freq=background_lowpass_freq,
	                reverb_room_size=background_reverb_room_size,
	                reverb_damping=background_reverb_damping,
	                reverb_wet_level=background_reverb_wet_level,
	                compressor_threshold_db=background_compressor_threshold_db,
	                compressor_ratio=background_compressor_ratio,
	                compressor_attack_ms=background_compressor_attack_ms,
	                compressor_release_ms=background_compressor_release_ms,
	                gain_db=background_gain_db,
	            )
	            background_audio = out_effects_path

	        outputs.append(background_audio)

	    logger.info(f"Execution time: {time.time() - start_time:.2f} seconds")

	    if not outputs:
	        raise Exception("Error in sound separation.") from failure

	    return outputs