CCockrum committed on
Commit
52685e3
·
verified ·
1 Parent(s): 62a5614

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +213 -0
pipeline.py CHANGED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pipeline.py
2
+
3
+ import os
4
+ import time
5
+ import traceback
6
+ import librosa
7
+ import torch
8
+ import numpy as np
9
+ from utils import logger, remove_directory_contents, create_directories
10
+ from inference import run_mdx, run_mdx_beta, convert_to_stereo_and_wav, get_hash, random_sleep
11
+ from effects import add_vocal_effects, add_instrumental_effects
12
+
13
+
14
def _run_mdx_with_fallback(*args, **kwargs):
    """Run ``run_mdx`` and retry once with ``run_mdx_beta`` if it raises.

    Both callables receive exactly the same positional and keyword arguments.
    The first failure is logged before the retry so it is not lost silently;
    an exception raised by ``run_mdx_beta`` propagates to the caller.

    Returns:
        The two values produced by whichever call succeeded, as a 2-tuple.
    """
    try:
        primary_path, inverted_path = run_mdx(*args, **kwargs)
    except Exception as error:
        logger.error(f"run_mdx failed, retrying with run_mdx_beta: {error}")
        primary_path, inverted_path = run_mdx_beta(*args, **kwargs)
    return primary_path, inverted_path


def process_uvr_task(
    orig_song_path: str,
    main_vocals: bool = False,
    dereverb: bool = True,
    song_id: str = "mdx",
    only_voiceless: bool = False,
    remove_files_output_dir: bool = False,
    mdx_models_dir: str = "mdx_models",
    output_dir: str = "clean_song_output",
):
    """Split a song into stems using the MDX ONNX models in *mdx_models_dir*.

    Model parameters are read from ``data.json`` inside *mdx_models_dir*, the
    input is passed through ``convert_to_stereo_and_wav`` and the following
    stages are run:

    1. ``only_voiceless``: run ``UVR-MDX-NET-Inst_HQ_4.onnx`` on the song and
       return that result immediately (no further stages).
    2. Otherwise run ``UVR-MDX-NET-Voc_FT.onnx`` to obtain the vocals and
       instrumentals paths.
    3. ``main_vocals``: run ``UVR_MDXNET_KARA_2.onnx`` on the vocals to obtain
       backup and main vocals.
    4. ``dereverb``: run ``Reverb_HQ_By_FoxJoy.onnx`` on the main vocals.

    Stages 3 and 4 fall back to ``run_mdx_beta`` when ``run_mdx`` raises.

    Args:
        orig_song_path: Path of the audio file to process.
        main_vocals: Also separate main vocals from backup vocals.
        dereverb: Also run the de-reverb model on the (main) vocals.
        song_id: Name of the sub-directory of *output_dir* that receives the
            results.
        only_voiceless: Produce only the voiceless track and return early.
        remove_files_output_dir: Clear *output_dir* via
            ``remove_directory_contents`` before processing.
        mdx_models_dir: Directory holding ``data.json`` and the ``.onnx``
            model files.
        output_dir: Root directory for converted input and results.

    Returns:
        With ``only_voiceless=True``: whatever ``run_mdx`` returns for the
        instrumental model (the caller in this file unpacks two values).
        Otherwise the 5-tuple ``(vocals_path, instrumentals_path,
        backup_vocals_path, main_vocals_path, vocals_dereverb_path)``, where
        ``backup_vocals_path`` is ``None`` and ``main_vocals_path`` equals
        ``vocals_path`` when ``main_vocals`` is false, and
        ``vocals_dereverb_path`` equals ``main_vocals_path`` when ``dereverb``
        is false.

    Raises:
        OSError: If ``data.json`` cannot be opened.
        json.JSONDecodeError: If ``data.json`` is not valid JSON.
    """
    # Imported locally: the module-level import block does not provide json.
    import json

    device_base = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Device: {device_base}")

    if remove_files_output_dir:
        remove_directory_contents(output_dir)

    params_path = os.path.join(mdx_models_dir, "data.json")
    with open(params_path, encoding="utf-8") as infile:
        mdx_model_params = json.load(infile)

    song_output_dir = os.path.join(output_dir, song_id)
    create_directories(song_output_dir)
    orig_song_path = convert_to_stereo_and_wav(orig_song_path, output_dir)

    # Purely informational, so a missing onnxruntime must not abort the task
    # here. `ort` was previously referenced without ever being imported.
    try:
        import onnxruntime as ort
    except ImportError:
        logger.info("ONNX Runtime Device >> unavailable (onnxruntime not importable)")
    else:
        logger.info(f"ONNX Runtime Device >> {ort.get_device()}")

    if only_voiceless:
        logger.info("Voiceless Track Separation...")
        return run_mdx(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
            orig_song_path,
            suffix="Voiceless",
            denoise=False,
            keep_orig=True,
            exclude_inversion=True,
            device_base=device_base,
        )

    logger.info("Vocal Track Isolation...")
    vocals_path, instrumentals_path = run_mdx(
        mdx_model_params,
        song_output_dir,
        os.path.join(mdx_models_dir, "UVR-MDX-NET-Voc_FT.onnx"),
        orig_song_path,
        denoise=True,
        keep_orig=True,
        device_base=device_base,
    )

    # Defaults used when the optional main/backup split is skipped.
    backup_vocals_path, main_vocals_path = None, vocals_path

    if main_vocals:
        random_sleep()
        backup_vocals_path, main_vocals_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "UVR_MDXNET_KARA_2.onnx"),
            vocals_path,
            suffix="Backup",
            invert_suffix="Main",
            denoise=True,
            device_base=device_base,
        )

    # Default used when de-reverb is skipped.
    vocals_dereverb_path = main_vocals_path
    if dereverb:
        random_sleep()
        _, vocals_dereverb_path = _run_mdx_with_fallback(
            mdx_model_params,
            song_output_dir,
            os.path.join(mdx_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
            main_vocals_path,
            invert_suffix="DeReverb",
            exclude_main=True,
            denoise=True,
            device_base=device_base,
        )

    return vocals_path, instrumentals_path, backup_vocals_path, main_vocals_path, vocals_dereverb_path
118
+
119
+
120
def _effects_output_path(audio_path):
    """Return the absolute path of *audio_path* with ``_effects`` before its extension."""
    root, extension = os.path.splitext(os.path.abspath(audio_path))
    return f"{root}_effects{extension}"


def sound_separate(media_file, stem, main, dereverb,
                   vocal_effects=True, background_effects=True,
                   vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8, vocal_reverb_wet_level=0.35,
                   vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
                   vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5,
                   vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
                   vocal_gain_db=4,
                   background_highpass_freq=120, background_lowpass_freq=11000,
                   background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
                   background_compressor_threshold_db=-20, background_compressor_ratio=2.5,
                   background_compressor_attack_ms=15, background_compressor_release_ms=80,
                   background_gain_db=3):
    """Extract the requested stem from *media_file* and optionally add effects.

    Args:
        media_file: Path of the audio file to process.
        stem: ``"vocal"`` or ``"background"``.
        main: Forwarded to ``process_uvr_task`` as ``main_vocals`` (vocal stem
            only).
        dereverb: Forwarded to ``process_uvr_task`` as ``dereverb`` (vocal
            stem only).
        vocal_effects: Post-process the vocal stem with ``add_vocal_effects``.
        background_effects: Post-process the background stem with
            ``add_instrumental_effects``.
        vocal_*: Settings forwarded to ``add_vocal_effects``.
        background_*: Settings forwarded to ``add_instrumental_effects``.

    Returns:
        A list with one path: the separated stem, or its ``_effects`` variant
        (written next to the separated file) when effects are enabled.

    Raises:
        ValueError: If *media_file* is empty, or *stem* is empty or not one of
            the two supported values.
        Exception: If the vocal stem could not be produced; the underlying
            error is attached as ``__cause__``. Errors from the background
            stem propagate unchanged.
    """
    if not media_file:
        raise ValueError("The audio path is missing.")
    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")
    if stem not in ("vocal", "background"):
        # Reject unknown stems before doing any work; previously they fell
        # through to the generic failure at the end of the function.
        raise ValueError(
            f"Unsupported stem {stem!r}. Please select 'vocal' or 'background' stem."
        )

    hash_audio = str(get_hash(media_file))
    outputs = []
    failure = None

    start_time = time.time()

    # Non-fatal probe of the input; the returned duration is not used.
    # NOTE(review): `filename=` is deprecated in newer librosa in favour of
    # `path=` — confirm against the pinned librosa version before changing.
    try:
        librosa.get_duration(filename=media_file)
    except Exception as e:
        logger.error(f"Could not read audio duration: {e}")

    if stem == "vocal":
        try:
            _, _, _, _, vocal_audio = process_uvr_task(
                orig_song_path=media_file,
                song_id=hash_audio + "mdx",
                main_vocals=main,
                dereverb=dereverb,
                remove_files_output_dir=False,
            )

            if vocal_effects:
                out_effects_path = _effects_output_path(vocal_audio)
                add_vocal_effects(
                    vocal_audio, out_effects_path,
                    reverb_room_size=vocal_reverb_room_size,
                    reverb_damping=vocal_reverb_damping,
                    vocal_reverb_dryness=vocal_reverb_dryness,
                    reverb_wet_level=vocal_reverb_wet_level,
                    delay_seconds=vocal_delay_seconds,
                    delay_mix=vocal_delay_mix,
                    compressor_threshold_db=vocal_compressor_threshold_db,
                    compressor_ratio=vocal_compressor_ratio,
                    compressor_attack_ms=vocal_compressor_attack_ms,
                    compressor_release_ms=vocal_compressor_release_ms,
                    gain_db=vocal_gain_db,
                )
                vocal_audio = out_effects_path

            outputs.append(vocal_audio)

        except Exception as error:
            logger.error(str(error))
            traceback.print_exc()
            # Remember the cause so the final error is not anonymous.
            failure = error

    if stem == "background":
        background_audio, _ = process_uvr_task(
            orig_song_path=media_file,
            song_id=hash_audio + "voiceless",
            only_voiceless=True,
            remove_files_output_dir=False,
        )

        if background_effects:
            out_effects_path = _effects_output_path(background_audio)
            add_instrumental_effects(
                background_audio, out_effects_path,
                highpass_freq=background_highpass_freq,
                lowpass_freq=background_lowpass_freq,
                reverb_room_size=background_reverb_room_size,
                reverb_damping=background_reverb_damping,
                reverb_wet_level=background_reverb_wet_level,
                compressor_threshold_db=background_compressor_threshold_db,
                compressor_ratio=background_compressor_ratio,
                compressor_attack_ms=background_compressor_attack_ms,
                compressor_release_ms=background_compressor_release_ms,
                gain_db=background_gain_db,
            )
            background_audio = out_effects_path

        outputs.append(background_audio)

    logger.info(f"Execution time: {time.time() - start_time:.2f} seconds")

    if not outputs:
        raise Exception("Error in sound separation.") from failure

    return outputs