Spaces:
Sleeping
Sleeping
Add model downloads from Hugging Face
Browse files:
- .gitignore +2 -1
- app.py +15 -4
- uvr_processing.py +12 -13
.gitignore
CHANGED
@@ -7,4 +7,5 @@ __pycache__
|
|
7 |
*.avi
|
8 |
*.mkv
|
9 |
.env
|
10 |
-
.vscode
|
|
|
|
7 |
*.avi
|
8 |
*.mkv
|
9 |
.env
|
10 |
+
.vscode
|
11 |
+
tmp
|
app.py
CHANGED
@@ -6,9 +6,17 @@ from pathlib import Path
|
|
6 |
import os
|
7 |
import time
|
8 |
import torch
|
|
|
9 |
from uvr_processing import process_uvr_task
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def get_device_info():
|
13 |
if torch.cuda.is_available():
|
14 |
device = f"GPU ({torch.cuda.get_device_name(0)})"
|
@@ -34,9 +42,9 @@ def inference(audio_file: str,
|
|
34 |
outputs = []
|
35 |
start_time = time.time()
|
36 |
background_path, vocals_path = process_uvr_task(
|
37 |
-
mdxnet_models_dir=mdxnet_models_dir,
|
38 |
input_file_path=audio_file,
|
39 |
output_dir=output_dir,
|
|
|
40 |
)
|
41 |
end_time = time.time()
|
42 |
execution_time = end_time - start_time
|
@@ -72,9 +80,12 @@ if __name__ == "__main__":
|
|
72 |
description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
|
73 |
theme = "NoCrypt/miku"
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
78 |
|
79 |
# confirm entry points from client
|
80 |
# client_local = Client("http://127.0.0.1:7860")
|
|
|
6 |
import os
|
7 |
import time
|
8 |
import torch
|
9 |
+
from huggingface_hub import hf_hub_download
|
10 |
from uvr_processing import process_uvr_task
|
11 |
|
12 |
|
13 |
+
MODEL_ID = "masszhou/mdxnet"
|
14 |
+
MODELS_PATH = {
|
15 |
+
"bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
|
16 |
+
"basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
|
17 |
+
"main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
|
18 |
+
}
|
19 |
+
|
20 |
def get_device_info():
|
21 |
if torch.cuda.is_available():
|
22 |
device = f"GPU ({torch.cuda.get_device_name(0)})"
|
|
|
42 |
outputs = []
|
43 |
start_time = time.time()
|
44 |
background_path, vocals_path = process_uvr_task(
|
|
|
45 |
input_file_path=audio_file,
|
46 |
output_dir=output_dir,
|
47 |
+
models_path=MODELS_PATH,
|
48 |
)
|
49 |
end_time = time.time()
|
50 |
execution_time = end_time - start_time
|
|
|
80 |
description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
|
81 |
theme = "NoCrypt/miku"
|
82 |
|
83 |
+
model_id = "masszhou/mdxnet"
|
84 |
+
models_path = {
|
85 |
+
"bgm": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
|
86 |
+
"basic_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Voc_FT.onnx")),
|
87 |
+
"main_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR_MDXNET_KARA_2.onnx"))
|
88 |
+
}
|
89 |
|
90 |
# confirm entry points from client
|
91 |
# client_local = Client("http://127.0.0.1:7860")
|
uvr_processing.py
CHANGED
@@ -142,7 +142,7 @@ def run_mdx_cpu(model_params: Dict,
|
|
142 |
|
143 |
def extract_bgm(mdx_model_params: Dict,
|
144 |
input_filename: Path,
|
145 |
-
|
146 |
output_dir: Path,
|
147 |
device_base: str = "cuda") -> Path:
|
148 |
"""
|
@@ -151,7 +151,7 @@ def extract_bgm(mdx_model_params: Dict,
|
|
151 |
background_path, _ = run_mdx(model_params=mdx_model_params,
|
152 |
input_filename=input_filename,
|
153 |
output_dir=output_dir,
|
154 |
-
model_path=
|
155 |
denoise=False,
|
156 |
device_base=device_base,
|
157 |
)
|
@@ -160,10 +160,10 @@ def extract_bgm(mdx_model_params: Dict,
|
|
160 |
|
161 |
def extract_vocal(mdx_model_params: Dict,
|
162 |
input_filename: Path,
|
163 |
-
|
|
|
164 |
output_dir: Path,
|
165 |
main_vocals_flag: bool = False,
|
166 |
-
dereverb_flag: bool = False,
|
167 |
device_base: str = "cuda") -> Path:
|
168 |
"""
|
169 |
Extract vocals
|
@@ -172,7 +172,7 @@ def extract_vocal(mdx_model_params: Dict,
|
|
172 |
vocals_path, _ = run_mdx(mdx_model_params,
|
173 |
input_filename,
|
174 |
output_dir,
|
175 |
-
|
176 |
denoise=True,
|
177 |
device_base=device_base,
|
178 |
)
|
@@ -181,7 +181,7 @@ def extract_vocal(mdx_model_params: Dict,
|
|
181 |
time.sleep(2)
|
182 |
backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
|
183 |
output_dir,
|
184 |
-
|
185 |
vocals_path,
|
186 |
denoise=True,
|
187 |
device_base=device_base,
|
@@ -201,17 +201,16 @@ def extract_vocal(mdx_model_params: Dict,
|
|
201 |
# vocals_path = vocals_dereverb_path
|
202 |
return vocals_path
|
203 |
|
204 |
-
def process_uvr_task(
|
205 |
-
input_file_path: Path,
|
206 |
output_dir: Path,
|
|
|
207 |
main_vocals_flag: bool = False, # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
|
208 |
-
dereverb_flag: bool = False, # If "DeReverb" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
|
209 |
) -> Tuple[Path, Path]:
|
210 |
|
211 |
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
212 |
|
213 |
# load mdx model definition
|
214 |
-
with open(
|
215 |
mdx_model_params = json.load(infile) # type: Dict
|
216 |
|
217 |
output_dir.mkdir(parents=True, exist_ok=True)
|
@@ -220,7 +219,7 @@ def process_uvr_task(mdxnet_models_dir: Path,
|
|
220 |
# 1. Extract pure background music, remove vocals
|
221 |
background_path = extract_bgm(mdx_model_params,
|
222 |
input_file_path,
|
223 |
-
|
224 |
output_dir,
|
225 |
device_base=device_base)
|
226 |
|
@@ -228,10 +227,10 @@ def process_uvr_task(mdxnet_models_dir: Path,
|
|
228 |
# First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
|
229 |
vocals_path = extract_vocal(mdx_model_params,
|
230 |
input_file_path,
|
231 |
-
|
|
|
232 |
output_dir,
|
233 |
main_vocals_flag=main_vocals_flag,
|
234 |
-
dereverb_flag=dereverb_flag,
|
235 |
device_base=device_base)
|
236 |
|
237 |
return background_path, vocals_path
|
|
|
142 |
|
143 |
def extract_bgm(mdx_model_params: Dict,
|
144 |
input_filename: Path,
|
145 |
+
model_bgm_path: Path,
|
146 |
output_dir: Path,
|
147 |
device_base: str = "cuda") -> Path:
|
148 |
"""
|
|
|
151 |
background_path, _ = run_mdx(model_params=mdx_model_params,
|
152 |
input_filename=input_filename,
|
153 |
output_dir=output_dir,
|
154 |
+
model_path=model_bgm_path,
|
155 |
denoise=False,
|
156 |
device_base=device_base,
|
157 |
)
|
|
|
160 |
|
161 |
def extract_vocal(mdx_model_params: Dict,
|
162 |
input_filename: Path,
|
163 |
+
model_basic_vocal_path: Path,
|
164 |
+
model_main_vocal_path: Path,
|
165 |
output_dir: Path,
|
166 |
main_vocals_flag: bool = False,
|
|
|
167 |
device_base: str = "cuda") -> Path:
|
168 |
"""
|
169 |
Extract vocals
|
|
|
172 |
vocals_path, _ = run_mdx(mdx_model_params,
|
173 |
input_filename,
|
174 |
output_dir,
|
175 |
+
model_basic_vocal_path,
|
176 |
denoise=True,
|
177 |
device_base=device_base,
|
178 |
)
|
|
|
181 |
time.sleep(2)
|
182 |
backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
|
183 |
output_dir,
|
184 |
+
model_main_vocal_path,
|
185 |
vocals_path,
|
186 |
denoise=True,
|
187 |
device_base=device_base,
|
|
|
201 |
# vocals_path = vocals_dereverb_path
|
202 |
return vocals_path
|
203 |
|
204 |
+
def process_uvr_task(input_file_path: Path,
|
|
|
205 |
output_dir: Path,
|
206 |
+
models_path: Dict[str, Path],
|
207 |
main_vocals_flag: bool = False, # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
|
|
|
208 |
) -> Tuple[Path, Path]:
|
209 |
|
210 |
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
211 |
|
212 |
# load mdx model definition
|
213 |
+
with open("./mdx_models/model_data.json") as infile:
|
214 |
mdx_model_params = json.load(infile) # type: Dict
|
215 |
|
216 |
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
219 |
# 1. Extract pure background music, remove vocals
|
220 |
background_path = extract_bgm(mdx_model_params,
|
221 |
input_file_path,
|
222 |
+
models_path["bgm"],
|
223 |
output_dir,
|
224 |
device_base=device_base)
|
225 |
|
|
|
227 |
# First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
|
228 |
vocals_path = extract_vocal(mdx_model_params,
|
229 |
input_file_path,
|
230 |
+
models_path["basic_vocal"],
|
231 |
+
models_path["main_vocal"],
|
232 |
output_dir,
|
233 |
main_vocals_flag=main_vocals_flag,
|
|
|
234 |
device_base=device_base)
|
235 |
|
236 |
return background_path, vocals_path
|