masszhou committed on
Commit
3e18896
·
1 Parent(s): 6e8f9db

Add downloading of models from Hugging Face

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. app.py +15 -4
  3. uvr_processing.py +12 -13
.gitignore CHANGED
@@ -7,4 +7,5 @@ __pycache__
7
  *.avi
8
  *.mkv
9
  .env
10
- .vscode
 
 
7
  *.avi
8
  *.mkv
9
  .env
10
+ .vscode
11
+ tmp
app.py CHANGED
@@ -6,9 +6,17 @@ from pathlib import Path
6
  import os
7
  import time
8
  import torch
 
9
  from uvr_processing import process_uvr_task
10
 
11
 
 
 
 
 
 
 
 
12
  def get_device_info():
13
  if torch.cuda.is_available():
14
  device = f"GPU ({torch.cuda.get_device_name(0)})"
@@ -34,9 +42,9 @@ def inference(audio_file: str,
34
  outputs = []
35
  start_time = time.time()
36
  background_path, vocals_path = process_uvr_task(
37
- mdxnet_models_dir=mdxnet_models_dir,
38
  input_file_path=audio_file,
39
  output_dir=output_dir,
 
40
  )
41
  end_time = time.time()
42
  execution_time = end_time - start_time
@@ -72,9 +80,12 @@ if __name__ == "__main__":
72
  description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
73
  theme = "NoCrypt/miku"
74
 
75
- BASE_DIR = "." # os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
76
- mdxnet_models_dir = os.path.join(BASE_DIR, "mdx_models")
77
- output_dir = os.path.join(BASE_DIR, "output")
 
 
 
78
 
79
  # confirm entry points from client
80
  # client_local = Client("http://127.0.0.1:7860")
 
6
  import os
7
  import time
8
  import torch
9
+ from huggingface_hub import hf_hub_download
10
  from uvr_processing import process_uvr_task
11
 
12
 
13
+ MODEL_ID = "masszhou/mdxnet"
14
+ MODELS_PATH = {
15
+ "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
16
+ "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
17
+ "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
18
+ }
19
+
20
  def get_device_info():
21
  if torch.cuda.is_available():
22
  device = f"GPU ({torch.cuda.get_device_name(0)})"
 
42
  outputs = []
43
  start_time = time.time()
44
  background_path, vocals_path = process_uvr_task(
 
45
  input_file_path=audio_file,
46
  output_dir=output_dir,
47
+ models_path=MODELS_PATH,
48
  )
49
  end_time = time.time()
50
  execution_time = end_time - start_time
 
80
  description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
81
  theme = "NoCrypt/miku"
82
 
83
+ model_id = "masszhou/mdxnet"
84
+ models_path = {
85
+ "bgm": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
86
+ "basic_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Voc_FT.onnx")),
87
+ "main_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR_MDXNET_KARA_2.onnx"))
88
+ }
89
 
90
  # confirm entry points from client
91
  # client_local = Client("http://127.0.0.1:7860")
uvr_processing.py CHANGED
@@ -142,7 +142,7 @@ def run_mdx_cpu(model_params: Dict,
142
 
143
  def extract_bgm(mdx_model_params: Dict,
144
  input_filename: Path,
145
- mdxnet_models_dir: Path,
146
  output_dir: Path,
147
  device_base: str = "cuda") -> Path:
148
  """
@@ -151,7 +151,7 @@ def extract_bgm(mdx_model_params: Dict,
151
  background_path, _ = run_mdx(model_params=mdx_model_params,
152
  input_filename=input_filename,
153
  output_dir=output_dir,
154
- model_path=mdxnet_models_dir/"UVR-MDX-NET-Inst_HQ_3.onnx",
155
  denoise=False,
156
  device_base=device_base,
157
  )
@@ -160,10 +160,10 @@ def extract_bgm(mdx_model_params: Dict,
160
 
161
  def extract_vocal(mdx_model_params: Dict,
162
  input_filename: Path,
163
- mdxnet_models_dir: Path,
 
164
  output_dir: Path,
165
  main_vocals_flag: bool = False,
166
- dereverb_flag: bool = False,
167
  device_base: str = "cuda") -> Path:
168
  """
169
  Extract vocals
@@ -172,7 +172,7 @@ def extract_vocal(mdx_model_params: Dict,
172
  vocals_path, _ = run_mdx(mdx_model_params,
173
  input_filename,
174
  output_dir,
175
- mdxnet_models_dir/"UVR-MDX-NET-Voc_FT.onnx",
176
  denoise=True,
177
  device_base=device_base,
178
  )
@@ -181,7 +181,7 @@ def extract_vocal(mdx_model_params: Dict,
181
  time.sleep(2)
182
  backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
183
  output_dir,
184
- mdxnet_models_dir/"UVR_MDXNET_KARA_2.onnx",
185
  vocals_path,
186
  denoise=True,
187
  device_base=device_base,
@@ -201,17 +201,16 @@ def extract_vocal(mdx_model_params: Dict,
201
  # vocals_path = vocals_dereverb_path
202
  return vocals_path
203
 
204
- def process_uvr_task(mdxnet_models_dir: Path,
205
- input_file_path: Path,
206
  output_dir: Path,
 
207
  main_vocals_flag: bool = False, # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
208
- dereverb_flag: bool = False, # If "DeReverb" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
209
  ) -> Tuple[Path, Path]:
210
 
211
  device_base = "cuda" if torch.cuda.is_available() else "cpu"
212
 
213
  # load mdx model definition
214
- with open(mdxnet_models_dir/"model_data.json") as infile:
215
  mdx_model_params = json.load(infile) # type: Dict
216
 
217
  output_dir.mkdir(parents=True, exist_ok=True)
@@ -220,7 +219,7 @@ def process_uvr_task(mdxnet_models_dir: Path,
220
  # 1. Extract pure background music, remove vocals
221
  background_path = extract_bgm(mdx_model_params,
222
  input_file_path,
223
- mdxnet_models_dir,
224
  output_dir,
225
  device_base=device_base)
226
 
@@ -228,10 +227,10 @@ def process_uvr_task(mdxnet_models_dir: Path,
228
  # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
229
  vocals_path = extract_vocal(mdx_model_params,
230
  input_file_path,
231
- mdxnet_models_dir,
 
232
  output_dir,
233
  main_vocals_flag=main_vocals_flag,
234
- dereverb_flag=dereverb_flag,
235
  device_base=device_base)
236
 
237
  return background_path, vocals_path
 
142
 
143
  def extract_bgm(mdx_model_params: Dict,
144
  input_filename: Path,
145
+ model_bgm_path: Path,
146
  output_dir: Path,
147
  device_base: str = "cuda") -> Path:
148
  """
 
151
  background_path, _ = run_mdx(model_params=mdx_model_params,
152
  input_filename=input_filename,
153
  output_dir=output_dir,
154
+ model_path=model_bgm_path,
155
  denoise=False,
156
  device_base=device_base,
157
  )
 
160
 
161
  def extract_vocal(mdx_model_params: Dict,
162
  input_filename: Path,
163
+ model_basic_vocal_path: Path,
164
+ model_main_vocal_path: Path,
165
  output_dir: Path,
166
  main_vocals_flag: bool = False,
 
167
  device_base: str = "cuda") -> Path:
168
  """
169
  Extract vocals
 
172
  vocals_path, _ = run_mdx(mdx_model_params,
173
  input_filename,
174
  output_dir,
175
+ model_basic_vocal_path,
176
  denoise=True,
177
  device_base=device_base,
178
  )
 
181
  time.sleep(2)
182
  backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
183
  output_dir,
184
+ model_main_vocal_path,
185
  vocals_path,
186
  denoise=True,
187
  device_base=device_base,
 
201
  # vocals_path = vocals_dereverb_path
202
  return vocals_path
203
 
204
+ def process_uvr_task(input_file_path: Path,
 
205
  output_dir: Path,
206
+ models_path: Dict[str, Path],
207
  main_vocals_flag: bool = False, # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
 
208
  ) -> Tuple[Path, Path]:
209
 
210
  device_base = "cuda" if torch.cuda.is_available() else "cpu"
211
 
212
  # load mdx model definition
213
+ with open("./mdx_models/model_data.json") as infile:
214
  mdx_model_params = json.load(infile) # type: Dict
215
 
216
  output_dir.mkdir(parents=True, exist_ok=True)
 
219
  # 1. Extract pure background music, remove vocals
220
  background_path = extract_bgm(mdx_model_params,
221
  input_file_path,
222
+ models_path["bgm"],
223
  output_dir,
224
  device_base=device_base)
225
 
 
227
  # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
228
  vocals_path = extract_vocal(mdx_model_params,
229
  input_file_path,
230
+ models_path["basic_vocal"],
231
+ models_path["main_vocal"],
232
  output_dir,
233
  main_vocals_flag=main_vocals_flag,
 
234
  device_base=device_base)
235
 
236
  return background_path, vocals_path