Spaces:

masszhou
/

bgmseparator

Sleeping

App Files Community

masszhou committed on Mar 30

Commit

3e18896

1 Parent(s): 6e8f9db

add download models from hugging face

Browse files

Files changed (3) hide show

.gitignore +2 -1
app.py +15 -4
uvr_processing.py +12 -13

.gitignore CHANGED Viewed

@@ -7,4 +7,5 @@ __pycache__
 *.avi
 *.mkv
 .env
-.vscode

 *.avi
 *.mkv
 .env
+.vscode
+tmp

app.py CHANGED Viewed

@@ -6,9 +6,17 @@ from pathlib import Path
 import os
 import time
 import torch
 from uvr_processing import process_uvr_task
 def get_device_info():
     if torch.cuda.is_available():
         device = f"GPU ({torch.cuda.get_device_name(0)})"
@@ -34,9 +42,9 @@ def inference(audio_file: str,
     outputs = []
     start_time = time.time()
     background_path, vocals_path = process_uvr_task(
-        mdxnet_models_dir=mdxnet_models_dir,
         input_file_path=audio_file,
         output_dir=output_dir,
         )
     end_time = time.time()
     execution_time = end_time - start_time
@@ -72,9 +80,12 @@ if __name__ == "__main__":
     description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
     theme = "NoCrypt/miku"
-    BASE_DIR = "."  # os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    mdxnet_models_dir = os.path.join(BASE_DIR, "mdx_models")
-    output_dir = os.path.join(BASE_DIR, "output")
     # confirm entry points from client
     # client_local = Client("http://127.0.0.1:7860")

 import os
 import time
 import torch
+from huggingface_hub import hf_hub_download
 from uvr_processing import process_uvr_task
+MODEL_ID = "masszhou/mdxnet"
+MODELS_PATH = {
+    "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
+    "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
+    "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
+}
 def get_device_info():
     if torch.cuda.is_available():
         device = f"GPU ({torch.cuda.get_device_name(0)})"
     outputs = []
     start_time = time.time()
     background_path, vocals_path = process_uvr_task(
         input_file_path=audio_file,
         output_dir=output_dir,
+        models_path=MODELS_PATH,
         )
     end_time = time.time()
     execution_time = end_time - start_time
     description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
     theme = "NoCrypt/miku"
+    model_id = "masszhou/mdxnet"
+    models_path = {
+        "bgm": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
+        "basic_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR-MDX-NET-Voc_FT.onnx")),
+        "main_vocal": Path(hf_hub_download(repo_id=model_id, filename="UVR_MDXNET_KARA_2.onnx"))
+    }
     # confirm entry points from client
     # client_local = Client("http://127.0.0.1:7860")

uvr_processing.py CHANGED Viewed

@@ -142,7 +142,7 @@ def run_mdx_cpu(model_params: Dict,
 def extract_bgm(mdx_model_params: Dict,
                 input_filename: Path,
-                mdxnet_models_dir: Path,
                 output_dir: Path,
                 device_base: str = "cuda") -> Path:
     """
@@ -151,7 +151,7 @@ def extract_bgm(mdx_model_params: Dict,
     background_path, _ = run_mdx(model_params=mdx_model_params,
                                   input_filename=input_filename,
                                   output_dir=output_dir,
-                                  model_path=mdxnet_models_dir/"UVR-MDX-NET-Inst_HQ_3.onnx",
                                   denoise=False,
                                   device_base=device_base,
                                   )
@@ -160,10 +160,10 @@ def extract_bgm(mdx_model_params: Dict,
 def extract_vocal(mdx_model_params: Dict,
                   input_filename: Path,
-                  mdxnet_models_dir: Path,
                   output_dir: Path,
                   main_vocals_flag: bool = False,
-                  dereverb_flag: bool = False,
                   device_base: str = "cuda") -> Path:
     """
     Extract vocals
@@ -172,7 +172,7 @@ def extract_vocal(mdx_model_params: Dict,
     vocals_path, _ = run_mdx(mdx_model_params,
                              input_filename,
                              output_dir,
-                             mdxnet_models_dir/"UVR-MDX-NET-Voc_FT.onnx",
                              denoise=True,
                              device_base=device_base,
                              )
@@ -181,7 +181,7 @@ def extract_vocal(mdx_model_params: Dict,
         time.sleep(2)
         backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
                                                        output_dir,
-                                                       mdxnet_models_dir/"UVR_MDXNET_KARA_2.onnx",
                                                        vocals_path,
                                                        denoise=True,
                                                        device_base=device_base,
@@ -201,17 +201,16 @@ def extract_vocal(mdx_model_params: Dict,
     #     vocals_path = vocals_dereverb_path
     return vocals_path
-def process_uvr_task(mdxnet_models_dir: Path,
-                     input_file_path: Path,
                      output_dir: Path,
                      main_vocals_flag: bool = False,  # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
-                     dereverb_flag: bool = False,  # If "DeReverb" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
                      ) -> Tuple[Path, Path]:
     device_base = "cuda" if torch.cuda.is_available() else "cpu"
     # load mdx model definition
-    with open(mdxnet_models_dir/"model_data.json") as infile:
         mdx_model_params = json.load(infile)  # type: Dict
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -220,7 +219,7 @@ def process_uvr_task(mdxnet_models_dir: Path,
     # 1. Extract pure background music, remove vocals
     background_path = extract_bgm(mdx_model_params,
                                   input_file_path,
-                                  mdxnet_models_dir,
                                   output_dir,
                                   device_base=device_base)
@@ -228,10 +227,10 @@ def process_uvr_task(mdxnet_models_dir: Path,
     # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
     vocals_path = extract_vocal(mdx_model_params,
                                 input_file_path,
-                                mdxnet_models_dir,
                                 output_dir,
                                 main_vocals_flag=main_vocals_flag,
-                                dereverb_flag=dereverb_flag,
                                 device_base=device_base)
     return background_path, vocals_path

 def extract_bgm(mdx_model_params: Dict,
                 input_filename: Path,
+                model_bgm_path: Path,
                 output_dir: Path,
                 device_base: str = "cuda") -> Path:
     """
     background_path, _ = run_mdx(model_params=mdx_model_params,
                                   input_filename=input_filename,
                                   output_dir=output_dir,
+                                  model_path=model_bgm_path,
                                   denoise=False,
                                   device_base=device_base,
                                   )
 def extract_vocal(mdx_model_params: Dict,
                   input_filename: Path,
+                  model_basic_vocal_path: Path,
+                  model_main_vocal_path: Path,
                   output_dir: Path,
                   main_vocals_flag: bool = False,
                   device_base: str = "cuda") -> Path:
     """
     Extract vocals
     vocals_path, _ = run_mdx(mdx_model_params,
                              input_filename,
                              output_dir,
+                             model_basic_vocal_path,
                              denoise=True,
                              device_base=device_base,
                              )
         time.sleep(2)
         backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
                                                        output_dir,
+                                                       model_main_vocal_path,
                                                        vocals_path,
                                                        denoise=True,
                                                        device_base=device_base,
     #     vocals_path = vocals_dereverb_path
     return vocals_path
+def process_uvr_task(input_file_path: Path,
                      output_dir: Path,
+                     models_path: Dict[str, Path],
                      main_vocals_flag: bool = False,  # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
                      ) -> Tuple[Path, Path]:
     device_base = "cuda" if torch.cuda.is_available() else "cpu"
     # load mdx model definition
+    with open("./mdx_models/model_data.json") as infile:
         mdx_model_params = json.load(infile)  # type: Dict
     output_dir.mkdir(parents=True, exist_ok=True)
     # 1. Extract pure background music, remove vocals
     background_path = extract_bgm(mdx_model_params,
                                   input_file_path,
+                                  models_path["bgm"],
                                   output_dir,
                                   device_base=device_base)
     # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
     vocals_path = extract_vocal(mdx_model_params,
                                 input_file_path,
+                                models_path["basic_vocal"],
+                                models_path["main_vocal"],
                                 output_dir,
                                 main_vocals_flag=main_vocals_flag,
                                 device_base=device_base)
     return background_path, vocals_path