Commit d574298 by admin · 1 parent: 05f952e

Files changed (4):
  1. app.py +57 -59
  2. model.py +9 -4
  3. requirements.txt +5 -3
  4. utils.py +62 -12
app.py CHANGED
````diff
@@ -8,25 +8,17 @@ import gradio as gr
 import librosa.display
 import matplotlib.pyplot as plt
 from model import EvalNet
-from utils import get_modelist, find_wav_files, embed_img
-
-
-TRANSLATE = {
-    "vibrato": "Rou xian",
-    "trill": "Chan yin",
-    "tremolo": "Chan gong",
-    "staccato": "Dun gong",
-    "ricochet": "Pao gong",
-    "pizzicato": "Bo xian",
-    "percussive": "Ji gong",
-    "legato_slide_glissando": "Lian hua yin",
-    "harmonic": "Fan yin",
-    "diangong": "Dian gong",
-    "detache": "Fen gong",
-}
-CLASSES = list(TRANSLATE.keys())
-TEMP_DIR = "./__pycache__/tmp"
-SAMPLE_RATE = 44100
+from utils import (
+    get_modelist,
+    find_wav_files,
+    embed_img,
+    _L,
+    EN_US,
+    SAMPLE_RATE,
+    TEMP_DIR,
+    TRANSLATE,
+    CLASSES,
+)
 
 
 def circular_padding(y: np.ndarray, sr: int, dur=3):
@@ -88,33 +80,38 @@ def wav2chroma(audio_path: str):
 
 
 def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
-    if os.path.exists(folder_path):
-        shutil.rmtree(folder_path)
+    status = "Success"
+    filename = result = None
+    try:
+        if os.path.exists(folder_path):
+            shutil.rmtree(folder_path)
 
-    if not wav_path:
-        return None, "Please input an audio!"
+        if not wav_path:
+            return "请输入音频!", None, None
 
-    spec = log_name.split("_")[-3]
-    os.makedirs(folder_path, exist_ok=True)
-    try:
+        spec = log_name.split("_")[-3]
+        os.makedirs(folder_path, exist_ok=True)
         model = EvalNet(log_name, len(TRANSLATE)).model
         eval("wav2%s" % spec)(wav_path)
+        input = embed_img(f"{folder_path}/output.jpg")
+        output: torch.Tensor = model(input)
+        pred_id = torch.max(output.data, 1)[1]
+        filename = os.path.basename(wav_path)
+        result = (
+            CLASSES[pred_id].capitalize()
+            if EN_US
+            else f"{TRANSLATE[CLASSES[pred_id]]} ({CLASSES[pred_id].capitalize()})"
+        )
 
     except Exception as e:
-        return None, f"{e}"
-
-    input = embed_img(f"{folder_path}/output.jpg")
-    output: torch.Tensor = model(input)
-    pred_id = torch.max(output.data, 1)[1]
-    return (
-        os.path.basename(wav_path),
-        f"{TRANSLATE[CLASSES[pred_id]]} ({CLASSES[pred_id].capitalize()})",
-    )
+        status = f"{e}"
+
+    return status, filename, result
 
 
 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
-    models = get_modelist(assign_model="Swin_T_mel")
+    models = get_modelist(assign_model="swin_t_mel")
     examples = []
     example_wavs = find_wav_files()
     for wav in example_wavs:
@@ -124,36 +121,37 @@ if __name__ == "__main__":
     gr.Interface(
         fn=infer,
         inputs=[
-            gr.Audio(label="Upload a recording", type="filepath"),
-            gr.Dropdown(choices=models, label="Select a model", value=models[0]),
+            gr.Audio(label=_L("上传录音"), type="filepath"),
+            gr.Dropdown(choices=models, label=_L("选择模型"), value=models[0]),
         ],
         outputs=[
-            gr.Textbox(label="Audio filename", show_copy_button=True),
-            gr.Textbox(label="Playing tech recognition", show_copy_button=True),
+            gr.Textbox(label=_L("状态栏"), show_copy_button=True),
+            gr.Textbox(label=_L("音频文件名"), show_copy_button=True),
+            gr.Textbox(label=_L("演奏技法识别"), show_copy_button=True),
         ],
         examples=examples,
         cache_examples=False,
-        allow_flagging="never",
-        title="It is recommended to keep the recording length around 3s.",
+        flagging_mode="never",
+        title=_L("建议录音时长保持在 3s 左右"),
     )
 
     gr.Markdown(
-        """
-        # Cite
-        ```bibtex
-        @article{Zhou-2025,
-            author = {Monan Zhou and Shenyang Xu and Zhaorui Liu and Zhaowen Wang and Feng Yu and Wei Li and Baoqiang Han},
-            title = {CCMusic: An Open and Diverse Database for Chinese Music Information Retrieval Research},
-            journal = {Transactions of the International Society for Music Information Retrieval},
-            volume = {8},
-            number = {1},
-            pages = {22--38},
-            month = {Mar},
-            year = {2025},
-            url = {https://doi.org/10.5334/tismir.194},
-            doi = {10.5334/tismir.194}
-        }
-        ```"""
+        f"# {_L('引用')}"
+        + """
+```bibtex
+@article{Zhou-2025,
+    author = {Monan Zhou and Shenyang Xu and Zhaorui Liu and Zhaowen Wang and Feng Yu and Wei Li and Baoqiang Han},
+    title = {CCMusic: An Open and Diverse Database for Chinese Music Information Retrieval Research},
+    journal = {Transactions of the International Society for Music Information Retrieval},
+    volume = {8},
+    number = {1},
+    pages = {22--38},
+    month = {Mar},
+    year = {2025},
+    url = {https://doi.org/10.5334/tismir.194},
+    doi = {10.5334/tismir.194}
+}
+```"""
     )
 
-    demo.launch(ssr_mode=False)
+    demo.launch()
````
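The reworked `infer` funnels every failure into a status string instead of hijacking the result fields, so the interface now drives three Textboxes. A minimal smoke test of the new (status, filename, result) contract, as a sketch: it assumes the Space's modules are importable locally, and `some_clip.wav` is a hypothetical path.

```python
# Sketch: exercising the new three-value contract of infer().
# Assumes app.py and utils.py are importable; "some_clip.wav" is hypothetical.
from utils import get_modelist
from app import infer

models = get_modelist(assign_model="swin_t_mel")
status, filename, result = infer("some_clip.wav", models[0])
print(status)    # "Success" on a clean run, otherwise the exception text
print(filename)  # basename of the input wav, or None if inference failed
print(result)    # e.g. "Vibrato" when EN_US, else "揉弦 (Vibrato)"
```

Note the `flagging_mode="never"` rename in the diff: `allow_flagging` is the older Gradio keyword, replaced in Gradio 5.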
model.py CHANGED
````diff
@@ -1,8 +1,9 @@
 import torch
 import torch.nn as nn
 import torchvision.models as models
+from modelscope.msdatasets import MsDataset
 from datasets import load_dataset
-from utils import MODEL_DIR
+from utils import MODEL_DIR, EN_US
 
 
 class EvalNet:
@@ -17,7 +18,7 @@ class EvalNet:
         self.m_type, self.input_size = self._model_info(m_ver)
 
         if not hasattr(models, m_ver):
-            raise Exception("Unsupported model.")
+            raise ValueError("不支持的模型")
 
         self.model = eval("models.%s()" % m_ver)
         linear_output = self._set_outsize()
@@ -34,11 +35,15 @@ class EvalNet:
             if ver == bb["ver"]:
                 return bb
 
-        print("Backbone name not found, using default option - alexnet.")
+        print("未找到骨干网络名称,使用默认选项 - alexnet")
         return backbone_list[0]
 
     def _model_info(self, m_ver: str):
-        backbone_list = load_dataset("monetjoe/cv_backbones", split="train")
+        backbone_list = (
+            load_dataset("monetjoe/cv_backbones", split="train")
+            if EN_US
+            else MsDataset.load("monetjoe/cv_backbones", split="v1")
+        )
         backbone = self._get_backbone(m_ver, backbone_list)
         m_type = str(backbone["type"])
         input_size = int(backbone["input_size"])
````
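The backbone metadata now comes from one of two mirrors of the same dataset: Hugging Face when `EN_US` is set, ModelScope otherwise. A condensed sketch of that lookup, using the `ver`/`type`/`input_size` record fields that `_get_backbone` and `_model_info` read; `"swin_t"` is just an example torchvision model name.

```python
# Sketch of the dual-source backbone lookup used by EvalNet._model_info.
import os

EN_US = os.getenv("LANG") != "zh_CN.UTF-8"

if EN_US:
    from datasets import load_dataset
    backbone_list = load_dataset("monetjoe/cv_backbones", split="train")
else:
    from modelscope.msdatasets import MsDataset
    backbone_list = MsDataset.load("monetjoe/cv_backbones", split="v1")

# Each record describes one torchvision backbone, keyed by its version tag.
for bb in backbone_list:
    if bb["ver"] == "swin_t":
        print(bb["type"], bb["input_size"])
        break
```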
requirements.txt CHANGED
````diff
@@ -1,5 +1,7 @@
-torch
-pillow
+torch==2.6.0+cu118
+-f https://download.pytorch.org/whl/torch
+torchvision==0.21.0+cu118
+-f https://download.pytorch.org/whl/torchvision
 librosa
 matplotlib
-torchvision
+modelscope[framework]==1.21.0
````
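The pins switch to CUDA 11.8 wheels, with `-f` (find-links) lines pointing pip at the PyTorch wheel index, and add ModelScope for the Chinese-locale path. A quick post-install check, as a sketch; the `+cu118` suffix only appears when the CUDA wheels actually resolved.

```python
# Verify the pinned CUDA 11.8 wheels after `pip install -r requirements.txt`.
import torch
import torchvision

print(torch.__version__)          # expected: 2.6.0+cu118
print(torchvision.__version__)    # expected: 0.21.0+cu118
print(torch.cuda.is_available())  # True only with a CUDA-capable driver
```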
utils.py CHANGED
````diff
@@ -1,15 +1,68 @@
 import os
 import torch
 import torchvision.transforms as transforms
-from huggingface_hub import snapshot_download
+import huggingface_hub
+import modelscope
 from PIL import Image
 
-MODEL_DIR = snapshot_download(
-    "ccmusic-database/erhu_playing_tech",
-    cache_dir="./__pycache__",
+EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
+
+ZH2EN = {
+    "上传录音": "Upload a recording",
+    "选择模型": "Select a model",
+    "状态栏": "Status",
+    "音频文件名": "Audio filename",
+    "演奏技法识别": "Playing tech recognition",
+    "建议录音时长保持在 3s 左右": "It is recommended to keep the recording length around 3s.",
+    "引用": "Cite",
+    "揉弦": "Rou xian",
+    "颤音": "Chan yin",
+    "颤弓": "Chan gong",
+    "顿弓": "Dun gong",
+    "抛弓": "Pao gong",
+    "拨弦": "Bo xian",
+    "击弓": "Ji gong",
+    "连滑音": "Lian hua yin",
+    "泛音": "Fan yin",
+    "垫弓": "Dian gong",
+    "分弓": "Fen gong",
+}
+
+MODEL_DIR = (
+    huggingface_hub.snapshot_download(
+        "ccmusic-database/erhu_playing_tech",
+        cache_dir="./__pycache__",
+    )
+    if EN_US
+    else modelscope.snapshot_download(
+        "ccmusic-database/erhu_playing_tech",
+        cache_dir="./__pycache__",
+    )
 )
 
 
+def _L(zh_txt: str):
+    return ZH2EN[zh_txt] if EN_US else zh_txt
+
+
+TRANSLATE = {
+    "vibrato": _L("揉弦"),
+    "trill": _L("颤音"),
+    "tremolo": _L("颤弓"),
+    "staccato": _L("顿弓"),
+    "ricochet": _L("抛弓"),
+    "pizzicato": _L("拨弦"),
+    "percussive": _L("击弓"),
+    "legato_slide_glissando": _L("连滑音"),
+    "harmonic": _L("泛音"),
+    "diangong": _L("垫弓"),
+    "detache": _L("分弓"),
+}
+CLASSES = list(TRANSLATE.keys())
+TEMP_DIR = "./__pycache__/tmp"
+SAMPLE_RATE = 44100
+
+
 def toCUDA(x):
     if hasattr(x, "cuda"):
         if torch.cuda.is_available():
@@ -30,19 +83,16 @@ def find_wav_files(folder_path=f"{MODEL_DIR}/examples"):
 
 
 def get_modelist(model_dir=MODEL_DIR, assign_model=""):
-    try:
-        entries = os.listdir(model_dir)
-    except OSError as e:
-        print(f"Cannot access {model_dir}: {e}")
-        return
-
     output = []
-    for entry in entries:
+    for entry in os.listdir(model_dir):
+        # get the full path of the entry
         full_path = os.path.join(model_dir, entry)
+        # skip the .git and examples folders
        if entry == ".git" or entry == "examples":
-            print(f"Skip .git / examples dir: {full_path}")
+            print(f"跳过 .git examples 文件夹: {full_path}")
             continue
 
+        # check whether the entry is a directory
         if os.path.isdir(full_path):
             model = os.path.basename(full_path)
             if assign_model and assign_model.lower() in model:
````
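The `_L` helper inverts the usual gettext direction: Chinese literals are the source keys and English is looked up on demand, with the locale decided once at import time from `LANG`. A self-contained sketch of the pattern; note that under `EN_US`, any string missing from `ZH2EN` raises `KeyError`.

```python
# Condensed sketch of the _L localization gate defined in utils.py.
import os

EN_US = os.getenv("LANG") != "zh_CN.UTF-8"

ZH2EN = {"状态栏": "Status"}  # Chinese source string -> English translation

def _L(zh_txt: str):
    # Pass Chinese through unchanged; translate only for English locales.
    return ZH2EN[zh_txt] if EN_US else zh_txt

print(_L("状态栏"))  # "Status" unless LANG is zh_CN.UTF-8
```

Because `EN_US` is evaluated at module import, `LANG` must be set before `utils` is first imported; changing it afterwards has no effect on an already-loaded module.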