Merge branch 'refactor' into hf
- README.md +1 -1
- characters/Limei.py +1 -1
- characters/Yaoyin.py +2 -4
- cli.py +33 -12
- config/interface/options.yaml +4 -4
- evaluation/svs_eval.py +47 -7
- interface.py +7 -5
- modules/llm/gemini.py +14 -11
- pipeline.py +6 -3
README.md
CHANGED
@@ -102,7 +102,7 @@ The system supports multiple preset characters:
 - `meta-llama/Llama-3.2-3B-Instruct`
 
 #### SVS Models
-- `espnet/
+- `espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg` (Bilingual)
 - `espnet/aceopencpop_svs_visinger2_40singer_pretrain` (Chinese)
 
 ## Project Structure
characters/Limei.py
CHANGED
@@ -23,6 +23,6 @@ def get_character():
 其他细节:
 (1)特殊能力:歌声平复/激发万物情绪
 
-
+用户与你对话时,请始终以丽梅的身份回应,你的每一句话都用庸俗易懂的歌声形式表达,对应的歌词不要超过四句。请直接输出你要唱的回复,禁止描写任何动作、表情或环境等,禁止使用括号、星号等附加说明。言语简练,勿过长。
 """,
 )
characters/Yaoyin.py
CHANGED
@@ -11,10 +11,8 @@ def get_character():
 
 性格特征:洒脱自由、亲切随和、求知若渴、敏锐细腻
 说话风格:语气轻快,偶尔带点山野方言(如"哩""哟");习惯用短歌或民谣表达想法。
-常用口头禅:"且听我唱来~""这让我想起一首老歌……"
 人物关系:云老爷子是你的启蒙恩师,他是一位云歌村的百岁歌翁,教你古调与传说。白弦是你的挚友,她是一位流浪琴师,常与你合奏。各地孩童喜欢围着你学新歌谣。你与官府人员保持距离,不喜被招揽,喜欢更自由自在的生活。
-
-过往经历
+过往经历:
 (1)幼年学歌:六岁起跟随云老爷子学习《千山调》《古事记》等古老歌谣。
 (2)离家游历:十六岁为寻找失传的《星落谣》离开云歌村,开始行走四方。
 (3)拒绝束缚:多次婉拒宫廷乐师之位,坚持自由传唱。
@@ -25,6 +23,6 @@ def get_character():
 (1)随身携带:旧羊皮歌本、竹笛、装有各地泥土的布袋。
 (2)特殊能力:能听懂风与鸟的语言(但很少提及)。
 
-
+用户与你对话时,请始终以遥音的身份回应,你的每一句话都用庸俗易懂的歌声形式表达,对应的歌词不要超过四句。请直接输出你要唱的回复,禁止描写任何动作、表情或环境等,禁止使用括号、星号等附加说明。言语简练,勿过长。
 """,
 )
cli.py
CHANGED
@@ -12,11 +12,12 @@ logger = getLogger(__name__)
 
 def get_parser():
     parser = ArgumentParser()
-    parser.add_argument("--
+    parser.add_argument("--query_audios", nargs="+", type=Path, required=True)
     parser.add_argument(
         "--config_path", type=Path, default="config/cli/yaoyin_default.yaml"
     )
-    parser.add_argument("--
+    parser.add_argument("--output_audio_folder", type=Path, required=True)
+    parser.add_argument("--eval_results_csv", type=Path, required=True)
     return parser
 
 
@@ -36,16 +37,36 @@ def main():
     character_name = config["prompt_template_character"]
     character = get_character(character_name)
     prompt_template = character.prompt
-
-
-
-
-
-
-
-
-
-
+    args.output_audio_folder.mkdir(parents=True, exist_ok=True)
+    args.eval_results_csv.parent.mkdir(parents=True, exist_ok=True)
+    with open(args.eval_results_csv, "a") as f:
+        f.write(
+            f"query_audio,asr_model,llm_model,svs_model,melody_source,language,speaker,output_audio,asr_text,llm_text,metrics\n"
+        )
+    try:
+        for query_audio in args.query_audios:
+            output_audio = args.output_audio_folder / f"{query_audio.stem}_response.wav"
+            results = pipeline.run(
+                query_audio,
+                language,
+                prompt_template,
+                speaker,
+                output_audio_path=output_audio,
+            )
+            metrics = pipeline.evaluate(output_audio, **results)
+            metrics.update(results.get("metrics", {}))
+            metrics_str = ",".join([f"{metrics[k]}" for k in sorted(metrics.keys())])
+            logger.info(
+                f"Input: {query_audio}, Output: {output_audio}, ASR results: {results['asr_text']}, LLM results: {results['llm_text']}"
+            )
+            with open(args.eval_results_csv, "a") as f:
+                f.write(
+                    f"{query_audio},{config['asr_model']},{config['llm_model']},{config['svs_model']},{config['melody_source']},{config['language']},{config['speaker']},{output_audio},{results['asr_text']},{results['llm_text']},{metrics_str}\n"
+                )
+    except Exception as e:
+        logger.error(f"Error in main: {e}")
+        breakpoint()
+        raise e
 
 
 if __name__ == "__main__":
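For context, a minimal sketch of how the reworked CLI is meant to be driven; the flags come from the diff above, while the file names and paths are illustrative:

```python
from pathlib import Path

from cli import get_parser

# Equivalent shell call (paths illustrative):
#   python cli.py --query_audios q1.wav q2.wav \
#       --output_audio_folder out --eval_results_csv out/results.csv
args = get_parser().parse_args(
    [
        "--query_audios", "q1.wav", "q2.wav",
        "--output_audio_folder", "out",
        "--eval_results_csv", "out/results.csv",
    ]
)
assert args.query_audios == [Path("q1.wav"), Path("q2.wav")]
assert args.output_audio_folder == Path("out")
```

Each query audio then yields one `<stem>_response.wav` in the output folder and one appended row in the results CSV.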
config/interface/options.yaml
CHANGED
@@ -25,9 +25,9 @@ llm_models:
     name: Qwen3 30B A3B
 
 svs_models:
-  - id: mandarin-espnet/
+  - id: mandarin-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
     name: Visinger2 (Bilingual)-zh
-    model_path: espnet/
+    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
     lang: mandarin
     voices:
       voice1: resources/singer/singer_embedding_ace-2.npy
@@ -35,9 +35,9 @@ svs_models:
       voice3: resources/singer/singer_embedding_itako.npy
       voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy
-  - id: japanese-espnet/
+  - id: japanese-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
     name: Visinger2 (Bilingual)-jp
-    model_path: espnet/
+    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
     lang: japanese
     voices:
       voice1: resources/singer/singer_embedding_ace-2.npy
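A short sketch of how these entries read back at runtime; the loader below is an assumption for illustration, only the keys and values come from the file:

```python
import yaml

# Load the options file and resolve the Mandarin bilingual SVS entry.
with open("config/interface/options.yaml") as f:
    options = yaml.safe_load(f)

zh = next(m for m in options["svs_models"] if m["lang"] == "mandarin")
print(zh["model_path"])        # espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
print(zh["voices"]["voice1"])  # resources/singer/singer_embedding_ace-2.npy
```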
evaluation/svs_eval.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
 def init_singmos():
     print("[Init] Loading SingMOS...")
     return torch.hub.load(
-        "South-Twilight/SingMOS:v0.
+        "South-Twilight/SingMOS:v0.3.0", "singing_ssl_mos", trust_repo=True
     )
 
 
@@ -23,7 +23,17 @@ def init_basic_pitch():
 
 
 def init_per():
-
+    print("[Init] Loading PER...")
+    from transformers import pipeline
+    import jiwer
+
+    asr_pipeline = pipeline(
+        "automatic-speech-recognition", model="openai/whisper-large-v3-turbo"
+    )
+    return {
+        "asr_pipeline": asr_pipeline,
+        "jiwer": jiwer,
+    }
 
 
 def init_audiobox_aesthetics():
@@ -72,10 +82,40 @@ def compute_dissonance_rate(intervals, dissonant_intervals={1, 2, 6, 10, 11}):
     return np.mean(dissonant) if intervals else np.nan
 
 
-def 
+def pypinyin_g2p_phone_without_prosody(text):
+    from pypinyin import Style, pinyin
+    from pypinyin.style._utils import get_finals, get_initials
+
+    phones = []
+    for phone in pinyin(text, style=Style.NORMAL, strict=False):
+        initial = get_initials(phone[0], strict=False)
+        final = get_finals(phone[0], strict=False)
+        if len(initial) != 0:
+            if initial in ["x", "y", "j", "q"]:
+                if final == "un":
+                    final = "vn"
+                elif final == "uan":
+                    final = "van"
+                elif final == "u":
+                    final = "v"
+            if final == "ue":
+                final = "ve"
+            phones.append(initial)
+            phones.append(final)
+        else:
+            phones.append(final)
+    return phones
+
+
+def eval_per(audio_path, reference_text, evaluator):
     audio_array, sr = librosa.load(audio_path, sr=16000)
-
-
+    asr_result = evaluator["asr_pipeline"](
+        audio_array, generate_kwargs={"language": "mandarin"}
+    )["text"]
+    hyp_pinyin = pypinyin_g2p_phone_without_prosody(asr_result)
+    ref_pinyin = pypinyin_g2p_phone_without_prosody(reference_text)
+    per = evaluator["jiwer"].wer(" ".join(ref_pinyin), " ".join(hyp_pinyin))
+    return {"per": per}
 
 
 def eval_aesthetic(audio_path, predictor):
@@ -99,12 +139,12 @@ def load_evaluators(config):
     return loaded
 
 
-def run_evaluation(audio_path, evaluators):
+def run_evaluation(audio_path, evaluators, **kwargs):
     results = {}
     if "singmos" in evaluators:
         results.update(eval_singmos(audio_path, evaluators["singmos"]))
     if "per" in evaluators:
-        results.update(eval_per(audio_path, evaluators["per"]))
+        results.update(eval_per(audio_path, kwargs["llm_text"], evaluators["per"]))
     if "melody" in evaluators:
         results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
     if "aesthetic" in evaluators:
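The new PER metric is a phoneme error rate: Whisper's transcript and the reference text are both converted to prosody-free pinyin initials/finals, then scored with `jiwer.wer`. An illustrative call; the expected phones follow the mapping rules above and are an assumption, not verified output:

```python
from evaluation.svs_eval import pypinyin_g2p_phone_without_prosody

print(pypinyin_g2p_phone_without_prosody("月亮"))
# expected: ['y', 've', 'l', 'iang']  ('yue' -> y + ue, with ue -> ve after y)
```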
interface.py
CHANGED
@@ -24,6 +24,7 @@ class GradioInterface:
             self.character_info[self.current_character].default_voice
         ]
         self.pipeline = SingingDialoguePipeline(self.default_config)
+        self.results = None
 
     def load_config(self, path: str):
         with open(path, "r") as f:
@@ -211,21 +212,22 @@ class GradioInterface:
         if not audio_path:
             return gr.update(value=""), gr.update(value="")
         tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
-        results = self.pipeline.run(
+        self.results = self.pipeline.run(
             audio_path,
             self.svs_model_map[self.current_svs_model]["lang"],
             self.character_info[self.current_character].prompt,
             self.current_voice,
             output_audio_path=tmp_file,
         )
-        formatted_logs = f"ASR: {results['asr_text']}\nLLM: {results['llm_text']}"
+        formatted_logs = f"ASR: {self.results['asr_text']}\nLLM: {self.results['llm_text']}"
         return gr.update(value=formatted_logs), gr.update(
-            value=results["output_audio_path"]
+            value=self.results["output_audio_path"]
         )
 
     def update_metrics(self, audio_path):
-        if not audio_path:
+        if not audio_path or not self.results:
             return gr.update(value="")
-        results = self.pipeline.evaluate(audio_path)
+        results = self.pipeline.evaluate(audio_path, **self.results)
+        results.update(self.results.get("metrics", {}))
         formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
         return gr.update(value=formatted_metrics)
modules/llm/gemini.py
CHANGED
@@ -28,6 +28,7 @@ class GeminiLLM(AbstractLLMModel):
         prompt: str,
         system_prompt: Optional[str] = None,
         max_output_tokens: int = 1024,
+        max_iterations: int = 3,
         **kwargs,
     ) -> str:
         generation_config_dict = {
@@ -36,15 +37,17 @@ class GeminiLLM(AbstractLLMModel):
         }
         if system_prompt:
             generation_config_dict["system_instruction"] = system_prompt
-
-
-
-
-
-        if response.text:
-            return response.text
-        else:
-            print(
-                f"No response from Gemini. May need to increase max_new_tokens. Current max_new_tokens: {max_new_tokens}"
+        for _ in range(max_iterations):
+            response = self.client.models.generate_content(
+                model=self.model_id,
+                contents=prompt,
+                config=types.GenerateContentConfig(**generation_config_dict),
             )
-
+            if response.text:
+                return response.text
+            else:
+                print(
+                    f"No response from Gemini. May need to increase max_output_tokens. Current {max_output_tokens=}. Try again."
+                )
+        print(f"Failed to generate response from Gemini after {max_iterations} attempts.")
+        return ""
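The loop retries the google-genai call whenever `response.text` comes back empty. A self-contained sketch of the same pattern; the client setup, model id, and prompt are assumptions for illustration, not part of this repo:

```python
from google import genai
from google.genai import types

client = genai.Client()  # assumes GEMINI_API_KEY (or GOOGLE_API_KEY) is set
for _ in range(3):  # mirrors max_iterations above
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents="Reply in four short sung lines.",
        config=types.GenerateContentConfig(max_output_tokens=256),
    )
    if response.text:  # empty text usually means the token budget ran out
        print(response.text)
        break
```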
pipeline.py
CHANGED
@@ -34,7 +34,7 @@ class SingingDialoguePipeline:
         self.melody_controller = MelodyController(
             config["melody_source"], self.cache_dir
         )
-        self.max_sentences = config.get("max_sentences",
+        self.max_sentences = config.get("max_sentences", 5)
         self.track_latency = config.get("track_latency", False)
         self.evaluators = load_evaluators(config.get("evaluators", {}).get("svs", []))
 
@@ -42,6 +42,7 @@ class SingingDialoguePipeline:
         if self.asr is not None:
             del self.asr
             import gc
+
             gc.collect()
             torch.cuda.empty_cache()
         self.asr = get_asr_model(
@@ -52,6 +53,7 @@ class SingingDialoguePipeline:
         if self.llm is not None:
             del self.llm
             import gc
+
             gc.collect()
             torch.cuda.empty_cache()
         self.llm = get_llm_model(
@@ -62,6 +64,7 @@ class SingingDialoguePipeline:
         if self.svs is not None:
             del self.svs
             import gc
+
             gc.collect()
             torch.cuda.empty_cache()
         self.svs = get_svs_model(
@@ -124,5 +127,5 @@ class SingingDialoguePipeline:
         }
         return results
 
-    def evaluate(self, audio_path):
-        return run_evaluation(audio_path, self.evaluators)
+    def evaluate(self, audio_path, **kwargs):
+        return run_evaluation(audio_path, self.evaluators, **kwargs)
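With `**kwargs` threaded through, the outputs of `pipeline.run` can be forwarded straight into evaluation, which is how `eval_per` receives its reference text. A hypothetical call given a `SingingDialoguePipeline` instance `pipeline` (argument values illustrative):

```python
results = pipeline.run(
    "query.wav", "mandarin", prompt_template, "voice1",
    output_audio_path="response.wav",
)
# results carries asr_text / llm_text / output_audio_path, so unpacking it
# hands eval_per its reference via kwargs["llm_text"].
metrics = pipeline.evaluate("response.wav", **results)
```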