Spaces:
Sleeping
Sleeping
Update requirements and add audiobox aesthetics evaluation functions
Browse files
- requirements.txt +4 -1
- svs_eval.py +16 -0
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
git+https://github.com/
|
2 |
espnet_model_zoo
|
3 |
# pyopenjtalk
|
4 |
datasets
|
@@ -9,3 +9,6 @@ fastapi
|
|
9 |
uvicorn
|
10 |
fugashi
|
11 |
pykakasi
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/espnet/espnet
|
2 |
espnet_model_zoo
|
3 |
# pyopenjtalk
|
4 |
datasets
|
|
|
9 |
uvicorn
|
10 |
fugashi
|
11 |
pykakasi
|
12 |
+
basic-pitch[onnx]
|
13 |
+
audiobox_aesthetics
|
14 |
+
git+https://github.com/sea-turt1e/kanjiconv
|
svs_eval.py
CHANGED
@@ -18,6 +18,17 @@ def singmos_evaluation(predictor, wav_info, fs):
|
|
18 |
return score
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def score_extract_warmpup():
|
22 |
from basic_pitch.inference import predict
|
23 |
|
@@ -76,6 +87,7 @@ if __name__ == "__main__":
|
|
76 |
# warmup
|
77 |
predictor = singmos_warmup()
|
78 |
score_extractor = score_extract_warmpup()
|
|
|
79 |
|
80 |
# evaluate the audio
|
81 |
metrics = {}
|
@@ -87,6 +99,10 @@ if __name__ == "__main__":
|
|
87 |
# score metric evaluation
|
88 |
score_results = score_metric_evaluation(score_extractor, args.wav_path)
|
89 |
metrics.update(score_results)
|
|
|
|
|
|
|
|
|
90 |
|
91 |
# save results
|
92 |
with open(args.results_csv, "a") as f:
|
|
|
18 |
return score
|
19 |
|
20 |
|
21 |
+
def initialize_audiobox_predictor():
    """Create and return an audiobox-aesthetics predictor.

    The ``audiobox_aesthetics`` import happens inside the function, so the
    package is only loaded when this function is actually called.

    Returns:
        The object returned by
        ``audiobox_aesthetics.infer.initialize_predictor()``.
    """
    from audiobox_aesthetics.infer import initialize_predictor

    predictor = initialize_predictor()
    return predictor
|
25 |
+
|
26 |
+
|
27 |
+
def audiobox_aesthetics_evaluation(predictor, audio_path):
    """Run the aesthetics predictor on a single audio file.

    Args:
        predictor: Object exposing ``forward(batch)``. The ``__main__``
            section passes the result of ``initialize_audiobox_predictor()``.
        audio_path: Location of the audio file. It is converted with
            ``str()`` before being handed to the predictor, so path-like
            objects are accepted.

    Returns:
        Whatever ``predictor.forward`` returns for the one-item batch
        ``[{"path": str(audio_path)}]``. The caller in ``__main__`` takes
        element ``[0]`` of this value and merges it into its metrics dict.
    """
    score = predictor.forward([{"path": str(audio_path)}])
    return score
|
30 |
+
|
31 |
+
|
32 |
def score_extract_warmpup():
|
33 |
from basic_pitch.inference import predict
|
34 |
|
|
|
87 |
# warmup
|
88 |
predictor = singmos_warmup()
|
89 |
score_extractor = score_extract_warmpup()
|
90 |
+
aesthetic_predictor = initialize_audiobox_predictor()
|
91 |
|
92 |
# evaluate the audio
|
93 |
metrics = {}
|
|
|
99 |
# score metric evaluation
|
100 |
score_results = score_metric_evaluation(score_extractor, args.wav_path)
|
101 |
metrics.update(score_results)
|
102 |
+
|
103 |
+
# audiobox aesthetics evaluation
|
104 |
+
score_results = audiobox_aesthetics_evaluation(aesthetic_predictor, args.wav_path)
|
105 |
+
metrics.update(score_results[0])
|
106 |
|
107 |
# save results
|
108 |
with open(args.results_csv, "a") as f:
|