jhansss committed on
Commit
1ec2d7e
·
1 Parent(s): 025d5b1

Update requirements and add audiobox aesthetics evaluation functions

Browse files
Files changed (2) hide show
  1. requirements.txt +4 -1
  2. svs_eval.py +16 -0
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- git+https://github.com/South-Twilight/espnet==202402
2
  espnet_model_zoo
3
  # pyopenjtalk
4
  datasets
@@ -9,3 +9,6 @@ fastapi
9
  uvicorn
10
  fugashi
11
  pykakasi
 
 
 
 
1
+ git+https://github.com/espnet/espnet
2
  espnet_model_zoo
3
  # pyopenjtalk
4
  datasets
 
9
  uvicorn
10
  fugashi
11
  pykakasi
12
+ basic-pitch[onnx]
13
+ audiobox_aesthetics
14
+ git+https://github.com/sea-turt1e/kanjiconv
svs_eval.py CHANGED
@@ -18,6 +18,17 @@ def singmos_evaluation(predictor, wav_info, fs):
18
  return score
19
 
20
 
 
 
 
 
 
 
 
 
 
 
 
21
  def score_extract_warmpup():
22
  from basic_pitch.inference import predict
23
 
@@ -76,6 +87,7 @@ if __name__ == "__main__":
76
  # warmup
77
  predictor = singmos_warmup()
78
  score_extractor = score_extract_warmpup()
 
79
 
80
  # evaluate the audio
81
  metrics = {}
@@ -87,6 +99,10 @@ if __name__ == "__main__":
87
  # score metric evaluation
88
  score_results = score_metric_evaluation(score_extractor, args.wav_path)
89
  metrics.update(score_results)
 
 
 
 
90
 
91
  # save results
92
  with open(args.results_csv, "a") as f:
 
18
  return score
19
 
20
 
21
+ def initialize_audiobox_predictor():
22
+ from audiobox_aesthetics.infer import initialize_predictor
23
+ predictor = initialize_predictor()
24
+ return predictor
25
+
26
+
27
+ def audiobox_aesthetics_evaluation(predictor, audio_path):
28
+ score = predictor.forward([{"path": str(audio_path)}])
29
+ return score
30
+
31
+
32
  def score_extract_warmpup():
33
  from basic_pitch.inference import predict
34
 
 
87
  # warmup
88
  predictor = singmos_warmup()
89
  score_extractor = score_extract_warmpup()
90
+ aesthetic_predictor = initialize_audiobox_predictor()
91
 
92
  # evaluate the audio
93
  metrics = {}
 
99
  # score metric evaluation
100
  score_results = score_metric_evaluation(score_extractor, args.wav_path)
101
  metrics.update(score_results)
102
+
103
+ # audiobox aesthetics evaluation
104
+ score_results = audiobox_aesthetics_evaluation(aesthetic_predictor, args.wav_path)
105
+ metrics.update(score_results[0])
106
 
107
  # save results
108
  with open(args.results_csv, "a") as f: