jhansss committed on
Commit
2548eae
·
1 Parent(s): d793dd3

Add score metrics calculation; remove frame-level pitch interval and chroma entropy functions

Browse files
Files changed (1) hide show
  1. svs_eval.py +39 -47
svs_eval.py CHANGED
@@ -1,5 +1,4 @@
1
  import librosa
2
- import pyworld as pw
3
  import numpy as np
4
  import torch
5
 
@@ -19,32 +18,36 @@ def singmos_evaluation(predictor, wav_info, fs):
19
  return score
20
 
21
 
22
- def pitch_interval_evaluation(y, fs):
23
- _f0, t = pw.dio(y.astype(np.float64), fs)
24
- f0 = pw.stonemask(y.astype(np.float64), _f0, t, fs)
25
 
26
- f0[f0 == 0] = np.nan
27
- midi_f0 = librosa.hz_to_midi(f0)
28
 
29
- if len(midi_f0) < 2:
30
- return np.nan, np.nan
31
 
32
- # only consider the intervals between notes
33
- intervals = np.diff(midi_f0)
34
- intervals = intervals[~np.isnan(intervals)]
35
- interval_mean = np.mean(np.abs(intervals))
36
- interval_std = np.std(intervals)
37
- return interval_mean, interval_std
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
- def chroma_entropy_evaluation(y, fs):
41
- chroma = librosa.feature.chroma_cqt(y=y, sr=fs)
42
- chroma_sum = np.sum(chroma, axis=0, keepdims=True)
43
- chroma_sum = np.clip(chroma_sum, 1e-6, None)
44
- chroma_norm = chroma / chroma_sum
45
- chroma_norm = np.clip(chroma_norm, 1e-6, 1.0)
46
- entropy = -np.sum(chroma_norm * np.log2(chroma_norm), axis=0)
47
- return np.mean(entropy)
48
 
49
 
50
  if __name__ == "__main__":
@@ -65,49 +68,38 @@ if __name__ == "__main__":
65
  parser.parse_args()
66
 
67
  args = parser.parse_args()
68
-
69
  args.results_csv.parent.mkdir(parents=True, exist_ok=True)
70
 
71
  y, fs = librosa.load(args.wav_path, sr=None)
72
 
73
  # warmup
74
  predictor = singmos_warmup()
 
 
 
 
75
 
76
  # singmos evaluation
77
  score = singmos_evaluation(predictor, y, fs)
78
-
79
- # pitch interval evaluation
80
- interval_mean, interval_std = pitch_interval_evaluation(y, fs)
81
- # chroma entropy evaluation
82
- chroma_entropy = chroma_entropy_evaluation(y, fs)
83
 
84
- # # visualize
85
- # import matplotlib.pyplot as plt
86
- # import librosa.display
87
- # chroma = librosa.feature.chroma_cqt(y=y, sr=fs)
88
- # img = librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
89
- # plt.colorbar(img)
90
- # plt.savefig(args.results_csv.parent / args.wav_path.with_suffix('.png'))
91
 
92
  # save results
93
- results = {
94
- "singmos": score,
95
- "pitch_interval_mean": interval_mean,
96
- "pitch_interval_std": interval_std,
97
- "chroma_entropy": chroma_entropy,
98
- }
99
-
100
  with open(args.results_csv, "a") as f:
101
- header = "file," + ",".join(results.keys()) + "\n"
102
  if f.tell() == 0:
103
  f.write(header)
104
  else:
105
  with open(args.results_csv, "r") as f2:
106
  file_header = f2.readline()
107
  if file_header != header:
108
- raise ValueError(
109
- f"Header mismatch: {file_header} vs {header}"
110
- )
111
 
112
- line = ",".join([str(args.wav_path)] + [str(v) for v in results.values()]) + "\n"
 
 
113
  f.write(line)
 
1
  import librosa
 
2
  import numpy as np
3
  import torch
4
 
 
18
  return score
19
 
20
 
21
def score_extract_warmpup():
    """Import the note-transcription entry point and hand it back.

    The import happens inside the function, so ``basic_pitch`` is only
    loaded when this function is called.

    Returns:
        The ``predict`` callable from ``basic_pitch.inference``.
    """
    import basic_pitch.inference as bp_inference

    return bp_inference.predict
 
25
 
 
 
26
 
27
def score_metric_evaluation(score_extractor, audio_path):
    """Compute melody statistics from the notes transcribed for an audio file.

    Args:
        score_extractor: Callable taking ``audio_path`` and returning a
            ``(model_output, midi_data, note_events)`` triple. Only
            ``midi_data`` is used here.
        audio_path: Path of the audio file passed to ``score_extractor``.

    Returns:
        A dict that always has the keys ``pitch_range``, ``interval_mean``,
        ``interval_std``, ``interval_large_jump_ratio`` and
        ``dissonance_rate``, in that order. A metric that is undefined for
        the input (no notes, or a single note for the interval statistics)
        is ``np.nan``.

    Raises:
        AssertionError: If ``midi_data`` does not hold exactly one instrument.
    """
    _, midi_data, _ = score_extractor(audio_path)
    assert (
        len(midi_data.instruments) == 1
    ), f"Detected {len(midi_data.instruments)} instruments for {audio_path}"

    # Start from a fixed key set so every file yields the same columns;
    # a key set that changes with the input breaks any tabular output
    # whose header is derived from these keys.
    metrics = dict.fromkeys(
        (
            "pitch_range",
            "interval_mean",
            "interval_std",
            "interval_large_jump_ratio",
            "dissonance_rate",
        ),
        np.nan,
    )

    # Sort by onset so consecutive entries are melodically adjacent.
    # NOTE(review): assumes notes expose ``.start`` (as pretty_midi notes
    # do) and that the extractor may not return them chronologically.
    midi_notes = sorted(midi_data.instruments[0].notes, key=lambda note: note.start)
    melody = [note.pitch for note in midi_notes]
    if not melody:
        print(f"No notes detected in {audio_path}")
        return metrics

    metrics["pitch_range"] = max(melody) - min(melody)

    # Absolute semitone distance between each pair of successive notes.
    intervals = [abs(nxt - cur) for cur, nxt in zip(melody, melody[1:])]
    if intervals:
        metrics["interval_mean"] = np.mean(intervals)
        metrics["interval_std"] = np.std(intervals)
        # A "large jump" is anything wider than 5 semitones.
        metrics["interval_large_jump_ratio"] = np.mean([i > 5 for i in intervals])
        metrics["dissonance_rate"] = compute_dissonance_rate(intervals)
    return metrics
46
 
47
 
48
def compute_dissonance_rate(
    intervals, dissonant_intervals=frozenset({1, 2, 6, 10, 11})
):
    """Return the fraction of intervals that fall in a dissonant class.

    Each interval is reduced modulo 12 before the membership test, so
    compound intervals count the same as their simple form.

    Args:
        intervals: Iterable of intervals in semitones (list, tuple, NumPy
            array or generator).
        dissonant_intervals: Container of interval classes (0-11) treated as
            dissonant. The default is immutable so it cannot be altered
            between calls.

    Returns:
        The mean of the per-interval dissonance flags, or ``np.nan`` when
        ``intervals`` yields no elements.
    """
    flags = [i % 12 in dissonant_intervals for i in intervals]
    # Test the materialised list, not ``intervals``: bool() of a NumPy array
    # with more than one element raises, and a generator is always truthy.
    return np.mean(flags) if flags else np.nan
 
 
 
 
 
51
 
52
 
53
  if __name__ == "__main__":
 
68
  parser.parse_args()
69
 
70
  args = parser.parse_args()
71
+
72
  args.results_csv.parent.mkdir(parents=True, exist_ok=True)
73
 
74
  y, fs = librosa.load(args.wav_path, sr=None)
75
 
76
  # warmup
77
  predictor = singmos_warmup()
78
+ score_extractor = score_extract_warmpup()
79
+
80
+ # evaluate the audio
81
+ metrics = {}
82
 
83
  # singmos evaluation
84
  score = singmos_evaluation(predictor, y, fs)
85
+ metrics["singmos"] = score
 
 
 
 
86
 
87
+ # score metric evaluation
88
+ score_results = score_metric_evaluation(score_extractor, args.wav_path)
89
+ metrics.update(score_results)
 
 
 
 
90
 
91
  # save results
 
 
 
 
 
 
 
92
  with open(args.results_csv, "a") as f:
93
+ header = "file," + ",".join(metrics.keys()) + "\n"
94
  if f.tell() == 0:
95
  f.write(header)
96
  else:
97
  with open(args.results_csv, "r") as f2:
98
  file_header = f2.readline()
99
  if file_header != header:
100
+ raise ValueError(f"Header mismatch: {file_header} vs {header}")
 
 
101
 
102
+ line = (
103
+ ",".join([str(args.wav_path)] + [str(v) for v in metrics.values()]) + "\n"
104
+ )
105
  f.write(line)