Spaces:
Sleeping
Sleeping
Add score metrics calculation; remove frame-level pitch interval and chroma entropy functions
Browse files- svs_eval.py +39 -47
svs_eval.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import librosa
|
2 |
-
import pyworld as pw
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
|
@@ -19,32 +18,36 @@ def singmos_evaluation(predictor, wav_info, fs):
|
|
19 |
return score
|
20 |
|
21 |
|
22 |
-
def
|
23 |
-
|
24 |
-
f0 = pw.stonemask(y.astype(np.float64), _f0, t, fs)
|
25 |
|
26 |
-
|
27 |
-
midi_f0 = librosa.hz_to_midi(f0)
|
28 |
|
29 |
-
if len(midi_f0) < 2:
|
30 |
-
return np.nan, np.nan
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
-
def
|
41 |
-
|
42 |
-
|
43 |
-
chroma_sum = np.clip(chroma_sum, 1e-6, None)
|
44 |
-
chroma_norm = chroma / chroma_sum
|
45 |
-
chroma_norm = np.clip(chroma_norm, 1e-6, 1.0)
|
46 |
-
entropy = -np.sum(chroma_norm * np.log2(chroma_norm), axis=0)
|
47 |
-
return np.mean(entropy)
|
48 |
|
49 |
|
50 |
if __name__ == "__main__":
|
@@ -65,49 +68,38 @@ if __name__ == "__main__":
|
|
65 |
parser.parse_args()
|
66 |
|
67 |
args = parser.parse_args()
|
68 |
-
|
69 |
args.results_csv.parent.mkdir(parents=True, exist_ok=True)
|
70 |
|
71 |
y, fs = librosa.load(args.wav_path, sr=None)
|
72 |
|
73 |
# warmup
|
74 |
predictor = singmos_warmup()
|
|
|
|
|
|
|
|
|
75 |
|
76 |
# singmos evaluation
|
77 |
score = singmos_evaluation(predictor, y, fs)
|
78 |
-
|
79 |
-
# pitch interval evaluation
|
80 |
-
interval_mean, interval_std = pitch_interval_evaluation(y, fs)
|
81 |
-
# chroma entropy evaluation
|
82 |
-
chroma_entropy = chroma_entropy_evaluation(y, fs)
|
83 |
|
84 |
-
#
|
85 |
-
|
86 |
-
|
87 |
-
# chroma = librosa.feature.chroma_cqt(y=y, sr=fs)
|
88 |
-
# img = librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
|
89 |
-
# plt.colorbar(img)
|
90 |
-
# plt.savefig(args.results_csv.parent / args.wav_path.with_suffix('.png'))
|
91 |
|
92 |
# save results
|
93 |
-
results = {
|
94 |
-
"singmos": score,
|
95 |
-
"pitch_interval_mean": interval_mean,
|
96 |
-
"pitch_interval_std": interval_std,
|
97 |
-
"chroma_entropy": chroma_entropy,
|
98 |
-
}
|
99 |
-
|
100 |
with open(args.results_csv, "a") as f:
|
101 |
-
header = "file," + ",".join(
|
102 |
if f.tell() == 0:
|
103 |
f.write(header)
|
104 |
else:
|
105 |
with open(args.results_csv, "r") as f2:
|
106 |
file_header = f2.readline()
|
107 |
if file_header != header:
|
108 |
-
raise ValueError(
|
109 |
-
f"Header mismatch: {file_header} vs {header}"
|
110 |
-
)
|
111 |
|
112 |
-
line =
|
|
|
|
|
113 |
f.write(line)
|
|
|
1 |
import librosa
|
|
|
2 |
import numpy as np
|
3 |
import torch
|
4 |
|
|
|
18 |
return score
|
19 |
|
20 |
|
21 |
+
def score_extract_warmpup():
    """Import Basic Pitch on demand and return its ``predict`` callable.

    The import lives inside the function, so ``basic_pitch`` is only loaded
    when this function is actually called.

    Returns:
        The ``predict`` function from ``basic_pitch.inference``.
    """
    import basic_pitch.inference as bp_inference

    return bp_inference.predict
|
|
|
25 |
|
|
|
|
|
26 |
|
27 |
+
def score_metric_evaluation(score_extractor, audio_path):
    """Compute note-level melody metrics from a transcription of an audio file.

    Args:
        score_extractor: Callable taking ``audio_path`` and returning a
            3-tuple whose second element exposes ``.instruments``; each
            instrument exposes ``.notes`` and each note a ``.pitch``.
        audio_path: Path of the audio file handed to ``score_extractor``.

    Returns:
        A dict that always contains the keys ``pitch_range``,
        ``interval_mean``, ``interval_std``, ``interval_large_jump_ratio``
        and ``dissonance_rate``. A metric that cannot be computed (no notes,
        or a single note for the interval-based metrics) is ``np.nan``.

    Raises:
        AssertionError: If more than one instrument track is detected.
    """
    _, midi_data, _ = score_extractor(audio_path)

    # Pre-fill every key so the result has the same columns for every file.
    # Callers that derive a CSV header from the keys would otherwise see a
    # different header for files with zero or one detected note.
    metrics = {
        "pitch_range": np.nan,
        "interval_mean": np.nan,
        "interval_std": np.nan,
        "interval_large_jump_ratio": np.nan,
        "dissonance_rate": np.nan,
    }

    instruments = midi_data.instruments
    if len(instruments) > 1:
        # Raised explicitly (instead of via `assert`) so the check survives
        # `python -O`; the exception type and message are kept as before.
        raise AssertionError(
            f"Detected {len(instruments)} instruments for {audio_path}"
        )

    # NOTE(review): presumably the extractor can return no instrument track
    # at all when nothing is transcribed -- treated here as "no notes"
    # rather than as an error; confirm against the extractor's behavior.
    melody = [note.pitch for note in instruments[0].notes] if instruments else []
    if not melody:
        print(f"No notes detected in {audio_path}")
        return metrics

    metrics["pitch_range"] = max(melody) - min(melody)

    # Absolute pitch step between each pair of consecutive notes.
    intervals = [abs(nxt - cur) for cur, nxt in zip(melody, melody[1:])]
    if intervals:
        metrics["interval_mean"] = np.mean(intervals)
        metrics["interval_std"] = np.std(intervals)
        # Fraction of steps strictly larger than 5.
        metrics["interval_large_jump_ratio"] = np.mean([i > 5 for i in intervals])
        metrics["dissonance_rate"] = compute_dissonance_rate(intervals)
    return metrics
|
46 |
|
47 |
|
48 |
+
def compute_dissonance_rate(
    intervals, dissonant_intervals=frozenset({1, 2, 6, 10, 11})
):
    """Return the fraction of intervals whose value modulo 12 is dissonant.

    Args:
        intervals: Iterable of integer intervals (list, NumPy array or any
            other iterable).
        dissonant_intervals: Container of values (compared after ``% 12``)
            that count as dissonant. The default is immutable so it cannot
            be altered between calls.

    Returns:
        The mean of the per-interval dissonance flags, or ``np.nan`` when
        ``intervals`` is empty.
    """
    flags = [step % 12 in dissonant_intervals for step in intervals]
    # Test the materialized list rather than `intervals` itself: the truth
    # value of a multi-element NumPy array is ambiguous and raises, and a
    # generator is always truthy even when it yields nothing.
    if not flags:
        return np.nan
    return np.mean(flags)
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
|
53 |
if __name__ == "__main__":
|
|
|
68 |
parser.parse_args()
|
69 |
|
70 |
args = parser.parse_args()
|
71 |
+
|
72 |
args.results_csv.parent.mkdir(parents=True, exist_ok=True)
|
73 |
|
74 |
y, fs = librosa.load(args.wav_path, sr=None)
|
75 |
|
76 |
# warmup
|
77 |
predictor = singmos_warmup()
|
78 |
+
score_extractor = score_extract_warmpup()
|
79 |
+
|
80 |
+
# evaluate the audio
|
81 |
+
metrics = {}
|
82 |
|
83 |
# singmos evaluation
|
84 |
score = singmos_evaluation(predictor, y, fs)
|
85 |
+
metrics["singmos"] = score
|
|
|
|
|
|
|
|
|
86 |
|
87 |
+
# score metric evaluation
|
88 |
+
score_results = score_metric_evaluation(score_extractor, args.wav_path)
|
89 |
+
metrics.update(score_results)
|
|
|
|
|
|
|
|
|
90 |
|
91 |
# save results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
with open(args.results_csv, "a") as f:
|
93 |
+
header = "file," + ",".join(metrics.keys()) + "\n"
|
94 |
if f.tell() == 0:
|
95 |
f.write(header)
|
96 |
else:
|
97 |
with open(args.results_csv, "r") as f2:
|
98 |
file_header = f2.readline()
|
99 |
if file_header != header:
|
100 |
+
raise ValueError(f"Header mismatch: {file_header} vs {header}")
|
|
|
|
|
101 |
|
102 |
+
line = (
|
103 |
+
",".join([str(args.wav_path)] + [str(v) for v in metrics.values()]) + "\n"
|
104 |
+
)
|
105 |
f.write(line)
|