fixes
Changed files:
- midi_synthesizer.py +1 -1
- midi_tokenizer.py +77 -6
midi_synthesizer.py

```diff
@@ -14,7 +14,7 @@ def synthesis(midi_opus, soundfont_path, sample_rate=44100):
             event_list.append(event_new)
     event_list = sorted(event_list, key=lambda e: e[1])
 
-    tempo = int((60 /
+    tempo = int((60 / 120) * 10 ** 6)  # default 120 bpm
     ss = np.empty((0, 2), dtype=np.int16)
     fl = fluidsynth.Synth(samplerate=float(sample_rate))
     sfid = fl.sfload(soundfont_path)
```
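Note: a MIDI `set_tempo` event stores microseconds per quarter note, not BPM, so the new default of 120 BPM becomes `int((60 / 120) * 10 ** 6) = 500000`. A minimal sketch of the conversion (the helper name is illustrative, not from this repo):

```python
# Hypothetical helper: MIDI tempo is microseconds per quarter note,
# so 120 BPM maps to (60 / 120) * 10**6 = 500000.
def bpm_to_tempo(bpm: float) -> int:
    return int((60 / bpm) * 10 ** 6)

assert bpm_to_tempo(120) == 500000
```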
midi_tokenizer.py

```diff
@@ -1,3 +1,5 @@
+import random
+
 import PIL
 import numpy as np
 
```
```diff
@@ -43,19 +45,31 @@ class MIDITokenizer:
     def tokenize(self, midi_score, add_bos_eos=True):
         ticks_per_beat = midi_score[0]
         event_list = {}
-        track_num = len(midi_score[1:])
         for track_idx, track in enumerate(midi_score[1:129]):
+            last_notes = {}
             for event in track:
-                t = round(16 * event[1] / ticks_per_beat)
+                t = round(16 * event[1] / ticks_per_beat)  # quantization
                 new_event = [event[0], t // 16, t % 16, track_idx] + event[2:]
                 if event[0] == "note":
                     new_event[4] = max(1, round(16 * new_event[4] / ticks_per_beat))
                 elif event[0] == "set_tempo":
                     new_event[4] = int(self.tempo2bpm(new_event[4]))
-                key = tuple(new_event[:-1])
+                if event[0] == "note":
+                    key = tuple(new_event[:4] + new_event[5:-1])
+                else:
+                    key = tuple(new_event[:-1])
+                if event[0] == "note":  # to eliminate note overlap due to quantization
+                    cp = tuple(new_event[5:7])
+                    if cp in last_notes:
+                        last_note_key, last_note = last_notes[cp]
+                        last_t = last_note[1] * 16 + last_note[2]
+                        last_note[4] = max(0, min(last_note[4], t - last_t))
+                        if last_note[4] == 0:
+                            event_list.pop(last_note_key)
+                    last_notes[cp] = (key, new_event)
                 event_list[key] = new_event
         event_list = list(event_list.values())
-        event_list = sorted(event_list, key=lambda e:
+        event_list = sorted(event_list, key=lambda e: e[1:4])
         midi_seq = []
 
         last_t1 = 0
```
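The new overlap handling in `tokenize` targets a quantization artifact: onsets snap to a 1/16-note grid (`t // 16` beats, `t % 16` within the beat), so two close notes on the same channel and pitch can collide. `last_notes` remembers the previous note per `(channel, pitch)`, trims it so it ends no later than the next onset, and drops it from `event_list` if that leaves zero length. A standalone sketch of the idea, using plain tuples and hypothetical names rather than the class internals:

```python
def quantize_and_trim(notes, ticks_per_beat):
    """notes: list of (start_ticks, dur_ticks, channel, pitch) tuples."""
    last = {}  # (channel, pitch) -> previous quantized note (mutable)
    out = []
    for start, dur, ch, pitch in notes:
        t = round(16 * start / ticks_per_beat)        # onset on the 1/16 grid
        d = max(1, round(16 * dur / ticks_per_beat))  # duration in 1/16 steps
        if (ch, pitch) in last:
            prev = last[(ch, pitch)]
            prev[1] = max(0, min(prev[1], t - prev[0]))  # end <= next onset
        note = [t, d, ch, pitch]
        last[(ch, pitch)] = note
        out.append(note)
    return [n for n in out if n[1] > 0]  # zero-length notes are dropped

# Two C4 notes 10 ticks apart at 480 ticks/beat land on the same grid slot,
# so the earlier one collapses to zero length and is removed:
print(quantize_and_trim([(0, 480, 0, 60), (10, 480, 0, 60)], 480))
# -> [[0, 16, 0, 60]]
```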
```diff
@@ -113,18 +127,24 @@
                 tracks_dict[track_idx] = []
             tracks_dict[track_idx].append([event[0], t] + event[4:])
         tracks = list(tracks_dict.values())
-        for i in range(len(tracks)):
+
+        for i in range(len(tracks)):  # to eliminate note overlap
             track = tracks[i]
             track = sorted(track, key=lambda e: e[1])
             last_note_t = {}
+            zero_len_notes = []
             for e in reversed(track):
                 if e[0] == "note":
                     t, d, c, p = e[1:5]
                     key = (c, p)
                     if key in last_note_t:
-                        d = min(d, max(last_note_t[key] - t, 0))
+                        d = min(d, max(last_note_t[key] - t, 0))
                     last_note_t[key] = t
                     e[2] = d
+                    if d == 0:
+                        zero_len_notes.append(e)
+            for e in zero_len_notes:
+                track.remove(e)
             tracks[i] = track
         return [ticks_per_beat, *tracks]
 
```
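The decode path applies the same rule in reverse: scanning each track right-to-left, every note sees the onset of the note that follows it on the same `(channel, pitch)` via `last_note_t`, and its duration is clamped to that gap. Zero-length notes are collected into `zero_len_notes` and removed after the scan, since removing items from a list while iterating over it skips elements. A compact sketch of the reverse scan on bare `[onset, duration]` pairs (illustrative only):

```python
# One (channel, pitch) lane; onsets and durations in 1/16 steps.
events = [[0, 8], [4, 8], [4, 2]]
next_onset = None
zero_len = []
for e in reversed(events):
    if next_onset is not None:
        e[1] = min(e[1], max(next_onset - e[0], 0))  # clamp to the next onset
    next_onset = e[0]
    if e[1] == 0:
        zero_len.append(e)  # defer removal until the scan is done
for e in zero_len:
    events.remove(e)
print(events)  # [[0, 4], [4, 2]]
```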
```diff
@@ -148,3 +168,54 @@
             img[p, t: t + d] = colors[(tr, c)]
         img = PIL.Image.fromarray(np.flip(img, 0))
         return img
+
+    def augment(self, midi_seq, max_pitch_shift=4, max_vel_shift=10, max_cc_val_shift=10, max_bpm_shift=10):
+        pitch_shift = random.randint(-max_pitch_shift, max_pitch_shift)
+        vel_shift = random.randint(-max_vel_shift, max_vel_shift)
+        cc_val_shift = random.randint(-max_cc_val_shift, max_cc_val_shift)
+        bpm_shift = random.randint(-max_bpm_shift, max_bpm_shift)
+        midi_seq_new = []
+        for tokens in midi_seq:
+            tokens_new = [*tokens]
+            if tokens[0] in self.id_events:
+                name = self.id_events[tokens[0]]
+                if name == "note":
+                    c = tokens[5] - self.parameter_ids["channel"][0]
+                    p = tokens[6] - self.parameter_ids["pitch"][0]
+                    v = tokens[7] - self.parameter_ids["velocity"][0]
+                    if c != 9:  # no shift for drums
+                        p += pitch_shift
+                    if not 0 <= p < 128:
+                        return midi_seq
+                    v += vel_shift
+                    v = max(1, min(127, v))
+                    tokens_new[6] = self.parameter_ids["pitch"][p]
+                    tokens_new[7] = self.parameter_ids["velocity"][v]
+                elif name == "control_change":
+                    cc = tokens[5] - self.parameter_ids["controller"][0]
+                    val = tokens[6] - self.parameter_ids["value"][0]
+                    if cc in [1, 2, 7, 11]:
+                        val += cc_val_shift
+                    val = max(1, min(127, val))
+                    tokens_new[6] = self.parameter_ids["value"][val]
+                elif name == "set_tempo":
+                    bpm = tokens[4] - self.parameter_ids["bpm"][0]
+                    bpm += bpm_shift
+                    bpm = max(1, min(255, bpm))
+                    tokens_new[4] = self.parameter_ids["bpm"][bpm]
+            midi_seq_new.append(tokens_new)
+        return midi_seq_new
+
+    def check_alignment(self, midi_seq, threshold=0.4):
+        total = 0
+        hist = [0] * 16
+        for tokens in midi_seq:
+            if tokens[0] in self.id_events and self.id_events[tokens[0]] == "note":
+                t2 = tokens[2] - self.parameter_ids["time2"][0]
+                total += 1
+                hist[t2] += 1
+        if total == 0:
+            return False
+        hist = sorted(hist, reverse=True)
+        p = sum(hist[:2]) / total
+        return p > threshold
```
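The new `augment` method draws one random shift per sequence for pitch, velocity, controller value (only CC 1, 2, 7, 11) and BPM, never transposes channel 9 (drums), and returns the original sequence unchanged if any shifted pitch would leave 0..127. A standalone sketch of the pitch rule on plain `(channel, pitch)` pairs, not the tokenizer's token layout:

```python
import random

def augment_pitches(notes, max_pitch_shift=4):
    """Hypothetical: one random transposition applied to a whole sequence."""
    shift = random.randint(-max_pitch_shift, max_pitch_shift)
    out = []
    for ch, pitch in notes:
        if ch != 9:  # drums keep their pitch (drum map, not melody)
            pitch += shift
            if not 0 <= pitch < 128:
                return notes  # reject the shift, keep the original sequence
        out.append((ch, pitch))
    return out

print(augment_pitches([(0, 60), (9, 36), (0, 64)]))
```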
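`check_alignment` histograms each note's within-beat position (`time2`, 0..15) and accepts the sequence when the two most frequent positions cover more than `threshold` of all notes, i.e. when onsets cluster on a steady grid. A minimal sketch of the same heuristic on bare positions:

```python
def is_aligned(t2_positions, threshold=0.4):
    """t2_positions: each note's 1/16 offset inside its beat (0..15)."""
    if not t2_positions:
        return False
    hist = [0] * 16
    for t2 in t2_positions:
        hist[t2] += 1
    top_two = sum(sorted(hist, reverse=True)[:2])
    return top_two / len(t2_positions) > threshold

print(is_aligned([0, 0, 8, 8, 0, 4]))  # True: slots 0 and 8 hold 5/6 of onsets
print(is_aligned(list(range(16))))     # False: onsets are spread evenly
```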
|