File size: 5,968 Bytes
04e3e49
 
 
 
5b523ca
04e3e49
39bb818
 
04e3e49
 
 
 
 
6b40c3f
04e3e49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36191eb
04e3e49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5867445
 
131ac4e
 
 
 
5d08ec4
1398337
1f4a6b2
1398337
 
 
 
 
 
 
 
 
 
 
 
04e3e49
672fe50
69bca2b
 
186045c
 
 
 
900e185
 
 
55a4634
 
 
900e185
 
5d08ec4
 
5867445
0bb43b1
55a4634
 
 
900e185
bfa85e8
1f4a6b2
900e185
 
 
f277159
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import gradio as gr
import note_seq
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from constants import GM_INSTRUMENTS

# Tokenizer and causal language model are both loaded once, at import time,
# from the "Katpeeler/midi_model_3" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Katpeeler/midi_model_3")
model = AutoModelForCausalLM.from_pretrained("Katpeeler/midi_model_3")

# Durations in seconds of a 16th note and of a bar (four quarter notes) at
# 120 BPM.  NOTE: despite the "_120BPM" suffix, process() rebinds both globals
# to match the BPM chosen in the UI, and token_sequence_to_note_sequence()
# reads whatever value is current when it runs.
NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / 120
BAR_LENGTH_120BPM = 4.0 * 60 / 120
# Sample rate (Hz) passed to the synthesizer and returned alongside the audio.
SAMPLE_RATE=44100

def token_sequence_to_note_sequence(token_sequence, use_program=True, use_drums=True, instrument_mapper=None, only_piano=False):
    """Convert a sequence of music tokens into a note sequence.

    Timing is derived from the module-level ``NOTE_LENGTH_16TH_120BPM`` and
    ``BAR_LENGTH_120BPM`` values as they are at call time.

    Args:
        token_sequence: Either a list of tokens or a single whitespace
            separated string of tokens.
        use_program: When true, ``INST=<n>`` tokens (other than drums) set the
            program used for the notes that follow.
        use_drums: When true, ``INST=DRUMS`` switches the following notes to
            drum notes with program 0.
        instrument_mapper: Optional mapping applied to the instrument string
            of an ``INST=`` token before it is converted to an integer.
        only_piano: When true, every non-drum note is forced to instrument 0
            and program 0 after conversion.

    Returns:
        The note sequence created by ``empty_note_sequence()`` with one note
        added per ``NOTE_ON`` token.

    Raises:
        ValueError: If the value after ``=`` in an ``INST``, ``NOTE_ON``,
            ``NOTE_OFF`` or ``TIME_DELTA`` token is not numeric.
    """
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()
    note_sequence = empty_note_sequence()

    # Tokens that carry no information needed for rendering.
    structural_tokens = ("PIECE_START", "TRACK_END", "KEYS_START", "KEYS_END", "[PAD]")

    current_program = 1
    current_is_drum = False
    # Timing state is defined up front so that a malformed sequence (e.g. a
    # NOTE_ON or TIME_DELTA before any BAR_START, or a BAR_START before any
    # TRACK_START) is rendered best-effort instead of raising NameError.
    current_bar_index = 0
    current_time = 0.0
    current_notes = {}

    # Render all notes.
    for token in token_sequence:
        if token == "PIECE_END":
            print("The end.")
            break
        elif token in structural_tokens or token.startswith(("KEY=", "DENSITY=")):
            continue
        elif token == "TRACK_START":
            # Every track starts again at the first bar.
            current_bar_index = 0
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument == "DRUMS":
                if use_drums:
                    current_program = 0
                    current_is_drum = True
            elif use_program:
                if instrument_mapper is not None and instrument in instrument_mapper:
                    instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_is_drum = False
        elif token == "BAR_START":
            current_time = current_bar_index * BAR_LENGTH_120BPM
            # Open notes are tracked per bar; a NOTE_OFF only closes a note
            # that was started in the same bar.
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            # Default length of four 16ths; shortened/extended by NOTE_OFF.
            note.end_time = current_time + 4 * NOTE_LENGTH_16TH_120BPM
            note.pitch = pitch
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            open_note = current_notes.get(pitch)
            if open_note is not None:
                open_note.end_time = current_time
        elif token.startswith("TIME_DELTA"):
            current_time += float(token.split("=")[-1]) * NOTE_LENGTH_16TH_120BPM
        else:
            print(f"Ignored token {token}.")

    # Make the instruments right: each distinct (program, is_drum) pair gets
    # its own instrument index, numbered in order of first appearance.
    instrument_indices = {}
    for note in note_sequence.notes:
        pair = (note.program, note.is_drum)
        note.instrument = instrument_indices.setdefault(pair, len(instrument_indices))

    if only_piano:
        for note in note_sequence.notes:
            if not note.is_drum:
                note.instrument = 0
                note.program = 0

    return note_sequence

def empty_note_sequence(qpm=120.0, total_time=0.0):
    """Create a note sequence that contains no notes.

    Args:
        qpm: Value stored as ``qpm`` on the single tempo entry that is added.
        total_time: Value stored as the sequence's ``total_time``.

    Returns:
        A new ``NoteSequence`` with one tempo entry, the standard
        ticks-per-quarter resolution and the given total time.
    """
    sequence = note_seq.protobuf.music_pb2.NoteSequence()
    tempo = sequence.tempos.add()
    tempo.qpm = qpm
    sequence.ticks_per_quarter = note_seq.constants.STANDARD_PPQ
    sequence.total_time = total_time
    return sequence

def process(num1, num2, num3):
    """Generate a piece from the UI settings and render it to audio.

    Side effects: rebinds the module-level ``NOTE_LENGTH_16TH_120BPM`` and
    ``BAR_LENGTH_120BPM`` to match ``num3`` and stores the decoded model
    output in the module-level ``generated_sequence``.

    Args:
        num1: Instrument number placed in the prompt's ``INST=`` token.
        num2: Note number placed in the prompt's first ``NOTE_ON=`` token.
        num3: Tempo in beats per minute; must be positive.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is an int16
        NumPy array.

    Raises:
        ValueError: If ``num3`` is not a positive number.
    """
    global NOTE_LENGTH_16TH_120BPM, BAR_LENGTH_120BPM, generated_sequence

    if num3 <= 0:
        raise ValueError(f"BPM must be positive, got {num3!r}")

    # int() guards against the UI handing over whole numbers as floats
    # (e.g. 25.0), which would produce tokens such as "INST=25.0".
    created_text = f"""PIECE_START STYLE=JSFAKES GENRE=JSFAKES TRACK_START INST={int(num1)} BAR_START NOTE_ON={int(num2)}"""

    # NOTE(review): these globals are shared by every call, so overlapping
    # requests with different BPM values could affect each other - confirm
    # whether the app can serve requests concurrently.
    NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / num3
    BAR_LENGTH_120BPM = 4.0 * 60 / num3

    input_ids = tokenizer.encode(created_text, return_tensors="pt")
    generated_ids = model.generate(input_ids, max_length=500)
    generated_sequence = tokenizer.decode(generated_ids[0])

    # Convert text of notes to audio
    note_sequence = token_sequence_to_note_sequence(generated_sequence)
    waveform = note_seq.midi_synth.synthesize(note_sequence, sample_rate=SAMPLE_RATE)

    # Attenuate by 1.414, then clip so that any sample outside [-1, 1]
    # saturates instead of wrapping around when cast to int16.
    scaled = np.clip(waveform / 1.414, -1.0, 1.0) * 32767
    return SAMPLE_RATE, scaled.astype(np.int16)


def generation():
    """Return the token text stored by the most recent ``process()`` call.

    ``generated_sequence`` only exists as a module global after ``process()``
    has run at least once, so an empty string is returned until then instead
    of raising ``NameError``.
    """
    return globals().get("generated_sequence", "")


def identity(x, state):
    """Pass ``x`` through unchanged and advance ``state`` by one.

    Returns:
        A tuple ``(x, new_state, new_state)`` where ``new_state`` is
        ``state`` incremented by 1.
    """
    state += 1
    result = (x, state, state)
    return result

# Gradio UI: one tab to generate audio from three sliders, and one tab to
# display the token text produced by the most recent generation.
with gr.Blocks() as demo:
    gr.Markdown("Midi Generation")
    with gr.Tab("Audio generation"):
        audio_output = gr.Audio()
        # The three slider values are passed to process() as num1, num2, num3.
        number1 = gr.Slider(1, 100, value=25, label="Inst number", step=1, info="Choose between 1 and 100")
        number2 = gr.Slider(1, 100, value=40, label="Note number", step=1, info="Choose between 1 and 100") 
        number3 = gr.Slider(60, 140, value=120, label="BPM", step=5, info="Choose between 60 and 140")
        audio_button = gr.Button("generate audio")
    with gr.Tab("Token generation"):
        text_output = gr.Textbox()
        text_button = gr.Button("show generated tokens")
    
    # generation() reads the module-level generated_sequence that process()
    # stores, so the token tab reflects the last "generate audio" click.
    text_button.click(generation, inputs=None, outputs=text_output)
    audio_button.click(process, inputs=[number1, number2, number3], outputs=audio_output)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()