import importlib
import re
from textwrap import dedent

import gradio as gr
import numpy as np
import yaml
from gradio.components import Dropdown, Textbox

from inference.m4singer.base_svs_infer import BaseSVSInfer
from inference.m4singer.gradio.share_btn import community_icon_html, loading_icon_html, share_js
from utils.hparams import hparams as hp
from utils.hparams import set_hparams


class GradioInfer:
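    """Gradio front end that wraps an M4Singer-style singing voice synthesis (SVS) inference class."""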
    def __init__(self, exp_name, inference_cls, title, description, article, example_inputs):
        self.exp_name = exp_name
        self.title = title
        self.description = description
        self.article = article
        self.example_inputs = example_inputs
        pkg = ".".join(inference_cls.split(".")[:-1])
        cls_name = inference_cls.split(".")[-1]
        self.inference_cls = getattr(importlib.import_module(pkg), cls_name)

    def greet(self, singer, text, notes, notes_duration):
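        """Synthesize singing audio for one request.

        Lyrics, note names, and note durations are split on Chinese sentence-final
        punctuation, synthesized chunk by chunk, and joined with 0.3 s of silence.
        """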
        PUNCS = '。?;:'
        sents = re.split(rf'([{PUNCS}])', text.replace('\n', ','))
        sents_notes = re.split(rf'([{PUNCS}])', notes.replace('\n', ','))
        sents_notes_dur = re.split(rf'([{PUNCS}])', notes_duration.replace('\n', ','))

        if sents[-1] not in list(PUNCS):
            sents = sents + ['']
            sents_notes = sents_notes + ['']
            sents_notes_dur = sents_notes_dur + ['']

        audio_outs = []
        s, n, n_dur = "", "", ""
        for i in range(0, len(sents), 2):
            if len(sents[i]) > 0:
                s += sents[i] + sents[i + 1]
                n += sents_notes[i] + sents_notes[i+1]
                n_dur += sents_notes_dur[i] + sents_notes_dur[i+1]
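            # Flush a chunk once it grows past 400 characters or the input is exhausted.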
            if len(s) >= 400 or (i >= len(sents) - 2 and len(s) > 0):
                audio_out = self.infer_ins.infer_once({
                    'spk_name': singer,
                    'text': s,
                    'notes': n,
                    'notes_duration': n_dur,
                })
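                # Convert the float waveform (assumed to lie in [-1, 1]) to 16-bit PCM.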
                audio_out = audio_out * 32767
                audio_out = audio_out.astype(np.int16)
                audio_outs.append(audio_out)
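                # Separate chunks with 0.3 s of silence.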
                audio_outs.append(np.zeros(int(hp['audio_sample_rate'] * 0.3)).astype(np.int16))
                s = ""
                n = ""
        audio_outs = np.concatenate(audio_outs)
        return (hp['audio_sample_rate'], audio_outs), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)

    def run(self):
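        # Load the experiment's hyperparameters and build the SVS inference instance.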
        set_hparams(config=f'checkpoints/{self.exp_name}/config.yaml', exp_name=self.exp_name, print_hparams=False)
        infer_cls = self.inference_cls
        self.infer_ins: BaseSVSInfer = infer_cls(hp)
        example_inputs = self.example_inputs
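        # Each example is a '<sep>'-joined string: singer, lyrics, notes, note durations.
        # Split in place so that gr.Examples below receives the parsed lists.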
        for i in range(len(example_inputs)):
            singer, text, notes, notes_dur = example_inputs[i].split('<sep>')
            example_inputs[i] = [singer, text, notes, notes_dur]

        singerList = [
            'Tenor-1', 'Tenor-2', 'Tenor-3', 'Tenor-4', 'Tenor-5', 'Tenor-6', 'Tenor-7',
            'Alto-1', 'Alto-2', 'Alto-3', 'Alto-4', 'Alto-5', 'Alto-6', 'Alto-7',
            'Soprano-1', 'Soprano-2', 'Soprano-3',
            'Bass-1', 'Bass-2', 'Bass-3',
        ]

        css = """
        #share-btn-container {
            display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important;
            background-color: #000000; justify-content: center; align-items: center;
            border-radius: 9999px !important; width: 13rem;
        }
        #share-btn {
            all: initial; color: #ffffff; font-weight: 600; cursor: pointer;
            font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important;
            padding-top: 0.25rem !important; padding-bottom: 0.25rem !important; right: 0;
        }
        #share-btn * {
            all: unset;
        }
        #share-btn-container div:nth-child(-n+2){
            width: auto !important;
            min-height: 0px !important;
        }
        #share-btn-container .wrap {
            display: none !important;
        }
        """
        with gr.Blocks(theme="JohnSmith9982/small_and_pretty", css=css) as demo:
            gr.Markdown("# <center>🌊💕🎶 滔滔AI,自定义歌词</center>")
            gr.Markdown("## <center>🌟 歌词唱什么,由你来决定!随时随地,唱我想唱!</center>")
            gr.Markdown("### <center>🤗 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);相关问题欢迎在我们的[B站](https://space.bilibili.com/501495851)账号交流!滔滔AI,为爱滔滔!💕</center>")

            with gr.Accordion("💡📘 使用指南(建议阅读哦)", open=False):
                _ = f""" 在“请选择一位歌手”处您可以预览各歌手的音色。点击下方Examples可以快速加载乐谱和音频,您可以直接根据示例中的乐谱更改歌词、进行创作。
                    * 程序内置的不同歌手信息:Tenor是男高音,Alto是中音,Soprano为女高音,Bass为低音。
                    * 请给每个汉字分配音高和时值, 每个字对应的音高和时值需要用 | 分隔符隔开。需要保证分隔符分割出来的音符窗口与汉字个数一致。换气或静音符也算一个汉字。
                    * AP和SP对应的音符均为rest。AP为换气声的停顿,SP为无声音的停顿。若一个窗口(| .... |)内有多个音符, 代表该窗口对应的汉字为滑音, 需要为每个音符都分配时长。
                    """
                gr.Markdown(dedent(_))            
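            # Illustrative input format (hypothetical values, for reference only):
            #   lyrics:     你 说 你 不 SP 懂
            #   notes:      D4 | D4 | E4 | D4 | rest | D4 C4
            #   durations:  0.20 | 0.33 | 0.29 | 0.13 | 0.15 | 0.48 0.41
            # Each '|'-delimited window maps to one character (AP/SP count as characters
            # and use the pitch "rest"); a window with two notes marks a glissando, and
            # every note in it gets its own duration.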

            with gr.Row():
                with gr.Column():
                    singer_l = Dropdown(choices=singerList, value=example_inputs[0][0], label="请选择一位歌手", elem_id="inp_singer")
                    inp_text = Textbox(lines=2, placeholder=None, value=example_inputs[0][1], label="请输入您喜欢的歌词", elem_id="inp_text")
                    inp_note = Textbox(lines=2, placeholder=None, value=example_inputs[0][2], label="请填写歌词对应的乐谱", elem_id="inp_note")
                    inp_duration = Textbox(lines=2, placeholder=None, value=example_inputs[0][3], label="请填写乐谱中每个音高对应的时长", elem_id="inp_duration")
                    generate = gr.Button("一键开启创作之旅吧🎉", variant="primary")
                with gr.Column():
                    singing_output = gr.Audio(label="您创作的专属歌曲🎶", type="filepath", elem_id="music-output")

                    with gr.Group(elem_id="share-btn-container"):
                        community_icon = gr.HTML(community_icon_html, visible=False)
                        loading_icon = gr.HTML(loading_icon_html, visible=False)
                        share_button = gr.Button("滔滔AI,为爱滔滔💕", elem_id="share-btn", visible=True)
            gr.Examples(examples=self.example_inputs,
                        inputs=[singer_l, inp_text, inp_note, inp_duration],
                        outputs=[singing_output, share_button, community_icon, loading_icon],
                        fn=self.greet,
                        cache_examples=False)
            gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。请自觉合规使用此程序,程序开发者不负有任何责任。</center>")
            gr.HTML('''
                <div class="footer">
                            <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
                            </p>
                </div>
            ''')
            generate.click(self.greet,
                           inputs=[singer_l, inp_text, inp_note, inp_duration],
                           outputs=[singing_output, share_button, community_icon, loading_icon])
            # share_js is imported above but was never attached; wire it to the share button.
            share_button.click(None, [], [], _js=share_js)
        demo.queue(max_size=40, api_open=False)
        demo.launch(max_threads=400, show_error=True)


if __name__ == '__main__':
    with open('inference/m4singer/gradio/gradio_settings.yaml', encoding='utf-8') as f:
        gradio_config = yaml.safe_load(f)
    g = GradioInfer(**gradio_config)
    g.run()
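# Note: the relative paths above ('checkpoints/...', 'inference/...') assume the
# demo is launched from the repository root.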