Spaces:

kevinwang676
/

GPT-SoVITS-v2-jay

Running

App Files Community

kevinwang676 committed on Aug 8, 2024

Commit

3b9c372

verified ·

1 Parent(s): 1fad103

Update GPT_SoVITS/app.py

Browse files

Files changed (1) hide show

GPT_SoVITS/app.py +21 -13

GPT_SoVITS/app.py CHANGED Viewed

@@ -682,9 +682,10 @@ def html_left(text, label='p'):
 with gr.Blocks(title="GPT-SoVITS WebUI") as app:
-    gr.Markdown(
-        value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
-    )
     with gr.Group():
         gr.Markdown(html_center(i18n("模型切换"),'h3'))
         with gr.Row():
@@ -694,11 +695,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
             refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
         gr.Markdown(html_center(i18n("*请上传并填写参考信息"),'h3'))
         with gr.Row():
-            inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频，超过会报错！"), type="filepath", scale=13)
             with gr.Column(scale=13):
                 ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
                 gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT，听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
-                prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=3, max_lines=3)
             prompt_language = gr.Dropdown(
                 label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=14
             )
@@ -706,10 +707,10 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
         gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
         with gr.Row():
             with gr.Column(scale=13):
-                text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=26, max_lines=26)
             with gr.Column(scale=7):
                 text_language = gr.Dropdown(
-                        label=i18n("需要合成的语种")+i18n(".限制范围越小判别效果越好。"), choices=list(dict_language.keys()), value=i18n("中文"), scale=1
                     )
                 how_to_cut = gr.Dropdown(
                         label=i18n("怎么切"),
@@ -720,17 +721,17 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                 gr.Markdown(value=html_center(i18n("语速调整，高为更快")))
                 if_freeze=gr.Checkbox(label=i18n("是否直接对上次合成结果调整语速和音色。防止随机性。"), value=False, interactive=True,show_label=True, scale=1)
                 speed = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label=i18n("语速"),value=1,interactive=True, scale=1)
-                gr.Markdown(html_center(i18n("GPT采样参数(无参考文本时不要太低。不懂就用默认)：")))
-                top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=15,interactive=True, scale=1)
-                top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True, scale=1)
-                temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True,  scale=1)
             # with gr.Column():
             #     gr.Markdown(value=i18n("手工调整音素。当音素框不为空时使用手工音素输入推理，无视目标文本框。"))
             #     phoneme=gr.Textbox(label=i18n("音素框"), value="")
             #     get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
         with gr.Row():
-            inference_button = gr.Button(i18n("合成语音"), variant="primary", size='lg', scale=25)
-            output = gr.Audio(label=i18n("输出的语音"), scale=14)
         inference_button.click(
             get_tts_wav,
@@ -740,6 +741,13 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
         SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
         GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
         # gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好，所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
         # with gr.Row():
         #     text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="")

 with gr.Blocks(title="GPT-SoVITS WebUI") as app:
+    gr.Markdown("# <center>🌊💕🎶 第二代[GPT-SoVITS](https://github.com/RVC-Boss/GPT-SoVITS) 更大更强、完美复刻</center>")
+    gr.Markdown("## <center>🌟 只需1分钟音频，超拟人真实声音复刻，支持中日英韩粤语，最强开源模型！</center>")
+    gr.Markdown("### <center>🤗 更多精彩，尽在[滔滔AI](https://www.talktalkai.com/)；滔滔AI，为爱滔滔！💕</center>")
     with gr.Group():
         gr.Markdown(html_center(i18n("模型切换"),'h3'))
         with gr.Row():
             refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
         gr.Markdown(html_center(i18n("*请上传并填写参考信息"),'h3'))
         with gr.Row():
+            inp_ref = gr.Audio(label=i18n("请上传3~10秒的参考音频，超过会报错！"), type="filepath", scale=13)
             with gr.Column(scale=13):
                 ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
                 gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT，听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
+                prompt_text = gr.Textbox(label=i18n("参考音频对应的文本内容"), value="", lines=3, max_lines=3)
             prompt_language = gr.Dropdown(
                 label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=14
             )
         gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
         with gr.Row():
             with gr.Column(scale=13):
+                text = gr.Textbox(label=i18n("请填写您想要合成的文本"), placeholder="想说却还没说的，还很多...", lines=6)
             with gr.Column(scale=7):
                 text_language = gr.Dropdown(
+                        label=i18n("需要合成的语种")+i18n("限制范围越小判别效果越好。"), choices=list(dict_language.keys()), value=i18n("中文"), scale=1
                     )
                 how_to_cut = gr.Dropdown(
                         label=i18n("怎么切"),
                 gr.Markdown(value=html_center(i18n("语速调整，高为更快")))
                 if_freeze=gr.Checkbox(label=i18n("是否直接对上次合成结果调整语速和音色。防止随机性。"), value=False, interactive=True,show_label=True, scale=1)
                 speed = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label=i18n("语速"),value=1,interactive=True, scale=1)
+                #gr.Markdown(html_center(i18n("GPT采样参数(无参考文本时不要太低。不懂就用默认)：")))
+                top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=15,interactive=True, scale=1, visible=False)
+                top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True, scale=1, visible=False)
+                temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True,  scale=1, visible=False)
             # with gr.Column():
             #     gr.Markdown(value=i18n("手工调整音素。当音素框不为空时使用手工音素输入推理，无视目标文本框。"))
             #     phoneme=gr.Textbox(label=i18n("音素框"), value="")
             #     get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
         with gr.Row():
+            inference_button = gr.Button(i18n("开启声音复刻之旅吧💕"), variant="primary", size='lg', scale=25)
+            output = gr.Audio(label=i18n("为您合成的专属音频🎶"), scale=14)
         inference_button.click(
             get_tts_wav,
         SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
         GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
+        gr.Markdown("### <center>注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。请自觉合规使用此程序，程序开发者不负有任何责任。</center>")
+        gr.HTML('''
+            <div class="footer">
+                        <p>🌊🏞️🎶 - 江水东流急，滔滔无尽声。 明·顾璘
+                        </p>
+            </div>
+        ''')
         # gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好，所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
         # with gr.Row():
         #     text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="")