wsj1995 committed on
Commit
5dec745
·
1 Parent(s): 48f1862

feat: file upload

Browse files
Files changed (1) hide show
  1. GPT_SoVITS/inference_webui.py +26 -4
GPT_SoVITS/inference_webui.py CHANGED
@@ -39,6 +39,9 @@ import torch
39
  import torchaudio
40
  from text.LangSegmenter import LangSegmenter
41
 
 
 
 
42
  import zipfile
43
  from huggingface_hub import hf_hub_download
44
  import nltk
@@ -808,6 +811,7 @@ def get_tts_wav(
808
  sample_steps=8,
809
  if_sr=False,
810
  pause_second=0.3,
 
811
  ):
812
  global cache
813
  if ref_wav_path:
@@ -1052,7 +1056,21 @@ def get_tts_wav(
1052
  audio_opt /= max_audio
1053
  else:
1054
  audio_opt = audio_opt.cpu().detach().numpy()
1055
- yield opt_sr, (audio_opt * 32767).astype(np.int16)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1056
 
1057
 
1058
  def split(todo_text):
@@ -1216,8 +1234,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
1216
  # SoVITS_dropdown, GPT_dropdown])
1217
  gr.Markdown(html_center(i18n("*请上传并填写参考信息"), "h3"))
1218
  with gr.Row():
1219
- inp_ref = gr.Audio(label=i18n(
1220
- "请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13)
 
 
 
1221
  with gr.Column(scale=13):
1222
  ref_text_free = gr.Checkbox(
1223
  label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")
@@ -1235,7 +1256,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
1235
  )
1236
  )
1237
  prompt_text = gr.Textbox(label=i18n(
1238
- "参考音频的文本"), value="", lines=5, max_lines=5, scale=1)
1239
  with gr.Column(scale=14):
1240
  prompt_language = gr.Dropdown(
1241
  label=i18n("参考音频的语种"),
@@ -1367,6 +1388,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
1367
  sample_steps,
1368
  if_sr_Checkbox,
1369
  pause_second_slider,
 
1370
  ],
1371
  [output],
1372
  )
 
39
  import torchaudio
40
  from text.LangSegmenter import LangSegmenter
41
 
42
+ from scipy.io.wavfile import write
43
+ import requests
44
+ import io
45
  import zipfile
46
  from huggingface_hub import hf_hub_download
47
  import nltk
 
811
  sample_steps=8,
812
  if_sr=False,
813
  pause_second=0.3,
814
+ uploadParams=None
815
  ):
816
  global cache
817
  if ref_wav_path:
 
1056
  audio_opt /= max_audio
1057
  else:
1058
  audio_opt = audio_opt.cpu().detach().numpy()
1059
+ audioData = (audio_opt * 32767).astype(np.int16)
1060
+ uploadAudio(opt_sr, audioData, uploadParams)
1061
+ yield opt_sr, audioData
1062
+
1063
+
1064
def uploadAudio(opt_sr, audio_int16, uploadParams):
    """Upload synthesized audio as a WAV file to a caller-specified endpoint.

    The upload is a best-effort side channel: if it is not configured or it
    fails, a message is printed and the function returns so the caller can
    still deliver the audio.

    Args:
        opt_sr: Sample rate passed to ``scipy.io.wavfile.write``.
        audio_int16: Audio samples to encode (the caller passes an int16
            NumPy array).
        uploadParams: Upload settings. Either a dict containing a ``"url"``
            key plus any extra form fields, or a JSON string encoding such
            a dict. ``None``, an empty string, or an empty dict disables
            the upload.

    Returns:
        None.
    """
    # Nothing was requested (None, "" from an empty text box, or {}).
    if not uploadParams:
        return

    # The WebUI feeds this parameter from a Textbox, so it arrives as text.
    # NOTE(review): JSON is assumed as the text format -- confirm with the
    # clients that fill in this field.
    if isinstance(uploadParams, str):
        import json

        try:
            uploadParams = json.loads(uploadParams)
        except ValueError as err:
            print(f"uploadAudio: ignoring invalid upload parameters: {err}")
            return

    if not isinstance(uploadParams, dict) or not uploadParams.get("url"):
        print("uploadAudio: upload parameters contain no 'url'; skipping upload")
        return

    # Work on a copy so the caller's dict keeps its 'url' entry.
    form_fields = dict(uploadParams)
    url = form_fields.pop("url")

    # Encode the samples as an in-memory WAV file.
    bio = io.BytesIO()
    write(bio, opt_sr, audio_int16)
    files = {
        "file": ("audio.wav", bio.getvalue(), "audio/wav"),
    }

    try:
        # A timeout keeps an unresponsive server from blocking synthesis forever.
        response = requests.post(url, files=files, data=form_fields, timeout=30)
    except requests.RequestException as err:
        print(f"uploadAudio: upload to {url} failed: {err}")
        return
    print(response.text)
1074
 
1075
 
1076
  def split(todo_text):
 
1234
  # SoVITS_dropdown, GPT_dropdown])
1235
  gr.Markdown(html_center(i18n("*请上传并填写参考信息"), "h3"))
1236
  with gr.Row():
1237
+ with gr.Column():
1238
+ inp_ref = gr.Audio(label=i18n(
1239
+ "请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13)
1240
+ uploadParams = gr.Textbox(label=i18n("结果上传参数"),
1241
+ value="", lines=2, max_lines=2)
1242
  with gr.Column(scale=13):
1243
  ref_text_free = gr.Checkbox(
1244
  label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")
 
1256
  )
1257
  )
1258
  prompt_text = gr.Textbox(label=i18n(
1259
+ "参考音频的文本"), value="", lines=6, max_lines=6, scale=1)
1260
  with gr.Column(scale=14):
1261
  prompt_language = gr.Dropdown(
1262
  label=i18n("参考音频的语种"),
 
1388
  sample_steps,
1389
  if_sr_Checkbox,
1390
  pause_second_slider,
1391
+ uploadParams
1392
  ],
1393
  [output],
1394
  )