File size: 3,111 Bytes
4465af7 8db92ed 579fccc ec8ba93 579fccc 8db92ed 63b64fa fe90cff 8db92ed 33551a3 515f8e3 8db92ed 7eeb257 09c6470 7eeb257 09c6470 7eeb257 09c6470 8db92ed 4bead6e 7eeb257 dd205e4 229bbd8 515f8e3 a8a860e 229bbd8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import spaces
import os
import shutil
import threading
import time
import sys
from huggingface_hub import snapshot_download
# Put this file's directory and its "indextts" sub-folder on the import path
# so the project-local imports that follow can be resolved.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.extend((current_dir, os.path.join(current_dir, "indextts")))
import gradio as gr
from indextts.infer import IndexTTS
from tools.i18n.i18n import I18nAuto
# I18nAuto instance configured for the zh_CN locale.
i18n = I18nAuto(language="zh_CN")
MODE = "local"

# Download the IndexTTS-1.5 snapshot into ./checkpoints first; the model below
# is constructed from that directory, so the order of these two calls matters.
snapshot_download(
    "IndexTeam/IndexTTS-1.5",
    local_dir="checkpoints",
)
tts = IndexTTS(
    model_dir="checkpoints",
    cfg_path="checkpoints/config.yaml",
)

# Working directories used by the app (generated audio and prompt audio).
os.makedirs("outputs/tasks", exist_ok=True)
os.makedirs("prompts", exist_ok=True)
@spaces.GPU
def infer(voice, text, output_path=None):
    """Synthesize ``text`` using ``voice`` as the reference audio.

    Args:
        voice: Reference (prompt) audio, passed unchanged to ``tts.infer``.
            The UI supplies it as a file path.
        text: Text to synthesize.
        output_path: Destination of the generated WAV file. When omitted or
            empty, a new file name under ``outputs/`` is generated.

    Returns:
        The path the audio was written to.

    Raises:
        RuntimeError: If the module-level ``tts`` model is not available.
    """
    if not tts:
        raise RuntimeError("Model not loaded")
    if not output_path:
        # Nanosecond timestamp: with one-second resolution, two requests that
        # arrived within the same second were written to the same file.
        output_path = os.path.join("outputs", f"spk_{time.time_ns()}.wav")
    # Ensure the destination directory exists, also for caller-supplied paths.
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    tts.infer(voice, text, output_path)
    return output_path
def tts_api(voice, text):
    """Run ``infer`` and return the generated audio as raw bytes.

    Returns:
        ``(200, {}, audio_bytes)`` on success, or
        ``(500, {"error": message}, None)`` if synthesis or reading the
        output file raised an exception.
    """
    try:
        wav_path = infer(voice, text)
        with open(wav_path, "rb") as wav_file:
            payload = wav_file.read()
    except Exception as err:
        # Boundary handler: any failure is reported as a 500-style tuple
        # instead of propagating to the caller.
        return (500, {"error": str(err)}, None)
    return (200, {}, payload)
def gen_single(prompt, text):
    """Synthesize ``text`` with ``prompt`` as reference audio for the UI.

    Returns:
        A ``gr.update`` that sets the generated file as the component value
        and makes the component visible.
    """
    return gr.update(value=infer(prompt, text), visible=True)
def update_prompt_audio():
    """Return a ``gr.update`` that sets the target component interactive."""
    return gr.update(interactive=True)
# Build the Gradio UI: a single tab with a reference-audio input, a text box,
# a generate button, and an (initially hidden) audio output.
# The Chinese labels are restored from mis-encoded text (UTF-8 that had been
# decoded with the wrong code page); English meanings are given alongside.
# NOTE(review): the component nesting below is reconstructed, since the
# original indentation was not available -- confirm the intended layout.
with gr.Blocks() as demo:
    gr.HTML('''
    <h2><center>IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System</center></h2>
    <p align="center">
    <a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a>
    </p>
    ''')
    with gr.Tab("音频生成"):  # "Audio generation"
        with gr.Row():
            # "Please upload reference audio"; delivered to callbacks as a file path.
            prompt_audio = gr.Audio(
                label="请上传参考音频",
                key="prompt_audio",
                sources=["upload", "microphone"],
                type="filepath",
            )
            # "Please enter the target text"
            input_text_single = gr.Textbox(
                label="请输入目标文本",
                key="input_text_single",
            )
            # "Generate speech"
            gen_button = gr.Button("生成语音", key="gen_button", interactive=True)
            # "Generated result"; hidden until gen_single returns an update
            # with visible=True.
            output_audio = gr.Audio(
                label="生成结果",
                visible=False,
                key="output_audio",
            )

    # Event listeners have to be registered inside the Blocks context.
    prompt_audio.upload(
        update_prompt_audio,
        inputs=[],
        outputs=[gen_button],
    )
    gen_button.click(
        gen_single,
        inputs=[prompt_audio, input_text_single],
        outputs=[output_audio],
    )
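# Removed the Interface-related code to avoid rendering the UI twice.
# Only the Blocks demo is kept; the UI and the API share it.
# This way the app has a UI and can also be called through the Gradio HTTP API.
# An API call can be made with POST /run/predict.
# Removed add_api_route and mount_gradio_app, which Spaces does not support.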
def main():
    """Call ``tts.load_normalizer()`` and then launch the Gradio demo."""
    tts.load_normalizer()
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()
|