IndexTTS

Runtime error

File size: 3,111 Bytes

4465af7
8db92ed
 
 
 
 
579fccc
ec8ba93
579fccc
8db92ed
 
 
 
 
 
 
 
 
 
63b64fa
fe90cff
8db92ed
 
 
33551a3
515f8e3
 
 
 
8db92ed
 
 
 
 
7eeb257
09c6470
 
7eeb257
 
 
09c6470
7eeb257
09c6470
8db92ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4bead6e
 
 
 
7eeb257
 
dd205e4
229bbd8
515f8e3
a8a860e
229bbd8

import spaces
import os
import shutil
import threading
import time
import sys

from huggingface_hub import snapshot_download

# Make this file's directory and its "indextts" subdirectory importable.
# Both are appended, so entries already on sys.path keep their priority.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.extend((current_dir, os.path.join(current_dir, "indextts")))

import gradio as gr
from indextts.infer import IndexTTS
from tools.i18n.i18n import I18nAuto

# Translation helper created for the zh_CN locale, and a mode flag.
# Neither name is referenced again in this file.
i18n = I18nAuto(language="zh_CN")
MODE = 'local'

# Download the model repository into ./checkpoints first, then build the
# TTS object from that same directory; the order matters.
snapshot_download(repo_id="IndexTeam/IndexTTS-1.5", local_dir="checkpoints")
tts = IndexTTS(cfg_path="checkpoints/config.yaml", model_dir="checkpoints")

# Working directories used at runtime; creating "outputs/tasks" also
# creates the parent "outputs" directory that infer() writes into.
for _workdir in ("outputs/tasks", "prompts"):
    os.makedirs(_workdir, exist_ok=True)

@spaces.GPU
def infer(voice, text, output_path=None):
    """Synthesize ``text`` using ``voice`` as the reference and return the wav path.

    Args:
        voice: Reference audio, forwarded unchanged to ``tts.infer``. The UI
            in this file supplies a file path (``gr.Audio(type="filepath")``).
        text: Target text, forwarded unchanged to ``tts.infer``.
        output_path: Where the result should be written. When falsy, a new
            file name under ``outputs`` is generated.

    Returns:
        The output path that was handed to ``tts.infer``.

    Raises:
        RuntimeError: If the module-level ``tts`` object is falsy.
    """
    if not tts:
        # RuntimeError is still caught by callers using ``except Exception``.
        raise RuntimeError("Model not loaded")
    if not output_path:
        import uuid

        # A whole-second timestamp alone collides when two requests arrive
        # within the same second; the random suffix keeps each file distinct.
        unique_name = f"spk_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
        output_path = os.path.join("outputs", unique_name)
    tts.infer(voice, text, output_path)
    return output_path

def tts_api(voice, text):
    """Run one synthesis and wrap the outcome as ``(status, headers, body)``.

    Returns:
        ``(200, {}, <wav bytes>)`` when synthesis and the file read succeed,
        otherwise ``(500, {"error": <exception text>}, None)``.
    """
    try:
        # Both the synthesis call and the file read are guarded, so either
        # failure is reported through the 500 tuple instead of propagating.
        with open(infer(voice, text), "rb") as wav_file:
            payload = wav_file.read()
    except Exception as exc:
        failure = {"error": str(exc)}
        return (500, failure, None)
    return (200, {}, payload)

def gen_single(prompt, text):
    """Click handler: synthesize ``text`` with ``prompt`` as the reference audio.

    Args:
        prompt: Value of the reference-audio component; falsy when the user
            has not uploaded or recorded anything.
        text: Value of the target-text textbox.

    Returns:
        A ``gr.update`` that sets the output audio to the generated file and
        makes the component visible.

    Raises:
        gr.Error: If the reference audio is missing or the text is blank, so
            the user sees a clear message instead of an internal model error.
    """
    # Validate before calling infer(); the messages reuse the labels of the
    # corresponding input components.
    if not prompt:
        raise gr.Error("请上传参考音频")
    if not text or not text.strip():
        raise gr.Error("请输入目标文本")
    output_path = infer(prompt, text)
    return gr.update(value=output_path, visible=True)

def update_prompt_audio():
    """Return a ``gr.update`` that marks the target component as interactive.

    Wired to the reference-audio upload event, with the generate button as
    its output.
    """
    return gr.update(interactive=True)

# Single Blocks app: components are declared in display order, then wired
# to their handlers.
with gr.Blocks() as demo:
    # Page banner: project title plus an arXiv badge linking to the paper.
    gr.HTML('''
    <h2><center>IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System</center></h2>

<p align="center">
<a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a>
</p>
    ''')
    with gr.Tab("音频生成"):
        with gr.Row():
            # Reference audio is delivered to the handler as a file path.
            prompt_audio = gr.Audio(label="请上传参考音频", key="prompt_audio",
                                    sources=["upload", "microphone"], type="filepath")
            input_text_single = gr.Textbox(label="请输入目标文本", key="input_text_single")
            gen_button = gr.Button("生成语音", key="gen_button", interactive=True)
            # Hidden until gen_single returns an update that reveals it.
            output_audio = gr.Audio(label="生成结果", visible=False, key="output_audio")

    # Uploading a reference clip (re-)enables the generate button.
    prompt_audio.upload(update_prompt_audio,
                        inputs=[],
                        outputs=[gen_button])

    # Clicking the button runs synthesis and fills the output player.
    gen_button.click(gen_single,
                     inputs=[prompt_audio, input_text_single],
                     outputs=[output_audio])

    # Interface-related code was removed to avoid rendering the UI twice.
    # Only the Blocks demo is kept; the UI and the API share it.
    # This provides a UI and also allows calls through the Gradio HTTP API.
    # The API can be called via POST /run/predict.

    # add_api_route and mount_gradio_app were removed; Spaces does not support them.

def main():
    """Call ``tts.load_normalizer()`` and then launch the Gradio app.

    The server binds to 0.0.0.0 on port 7860 and requests a share link.
    """
    tts.load_normalizer()
    launch_options = dict(server_name="0.0.0.0", server_port=7860, share=True)
    demo.launch(**launch_options)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()