File size: 2,220 Bytes
2d92619
1b41e6d
 
a257b37
1b41e6d
 
a257b37
f1e71b2
 
 
a257b37
 
1b41e6d
f1e71b2
1b41e6d
a257b37
 
ac7fa1f
f1e71b2
ac7fa1f
f1e71b2
100a302
a257b37
 
f1e71b2
a257b37
 
 
 
 
 
f1e71b2
a257b37
 
f1e71b2
a257b37
f1e71b2
a257b37
1b41e6d
a257b37
 
1b41e6d
a257b37
 
100a302
a257b37
 
 
 
 
 
 
f1e71b2
 
100a302
a257b37
 
100a302
a257b37
f1e71b2
1b41e6d
 
 
a257b37
2d92619
 
 
 
a257b37
2d92619
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import gradio as gr
import asyncio
import edge_tts
import tempfile

# Load voices once
def load_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        dict: maps "<ShortName> - <Locale> (<Gender>)" to the voice's
        ShortName, one entry per voice returned by edge_tts.list_voices().
    """
    # asyncio.run() creates and tears down its own event loop. The previous
    # asyncio.get_event_loop() call is deprecated when no loop is running and
    # raises RuntimeError on newer Python versions.
    voices = asyncio.run(edge_tts.list_voices())
    return {
        f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
        for v in voices
    }

# Display-label -> ShortName map, built once when this module is imported
# (importing the module therefore runs the voice-list fetch).
VOICES = load_voices()

# Async TTS
async def _tts(text, short_name, rate_str, pitch_str):
    """Synthesize *text* to a temporary MP3 file and return its path.

    Args:
        text: The text to speak.
        short_name: Voice identifier passed to edge_tts.Communicate.
        rate_str: Rate offset string, passed through as ``rate=``.
        pitch_str: Pitch offset string, passed through as ``pitch=``.

    Returns:
        str: Path of the generated ``.mp3`` file. The file is not deleted
        here; the caller owns it.

    Raises:
        Whatever edge_tts raises while constructing the request or saving;
        the temporary file is removed before the error propagates.
    """
    comm = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
    # Reserve a unique path, then close our handle before edge_tts writes to
    # it: a still-open temporary file cannot be reopened by name on Windows.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        await comm.save(path)
    except BaseException:
        # Includes cancellation: don't leave an empty/partial file behind.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return path

# Sync wrapper
def tts_interface(text, voice, rate, pitch):
    """Synchronous UI callback: validate inputs and run the async TTS job.

    Args:
        text: Text to synthesize; ``None``, empty or whitespace-only is rejected.
        voice: Dropdown label of the form "<ShortName> - ..."; the part
            before the first " - " is used as the voice name.
        rate: Rate offset in percent (int or float; rounded to an integer).
        pitch: Pitch offset in Hz (int or float; rounded to an integer).

    Returns:
        tuple: ``(audio_path, "")`` on success, or ``(None, message)`` when
        validation fails or synthesis raises.
    """
    if not text or not text.strip():
        return None, "🚨 Enter some text."
    if not voice:
        return None, "🚨 Select a voice."
    name = voice.split(" - ")[0]
    # Slider values may arrive as floats, and the "d" format code raises
    # ValueError for floats, so round to an integer first.
    rate_s = f"{round(rate):+d}%"
    pitch_s = f"{round(pitch):+d}Hz"
    try:
        # asyncio.run() builds a fresh loop for this call. The handler can be
        # invoked from a worker thread, where asyncio.get_event_loop() raises
        # "There is no current event loop".
        path = asyncio.run(_tts(text, name, rate_s, pitch_s))
    except Exception as e:
        return None, f"❌ TTS failed: {e}"
    return path, ""

# Build UI
def create_app():
    """Build the Gradio Blocks UI and wire the Generate button.

    Layout: one row with the text box, voice dropdown and rate/pitch
    sliders, followed by the Generate button, an audio output and a
    Markdown area that receives validation/error messages.

    Returns:
        gr.Blocks: the configured app with its queue enabled, not yet launched.
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS in Hugging Face Space")
        gr.Markdown("Convert text to speech with Microsoft Edge voices.")
        with gr.Row():
            txt  = gr.Textbox(lines=5, label="Input Text")
            vox  = gr.Dropdown(list(VOICES.keys()), label="Voice")
            # step=1 keeps both sliders on whole numbers. The handler formats
            # these as integer "%"/"Hz" offsets, and without an explicit step
            # Gradio derives one from the range (fractional for the 40-wide
            # pitch range).
            rate = gr.Slider(-50, 50, value=0, step=1, label="Rate (%)")
            pit  = gr.Slider(-20, 20, value=0, step=1, label="Pitch (Hz)")
        btn = gr.Button("Generate")
        out_audio = gr.Audio(type="filepath", label="Audio")
        warn = gr.Markdown("", label="Warning")

        # tts_interface returns (audio_path, message); they map onto the
        # audio player and the warning area in that order.
        btn.click(
            fn=tts_interface,
            inputs=[txt, vox, rate, pit],
            outputs=[out_audio, warn]
        )
        demo.queue()  # Enable the request queue (original note: registers /api endpoints)

    return demo

if __name__ == "__main__":
    demo = create_app()
    # Use the PORT environment variable when present; otherwise fall back to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    # Bind on all interfaces; SSR is turned off (original note: avoids SSR
    # introspection errors).
    demo.launch(server_name="0.0.0.0", server_port=listen_port, ssr_mode=False)