File size: 2,655 Bytes
1b41e6d
 
 
 
 
f1e71b2
 
 
 
abd911f
 
 
 
1b41e6d
f1e71b2
1b41e6d
f1e71b2
 
 
 
 
 
100a302
f1e71b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b41e6d
f1e71b2
 
1b41e6d
f1e71b2
 
1b41e6d
f1e71b2
 
 
 
 
100a302
f1e71b2
 
 
 
 
 
 
 
 
 
 
100a302
f1e71b2
 
100a302
f1e71b2
 
 
 
1b41e6d
 
f1e71b2
1b41e6d
f1e71b2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import edge_tts
import asyncio
import tempfile

# 1) Fetch voices once, synchronously at startup
def load_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        dict: maps ``"<ShortName> - <Locale> (<Gender>)"`` labels to the
        voice's ``ShortName``, one entry per voice returned by
        ``edge_tts.list_voices()``.
    """
    # asyncio.run() creates and tears down its own event loop, so this no
    # longer depends on asyncio.get_event_loop() implicitly creating one
    # (deprecated behaviour that newer Python versions turn into an error).
    voices = asyncio.run(edge_tts.list_voices())
    return {
        f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
        for v in voices
    }

# Display-label -> ShortName mapping. Built by calling load_voices() at module
# level, so the voice lookup runs as soon as this module is imported.
VOICES = load_voices()

# 2) Async function to drive Edge TTS
async def _text_to_speech(text, short_name, rate_str, pitch_str):
    """Synthesize *text* with Edge TTS into a temporary MP3 file.

    Args:
        text: The text to speak.
        short_name: Voice identifier handed to ``edge_tts.Communicate``.
        rate_str: Value passed as ``rate`` (the caller in this file formats
            it like ``"+10%"``).
        pitch_str: Value passed as ``pitch`` (the caller in this file
            formats it like ``"-5Hz"``).

    Returns:
        str: path of the generated ``.mp3`` file. The file is not deleted
        here; the caller owns it.

    Raises:
        Any exception raised by ``edge_tts.Communicate`` or its ``save``
        call is propagated after the temporary file has been removed.
    """
    import os  # local import: only needed for cleanup on failure

    communicate = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then close our own handle *before* edge_tts
    # writes to it: keeping it open while another writer opens the same path
    # fails on platforms with exclusive file locking and holds a descriptor
    # for the whole synthesis.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio_path = tmp.name

    try:
        await communicate.save(audio_path)
    except BaseException:
        # delete=False means nobody else cleans this up; do not leave an
        # empty/partial file behind when synthesis fails or is cancelled.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path

# 3) Sync wrapper for the click callback
def tts_interface(text, voice_choice, rate, pitch):
    """Synchronous click callback: validate the inputs and run Edge TTS.

    Args:
        text: Text to synthesize; ``None``, empty or whitespace-only text is
            rejected.
        voice_choice: Dropdown label of the form
            ``"<ShortName> - <Locale> (<Gender>)"``; only the part before the
            first ``" - "`` is used.
        rate: Speaking-rate offset in percent (int or float; rounded to the
            nearest whole number).
        pitch: Pitch offset in Hz (int or float; rounded to the nearest whole
            number).

    Returns:
        tuple: ``(audio_path, "")`` on success, or ``(None, message)`` when
        validation or synthesis fails.
    """
    # Guard against None as well as blank text so a cleared textbox cannot
    # raise AttributeError on .strip().
    if not text or not text.strip():
        return None, "🚨 Please enter some text."
    if not voice_choice:
        return None, "🚨 Please select a voice."

    short_name = voice_choice.split(" - ")[0]
    try:
        # Sliders may deliver floats; the "d" format code rejects them, so
        # normalise to int first. Kept inside the try so a bad value is
        # reported in the UI instead of escaping as an unhandled error.
        rate_str = f"{int(round(rate)):+d}%"
        pitch_str = f"{int(round(pitch)):+d}Hz"
        # asyncio.run() builds a fresh event loop for this call.
        # asyncio.get_event_loop() raises "There is no current event loop"
        # when invoked from a non-main thread, which is where synchronous
        # callbacks are typically executed.
        audio_path = asyncio.run(
            _text_to_speech(text, short_name, rate_str, pitch_str)
        )
    except Exception as e:
        return None, f"❌ TTS failed: {e}"
    return audio_path, ""

# 4) Build the Gradio Blocks UI
def create_demo():
    """Build the Gradio Blocks UI for the text-to-speech app.

    The layout is a title, a short description, one row with the text box,
    voice dropdown and rate/pitch sliders, a button, an audio player and a
    Markdown area for messages. The button is wired to ``tts_interface``.

    Returns:
        gr.Blocks: the assembled app with its queue enabled (not launched).
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS on Hugging Face Spaces")

        with gr.Row():
            gr.Markdown("""
**Convert your text to speech** using Microsoft Edge's neural voices.
Adjust rate and pitch to fine-tune the output.
""")

        with gr.Row():
            txt = gr.Textbox(label="Input Text", lines=5, placeholder="Type something…")
            vox = gr.Dropdown(choices=list(VOICES.keys()), label="Voice")
            # step=1 keeps slider values whole numbers, matching the integer
            # percent / Hz offsets that the callback formats.
            rate = gr.Slider(-50, 50, value=0, step=1, label="Rate (%)")
            pitch = gr.Slider(-20, 20, value=0, step=1, label="Pitch (Hz)")

        btn = gr.Button("Generate Speech")
        audio_out = gr.Audio(type="filepath", label="Audio Output")
        warn_md  = gr.Markdown("", label="Warnings / Errors")

        # The callback returns (audio path, message); the two outputs below
        # receive them in that order.
        btn.click(
            fn=tts_interface,
            inputs=[txt, vox, rate, pitch],
            outputs=[audio_out, warn_md]
        )

        # Enable the queue on the entire app
        demo.queue()

    return demo

# 5) Launch
if __name__ == "__main__":
    # Only build and serve the UI when this file is executed as a script.
    demo = create_demo()
    # launch() is called with Gradio's default settings.
    # NOTE(review): the original note says Spaces serves on 0.0.0.0:7860
    # automatically — nothing in this file sets host/port, confirm against
    # the deployment environment.
    demo.launch()