File size: 2,512 Bytes
1b41e6d
 
 
 
 
ac7fa1f
f1e71b2
 
 
abd911f
 
 
 
1b41e6d
f1e71b2
1b41e6d
ac7fa1f
f1e71b2
ac7fa1f
f1e71b2
ac7fa1f
f1e71b2
100a302
ac7fa1f
f1e71b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b41e6d
ac7fa1f
f1e71b2
1b41e6d
f1e71b2
 
ac7fa1f
 
 
 
f1e71b2
100a302
ac7fa1f
 
 
f1e71b2
 
ac7fa1f
f1e71b2
ac7fa1f
f1e71b2
ac7fa1f
f1e71b2
100a302
f1e71b2
 
100a302
f1e71b2
ac7fa1f
f1e71b2
 
1b41e6d
 
f1e71b2
1b41e6d
f1e71b2
ac7fa1f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr

# 1) Load voices once at startup
def load_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        dict: maps ``"<ShortName> - <Locale> (<Gender>)"`` labels to the
        voice's ``ShortName``.
    """
    # asyncio.run() creates and closes its own event loop for this one call.
    # asyncio.get_event_loop() without a running loop is deprecated and
    # fails on newer Python versions.
    voices = asyncio.run(edge_tts.list_voices())
    return {
        f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
        for v in voices
    }

# Label -> ShortName mapping, built once when the module is imported.
# NOTE: this runs load_voices() at import time, so importing the module
# performs the voice-list lookup.
VOICES = load_voices()

# 2) Async TTS worker
async def _text_to_speech(text, short_name, rate_str, pitch_str):
    """Synthesize ``text`` to a temporary MP3 file and return its path.

    Args:
        text: Text to speak.
        short_name: Voice identifier passed to ``edge_tts.Communicate``.
        rate_str: Rate string passed through as ``rate=`` (e.g. ``"+10%"``).
        pitch_str: Pitch string passed through as ``pitch=`` (e.g. ``"-5Hz"``).

    Returns:
        str: path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so the caller owns it and must clean it up.

    Raises:
        Whatever ``edge_tts`` raises; the temporary file is removed first.
    """
    comm = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
    # Reserve a unique path, then close our handle before edge_tts writes to
    # it by name (reopening a still-open temp file is not portable).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        out_path = tmp.name
    try:
        await comm.save(out_path)
    except BaseException:
        # delete=False means nothing else will remove the file; do not leave
        # an empty/partial MP3 behind on failure or cancellation.
        try:
            os.remove(out_path)
        except OSError:
            pass
        raise
    return out_path

# 3) Synchronous wrapper for Gradio callback
def tts_interface(text, voice_choice, rate, pitch):
    """Synchronous Gradio callback: synthesize ``text`` with the chosen voice.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only is rejected.
        voice_choice: Dropdown label of the form
            ``"<ShortName> - <Locale> (<Gender>)"``; the part before the
            first ``" - "`` is used as the voice name.
        rate: Speaking-rate offset in percent (int or float).
        pitch: Pitch offset in Hz (int or float).

    Returns:
        tuple: ``(audio_path, "")`` on success, or ``(None, message)`` when
        the input is invalid or synthesis fails.
    """
    # Guard against None as well as blank text (None has no .strip()).
    if not text or not text.strip():
        return None, "🚨 Please enter some text."
    if not voice_choice:
        return None, "🚨 Please select a voice."
    short_name = voice_choice.split(" - ")[0]
    # Slider values may arrive as floats, which the "+d" format code rejects
    # with ValueError; round to the nearest integer first.
    rate_str = f"{round(rate):+d}%"
    pitch_str = f"{round(pitch):+d}Hz"
    try:
        # This callback may run in a thread that has no event loop, where
        # asyncio.get_event_loop() raises. asyncio.run() creates its own.
        audio_path = asyncio.run(
            _text_to_speech(text, short_name, rate_str, pitch_str)
        )
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return None, f"❌ TTS failed: {e}"
    return audio_path, ""

# 4) Build Gradio Blocks UI
def create_demo():
    """Build the Gradio Blocks UI and wire it to ``tts_interface``.

    Returns:
        gr.Blocks: the assembled app with queuing enabled, not yet launched.
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS on Hugging Face Spaces")

        gr.Markdown(
            "**Convert your text to speech** using Microsoft Edge's neural voices. "
            "Adjust rate and pitch to fine-tune the output."
        )

        with gr.Row():
            txt   = gr.Textbox(label="Input Text", lines=5, placeholder="Type something…")
            vox   = gr.Dropdown(choices=list(VOICES), label="Voice")
            # step=1 keeps both sliders on whole numbers: the callback formats
            # them as "+N%" / "+NHz", and without an explicit step Gradio
            # derives a fractional step for the narrower pitch range.
            rate  = gr.Slider(-50, 50, value=0, step=1, label="Rate (%)")
            pitch = gr.Slider(-20, 20, value=0, step=1, label="Pitch (Hz)")

        btn       = gr.Button("Generate Speech")
        audio_out = gr.Audio(type="filepath", label="Audio Output")
        warn_md   = gr.Markdown("", label="Warnings / Errors")

        # Wire the callback: inputs map positionally onto tts_interface's
        # parameters, outputs onto its (audio_path, message) return tuple.
        btn.click(
            fn=tts_interface,
            inputs=[txt, vox, rate, pitch],
            outputs=[audio_out, warn_md]
        )

        # Enable request queuing for the app.
        demo.queue()

    return demo

# 5) Launch
if __name__ == "__main__":
    # Build and start the app only when run as a script, not on import.
    demo = create_demo()
    demo.launch()