Edge-TTS-Text-to-Speech

Sleeping

File size: 2,655 Bytes

1b41e6d
 
 
 
 
f1e71b2
 
 
 
abd911f
 
 
 
1b41e6d
f1e71b2
1b41e6d
f1e71b2
 
 
 
 
 
100a302
f1e71b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b41e6d
f1e71b2
 
1b41e6d
f1e71b2
 
1b41e6d
f1e71b2
 
 
 
 
100a302
f1e71b2
 
 
 
 
 
 
 
 
 
 
100a302
f1e71b2
 
100a302
f1e71b2
 
 
 
1b41e6d
 
f1e71b2
1b41e6d
f1e71b2

import gradio as gr
import edge_tts
import asyncio
import tempfile

# 1) Fetch voices once, synchronously at startup
def load_voices():
    """Fetch the available Edge TTS voices and build the dropdown mapping.

    Runs the ``edge_tts.list_voices()`` coroutine to completion from
    synchronous code.

    Returns:
        dict[str, str]: Maps a display label of the form
        ``"<ShortName> - <Locale> (<Gender>)"`` to the voice's ``ShortName``.

    Raises:
        Whatever ``edge_tts.list_voices()`` raises (e.g. on network failure).
    """
    # Use a private event loop instead of asyncio.get_event_loop(): calling
    # get_event_loop() with no running loop is deprecated and fails on newer
    # Python versions. A dedicated loop also leaves the thread's "current
    # loop" state untouched for any other code that relies on it.
    loop = asyncio.new_event_loop()
    try:
        voices = loop.run_until_complete(edge_tts.list_voices())
    finally:
        # Always release the loop's resources, even if the fetch failed.
        loop.close()
    return {
        f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
        for v in voices
    }

# Display-label -> ShortName mapping, built once when this module is imported.
# NOTE: this runs load_voices() at import time, so importing the module
# executes the voice-list fetch (presumably a network call — confirm) and
# will raise here if that fetch fails.
VOICES = load_voices()

# 2) Async function to drive Edge TTS
async def _text_to_speech(text, short_name, rate_str, pitch_str):
    """Synthesize *text* with Edge TTS and save it to a temporary MP3 file.

    Args:
        text: The text to speak.
        short_name: Edge TTS voice identifier passed to ``Communicate``.
        rate_str: Rate adjustment string passed as ``rate=`` (the caller
            formats it like ``"+10%"``).
        pitch_str: Pitch adjustment string passed as ``pitch=`` (the caller
            formats it like ``"-5Hz"``).

    Returns:
        str: Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so the caller owns it and is responsible for
        removing it.

    Raises:
        Whatever ``edge_tts.Communicate`` or its ``save`` method raises; the
        temporary file is removed before the exception propagates.
    """
    import os  # local import: this module does not import os at file level

    # Build the request first so invalid arguments fail before any file
    # is created on disk.
    communicate = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then close our handle *before* edge_tts opens
    # the same path for writing. Keeping it open is unnecessary and prevents
    # the second open on platforms that lock open files (e.g. Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        out_path = tmp.name

    try:
        await communicate.save(out_path)
    except BaseException:
        # Synthesis failed or was cancelled: don't leave an empty or partial
        # file behind, since nobody will receive its path to clean it up.
        try:
            os.unlink(out_path)
        except OSError:
            pass
        raise
    return out_path

# 3) Sync wrapper for the click callback
def tts_interface(text, voice_choice, rate, pitch):
    """Gradio click callback: validate inputs and run Edge TTS synchronously.

    Args:
        text: Text to synthesize; ``None``, empty or whitespace-only text is
            rejected.
        voice_choice: Dropdown label of the form
            ``"<ShortName> - <Locale> (<Gender>)"``; the part before the
            first ``" - "`` is used as the voice name.
        rate: Speaking-rate adjustment in percent (int or float).
        pitch: Pitch adjustment in Hz (int or float).

    Returns:
        tuple: ``(audio_path, message)``. On success ``audio_path`` is the
        path of the generated file and ``message`` is ``""``. On a validation
        or synthesis failure ``audio_path`` is ``None`` and ``message``
        describes the problem.
    """
    if not text or not text.strip():
        return None, "🚨 Please enter some text."
    if not voice_choice:
        return None, "🚨 Please select a voice."
    short_name = voice_choice.split(" - ")[0]
    # Slider values may arrive as floats; the "+d" format code only accepts
    # integers, so round to the nearest whole number before formatting.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    try:
        # asyncio.run() creates and tears down its own event loop, so this
        # works from a worker thread that has no current event loop
        # (asyncio.get_event_loop() raises there).
        audio_path = asyncio.run(
            _text_to_speech(text, short_name, rate_str, pitch_str)
        )
        return audio_path, ""
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the callback.
        return None, f"❌ TTS failed: {e}"

# 4) Build the Gradio Blocks UI
def create_demo():
    """Build the Gradio Blocks UI for the Edge TTS demo.

    Lays out the text box, voice dropdown and rate/pitch sliders, wires the
    "Generate Speech" button to ``tts_interface``, and enables the request
    queue.

    Returns:
        gr.Blocks: The assembled (not yet launched) app.
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS on Hugging Face Spaces")

        with gr.Row():
            gr.Markdown("""
**Convert your text to speech** using Microsoft Edge's neural voices.
Adjust rate and pitch to fine-tune the output.
""")

        with gr.Row():
            txt = gr.Textbox(label="Input Text", lines=5, placeholder="Type something…")
            vox = gr.Dropdown(choices=list(VOICES), label="Voice")
            # step=1 keeps both sliders on whole numbers: the callback
            # formats these values as integer "+N%" / "+NHz" strings, and
            # without an explicit step Gradio derives one from the range,
            # which can be fractional.
            rate = gr.Slider(-50, 50, value=0, step=1, label="Rate (%)")
            pitch = gr.Slider(-20, 20, value=0, step=1, label="Pitch (Hz)")

        btn = gr.Button("Generate Speech")
        audio_out = gr.Audio(type="filepath", label="Audio Output")
        warn_md = gr.Markdown("", label="Warnings / Errors")

        # Outputs map positionally onto tts_interface's
        # (audio_path, message) return tuple.
        btn.click(
            fn=tts_interface,
            inputs=[txt, vox, rate, pitch],
            outputs=[audio_out, warn_md],
        )

        # Enable the request queue for the whole app.
        demo.queue()

    return demo

# 5) Launch
if __name__ == "__main__":
    # Build the UI only when this file is executed as a script.
    demo = create_demo()
    # launch() is called without arguments, so host and port are left to
    # Gradio's defaults/environment.
    # NOTE(review): the original comment states that on Spaces this serves on
    # 0.0.0.0:7860 automatically — confirm against the platform docs.
    demo.launch()