EmRa228 committed
Commit f1e71b2 · verified · 1 Parent(s): 100a302

Update app.py

Files changed (1)
  1. app.py +58 -57
app.py CHANGED
@@ -3,76 +3,77 @@ import edge_tts
  import asyncio
  import tempfile

- async def get_voices():
-     voices = await edge_tts.list_voices()
      return {
          f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
          for v in voices
      }

- async def text_to_speech(text, voice, rate, pitch):
-     if not text.strip():
-         return None, "Please enter text to convert."
-     if not voice:
-         return None, "Please select a voice."
-
-     short_name = voice.split(" - ")[0]
-     rate_str = f"{rate:+d}%"
-     pitch_str = f"{pitch:+d}Hz"
-     comm = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
-
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
-         await comm.save(tmp.name)
-     return tmp.name, ""

- def tts_interface(text, voice, rate, pitch):
-     # Run asyncio in sync function
-     audio_path, warning_text = asyncio.get_event_loop().run_until_complete(
-         text_to_speech(text, voice, rate, pitch)
-     )
-     # Return the file path (or None) and the warning string
-     return audio_path, warning_text

- def build_ui():
-     voices = asyncio.get_event_loop().run_until_complete(get_voices())

      with gr.Blocks(analytics_enabled=False) as demo:
-         gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
-
          with gr.Row():
-             with gr.Column():
-                 gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
-                 gr.Markdown(
-                     "Convert text to speech using Microsoft Edge TTS. "
-                     "Adjust rate/pitch: 0 is default, +/− to change."
-                 )
-
          with gr.Row():
-             text_input = gr.Textbox(label="Input Text", lines=5)
-             voice_dropdown = gr.Dropdown(
-                 choices=list(voices.keys()),
-                 label="Select Voice"
-             )
-             rate_slider = gr.Slider(-50, 50, value=0, label="Rate (%)")
-             pitch_slider = gr.Slider(-20, 20, value=0, label="Pitch (Hz)")
-
-         generate_btn = gr.Button("Generate Speech")
-         audio_out = gr.Audio(type="filepath", label="Output Audio")
-         warning_md = gr.Markdown("", label="Warning")
-
-         generate_btn.click(
              fn=tts_interface,
-             inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
-             outputs=[audio_out, warning_md]
          )
-
-         gr.Markdown(
-             "Try our Text-to-Video converter, or tweak rate/pitch for the perfect delivery!"
-         )
-
      return demo

  if __name__ == "__main__":
-     ui = build_ui()
-     # Launch synchronously—no .queue(), so no internal API schema error
-     ui.launch()
 
  import asyncio
  import tempfile

+ # 1) Fetch voices once, synchronously at startup
+ def load_voices():
+     loop = asyncio.get_event_loop()
+     voices = loop.run_until_complete(edge_tts.list_voices())
      return {
          f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
          for v in voices
      }

+ VOICES = load_voices()

+ # 2) Async function to drive Edge TTS
+ async def _text_to_speech(text, short_name, rate_str, pitch_str):
+     communicate = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+         await communicate.save(tmp.name)
+     return tmp.name

+ # 3) Sync wrapper for the click callback
+ def tts_interface(text, voice_choice, rate, pitch):
+     if not text.strip():
+         return None, "🚨 Please enter some text."
+     if not voice_choice:
+         return None, "🚨 Please select a voice."
+     short_name = voice_choice.split(" - ")[0]
+     rate_str = f"{rate:+d}%"
+     pitch_str = f"{pitch:+d}Hz"
+     try:
+         # Run the async TTS call synchronously
+         audio_path = asyncio.get_event_loop().run_until_complete(
+             _text_to_speech(text, short_name, rate_str, pitch_str)
+         )
+         return audio_path, ""
+     except Exception as e:
+         return None, f"❌ TTS failed: {e}"

+ # 4) Build the Gradio Blocks UI
+ def create_demo():
      with gr.Blocks(analytics_enabled=False) as demo:
+         gr.Markdown("# 🎙️ Edge TTS on Hugging Face Spaces")
+
          with gr.Row():
+             gr.Markdown("""
+             **Convert your text to speech** using Microsoft Edge's neural voices.
+             Adjust rate and pitch to fine-tune the output.
+             """)
+
          with gr.Row():
+             txt = gr.Textbox(label="Input Text", lines=5, placeholder="Type something…")
+             vox = gr.Dropdown(choices=list(VOICES.keys()), label="Voice")
+             rate = gr.Slider(-50, 50, value=0, label="Rate (%)")
+             pitch = gr.Slider(-20, 20, value=0, label="Pitch (Hz)")
+
+         btn = gr.Button("Generate Speech")
+         audio_out = gr.Audio(type="filepath", label="Audio Output")
+         warn_md = gr.Markdown("", label="Warnings / Errors")
+
+         # IMPORTANT: wire queue here so the frontend finds /api/predict
+         btn.click(
              fn=tts_interface,
+             inputs=[txt, vox, rate, pitch],
+             outputs=[audio_out, warn_md]
          )
+
+         # Enable the queue on the entire app
+         demo.queue()
+
      return demo

+ # 5) Launch
  if __name__ == "__main__":
+     demo = create_demo()
+     # On Spaces, this will serve on 0.0.0.0:7860 automatically
+     demo.launch()