Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,76 +3,77 @@ import edge_tts
|
|
3 |
import asyncio
|
4 |
import tempfile
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
8 |
return {
|
9 |
f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
|
10 |
for v in voices
|
11 |
}
|
12 |
|
13 |
-
|
14 |
-
if not text.strip():
|
15 |
-
return None, "Please enter text to convert."
|
16 |
-
if not voice:
|
17 |
-
return None, "Please select a voice."
|
18 |
-
|
19 |
-
short_name = voice.split(" - ")[0]
|
20 |
-
rate_str = f"{rate:+d}%"
|
21 |
-
pitch_str = f"{pitch:+d}Hz"
|
22 |
-
comm = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
|
23 |
-
|
24 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
|
25 |
-
await comm.save(tmp.name)
|
26 |
-
return tmp.name, ""
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
return audio_path, warning_text
|
35 |
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
|
|
|
|
39 |
with gr.Blocks(analytics_enabled=False) as demo:
|
40 |
-
gr.Markdown("# 🎙️ Edge TTS
|
41 |
-
|
42 |
with gr.Row():
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
)
|
49 |
-
|
50 |
with gr.Row():
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
generate_btn.click(
|
64 |
fn=tts_interface,
|
65 |
-
inputs=[
|
66 |
-
outputs=[audio_out,
|
67 |
)
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
return demo
|
74 |
|
|
|
75 |
if __name__ == "__main__":
|
76 |
-
|
77 |
-
#
|
78 |
-
|
|
|
3 |
import asyncio
|
4 |
import tempfile
|
5 |
|
6 |
+
# 1) Fetch voices once, synchronously at startup
|
7 |
+
def load_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Intended to be called synchronously (no event loop may already be
    running in the calling thread).

    Returns:
        A dict mapping ``"<ShortName> - <Locale> (<Gender>)"`` to the voice's
        ``ShortName``, one entry per voice returned by
        ``edge_tts.list_voices()``.
    """
    # Use a private loop instead of asyncio.get_event_loop(): calling that
    # without a running loop is deprecated and raises RuntimeError on newer
    # Python versions. A private loop also leaves the thread's current-loop
    # state untouched and can be closed deterministically.
    loop = asyncio.new_event_loop()
    try:
        voices = loop.run_until_complete(edge_tts.list_voices())
    finally:
        loop.close()
    return {
        f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
        for v in voices
    }
|
14 |
|
15 |
+
# Label -> ShortName mapping, built once when the module is loaded.
VOICES = load_voices()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
# 2) Async function to drive Edge TTS
|
18 |
+
async def _text_to_speech(text, short_name, rate_str, pitch_str):
    """Synthesize *text* with Edge TTS and return the path of the MP3 written.

    Args:
        text: Text to speak.
        short_name: Voice identifier passed to ``edge_tts.Communicate``.
        rate_str: Rate adjustment string, forwarded as ``rate``.
        pitch_str: Pitch adjustment string, forwarded as ``pitch``.

    Returns:
        Path of a temporary ``.mp3`` file. The file is created with
        ``delete=False``, so it outlives this call; removing it is left to
        the caller.

    Raises:
        Any exception raised by ``edge_tts.Communicate`` or its ``save``.
        If ``save`` fails, the temporary file is removed before the
        exception propagates.
    """
    # Function-scope import: os may not be imported at module level.
    import os

    communicate = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then release our own handle before edge-tts
    # opens the same path by name; reopening a still-open NamedTemporaryFile
    # is platform-dependent.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio_path = tmp.name

    try:
        await communicate.save(audio_path)
    except BaseException:
        # delete=False means nothing else will clean up an empty or partial
        # file, so remove it here (best effort) and re-raise the real error.
        try:
            os.unlink(audio_path)
        except OSError:
            pass
        raise
    return audio_path
|
|
|
23 |
|
24 |
+
# 3) Sync wrapper for the click callback
|
25 |
+
def tts_interface(text, voice_choice, rate, pitch):
    """Validate the UI inputs and synthesize speech synchronously.

    Args:
        text: Text to convert; ``None``, empty, or whitespace-only is rejected.
        voice_choice: Voice label of the form ``"<ShortName> - ..."``; the
            part before the first ``" - "`` is used as the voice name.
        rate: Speaking-rate offset in percent (int or float).
        pitch: Pitch offset in Hz (int or float).

    Returns:
        A ``(audio_path, message)`` tuple. On success ``audio_path`` is the
        generated file path and ``message`` is ``""``; on invalid input or
        failure ``audio_path`` is ``None`` and ``message`` explains why.
    """
    if not text or not text.strip():
        return None, "🚨 Please enter some text."
    if not voice_choice:
        return None, "🚨 Please select a voice."

    short_name = voice_choice.split(" - ")[0]
    try:
        # The ":+d" format code rejects floats, so round first; a fractional
        # slider value must not crash the callback.
        rate_str = f"{int(round(rate)):+d}%"
        pitch_str = f"{int(round(pitch)):+d}Hz"
        # asyncio.run creates and tears down its own loop, so this also works
        # when the callback runs in a thread that has no event loop
        # (asyncio.get_event_loop() raises RuntimeError there).
        audio_path = asyncio.run(
            _text_to_speech(text, short_name, rate_str, pitch_str)
        )
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return None, f"❌ TTS failed: {e}"
    return audio_path, ""
|
41 |
|
42 |
+
# 4) Build the Gradio Blocks UI
|
43 |
+
def create_demo():
    """Assemble the Gradio Blocks UI for the Edge TTS app and return it.

    The layout is a title, a short description row, a row of inputs (text,
    voice, rate, pitch), a generate button, an audio output and a Markdown
    area for messages. The button is wired to ``tts_interface`` and the
    queue is enabled before the app is returned.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS on Hugging Face Spaces")

        with gr.Row():
            gr.Markdown("""
            **Convert your text to speech** using Microsoft Edge's neural voices.
            Adjust rate and pitch to fine-tune the output.
            """)

        with gr.Row():
            text_box = gr.Textbox(label="Input Text", lines=5, placeholder="Type something…")
            voice_menu = gr.Dropdown(choices=list(VOICES), label="Voice")
            rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate (%)")
            pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch (Hz)")

        generate = gr.Button("Generate Speech")
        audio_out = gr.Audio(type="filepath", label="Audio Output")
        message_area = gr.Markdown("", label="Warnings / Errors")

        # A click passes the four input values to tts_interface and routes
        # its (audio_path, message) result to the player and message area.
        click_inputs = [text_box, voice_menu, rate_slider, pitch_slider]
        click_outputs = [audio_out, message_area]
        generate.click(
            fn=tts_interface,
            inputs=click_inputs,
            outputs=click_outputs,
        )

    # Enable the queue for the whole app.
    demo.queue()

    return demo
|
74 |
|
75 |
+
# 5) Launch
|
76 |
if __name__ == "__main__":
    # Build the UI only when executed as a script, then start the Gradio
    # server with its default launch settings.
    # NOTE(review): the original comment says Spaces serves this on
    # 0.0.0.0:7860 automatically - that comes from the hosting environment,
    # not from anything set here.
    demo = create_demo()
    demo.launch()
|