EmRa228 committed
Commit f1e71b2 · verified · 1 Parent(s): 100a302

Update app.py

Files changed (1)
  1. app.py +58 -57
app.py CHANGED
@@ -3,76 +3,77 @@ import edge_tts
  import asyncio
  import tempfile

- async def get_voices():
-     voices = await edge_tts.list_voices()
      return {
          f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
          for v in voices
      }

- async def text_to_speech(text, voice, rate, pitch):
-     if not text.strip():
-         return None, "Please enter text to convert."
-     if not voice:
-         return None, "Please select a voice."
-
-     short_name = voice.split(" - ")[0]
-     rate_str = f"{rate:+d}%"
-     pitch_str = f"{pitch:+d}Hz"
-     comm = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
-
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
-         await comm.save(tmp.name)
-     return tmp.name, ""

- def tts_interface(text, voice, rate, pitch):
-     # Run asyncio in sync function
-     audio_path, warning_text = asyncio.get_event_loop().run_until_complete(
-         text_to_speech(text, voice, rate, pitch)
-     )
-     # Return the file path (or None) and the warning string
-     return audio_path, warning_text

- def build_ui():
-     voices = asyncio.get_event_loop().run_until_complete(get_voices())

      with gr.Blocks(analytics_enabled=False) as demo:
-         gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
-
          with gr.Row():
-             with gr.Column():
-                 gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
-                 gr.Markdown(
-                     "Convert text to speech using Microsoft Edge TTS. "
-                     "Adjust rate/pitch: 0 is default, +/− to change."
-                 )
-
          with gr.Row():
-             text_input = gr.Textbox(label="Input Text", lines=5)
-             voice_dropdown = gr.Dropdown(
-                 choices=list(voices.keys()),
-                 label="Select Voice"
-             )
-             rate_slider = gr.Slider(-50, 50, value=0, label="Rate (%)")
-             pitch_slider = gr.Slider(-20, 20, value=0, label="Pitch (Hz)")
-
-         generate_btn = gr.Button("Generate Speech")
-         audio_out = gr.Audio(type="filepath", label="Output Audio")
-         warning_md = gr.Markdown("", label="Warning")
-
-         generate_btn.click(
              fn=tts_interface,
-             inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
-             outputs=[audio_out, warning_md]
          )
-
-         gr.Markdown(
-             "Try our Text-to-Video converter, or tweak rate/pitch for the perfect delivery!"
-         )
-
      return demo

  if __name__ == "__main__":
-     ui = build_ui()
-     # Launch synchronously—no .queue(), so no internal API schema error
-     ui.launch()
 
  import asyncio
  import tempfile

+ # 1) Fetch voices once, synchronously at startup
+ def load_voices():
+     loop = asyncio.get_event_loop()
+     voices = loop.run_until_complete(edge_tts.list_voices())
      return {
          f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
          for v in voices
      }

+ VOICES = load_voices()

+ # 2) Async function to drive Edge TTS
+ async def _text_to_speech(text, short_name, rate_str, pitch_str):
+     communicate = edge_tts.Communicate(text, short_name, rate=rate_str, pitch=pitch_str)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+         await communicate.save(tmp.name)
+     return tmp.name

+ # 3) Sync wrapper for the click callback
+ def tts_interface(text, voice_choice, rate, pitch):
+     if not text.strip():
+         return None, "🚨 Please enter some text."
+     if not voice_choice:
+         return None, "🚨 Please select a voice."
+     short_name = voice_choice.split(" - ")[0]
+     rate_str = f"{rate:+d}%"
+     pitch_str = f"{pitch:+d}Hz"
+     try:
+         # Run the async TTS call synchronously
+         audio_path = asyncio.get_event_loop().run_until_complete(
+             _text_to_speech(text, short_name, rate_str, pitch_str)
+         )
+         return audio_path, ""
+     except Exception as e:
+         return None, f"❌ TTS failed: {e}"

+ # 4) Build the Gradio Blocks UI
+ def create_demo():
      with gr.Blocks(analytics_enabled=False) as demo:
+         gr.Markdown("# 🎙️ Edge TTS on Hugging Face Spaces")
+
          with gr.Row():
+             gr.Markdown("""
+             **Convert your text to speech** using Microsoft Edge's neural voices.
+             Adjust rate and pitch to fine-tune the output.
+             """)
+
          with gr.Row():
+             txt = gr.Textbox(label="Input Text", lines=5, placeholder="Type something…")
+             vox = gr.Dropdown(choices=list(VOICES.keys()), label="Voice")
+             rate = gr.Slider(-50, 50, value=0, label="Rate (%)")
+             pitch = gr.Slider(-20, 20, value=0, label="Pitch (Hz)")
+
+         btn = gr.Button("Generate Speech")
+         audio_out = gr.Audio(type="filepath", label="Audio Output")
+         warn_md = gr.Markdown("", label="Warnings / Errors")
+
+         # IMPORTANT: wire queue here so the frontend finds /api/predict
+         btn.click(
              fn=tts_interface,
+             inputs=[txt, vox, rate, pitch],
+             outputs=[audio_out, warn_md]
          )
+
+         # Enable the queue on the entire app
+         demo.queue()
+
      return demo

+ # 5) Launch
  if __name__ == "__main__":
+     demo = create_demo()
+     # On Spaces, this will serve on 0.0.0.0:7860 automatically
+     demo.launch()