EmRa228 committed on
Commit
7877a4f
·
verified ·
1 Parent(s): 25681f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -36
app.py CHANGED
@@ -1,44 +1,87 @@
1
  import asyncio
 
2
  import edge_tts
3
- ing import GradioComponent to match huggingface space conventions
4
  import gradio as gr
5
 
6
- # Fetch available voices once at startup
7
- df_voices = asyncio.run(edge_tts.list_voices())
8
- voice_names = [v["Name"] for v in df_voices]
 
9
 
10
- async def generate_tts(text: str, voice: str, rate: int, pitch: int):
11
- # Edge TTS parameters expect strings
12
- rate_str = f"{rate}%"
13
- pitch_str = f"{pitch}Hz"
14
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
15
- # Stream audio to memory
16
- audio_chunks = []
17
- async for chunk in communicate.stream():
18
- audio_chunks.append(chunk)
19
- return b"".join(audio_chunks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Synchronous wrapper for Gradio
22
- def tts(text, voice, rate, pitch):
23
- audio = asyncio.run(generate_tts(text, voice, rate, pitch))
24
- return ("output.mp3", audio)
25
-
26
- # Gradio UI
27
- def main():
28
- with gr.Blocks() as demo:
29
- gr.Markdown("## Edge TTS Text-to-Speech Converter")
30
- with gr.Row():
31
- text_input = gr.Textbox(label="Input Text", lines=4, placeholder="Enter text to convert...")
32
- voice_selector = gr.Dropdown(label="Voice Model", choices=voice_names, value=voice_names[0])
33
- with gr.Row():
34
- rate_slider = gr.Slider(label="Speaking Rate (%)", minimum=10, maximum=200, step=1, value=100)
35
- pitch_slider = gr.Slider(label="Pitch (Hz)", minimum=-20, maximum=20, step=1, value=0)
36
- output_audio = gr.Audio(label="Generated Audio", type="file")
37
- generate_btn = gr.Button("Convert to Speech")
38
- generate_btn.click(fn=tts,
39
- inputs=[text_input, voice_selector, rate_slider, pitch_slider],
40
- outputs=output_audio)
41
- return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
 
43
  if __name__ == "__main__":
44
- main().launch()
 
 
1
  import asyncio
2
+ import os
3
  import edge_tts
 
4
  import gradio as gr
5
 
6
+ # Function to get available voices
7
async def get_voices():
    """Return the Edge TTS voices as display labels for the voice dropdown.

    Each label has the form ``"<ShortName> (<Gender>)"``, built from the
    ``ShortName`` and ``Gender`` keys of every entry yielded by
    ``edge_tts.list_voices()``.
    """
    labels = []
    for entry in await edge_tts.list_voices():
        short_name, gender = entry["ShortName"], entry["Gender"]
        labels.append(f"{short_name} ({gender})")
    return labels
10
 
11
+ # Function to convert text to speech
12
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with Edge TTS and return the path of the MP3 written.

    Args:
        text: The text to speak.
        voice: Dropdown label of the form ``"<ShortName> (<Gender>)"``; only
            the part before ``" ("`` is passed to edge-tts.
        rate: Speaking-rate offset in percent (truncated to an int).
        pitch: Pitch offset in Hz (truncated to an int).

    Returns:
        The path of the generated audio file on success, otherwise a string
        starting with ``"Error"`` that describes the failure. Callers
        distinguish the two cases with ``result.startswith("Error")``.
    """
    # Local import so this block does not depend on a file-level import.
    import tempfile

    output_file = None
    try:
        # "en-US-AvaNeural (Female)" -> "en-US-AvaNeural"
        voice_short_name = voice.split(" (")[0]

        # Always emit an explicit sign ("+10%", "-5%", "+0%"). Formatting the
        # already-truncated int with ":+d" keeps the sign and the digits in
        # agreement even for fractional inputs such as -0.5.
        rate_str = f"{int(rate):+d}%"
        pitch_str = f"{int(pitch):+d}Hz"

        communicate = edge_tts.Communicate(
            text, voice_short_name, rate=rate_str, pitch=pitch_str
        )

        # Use a genuinely unique file per request so that concurrent requests
        # cannot overwrite each other's audio, and a leftover file from an
        # earlier run cannot be mistaken for fresh output.
        # NOTE(review): these files are not deleted here after a successful
        # run; cleanup is left to whatever manages the temp directory.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_file = tmp.name

        await communicate.save(output_file)

        # The file already exists (we created it), so existence alone proves
        # nothing; require that audio bytes were actually written.
        if os.path.getsize(output_file) > 0:
            return output_file

        os.remove(output_file)
        return "Error: Audio file was not generated."
    except Exception as e:
        # UI boundary: report the failure as a status string instead of
        # raising, and do not leave a partial file behind.
        if output_file is not None:
            try:
                os.remove(output_file)
            except OSError:
                pass
        return f"Error: {str(e)}"
39
 
40
+ # Gradio interface function
41
def create_gradio_interface():
    """Build the Gradio Blocks UI for the Edge TTS converter.

    Fetches the voice list once, then lays out the text box, voice dropdown,
    rate/pitch sliders, a generate button, and the audio/status outputs, and
    wires the button to the synthesis handler.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # asyncio.run() creates and tears down its own event loop. Calling
    # asyncio.get_event_loop() from synchronous code with no running loop is
    # deprecated in recent Python versions.
    voices = asyncio.run(get_voices())

    with gr.Blocks(title="Edge TTS Text-to-Speech") as interface:
        gr.Markdown("# Edge TTS Text-to-Speech")
        gr.Markdown("Enter text, select a voice, adjust rate and pitch, and generate audio.")

        # Input components
        text_input = gr.Textbox(label="Input Text", placeholder="Type your text here...")
        voice_dropdown = gr.Dropdown(
            choices=voices,
            label="Voice",
            value=voices[0] if voices else None,
        )
        rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="Rate (%)")
        pitch_slider = gr.Slider(minimum=-200, maximum=200, value=0, step=10, label="Pitch (Hz)")

        generate_button = gr.Button("Generate Audio")

        # Outputs: the audio player plus a read-only status line
        audio_output = gr.Audio(label="Generated Audio")
        error_output = gr.Textbox(label="Status", interactive=False)

        async def on_generate(text, voice, rate, pitch):
            """Validate the inputs, run synthesis, and return (audio, status)."""
            # Whitespace-only input has nothing to speak; reject it here
            # rather than sending it to the TTS service.
            if not text or not text.strip():
                return None, "Error: Please enter some text."
            if not voice:
                return None, "Error: Please select a voice."

            result = await text_to_speech(text, voice, rate, pitch)
            # text_to_speech reports failures as strings starting with "Error".
            if result.startswith("Error"):
                return None, result
            return result, "Audio generated successfully!"

        generate_button.click(
            fn=on_generate,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output, error_output],
        )

    return interface
83
 
84
+ # Launch the interface
85
if __name__ == "__main__":
    # Listen on all network interfaces, on port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    interface = create_gradio_interface()
    interface.launch(**launch_options)