Edge-TTS-Text-to-Speech

Sleeping

App Files Files Community

EmRa228 committed on May 8

Commit

37a2817

verified ·

1 Parent(s): 7877a4f

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -36

app.py CHANGED Viewed

@@ -2,26 +2,34 @@ import asyncio
 import os
 import edge_tts
 import gradio as gr
 # Function to get available voices
 async def get_voices():
-    voices = await edge_tts.list_voices()
-    return [f"{voice['ShortName']} ({voice['Gender']})" for voice in voices]
 # Function to convert text to speech
 async def text_to_speech(text, voice, rate, pitch):
     try:
-        # Extract voice ShortName from the dropdown (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
         voice_short_name = voice.split(" (")[0]
-        # Convert rate from percentage (e.g., "10" for +10%) to edge-tts format (e.g., "+10%")
         rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
-        # Convert pitch from Hz (e.g., "100" for +100Hz) to edge-tts format (e.g., "+100Hz")
         pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
-        # Generate unique output filename
-        output_file = "output.mp3"
         # Initialize edge-tts communication
         communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
@@ -31,11 +39,11 @@ async def text_to_speech(text, voice, rate, pitch):
         # Check if file was created
         if os.path.exists(output_file):
-            return output_file
         else:
-            return "Error: Audio file was not generated."
     except Exception as e:
-        return f"Error: {str(e)}"
 # Gradio interface function
 def create_gradio_interface():
@@ -43,40 +51,81 @@ def create_gradio_interface():
     loop = asyncio.get_event_loop()
     voices = loop.run_until_complete(get_voices())
     # Define Gradio interface
-    with gr.Blocks(title="Edge TTS Text-to-Speech") as interface:
-        gr.Markdown("# Edge TTS Text-to-Speech")
-        gr.Markdown("Enter text, select a voice, adjust rate and pitch, and generate audio.")
-        # Input components
-        text_input = gr.Textbox(label="Input Text", placeholder="Type your text here...")
-        voice_dropdown = gr.Dropdown(choices=voices, label="Voice", value=voices[0] if voices else None)
-        rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="Rate (%)")
-        pitch_slider = gr.Slider(minimum=-200, maximum=200, value=0, step=10, label="Pitch (Hz)")
-        # Generate button
-        generate_button = gr.Button("Generate Audio")
-        # Output
-        audio_output = gr.Audio(label="Generated Audio")
-        error_output = gr.Textbox(label="Status", interactive=False)
         # Button click event
         async def on_generate(text, voice, rate, pitch):
-            if not text:
-                return None, "Error: Please enter some text."
-            if not voice:
-                return None, "Error: Please select a voice."
-            result = await text_to_speech(text, voice, rate, pitch)
-            if result.startswith("Error"):
-                return None, result
-            return result, "Audio generated successfully!"
         generate_button.click(
             fn=on_generate,
             inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
-            outputs=[audio_output, error_output]
         )
     return interface
@@ -84,4 +133,4 @@ def create_gradio_interface():
 # Launch the interface
 if __name__ == "__main__":
     interface = create_gradio_interface()
-    interface.launch(server_name="0.0.0.0", server_port=7860)

 import os
 import edge_tts
 import gradio as gr
+from datetime import datetime
 # Function to get available voices
 async def get_voices():
+    try:
+        voices = await edge_tts.list_voices()
+        return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
+    except Exception as e:
+        return [f"Error fetching voices: {str(e)}"]
 # Function to convert text to speech
 async def text_to_speech(text, voice, rate, pitch):
     try:
+        if not text or not voice:
+            return None, "Error: Text and voice selection are required."
+        # Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
         voice_short_name = voice.split(" (")[0]
+        # Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
         rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
+        # Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
         pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
+        # Generate unique output filename with timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_file = f"output_{timestamp}.mp3"
         # Initialize edge-tts communication
         communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
         # Check if file was created
         if os.path.exists(output_file):
+            return output_file, "Audio generated successfully!"
         else:
+            return None, "Error: Audio file was not generated."
     except Exception as e:
+        return None, f"Error: {str(e)}"
 # Gradio interface function
 def create_gradio_interface():
     loop = asyncio.get_event_loop()
     voices = loop.run_until_complete(get_voices())
+    # Custom CSS for a polished look
+    css = """
+    .gradio-container {background-color: #f5f7fa;}
+    .title {text-align: center; color: #2c3e50;}
+    .footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
+    .button-primary {background-color: #3498db !important; color: white !important;}
+    .input-box {border-radius: 8px;}
+    """
     # Define Gradio interface
+    with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
+        gr.Markdown(
+            """
+            <h1 class='title'>Edge TTS Text-to-Speech</h1>
+            <p style='text-align: center;'>Convert text to speech with customizable voice, rate, and pitch.</p>
+            """
+        )
+        with gr.Row():
+            with gr.Column(scale=2):
+                text_input = gr.Textbox(
+                    label="Input Text",
+                    placeholder="Enter the text you want to convert to speech...",
+                    lines=5,
+                    elem_classes="input-box"
+                )
+                voice_dropdown = gr.Dropdown(
+                    choices=voices,
+                    label="Voice Model",
+                    value=voices[0] if voices else None,
+                    allow_custom_value=False
+                )
+                rate_slider = gr.Slider(
+                    minimum=-50,
+                    maximum=50,
+                    value=0,
+                    step=1,
+                    label="Speech Rate (%)",
+                    info="Adjust the speed of the speech (±50%)"
+                )
+                pitch_slider = gr.Slider(
+                    minimum=-200,
+                    maximum=200,
+                    value=0,
+                    step=10,
+                    label="Pitch (Hz)",
+                    info="Adjust the pitch of the voice (±200Hz)"
+                )
+                generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
+            with gr.Column(scale=1):
+                audio_output = gr.Audio(label="Generated Audio", interactive=False)
+                status_output = gr.Textbox(
+                    label="Status",
+                    interactive=False,
+                    placeholder="Status messages will appear here..."
+                )
         # Button click event
         async def on_generate(text, voice, rate, pitch):
+            audio, status = await text_to_speech(text, voice, rate, pitch)
+            return audio, status
         generate_button.click(
             fn=on_generate,
             inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
+            outputs=[audio_output, status_output]
+        )
+        gr.Markdown(
+            """
+            <p class='footer'>
+                Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
+            </p>
+            """
         )
     return interface
 # Launch the interface
 if __name__ == "__main__":
     interface = create_gradio_interface()
+    interface.launch(server_name="0.0.0.0", server_port=7860, share=False)