import asyncio
import os
import edge_tts
import gradio as gr
from datetime import datetime
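# Dependencies: the "edge-tts" and "gradio" packages (e.g. pip install edge-tts gradio)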
# Function to get available voices
async def get_voices():
    try:
        voices = await edge_tts.list_voices()
        return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
    except Exception as e:
        return [f"Error fetching voices: {str(e)}"]
# Function to convert text to speech
async def text_to_speech(text, voice, rate, pitch):
    try:
        if not text or not voice:
            return None, "Error: Text and voice selection are required."
        # Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
        voice_short_name = voice.split(" (")[0]
        # Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
        rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
        # Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
        pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
        # Generate unique output filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"output_{timestamp}.mp3"
        # Initialize edge-tts communication
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
        # Save the audio
        await communicate.save(output_file)
        # Check if file was created
        if os.path.exists(output_file):
            return output_file, "Audio generated successfully!"
        else:
            return None, "Error: Audio file was not generated."
    except Exception as e:
        return None, f"Error: {str(e)}"
# Gradio interface function
def create_gradio_interface():
    # Fetch the voice list once at startup; asyncio.run() avoids the deprecated
    # get_event_loop()/run_until_complete pattern when no loop is running yet
    voices = asyncio.run(get_voices())
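    # If the voice lookup failed, the list holds a single "Error fetching voices..."
    # string; it becomes the only dropdown choice and synthesis will then fail with
    # a descriptive error in the status box.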
    # Custom CSS for a polished look
    css = """
    .gradio-container {background-color: #f5f7fa;}
    .title {text-align: center; color: #2c3e50;}
    .footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
    .button-primary {background-color: #3498db !important; color: white !important;}
    .input-box {border-radius: 8px;}
    """
    # Define Gradio interface
    with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
        gr.Markdown(
            """
            <h1 class='title'>Edge TTS Text-to-Speech</h1>
            <p style='text-align: center;'>Convert text to speech with customizable voice, rate, and pitch.</p>
            """
        )
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter the text you want to convert to speech...",
                    lines=5,
                    elem_classes="input-box"
                )
                voice_dropdown = gr.Dropdown(
                    choices=voices,
                    label="Voice Model",
                    value=voices[0] if voices else None,
                    allow_custom_value=False
                )
                rate_slider = gr.Slider(
                    minimum=-50,
                    maximum=50,
                    value=0,
                    step=1,
                    label="Speech Rate (%)",
                    info="Adjust the speed of the speech (±50%)"
                )
                pitch_slider = gr.Slider(
                    minimum=-200,
                    maximum=200,
                    value=0,
                    step=10,
                    label="Pitch (Hz)",
                    info="Adjust the pitch of the voice (±200Hz)"
                )
                generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Audio", interactive=False)
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    placeholder="Status messages will appear here..."
                )
        # Button click event
        async def on_generate(text, voice, rate, pitch):
            audio, status = await text_to_speech(text, voice, rate, pitch)
            return audio, status

        generate_button.click(
            fn=on_generate,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output, status_output]
        )
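        # Note: Gradio accepts async callbacks directly, so fn=text_to_speech would
        # work here as well; the wrapper above simply forwards the arguments.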
        gr.Markdown(
            """
            <p class='footer'>
                Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
            </p>
            """
        )

    return interface
# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(server_name="0.0.0.0", server_port=7860, share=False)