# Spaces: Sleeping
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr
# Function to get available voices
async def get_voices():
    """Return one "ShortName (Gender)" label per voice reported by edge-tts."""
    catalog = await edge_tts.list_voices()

    def to_label(entry):
        # The label format is parsed again later by splitting on " (".
        short_name, gender = entry["ShortName"], entry["Gender"]
        return f"{short_name} ({gender})"

    return list(map(to_label, catalog))
# Function to convert text to speech
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* into an MP3 file using edge-tts.

    Args:
        text: The text to speak.
        voice: A dropdown label such as "en-US-AvaNeural (Female)"; only the
            part before " (" is passed to edge-tts as the voice name.
        rate: Speaking-rate adjustment in percent (e.g. 10 -> "+10%").
        pitch: Pitch adjustment in Hz (e.g. -20 -> "-20Hz").

    Returns:
        The path of the generated MP3 file on success. On any failure, a
        message string starting with "Error" (nothing is raised for
        ordinary exceptions, so callers must check the prefix).
    """
    output_file = None
    succeeded = False
    try:
        # "en-US-AvaNeural (Female)" -> "en-US-AvaNeural"
        voice_short_name = voice.split(" (")[0]

        # ":+d" always emits a sign, so a value that truncates to zero becomes
        # "+0%" / "+0Hz" instead of an unsigned "0%" / "0Hz".
        rate_str = f"{int(rate):+d}%"
        pitch_str = f"{int(pitch):+d}Hz"

        # One file per call: a shared fixed name would let concurrent requests
        # overwrite each other and let a stale file mask a failed synthesis.
        fd, output_file = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)

        communicate = edge_tts.Communicate(
            text, voice_short_name, rate=rate_str, pitch=pitch_str
        )
        await communicate.save(output_file)

        # mkstemp already created the file, so existence proves nothing;
        # require that some audio was actually written.
        if os.path.getsize(output_file) == 0:
            return "Error: Audio file was not generated."

        succeeded = True
        return output_file
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Do not leave an empty or partial temp file behind on failure.
        # On success the file is kept; the caller owns it from here.
        if output_file is not None and not succeeded:
            try:
                os.remove(output_file)
            except OSError:
                pass
# Gradio interface function
def create_gradio_interface():
    """Build the Gradio Blocks UI for the text-to-speech app.

    The voice list is fetched once, before the UI is built, by running
    get_voices() to completion. Must be called from synchronous code (no
    event loop already running in this thread).

    Returns:
        The constructed gr.Blocks interface; it is not launched here.
    """
    # asyncio.run creates and tears down its own loop. The previous
    # get_event_loop().run_until_complete(...) relied on implicit loop
    # creation, which is deprecated in recent Python versions.
    voices = asyncio.run(get_voices())

    with gr.Blocks(title="Edge TTS Text-to-Speech") as interface:
        gr.Markdown("# Edge TTS Text-to-Speech")
        gr.Markdown("Enter text, select a voice, adjust rate and pitch, and generate audio.")

        # Input components
        text_input = gr.Textbox(label="Input Text", placeholder="Type your text here...")
        voice_dropdown = gr.Dropdown(
            choices=voices,
            label="Voice",
            value=voices[0] if voices else None,
        )
        rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="Rate (%)")
        pitch_slider = gr.Slider(minimum=-200, maximum=200, value=0, step=10, label="Pitch (Hz)")

        generate_button = gr.Button("Generate Audio")

        # Outputs: the audio player plus a read-only status line
        audio_output = gr.Audio(label="Generated Audio")
        error_output = gr.Textbox(label="Status", interactive=False)

        async def on_generate(text, voice, rate, pitch):
            """Validate the inputs, synthesize, and return (audio, status)."""
            # Reject whitespace-only text too, rather than sending it to the
            # TTS service.
            if not text or not text.strip():
                return None, "Error: Please enter some text."
            if not voice:
                return None, "Error: Please select a voice."
            result = await text_to_speech(text, voice, rate, pitch)
            # text_to_speech reports failures as a string starting with "Error".
            if result.startswith("Error"):
                return None, result
            return result, "Audio generated successfully!"

        generate_button.click(
            fn=on_generate,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output, error_output],
        )

    return interface
# Launch the interface
if __name__ == "__main__":
    # Build the UI first, then serve it on all network interfaces, port 7860.
    interface = create_gradio_interface()
    interface.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )