import asyncio
import os
import edge_tts
import gradio as gr
from datetime import datetime
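
# NOTE: assumes the "edge-tts" and "gradio" packages are installed
# (on Hugging Face Spaces they would typically be listed in requirements.txt).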

# Function to get available voices
async def get_voices():
    try:
        voices = await edge_tts.list_voices()
        return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
    except Exception as e:
        return [f"Error fetching voices: {str(e)}"]

# Function to convert text to speech
async def text_to_speech(text, voice, rate, pitch):
    try:
        if not text or not voice:
            return None, "Error: Text and voice selection are required."
        # Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
        voice_short_name = voice.split(" (")[0]
        # Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
        rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
        # Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
        pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
        # Generate unique output filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"output_{timestamp}.mp3"
        # Initialize edge-tts communication
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
        # Save the audio
        await communicate.save(output_file)
        # Check if file was created
        if os.path.exists(output_file):
            return output_file, "Audio generated successfully!"
        else:
            return None, "Error: Audio file was not generated."
    except Exception as e:
        return None, f"Error: {str(e)}"
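
# Example of calling the function standalone, outside the Gradio UI:
#   asyncio.run(text_to_speech("Hello world", "en-US-AvaNeural (Female)", 0, 0))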

# Gradio interface function
def create_gradio_interface():
    # Get voices synchronously before building the UI
    voices = asyncio.run(get_voices())

    # Custom CSS for a polished look
    css = """
    .gradio-container {background-color: #f5f7fa;}
    .title {text-align: center; color: #2c3e50;}
    .footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
    .button-primary {background-color: #3498db !important; color: white !important;}
    .input-box {border-radius: 8px;}
    """

    # Define Gradio interface
    with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
        gr.Markdown(
            """
            <h1 class='title'>Edge TTS Text-to-Speech</h1>
            <p style='text-align: center;'>Convert text to speech with customizable voice, rate, and pitch.</p>
            """
        )
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter the text you want to convert to speech...",
                    lines=5,
                    elem_classes="input-box"
                )
                voice_dropdown = gr.Dropdown(
                    choices=voices,
                    label="Voice Model",
                    value=voices[0] if voices else None,
                    allow_custom_value=False
                )
                rate_slider = gr.Slider(
                    minimum=-50,
                    maximum=50,
                    value=0,
                    step=1,
                    label="Speech Rate (%)",
                    info="Adjust the speed of the speech (±50%)"
                )
                pitch_slider = gr.Slider(
                    minimum=-200,
                    maximum=200,
                    value=0,
                    step=10,
                    label="Pitch (Hz)",
                    info="Adjust the pitch of the voice (±200Hz)"
                )
                generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Audio", interactive=False)
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    placeholder="Status messages will appear here..."
                )

        # Button click event
        async def on_generate(text, voice, rate, pitch):
            audio, status = await text_to_speech(text, voice, rate, pitch)
            return audio, status
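
        # Gradio accepts coroutine functions as event handlers, so the async
        # on_generate wrapper can be passed to .click() directly.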
        generate_button.click(
            fn=on_generate,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output, status_output]
        )

        gr.Markdown(
            """
            <p class='footer'>
            Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
            </p>
            """
        )

    return interface

# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
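
# To run locally (a rough sketch, assuming this file is saved as app.py):
#   pip install edge-tts gradio
#   python app.py
# The interface is then served at http://localhost:7860.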