import asyncio
import os
import edge_tts
import gradio as gr
from datetime import datetime
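# Dependencies: the "edge-tts" and "gradio" packages (e.g. pip install edge-tts gradio)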
# Function to get available voices
async def get_voices():
    try:
        voices = await edge_tts.list_voices()
        return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
    except Exception as e:
        return [f"Error fetching voices: {str(e)}"]
# Function to convert text to speech
async def text_to_speech(text, voice, rate, pitch):
    try:
        if not text or not voice:
            return None, "Error: Text and voice selection are required."
        # Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
        voice_short_name = voice.split(" (")[0]
        # Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
        rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
        # Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
        pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
        # Generate unique output filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"output_{timestamp}.mp3"
        # Initialize edge-tts communication
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
        # Save the audio
        await communicate.save(output_file)
        # Check if file was created
        if os.path.exists(output_file):
            return output_file, "Audio generated successfully!"
        else:
            return None, "Error: Audio file was not generated."
    except Exception as e:
        return None, f"Error: {str(e)}"
# Gradio interface function
def create_gradio_interface():
    # Fetch the voice list once at startup; asyncio.run() avoids the deprecated
    # get_event_loop()/run_until_complete pattern when no loop is running yet
    voices = asyncio.run(get_voices())
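    # If the voice lookup failed, the list holds a single "Error fetching voices..."
    # string; it becomes the only dropdown choice and synthesis will then fail with
    # a descriptive error in the status box.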
    # Custom CSS for a polished look
    css = """
    .gradio-container {background-color: #f5f7fa;}
    .title {text-align: center; color: #2c3e50;}
    .footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
    .button-primary {background-color: #3498db !important; color: white !important;}
    .input-box {border-radius: 8px;}
    """
    # Define Gradio interface
    with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
        gr.Markdown(
            """
            <h1 class='title'>Edge TTS Text-to-Speech</h1>
            <p style='text-align: center;'>Convert text to speech with customizable voice, rate, and pitch.</p>
            """
        )
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter the text you want to convert to speech...",
                    lines=5,
                    elem_classes="input-box"
                )
                voice_dropdown = gr.Dropdown(
                    choices=voices,
                    label="Voice Model",
                    value=voices[0] if voices else None,
                    allow_custom_value=False
                )
                rate_slider = gr.Slider(
                    minimum=-50,
                    maximum=50,
                    value=0,
                    step=1,
                    label="Speech Rate (%)",
                    info="Adjust the speed of the speech (±50%)"
                )
                pitch_slider = gr.Slider(
                    minimum=-200,
                    maximum=200,
                    value=0,
                    step=10,
                    label="Pitch (Hz)",
                    info="Adjust the pitch of the voice (±200Hz)"
                )
                generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Audio", interactive=False)
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    placeholder="Status messages will appear here..."
                )
        # Button click event
        async def on_generate(text, voice, rate, pitch):
            audio, status = await text_to_speech(text, voice, rate, pitch)
            return audio, status

        generate_button.click(
            fn=on_generate,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output, status_output]
        )
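        # Note: Gradio accepts async callbacks directly, so fn=text_to_speech would
        # work here as well; the wrapper above simply forwards the arguments.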
        gr.Markdown(
            """
            <p class='footer'>
                Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
            </p>
            """
        )

    return interface
# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(server_name="0.0.0.0", server_port=7860, share=False)