Edge-TTS-Text-to-Speech

Sleeping

App Files Files Community

Edge-TTS-Text-to-Speech / app.py

EmRa228

Update app.py

7877a4f verified 3 months ago

raw

history blame

3.37 kB

	import asyncio
	import os
	import tempfile

	import edge_tts
	import gradio as gr

	# Fetch the voice catalogue and turn it into dropdown labels
	async def get_voices():
	    """Return one display label per voice reported by edge-tts.

	    Each label has the form ``"<ShortName> (<Gender>)"``, built from the
	    ``ShortName`` and ``Gender`` keys of every entry returned by
	    ``edge_tts.list_voices()``.
	    """
	    catalogue = await edge_tts.list_voices()
	    labels = []
	    for entry in catalogue:
	        labels.append(f"{entry['ShortName']} ({entry['Gender']})")
	    return labels

	# Function to convert text to speech
	async def text_to_speech(text, voice, rate, pitch):
	    """Synthesize *text* with edge-tts and return the path of the MP3 written.

	    Args:
	        text: Text to speak.
	        voice: Dropdown label such as "en-US-AvaNeural (Female)"; only the
	            part before " (" is passed to edge-tts as the voice name.
	        rate: Speaking-rate offset in percent; truncated to an integer.
	        pitch: Pitch offset in Hz; truncated to an integer.

	    Returns:
	        The path of the generated audio file on success, otherwise a
	        message starting with "Error" (callers test for that prefix).
	        Any exception raised while synthesizing is reported this way
	        rather than propagated.
	    """
	    output_file = None
	    try:
	        # "en-US-AvaNeural (Female)" -> "en-US-AvaNeural"
	        voice_short_name = voice.split(" (")[0]

	        # The "+d" format always emits a sign, so a value that truncates to
	        # zero from below (e.g. -0.5) yields "+0%" instead of an unsigned
	        # "0%", keeping the string in the signed "+10%" / "-10Hz" form.
	        rate_str = f"{int(rate):+d}%"
	        pitch_str = f"{int(pitch):+d}Hz"

	        # One file per request: a shared fixed name would let concurrent
	        # requests overwrite each other's audio. The handle is closed
	        # immediately so edge-tts can reopen the path on every platform.
	        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
	            output_file = handle.name

	        communicate = edge_tts.Communicate(
	            text, voice_short_name, rate=rate_str, pitch=pitch_str
	        )
	        await communicate.save(output_file)

	        # The temp file exists before saving, so check for content rather
	        # than mere existence.
	        if os.path.getsize(output_file) > 0:
	            return output_file
	        error = "Error: Audio file was not generated."
	    except Exception as e:
	        error = f"Error: {str(e)}"

	    # Failure path: do not leave an empty or partial temp file behind.
	    if output_file is not None:
	        try:
	            os.remove(output_file)
	        except OSError:
	            pass  # best-effort cleanup; the error message is what matters
	    return error

	# Gradio interface function
	def create_gradio_interface():
	    """Build the Gradio Blocks UI for the text-to-speech app.

	    Fetches the voice list once, then lays out the text box, voice
	    dropdown, rate/pitch sliders, generate button, and the audio and
	    status outputs, and wires the button to the synthesis handler.

	    Returns:
	        The assembled ``gr.Blocks`` instance (not yet launched).
	    """
	    # asyncio.run creates and closes its own event loop; calling
	    # asyncio.get_event_loop() with no running loop is deprecated.
	    voices = asyncio.run(get_voices())

	    # Define Gradio interface
	    with gr.Blocks(title="Edge TTS Text-to-Speech") as interface:
	        gr.Markdown("# Edge TTS Text-to-Speech")
	        gr.Markdown("Enter text, select a voice, adjust rate and pitch, and generate audio.")

	        # Input components
	        text_input = gr.Textbox(label="Input Text", placeholder="Type your text here...")
	        voice_dropdown = gr.Dropdown(
	            choices=voices,
	            label="Voice",
	            value=voices[0] if voices else None,
	        )
	        rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="Rate (%)")
	        pitch_slider = gr.Slider(minimum=-200, maximum=200, value=0, step=10, label="Pitch (Hz)")

	        # Generate button
	        generate_button = gr.Button("Generate Audio")

	        # Output
	        audio_output = gr.Audio(label="Generated Audio")
	        error_output = gr.Textbox(label="Status", interactive=False)

	        # Button click event
	        async def on_generate(text, voice, rate, pitch):
	            """Validate the inputs, synthesize, and return (audio, status)."""
	            if not text:
	                return None, "Error: Please enter some text."
	            if not voice:
	                return None, "Error: Please select a voice."

	            # text_to_speech reports failures as strings starting with "Error"
	            result = await text_to_speech(text, voice, rate, pitch)
	            if result.startswith("Error"):
	                return None, result
	            return result, "Audio generated successfully!"

	        generate_button.click(
	            fn=on_generate,
	            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
	            outputs=[audio_output, error_output],
	        )

	    return interface

	# Script entry point: build the UI, then serve it on all interfaces at port 7860
	if __name__ == "__main__":
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    interface = create_gradio_interface()
	    interface.launch(**launch_options)