Spaces:

taylorchu
/

2cent-tts

Running

App Files Files Community

2cent-tts / app.py

taylorchu

8080

2a32af5 3 months ago

raw

history blame contribute delete

4.35 kB

	import gradio as gr
	import requests
	import time
	import subprocess
	import threading
	import wave
	import os
	import uuid
	import tempfile
	import socket


	# Start the TTS server in the background
	def start_tts_server():
	    """Launch the ``tts-http-server`` executable as a child process.

	    The child is spawned with ``subprocess.Popen`` and is not waited on,
	    so this function returns as soon as the process has been created.
	    """
	    server_command = ["tts-http-server"]
	    subprocess.Popen(server_command)


	# Kick off the launcher on a daemon thread so it cannot keep the
	# interpreter alive at shutdown.
	threading.Thread(
	    target=start_tts_server,
	    daemon=True,
	).start()


	# Wait for the server port to be open
	def wait_for_server(timeout=30, port=8080, host="localhost"):
	    """Poll a TCP port until it accepts a connection or the timeout expires.

	    Args:
	        timeout: Maximum number of seconds to keep polling. A value of 0
	            (or less) performs no connection attempt at all.
	        port: TCP port to probe. Defaults to 8080, the port the TTS server
	            is expected to listen on.
	        host: Host name or IPv4 address to probe.

	    Returns:
	        True as soon as one connection attempt succeeds, False if the
	        deadline passes without a successful connection.
	    """
	    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
	    deadline = time.monotonic() + timeout
	    while time.monotonic() < deadline:
	        try:
	            # The context manager closes the socket even if the probe raises.
	            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
	                sock.settimeout(1)
	                if sock.connect_ex((host, port)) == 0:  # Port is open
	                    return True
	        except OSError:
	            # Name-resolution and other socket failures are treated like a
	            # closed port: keep polling until the deadline.
	            pass
	        time.sleep(1)
	    return False


	# Function to convert text to speech
	def text_to_speech(text, timeout=(5, 300)):
	    """Synthesize *text* through the local TTS server and save it as a WAV file.

	    The text is POSTed to ``http://localhost:8080/v1/audio/speech``; the
	    response body is written as the frames of a mono, 16-bit, 24 kHz WAV
	    file in the system temporary directory.

	    Args:
	        text: The text to convert to speech.
	        timeout: ``(connect, read)`` timeout in seconds handed to
	            ``requests.post`` so a stalled server cannot block the caller
	            forever.

	    Returns:
	        The path of the generated WAV file on success. On any failure
	        (empty input, non-200 response, or an exception) a string starting
	        with ``"Error: "`` is returned instead of raising.
	    """
	    # NOTE(review): the error strings below end up in an Audio output that
	    # expects a file path -- confirm how the UI should surface failures.
	    if not text or not text.strip():
	        # Nothing to synthesize; skip the round trip to the server.
	        return "Error: input text is empty"

	    try:
	        response = requests.post(
	            "http://localhost:8080/v1/audio/speech",
	            headers={"Content-Type": "application/json"},
	            json={
	                "model": "tts-1",
	                "input": text,
	                "voice": "alloy",
	            },
	            timeout=timeout,
	        )

	        if response.status_code != 200:
	            return f"Error: {response.status_code} - {response.text}"

	        # A random name per request keeps concurrent calls from clobbering
	        # each other's output.
	        audio_file = os.path.join(
	            tempfile.gettempdir(), f"tts_output_{uuid.uuid4().hex}.wav"
	        )

	        # The response body is treated as raw PCM and wrapped in a WAV
	        # container.
	        with wave.open(audio_file, "wb") as wav_file:
	            wav_file.setnchannels(1)  # Mono
	            wav_file.setsampwidth(2)  # 16-bit (2 bytes)
	            wav_file.setframerate(24000)  # 24kHz
	            wav_file.writeframes(response.content)

	        return audio_file
	    except Exception as e:
	        return f"Error: {str(e)}"


	# Build the Gradio UI: one multi-line text box in, one audio file path out
	demo = gr.Interface(
	    fn=text_to_speech,
	    inputs=gr.Textbox(
	        label="Text",
	        lines=10,
	        placeholder="Enter text to convert to speech...",
	    ),
	    outputs=gr.Audio(type="filepath"),
	    title="2cent TTS",
	    description="Convert text to speech using an OpenAI TTS compatible server.",
	    # Examples are synthesized on demand instead of being pre-rendered
	    cache_examples=False,
	    # Each example row holds the single value for the text input
	    examples=[
	        ["Hello, welcome to this text-to-speech demo. How does my voice sound today?"],
	        ["Text-to-speech technology has evolved rapidly! It now handles inflections, pauses, and emotions. Can you believe how natural it sounds?"],
	        ["Have you ever wondered what it would be like to hear your writing read back to you with perfect pronunciation?"],
	        ["This neural network uses a 12-layer transformer architecture with 768-dimensional embeddings and 12 attention heads."],
	        ["The temperature today is 72.5 degrees Fahrenheit, with a 30% chance of rain and winds at 15 mph."],
	        ["I'm absolutely thrilled to announce that our team has finally achieved the breakthrough we've been working toward!"],
	        ['"Do you think we should knock first?" she asked.'],
	        ["The xylophonist performed a captivating rendition of Tchaikovsky's Symphony No. 6 in B minor."],
	        ["In 1969, Apollo 11 landed on the moon. The spacecraft carried 3 astronauts and traveled approximately 240,000 miles from Earth."],
	    ],
	)

	# Give the TTS server a chance to come up and report the outcome; the
	# script carries on either way.
	print("Waiting for TTS server to start...")
	if not wait_for_server():
	    print(
	        "Warning: TTS server did not start in the expected time. The interface may not work correctly."
	    )
	else:
	    print("TTS server is ready. Starting Gradio interface.")

	# Serve the interface only when this file is executed as a script
	if __name__ == "__main__":
	    # Listen on all interfaces at port 7860 with max_threads set to 40
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        max_threads=40,
	    )