"""Gradio front-end for a local OpenAI-compatible text-to-speech HTTP server.

On import the script spawns ``tts-http-server`` in the background, builds the
Gradio interface, and waits for the server port to accept connections. The
web UI itself is launched only when the file is run as a script.
"""

import os
import socket
import subprocess
import tempfile
import threading
import time
import uuid
import wave

import gradio as gr
import requests

# Where the background TTS server is expected to listen.
TTS_HOST = "localhost"
TTS_PORT = 8080
TTS_SPEECH_URL = f"http://{TTS_HOST}:{TTS_PORT}/v1/audio/speech"

# (connect, read) timeouts in seconds. `requests` has no default timeout, so
# without this a stalled server would block a Gradio worker indefinitely.
# The read timeout is generous because synthesis of long text can be slow.
TTS_REQUEST_TIMEOUT = (5, 300)

# Format written into the WAV header around the server's response body.
# NOTE(review): assumes the server returns raw 16-bit mono PCM at 24 kHz;
# confirm against the TTS server's actual output format.
WAV_CHANNELS = 1
WAV_SAMPLE_WIDTH_BYTES = 2
WAV_FRAME_RATE_HZ = 24000


def start_tts_server():
    """Spawn the ``tts-http-server`` process without waiting for it.

    A failure to launch (for example, the executable is not on PATH) is
    reported on stdout instead of surfacing as an unhandled traceback in the
    background thread; the readiness check performed later will then time out.
    """
    try:
        subprocess.Popen(["tts-http-server"])
    except OSError as exc:
        print(f"Warning: could not start tts-http-server: {exc}")


# Start the server in a separate daemon thread so it never blocks shutdown.
threading.Thread(target=start_tts_server, daemon=True).start()


def wait_for_server(timeout=30, port=TTS_PORT, host=TTS_HOST):
    """Poll until a TCP connection to the TTS server succeeds.

    Args:
        timeout: Maximum number of seconds to keep polling.
        port: TCP port to probe.
        host: Host name or address to probe.

    Returns:
        True as soon as a connection attempt succeeds, False if ``timeout``
        seconds elapse first.
    """
    # Monotonic clock: the deadline is immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the socket even if the probe raises.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(1)
                if sock.connect_ex((host, port)) == 0:  # Port is open
                    return True
        except OSError:
            # connect_ex can still raise (e.g. name resolution failure);
            # treat that as "not ready yet" and retry.
            pass
        time.sleep(1)
    return False


def text_to_speech(text):
    """Synthesize ``text`` via the TTS server and return a WAV file path.

    The response body is wrapped in a WAV container and written to a uniquely
    named file in the system temporary directory.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the generated ``.wav`` file.

    Raises:
        gr.Error: If the request fails, the server answers with a non-200
            status, or the audio file cannot be written. Raising (rather than
            returning the message) keeps an error string from being handed to
            the ``gr.Audio`` output as if it were a file path.
    """
    try:
        response = requests.post(
            TTS_SPEECH_URL,
            headers={"Content-Type": "application/json"},
            json={
                "model": "tts-1",
                "input": text,
                "voice": "alloy",
            },
            timeout=TTS_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        raise gr.Error(f"Error: {exc}") from exc

    if response.status_code != 200:
        raise gr.Error(f"Error: {response.status_code} - {response.text}")

    # Unique filename per request so concurrent requests never collide.
    audio_file = os.path.join(
        tempfile.gettempdir(), f"tts_output_{uuid.uuid4().hex}.wav"
    )

    try:
        with wave.open(audio_file, "wb") as wav_file:
            wav_file.setnchannels(WAV_CHANNELS)
            wav_file.setsampwidth(WAV_SAMPLE_WIDTH_BYTES)
            wav_file.setframerate(WAV_FRAME_RATE_HZ)
            wav_file.writeframes(response.content)
    except (OSError, wave.Error) as exc:
        raise gr.Error(f"Error: {exc}") from exc

    return audio_file


# Create Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(
        placeholder="Enter text to convert to speech...",
        label="Text",
        lines=10,
    ),
    outputs=gr.Audio(type="filepath"),
    title="2cent TTS",
    description="Convert text to speech using an OpenAI TTS compatible server.",
    # NOTE: no cleanup of the generated WAV files is configured here; they
    # stay in the system temporary directory after Gradio is done with them.
    examples=[
        ["Hello, welcome to this text-to-speech demo. How does my voice sound today?"],
        [
            "Text-to-speech technology has evolved rapidly! It now handles inflections, pauses, and emotions. Can you believe how natural it sounds?"
        ],
        [
            "Have you ever wondered what it would be like to hear your writing read back to you with perfect pronunciation?"
        ],
        [
            "This neural network uses a 12-layer transformer architecture with 768-dimensional embeddings and 12 attention heads."
        ],
        [
            "The temperature today is 72.5 degrees Fahrenheit, with a 30% chance of rain and winds at 15 mph."
        ],
        [
            "I'm absolutely thrilled to announce that our team has finally achieved the breakthrough we've been working toward!"
        ],
        [
            '"Do you think we should knock first?" she asked.',
        ],
        [
            "The xylophonist performed a captivating rendition of Tchaikovsky's Symphony No. 6 in B minor."
        ],
        [
            "In 1969, Apollo 11 landed on the moon. The spacecraft carried 3 astronauts and traveled approximately 240,000 miles from Earth."
        ],
    ],
    cache_examples=False,  # Don't cache, generate new audio each time
)

# Wait for server to be ready before launching the interface
print("Waiting for TTS server to start...")
if wait_for_server():
    print("TTS server is ready. Starting Gradio interface.")
else:
    print(
        "Warning: TTS server did not start in the expected time. The interface may not work correctly."
    )

# Launch the app
if __name__ == "__main__":
    # max_threads caps the worker threads Gradio uses to run requests.
    demo.launch(server_name="0.0.0.0", server_port=7860, max_threads=40)