taylorchu committed on
Commit
89b236a
·
1 Parent(s): c0ec5ca
Files changed (2) hide show
  1. Dockerfile +44 -0
  2. app.py +130 -0
Dockerfile ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stage 1: fetch and unpack the prebuilt ONNX Runtime shared library.
FROM alpine AS onnxruntime

ADD https://github.com/microsoft/onnxruntime/releases/download/v1.21.0/onnxruntime-linux-x64-1.21.0.tgz /
RUN tar -xvf /onnxruntime-linux-x64-1.21.0.tgz

# Stage 2: runtime image based on the llama.cpp server.
FROM ghcr.io/ggml-org/llama.cpp:server-b5318

# espeak-ng is needed at runtime by the TTS server.
RUN \
apt-get update && \
apt-get install -y --no-install-recommends --no-install-suggests \
espeak-ng && \
rm -rf /var/lib/apt/lists/*

# Only the shared library is taken from stage 1; ONNX_PATH tells the
# server where to find it.
COPY --from=onnxruntime /onnxruntime-linux-x64-1.21.0/lib/libonnxruntime.so libonnxruntime.so
ENV ONNX_PATH=/app/libonnxruntime.so

# Model assets: SNAC audio decoder, 2cent GGUF model, tokenizer, and the
# prebuilt TTS HTTP server binary.
ADD https://huggingface.co/onnx-community/snac_24khz-ONNX/resolve/main/onnx/decoder_model.onnx snac.onnx
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/2cent.gguf 2cent.gguf
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/tokenizer.json tokenizer.json
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/tts-http-server tts-http-server

# Files fetched with ADD are not executable by default.
RUN chmod +x tts-http-server

ENV PATH="$PATH:/app"

# For huggingface

RUN \
apt-get update && \
apt-get install -y --no-install-recommends --no-install-suggests \
python3-pip && \
rm -rf /var/lib/apt/lists/*

# Install necessary packages for the web UI.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir gradio requests

# Copy the app file
COPY app.py app.py

# Expose the Gradio port (Hugging Face Spaces expects 7860).
EXPOSE 7860

# Start both the TTS server and the Gradio interface
ENTRYPOINT ["python3", "-u", "app.py"]
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import time
4
+ import subprocess
5
+ import threading
6
+ import wave
7
+ import os
8
+ import uuid
9
+ import tempfile
10
+ import socket
11
+
12
+
13
# Launch the TTS backend as a background child process.
def start_tts_server():
    subprocess.Popen(["tts-http-server"])


# Run the launcher on a daemon thread so startup is not blocked and the
# thread dies with the main process.
_server_thread = threading.Thread(target=start_tts_server, daemon=True)
_server_thread.start()
20
+
21
+
22
# Wait for the server port to be open
def wait_for_server(timeout=30, port=80):
    """Poll until a TCP connection to localhost:``port`` succeeds.

    Args:
        timeout: Maximum number of seconds to keep polling.
        port: TCP port the TTS server listens on (default 80, the port
            the backend binds to in this container).

    Returns:
        True as soon as the port accepts a connection, False if
        ``timeout`` seconds elapse first.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.settimeout(1)
                # connect_ex returns 0 on success instead of raising.
                if sock.connect_ex(("localhost", port)) == 0:
                    return True
            finally:
                # Always release the socket, even if settimeout/connect raise.
                sock.close()
        except OSError:
            # Transient socket-level errors (e.g. resolution failure): retry.
            pass
        time.sleep(1)
    return False
39
+
40
+
41
# Function to convert text to speech
def text_to_speech(text):
    """Send ``text`` to the local TTS server and return a WAV file path.

    Args:
        text: The text to synthesize.

    Returns:
        Path to the generated ``.wav`` file on success, or an error
        string (surfaced by Gradio) on any failure.
    """
    try:
        response = requests.post(
            "http://localhost:80/v1/audio/speech",
            headers={"Content-Type": "application/json"},
            json={
                "model": "tts-1",
                "input": text,
                "voice": "alloy",
            },
            # Without a timeout a stuck backend would hang this Gradio
            # worker forever; 120s allows for long inputs.
            timeout=120,
        )

        if response.status_code == 200:
            # Create a unique filename for each request so concurrent
            # sessions never clobber each other's output.
            unique_id = uuid.uuid4().hex
            temp_dir = tempfile.gettempdir()
            audio_file = os.path.join(temp_dir, f"tts_output_{unique_id}.wav")

            # The server returns raw PCM; wrap it in a WAV container.
            pcm_data = response.content

            with wave.open(audio_file, "wb") as wav_file:
                wav_file.setnchannels(1)  # Mono
                wav_file.setsampwidth(2)  # 16-bit (2 bytes)
                wav_file.setframerate(24000)  # 24kHz (common for TTS)
                wav_file.writeframes(pcm_data)

            return audio_file
        else:
            return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        # Best-effort: report the failure as a string rather than crashing
        # the UI handler.
        return f"Error: {str(e)}"
74
+
75
+
76
# Example prompts shown under the interface; each entry is one input row.
_EXAMPLE_PROMPTS = [
    ["Hello, welcome to this text-to-speech demo. How does my voice sound today?"],
    [
        "Text-to-speech technology has evolved rapidly! It now handles inflections, pauses, and emotions. Can you believe how natural it sounds?"
    ],
    [
        "Have you ever wondered what it would be like to hear your writing read back to you with perfect pronunciation?"
    ],
    [
        "This neural network uses a 12-layer transformer architecture with 768-dimensional embeddings and 12 attention heads."
    ],
    [
        "The temperature today is 72.5 degrees Fahrenheit, with a 30% chance of rain and winds at 15 mph."
    ],
    [
        "I'm absolutely thrilled to announce that our team has finally achieved the breakthrough we've been working toward!"
    ],
    [
        '"Do you think we should knock first?" she asked.',
    ],
    [
        "The xylophonist performed a captivating rendition of Tchaikovsky's Symphony No. 6 in B minor."
    ],
    [
        "In 1969, Apollo 11 landed on the moon. The spacecraft carried 3 astronauts and traveled approximately 240,000 miles from Earth."
    ],
]

# Gradio UI: one text box in, one audio player out.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(
        placeholder="Enter text to convert to speech...",
        label="Text",
        lines=10,
    ),
    outputs=gr.Audio(type="filepath"),
    title="2cent TTS",
    description="Convert text to speech using an OpenAI TTS compatible server.",
    examples=_EXAMPLE_PROMPTS,
    cache_examples=False,  # Don't cache, generate new audio each time
)
117
+
118
# Block until the backend port answers so the first UI request can succeed.
print("Waiting for TTS server to start...")
server_ready = wait_for_server()
if server_ready:
    print("TTS server is ready. Starting Gradio interface.")
else:
    print(
        "Warning: TTS server did not start in the expected time. The interface may not work correctly."
    )

# Launch the app
if __name__ == "__main__":
    # Add concurrency_count if you want to limit concurrent processing
    demo.launch(server_name="0.0.0.0", server_port=7860, max_threads=40)