init
- Dockerfile +44 -0
- app.py +130 -0
Dockerfile
ADDED
@@ -0,0 +1,44 @@
FROM alpine AS onnxruntime

ADD https://github.com/microsoft/onnxruntime/releases/download/v1.21.0/onnxruntime-linux-x64-1.21.0.tgz /
RUN tar -xvf /onnxruntime-linux-x64-1.21.0.tgz

FROM ghcr.io/ggml-org/llama.cpp:server-b5318

RUN \
    apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests \
        espeak-ng && \
    rm -rf /var/lib/apt/lists/*

COPY --from=onnxruntime /onnxruntime-linux-x64-1.21.0/lib/libonnxruntime.so libonnxruntime.so
ENV ONNX_PATH=/app/libonnxruntime.so

ADD https://huggingface.co/onnx-community/snac_24khz-ONNX/resolve/main/onnx/decoder_model.onnx snac.onnx
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/2cent.gguf 2cent.gguf
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/tokenizer.json tokenizer.json
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/tts-http-server tts-http-server

RUN chmod +x tts-http-server

ENV PATH="$PATH:/app"

# For Hugging Face Spaces: the Gradio web UI needs Python
RUN \
    apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Install the Python packages for the web UI
RUN pip install gradio requests

# Copy the app file
COPY app.py app.py

# Expose the Gradio port
EXPOSE 7860

# Start both the TTS server and the Gradio interface
ENTRYPOINT ["python3", "-u", "app.py"]
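The image runs two services: tts-http-server, which serves an OpenAI-compatible speech endpoint on port 80 inside the container, and the Gradio UI on port 7860 (the only port the Dockerfile EXPOSEs). As a rough smoke test of the TTS endpoint itself, the sketch below mirrors the request that app.py sends; it assumes you publish the container's port 80 to the host (here as 8080), which the Dockerfile does not do by itself, and that tts-http-server accepts exactly this payload.

# Minimal smoke test for the OpenAI-compatible endpoint served by tts-http-server.
# Assumption: the container was started with its port 80 published to host port 8080.
import wave

import requests

BASE_URL = "http://localhost:8080"  # assumption: host port mapped to the container's port 80

resp = requests.post(
    f"{BASE_URL}/v1/audio/speech",
    json={"model": "tts-1", "input": "Hello from the smoke test.", "voice": "alloy"},
    timeout=60,
)
resp.raise_for_status()

# app.py treats the response body as raw 16-bit mono PCM at 24 kHz; wrap it in a WAV header.
with wave.open("smoke_test.wav", "wb") as wav_file:
    wav_file.setnchannels(1)
    wav_file.setsampwidth(2)
    wav_file.setframerate(24000)
    wav_file.writeframes(resp.content)
print("wrote smoke_test.wav")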
app.py
ADDED
@@ -0,0 +1,130 @@
import gradio as gr
import requests
import time
import subprocess
import threading
import wave
import os
import uuid
import tempfile
import socket


# Start the TTS server in the background
def start_tts_server():
    subprocess.Popen(["tts-http-server"])


# Start the server in a separate thread
threading.Thread(target=start_tts_server, daemon=True).start()


# Wait for the server port to be open
def wait_for_server(timeout=30):
    port = 80  # The port the TTS server listens on
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            # Try to connect to the server port
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(1)
            result = sock.connect_ex(("localhost", port))
            sock.close()
            if result == 0:  # Port is open
                return True
        except OSError:
            pass
        time.sleep(1)
    return False


# Convert text to speech via the OpenAI-compatible endpoint
def text_to_speech(text):
    try:
        response = requests.post(
            "http://localhost:80/v1/audio/speech",
            headers={"Content-Type": "application/json"},
            json={
                "model": "tts-1",
                "input": text,
                "voice": "alloy",
            },
        )
    except requests.RequestException as e:
        # Surface connection problems in the UI instead of returning a bogus file path
        raise gr.Error(f"Error: {e}")

    if response.status_code != 200:
        raise gr.Error(f"Error: {response.status_code} - {response.text}")

    # Create a unique filename for each request
    unique_id = uuid.uuid4().hex
    temp_dir = tempfile.gettempdir()
    audio_file = os.path.join(temp_dir, f"tts_output_{unique_id}.wav")

    # Wrap the raw PCM data in a WAV container
    pcm_data = response.content
    with wave.open(audio_file, "wb") as wav_file:
        wav_file.setnchannels(1)  # Mono
        wav_file.setsampwidth(2)  # 16-bit (2 bytes)
        wav_file.setframerate(24000)  # 24 kHz, matching the SNAC decoder
        wav_file.writeframes(pcm_data)

    return audio_file


# Create Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(
        placeholder="Enter text to convert to speech...",
        label="Text",
        lines=10,
    ),
    outputs=gr.Audio(type="filepath"),
    title="2cent TTS",
    description="Convert text to speech using an OpenAI TTS compatible server.",
    examples=[
        ["Hello, welcome to this text-to-speech demo. How does my voice sound today?"],
        [
            "Text-to-speech technology has evolved rapidly! It now handles inflections, pauses, and emotions. Can you believe how natural it sounds?"
        ],
        [
            "Have you ever wondered what it would be like to hear your writing read back to you with perfect pronunciation?"
        ],
        [
            "This neural network uses a 12-layer transformer architecture with 768-dimensional embeddings and 12 attention heads."
        ],
        [
            "The temperature today is 72.5 degrees Fahrenheit, with a 30% chance of rain and winds at 15 mph."
        ],
        [
            "I'm absolutely thrilled to announce that our team has finally achieved the breakthrough we've been working toward!"
        ],
        [
            '"Do you think we should knock first?" she asked.',
        ],
        [
            "The xylophonist performed a captivating rendition of Tchaikovsky's Symphony No. 6 in B minor."
        ],
        [
            "In 1969, Apollo 11 landed on the moon. The spacecraft carried 3 astronauts and traveled approximately 240,000 miles from Earth."
        ],
    ],
    cache_examples=False,  # Don't cache; generate new audio each time
)


# Wait for server to be ready before launching the interface
print("Waiting for TTS server to start...")
if wait_for_server():
    print("TTS server is ready. Starting Gradio interface.")
else:
    print(
        "Warning: TTS server did not start in the expected time. The interface may not work correctly."
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, max_threads=40)
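Once the container is up, the Gradio app can also be driven programmatically rather than through the browser. A minimal sketch, assuming gradio_client is installed on the calling machine (it is not part of this image), the app is reachable at http://localhost:7860, and "/predict" is the default endpoint name Gradio assigns to a single-function Interface:

# Hypothetical client-side usage of the running Space (not part of this commit).
from gradio_client import Client

client = Client("http://localhost:7860/")
# Returns a local path to the WAV file produced by text_to_speech.
wav_path = client.predict(
    "Hello, welcome to this text-to-speech demo.", api_name="/predict"
)
print(f"Generated audio saved to: {wav_path}")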