init
- Dockerfile +44 -0
- app.py +130 -0
Dockerfile
ADDED
@@ -0,0 +1,44 @@
FROM alpine AS onnxruntime

ADD https://github.com/microsoft/onnxruntime/releases/download/v1.21.0/onnxruntime-linux-x64-1.21.0.tgz /
RUN tar -xvf /onnxruntime-linux-x64-1.21.0.tgz

FROM ghcr.io/ggml-org/llama.cpp:server-b5318

RUN \
    apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests \
        espeak-ng && \
    rm -rf /var/lib/apt/lists/*

COPY --from=onnxruntime /onnxruntime-linux-x64-1.21.0/lib/libonnxruntime.so libonnxruntime.so
ENV ONNX_PATH=/app/libonnxruntime.so

ADD https://huggingface.co/onnx-community/snac_24khz-ONNX/resolve/main/onnx/decoder_model.onnx snac.onnx
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/2cent.gguf 2cent.gguf
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/tokenizer.json tokenizer.json
ADD https://github.com/taylorchu/2cent-tts/releases/download/v0.2.0/tts-http-server tts-http-server

RUN chmod +x tts-http-server

ENV PATH="$PATH:/app"

# For Hugging Face Spaces: the Gradio web UI needs Python
RUN \
    apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Install the Python packages for the web UI
RUN pip install gradio requests

# Copy the app file
COPY app.py app.py

# Expose the Gradio port
EXPOSE 7860

# Start both the TTS server and the Gradio interface
ENTRYPOINT ["python3", "-u", "app.py"]
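The image runs two services: tts-http-server, which serves an OpenAI-compatible speech endpoint on port 80 inside the container, and the Gradio UI on port 7860 (the only port the Dockerfile EXPOSEs). As a rough smoke test of the TTS endpoint itself, the sketch below mirrors the request that app.py sends; it assumes you publish the container's port 80 to the host (here as 8080), which the Dockerfile does not do by itself, and that tts-http-server accepts exactly this payload.

# Minimal smoke test for the OpenAI-compatible endpoint served by tts-http-server.
# Assumption: the container was started with its port 80 published to host port 8080.
import wave

import requests

BASE_URL = "http://localhost:8080"  # assumption: host port mapped to the container's port 80

resp = requests.post(
    f"{BASE_URL}/v1/audio/speech",
    json={"model": "tts-1", "input": "Hello from the smoke test.", "voice": "alloy"},
    timeout=60,
)
resp.raise_for_status()

# app.py treats the response body as raw 16-bit mono PCM at 24 kHz; wrap it in a WAV header.
with wave.open("smoke_test.wav", "wb") as wav_file:
    wav_file.setnchannels(1)
    wav_file.setsampwidth(2)
    wav_file.setframerate(24000)
    wav_file.writeframes(resp.content)
print("wrote smoke_test.wav")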
app.py
ADDED
@@ -0,0 +1,130 @@
import gradio as gr
import requests
import time
import subprocess
import threading
import wave
import os
import uuid
import tempfile
import socket


# Start the TTS server in the background
def start_tts_server():
    subprocess.Popen(["tts-http-server"])


# Start the server in a separate thread
threading.Thread(target=start_tts_server, daemon=True).start()


# Wait for the server port to be open
def wait_for_server(timeout=30):
    port = 80  # The port the TTS server listens on
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            # Try to connect to the server port
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(1)
            result = sock.connect_ex(("localhost", port))
            sock.close()
            if result == 0:  # Port is open
                return True
        except OSError:
            pass
        time.sleep(1)
    return False


# Convert text to speech via the OpenAI-compatible endpoint
def text_to_speech(text):
    try:
        response = requests.post(
            "http://localhost:80/v1/audio/speech",
            headers={"Content-Type": "application/json"},
            json={
                "model": "tts-1",
                "input": text,
                "voice": "alloy",
            },
        )
    except requests.RequestException as e:
        # Surface connection problems in the UI instead of returning a bogus file path
        raise gr.Error(f"Error: {e}")

    if response.status_code != 200:
        raise gr.Error(f"Error: {response.status_code} - {response.text}")

    # Create a unique filename for each request
    unique_id = uuid.uuid4().hex
    temp_dir = tempfile.gettempdir()
    audio_file = os.path.join(temp_dir, f"tts_output_{unique_id}.wav")

    # Wrap the raw PCM data in a WAV container
    pcm_data = response.content
    with wave.open(audio_file, "wb") as wav_file:
        wav_file.setnchannels(1)  # Mono
        wav_file.setsampwidth(2)  # 16-bit (2 bytes)
        wav_file.setframerate(24000)  # 24 kHz, matching the SNAC decoder
        wav_file.writeframes(pcm_data)

    return audio_file


# Create Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(
        placeholder="Enter text to convert to speech...",
        label="Text",
        lines=10,
    ),
    outputs=gr.Audio(type="filepath"),
    title="2cent TTS",
    description="Convert text to speech using an OpenAI TTS compatible server.",
    examples=[
        ["Hello, welcome to this text-to-speech demo. How does my voice sound today?"],
        [
            "Text-to-speech technology has evolved rapidly! It now handles inflections, pauses, and emotions. Can you believe how natural it sounds?"
        ],
        [
            "Have you ever wondered what it would be like to hear your writing read back to you with perfect pronunciation?"
        ],
        [
            "This neural network uses a 12-layer transformer architecture with 768-dimensional embeddings and 12 attention heads."
        ],
        [
            "The temperature today is 72.5 degrees Fahrenheit, with a 30% chance of rain and winds at 15 mph."
        ],
        [
            "I'm absolutely thrilled to announce that our team has finally achieved the breakthrough we've been working toward!"
        ],
        [
            '"Do you think we should knock first?" she asked.',
        ],
        [
            "The xylophonist performed a captivating rendition of Tchaikovsky's Symphony No. 6 in B minor."
        ],
        [
            "In 1969, Apollo 11 landed on the moon. The spacecraft carried 3 astronauts and traveled approximately 240,000 miles from Earth."
        ],
    ],
    cache_examples=False,  # Don't cache; generate new audio each time
)


# Wait for server to be ready before launching the interface
print("Waiting for TTS server to start...")
if wait_for_server():
    print("TTS server is ready. Starting Gradio interface.")
else:
    print(
        "Warning: TTS server did not start in the expected time. The interface may not work correctly."
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, max_threads=40)
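Once the container is up, the Gradio app can also be driven programmatically rather than through the browser. A minimal sketch, assuming gradio_client is installed on the calling machine (it is not part of this image), the app is reachable at http://localhost:7860, and "/predict" is the default endpoint name Gradio assigns to a single-function Interface:

# Hypothetical client-side usage of the running Space (not part of this commit).
from gradio_client import Client

client = Client("http://localhost:7860/")
# Returns a local path to the WAV file produced by text_to_speech.
wav_path = client.predict(
    "Hello, welcome to this text-to-speech demo.", api_name="/predict"
)
print(f"Generated audio saved to: {wav_path}")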