Tim Luka Horstmann committed
Commit · 1292878
1 Parent(s): ea17465

Add ElevenLabs TTS integration

Files changed:
- app.py +86 -12
- requirements.txt +3 -1
- test_gemini_integration.py +0 -120
app.py CHANGED

@@ -4,7 +4,8 @@ import time
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from fastapi import FastAPI, HTTPException, BackgroundTasks
-from fastapi.responses import StreamingResponse
+from fastapi.responses import StreamingResponse, Response
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import login, hf_hub_download
@@ -15,6 +16,8 @@ import asyncio
 import psutil # Added for RAM tracking
 from google import genai
 from google.genai import types
+import httpx
+from elevenlabs import ElevenLabs, VoiceSettings

 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -22,6 +25,15 @@ logger = logging.getLogger(__name__)

 app = FastAPI()

+# Add CORS middleware to handle cross-origin requests
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"], # In production, specify your domain
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
 # Global lock for model access
 model_lock = asyncio.Lock()

@@ -51,6 +63,18 @@ else:
     gemini_client = None
     logger.info("Using local model (Gemini disabled)")

+# ElevenLabs Configuration
+elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
+if elevenlabs_api_key:
+    elevenlabs_client = ElevenLabs(api_key=elevenlabs_api_key)
+    # You can set a specific voice ID here or use the default voice
+    # Get your voice ID from ElevenLabs dashboard after cloning your voice
+    tts_voice_id = os.getenv("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM") # Default voice, replace with your cloned voice ID
+    logger.info("ElevenLabs TTS client initialized")
+else:
+    elevenlabs_client = None
+    logger.info("ElevenLabs TTS disabled (no API key provided)")
+
 # Define FAQs
 faqs = [
     {"question": "What is your name?", "answer": "My name is Tim Luka Horstmann."},
@@ -287,7 +311,10 @@ async def stream_response_local(query, history):

 class QueryRequest(BaseModel):
     query: str
-    history: list
+    history: list[dict]
+
+class TTSRequest(BaseModel):
+    text: str

 # RAM Usage Tracking Function
 def get_ram_usage():
@@ -309,32 +336,79 @@ async def predict(request: QueryRequest):
     history = request.history
     return StreamingResponse(stream_response(query, history), media_type="text/event-stream")

+@app.post("/api/tts")
+async def text_to_speech(request: TTSRequest):
+    """Convert text to speech using ElevenLabs API"""
+    if not elevenlabs_client:
+        raise HTTPException(status_code=503, detail="TTS service not available")
+
+    try:
+        # Clean the text for TTS (remove markdown and special characters)
+        clean_text = request.text.replace("**", "").replace("*", "").replace("\n", " ").strip()
+
+        if not clean_text:
+            raise HTTPException(status_code=400, detail="No text provided for TTS")
+
+        if len(clean_text) > 1000: # Limit text length to avoid long processing times
+            clean_text = clean_text[:1000] + "..."
+
+        # Generate speech
+        response = elevenlabs_client.text_to_speech.convert(
+            voice_id=tts_voice_id,
+            text=clean_text,
+            voice_settings=VoiceSettings(
+                stability=0.5,
+                similarity_boost=0.8,
+                style=0.2,
+                use_speaker_boost=True
+            )
+        )
+
+        # Convert generator to bytes
+        audio_bytes = b"".join(response)
+
+        return Response(
+            content=audio_bytes,
+            media_type="audio/mpeg",
+            headers={
+                "Content-Disposition": "inline; filename=tts_audio.mp3",
+                "Cache-Control": "no-cache"
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"TTS error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"TTS conversion failed: {str(e)}")
+
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}

 @app.get("/model_info")
 async def model_info():
+    base_info = {
+        "embedding_model": sentence_transformer_model,
+        "faiss_index_size": len(cv_chunks),
+        "faiss_index_dim": cv_embeddings.shape[1],
+        "tts_available": elevenlabs_client is not None,
+    }
+
     if USE_GEMINI:
-        return {
+        base_info.update({
             "model_type": "gemini",
             "model_name": gemini_model,
             "provider": "Google Gemini API",
-            "embedding_model": sentence_transformer_model,
-            "faiss_index_size": len(cv_chunks),
-            "faiss_index_dim": cv_embeddings.shape[1],
-        }
+        })
     else:
-        return {
+        base_info.update({
             "model_type": "local",
             "model_name": filename,
             "repo_id": repo_id,
             "model_size": "1.7B",
             "quantization": "Q4_K_M",
-            "embedding_model": sentence_transformer_model,
-            "faiss_index_size": len(cv_chunks),
-            "faiss_index_dim": cv_embeddings.shape[1],
-        }
+        })
+
+    return base_info

 @app.get("/ram_usage")
 async def ram_usage():
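For reference, a minimal client sketch (not part of this commit) for the new /api/tts endpoint, assuming the FastAPI app is reachable at http://localhost:8000 (hypothetical host and port) and using the httpx package that this commit adds to requirements.txt:

import httpx

BASE_URL = "http://localhost:8000"  # hypothetical; substitute your deployment URL

def fetch_tts(text: str, out_path: str = "tts_audio.mp3") -> None:
    """Check /model_info for TTS availability, then save MP3 audio from /api/tts."""
    info = httpx.get(f"{BASE_URL}/model_info").json()
    if not info.get("tts_available"):
        raise RuntimeError("TTS disabled: server has no ELEVENLABS_API_KEY configured")

    resp = httpx.post(f"{BASE_URL}/api/tts", json={"text": text}, timeout=60.0)
    resp.raise_for_status()  # 503 when TTS is unavailable, 500 on conversion errors
    with open(out_path, "wb") as f:
        f.write(resp.content)  # response body is audio/mpeg bytes

if __name__ == "__main__":
    fetch_tts("Hello, I'm Tim Luka Horstmann.")

Because the endpoint returns the full MP3 payload (Content-Disposition: inline) rather than a stream, a browser client could also point an Audio element at the response directly.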
requirements.txt CHANGED

@@ -8,4 +8,6 @@ huggingface_hub==0.30.1
 faiss-cpu==1.8.0
 asyncio
 psutil
-google-genai
+google-genai
+elevenlabs==1.1.3
+httpx==0.25.0
test_gemini_integration.py DELETED

@@ -1,120 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for Gemini API integration
-"""
-
-import os
-import asyncio
-from datetime import datetime
-
-# Mock the dependencies for testing
-class MockClient:
-    def __init__(self, api_key):
-        self.api_key = api_key
-
-    class models:
-        @staticmethod
-        def generate_content_stream(model, contents, config):
-            # Mock streaming response
-            class MockChunk:
-                text = "Hello! I'm Tim Luka Horstmann, a Computer Scientist currently pursuing my MSc in Data and AI at Institut Polytechnique de Paris."
-
-            yield MockChunk()
-
-class MockTypes:
-    class Content:
-        def __init__(self, role, parts):
-            self.role = role
-            self.parts = parts
-
-    class Part:
-        def __init__(self, text):
-            self.text = text
-
-        @classmethod
-        def from_text(cls, text):
-            return cls(text)
-
-    class GenerateContentConfig:
-        def __init__(self, temperature, top_p, max_output_tokens):
-            self.temperature = temperature
-            self.top_p = top_p
-            self.max_output_tokens = max_output_tokens
-
-# Test function similar to our Gemini implementation
-async def test_gemini_integration():
-    """Test the Gemini integration logic"""
-
-    # Mock environment variables
-    USE_GEMINI = True
-    gemini_api_key = "test_api_key"
-    gemini_model = "gemini-2.5-flash-preview-05-20"
-
-    # Mock full CV text
-    full_cv_text = "Tim Luka Horstmann is a Computer Scientist pursuing MSc in Data and AI at Institut Polytechnique de Paris."
-
-    # Initialize mock client
-    gemini_client = MockClient(api_key=gemini_api_key)
-    types = MockTypes()
-
-    # Test query and history
-    query = "What is your education?"
-    history = []
-
-    print(f"Testing Gemini integration...")
-    print(f"USE_GEMINI: {USE_GEMINI}")
-    print(f"Query: {query}")
-
-    # Simulate the Gemini function logic
-    current_date = datetime.now().strftime("%Y-%m-%d")
-
-    system_prompt = (
-        "You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
-        "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
-        "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
-        "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
-        f"Today's date is {current_date}. "
-        f"CV: {full_cv_text}"
-    )
-
-    # Build messages for Gemini (no system role - embed instructions in first user message)
-    messages = []
-
-    # Add conversation history
-    for msg in history:
-        role = "user" if msg["role"] == "user" else "model"
-        messages.append(types.Content(role=role, parts=[types.Part.from_text(text=msg["content"])]))
-
-    # Add current query with system prompt embedded
-    if not history: # If no history, include system prompt with the first message
-        combined_query = f"{system_prompt}\n\nUser question: {query}"
-    else:
-        combined_query = query
-
-    messages.append(types.Content(role="user", parts=[types.Part.from_text(text=combined_query)]))
-
-    print(f"System prompt length: {len(system_prompt)}")
-    print(f"Number of messages: {len(messages)}")
-
-    # Mock the streaming response
-    response = gemini_client.models.generate_content_stream(
-        model=gemini_model,
-        contents=messages,
-        config=types.GenerateContentConfig(
-            temperature=0.3,
-            top_p=0.7,
-            max_output_tokens=512,
-        )
-    )
-
-    print("Streaming response:")
-    for chunk in response:
-        if chunk.text:
-            print(f"Chunk: {chunk.text}")
-
-    print("✅ Gemini integration test completed successfully!")
-
-    return True
-
-if __name__ == "__main__":
-    asyncio.run(test_gemini_integration())