from fastapi import FastAPI, WebSocket, WebSocketDisconnect
import asyncio
import base64

from src.core.speechtotext import SpeechToText
from src.core.texttospeech import TextToSpeech
from src.core.texttotext import ConversationHandler

app = FastAPI()

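# One shared instance of each pipeline stage (speech-to-text, conversation
# handling, text-to-speech), created at startup and reused across connections.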
stt = SpeechToText()
ttt = ConversationHandler()
tts = TextToSpeech()


@app.websocket("/ws/voicechat")
async def websocket_endpoint(websocket: WebSocket):
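    """Bidirectional voice-chat endpoint.

    The client streams binary audio chunks; a pause of roughly 3 seconds marks
    the end of an utterance. The server then replies with a single JSON message
    containing "transcript", "response", "audio" (base64-encoded synthesized
    speech), and "status", and waits for any text message from the client
    before listening again.
    """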
    await websocket.accept()
    print("User connected.")
    audio_buffer = bytearray()

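    # The 3-second read timeout below doubles as end-of-utterance detection:
    # when the client stops sending chunks, the buffered bytes are treated as
    # one complete utterance and run through the STT -> conversation -> TTS
    # pipeline.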
    try:
        while True:
            try:
                audio_data = await asyncio.wait_for(
                    websocket.receive_bytes(), timeout=3.0
                )
                print(f"Received {len(audio_data)} bytes")
                audio_buffer.extend(audio_data)

            except asyncio.TimeoutError:
                if len(audio_buffer) > 0:
                    print("Silence detected. Processing speech...")
                    transcript = await stt.transcribe_audio(audio_buffer)
                    audio_buffer.clear()

                    if transcript:
                        print(f"User said: {transcript}")
                        response = await ttt.handle_conversation(transcript)
                        if response:
                            print(f"AI Response: {response}")
                            audio = await tts.synthesize(response)
                            # Base64-encode the TTS bytes so they fit in a
                            # JSON text frame.
                            audio_base64 = base64.b64encode(audio).decode("utf-8")
                            await websocket.send_json({
                                "transcript": transcript,
                                "response": response,
                                "audio": audio_base64,
                                "status": "complete",
                            })
                            # Wait for any text message from the client (e.g. a
                            # playback-finished ack) before resuming the loop.
                            await websocket.receive_text()

            except WebSocketDisconnect:
                # Re-raise so the outer handler runs: WebSocketDisconnect
                # subclasses Exception, so the broad handler below would
                # otherwise swallow it and leave the loop spinning.
                raise
            except Exception as e:
                print(f"Error: {e}")

    except WebSocketDisconnect:
        print("User disconnected.")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        timeout_keep_alive=300,
        timeout_graceful_shutdown=600,
    )
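
# A minimal client sketch for manual testing (a sketch, not part of the server:
# it assumes the third-party `websockets` package, and that the STT stage can
# consume whatever raw bytes you stream; the file name and chunk size below
# are illustrative only):
#
#   import asyncio, json, websockets
#
#   async def main():
#       async with websockets.connect("ws://localhost:7860/ws/voicechat") as ws:
#           with open("utterance.wav", "rb") as f:
#               while chunk := f.read(4096):
#                   await ws.send(chunk)         # stream binary audio frames
#           reply = json.loads(await ws.recv())  # sent after the 3 s silence timeout
#           print(reply["transcript"], "->", reply["response"])
#           await ws.send("done")                # any text message resumes listening
#
#   asyncio.run(main())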