Spaces:
Paused
Paused
Update websocket_handler.py
Browse files- websocket_handler.py +72 -32
websocket_handler.py
CHANGED
|
@@ -455,38 +455,38 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
|
|
| 455 |
}
|
| 456 |
})
|
| 457 |
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
|
| 491 |
|
| 492 |
# ========================= PROCESSING FUNCTIONS =========================
|
|
@@ -620,6 +620,46 @@ async def generate_and_stream_tts(
|
|
| 620 |
chunk = audio_data[i:i + chunk_size]
|
| 621 |
chunk_index = i // chunk_size
|
| 622 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
await websocket.send_json({
|
| 624 |
"type": "tts_audio",
|
| 625 |
"data": base64.b64encode(chunk).decode('utf-8'),
|
|
|
|
| 455 |
}
|
| 456 |
})
|
| 457 |
|
| 458 |
+
# Send welcome message and TTS if available
|
| 459 |
+
chat_history = session.session.chat_history
|
| 460 |
+
if chat_history and len(chat_history) > 0:
|
| 461 |
+
last_message = chat_history[-1]
|
| 462 |
+
if last_message["role"] == "assistant":
|
| 463 |
+
welcome_text = last_message["content"]
|
| 464 |
+
|
| 465 |
+
# Send text message
|
| 466 |
+
await websocket.send_json({
|
| 467 |
+
"type": "assistant_response",
|
| 468 |
+
"text": welcome_text
|
| 469 |
+
})
|
| 470 |
+
|
| 471 |
+
# Generate TTS if enabled
|
| 472 |
+
tts_provider = TTSFactory.create_provider()
|
| 473 |
+
if tts_provider:
|
| 474 |
+
await session.change_state(ConversationState.PROCESSING_TTS)
|
| 475 |
+
await websocket.send_json({
|
| 476 |
+
"type": "state_change",
|
| 477 |
+
"from": "idle",
|
| 478 |
+
"to": "processing_tts"
|
| 479 |
+
})
|
| 480 |
+
|
| 481 |
+
# Generate and stream TTS
|
| 482 |
+
tts_task = session.barge_in_handler.start_tts_task(
|
| 483 |
+
generate_and_stream_tts(websocket, session, tts_provider, welcome_text)
|
| 484 |
+
)
|
| 485 |
+
|
| 486 |
+
try:
|
| 487 |
+
await tts_task
|
| 488 |
+
except asyncio.CancelledError:
|
| 489 |
+
log_info("Welcome TTS cancelled", session_id=session.session.session_id)
|
| 490 |
|
| 491 |
|
| 492 |
# ========================= PROCESSING FUNCTIONS =========================
|
|
|
|
| 620 |
chunk = audio_data[i:i + chunk_size]
|
| 621 |
chunk_index = i // chunk_size
|
| 622 |
|
| 623 |
+
await websocket.send_json({
|
| 624 |
+
"type": "tts_audio",
|
| 625 |
+
"data": base64.b64encode(chunk).decode('utf-8'),
|
| 626 |
+
"chunk_index": chunk_index,
|
| 627 |
+
"total_chunks": total_chunks,
|
| 628 |
+
"is_last": chunk_index == total_chunks - 1,
|
| 629 |
+
"mime_type": "audio/mpeg" # MP3 format for ElevenLabs
|
| 630 |
+
})
|
| 631 |
+
|
| 632 |
+
# Small delay to prevent overwhelming the client
|
| 633 |
+
await asyncio.sleep(0.01)
|
| 634 |
+
|
| 635 |
+
# Send state back to idle after completion
|
| 636 |
+
await session.change_state(ConversationState.IDLE)
|
| 637 |
+
await websocket.send_json({
|
| 638 |
+
"type": "state_change",
|
| 639 |
+
"from": "playing_audio",
|
| 640 |
+
"to": "idle"
|
| 641 |
+
})
|
| 642 |
+
|
| 643 |
+
log_info(
|
| 644 |
+
f"TTS streaming completed",
|
| 645 |
+
session_id=session.session.session_id,
|
| 646 |
+
text_length=len(text),
|
| 647 |
+
audio_size=len(audio_data)
|
| 648 |
+
)
|
| 649 |
+
|
| 650 |
+
except asyncio.CancelledError:
|
| 651 |
+
log_info("TTS streaming cancelled", session_id=session.session.session_id)
|
| 652 |
+
raise
|
| 653 |
+
except Exception as e:
|
| 654 |
+
log_error(
|
| 655 |
+
f"TTS generation error",
|
| 656 |
+
error=str(e),
|
| 657 |
+
session_id=session.session.session_id
|
| 658 |
+
)
|
| 659 |
+
await websocket.send_json({
|
| 660 |
+
"type": "error",
|
| 661 |
+
"message": f"TTS error: {str(e)}"
|
| 662 |
+
})
|
| 663 |
await websocket.send_json({
|
| 664 |
"type": "tts_audio",
|
| 665 |
"data": base64.b64encode(chunk).decode('utf-8'),
|