Update websocket_handler.py

websocket_handler.py  CHANGED  (+31 -13)
@@ -425,8 +425,15 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
             "message": str(e)
         })
     finally:
-
-
+        log_info(f"🧹 Cleaning up WebSocket connection", session_id=session_id)
+        await realtime_session.cleanup()
+
+        # Check whether the WebSocket is still open
+        try:
+            if websocket.client_state.value == 1:  # 1 = CONNECTED state
+                await websocket.close()
+        except Exception as e:
+            log_debug(f"WebSocket already closed or error during close: {e}", session_id=session_id)
 
 # ========================= MESSAGE HANDLERS =========================
 async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
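Note that `websocket.client_state.value == 1` compares against a magic number. FastAPI's WebSocket comes from Starlette, which exposes this state as the `WebSocketState` enum (where `CONNECTED` has value 1), so the same guard can be written without the literal. A minimal sketch:

    from starlette.websockets import WebSocket, WebSocketState

    async def close_if_connected(websocket: WebSocket) -> None:
        # Equivalent to the client_state.value == 1 check in the hunk above,
        # but readable and robust to enum reordering.
        if websocket.client_state == WebSocketState.CONNECTED:
            await websocket.close()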
@@ -455,15 +462,22 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
             "to": "listening"
         })
 
-        # Add to buffer
+        # Add to buffer
         await session.audio_buffer.add_chunk(audio_data)
 
         # Decode for processing
         decoded_audio = base64.b64decode(audio_data)
 
+        # DEBUG: Log audio chunk size
+        log_debug(f"🎤 Audio chunk received: {len(decoded_audio)} bytes", session_id=session.session.session_id)
+
         # Check silence
         silence_duration = session.silence_detector.update(decoded_audio)
 
+        # DEBUG: Log silence detection
+        if silence_duration > 0:
+            log_debug(f"🔇 Silence detected: {silence_duration}ms", session_id=session.session.session_id)
+
         # Stream to STT if available
         if session.stt_manager and session.state == ConversationState.LISTENING:
             # Ensure streaming is active
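`SilenceDetector` itself is not part of this diff. For context, a minimal sketch of an `update()` that matches the contract used here (raw PCM chunk in, accumulated silence duration in ms out), assuming 16-bit mono PCM at 16 kHz and a fixed RMS threshold; the sample rate, threshold, and RMS approach are all assumptions, not the project's actual implementation:

    import audioop  # stdlib; deprecated and removed in Python 3.13

    class SilenceDetector:
        """Tracks how long incoming audio has stayed below an RMS threshold."""

        def __init__(self, sample_rate: int = 16000, threshold: int = 500):
            self.sample_rate = sample_rate  # assumed 16 kHz mono input
            self.threshold = threshold      # assumed RMS cutoff for "silence"
            self.silence_ms = 0.0

        def update(self, pcm: bytes) -> float:
            # 2 bytes per 16-bit sample -> duration of this chunk in ms
            chunk_ms = (len(pcm) / 2) / self.sample_rate * 1000
            if audioop.rms(pcm, 2) < self.threshold:
                self.silence_ms += chunk_ms  # still silent, keep accumulating
            else:
                self.silence_ms = 0.0        # speech resets the counter
            return self.silence_ms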
@@ -497,6 +511,10 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
                     session.current_transcription = result.text
                     log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
 
+                    # Process the final transcription as soon as it arrives
+                    if session.current_transcription:
+                        await process_user_input(websocket, session)
+
         except Exception as e:
             log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
             await websocket.send_json({
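This hunk assumes a streaming STT result object carrying the recognized text and an is-final flag. A compressed sketch of that consumption pattern, with `TranscriptionResult` as an assumed shape (the project's real class is not shown in this diff):

    from dataclasses import dataclass

    @dataclass
    class TranscriptionResult:  # assumed shape, not the project's class
        text: str
        is_final: bool

    async def drain_results(session, results) -> None:
        async for result in results:
            if result.is_final:  # interim hypotheses are not promoted
                session.current_transcription = result.text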
@@ -504,16 +522,17 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
                 "error_type": "stt_error",
                 "message": f"STT error: {str(e)}"
             })
-
+
+        # TEMPORARILY DISABLE SILENCE DETECTION
         # Process if silence detected and we have transcription
-        if silence_duration > session.silence_threshold_ms and session.current_transcription:
-            log_info(
-                f"🔇 User stopped speaking",
-                session_id=session.session.session_id,
-                silence_ms=silence_duration,
-                text=session.current_transcription
-            )
-            await process_user_input(websocket, session)
+        # if silence_duration > session.silence_threshold_ms and session.current_transcription:
+        #     log_info(
+        #         f"🔇 User stopped speaking",
+        #         session_id=session.session.session_id,
+        #         silence_ms=silence_duration,
+        #         text=session.current_transcription
+        #     )
+        #     await process_user_input(websocket, session)
 
     except Exception as e:
         log_error(
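With the silence branch commented out, `process_user_input` is now triggered only from the final-transcription path above. If the silence path is ever re-enabled, both triggers could fire for the same utterance. One way to make the call idempotent per utterance, sketched with `_processing` as a hypothetical flag rather than a field from this diff:

    async def process_once(websocket, session) -> None:
        # Hypothetical re-entrancy guard around process_user_input.
        if getattr(session, "_processing", False) or not session.current_transcription:
            return
        session._processing = True
        try:
            await process_user_input(websocket, session)
            session.current_transcription = ""  # mark the utterance consumed
        finally:
            session._processing = False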
@@ -527,7 +546,6 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
             "message": f"Audio processing error: {str(e)}"
         })
 
-
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""
     action = message.get("action")