Spaces:
Running
Running
Update websocket_handler.py
Browse files- websocket_handler.py +16 -24
websocket_handler.py
CHANGED
@@ -521,9 +521,22 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
521 |
session.current_transcription = result.text
|
522 |
log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
|
523 |
|
524 |
-
#
|
525 |
if session.current_transcription:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
526 |
await process_user_input(websocket, session)
|
|
|
|
|
|
|
|
|
527 |
|
528 |
except Exception as e:
|
529 |
log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
|
@@ -532,17 +545,6 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
532 |
"error_type": "stt_error",
|
533 |
"message": f"STT error: {str(e)}"
|
534 |
})
|
535 |
-
|
536 |
-
# SILENCE DETECTION'I GEÇİCİ OLARAK DEVRE DIŞI BIRAK
|
537 |
-
# Process if silence detected and we have transcription
|
538 |
-
# if silence_duration > session.silence_threshold_ms and session.current_transcription:
|
539 |
-
# log_info(
|
540 |
-
# f"🔇 User stopped speaking",
|
541 |
-
# session_id=session.session.session_id,
|
542 |
-
# silence_ms=silence_duration,
|
543 |
-
# text=session.current_transcription
|
544 |
-
# )
|
545 |
-
# await process_user_input(websocket, session)
|
546 |
|
547 |
except Exception as e:
|
548 |
log_error(
|
@@ -555,7 +557,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
555 |
"type": "error",
|
556 |
"message": f"Audio processing error: {str(e)}"
|
557 |
})
|
558 |
-
|
559 |
async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
|
560 |
"""Handle control messages"""
|
561 |
action = message.get("action")
|
@@ -622,13 +624,7 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
|
|
622 |
|
623 |
log_info(f"🎯 Processing user input", text=user_text, session_id=session.session.session_id)
|
624 |
|
625 |
-
# State
|
626 |
-
await session.change_state(ConversationState.PROCESSING_STT)
|
627 |
-
await websocket.send_json({
|
628 |
-
"type": "state_change",
|
629 |
-
"from": "listening",
|
630 |
-
"to": "processing_stt"
|
631 |
-
})
|
632 |
|
633 |
# Send final transcription
|
634 |
await websocket.send_json({
|
@@ -696,9 +692,6 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
|
|
696 |
"to": "idle"
|
697 |
})
|
698 |
|
699 |
-
# Reset for next input
|
700 |
-
await session.reset_for_new_utterance()
|
701 |
-
|
702 |
except Exception as e:
|
703 |
log_error(
|
704 |
f"❌ Error processing user input",
|
@@ -713,7 +706,6 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
|
|
713 |
await session.reset_for_new_utterance()
|
714 |
await session.change_state(ConversationState.IDLE)
|
715 |
|
716 |
-
|
717 |
async def generate_and_stream_tts(
|
718 |
websocket: WebSocket,
|
719 |
session: RealtimeSession,
|
|
|
521 |
session.current_transcription = result.text
|
522 |
log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
|
523 |
|
524 |
+
# Final transcription geldiğinde hemen işle
|
525 |
if session.current_transcription:
|
526 |
+
# State'i değiştir ve user input'u işle
|
527 |
+
await session.change_state(ConversationState.PROCESSING_STT)
|
528 |
+
await websocket.send_json({
|
529 |
+
"type": "state_change",
|
530 |
+
"from": "listening",
|
531 |
+
"to": "processing_stt"
|
532 |
+
})
|
533 |
+
|
534 |
+
# Process user input
|
535 |
await process_user_input(websocket, session)
|
536 |
+
|
537 |
+
# STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
|
538 |
+
await session.reset_for_new_utterance()
|
539 |
+
return # Bu audio chunk için işlem tamamlandı
|
540 |
|
541 |
except Exception as e:
|
542 |
log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
|
|
|
545 |
"error_type": "stt_error",
|
546 |
"message": f"STT error: {str(e)}"
|
547 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
548 |
|
549 |
except Exception as e:
|
550 |
log_error(
|
|
|
557 |
"type": "error",
|
558 |
"message": f"Audio processing error: {str(e)}"
|
559 |
})
|
560 |
+
|
561 |
async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
|
562 |
"""Handle control messages"""
|
563 |
action = message.get("action")
|
|
|
624 |
|
625 |
log_info(f"🎯 Processing user input", text=user_text, session_id=session.session.session_id)
|
626 |
|
627 |
+
# State zaten PROCESSING_STT olarak set edildi, direkt devam et
|
|
|
|
|
|
|
|
|
|
|
|
|
628 |
|
629 |
# Send final transcription
|
630 |
await websocket.send_json({
|
|
|
692 |
"to": "idle"
|
693 |
})
|
694 |
|
|
|
|
|
|
|
695 |
except Exception as e:
|
696 |
log_error(
|
697 |
f"❌ Error processing user input",
|
|
|
706 |
await session.reset_for_new_utterance()
|
707 |
await session.change_state(ConversationState.IDLE)
|
708 |
|
|
|
709 |
async def generate_and_stream_tts(
|
710 |
websocket: WebSocket,
|
711 |
session: RealtimeSession,
|