ciyidogan committed on
Commit
4c14f2d
·
verified ·
1 Parent(s): 486eb58

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +16 -24
websocket_handler.py CHANGED
@@ -521,9 +521,22 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
521
  session.current_transcription = result.text
522
  log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
523
 
524
- # BURADA: Final transcription geldiğinde hemen işle
525
  if session.current_transcription:
 
 
 
 
 
 
 
 
 
526
  await process_user_input(websocket, session)
 
 
 
 
527
 
528
  except Exception as e:
529
  log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
@@ -532,17 +545,6 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
532
  "error_type": "stt_error",
533
  "message": f"STT error: {str(e)}"
534
  })
535
-
536
- # SILENCE DETECTION'I GEÇİCİ OLARAK DEVRE DIŞI BIRAK
537
- # Process if silence detected and we have transcription
538
- # if silence_duration > session.silence_threshold_ms and session.current_transcription:
539
- # log_info(
540
- # f"🔇 User stopped speaking",
541
- # session_id=session.session.session_id,
542
- # silence_ms=silence_duration,
543
- # text=session.current_transcription
544
- # )
545
- # await process_user_input(websocket, session)
546
 
547
  except Exception as e:
548
  log_error(
@@ -555,7 +557,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
555
  "type": "error",
556
  "message": f"Audio processing error: {str(e)}"
557
  })
558
-
559
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
560
  """Handle control messages"""
561
  action = message.get("action")
@@ -622,13 +624,7 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
622
 
623
  log_info(f"🎯 Processing user input", text=user_text, session_id=session.session.session_id)
624
 
625
- # State: STT Processing
626
- await session.change_state(ConversationState.PROCESSING_STT)
627
- await websocket.send_json({
628
- "type": "state_change",
629
- "from": "listening",
630
- "to": "processing_stt"
631
- })
632
 
633
  # Send final transcription
634
  await websocket.send_json({
@@ -696,9 +692,6 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
696
  "to": "idle"
697
  })
698
 
699
- # Reset for next input
700
- await session.reset_for_new_utterance()
701
-
702
  except Exception as e:
703
  log_error(
704
  f"❌ Error processing user input",
@@ -713,7 +706,6 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
713
  await session.reset_for_new_utterance()
714
  await session.change_state(ConversationState.IDLE)
715
 
716
-
717
  async def generate_and_stream_tts(
718
  websocket: WebSocket,
719
  session: RealtimeSession,
 
521
  session.current_transcription = result.text
522
  log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
523
 
524
+ # Final transcription geldiğinde hemen işle
525
  if session.current_transcription:
526
+ # State'i değiştir ve user input'u işle
527
+ await session.change_state(ConversationState.PROCESSING_STT)
528
+ await websocket.send_json({
529
+ "type": "state_change",
530
+ "from": "listening",
531
+ "to": "processing_stt"
532
+ })
533
+
534
+ # Process user input
535
  await process_user_input(websocket, session)
536
+
537
+ # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
538
+ await session.reset_for_new_utterance()
539
+ return # Bu audio chunk için işlem tamamlandı
540
 
541
  except Exception as e:
542
  log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
 
545
  "error_type": "stt_error",
546
  "message": f"STT error: {str(e)}"
547
  })
 
 
 
 
 
 
 
 
 
 
 
548
 
549
  except Exception as e:
550
  log_error(
 
557
  "type": "error",
558
  "message": f"Audio processing error: {str(e)}"
559
  })
560
+
561
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
562
  """Handle control messages"""
563
  action = message.get("action")
 
624
 
625
  log_info(f"🎯 Processing user input", text=user_text, session_id=session.session.session_id)
626
 
627
+ # State zaten PROCESSING_STT olarak set edildi, direkt devam et
 
 
 
 
 
 
628
 
629
  # Send final transcription
630
  await websocket.send_json({
 
692
  "to": "idle"
693
  })
694
 
 
 
 
695
  except Exception as e:
696
  log_error(
697
  f"❌ Error processing user input",
 
706
  await session.reset_for_new_utterance()
707
  await session.change_state(ConversationState.IDLE)
708
 
 
709
  async def generate_and_stream_tts(
710
  websocket: WebSocket,
711
  session: RealtimeSession,