Spaces:

UcsTurkey
/

flare

Paused

App Files Community

ciyidogan committed on Jul 2

Commit

4c14f2d

verified ·

1 Parent(s): 486eb58

Update websocket_handler.py

Browse files

Files changed (1) hide show

websocket_handler.py +16 -24

websocket_handler.py CHANGED Viewed

@@ -521,9 +521,22 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
                         session.current_transcription = result.text
                         log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
-                        # BURADA: Final transcription geldiğinde hemen işle
                         if session.current_transcription:
                             await process_user_input(websocket, session)
             except Exception as e:
                 log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
@@ -532,17 +545,6 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
                     "error_type": "stt_error",
                     "message": f"STT error: {str(e)}"
                 })
-        # SILENCE DETECTION'I GEÇİCİ OLARAK DEVRE DIŞI BIRAK
-        # Process if silence detected and we have transcription
-        # if silence_duration > session.silence_threshold_ms and session.current_transcription:
-        #     log_info(
-        #         f"🔇 User stopped speaking",
-        #         session_id=session.session.session_id,
-        #         silence_ms=silence_duration,
-        #         text=session.current_transcription
-        #     )
-        #     await process_user_input(websocket, session)
     except Exception as e:
         log_error(
@@ -555,7 +557,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
             "type": "error",
             "message": f"Audio processing error: {str(e)}"
         })
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""
     action = message.get("action")
@@ -622,13 +624,7 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
         log_info(f"🎯 Processing user input", text=user_text, session_id=session.session.session_id)
-        # State: STT Processing
-        await session.change_state(ConversationState.PROCESSING_STT)
-        await websocket.send_json({
-            "type": "state_change",
-            "from": "listening",
-            "to": "processing_stt"
-        })
         # Send final transcription
         await websocket.send_json({
@@ -696,9 +692,6 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
                 "to": "idle"
             })
-        # Reset for next input
-        await session.reset_for_new_utterance()
     except Exception as e:
         log_error(
             f"❌ Error processing user input",
@@ -713,7 +706,6 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
         await session.reset_for_new_utterance()
         await session.change_state(ConversationState.IDLE)
 async def generate_and_stream_tts(
     websocket: WebSocket,
     session: RealtimeSession,

                         session.current_transcription = result.text
                         log_info(f"📝 Final transcription: {result.text}", session_id=session.session.session_id)
+                        # Final transcription geldiğinde hemen işle
                         if session.current_transcription:
+                            # State'i değiştir ve user input'u işle
+                            await session.change_state(ConversationState.PROCESSING_STT)
+                            await websocket.send_json({
+                                "type": "state_change",
+                                "from": "listening",
+                                "to": "processing_stt"
+                            })
+                            # Process user input
                             await process_user_input(websocket, session)
+                            # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
+                            await session.reset_for_new_utterance()
+                            return  # Bu audio chunk için işlem tamamlandı
             except Exception as e:
                 log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
                     "error_type": "stt_error",
                     "message": f"STT error: {str(e)}"
                 })
     except Exception as e:
         log_error(
             "type": "error",
             "message": f"Audio processing error: {str(e)}"
         })
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""
     action = message.get("action")
         log_info(f"🎯 Processing user input", text=user_text, session_id=session.session.session_id)
+        # State zaten PROCESSING_STT olarak set edildi, direkt devam et
         # Send final transcription
         await websocket.send_json({
                 "to": "idle"
             })
     except Exception as e:
         log_error(
             f"❌ Error processing user input",
         await session.reset_for_new_utterance()
         await session.change_state(ConversationState.IDLE)
 async def generate_and_stream_tts(
     websocket: WebSocket,
     session: RealtimeSession,