Spaces:

UcsTurkey
/

flare

Paused

App Files Community

ciyidogan committed on Jul 2

Commit

7040225

verified ·

1 Parent(s): c582b8f

Update websocket_handler.py

Browse files

Files changed (1) hide show

websocket_handler.py +20 -15

websocket_handler.py CHANGED Viewed

@@ -246,6 +246,8 @@ class RealtimeSession:
         await self.audio_buffer.clear()
         self.silence_detector.reset()
         self.current_transcription = ""
     async def cleanup(self):
         """Clean up resources"""
@@ -470,7 +472,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
         # Decode for processing
         decoded_audio = base64.b64decode(audio_data)
-        # Check silence
         silence_duration = session.silence_detector.update(decoded_audio)
         # Stream to STT if available
@@ -501,22 +503,20 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
                 # STT'ye gönder ve sonuçları bekle
                 async for result in session.stt_manager.stream_audio(decoded_audio):
-                    # Sadece anlamlı sonuçları logla
-                    if result.text.strip():  # Boş olmayan text varsa
-                        log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
-                    # Send transcription updates
-                    await websocket.send_json({
-                        "type": "transcription",
-                        "text": result.text,
-                        "is_final": result.is_final,
-                        "confidence": result.confidence
-                    })
                     if result.is_final:
-                        session.current_transcription = result.text
                         log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
                         # Final transcription geldiğinde hemen işle
                         if session.current_transcription:
                             # State'i değiştir ve user input'u işle
@@ -533,7 +533,12 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
                             # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
                             await session.reset_for_new_utterance()
                             return  # Bu audio chunk için işlem tamamlandı
             except Exception as e:
                 log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
                 await websocket.send_json({

         await self.audio_buffer.clear()
         self.silence_detector.reset()
         self.current_transcription = ""
+        if hasattr(self, 'speech_started'):
+            delattr(self, 'speech_started')  # Speech started flag'ini sıfırla
     async def cleanup(self):
         """Clean up resources"""
         # Decode for processing
         decoded_audio = base64.b64decode(audio_data)
+        # Check silence - LOGLAMA YAPMA
         silence_duration = session.silence_detector.update(decoded_audio)
         # Stream to STT if available
                 # STT'ye gönder ve sonuçları bekle
                 async for result in session.stt_manager.stream_audio(decoded_audio):
+                    # SADECE FINAL RESULT'LARI LOGLA VE GÖNDER
                     if result.is_final:
                         log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
+                        # Send ONLY final transcription to frontend
+                        await websocket.send_json({
+                            "type": "transcription",
+                            "text": result.text,
+                            "is_final": True,
+                            "confidence": result.confidence
+                        })
+                        session.current_transcription = result.text
                         # Final transcription geldiğinde hemen işle
                         if session.current_transcription:
                             # State'i değiştir ve user input'u işle
                             # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
                             await session.reset_for_new_utterance()
                             return  # Bu audio chunk için işlem tamamlandı
+                    # Interim result'ları SADECE ilk kelimede logla (konuşma başlangıcı)
+                    elif result.text.strip() and not hasattr(session, 'speech_started'):
+                        session.speech_started = True
+                        log_info(f"🎤 User started speaking", session_id=session.session.session_id)
             except Exception as e:
                 log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
                 await websocket.send_json({