Spaces:
Running
Running
Update websocket_handler.py
Browse files- websocket_handler.py +20 -15
websocket_handler.py
CHANGED
@@ -246,6 +246,8 @@ class RealtimeSession:
|
|
246 |
await self.audio_buffer.clear()
|
247 |
self.silence_detector.reset()
|
248 |
self.current_transcription = ""
|
|
|
|
|
249 |
|
250 |
async def cleanup(self):
|
251 |
"""Clean up resources"""
|
@@ -470,7 +472,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
470 |
# Decode for processing
|
471 |
decoded_audio = base64.b64decode(audio_data)
|
472 |
|
473 |
-
# Check silence
|
474 |
silence_duration = session.silence_detector.update(decoded_audio)
|
475 |
|
476 |
# Stream to STT if available
|
@@ -501,22 +503,20 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
501 |
|
502 |
# STT'ye gönder ve sonuçları bekle
|
503 |
async for result in session.stt_manager.stream_audio(decoded_audio):
|
504 |
-
#
|
505 |
-
if result.text.strip(): # Boş olmayan text varsa
|
506 |
-
log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
|
507 |
-
|
508 |
-
# Send transcription updates
|
509 |
-
await websocket.send_json({
|
510 |
-
"type": "transcription",
|
511 |
-
"text": result.text,
|
512 |
-
"is_final": result.is_final,
|
513 |
-
"confidence": result.confidence
|
514 |
-
})
|
515 |
-
|
516 |
if result.is_final:
|
517 |
-
session.current_transcription = result.text
|
518 |
log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
|
519 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
520 |
# Final transcription geldiğinde hemen işle
|
521 |
if session.current_transcription:
|
522 |
# State'i değiştir ve user input'u işle
|
@@ -533,7 +533,12 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
533 |
# STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
|
534 |
await session.reset_for_new_utterance()
|
535 |
return # Bu audio chunk için işlem tamamlandı
|
536 |
-
|
|
|
|
|
|
|
|
|
|
|
537 |
except Exception as e:
|
538 |
log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
|
539 |
await websocket.send_json({
|
|
|
246 |
await self.audio_buffer.clear()
|
247 |
self.silence_detector.reset()
|
248 |
self.current_transcription = ""
|
249 |
+
if hasattr(self, 'speech_started'):
|
250 |
+
delattr(self, 'speech_started') # Speech started flag'ini sıfırla
|
251 |
|
252 |
async def cleanup(self):
|
253 |
"""Clean up resources"""
|
|
|
472 |
# Decode for processing
|
473 |
decoded_audio = base64.b64decode(audio_data)
|
474 |
|
475 |
+
# Check silence - LOGLAMA YAPMA
|
476 |
silence_duration = session.silence_detector.update(decoded_audio)
|
477 |
|
478 |
# Stream to STT if available
|
|
|
503 |
|
504 |
# STT'ye gönder ve sonuçları bekle
|
505 |
async for result in session.stt_manager.stream_audio(decoded_audio):
|
506 |
+
# SADECE FINAL RESULT'LARI LOGLA VE GÖNDER
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
if result.is_final:
|
|
|
508 |
log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
|
509 |
|
510 |
+
# Send ONLY final transcription to frontend
|
511 |
+
await websocket.send_json({
|
512 |
+
"type": "transcription",
|
513 |
+
"text": result.text,
|
514 |
+
"is_final": True,
|
515 |
+
"confidence": result.confidence
|
516 |
+
})
|
517 |
+
|
518 |
+
session.current_transcription = result.text
|
519 |
+
|
520 |
# Final transcription geldiğinde hemen işle
|
521 |
if session.current_transcription:
|
522 |
# State'i değiştir ve user input'u işle
|
|
|
533 |
# STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
|
534 |
await session.reset_for_new_utterance()
|
535 |
return # Bu audio chunk için işlem tamamlandı
|
536 |
+
|
537 |
+
# Interim result'ları SADECE ilk kelimede logla (konuşma başlangıcı)
|
538 |
+
elif result.text.strip() and not hasattr(session, 'speech_started'):
|
539 |
+
session.speech_started = True
|
540 |
+
log_info(f"🎤 User started speaking", session_id=session.session.session_id)
|
541 |
+
|
542 |
except Exception as e:
|
543 |
log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
|
544 |
await websocket.send_json({
|