ciyidogan committed on
Commit ad5c8be · verified · 1 Parent(s): e8fb1f7

Update stt/stt_deepgram.py

Files changed (1)
  1. stt/stt_deepgram.py +224 -286
stt/stt_deepgram.py CHANGED
--- a/stt/stt_deepgram.py
@@ -1,39 +1,37 @@
  """
- Deepgram Speech-to-Text Implementation - Optimized for Voice Agent
  """
- import os
  import asyncio
- import websockets
- import json
- from typing import AsyncIterator, Optional, List, Any, Dict
  from datetime import datetime
  import queue
  import threading
- import time
  import traceback
- import base64
- from urllib.parse import urlencode

  from utils.logger import log_info, log_error, log_debug, log_warning
  from .stt_interface import STTInterface, STTConfig, TranscriptionResult


  class DeepgramSTT(STTInterface):
-     """Deepgram STT - Single utterance mode with VAD"""

      def __init__(self, api_key: str):
          if not api_key:
              raise ValueError("Deepgram API key is required")
-
-         # For debugging, log the first 5 characters of the API key
-         log_info(f"🔑 Deepgram API key resolved: {api_key[:10]}... (length: {len(api_key)})")
-
          self.api_key = api_key
-         self.websocket = None
          self.is_streaming = False
          self.responses_queue = queue.Queue(maxsize=100)
-         self.ws_thread = None
-         self.stop_event = threading.Event()

          # Session tracking
          self.session_id = 0
@@ -42,64 +40,34 @@ class DeepgramSTT(STTInterface):

          # Final result tracking
          self.final_result_received = False

-         log_info(f"✅ Deepgram STT initialized for single utterance mode")
-
-     def _get_websocket_url(self, config: STTConfig) -> str:
-         """Build Deepgram WebSocket URL with optimized parameters"""
-         base_url = "wss://api.deepgram.com/v1/listen"
-
-         # Manually optimized parameters
-         """
-         params = {
-             "language": config.language,     # Language comes from config
-             "model": "nova-2",               # Best model
-             "punctuate": "true",             # Punctuation enabled
-             "interim_results": "false",      # ❌ Interim results OFF
-             "utterance_end_ms": "1000",      # 1 second of silence = end of speech
-             "vad_events": "true",            # VAD events ON
-             "smart_format": "true",          # Smart formatting
-             "no_delay": "true",              # Low-latency mode
-             "encoding": "webm-opus",         # WebM Opus encoding
-             "sample_rate": "16000",          # 16kHz sample rate
-             "endpointing": "1000",           # 1 second endpointing
-             "diarize": "false",              # Speaker diarization off
-             "multichannel": "false",         # Single channel
-             "alternatives": "1",             # Only the best alternative
-             "profanity_filter": "false",     # Profanity filter off
-             "redact": "false",               # Redaction off
-             "replace": "false",              # Replace off
-             "search": "false",               # Search off
-             "keywords": "false",             # Keywords off
-             "filler_words": "false",         # Filler-word detection off
-             "numerals": "true"               # Write numbers as digits
-         }
-         """

-         params = {
-             "language": "tr",
-             "model": "nova-2",
-             "punctuate": "true",
-             "interim_results": "false",
-             "smart_format": "true",
-             "encoding": "webm-opus",
-             "sample_rate": "16000"
          }
-
-         # VAD and endpointing parameters
-         if config.single_utterance or True:  # For single-utterance mode
-             params["utterance_end_ms"] = "1000"  # 1 second of silence
-             params["vad_events"] = "true"        # VAD events
-             params["endpointing"] = "1000"       # Endpointing
-
-         query_string = urlencode(params)
-         return f"{base_url}?{query_string}"

      async def start_streaming(self, config: STTConfig) -> None:
-         """Initialize streaming session - single utterance mode"""
          try:
              # Stop any existing stream
-             if self.is_streaming or self.ws_thread:
                  log_warning("⚠️ Previous stream still active, stopping it first")
                  await self.stop_streaming()
                  await asyncio.sleep(0.5)
@@ -107,124 +75,204 @@ class DeepgramSTT(STTInterface):
              # Reset session data
              self._reset_session_data()

-             log_info(f"🎤 Starting Deepgram STT - Single Utterance Mode #{self.session_id}")
-             log_debug(f"Language: {config.language}, Sample Rate: 16kHz, Utterance End: 1000ms")

-             # Clear stop event
-             self.stop_event.clear()
-             self.final_result_received = False

-             # Store config
-             self.config = config

-             # Start WebSocket thread
-             self.is_streaming = True
-             self.ws_thread = threading.Thread(
-                 target=self._run_websocket,
-                 args=(config,),
-                 name=f"DeepgramSTT-SingleUtterance-{self.session_id}"
              )
-             self.ws_thread.daemon = True
-             self.ws_thread.start()

-             # Wait for connection
-             await asyncio.sleep(0.5)

-             if not self.is_streaming:
-                 raise RuntimeError("Failed to establish WebSocket connection")
-
-             log_info(f"✅ Deepgram STT ready - Listening for single utterance")

          except Exception as e:
              log_error(f"❌ Failed to start Deepgram STT", error=str(e))
              self.is_streaming = False
-             self.websocket = None
              raise

      async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
-         """Stream audio chunk - only returns final results"""
-         if not self.is_streaming:
              raise RuntimeError("Streaming not started. Call start_streaming() first.")

-         # If a final result was already received, do not accept any more audio
          if self.final_result_received:
              log_debug("Final result already received, ignoring audio chunk")
              return

          try:
-             # Send audio to WebSocket
-             if self.websocket and not self.websocket.closed:
-                 # Send as binary data
-                 await asyncio.get_event_loop().run_in_executor(
-                     None,
-                     self._send_audio_sync,
-                     audio_chunk
-                 )
-
-                 self.total_chunks += 1
-                 self.total_bytes += len(audio_chunk)
-
-                 # Log progress every 50 chunks
-                 if self.total_chunks % 50 == 0:
-                     log_debug(f"📊 Listening... {self.total_chunks} chunks, {self.total_bytes/1024:.1f}KB")

-             # Check for final results only
              while True:
                  try:
                      result = self.responses_queue.get_nowait()
-                     # Only yield final results
                      if result.is_final:
                          yield result
                  except queue.Empty:
                      break

          except Exception as e:
-             log_error(f"❌ Deepgram STT streaming error", error=str(e))
              self.is_streaming = False
              raise

-     def _send_audio_sync(self, audio_chunk: bytes):
-         """Synchronous method to send audio"""
-         if self.websocket and not self.websocket.closed and not self.final_result_received:
-             try:
-                 asyncio.run(self.websocket.send(audio_chunk))
-             except Exception as e:
-                 log_error(f"❌ Error sending audio chunk: {e}")
-
      async def stop_streaming(self) -> Optional[TranscriptionResult]:
-         """Stop streaming and dispose"""
-         if not self.is_streaming and not self.ws_thread:
              log_debug("Already stopped, nothing to do")
              return None

          try:
-             log_info(f"🛑 Disposing Deepgram STT session #{self.session_id}")

-             # Set stop flag
              self.is_streaming = False
-             self.stop_event.set()

-             # Close WebSocket with close frame
-             if self.websocket and not self.websocket.closed:
                  try:
-                     # Send close frame to trigger final response
-                     await self.websocket.send(json.dumps({"type": "CloseStream"}))
-                     await asyncio.sleep(0.2)  # Wait for final response
-                     await self.websocket.close()
-                 except:
-                     pass

-             # Wait for thread
-             if self.ws_thread and self.ws_thread.is_alive():
-                 log_debug("⏳ Waiting for WebSocket thread to finish...")
-                 self.ws_thread.join(timeout=3.0)
-
-                 if self.ws_thread.is_alive():
-                     log_warning("⚠️ WebSocket thread did not stop gracefully")
-                 else:
-                     log_debug("✅ WebSocket thread finished")

-             # Get the final result
              final_result = None
              while not self.responses_queue.empty():
                  try:
@@ -234,143 +282,21 @@ class DeepgramSTT(STTInterface):
                  except queue.Empty:
                      break

-             # Reset everything
-             self.websocket = None
-             self.ws_thread = None
-             self.stop_event.clear()
              self.final_result_received = False

-             log_info(f"✅ Deepgram STT session #{self.session_id} disposed")
              return final_result

          except Exception as e:
              log_error(f"❌ Error during stop_streaming", error=str(e))
              self.is_streaming = False
-             self.websocket = None
-             self.ws_thread = None
              return None

-     def _run_websocket(self, config: STTConfig):
-         """Run WebSocket connection in separate thread"""
-         asyncio.set_event_loop(asyncio.new_event_loop())
-         loop = asyncio.get_event_loop()
-
-         try:
-             loop.run_until_complete(self._websocket_handler(config))
-         except Exception as e:
-             log_error(f"❌ WebSocket thread error", error=str(e), traceback=traceback.format_exc())
-         finally:
-             loop.close()
-             self.is_streaming = False
-
-     async def _websocket_handler(self, config: STTConfig):
-         """Handle WebSocket connection and messages"""
-         url = self._get_websocket_url(config)
-         headers = {
-             "Authorization": f"Token {self.api_key}"
-         }
-
-         try:
-             log_debug(f"🔌 Connecting to Deepgram WebSocket...")
-
-             async with websockets.connect(url, extra_headers=headers, ping_interval=5) as websocket:
-                 self.websocket = websocket
-                 log_info(f"✅ Connected to Deepgram - Ready for speech")
-
-                 # Receive messages task only (no keepalive needed for short sessions)
-                 receive_task = asyncio.create_task(self._receive_messages())
-
-                 # Wait until stop event, final result, or connection closes
-                 while not self.stop_event.is_set() and not websocket.closed and not self.final_result_received:
-                     await asyncio.sleep(0.1)
-
-                 # Cancel task
-                 receive_task.cancel()
-
-                 try:
-                     await receive_task
-                 except asyncio.CancelledError:
-                     pass
-
-         except Exception as e:
-             log_error(f"❌ WebSocket connection error", error=str(e))
-             self.is_streaming = False
-
-     async def _receive_messages(self):
-         """Receive and process messages from WebSocket"""
-         try:
-             async for message in self.websocket:
-                 if self.stop_event.is_set() or self.final_result_received:
-                     break
-
-                 try:
-                     data = json.loads(message)
-                     self._process_deepgram_message(data)
-                 except json.JSONDecodeError as e:
-                     log_error(f"❌ Failed to parse message: {e}")
-
-         except websockets.exceptions.ConnectionClosed:
-             log_info("WebSocket connection closed")
-         except Exception as e:
-             log_error(f"❌ Error receiving messages: {e}")
-
-     def _process_deepgram_message(self, data: Dict[str, Any]):
-         """Process Deepgram response message"""
-         msg_type = data.get("type", "")
-
-         if msg_type == "Results":
-             # Transcription result
-             is_final = data.get("is_final", False)
-
-             # Only process final results
-             if is_final:
-                 channel = data.get("channel", {})
-                 alternatives = channel.get("alternatives", [])
-
-                 if alternatives:
-                     alt = alternatives[0]
-                     transcript = alt.get("transcript", "")
-                     confidence = alt.get("confidence", 0.0)
-
-                     # Create final result
-                     result = TranscriptionResult(
-                         text=transcript,
-                         is_final=True,
-                         confidence=confidence,
-                         timestamp=datetime.now().timestamp()
-                     )
-
-                     # Queue result
-                     try:
-                         self.responses_queue.put(result)
-                         self.final_result_received = True
-
-                         log_info(f"🎯 FINAL RESULT: '{transcript}' (confidence: {confidence:.2f})")
-                         log_info(f"📊 Session stats: {self.total_chunks} chunks, {self.total_bytes/1024:.1f}KB")
-
-                     except queue.Full:
-                         log_warning("⚠️ Response queue full")
-
-         elif msg_type == "SpeechStarted":
-             # VAD: speech started
-             log_info("🎤 Speech detected - User started speaking")
-
-         elif msg_type == "UtteranceEnd":
-             # VAD: utterance ended - the user stopped speaking
-             log_info("🔚 Speech ended - User stopped speaking")
-
-             # At this point Deepgram will send the final result
-
-         elif msg_type == "Error":
-             # Error message
-             error = data.get("error", {})
-             log_error(f"❌ Deepgram error: {error}")
-
-         elif msg_type == "Metadata":
-             # Connection metadata
-             request_id = data.get("request_id", "")
-             log_debug(f"📋 Connected with request_id: {request_id}")
-
      def _reset_session_data(self):
          """Reset session-specific data"""
          # Clear queue
@@ -394,26 +320,38 @@ class DeepgramSTT(STTInterface):

      def get_supported_languages(self) -> List[str]:
          """Get list of supported language codes"""
-         # Deepgram Nova-2 supported languages
          return [
-             "tr",     # Turkish
-             "en",     # English
              "en-US",  # English (US)
              "en-GB",  # English (UK)
-             "de",     # German
-             "fr",     # French
-             "es",     # Spanish
-             "it",     # Italian
-             "pt",     # Portuguese
-             "ru",     # Russian
-             "ja",     # Japanese
-             "ko",     # Korean
-             "zh",     # Chinese
-             "ar",     # Arabic
-             "nl",     # Dutch
-             "sv",     # Swedish
-             "pl",     # Polish
-             "hi",     # Hindi
          ]

      def get_provider_name(self) -> str:
  """
2
+ Deepgram Speech-to-Text Implementation using Deepgram SDK
3
  """
 
4
  import asyncio
5
+ from typing import AsyncIterator, Optional, List, Any
 
 
6
  from datetime import datetime
7
  import queue
8
  import threading
 
9
  import traceback
10
+
11
+ from deepgram import (
12
+ DeepgramClient,
13
+ DeepgramClientOptions,
14
+ LiveTranscriptionEvents,
15
+ LiveOptions,
16
+ Microphone,
17
+ )
18
 
19
  from utils.logger import log_info, log_error, log_debug, log_warning
20
  from .stt_interface import STTInterface, STTConfig, TranscriptionResult
21
 
22
 
23
  class DeepgramSTT(STTInterface):
24
+ """Deepgram STT implementation using official SDK"""
25
 
26
  def __init__(self, api_key: str):
27
  if not api_key:
28
  raise ValueError("Deepgram API key is required")
29
+
 
 
 
30
  self.api_key = api_key
31
+ self.deepgram_client = None
32
+ self.live_connection = None
33
  self.is_streaming = False
34
  self.responses_queue = queue.Queue(maxsize=100)
 
 
35
 
36
  # Session tracking
37
  self.session_id = 0
 
40
 
41
  # Final result tracking
42
  self.final_result_received = False
43
+ self.stop_event = threading.Event()
44
 
45
+ log_info(f"✅ Deepgram STT initialized (SDK version)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ def _map_language_code(self, language: str) -> str:
48
+ """Map language codes to Deepgram format"""
49
+ language_map = {
50
+ "tr-TR": "tr",
51
+ "en-US": "en-US",
52
+ "en-GB": "en-GB",
53
+ "de-DE": "de",
54
+ "fr-FR": "fr",
55
+ "es-ES": "es",
56
+ "it-IT": "it",
57
+ "pt-BR": "pt-BR",
58
+ "ru-RU": "ru",
59
+ "ja-JP": "ja",
60
+ "ko-KR": "ko",
61
+ "zh-CN": "zh-CN",
62
+ "ar-SA": "ar",
63
  }
64
+ return language_map.get(language, language)
 
 
 
 
 
 
 
 
65
 
66
  async def start_streaming(self, config: STTConfig) -> None:
67
+ """Initialize streaming session using SDK"""
68
  try:
69
  # Stop any existing stream
70
+ if self.is_streaming:
71
  log_warning("⚠️ Previous stream still active, stopping it first")
72
  await self.stop_streaming()
73
  await asyncio.sleep(0.5)
 
75
  # Reset session data
76
  self._reset_session_data()
77
 
78
+ log_info(f"🎤 Starting Deepgram STT (SDK) - Session #{self.session_id}")
 
79
 
80
+             # Create Deepgram client
+             config_options = DeepgramClientOptions(
+                 verbose=False,
+                 options={"keepalive": "true"}
+             )
+             self.deepgram_client = DeepgramClient(self.api_key, config=config_options)

+             # Configure live transcription options
+             deepgram_language = self._map_language_code(config.language)

+             options = LiveOptions(
+                 language="tr",
+                 model="nova-2",
+                 punctuate=True,
+                 smart_format=True,
+                 encoding="webm-opus",
+                 sample_rate=16000,
+                 channels=1,
+                 interim_results=False,   # Only final results
+                 utterance_end_ms=1000,   # 1 second silence = end
+                 vad_events=True,         # Enable VAD events
+                 endpointing=1000,        # Enable endpointing
+                 diarize=False,
+                 numerals=True,
+                 profanity_filter=False,
+                 redact=False,
+                 filler_words=False,
              )

+             log_debug(f"🔧 Deepgram options: language={deepgram_language}, model=nova-2, utterance_end=1000ms")

+             # Create live connection
+             self.live_connection = self.deepgram_client.listen.live.v("1")
+
+             # Setup event handlers
+             self._setup_event_handlers()

+             # Start the connection
+             if await self.live_connection.start(options):
+                 self.is_streaming = True
+                 log_info(f"✅ Deepgram SDK connected - Ready for speech")
+             else:
+                 raise RuntimeError("Failed to start Deepgram connection")
+
          except Exception as e:
              log_error(f"❌ Failed to start Deepgram STT", error=str(e))
              self.is_streaming = False
+             self.live_connection = None
+             self.deepgram_client = None
              raise

+     def _setup_event_handlers(self):
+         """Setup event handlers for Deepgram events"""
+
+         # Transcript received
+         self.live_connection.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
+
+         # Speech started
+         self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, self._on_speech_started)
+
+         # Utterance end
+         self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end)
+
+         # Metadata
+         self.live_connection.on(LiveTranscriptionEvents.Metadata, self._on_metadata)
+
+         # Error
+         self.live_connection.on(LiveTranscriptionEvents.Error, self._on_error)
+
+         # Connection closed
+         self.live_connection.on(LiveTranscriptionEvents.Close, self._on_close)
+
+     def _on_transcript(self, *args, **kwargs):
+         """Handle transcript event"""
+         try:
+             result = args[1] if len(args) > 1 else kwargs.get("result", {})
+
+             # Extract transcript data
+             is_final = result.get("is_final", False)
+
+             # Only process final results
+             if is_final:
+                 channel = result.get("channel", {})
+                 alternatives = channel.get("alternatives", [])
+
+                 if alternatives:
+                     alt = alternatives[0]
+                     transcript = alt.get("transcript", "")
+                     confidence = alt.get("confidence", 0.0)
+
+                     if transcript.strip():  # Only process non-empty transcripts
+                         transcription_result = TranscriptionResult(
+                             text=transcript,
+                             is_final=True,
+                             confidence=confidence,
+                             timestamp=datetime.now().timestamp()
+                         )
+
+                         # Queue result
+                         try:
+                             self.responses_queue.put(transcription_result)
+                             self.final_result_received = True
+
+                             log_info(f"🎯 FINAL RESULT: '{transcript}' (confidence: {confidence:.2f})")
+                             log_info(f"📊 Session stats: {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
+
+                         except queue.Full:
+                             log_warning("⚠️ Response queue full")
+
+         except Exception as e:
+             log_error(f"❌ Error processing transcript: {e}")
+
+     def _on_speech_started(self, *args, **kwargs):
+         """Handle speech started event"""
+         log_info("🎤 Speech detected - User started speaking")
+
+     def _on_utterance_end(self, *args, **kwargs):
+         """Handle utterance end event"""
+         log_info("🔚 Speech ended - User stopped speaking")
+         # Deepgram will send final transcript after this
+
+     def _on_metadata(self, *args, **kwargs):
+         """Handle metadata event"""
+         metadata = args[1] if len(args) > 1 else kwargs.get("metadata", {})
+         request_id = metadata.get("request_id", "")
+         log_debug(f"📋 Deepgram metadata - Request ID: {request_id}")
+
+     def _on_error(self, *args, **kwargs):
+         """Handle error event"""
+         error = args[1] if len(args) > 1 else kwargs.get("error", {})
+         log_error(f"❌ Deepgram error: {error}")
+
+     def _on_close(self, *args, **kwargs):
+         """Handle connection close event"""
+         log_info("🔌 Deepgram connection closed")
+         self.is_streaming = False
+
      async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
+         """Stream audio chunk and get transcription results"""
+         if not self.is_streaming or not self.live_connection:
              raise RuntimeError("Streaming not started. Call start_streaming() first.")

+         # Don't send audio if final result already received
          if self.final_result_received:
              log_debug("Final result already received, ignoring audio chunk")
              return

          try:
+             # Send audio to Deepgram
+             self.live_connection.send(audio_chunk)

+             self.total_chunks += 1
+             self.total_audio_bytes += len(audio_chunk)
+
+             # Log progress
+             if self.total_chunks % 50 == 0:
+                 log_debug(f"📊 Listening... {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
+
+             # Check for final results
              while True:
                  try:
                      result = self.responses_queue.get_nowait()
                      if result.is_final:
                          yield result
                  except queue.Empty:
                      break

          except Exception as e:
+             log_error(f"❌ Error streaming audio", error=str(e))
              self.is_streaming = False
              raise

      async def stop_streaming(self) -> Optional[TranscriptionResult]:
+         """Stop streaming and clean up"""
+         if not self.is_streaming:
              log_debug("Already stopped, nothing to do")
              return None

          try:
+             log_info(f"🛑 Stopping Deepgram STT session #{self.session_id}")

              self.is_streaming = False

+             # Finish the stream to get final results
+             if self.live_connection:
                  try:
+                     # Finish the stream - this triggers final transcript
+                     self.live_connection.finish()

+                     # Wait a bit for final result
+                     await asyncio.sleep(0.5)

+                 except Exception as e:
+                     log_warning(f"⚠️ Error finishing stream: {e}")
+
+             # Get final result from queue
              final_result = None
              while not self.responses_queue.empty():
                  try:
@@ -234,143 +282,21 @@ class DeepgramSTT(STTInterface):
                  except queue.Empty:
                      break

+             # Clean up
+             self.live_connection = None
+             self.deepgram_client = None
              self.final_result_received = False

+             log_info(f"✅ Deepgram STT session #{self.session_id} stopped")
              return final_result

          except Exception as e:
              log_error(f"❌ Error during stop_streaming", error=str(e))
              self.is_streaming = False
+             self.live_connection = None
+             self.deepgram_client = None
              return None

      def _reset_session_data(self):
          """Reset session-specific data"""
          # Clear queue
@@ -394,26 +320,38 @@ class DeepgramSTT(STTInterface):

      def get_supported_languages(self) -> List[str]:
          """Get list of supported language codes"""
          return [
+             "tr-TR",  # Turkish
              "en-US",  # English (US)
              "en-GB",  # English (UK)
+             "de-DE",  # German
+             "fr-FR",  # French
+             "es-ES",  # Spanish
+             "it-IT",  # Italian
+             "pt-BR",  # Portuguese (Brazil)
+             "ru-RU",  # Russian
+             "ja-JP",  # Japanese
+             "ko-KR",  # Korean
+             "zh-CN",  # Chinese (Simplified)
+             "ar-SA",  # Arabic
+             "nl-NL",  # Dutch
+             "sv-SE",  # Swedish
+             "pl-PL",  # Polish
+             "hi-IN",  # Hindi
+             "cs-CZ",  # Czech
+             "da-DK",  # Danish
+             "fi-FI",  # Finnish
+             "el-GR",  # Greek
+             "he-IL",  # Hebrew
+             "hu-HU",  # Hungarian
+             "id-ID",  # Indonesian
+             "ms-MY",  # Malay
+             "no-NO",  # Norwegian
+             "ro-RO",  # Romanian
+             "sk-SK",  # Slovak
+             "th-TH",  # Thai
+             "uk-UA",  # Ukrainian
+             "vi-VN",  # Vietnamese
          ]

      def get_provider_name(self) -> str:
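
For orientation, a minimal usage sketch of the SDK-based class introduced in this commit (not part of the commit itself). It assumes the repository's module layout (stt.stt_deepgram, stt.stt_interface), that STTConfig accepts a language field, and that iter_audio_chunks() stands in for a real WebM/Opus audio source.

# Hypothetical driver for the SDK-based DeepgramSTT - illustrative only.
import asyncio

from stt.stt_deepgram import DeepgramSTT   # assumed import path
from stt.stt_interface import STTConfig    # assumed import path


async def iter_audio_chunks():
    """Placeholder audio source; yield WebM/Opus-encoded byte chunks here."""
    yield b""  # replace with real microphone / browser audio chunks


async def transcribe_once(api_key: str) -> None:
    stt = DeepgramSTT(api_key)
    config = STTConfig(language="tr-TR")  # field name assumed from config.language usage above

    await stt.start_streaming(config)
    try:
        async for chunk in iter_audio_chunks():
            # stream_audio forwards the chunk to the live connection and yields
            # any final TranscriptionResult queued by the transcript handler
            async for result in stt.stream_audio(chunk):
                print(f"Final transcript: {result.text} (confidence {result.confidence:.2f})")
                return
    finally:
        final = await stt.stop_streaming()
        if final is not None:
            print(f"Final transcript (on stop): {final.text}")


if __name__ == "__main__":
    asyncio.run(transcribe_once("YOUR_DEEPGRAM_API_KEY"))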