ciyidogan commited on
Commit
5d50ed0
Β·
verified Β·
1 Parent(s): 114bc80

Update stt/stt_deepgram.py

Browse files
Files changed (1) hide show
  1. stt/stt_deepgram.py +389 -401
stt/stt_deepgram.py CHANGED
@@ -1,402 +1,390 @@
1
- """
2
- Deepgram Speech-to-Text Implementation using Deepgram SDK
3
- """
4
- import asyncio
5
- from typing import AsyncIterator, Optional, List, Any
6
- from datetime import datetime
7
- import queue
8
- import threading
9
- import traceback
10
-
11
- from deepgram import (
12
- DeepgramClient,
13
- DeepgramClientOptions,
14
- LiveTranscriptionEvents,
15
- LiveOptions,
16
- Microphone,
17
- )
18
-
19
- from utils.logger import log_info, log_error, log_debug, log_warning
20
- from .stt_interface import STTInterface, STTConfig, TranscriptionResult
21
-
22
-
23
- class DeepgramSTT(STTInterface):
24
- """Deepgram STT implementation using official SDK"""
25
-
26
- def __init__(self, api_key: str):
27
- if not api_key:
28
- raise ValueError("Deepgram API key is required")
29
-
30
- # Debug iΓ§in API key'in ilk 10 karakterini logla
31
- log_info(f"πŸ”‘ Deepgram API key resolved: {api_key[:10]}... (length: {len(api_key)})")
32
-
33
- self.api_key = api_key
34
- self.deepgram_client = None
35
- self.live_connection = None
36
- self.is_streaming = False
37
- self.responses_queue = queue.Queue(maxsize=100)
38
-
39
- # Session tracking
40
- self.session_id = 0
41
- self.total_audio_bytes = 0
42
- self.total_chunks = 0
43
-
44
- # Final result tracking
45
- self.final_result_received = False
46
- self.stop_event = threading.Event()
47
-
48
- log_info(f"βœ… Deepgram STT initialized (SDK version)")
49
-
50
- def _map_language_code(self, language: str) -> str:
51
- """Map language codes to Deepgram format"""
52
- language_map = {
53
- "tr-TR": "tr",
54
- "en-US": "en-US",
55
- "en-GB": "en-GB",
56
- "de-DE": "de",
57
- "fr-FR": "fr",
58
- "es-ES": "es",
59
- "it-IT": "it",
60
- "pt-BR": "pt-BR",
61
- "ru-RU": "ru",
62
- "ja-JP": "ja",
63
- "ko-KR": "ko",
64
- "zh-CN": "zh-CN",
65
- "ar-SA": "ar",
66
- }
67
- return language_map.get(language, language)
68
-
69
- async def start_streaming(self, config: STTConfig) -> None:
70
- """Initialize streaming session using SDK"""
71
- try:
72
- # Stop any existing stream
73
- if self.is_streaming:
74
- log_warning("⚠️ Previous stream still active, stopping it first")
75
- await self.stop_streaming()
76
- await asyncio.sleep(0.5)
77
-
78
- # Reset session data
79
- self._reset_session_data()
80
-
81
- log_info(f"🎀 Starting Deepgram STT (SDK) - Session #{self.session_id}")
82
-
83
- # Create Deepgram client with more verbose logging for debugging
84
- config_options = DeepgramClientOptions(
85
- verbose=True, # βœ… DEBUG iΓ§in verbose aΓ§Δ±k
86
- options={"keepalive": "true"}
87
- )
88
- self.deepgram_client = DeepgramClient(self.api_key, config=config_options)
89
-
90
- """
91
- options = LiveOptions(
92
- language="tr",
93
- model="nova-2",
94
- punctuate=True,
95
- smart_format=True,
96
- encoding="opus",
97
- sample_rate=16000,
98
- channels=1,
99
- interim_results=False, # Only final results
100
- utterance_end_ms=1000, # 1 second silence = end
101
- vad_events=True, # Enable VAD events
102
- diarize=False,
103
- numerals=True,
104
- profanity_filter=False,
105
- redact=False
106
- )
107
- """
108
-
109
- # Try minimal configuration first
110
- options = LiveOptions(
111
- language="tr",
112
- model="nova-2",
113
- encoding="linear16",
114
- sample_rate=16000,
115
- interim_results=True,
116
- channels=1
117
- )
118
-
119
- log_info(f"πŸ”§ Deepgram options: language=tr, model=nova-2, encoding=linear16, interim_results=True")
120
-
121
- # Create live connection
122
- self.live_connection = self.deepgram_client.listen.live.v("1")
123
-
124
- # Setup event handlers
125
- self._setup_event_handlers()
126
-
127
- try:
128
- # Log before connection attempt
129
- log_info("πŸ”Œ Attempting to connect to Deepgram...")
130
-
131
- result = self.live_connection.start(options)
132
- log_info(f"πŸ”Œ Connection start result: {result}")
133
-
134
- if result:
135
- self.is_streaming = True
136
- log_info(f"βœ… Deepgram SDK connected - Ready for speech")
137
- else:
138
- # Try to get more error details
139
- if hasattr(self.live_connection, 'get_error') or hasattr(self.live_connection, 'error'):
140
- error_detail = getattr(self.live_connection, 'error', 'No error details')
141
- log_error(f"❌ Connection failed with details: {error_detail}")
142
- raise RuntimeError("Failed to start Deepgram connection")
143
-
144
- except Exception as e:
145
- log_error(f"❌ Connection error: {str(e)}")
146
- # Log more details about the exception
147
- if hasattr(e, 'response'):
148
- log_error(f"❌ Response: {e.response}")
149
- if hasattr(e, 'status_code'):
150
- log_error(f"❌ Status code: {e.status_code}")
151
- raise
152
-
153
- except Exception as e:
154
- log_error(f"❌ Failed to start Deepgram STT", error=str(e))
155
- if hasattr(e, '__dict__'):
156
- log_error(f"❌ Error details: {e.__dict__}")
157
- self.is_streaming = False
158
- self.live_connection = None
159
- self.deepgram_client = None
160
- raise
161
-
162
- def _setup_event_handlers(self):
163
- """Setup event handlers for Deepgram events"""
164
-
165
- # Transcript received - use the existing class method
166
- self.live_connection.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
167
-
168
- # Speech started
169
- self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, self._on_speech_started)
170
-
171
- # Utterance end
172
- self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end)
173
-
174
- # Metadata
175
- self.live_connection.on(LiveTranscriptionEvents.Metadata, self._on_metadata)
176
-
177
- # Error
178
- self.live_connection.on(LiveTranscriptionEvents.Error, self._on_error)
179
-
180
- # Connection closed
181
- self.live_connection.on(LiveTranscriptionEvents.Close, self._on_close)
182
-
183
- def _on_transcript(self, *args, **kwargs):
184
- """Handle transcript event - SDK calls this method directly"""
185
- try:
186
- # SDK passes the result as second argument
187
- result = args[1] if len(args) > 1 else kwargs.get("result")
188
-
189
- if not result:
190
- log_warning("⚠️ No result in transcript event")
191
- return
192
-
193
- # Access properties directly from the result object
194
- is_final = result.is_final if hasattr(result, 'is_final') else False
195
-
196
- # Get transcript from channel alternatives
197
- if hasattr(result, 'channel') and result.channel:
198
- alternatives = result.channel.alternatives
199
- if alternatives and len(alternatives) > 0:
200
- transcript = alternatives[0].transcript
201
- confidence = alternatives[0].confidence
202
-
203
- # Log all transcripts for debugging
204
- log_debug(f"πŸ“ Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
205
-
206
- if transcript and transcript.strip(): # Only process non-empty transcripts
207
- transcription_result = TranscriptionResult(
208
- text=transcript,
209
- is_final=is_final,
210
- confidence=confidence,
211
- timestamp=datetime.now().timestamp()
212
- )
213
-
214
- # Queue result
215
- try:
216
- self.responses_queue.put(transcription_result)
217
- if is_final:
218
- self.final_result_received = True
219
- log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
220
- else:
221
- log_info(f"πŸ“ Interim transcript: '{transcript}'")
222
- except queue.Full:
223
- log_warning("⚠️ Response queue full")
224
- else:
225
- if is_final:
226
- log_warning(f"⚠️ Empty final transcript received")
227
-
228
- except Exception as e:
229
- log_error(f"❌ Error processing transcript: {e}")
230
- log_error(f"❌ Args: {args}")
231
- log_error(f"❌ Kwargs: {kwargs}")
232
- import traceback
233
- log_error(f"❌ Traceback: {traceback.format_exc()}")
234
-
235
- def _on_speech_started(self, *args, **kwargs):
236
- """Handle speech started event"""
237
- log_info("🎀 Speech detected - User started speaking")
238
-
239
- def _on_utterance_end(self, *args, **kwargs):
240
- """Handle utterance end event"""
241
- log_info("πŸ”š Speech ended - User stopped speaking")
242
- # Deepgram will send final transcript after this
243
-
244
- def _on_metadata(self, *args, **kwargs):
245
- """Handle metadata event"""
246
- metadata = args[1] if len(args) > 1 else kwargs.get("metadata", {})
247
- request_id = metadata.get("request_id", "")
248
- log_debug(f"πŸ“‹ Deepgram metadata - Request ID: {request_id}")
249
-
250
- def _on_error(self, *args, **kwargs):
251
- """Handle error event"""
252
- error = args[1] if len(args) > 1 else kwargs.get("error", {})
253
- log_error(f"❌ Deepgram error: {error}")
254
-
255
- def _on_close(self, *args, **kwargs):
256
- """Handle connection close event"""
257
- log_info("πŸ”Œ Deepgram connection closed")
258
- self.is_streaming = False
259
-
260
- async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
261
- """Stream audio chunk and get transcription results"""
262
- if not self.is_streaming or not self.live_connection:
263
- raise RuntimeError("Streaming not started. Call start_streaming() first.")
264
-
265
- # Don't send audio if final result already received
266
- if self.final_result_received:
267
- log_debug("Final result already received, ignoring audio chunk")
268
- return
269
-
270
- try:
271
- # Send audio to Deepgram
272
- self.live_connection.send(audio_chunk)
273
-
274
- self.total_chunks += 1
275
- self.total_audio_bytes += len(audio_chunk)
276
-
277
- # Log progress
278
- if self.total_chunks % 50 == 0:
279
- log_debug(f"πŸ“Š Listening... {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
280
-
281
- # Check for final results
282
- while True:
283
- try:
284
- result = self.responses_queue.get_nowait()
285
- if result.is_final:
286
- yield result
287
- except queue.Empty:
288
- break
289
-
290
- except Exception as e:
291
- log_error(f"❌ Error streaming audio", error=str(e))
292
- self.is_streaming = False
293
- raise
294
-
295
- async def stop_streaming(self) -> Optional[TranscriptionResult]:
296
- """Stop streaming and clean up"""
297
- if not self.is_streaming:
298
- log_debug("Already stopped, nothing to do")
299
- return None
300
-
301
- try:
302
- log_info(f"πŸ›‘ Stopping Deepgram STT session #{self.session_id}")
303
-
304
- self.is_streaming = False
305
-
306
- # Finish the stream to get final results
307
- if self.live_connection:
308
- try:
309
- # Finish the stream - this triggers final transcript
310
- self.live_connection.finish()
311
-
312
- # Wait a bit for final result
313
- await asyncio.sleep(0.5)
314
-
315
- except Exception as e:
316
- log_warning(f"⚠️ Error finishing stream: {e}")
317
-
318
- # Get final result from queue
319
- final_result = None
320
- while not self.responses_queue.empty():
321
- try:
322
- result = self.responses_queue.get_nowait()
323
- if result.is_final:
324
- final_result = result
325
- except queue.Empty:
326
- break
327
-
328
- # Clean up
329
- self.live_connection = None
330
- self.deepgram_client = None
331
- self.final_result_received = False
332
-
333
- log_info(f"βœ… Deepgram STT session #{self.session_id} stopped")
334
- return final_result
335
-
336
- except Exception as e:
337
- log_error(f"❌ Error during stop_streaming", error=str(e))
338
- self.is_streaming = False
339
- self.live_connection = None
340
- self.deepgram_client = None
341
- return None
342
-
343
- def _reset_session_data(self):
344
- """Reset session-specific data"""
345
- # Clear queue
346
- while not self.responses_queue.empty():
347
- try:
348
- self.responses_queue.get_nowait()
349
- except:
350
- pass
351
-
352
- # Reset counters
353
- self.total_audio_bytes = 0
354
- self.total_chunks = 0
355
- self.session_id += 1
356
- self.final_result_received = False
357
-
358
- log_debug(f"πŸ”„ Session data reset. New session ID: {self.session_id}")
359
-
360
- def supports_realtime(self) -> bool:
361
- """Deepgram supports real-time streaming"""
362
- return True
363
-
364
- def get_supported_languages(self) -> List[str]:
365
- """Get list of supported language codes"""
366
- return [
367
- "tr-TR", # Turkish
368
- "en-US", # English (US)
369
- "en-GB", # English (UK)
370
- "de-DE", # German
371
- "fr-FR", # French
372
- "es-ES", # Spanish
373
- "it-IT", # Italian
374
- "pt-BR", # Portuguese (Brazil)
375
- "ru-RU", # Russian
376
- "ja-JP", # Japanese
377
- "ko-KR", # Korean
378
- "zh-CN", # Chinese (Simplified)
379
- "ar-SA", # Arabic
380
- "nl-NL", # Dutch
381
- "sv-SE", # Swedish
382
- "pl-PL", # Polish
383
- "hi-IN", # Hindi
384
- "cs-CZ", # Czech
385
- "da-DK", # Danish
386
- "fi-FI", # Finnish
387
- "el-GR", # Greek
388
- "he-IL", # Hebrew
389
- "hu-HU", # Hungarian
390
- "id-ID", # Indonesian
391
- "ms-MY", # Malay
392
- "no-NO", # Norwegian
393
- "ro-RO", # Romanian
394
- "sk-SK", # Slovak
395
- "th-TH", # Thai
396
- "uk-UA", # Ukrainian
397
- "vi-VN", # Vietnamese
398
- ]
399
-
400
- def get_provider_name(self) -> str:
401
- """Get provider name"""
402
  return "deepgram"
 
1
+ """
2
+ Deepgram Speech-to-Text Implementation using Deepgram SDK
3
+ """
4
+ import asyncio
5
+ from typing import AsyncIterator, Optional, List, Any
6
+ from datetime import datetime
7
+ import queue
8
+ import threading
9
+ import traceback
10
+
11
+ from deepgram import (
12
+ DeepgramClient,
13
+ DeepgramClientOptions,
14
+ LiveTranscriptionEvents,
15
+ LiveOptions,
16
+ Microphone,
17
+ )
18
+
19
+ from utils.logger import log_info, log_error, log_debug, log_warning
20
+ from .stt_interface import STTInterface, STTConfig, TranscriptionResult
21
+
22
+
23
+ class DeepgramSTT(STTInterface):
24
+ """Deepgram STT implementation using official SDK"""
25
+
26
+ def __init__(self, api_key: str):
27
+ if not api_key:
28
+ raise ValueError("Deepgram API key is required")
29
+
30
+ # Debug iΓ§in API key'in ilk 10 karakterini logla
31
+ log_info(f"πŸ”‘ Deepgram API key resolved: {api_key[:10]}... (length: {len(api_key)})")
32
+
33
+ self.api_key = api_key
34
+ self.deepgram_client = None
35
+ self.live_connection = None
36
+ self.is_streaming = False
37
+ self.responses_queue = queue.Queue(maxsize=100)
38
+
39
+ # Session tracking
40
+ self.session_id = 0
41
+ self.total_audio_bytes = 0
42
+ self.total_chunks = 0
43
+
44
+ # Final result tracking
45
+ self.final_result_received = False
46
+ self.stop_event = threading.Event()
47
+
48
+ log_info(f"βœ… Deepgram STT initialized (SDK version)")
49
+
50
+ def _map_language_code(self, language: str) -> str:
51
+ """Map language codes to Deepgram format"""
52
+ language_map = {
53
+ "tr-TR": "tr",
54
+ "en-US": "en-US",
55
+ "en-GB": "en-GB",
56
+ "de-DE": "de",
57
+ "fr-FR": "fr",
58
+ "es-ES": "es",
59
+ "it-IT": "it",
60
+ "pt-BR": "pt-BR",
61
+ "ru-RU": "ru",
62
+ "ja-JP": "ja",
63
+ "ko-KR": "ko",
64
+ "zh-CN": "zh-CN",
65
+ "ar-SA": "ar",
66
+ }
67
+ return language_map.get(language, language)
68
+
69
+ async def start_streaming(self, config: STTConfig) -> None:
70
+ """Initialize streaming session using SDK"""
71
+ try:
72
+ # Stop any existing stream
73
+ if self.is_streaming:
74
+ log_warning("⚠️ Previous stream still active, stopping it first")
75
+ await self.stop_streaming()
76
+ await asyncio.sleep(0.5)
77
+
78
+ # Reset session data
79
+ self._reset_session_data()
80
+
81
+ log_info(f"🎀 Starting Deepgram STT (SDK) - Session #{self.session_id}")
82
+
83
+ # Create Deepgram client with more verbose logging for debugging
84
+ config_options = DeepgramClientOptions(
85
+ verbose=False,
86
+ options={"keepalive": "true"}
87
+ )
88
+ self.deepgram_client = DeepgramClient(self.api_key, config=config_options)
89
+
90
+ # Try minimal configuration first
91
+ options = LiveOptions(
92
+ language="tr",
93
+ model="nova-2",
94
+ encoding="linear16",
95
+ sample_rate=16000,
96
+ interim_results=False, # Only final results
97
+ utterance_end_ms=1000, # 1 second silence = end
98
+ punctuate=True,
99
+ diarize=False,
100
+ numerals=True,
101
+ profanity_filter=False,
102
+ redact=False,
103
+ vad_events=True, # Enable VAD events
104
+ channels=1
105
+ )
106
+
107
+ log_info(f"πŸ”§ Deepgram options: language=tr, model=nova-2, encoding=linear16, interim_results=True")
108
+
109
+ # Create live connection
110
+ self.live_connection = self.deepgram_client.listen.live.v("1")
111
+
112
+ # Setup event handlers
113
+ self._setup_event_handlers()
114
+
115
+ try:
116
+ # Log before connection attempt
117
+ log_info("πŸ”Œ Attempting to connect to Deepgram...")
118
+
119
+ result = self.live_connection.start(options)
120
+ log_info(f"πŸ”Œ Connection start result: {result}")
121
+
122
+ if result:
123
+ self.is_streaming = True
124
+ log_info(f"βœ… Deepgram SDK connected - Ready for speech")
125
+ else:
126
+ # Try to get more error details
127
+ if hasattr(self.live_connection, 'get_error') or hasattr(self.live_connection, 'error'):
128
+ error_detail = getattr(self.live_connection, 'error', 'No error details')
129
+ log_error(f"❌ Connection failed with details: {error_detail}")
130
+ raise RuntimeError("Failed to start Deepgram connection")
131
+
132
+ except Exception as e:
133
+ log_error(f"❌ Connection error: {str(e)}")
134
+ # Log more details about the exception
135
+ if hasattr(e, 'response'):
136
+ log_error(f"❌ Response: {e.response}")
137
+ if hasattr(e, 'status_code'):
138
+ log_error(f"❌ Status code: {e.status_code}")
139
+ raise
140
+
141
+ except Exception as e:
142
+ log_error(f"❌ Failed to start Deepgram STT", error=str(e))
143
+ if hasattr(e, '__dict__'):
144
+ log_error(f"❌ Error details: {e.__dict__}")
145
+ self.is_streaming = False
146
+ self.live_connection = None
147
+ self.deepgram_client = None
148
+ raise
149
+
150
+ def _setup_event_handlers(self):
151
+ """Setup event handlers for Deepgram events"""
152
+
153
+ # Transcript received - use the existing class method
154
+ self.live_connection.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
155
+
156
+ # Speech started
157
+ self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, self._on_speech_started)
158
+
159
+ # Utterance end
160
+ self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end)
161
+
162
+ # Metadata
163
+ self.live_connection.on(LiveTranscriptionEvents.Metadata, self._on_metadata)
164
+
165
+ # Error
166
+ self.live_connection.on(LiveTranscriptionEvents.Error, self._on_error)
167
+
168
+ # Connection closed
169
+ self.live_connection.on(LiveTranscriptionEvents.Close, self._on_close)
170
+
171
+ def _on_transcript(self, *args, **kwargs):
172
+ """Handle transcript event - SDK calls this method directly"""
173
+ try:
174
+ # SDK passes the result as second argument
175
+ result = args[1] if len(args) > 1 else kwargs.get("result")
176
+
177
+ if not result:
178
+ log_warning("⚠️ No result in transcript event")
179
+ return
180
+
181
+ # Access properties directly from the result object
182
+ is_final = result.is_final if hasattr(result, 'is_final') else False
183
+
184
+ # Get transcript from channel alternatives
185
+ if hasattr(result, 'channel') and result.channel:
186
+ alternatives = result.channel.alternatives
187
+ if alternatives and len(alternatives) > 0:
188
+ transcript = alternatives[0].transcript
189
+ confidence = alternatives[0].confidence
190
+
191
+ # Log all transcripts for debugging
192
+ log_debug(f"πŸ“ Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
193
+
194
+ if transcript and transcript.strip(): # Only process non-empty transcripts
195
+ transcription_result = TranscriptionResult(
196
+ text=transcript,
197
+ is_final=is_final,
198
+ confidence=confidence,
199
+ timestamp=datetime.now().timestamp()
200
+ )
201
+
202
+ # Queue result
203
+ try:
204
+ self.responses_queue.put(transcription_result)
205
+ if is_final:
206
+ self.final_result_received = True
207
+ log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
208
+ else:
209
+ log_info(f"πŸ“ Interim transcript: '{transcript}'")
210
+ except queue.Full:
211
+ log_warning("⚠️ Response queue full")
212
+ else:
213
+ if is_final:
214
+ log_warning(f"⚠️ Empty final transcript received")
215
+
216
+ except Exception as e:
217
+ log_error(f"❌ Error processing transcript: {e}")
218
+ log_error(f"❌ Args: {args}")
219
+ log_error(f"❌ Kwargs: {kwargs}")
220
+ import traceback
221
+ log_error(f"❌ Traceback: {traceback.format_exc()}")
222
+
223
+ def _on_speech_started(self, *args, **kwargs):
224
+ """Handle speech started event"""
225
+ log_info("🎀 Speech detected - User started speaking")
226
+
227
+ def _on_utterance_end(self, *args, **kwargs):
228
+ """Handle utterance end event"""
229
+ log_info("πŸ”š Speech ended - User stopped speaking")
230
+ # Deepgram will send final transcript after this
231
+
232
+ def _on_metadata(self, *args, **kwargs):
233
+ """Handle metadata event"""
234
+ metadata = args[1] if len(args) > 1 else kwargs.get("metadata", {})
235
+ request_id = metadata.get("request_id", "")
236
+ log_debug(f"πŸ“‹ Deepgram metadata - Request ID: {request_id}")
237
+
238
+ def _on_error(self, *args, **kwargs):
239
+ """Handle error event"""
240
+ error = args[1] if len(args) > 1 else kwargs.get("error", {})
241
+ log_error(f"❌ Deepgram error: {error}")
242
+
243
+ def _on_close(self, *args, **kwargs):
244
+ """Handle connection close event"""
245
+ log_info("πŸ”Œ Deepgram connection closed")
246
+ self.is_streaming = False
247
+
248
+ async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
249
+ """Stream audio chunk and get transcription results"""
250
+ if not self.is_streaming or not self.live_connection:
251
+ raise RuntimeError("Streaming not started. Call start_streaming() first.")
252
+
253
+ # Don't send audio if final result already received
254
+ if self.final_result_received:
255
+ log_debug("Final result already received, ignoring audio chunk")
256
+ return
257
+
258
+ try:
259
+ # Send audio to Deepgram
260
+ self.live_connection.send(audio_chunk)
261
+
262
+ self.total_chunks += 1
263
+ self.total_audio_bytes += len(audio_chunk)
264
+
265
+ # Log progress
266
+ if self.total_chunks % 50 == 0:
267
+ log_debug(f"πŸ“Š Listening... {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
268
+
269
+ # Check for final results
270
+ while True:
271
+ try:
272
+ result = self.responses_queue.get_nowait()
273
+ if result.is_final:
274
+ yield result
275
+ except queue.Empty:
276
+ break
277
+
278
+ except Exception as e:
279
+ log_error(f"❌ Error streaming audio", error=str(e))
280
+ self.is_streaming = False
281
+ raise
282
+
283
+ async def stop_streaming(self) -> Optional[TranscriptionResult]:
284
+ """Stop streaming and clean up"""
285
+ if not self.is_streaming:
286
+ log_debug("Already stopped, nothing to do")
287
+ return None
288
+
289
+ try:
290
+ log_info(f"πŸ›‘ Stopping Deepgram STT session #{self.session_id}")
291
+
292
+ self.is_streaming = False
293
+
294
+ # Finish the stream to get final results
295
+ if self.live_connection:
296
+ try:
297
+ # Finish the stream - this triggers final transcript
298
+ self.live_connection.finish()
299
+
300
+ # Wait a bit for final result
301
+ await asyncio.sleep(0.5)
302
+
303
+ except Exception as e:
304
+ log_warning(f"⚠️ Error finishing stream: {e}")
305
+
306
+ # Get final result from queue
307
+ final_result = None
308
+ while not self.responses_queue.empty():
309
+ try:
310
+ result = self.responses_queue.get_nowait()
311
+ if result.is_final:
312
+ final_result = result
313
+ except queue.Empty:
314
+ break
315
+
316
+ # Clean up
317
+ self.live_connection = None
318
+ self.deepgram_client = None
319
+ self.final_result_received = False
320
+
321
+ log_info(f"βœ… Deepgram STT session #{self.session_id} stopped")
322
+ return final_result
323
+
324
+ except Exception as e:
325
+ log_error(f"❌ Error during stop_streaming", error=str(e))
326
+ self.is_streaming = False
327
+ self.live_connection = None
328
+ self.deepgram_client = None
329
+ return None
330
+
331
+ def _reset_session_data(self):
332
+ """Reset session-specific data"""
333
+ # Clear queue
334
+ while not self.responses_queue.empty():
335
+ try:
336
+ self.responses_queue.get_nowait()
337
+ except:
338
+ pass
339
+
340
+ # Reset counters
341
+ self.total_audio_bytes = 0
342
+ self.total_chunks = 0
343
+ self.session_id += 1
344
+ self.final_result_received = False
345
+
346
+ log_debug(f"πŸ”„ Session data reset. New session ID: {self.session_id}")
347
+
348
+ def supports_realtime(self) -> bool:
349
+ """Deepgram supports real-time streaming"""
350
+ return True
351
+
352
+ def get_supported_languages(self) -> List[str]:
353
+ """Get list of supported language codes"""
354
+ return [
355
+ "tr-TR", # Turkish
356
+ "en-US", # English (US)
357
+ "en-GB", # English (UK)
358
+ "de-DE", # German
359
+ "fr-FR", # French
360
+ "es-ES", # Spanish
361
+ "it-IT", # Italian
362
+ "pt-BR", # Portuguese (Brazil)
363
+ "ru-RU", # Russian
364
+ "ja-JP", # Japanese
365
+ "ko-KR", # Korean
366
+ "zh-CN", # Chinese (Simplified)
367
+ "ar-SA", # Arabic
368
+ "nl-NL", # Dutch
369
+ "sv-SE", # Swedish
370
+ "pl-PL", # Polish
371
+ "hi-IN", # Hindi
372
+ "cs-CZ", # Czech
373
+ "da-DK", # Danish
374
+ "fi-FI", # Finnish
375
+ "el-GR", # Greek
376
+ "he-IL", # Hebrew
377
+ "hu-HU", # Hungarian
378
+ "id-ID", # Indonesian
379
+ "ms-MY", # Malay
380
+ "no-NO", # Norwegian
381
+ "ro-RO", # Romanian
382
+ "sk-SK", # Slovak
383
+ "th-TH", # Thai
384
+ "uk-UA", # Ukrainian
385
+ "vi-VN", # Vietnamese
386
+ ]
387
+
388
+ def get_provider_name(self) -> str:
389
+ """Get provider name"""
 
 
 
 
 
 
 
 
 
 
 
 
390
  return "deepgram"