ciyidogan committed on
Commit
478e05e
·
verified ·
1 Parent(s): 15d3e9a

Update stt/stt_deepgram.py

Browse files
Files changed (1) hide show
  1. stt/stt_deepgram.py +51 -108
stt/stt_deepgram.py CHANGED
@@ -118,7 +118,7 @@ class DeepgramSTT(STTInterface):
118
  channels=1,
119
  )
120
 
121
- log_info(f"🔧 Deepgram options: language=tr, model=nova-2, encoding=opus, interim_results=True")
122
 
123
  # Create live connection
124
  self.live_connection = self.deepgram_client.listen.live.v("1")
@@ -163,116 +163,52 @@ class DeepgramSTT(STTInterface):
163
 
164
  def _setup_event_handlers(self):
165
  """Setup event handlers for Deepgram events"""
166
- if not self.live_connection:
167
- log_error("❌ No live connection to setup handlers")
168
- return
169
-
170
- # Results handler - handles transcription results
171
- def _on_transcript(self, *args, **kwargs):
172
- """Handle transcript event - SDK calls this method directly"""
173
- try:
174
- # SDK passes the result as second argument
175
- result = args[1] if len(args) > 1 else kwargs.get("result")
176
-
177
- if not result:
178
- log_warning("⚠️ No result in transcript event")
179
- return
180
-
181
- # Access properties directly from the result object
182
- is_final = result.is_final
183
-
184
- # Get transcript from channel alternatives
185
- if hasattr(result, 'channel') and result.channel:
186
- alternatives = result.channel.alternatives
187
- if alternatives and len(alternatives) > 0:
188
- transcript = alternatives[0].transcript
189
- confidence = alternatives[0].confidence
190
-
191
- # Log all transcripts for debugging
192
- log_debug(f"📝 Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
193
-
194
- if transcript and transcript.strip(): # Only process non-empty transcripts
195
- transcription_result = TranscriptionResult(
196
- text=transcript,
197
- is_final=is_final,
198
- confidence=confidence,
199
- timestamp=datetime.now().timestamp()
200
- )
201
-
202
- # Queue result
203
- try:
204
- self.responses_queue.put(transcription_result)
205
- if is_final:
206
- self.final_result_received = True
207
- log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
208
- else:
209
- log_info(f"📝 Interim transcript: '{transcript}'")
210
- except queue.Full:
211
- log_warning("⚠️ Response queue full")
212
- else:
213
- if is_final:
214
- log_warning(f"⚠️ Empty final transcript received")
215
-
216
- except Exception as e:
217
- log_error(f"❌ Error processing transcript: {e}")
218
- log_error(f"❌ Args: {args}")
219
- log_error(f"❌ Kwargs: {kwargs}")
220
- import traceback
221
- log_error(f"❌ Traceback: {traceback.format_exc()}")
222
-
223
- # Speech started handler
224
- def on_speech_started(self, speech_started, **kwargs):
225
- log_info(f"🎀 Speech started event: {speech_started}")
226
-
227
- # Utterance end handler
228
- def on_utterance_end(self, utterance_end, **kwargs):
229
- log_info(f"🔚 Utterance ended event: {utterance_end}")
230
-
231
- # Metadata handler
232
- def on_metadata(self, metadata, **kwargs):
233
- log_info(f"📊 Deepgram Metadata: {metadata}")
234
-
235
- # Error handler
236
- def on_error(self, error, **kwargs):
237
- log_error(f"❌ Deepgram error event: {error}")
238
-
239
- # Close handler
240
- def on_close(self, close, **kwargs):
241
- log_warning(f"🔌 Deepgram connection closed: {close}")
242
- self.is_streaming = False
243
-
244
- # Register handlers with correct syntax
245
- self.live_connection.on(LiveTranscriptionEvents.Transcript, on_transcript)
246
- self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
247
- self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
248
- self.live_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
249
- self.live_connection.on(LiveTranscriptionEvents.Error, on_error)
250
- self.live_connection.on(LiveTranscriptionEvents.Close, on_close)
251
-
252
- log_info("✅ Event handlers setup complete")
253
 
254
  def _on_transcript(self, *args, **kwargs):
255
- """Handle transcript event"""
256
  try:
257
- result = args[1] if len(args) > 1 else kwargs.get("result", {})
258
-
259
- # Extract transcript data
260
- is_final = result.get("is_final", False)
261
 
262
- # Only process final results
263
- if is_final:
264
- channel = result.get("channel", {})
265
- alternatives = channel.get("alternatives", [])
266
 
267
- if alternatives:
268
- alt = alternatives[0]
269
- transcript = alt.get("transcript", "")
270
- confidence = alt.get("confidence", 0.0)
 
 
 
 
 
 
 
 
271
 
272
- if transcript.strip(): # Only process non-empty transcripts
273
  transcription_result = TranscriptionResult(
274
  text=transcript,
275
- is_final=True,
276
  confidence=confidence,
277
  timestamp=datetime.now().timestamp()
278
  )
@@ -280,16 +216,23 @@ class DeepgramSTT(STTInterface):
280
  # Queue result
281
  try:
282
  self.responses_queue.put(transcription_result)
283
- self.final_result_received = True
284
-
285
- log_info(f"🎯 FINAL RESULT: '{transcript}' (confidence: {confidence:.2f})")
286
- log_info(f"📊 Session stats: {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
287
-
288
  except queue.Full:
289
  log_warning("⚠️ Response queue full")
 
 
 
290
 
291
  except Exception as e:
292
  log_error(f"❌ Error processing transcript: {e}")
 
 
 
 
293
 
294
  def _on_speech_started(self, *args, **kwargs):
295
  """Handle speech started event"""
 
118
  channels=1,
119
  )
120
 
121
+ log_info(f"🔧 Deepgram options: language=tr, model=nova-2, encoding=linear16, interim_results=True")
122
 
123
  # Create live connection
124
  self.live_connection = self.deepgram_client.listen.live.v("1")
 
163
 
164
  def _setup_event_handlers(self):
165
  """Setup event handlers for Deepgram events"""
166
+
167
+ # Transcript received - use the existing class method
168
+ self.live_connection.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
169
+
170
+ # Speech started
171
+ self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, self._on_speech_started)
172
+
173
+ # Utterance end
174
+ self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end)
175
+
176
+ # Metadata
177
+ self.live_connection.on(LiveTranscriptionEvents.Metadata, self._on_metadata)
178
+
179
+ # Error
180
+ self.live_connection.on(LiveTranscriptionEvents.Error, self._on_error)
181
+
182
+ # Connection closed
183
+ self.live_connection.on(LiveTranscriptionEvents.Close, self._on_close)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  def _on_transcript(self, *args, **kwargs):
186
+ """Handle transcript event - SDK calls this method directly"""
187
  try:
188
+ # SDK passes the result as second argument
189
+ result = args[1] if len(args) > 1 else kwargs.get("result")
 
 
190
 
191
+ if not result:
192
+ log_warning("⚠️ No result in transcript event")
193
+ return
 
194
 
195
+ # Access properties directly from the result object
196
+ is_final = result.is_final if hasattr(result, 'is_final') else False
197
+
198
+ # Get transcript from channel alternatives
199
+ if hasattr(result, 'channel') and result.channel:
200
+ alternatives = result.channel.alternatives
201
+ if alternatives and len(alternatives) > 0:
202
+ transcript = alternatives[0].transcript
203
+ confidence = alternatives[0].confidence
204
+
205
+ # Log all transcripts for debugging
206
+ log_debug(f"📝 Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
207
 
208
+ if transcript and transcript.strip(): # Only process non-empty transcripts
209
  transcription_result = TranscriptionResult(
210
  text=transcript,
211
+ is_final=is_final,
212
  confidence=confidence,
213
  timestamp=datetime.now().timestamp()
214
  )
 
216
  # Queue result
217
  try:
218
  self.responses_queue.put(transcription_result)
219
+ if is_final:
220
+ self.final_result_received = True
221
+ log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
222
+ else:
223
+ log_info(f"📝 Interim transcript: '{transcript}'")
224
  except queue.Full:
225
  log_warning("⚠️ Response queue full")
226
+ else:
227
+ if is_final:
228
+ log_warning(f"⚠️ Empty final transcript received")
229
 
230
  except Exception as e:
231
  log_error(f"❌ Error processing transcript: {e}")
232
+ log_error(f"❌ Args: {args}")
233
+ log_error(f"❌ Kwargs: {kwargs}")
234
+ import traceback
235
+ log_error(f"❌ Traceback: {traceback.format_exc()}")
236
 
237
  def _on_speech_started(self, *args, **kwargs):
238
  """Handle speech started event"""