Spaces:
Paused
Paused
Update stt/stt_deepgram.py
Browse files- stt/stt_deepgram.py +51 -108
stt/stt_deepgram.py
CHANGED
@@ -118,7 +118,7 @@ class DeepgramSTT(STTInterface):
|
|
118 |
channels=1,
|
119 |
)
|
120 |
|
121 |
-
log_info(f"π§ Deepgram options: language=tr, model=nova-2, encoding=
|
122 |
|
123 |
# Create live connection
|
124 |
self.live_connection = self.deepgram_client.listen.live.v("1")
|
@@ -163,116 +163,52 @@ class DeepgramSTT(STTInterface):
|
|
163 |
|
164 |
def _setup_event_handlers(self):
|
165 |
"""Setup event handlers for Deepgram events"""
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
#
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
# Get transcript from channel alternatives
|
185 |
-
if hasattr(result, 'channel') and result.channel:
|
186 |
-
alternatives = result.channel.alternatives
|
187 |
-
if alternatives and len(alternatives) > 0:
|
188 |
-
transcript = alternatives[0].transcript
|
189 |
-
confidence = alternatives[0].confidence
|
190 |
-
|
191 |
-
# Log all transcripts for debugging
|
192 |
-
log_debug(f"π Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
|
193 |
-
|
194 |
-
if transcript and transcript.strip(): # Only process non-empty transcripts
|
195 |
-
transcription_result = TranscriptionResult(
|
196 |
-
text=transcript,
|
197 |
-
is_final=is_final,
|
198 |
-
confidence=confidence,
|
199 |
-
timestamp=datetime.now().timestamp()
|
200 |
-
)
|
201 |
-
|
202 |
-
# Queue result
|
203 |
-
try:
|
204 |
-
self.responses_queue.put(transcription_result)
|
205 |
-
if is_final:
|
206 |
-
self.final_result_received = True
|
207 |
-
log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
|
208 |
-
else:
|
209 |
-
log_info(f"π Interim transcript: '{transcript}'")
|
210 |
-
except queue.Full:
|
211 |
-
log_warning("⚠️ Response queue full")
|
212 |
-
else:
|
213 |
-
if is_final:
|
214 |
-
log_warning(f"⚠️ Empty final transcript received")
|
215 |
-
|
216 |
-
except Exception as e:
|
217 |
-
log_error(f"β Error processing transcript: {e}")
|
218 |
-
log_error(f"β Args: {args}")
|
219 |
-
log_error(f"β Kwargs: {kwargs}")
|
220 |
-
import traceback
|
221 |
-
log_error(f"β Traceback: {traceback.format_exc()}")
|
222 |
-
|
223 |
-
# Speech started handler
|
224 |
-
def on_speech_started(self, speech_started, **kwargs):
|
225 |
-
log_info(f"π€ Speech started event: {speech_started}")
|
226 |
-
|
227 |
-
# Utterance end handler
|
228 |
-
def on_utterance_end(self, utterance_end, **kwargs):
|
229 |
-
log_info(f"π Utterance ended event: {utterance_end}")
|
230 |
-
|
231 |
-
# Metadata handler
|
232 |
-
def on_metadata(self, metadata, **kwargs):
|
233 |
-
log_info(f"π Deepgram Metadata: {metadata}")
|
234 |
-
|
235 |
-
# Error handler
|
236 |
-
def on_error(self, error, **kwargs):
|
237 |
-
log_error(f"β Deepgram error event: {error}")
|
238 |
-
|
239 |
-
# Close handler
|
240 |
-
def on_close(self, close, **kwargs):
|
241 |
-
log_warning(f"π Deepgram connection closed: {close}")
|
242 |
-
self.is_streaming = False
|
243 |
-
|
244 |
-
# Register handlers with correct syntax
|
245 |
-
self.live_connection.on(LiveTranscriptionEvents.Transcript, on_transcript)
|
246 |
-
self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
|
247 |
-
self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
|
248 |
-
self.live_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
|
249 |
-
self.live_connection.on(LiveTranscriptionEvents.Error, on_error)
|
250 |
-
self.live_connection.on(LiveTranscriptionEvents.Close, on_close)
|
251 |
-
|
252 |
-
log_info("✅ Event handlers setup complete")
|
253 |
|
254 |
def _on_transcript(self, *args, **kwargs):
|
255 |
-
"""Handle transcript event"""
|
256 |
try:
|
257 |
-
|
258 |
-
|
259 |
-
# Extract transcript data
|
260 |
-
is_final = result.get("is_final", False)
|
261 |
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
alternatives = channel.get("alternatives", [])
|
266 |
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
|
272 |
-
if transcript.strip(): # Only process non-empty transcripts
|
273 |
transcription_result = TranscriptionResult(
|
274 |
text=transcript,
|
275 |
-
is_final=
|
276 |
confidence=confidence,
|
277 |
timestamp=datetime.now().timestamp()
|
278 |
)
|
@@ -280,16 +216,23 @@ class DeepgramSTT(STTInterface):
|
|
280 |
# Queue result
|
281 |
try:
|
282 |
self.responses_queue.put(transcription_result)
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
except queue.Full:
|
289 |
log_warning("⚠️ Response queue full")
|
|
|
|
|
|
|
290 |
|
291 |
except Exception as e:
|
292 |
log_error(f"β Error processing transcript: {e}")
|
|
|
|
|
|
|
|
|
293 |
|
294 |
def _on_speech_started(self, *args, **kwargs):
|
295 |
"""Handle speech started event"""
|
|
|
118 |
channels=1,
|
119 |
)
|
120 |
|
121 |
+
log_info(f"🔧 Deepgram options: language=tr, model=nova-2, encoding=linear16, interim_results=True")
|
122 |
|
123 |
# Create live connection
|
124 |
self.live_connection = self.deepgram_client.listen.live.v("1")
|
|
|
163 |
|
164 |
def _setup_event_handlers(self):
|
165 |
"""Setup event handlers for Deepgram events"""
|
166 |
+
|
167 |
+
# Transcript received - use the existing class method
|
168 |
+
self.live_connection.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
|
169 |
+
|
170 |
+
# Speech started
|
171 |
+
self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, self._on_speech_started)
|
172 |
+
|
173 |
+
# Utterance end
|
174 |
+
self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end)
|
175 |
+
|
176 |
+
# Metadata
|
177 |
+
self.live_connection.on(LiveTranscriptionEvents.Metadata, self._on_metadata)
|
178 |
+
|
179 |
+
# Error
|
180 |
+
self.live_connection.on(LiveTranscriptionEvents.Error, self._on_error)
|
181 |
+
|
182 |
+
# Connection closed
|
183 |
+
self.live_connection.on(LiveTranscriptionEvents.Close, self._on_close)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
def _on_transcript(self, *args, **kwargs):
|
186 |
+
"""Handle transcript event - SDK calls this method directly"""
|
187 |
try:
|
188 |
+
# SDK passes the result as second argument
|
189 |
+
result = args[1] if len(args) > 1 else kwargs.get("result")
|
|
|
|
|
190 |
|
191 |
+
if not result:
|
192 |
+
log_warning("⚠️ No result in transcript event")
|
193 |
+
return
|
|
|
194 |
|
195 |
+
# Access properties directly from the result object
|
196 |
+
is_final = result.is_final if hasattr(result, 'is_final') else False
|
197 |
+
|
198 |
+
# Get transcript from channel alternatives
|
199 |
+
if hasattr(result, 'channel') and result.channel:
|
200 |
+
alternatives = result.channel.alternatives
|
201 |
+
if alternatives and len(alternatives) > 0:
|
202 |
+
transcript = alternatives[0].transcript
|
203 |
+
confidence = alternatives[0].confidence
|
204 |
+
|
205 |
+
# Log all transcripts for debugging
|
206 |
+
log_debug(f"π Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
|
207 |
|
208 |
+
if transcript and transcript.strip(): # Only process non-empty transcripts
|
209 |
transcription_result = TranscriptionResult(
|
210 |
text=transcript,
|
211 |
+
is_final=is_final,
|
212 |
confidence=confidence,
|
213 |
timestamp=datetime.now().timestamp()
|
214 |
)
|
|
|
216 |
# Queue result
|
217 |
try:
|
218 |
self.responses_queue.put(transcription_result)
|
219 |
+
if is_final:
|
220 |
+
self.final_result_received = True
|
221 |
+
log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
|
222 |
+
else:
|
223 |
+
log_info(f"π Interim transcript: '{transcript}'")
|
224 |
except queue.Full:
|
225 |
log_warning("⚠️ Response queue full")
|
226 |
+
else:
|
227 |
+
if is_final:
|
228 |
+
log_warning(f"⚠️ Empty final transcript received")
|
229 |
|
230 |
except Exception as e:
|
231 |
log_error(f"β Error processing transcript: {e}")
|
232 |
+
log_error(f"β Args: {args}")
|
233 |
+
log_error(f"β Kwargs: {kwargs}")
|
234 |
+
import traceback
|
235 |
+
log_error(f"β Traceback: {traceback.format_exc()}")
|
236 |
|
237 |
def _on_speech_started(self, *args, **kwargs):
|
238 |
"""Handle speech started event"""
|