Spaces:

UcsTurkey
/

flare

Paused

App Files Community

ciyidogan committed on Jul 8

Commit

478e05e

verified ·

1 Parent(s): 15d3e9a

Update stt/stt_deepgram.py

Browse files

Files changed (1) hide show

stt/stt_deepgram.py +51 -108

stt/stt_deepgram.py CHANGED Viewed

@@ -118,7 +118,7 @@ class DeepgramSTT(STTInterface):
                 channels=1,
             )
-            log_info(f"🔧 Deepgram options: language=tr, model=nova-2, encoding=opus, interim_results=True")
             # Create live connection
             self.live_connection = self.deepgram_client.listen.live.v("1")
@@ -163,116 +163,52 @@ class DeepgramSTT(STTInterface):
     def _setup_event_handlers(self):
         """Setup event handlers for Deepgram events"""
-        if not self.live_connection:
-            log_error("❌ No live connection to setup handlers")
-            return
-        # Results handler - handles transcription results
-        def _on_transcript(self, *args, **kwargs):
-            """Handle transcript event - SDK calls this method directly"""
-            try:
-                # SDK passes the result as second argument
-                result = args[1] if len(args) > 1 else kwargs.get("result")
-                if not result:
-                    log_warning("⚠️ No result in transcript event")
-                    return
-                # Access properties directly from the result object
-                is_final = result.is_final
-                # Get transcript from channel alternatives
-                if hasattr(result, 'channel') and result.channel:
-                    alternatives = result.channel.alternatives
-                    if alternatives and len(alternatives) > 0:
-                        transcript = alternatives[0].transcript
-                        confidence = alternatives[0].confidence
-                        # Log all transcripts for debugging
-                        log_debug(f"📝 Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")
-                        if transcript and transcript.strip():  # Only process non-empty transcripts
-                            transcription_result = TranscriptionResult(
-                                text=transcript,
-                                is_final=is_final,
-                                confidence=confidence,
-                                timestamp=datetime.now().timestamp()
-                            )
-                            # Queue result
-                            try:
-                                self.responses_queue.put(transcription_result)
-                                if is_final:
-                                    self.final_result_received = True
-                                    log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
-                                else:
-                                    log_info(f"📝 Interim transcript: '{transcript}'")
-                            except queue.Full:
-                                log_warning("⚠️ Response queue full")
-                        else:
-                            if is_final:
-                                log_warning(f"⚠️ Empty final transcript received")
-            except Exception as e:
-                log_error(f"❌ Error processing transcript: {e}")
-                log_error(f"❌ Args: {args}")
-                log_error(f"❌ Kwargs: {kwargs}")
-                import traceback
-                log_error(f"❌ Traceback: {traceback.format_exc()}")
-        # Speech started handler
-        def on_speech_started(self, speech_started, **kwargs):
-            log_info(f"🎤 Speech started event: {speech_started}")
-        # Utterance end handler
-        def on_utterance_end(self, utterance_end, **kwargs):
-            log_info(f"🔚 Utterance ended event: {utterance_end}")
-        # Metadata handler
-        def on_metadata(self, metadata, **kwargs):
-            log_info(f"📊 Deepgram Metadata: {metadata}")
-        # Error handler
-        def on_error(self, error, **kwargs):
-            log_error(f"❌ Deepgram error event: {error}")
-        # Close handler
-        def on_close(self, close, **kwargs):
-            log_warning(f"🔌 Deepgram connection closed: {close}")
-            self.is_streaming = False
-        # Register handlers with correct syntax
-        self.live_connection.on(LiveTranscriptionEvents.Transcript, on_transcript)
-        self.live_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
-        self.live_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
-        self.live_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
-        self.live_connection.on(LiveTranscriptionEvents.Error, on_error)
-        self.live_connection.on(LiveTranscriptionEvents.Close, on_close)
-        log_info("✅ Event handlers setup complete")
     def _on_transcript(self, *args, **kwargs):
-        """Handle transcript event"""
         try:
-            result = args[1] if len(args) > 1 else kwargs.get("result", {})
-            # Extract transcript data
-            is_final = result.get("is_final", False)
-            # Only process final results
-            if is_final:
-                channel = result.get("channel", {})
-                alternatives = channel.get("alternatives", [])
-                if alternatives:
-                    alt = alternatives[0]
-                    transcript = alt.get("transcript", "")
-                    confidence = alt.get("confidence", 0.0)
-                    if transcript.strip():  # Only process non-empty transcripts
                         transcription_result = TranscriptionResult(
                             text=transcript,
-                            is_final=True,
                             confidence=confidence,
                             timestamp=datetime.now().timestamp()
                         )
@@ -280,16 +216,23 @@ class DeepgramSTT(STTInterface):
                         # Queue result
                         try:
                             self.responses_queue.put(transcription_result)
-                            self.final_result_received = True
-                            log_info(f"🎯 FINAL RESULT: '{transcript}' (confidence: {confidence:.2f})")
-                            log_info(f"📊 Session stats: {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
                         except queue.Full:
                             log_warning("⚠️ Response queue full")
         except Exception as e:
             log_error(f"❌ Error processing transcript: {e}")
     def _on_speech_started(self, *args, **kwargs):
         """Handle speech started event"""

                 channels=1,
             )
+            log_info(f"🔧 Deepgram options: language=tr, model=nova-2, encoding=linear16, interim_results=True")
             # Create live connection
             self.live_connection = self.deepgram_client.listen.live.v("1")
     def _setup_event_handlers(self):
         """Attach this object's callback methods to the live connection.

         Every event listed below is registered through
         ``self.live_connection.on(event, handler)``, in table order.
         NOTE(review): there is no None check on ``self.live_connection``;
         this assumes the caller has already created the connection.
         """
         # (event, bound method) pairs; one row per registration.
         bindings = (
             (LiveTranscriptionEvents.Transcript, self._on_transcript),
             (LiveTranscriptionEvents.SpeechStarted, self._on_speech_started),
             (LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end),
             (LiveTranscriptionEvents.Metadata, self._on_metadata),
             (LiveTranscriptionEvents.Error, self._on_error),
             (LiveTranscriptionEvents.Close, self._on_close),
         )
         for event, handler in bindings:
             self.live_connection.on(event, handler)
     def _on_transcript(self, *args, **kwargs):
         """Handle a transcript event and queue any non-empty transcript.

         The result object is read from the second positional argument when
         one is present, otherwise from the ``result`` keyword argument. The
         transcript and confidence of its first alternative are wrapped in a
         ``TranscriptionResult`` and placed on ``self.responses_queue``.
         Interim and final results are both forwarded; a final one also sets
         ``self.final_result_received``.

         Returns nothing. No exception escapes: any failure is logged along
         with the raw arguments and a traceback.
         """
         try:
             result = args[1] if len(args) > 1 else kwargs.get("result")
             if not result:
                 log_warning("⚠️ No result in transcript event")
                 return

             # A result without the attribute is treated as interim.
             is_final = getattr(result, 'is_final', False)

             channel = getattr(result, 'channel', None)
             if not channel:
                 return

             alternatives = channel.alternatives
             if not alternatives:
                 return

             # Only the first alternative is used.
             best = alternatives[0]
             transcript = best.transcript
             confidence = best.confidence

             # Log all transcripts for debugging
             log_debug(f"📝 Raw transcript: '{transcript}' (is_final: {is_final}, confidence: {confidence})")

             # Empty or whitespace-only transcripts are never queued.
             if not (transcript and transcript.strip()):
                 if is_final:
                     log_warning("⚠️ Empty final transcript received")
                 return

             transcription_result = TranscriptionResult(
                 text=transcript,
                 is_final=is_final,
                 confidence=confidence,
                 timestamp=datetime.now().timestamp()
             )

             try:
                 # put() with default arguments blocks on a full bounded queue
                 # and never raises queue.Full, which made the handler below
                 # unreachable and could stall this callback. put_nowait()
                 # raises immediately instead.
                 # NOTE(review): assumes responses_queue is a queue.Queue.
                 self.responses_queue.put_nowait(transcription_result)
             except queue.Full:
                 log_warning("⚠️ Response queue full")
             else:
                 if is_final:
                     self.final_result_received = True
                     log_info(f"🎯 FINAL TRANSCRIPT: '{transcript}' (confidence: {confidence:.2f})")
                 else:
                     log_info(f"📝 Interim transcript: '{transcript}'")
         except Exception as e:
             # Top-level boundary for the callback: log everything, raise nothing.
             log_error(f"❌ Error processing transcript: {e}")
             log_error(f"❌ Args: {args}")
             log_error(f"❌ Kwargs: {kwargs}")
             import traceback
             log_error(f"❌ Traceback: {traceback.format_exc()}")
     def _on_speech_started(self, *args, **kwargs):
         """Handle speech started event"""