Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed 7 days ago

Commit

d846f5e

verified ·

1 Parent(s): bdda7c9

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +54 -0

stt/stt_google.py CHANGED Viewed

@@ -125,6 +125,57 @@ class GoogleSTT(STTInterface):
         except Exception as e:
             log_error(f"❌ Error analyzing audio: {e}")
     async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
         """Transcribe audio data using Google Cloud Speech API"""
@@ -138,6 +189,9 @@ class GoogleSTT(STTInterface):
             # ✅ Audio analizi
             self._analyze_audio_content(audio_data)
             # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
             wav_audio = self._convert_to_wav_proper(audio_data, config.sample_rate)

         except Exception as e:
             log_error(f"❌ Error analyzing audio: {e}")
+    def _trim_silence(self, audio_data: bytes) -> bytes:
+            """Trim silence from beginning and end of audio"""
+            try:
+                if len(audio_data) < 100:
+                    return audio_data
+                # Convert to samples
+                samples = list(struct.unpack(f'{len(audio_data)//2}h', audio_data))
+                # Silence threshold - daha düşük bir threshold kullan
+                silence_threshold = 200  # Daha düşük threshold
+                # Find first non-silent sample
+                start_idx = 0
+                for i, sample in enumerate(samples):
+                    if abs(sample) > silence_threshold:
+                        start_idx = i
+                        break
+                # Find last non-silent sample
+                end_idx = len(samples) - 1
+                for i in range(len(samples) - 1, -1, -1):
+                    if abs(samples[i]) > silence_threshold:
+                        end_idx = i
+                        break
+                # Ensure we have some audio
+                if start_idx >= end_idx:
+                    log_warning("⚠️ No audio content above silence threshold")
+                    return audio_data
+                # Add small padding (250ms = 4000 samples at 16kHz)
+                padding = 2000  # 125ms padding
+                start_idx = max(0, start_idx - padding)
+                end_idx = min(len(samples) - 1, end_idx + padding)
+                # Extract trimmed audio
+                trimmed_samples = samples[start_idx:end_idx + 1]
+                log_info(f"🔧 Silence trimming: {len(samples)} → {len(trimmed_samples)} samples")
+                log_info(f"🔧 Trimmed duration: {len(trimmed_samples)/16000:.2f}s")
+                # Convert back to bytes
+                trimmed_audio = struct.pack(f'{len(trimmed_samples)}h', *trimmed_samples)
+                return trimmed_audio
+            except Exception as e:
+                log_error(f"❌ Silence trimming failed: {e}")
+                return audio_data
     async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
         """Transcribe audio data using Google Cloud Speech API"""
             # ✅ Audio analizi
             self._analyze_audio_content(audio_data)
+            # ✅ Silence trimming ekle
+            trimmed_audio = self._trim_silence(audio_data)
             # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
             wav_audio = self._convert_to_wav_proper(audio_data, config.sample_rate)