ciyidogan committed on
Commit
d846f5e
·
verified ·
1 Parent(s): bdda7c9

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +54 -0
stt/stt_google.py CHANGED
@@ -125,6 +125,57 @@ class GoogleSTT(STTInterface):
125
 
126
  except Exception as e:
127
  log_error(f"❌ Error analyzing audio: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
130
  """Transcribe audio data using Google Cloud Speech API"""
@@ -138,6 +189,9 @@ class GoogleSTT(STTInterface):
138
 
139
  # ✅ Audio analizi
140
  self._analyze_audio_content(audio_data)
 
 
 
141
 
142
  # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
143
  wav_audio = self._convert_to_wav_proper(audio_data, config.sample_rate)
 
125
 
126
  except Exception as e:
127
  log_error(f"❌ Error analyzing audio: {e}")
128
+
129
+ def _trim_silence(self, audio_data: bytes) -> bytes:
130
+ """Trim silence from beginning and end of audio"""
131
+ try:
132
+ if len(audio_data) < 100:
133
+ return audio_data
134
+
135
+ # Convert to samples
136
+ samples = list(struct.unpack(f'{len(audio_data)//2}h', audio_data))
137
+
138
+ # Silence threshold - daha düşük bir threshold kullan
139
+ silence_threshold = 200 # Daha düşük threshold
140
+
141
+ # Find first non-silent sample
142
+ start_idx = 0
143
+ for i, sample in enumerate(samples):
144
+ if abs(sample) > silence_threshold:
145
+ start_idx = i
146
+ break
147
+
148
+ # Find last non-silent sample
149
+ end_idx = len(samples) - 1
150
+ for i in range(len(samples) - 1, -1, -1):
151
+ if abs(samples[i]) > silence_threshold:
152
+ end_idx = i
153
+ break
154
+
155
+ # Ensure we have some audio
156
+ if start_idx >= end_idx:
157
+ log_warning("⚠️ No audio content above silence threshold")
158
+ return audio_data
159
+
160
+ # Add small padding (250ms = 4000 samples at 16kHz)
161
+ padding = 2000 # 125ms padding
162
+ start_idx = max(0, start_idx - padding)
163
+ end_idx = min(len(samples) - 1, end_idx + padding)
164
+
165
+ # Extract trimmed audio
166
+ trimmed_samples = samples[start_idx:end_idx + 1]
167
+
168
+ log_info(f"🔧 Silence trimming: {len(samples)} → {len(trimmed_samples)} samples")
169
+ log_info(f"🔧 Trimmed duration: {len(trimmed_samples)/16000:.2f}s")
170
+
171
+ # Convert back to bytes
172
+ trimmed_audio = struct.pack(f'{len(trimmed_samples)}h', *trimmed_samples)
173
+
174
+ return trimmed_audio
175
+
176
+ except Exception as e:
177
+ log_error(f"❌ Silence trimming failed: {e}")
178
+ return audio_data
179
 
180
  async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
181
  """Transcribe audio data using Google Cloud Speech API"""
 
189
 
190
  # ✅ Audio analizi
191
  self._analyze_audio_content(audio_data)
192
+
193
+ # ✅ Silence trimming ekle
194
+ trimmed_audio = self._trim_silence(audio_data)
195
 
196
  # ✅ WAV formatında gönder - Google bu formatı daha iyi tanıyor
197
  wav_audio = self._convert_to_wav_proper(audio_data, config.sample_rate)