Spaces:
Building
Building
Update stt/stt_google.py
Browse files- stt/stt_google.py +56 -57
stt/stt_google.py
CHANGED
@@ -187,6 +187,42 @@ class GoogleSTT(STTInterface):
|
|
187 |
|
188 |
log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
# ✅ Audio analizi
|
191 |
self._analyze_audio_content(audio_data)
|
192 |
|
@@ -197,69 +233,32 @@ class GoogleSTT(STTInterface):
|
|
197 |
log_warning("⚠️ Audio too short after trimming")
|
198 |
return None
|
199 |
|
200 |
-
#
|
201 |
-
|
202 |
-
log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
audio_channel_count=1,
|
210 |
-
enable_separate_recognition_per_channel=False,
|
211 |
-
)
|
212 |
-
|
213 |
-
log_debug(f"Recognition config: language=tr-TR, sample_rate={config.sample_rate}")
|
214 |
-
|
215 |
-
# ✅ Create audio object with WAV data
|
216 |
-
audio = RecognitionAudio(content=wav_audio)
|
217 |
|
218 |
-
|
219 |
-
log_info(f"🔄 Sending {len(wav_audio)} bytes WAV to Google Cloud Speech API...")
|
220 |
-
response = self.client.recognize(config=recognition_config, audio=audio)
|
221 |
-
|
222 |
-
# ✅ Detaylı response analizi
|
223 |
-
log_debug(f"API Response: {response}")
|
224 |
-
log_info(f"🔍 Google response details:")
|
225 |
-
log_info(f"- Has results: {bool(response.results)}")
|
226 |
-
log_info(f"- Results count: {len(response.results)}")
|
227 |
|
228 |
-
#
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
billed_seconds = response.total_billed_time.total_seconds()
|
234 |
-
log_info(f"- Billed time: {billed_seconds}s")
|
235 |
|
236 |
-
|
237 |
-
if
|
238 |
-
log_error("
|
239 |
-
return None
|
240 |
-
else:
|
241 |
-
log_info(f"- Billed time: 0s (no audio processed)")
|
242 |
-
|
243 |
-
# Process results
|
244 |
-
if response.results:
|
245 |
-
for i, result in enumerate(response.results):
|
246 |
-
log_debug(f"Result {i}: {result}")
|
247 |
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
transcription = TranscriptionResult(
|
252 |
-
text=alternative.transcript,
|
253 |
-
confidence=alternative.confidence,
|
254 |
-
timestamp=datetime.now().timestamp(),
|
255 |
-
language="tr-TR",
|
256 |
-
word_timestamps=None
|
257 |
-
)
|
258 |
-
|
259 |
-
log_info(f"✅ Transcription: '{alternative.transcript}' (confidence: {alternative.confidence:.2f})")
|
260 |
-
return transcription
|
261 |
|
262 |
-
|
|
|
263 |
return None
|
264 |
|
265 |
except Exception as e:
|
|
|
187 |
|
188 |
log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
|
189 |
|
190 |
+
# ✅ Raw audio'yu direkt WAV olarak kaydet ve test et
|
191 |
+
import tempfile
|
192 |
+
import os
|
193 |
+
import wave
|
194 |
+
|
195 |
+
# Raw audio'yu WAV olarak kaydet
|
196 |
+
raw_wav_file = f"/tmp/raw_audio_{datetime.now().strftime('%H%M%S')}.wav"
|
197 |
+
|
198 |
+
with wave.open(raw_wav_file, 'wb') as wav_file:
|
199 |
+
wav_file.setnchannels(1)
|
200 |
+
wav_file.setsampwidth(2)
|
201 |
+
wav_file.setframerate(config.sample_rate)
|
202 |
+
wav_file.writeframes(audio_data)
|
203 |
+
|
204 |
+
log_info(f"🎯 RAW audio saved as WAV: {raw_wav_file}")
|
205 |
+
|
206 |
+
# Test koduyla test et
|
207 |
+
try:
|
208 |
+
import subprocess
|
209 |
+
result = subprocess.run([
|
210 |
+
'python', '/app/test_single_wav.py', raw_wav_file
|
211 |
+
], capture_output=True, text=True, timeout=30)
|
212 |
+
|
213 |
+
log_info(f"🔍 Raw WAV test result: {result.stdout}")
|
214 |
+
if result.stderr:
|
215 |
+
log_error(f"🔍 Raw WAV test error: {result.stderr}")
|
216 |
+
|
217 |
+
# Eğer raw audio çalışıyorsa, sorun trimming'te
|
218 |
+
if "Transcript:" in result.stdout:
|
219 |
+
log_info("✅ RAW audio works! Problem is in our processing.")
|
220 |
+
else:
|
221 |
+
log_error("❌ Even RAW audio doesn't work - problem in frontend!")
|
222 |
+
|
223 |
+
except Exception as e:
|
224 |
+
log_warning(f"Could not run raw audio test: {e}")
|
225 |
+
|
226 |
# ✅ Audio analizi
|
227 |
self._analyze_audio_content(audio_data)
|
228 |
|
|
|
233 |
log_warning("⚠️ Audio too short after trimming")
|
234 |
return None
|
235 |
|
236 |
+
# Trimmed audio'yu da kaydet
|
237 |
+
trimmed_wav_file = f"/tmp/trimmed_audio_{datetime.now().strftime('%H%M%S')}.wav"
|
|
|
238 |
|
239 |
+
with wave.open(trimmed_wav_file, 'wb') as wav_file:
|
240 |
+
wav_file.setnchannels(1)
|
241 |
+
wav_file.setsampwidth(2)
|
242 |
+
wav_file.setframerate(config.sample_rate)
|
243 |
+
wav_file.writeframes(trimmed_audio)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
+
log_info(f"🎯 TRIMMED audio saved as WAV: {trimmed_wav_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
+
# Trimmed audio'yu da test et
|
248 |
+
try:
|
249 |
+
result = subprocess.run([
|
250 |
+
'python', '/app/test_single_wav.py', trimmed_wav_file
|
251 |
+
], capture_output=True, text=True, timeout=30)
|
|
|
|
|
252 |
|
253 |
+
log_info(f"🔍 Trimmed WAV test result: {result.stdout}")
|
254 |
+
if result.stderr:
|
255 |
+
log_error(f"🔍 Trimmed WAV test error: {result.stderr}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
+
except Exception as e:
|
258 |
+
log_warning(f"Could not run trimmed audio test: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
|
260 |
+
# Sonuç olarak Google'a gönderme
|
261 |
+
log_info("❌ Skipping Google API call for debugging")
|
262 |
return None
|
263 |
|
264 |
except Exception as e:
|