ciyidogan committed on
Commit
59e181e
·
verified ·
1 Parent(s): 89d0af3

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +56 -57
stt/stt_google.py CHANGED
@@ -187,6 +187,42 @@ class GoogleSTT(STTInterface):
187
 
188
  log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  # ✅ Audio analizi
191
  self._analyze_audio_content(audio_data)
192
 
@@ -197,69 +233,32 @@ class GoogleSTT(STTInterface):
197
  log_warning("⚠️ Audio too short after trimming")
198
  return None
199
 
200
- # Test kodundan EXACT aynı format - wave modülü kullan
201
- wav_audio = self._create_wav_like_test(trimmed_audio, config.sample_rate)
202
- log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
203
 
204
- # Configure recognition - TEST KODUNDAN EXACT AYNI
205
- recognition_config = RecognitionConfig(
206
- encoding=RecognitionConfig.AudioEncoding.LINEAR16,
207
- sample_rate_hertz=config.sample_rate,
208
- language_code="tr-TR", # Hardcode tr-TR like test
209
- audio_channel_count=1,
210
- enable_separate_recognition_per_channel=False,
211
- )
212
-
213
- log_debug(f"Recognition config: language=tr-TR, sample_rate={config.sample_rate}")
214
-
215
- # ✅ Create audio object with WAV data
216
- audio = RecognitionAudio(content=wav_audio)
217
 
218
- # Perform synchronous recognition
219
- log_info(f"🔄 Sending {len(wav_audio)} bytes WAV to Google Cloud Speech API...")
220
- response = self.client.recognize(config=recognition_config, audio=audio)
221
-
222
- # ✅ Detaylı response analizi
223
- log_debug(f"API Response: {response}")
224
- log_info(f"🔍 Google response details:")
225
- log_info(f"- Has results: {bool(response.results)}")
226
- log_info(f"- Results count: {len(response.results)}")
227
 
228
- # Request ID'yi logla
229
- if hasattr(response, '_pb') and hasattr(response._pb, 'request_id'):
230
- log_info(f"- Request ID: {response._pb.request_id}")
231
-
232
- if hasattr(response, 'total_billed_time'):
233
- billed_seconds = response.total_billed_time.total_seconds()
234
- log_info(f"- Billed time: {billed_seconds}s")
235
 
236
- # Eğer billed time 0 ise, Google hiç audio işlememiş demektir
237
- if billed_seconds == 0:
238
- log_error(" Google didn't process any audio - possible format issue")
239
- return None
240
- else:
241
- log_info(f"- Billed time: 0s (no audio processed)")
242
-
243
- # Process results
244
- if response.results:
245
- for i, result in enumerate(response.results):
246
- log_debug(f"Result {i}: {result}")
247
 
248
- if result.alternatives:
249
- alternative = result.alternatives[0]
250
-
251
- transcription = TranscriptionResult(
252
- text=alternative.transcript,
253
- confidence=alternative.confidence,
254
- timestamp=datetime.now().timestamp(),
255
- language="tr-TR",
256
- word_timestamps=None
257
- )
258
-
259
- log_info(f"✅ Transcription: '{alternative.transcript}' (confidence: {alternative.confidence:.2f})")
260
- return transcription
261
 
262
- log_warning("⚠️ No transcription results - Google couldn't recognize speech")
 
263
  return None
264
 
265
  except Exception as e:
 
187
 
188
  log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
189
 
190
+ # ✅ Raw audio'yu direkt WAV olarak kaydet ve test et
191
+ import tempfile
192
+ import os
193
+ import wave
194
+
195
+ # Raw audio'yu WAV olarak kaydet
196
+ raw_wav_file = f"/tmp/raw_audio_{datetime.now().strftime('%H%M%S')}.wav"
197
+
198
+ with wave.open(raw_wav_file, 'wb') as wav_file:
199
+ wav_file.setnchannels(1)
200
+ wav_file.setsampwidth(2)
201
+ wav_file.setframerate(config.sample_rate)
202
+ wav_file.writeframes(audio_data)
203
+
204
+ log_info(f"🎯 RAW audio saved as WAV: {raw_wav_file}")
205
+
206
+ # Test koduyla test et
207
+ try:
208
+ import subprocess
209
+ result = subprocess.run([
210
+ 'python', '/app/test_single_wav.py', raw_wav_file
211
+ ], capture_output=True, text=True, timeout=30)
212
+
213
+ log_info(f"🔍 Raw WAV test result: {result.stdout}")
214
+ if result.stderr:
215
+ log_error(f"🔍 Raw WAV test error: {result.stderr}")
216
+
217
+ # Eğer raw audio çalışıyorsa, sorun trimming'te
218
+ if "Transcript:" in result.stdout:
219
+ log_info("✅ RAW audio works! Problem is in our processing.")
220
+ else:
221
+ log_error("❌ Even RAW audio doesn't work - problem in frontend!")
222
+
223
+ except Exception as e:
224
+ log_warning(f"Could not run raw audio test: {e}")
225
+
226
  # ✅ Audio analizi
227
  self._analyze_audio_content(audio_data)
228
 
 
233
  log_warning("⚠️ Audio too short after trimming")
234
  return None
235
 
236
+ # Trimmed audio'yu da kaydet
237
+ trimmed_wav_file = f"/tmp/trimmed_audio_{datetime.now().strftime('%H%M%S')}.wav"
 
238
 
239
+ with wave.open(trimmed_wav_file, 'wb') as wav_file:
240
+ wav_file.setnchannels(1)
241
+ wav_file.setsampwidth(2)
242
+ wav_file.setframerate(config.sample_rate)
243
+ wav_file.writeframes(trimmed_audio)
 
 
 
 
 
 
 
 
244
 
245
+ log_info(f"🎯 TRIMMED audio saved as WAV: {trimmed_wav_file}")
 
 
 
 
 
 
 
 
246
 
247
+ # Trimmed audio'yu da test et
248
+ try:
249
+ result = subprocess.run([
250
+ 'python', '/app/test_single_wav.py', trimmed_wav_file
251
+ ], capture_output=True, text=True, timeout=30)
 
 
252
 
253
+ log_info(f"🔍 Trimmed WAV test result: {result.stdout}")
254
+ if result.stderr:
255
+ log_error(f"🔍 Trimmed WAV test error: {result.stderr}")
 
 
 
 
 
 
 
 
256
 
257
+ except Exception as e:
258
+ log_warning(f"Could not run trimmed audio test: {e}")
 
 
 
 
 
 
 
 
 
 
 
259
 
260
+ # Sonuç olarak Google'a gönderme
261
+ log_info("❌ Skipping Google API call for debugging")
262
  return None
263
 
264
  except Exception as e: