ciyidogan committed on
Commit
f4b2af6
·
verified ·
1 Parent(s): 931d646

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +31 -3
stt/stt_google.py CHANGED
@@ -76,12 +76,28 @@ class GoogleSTT(STTInterface):
76
 
77
  log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Convert to WAV format for better compatibility
80
  wav_audio = self._convert_to_wav(audio_data, config.sample_rate)
81
 
82
  # Configure recognition
83
  language_code = self._map_language_code(config.language)
84
-
 
85
  recognition_config = RecognitionConfig(
86
  encoding=RecognitionConfig.AudioEncoding.LINEAR16,
87
  sample_rate_hertz=config.sample_rate,
@@ -91,14 +107,26 @@ class GoogleSTT(STTInterface):
91
  use_enhanced=config.use_enhanced,
92
  enable_word_time_offsets=config.enable_word_timestamps,
93
  )
94
-
 
 
 
 
 
 
 
 
 
95
  # Create audio object
96
  audio = RecognitionAudio(content=wav_audio)
97
 
98
  # Perform synchronous recognition
99
  log_info(f"🔄 Sending audio to Google Cloud Speech API...")
100
  response = self.client.recognize(config=recognition_config, audio=audio)
101
-
 
 
 
102
  # Process results
103
  if response.results:
104
  result = response.results[0]
 
76
 
77
  log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
78
 
79
+ # ✅ Debug - audio verisi analizi
80
+ if len(audio_data) > 100:
81
+ # İlk ve son 50 byte'ı kontrol et
82
+ first_50 = audio_data[:50]
83
+ last_50 = audio_data[-50:]
84
+ log_debug(f"Audio first 50 bytes: {first_50.hex()}")
85
+ log_debug(f"Audio last 50 bytes: {last_50.hex()}")
86
+
87
+ # Ortalama amplitude kontrolü
88
+ import struct
89
+ samples = struct.unpack(f'{len(audio_data)//2}h', audio_data)
90
+ avg_amplitude = sum(abs(s) for s in samples) / len(samples)
91
+ max_amplitude = max(abs(s) for s in samples)
92
+ log_debug(f"Audio stats: avg_amplitude={avg_amplitude:.1f}, max_amplitude={max_amplitude}")
93
+
94
  # Convert to WAV format for better compatibility
95
  wav_audio = self._convert_to_wav(audio_data, config.sample_rate)
96
 
97
  # Configure recognition
98
  language_code = self._map_language_code(config.language)
99
+
100
+ """
101
  recognition_config = RecognitionConfig(
102
  encoding=RecognitionConfig.AudioEncoding.LINEAR16,
103
  sample_rate_hertz=config.sample_rate,
 
107
  use_enhanced=config.use_enhanced,
108
  enable_word_time_offsets=config.enable_word_timestamps,
109
  )
110
+ """
111
+
112
+ recognition_config = RecognitionConfig(
113
+ encoding=RecognitionConfig.AudioEncoding.LINEAR16,
114
+ sample_rate_hertz=16000, # Sabit
115
+ language_code="tr-TR", # Sabit
116
+ enable_automatic_punctuation=True
117
+
118
+ log_debug(f"Recognition config: language={language_code}, sample_rate={config.sample_rate}, model={config.model}")
119
+
120
  # Create audio object
121
  audio = RecognitionAudio(content=wav_audio)
122
 
123
  # Perform synchronous recognition
124
  log_info(f"🔄 Sending audio to Google Cloud Speech API...")
125
  response = self.client.recognize(config=recognition_config, audio=audio)
126
+
127
+ # ✅ Debug response
128
+ log_debug(f"API Response: {response}")
129
+
130
  # Process results
131
  if response.results:
132
  result = response.results[0]