Update transcrriber.py
Browse files- transcrriber.py +14 -12
transcrriber.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import speech_recognition as sr
|
2 |
import numpy as np
|
3 |
-
import
|
4 |
import config
|
5 |
|
6 |
class SpeechTranscriber:
|
@@ -9,33 +9,35 @@ class SpeechTranscriber:
|
|
9 |
self.recognizer.energy_threshold = config.ENERGY_THRESHOLD
|
10 |
self.recognizer.dynamic_energy_threshold = config.DYNAMIC_ENERGY_THRESHOLD
|
11 |
self.recognizer.pause_threshold = config.PAUSE_THRESHOLD
|
12 |
-
self.audio_buffer =
|
13 |
-
self.last_processed = 0
|
14 |
|
15 |
def add_audio_chunk(self, audio_chunk):
|
16 |
-
|
|
|
17 |
|
18 |
def get_transcript_chunk(self):
|
19 |
# Only process if we have enough audio
|
20 |
-
|
|
|
21 |
return None
|
22 |
|
23 |
-
#
|
24 |
audio_data = sr.AudioData(
|
25 |
-
|
26 |
-
config.SAMPLE_RATE,
|
27 |
2 # Sample width in bytes
|
28 |
)
|
29 |
|
30 |
try:
|
31 |
# Use Google Web Speech API for best accuracy
|
32 |
text = self.recognizer.recognize_google(audio_data)
|
|
|
|
|
33 |
return text
|
34 |
except sr.UnknownValueError:
|
|
|
|
|
35 |
return None
|
36 |
except sr.RequestError as e:
|
37 |
print(f"Speech recognition error: {str(e)}")
|
38 |
-
return None
|
39 |
-
finally:
|
40 |
-
# Clear buffer after processing
|
41 |
-
self.audio_buffer.clear()
|
|
|
1 |
import speech_recognition as sr
|
2 |
import numpy as np
|
3 |
+
import io
|
4 |
import config
|
5 |
|
6 |
class SpeechTranscriber:
|
|
|
9 |
self.recognizer.energy_threshold = config.ENERGY_THRESHOLD
|
10 |
self.recognizer.dynamic_energy_threshold = config.DYNAMIC_ENERGY_THRESHOLD
|
11 |
self.recognizer.pause_threshold = config.PAUSE_THRESHOLD
|
12 |
+
self.audio_buffer = bytearray()
|
|
|
13 |
|
14 |
def add_audio_chunk(self, audio_chunk):
    """Append one chunk of captured audio to the pending buffer.

    Args:
        audio_chunk: Object exposing ``tobytes()`` (a numpy array in
            practice); its raw bytes are appended unchanged.

    NOTE(review): get_transcript_chunk wraps this buffer as 2-byte
    samples, so chunks are presumably int16 PCM -- confirm with the
    caller, since no dtype conversion happens here.
    """
    raw_bytes = audio_chunk.tobytes()
    self.audio_buffer += raw_bytes
|
17 |
|
18 |
def get_transcript_chunk(self):
    """Transcribe the buffered audio once enough has accumulated.

    Returns:
        The recognized text, or ``None`` when the buffer is still shorter
        than ``config.MIN_PROCESSING_DURATION`` seconds, when the speech
        could not be understood, or when the recognition request failed.

    Buffer handling:
        * Recognized or unintelligible audio is dropped from the buffer.
        * On ``sr.RequestError`` the buffer is left untouched, as before,
          so the same audio is part of the next attempt.
          NOTE(review): nothing here bounds the buffer if requests keep
          failing -- confirm whether a cap is wanted.
    """
    bytes_per_sample = 2  # Sample width in bytes

    # Only process if we have enough audio
    min_bytes = (
        config.SAMPLE_RATE * config.MIN_PROCESSING_DURATION * bytes_per_sample
    )
    if len(self.audio_buffer) < min_bytes:
        return None

    # Freeze exactly the bytes being sent, so that only those bytes are
    # removed afterwards.
    snapshot = bytes(self.audio_buffer)
    audio_data = sr.AudioData(snapshot, config.SAMPLE_RATE, bytes_per_sample)

    try:
        # Use Google Web Speech API for best accuracy
        text = self.recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # Unintelligible audio is still consumed; retrying it is pointless.
        text = None
    except sr.RequestError as e:
        print(f"Speech recognition error: {str(e)}")
        return None

    # Drop only the consumed prefix. Replacing the whole buffer here would
    # also throw away any audio appended while the request was in flight.
    del self.audio_buffer[:len(snapshot)]
    return text
|
|
|
|
|
|