gaur3009 commited on
Commit
0c5c3aa
·
verified ·
1 Parent(s): bdec6fa

Update transcrriber.py

Browse files
Files changed (1) hide show
  1. transcrriber.py +14 -12
transcrriber.py CHANGED
@@ -1,6 +1,6 @@
1
  import speech_recognition as sr
2
  import numpy as np
3
- import collections
4
  import config
5
 
6
  class SpeechTranscriber:
@@ -9,33 +9,35 @@ class SpeechTranscriber:
9
  self.recognizer.energy_threshold = config.ENERGY_THRESHOLD
10
  self.recognizer.dynamic_energy_threshold = config.DYNAMIC_ENERGY_THRESHOLD
11
  self.recognizer.pause_threshold = config.PAUSE_THRESHOLD
12
- self.audio_buffer = collections.deque(maxlen=config.BUFFER_DURATION * 10)
13
- self.last_processed = 0
14
 
15
  def add_audio_chunk(self, audio_chunk):
16
- self.audio_buffer.extend(audio_chunk)
 
17
 
18
  def get_transcript_chunk(self):
19
  # Only process if we have enough audio
20
- if len(self.audio_buffer) < config.SAMPLE_RATE * config.MIN_PROCESSING_DURATION:
 
21
  return None
22
 
23
- # Convert to AudioData format
24
  audio_data = sr.AudioData(
25
- np.array(self.audio_buffer).tobytes(),
26
- config.SAMPLE_RATE,
27
  2 # Sample width in bytes
28
  )
29
 
30
  try:
31
  # Use Google Web Speech API for best accuracy
32
  text = self.recognizer.recognize_google(audio_data)
 
 
33
  return text
34
  except sr.UnknownValueError:
 
 
35
  return None
36
  except sr.RequestError as e:
37
  print(f"Speech recognition error: {str(e)}")
38
- return None
39
- finally:
40
- # Clear buffer after processing
41
- self.audio_buffer.clear()
 
1
  import speech_recognition as sr
2
  import numpy as np
3
+ import io
4
  import config
5
 
6
  class SpeechTranscriber:
 
9
  self.recognizer.energy_threshold = config.ENERGY_THRESHOLD
10
  self.recognizer.dynamic_energy_threshold = config.DYNAMIC_ENERGY_THRESHOLD
11
  self.recognizer.pause_threshold = config.PAUSE_THRESHOLD
12
+ self.audio_buffer = bytearray()
 
13
 
14
  def add_audio_chunk(self, audio_chunk):
15
+ # Convert numpy array to bytes
16
+ self.audio_buffer.extend(audio_chunk.tobytes())
17
 
18
def get_transcript_chunk(self):
    """Transcribe the buffered audio and return the recognized text.

    Returns:
        str | None: the transcript, or ``None`` when the buffer is
        still too short, when no speech could be recognized, or when
        the recognition service request failed.

    Side effects:
        Once enough audio has been submitted for recognition, the
        buffer is ALWAYS cleared — on success, on unrecognizable
        audio, and on service errors alike — so a persistent outage
        cannot grow it without bound.
    """
    # Only process once we have enough audio:
    # samples/sec * seconds * 2 bytes per 16-bit sample.
    min_bytes = config.SAMPLE_RATE * config.MIN_PROCESSING_DURATION * 2
    if len(self.audio_buffer) < min_bytes:
        return None

    # Wrap the raw PCM bytes for the recognizer.
    audio_data = sr.AudioData(
        bytes(self.audio_buffer),
        config.SAMPLE_RATE,
        2,  # sample width in bytes (16-bit PCM)
    )

    try:
        # Use Google Web Speech API for best accuracy.
        return self.recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # Nothing intelligible in this chunk.
        return None
    except sr.RequestError as e:
        # API unreachable / key or quota problem; report and move on.
        print(f"Speech recognition error: {str(e)}")
        return None
    finally:
        # BUG FIX: the buffer was previously left intact on RequestError,
        # so repeated service failures grew it without bound. Clearing in
        # `finally` covers every exit path exactly once.
        self.audio_buffer = bytearray()