Luigi committed on
Commit
2a8a9a5
·
1 Parent(s): 2318eae

resample mic audio to 16kHz

Browse files
Files changed (2) hide show
  1. app/asr_worker.py +11 -2
  2. requirements.txt +2 -2
app/asr_worker.py CHANGED
@@ -1,6 +1,10 @@
1
  import numpy as np
2
  import sherpa_onnx
3
  from pathlib import Path
 
 
 
 
4
 
5
  MODEL_DIR = Path("models/zipformer_bilingual")
6
 
@@ -19,8 +23,13 @@ def create_recognizer():
19
 
20
  def stream_audio(raw_pcm_bytes, stream, recognizer):
21
  audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
22
- rms = float(np.sqrt(np.mean(audio ** 2)))
23
- stream.accept_waveform(16000, audio)
 
 
 
 
 
24
  if recognizer.is_ready(stream):
25
  recognizer.decode_streams([stream])
26
  result = recognizer.get_result(stream)
 
1
  import numpy as np
2
  import sherpa_onnx
3
  from pathlib import Path
4
+ import scipy.signal
5
+
6
+ def resample_audio(audio, orig_sr, target_sr):
7
+ return scipy.signal.resample_poly(audio, target_sr, orig_sr)
8
 
9
  MODEL_DIR = Path("models/zipformer_bilingual")
10
 
 
23
 
24
  def stream_audio(raw_pcm_bytes, stream, recognizer):
25
  audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
26
+ if audio.size == 0:
27
+ return "", 0.0
28
+
29
+ resampled = resample_audio(audio, 48000, 16000)
30
+ rms = float(np.sqrt(np.mean(resampled ** 2)))
31
+
32
+ stream.accept_waveform(16000, resampled)
33
  if recognizer.is_ready(stream):
34
  recognizer.decode_streams([stream])
35
  result = recognizer.get_result(stream)
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a7c3c10c8ec533e73405e503c3004146a36153ae701934132aecbe689e9e666
3
- size 44
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3163f789b3e2232c85fe9ae6ae0dd70869dd6bdc217b55353e5e34bfe24e48
3
+ size 49