Spaces:
Running
Running
resample mic audio to 16kHz
Browse files
- app/asr_worker.py +11 -2
- requirements.txt +2 -2
app/asr_worker.py
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
import numpy as np
|
2 |
import sherpa_onnx
|
3 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
4 |
|
5 |
MODEL_DIR = Path("models/zipformer_bilingual")
|
6 |
|
@@ -19,8 +23,13 @@ def create_recognizer():
|
|
19 |
|
20 |
def stream_audio(raw_pcm_bytes, stream, recognizer):
|
21 |
audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
24 |
if recognizer.is_ready(stream):
|
25 |
recognizer.decode_streams([stream])
|
26 |
result = recognizer.get_result(stream)
|
|
|
1 |
import numpy as np
|
2 |
import sherpa_onnx
|
3 |
from pathlib import Path
|
4 |
+
import scipy.signal
|
5 |
+
|
6 |
+
def resample_audio(audio, orig_sr, target_sr):
    """Resample ``audio`` from ``orig_sr`` to ``target_sr``.

    Thin wrapper around ``scipy.signal.resample_poly`` called with
    ``up=target_sr`` and ``down=orig_sr``.

    Args:
        audio: Array-like of samples. Floating-point and complex arrays are
            handed to SciPy as-is; any other dtype (e.g. integer PCM) is
            converted to ``float64`` first.
        orig_sr: Sample rate of ``audio``.
        target_sr: Desired sample rate of the result.

    Returns:
        The resampled array as returned by ``scipy.signal.resample_poly``.

    Raises:
        ValueError: Propagated from SciPy for invalid rates
            (NOTE(review): presumably non-integer or < 1 -- confirm against
            the installed SciPy version).
    """
    samples = np.asarray(audio)
    if not np.issubdtype(samples.dtype, np.inexact):
        # The resampling filter has fractional taps; make sure integer PCM is
        # processed with floating-point arithmetic instead of relying on how a
        # particular SciPy release treats integer input.
        samples = samples.astype(np.float64)
    return scipy.signal.resample_poly(samples, target_sr, orig_sr)
|
8 |
|
9 |
MODEL_DIR = Path("models/zipformer_bilingual")
|
10 |
|
|
|
23 |
|
24 |
def stream_audio(raw_pcm_bytes, stream, recognizer):
|
25 |
audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
|
26 |
+
if audio.size == 0:
|
27 |
+
return "", 0.0
|
28 |
+
|
29 |
+
resampled = resample_audio(audio, 48000, 16000)
|
30 |
+
rms = float(np.sqrt(np.mean(resampled ** 2)))
|
31 |
+
|
32 |
+
stream.accept_waveform(16000, resampled)
|
33 |
if recognizer.is_ready(stream):
|
34 |
recognizer.decode_streams([stream])
|
35 |
result = recognizer.get_result(stream)
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc3163f789b3e2232c85fe9ae6ae0dd70869dd6bdc217b55353e5e34bfe24e48
|
3 |
+
size 49
|