Spaces:
Running
Running
Add Simplified-to-Traditional Chinese conversion
Browse files
- app/asr_worker.py +10 -6
- requirements.txt +2 -2
app/asr_worker.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
import numpy as np
|
|
|
|
|
2 |
import sherpa_onnx
|
3 |
from pathlib import Path
|
4 |
-
import scipy.signal
|
5 |
-
|
6 |
-
def resample_audio(audio, orig_sr, target_sr):
|
7 |
-
return scipy.signal.resample_poly(audio, target_sr, orig_sr)
|
8 |
|
9 |
MODEL_DIR = Path("models/zipformer_bilingual")
|
10 |
|
|
|
|
|
11 |
def create_recognizer():
|
12 |
return sherpa_onnx.OnlineRecognizer.from_transducer(
|
13 |
tokens=str(MODEL_DIR / "tokens.txt"),
|
@@ -21,6 +21,9 @@ def create_recognizer():
|
|
21 |
decoding_method="greedy_search"
|
22 |
)
|
23 |
|
|
|
|
|
|
|
24 |
def stream_audio(raw_pcm_bytes, stream, recognizer):
|
25 |
audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
|
26 |
if audio.size == 0:
|
@@ -33,7 +36,7 @@ def stream_audio(raw_pcm_bytes, stream, recognizer):
|
|
33 |
if recognizer.is_ready(stream):
|
34 |
recognizer.decode_streams([stream])
|
35 |
result = recognizer.get_result(stream)
|
36 |
-
return result, rms
|
37 |
|
38 |
def finalize_stream(stream, recognizer):
|
39 |
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
@@ -41,4 +44,5 @@ def finalize_stream(stream, recognizer):
|
|
41 |
stream.input_finished()
|
42 |
while recognizer.is_ready(stream):
|
43 |
recognizer.decode_streams([stream])
|
44 |
-
|
|
|
|
1 |
import numpy as np
|
2 |
+
import pysoxr
|
3 |
+
from opencc import OpenCC
|
4 |
import sherpa_onnx
|
5 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
6 |
|
7 |
MODEL_DIR = Path("models/zipformer_bilingual")
|
8 |
|
9 |
+
converter = OpenCC('s2t') # Simplified to Traditional Chinese
|
10 |
+
|
11 |
def create_recognizer():
|
12 |
return sherpa_onnx.OnlineRecognizer.from_transducer(
|
13 |
tokens=str(MODEL_DIR / "tokens.txt"),
|
|
|
21 |
decoding_method="greedy_search"
|
22 |
)
|
23 |
|
24 |
+
def resample_audio(audio, orig_sr, target_sr):
|
25 |
+
return pysoxr.resample(audio, orig_sr, target_sr)
|
26 |
+
|
27 |
def stream_audio(raw_pcm_bytes, stream, recognizer):
|
28 |
audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
|
29 |
if audio.size == 0:
|
|
|
36 |
if recognizer.is_ready(stream):
|
37 |
recognizer.decode_streams([stream])
|
38 |
result = recognizer.get_result(stream)
|
39 |
+
return converter.convert(result), rms
|
40 |
|
41 |
def finalize_stream(stream, recognizer):
|
42 |
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
|
|
44 |
stream.input_finished()
|
45 |
while recognizer.is_ready(stream):
|
46 |
recognizer.decode_streams([stream])
|
47 |
+
result = recognizer.get_result(stream)
|
48 |
+
return converter.convert(result)
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b08ab37381178cb19b7876c22c7df8a82205f03c506c487a6f0cbe773707c23
|
3 |
+
size 88
|