Luigi committed on
Commit
8ebc812
·
1 Parent(s): 719d009

Add Simplified-to-Traditional Chinese conversion

Browse files
Files changed (2) hide show
  1. app/asr_worker.py +10 -6
  2. requirements.txt +2 -2
app/asr_worker.py CHANGED
@@ -1,13 +1,13 @@
1
  import numpy as np
 
 
2
  import sherpa_onnx
3
  from pathlib import Path
4
- import scipy.signal
5
-
6
- def resample_audio(audio, orig_sr, target_sr):
7
- return scipy.signal.resample_poly(audio, target_sr, orig_sr)
8
 
9
  MODEL_DIR = Path("models/zipformer_bilingual")
10
 
 
 
11
  def create_recognizer():
12
  return sherpa_onnx.OnlineRecognizer.from_transducer(
13
  tokens=str(MODEL_DIR / "tokens.txt"),
@@ -21,6 +21,9 @@ def create_recognizer():
21
  decoding_method="greedy_search"
22
  )
23
 
 
 
 
24
  def stream_audio(raw_pcm_bytes, stream, recognizer):
25
  audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
26
  if audio.size == 0:
@@ -33,7 +36,7 @@ def stream_audio(raw_pcm_bytes, stream, recognizer):
33
  if recognizer.is_ready(stream):
34
  recognizer.decode_streams([stream])
35
  result = recognizer.get_result(stream)
36
- return result, rms
37
 
38
  def finalize_stream(stream, recognizer):
39
  tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
@@ -41,4 +44,5 @@ def finalize_stream(stream, recognizer):
41
  stream.input_finished()
42
  while recognizer.is_ready(stream):
43
  recognizer.decode_streams([stream])
44
- return recognizer.get_result(stream)
 
 
1
  import numpy as np
2
+ import pysoxr
3
+ from opencc import OpenCC
4
  import sherpa_onnx
5
  from pathlib import Path
 
 
 
 
6
 
7
  MODEL_DIR = Path("models/zipformer_bilingual")
8
 
9
+ converter = OpenCC('s2t') # Simplified to Traditional Chinese
10
+
11
  def create_recognizer():
12
  return sherpa_onnx.OnlineRecognizer.from_transducer(
13
  tokens=str(MODEL_DIR / "tokens.txt"),
 
21
  decoding_method="greedy_search"
22
  )
23
 
24
+ def resample_audio(audio, orig_sr, target_sr):
25
+ return pysoxr.resample(audio, orig_sr, target_sr)
26
+
27
  def stream_audio(raw_pcm_bytes, stream, recognizer):
28
  audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)
29
  if audio.size == 0:
 
36
  if recognizer.is_ready(stream):
37
  recognizer.decode_streams([stream])
38
  result = recognizer.get_result(stream)
39
+ return converter.convert(result), rms
40
 
41
  def finalize_stream(stream, recognizer):
42
  tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
 
44
  stream.input_finished()
45
  while recognizer.is_ready(stream):
46
  recognizer.decode_streams([stream])
47
+ result = recognizer.get_result(stream)
48
+ return converter.convert(result)
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c564834ceb94d0d9aceef1ba37eed4451bca6eb05158ebd81518a7e68a1c2ef
3
- size 60
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b08ab37381178cb19b7876c22c7df8a82205f03c506c487a6f0cbe773707c23
3
+ size 88