Luigi committed on
Commit
231cd3a
·
1 Parent(s): 221a9c5

1. Add conversion to Traditional Chinese 2. Improve UI layout

Browse files
Files changed (2) hide show
  1. app/asr_worker.py +7 -2
  2. app/static/index.html +74 -6
app/asr_worker.py CHANGED
@@ -2,6 +2,10 @@ import numpy as np
2
  import sherpa_onnx
3
  from pathlib import Path
4
  import scipy.signal
 
 
 
 
5
 
6
  def resample_audio(audio, orig_sr, target_sr):
7
  return scipy.signal.resample_poly(audio, target_sr, orig_sr)
@@ -33,7 +37,7 @@ def stream_audio(raw_pcm_bytes, stream, recognizer):
33
  if recognizer.is_ready(stream):
34
  recognizer.decode_streams([stream])
35
  result = recognizer.get_result(stream)
36
- return result, rms
37
 
38
  def finalize_stream(stream, recognizer):
39
  tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
@@ -41,4 +45,5 @@ def finalize_stream(stream, recognizer):
41
  stream.input_finished()
42
  while recognizer.is_ready(stream):
43
  recognizer.decode_streams([stream])
44
- return recognizer.get_result(stream)
 
 
2
  import sherpa_onnx
3
  from pathlib import Path
4
  import scipy.signal
5
+ from opencc import OpenCC
6
+
7
+ converter = OpenCC('s2t')
8
+
9
 
10
  def resample_audio(audio, orig_sr, target_sr):
11
  return scipy.signal.resample_poly(audio, target_sr, orig_sr)
 
37
  if recognizer.is_ready(stream):
38
  recognizer.decode_streams([stream])
39
  result = recognizer.get_result(stream)
40
+ return converter.convert(result), rms
41
 
42
  def finalize_stream(stream, recognizer):
43
  tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
 
45
  stream.input_finished()
46
  while recognizer.is_ready(stream):
47
  recognizer.decode_streams([stream])
48
+ result = recognizer.get_result(stream)
49
+ return converter.convert(result)
app/static/index.html CHANGED
@@ -1,13 +1,81 @@
1
  <!DOCTYPE html>
2
- <html>
3
  <head>
4
- <title>FastAPI Real-Time ASR</title>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  </head>
6
  <body>
7
- <h1>🎀 Speak into your mic...</h1>
8
- <div>Volume: <progress id="vol" max="1" value="0"></progress></div>
9
- <p>Partial: <span id="partial"></span></p>
10
- <p>Final: <b id="final"></b></p>
 
 
 
 
11
  <script>
12
  const ws = new WebSocket("wss://" + location.host + "/ws");
13
  const vol = document.getElementById("vol");
 
1
  <!DOCTYPE html>
2
+ <html lang="en">
3
  <head>
4
+ <meta charset="UTF-8" />
5
+ <title>🎀 Real-Time ASR Demo</title>
6
+ <style>
7
+ body {
8
+ font-family: "Segoe UI", sans-serif;
9
+ background-color: #f5f6fa;
10
+ display: flex;
11
+ flex-direction: column;
12
+ align-items: center;
13
+ justify-content: center;
14
+ min-height: 100vh;
15
+ margin: 0;
16
+ padding: 2rem;
17
+ color: #2f3640;
18
+ }
19
+
20
+ h1 {
21
+ margin-bottom: 1rem;
22
+ font-size: 2rem;
23
+ }
24
+
25
+ #vol {
26
+ width: 300px;
27
+ height: 20px;
28
+ margin-bottom: 1rem;
29
+ appearance: none;
30
+ }
31
+
32
+ #vol::-webkit-progress-bar {
33
+ background-color: #dcdde1;
34
+ border-radius: 8px;
35
+ }
36
+
37
+ #vol::-webkit-progress-value {
38
+ background-color: #44bd32;
39
+ border-radius: 8px;
40
+ transition: width 0.2s;
41
+ }
42
+
43
+ .output {
44
+ width: 90%;
45
+ max-width: 800px;
46
+ text-align: left;
47
+ margin-top: 2rem;
48
+ background: white;
49
+ padding: 1rem 1.5rem;
50
+ border-radius: 10px;
51
+ box-shadow: 0 0 10px rgba(0,0,0,0.1);
52
+ }
53
+
54
+ .label {
55
+ font-weight: bold;
56
+ color: #718093;
57
+ }
58
+
59
+ #partial {
60
+ font-size: 1.25rem;
61
+ color: #353b48;
62
+ }
63
+
64
+ #final {
65
+ font-size: 1.4rem;
66
+ color: #e84118;
67
+ }
68
+ </style>
69
  </head>
70
  <body>
71
+ <h1>🎀 Speak into your microphone</h1>
72
+ <progress id="vol" max="1" value="0"></progress>
73
+
74
+ <div class="output">
75
+ <div><span class="label">Partial:</span> <span id="partial">...</span></div>
76
+ <div><span class="label">Final:</span> <b id="final">...</b></div>
77
+ </div>
78
+
79
  <script>
80
  const ws = new WebSocket("wss://" + location.host + "/ws");
81
  const vol = document.getElementById("vol");