Luigi committed
Commit ab74fc2 · Parent(s): cd866b8

add hotwords (i.e. speech context) demo

Files changed (3):
  1. app/asr_worker.py +91 -2
  2. app/main.py +15 -3
  3. app/static/index.html +62 -4
app/asr_worker.py CHANGED
@@ -5,6 +5,8 @@ import sherpa_onnx
 import scipy.signal
 from opencc import OpenCC
 from huggingface_hub import hf_hub_download
+from typing import List
+import tempfile
 
 # Ensure Hugging Face cache is in a user-writable directory
 CACHE_DIR = Path(__file__).parent / "hf_cache"
@@ -14,6 +16,7 @@ converter = OpenCC('s2t')
 
 # Streaming Zipformer model registry: paths relative to repo root
 STREAMING_ZIPFORMER_MODELS = {
+    # bilingual zh-en with char+BPE
     "csukuangfj/k2fsa-zipformer-bilingual-zh-en-t": {
         "tokens": "data/lang_char_bpe/tokens.txt",
         "encoder_fp32": "exp/96/encoder-epoch-99-avg-1.onnx",
@@ -22,7 +25,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "exp/96/decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "exp/96/joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "exp/96/joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "cjkchar+bpe",
+        "bpe_vocab": "data/lang_char_bpe/bpe.vocab",
     },
+    # mixed Chinese+English (char+BPE)
     "pfluo/k2fsa-zipformer-chinese-english-mixed": {
         "tokens": "data/lang_char_bpe/tokens.txt",
         "encoder_fp32": "exp/encoder-epoch-99-avg-1.onnx",
@@ -31,7 +37,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": None,
         "joiner_fp32": "exp/joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "exp/joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "cjkchar+bpe",
+        "bpe_vocab": "data/lang_char_bpe/bpe.vocab",
     },
+    # Korean-only (CJK chars)
     "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-99-avg-1.onnx",
@@ -40,7 +49,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "cjkchar",
+        "bpe_vocab": None,
     },
+    # multi Chinese (Hans) (CJK chars)
     "k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
@@ -49,7 +61,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
         "joiner_fp32": "joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
         "joiner_int8": "joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
+        "modeling_unit": "cjkchar",
+        "bpe_vocab": None,
     },
+    # wenetspeech streaming (CJK chars)
     "pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": {
         "tokens": "data/lang_char/tokens.txt",
         "encoder_fp32": "exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx",
@@ -58,7 +73,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx",
         "joiner_fp32": "exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
         "joiner_int8": "exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx",
+        "modeling_unit": "cjkchar",
+        "bpe_vocab": None,
     },
+    # English-only (BPE)
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-99-avg-1-chunk-16-left-128.onnx",
@@ -67,6 +85,8 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": None,
         "joiner_fp32": "joiner-epoch-99-avg-1-chunk-16-left-128.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx",
+        "modeling_unit": "bpe",
+        "bpe_vocab": None,
     },
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": {
         "tokens": "tokens.txt",
@@ -76,6 +96,8 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "bpe",
+        "bpe_vocab": None,
     },
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": {
         "tokens": "tokens.txt",
@@ -85,7 +107,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "bpe",
+        "bpe_vocab": None,
     },
+    # older bilingual zh-en (cjkchar+BPE) – no bpe.vocab shipped
     "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-99-avg-1.onnx",
@@ -94,7 +119,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "cjkchar+bpe",
+        "bpe_vocab": None,
     },
+    # French-only (BPE)
     "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-29-avg-9-with-averaged-model.onnx",
@@ -103,7 +131,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-29-avg-9-with-averaged-model.int8.onnx",
         "joiner_fp32": "joiner-epoch-29-avg-9-with-averaged-model.onnx",
         "joiner_int8": "joiner-epoch-29-avg-9-with-averaged-model.int8.onnx",
+        "modeling_unit": "bpe",
+        "bpe_vocab": None,
     },
+    # Chinese-only small (CJK chars)
     "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-99-avg-1.onnx",
@@ -112,7 +143,10 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "cjkchar",
+        "bpe_vocab": None,
     },
+    # English-only 20M (BPE)
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": {
         "tokens": "tokens.txt",
         "encoder_fp32": "encoder-epoch-99-avg-1.onnx",
@@ -121,6 +155,8 @@ STREAMING_ZIPFORMER_MODELS = {
         "decoder_int8": "decoder-epoch-99-avg-1.int8.onnx",
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
+        "modeling_unit": "bpe",
+        "bpe_vocab": None,
     },
 }
 
@@ -131,7 +167,12 @@ def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
 # Create an online recognizer for a given model and precision
 # model_id: full HF repo ID
 # precision: "int8" or "fp32"
-def create_recognizer(model_id: str, precision: str):
+def create_recognizer(
+    model_id: str,
+    precision: str,
+    hotwords: List[str] = None,
+    hotwords_score: float = 0.0,
+):
     if model_id not in STREAMING_ZIPFORMER_MODELS:
         raise ValueError(f"Model '{model_id}' is not registered.")
     entry = STREAMING_ZIPFORMER_MODELS[model_id]
@@ -146,6 +187,54 @@ def create_recognizer(model_id: str, precision: str):
     decoder_path = hf_hub_download(repo_id=model_id, filename=decoder_file, cache_dir=str(CACHE_DIR))
     joiner_path = hf_hub_download(repo_id=model_id, filename=joiner_file, cache_dir=str(CACHE_DIR))
 
+    # ——— Download BPE vocab if this model has one ———
+    modeling_unit = entry.get("modeling_unit")
+    bpe_rel_path = entry.get("bpe_vocab")
+    bpe_vocab_path = None
+    if bpe_rel_path:
+        try:
+            bpe_vocab_path = hf_hub_download(
+                repo_id=model_id,
+                filename=bpe_rel_path,
+                cache_dir=str(CACHE_DIR),
+            )
+            print(f"[DEBUG asr_worker] Downloaded bpe_vocab: {bpe_vocab_path}")
+        except Exception as e:
+            print(f"[WARNING asr_worker] Could not download bpe_vocab '{bpe_rel_path}': {e}")
+            bpe_vocab_path = None
+
+    # ——— Decide whether to use beam search with hotword biasing ———
+    use_beam = (hotwords and hotwords_score > 0.0) and bpe_vocab_path
+    if use_beam:
+        # Write hotword list to a temp file (one entry per line)
+        tf = tempfile.NamedTemporaryFile(
+            mode="w", delete=False, suffix=".txt", dir=str(CACHE_DIR)
+        )
+        for w in hotwords:
+            tf.write(f"{w}\n")
+        tf.flush()
+        tf.close()
+        hotwords_file_path = tf.name
+        print(f"[DEBUG asr_worker] Wrote {len(hotwords)} hotwords to {hotwords_file_path} with score {hotwords_score}")
+
+        # Create beam-search recognizer with hotword biasing
+        return sherpa_onnx.OnlineRecognizer.from_transducer(
+            tokens=tokens_path,
+            encoder=encoder_path,
+            decoder=decoder_path,
+            joiner=joiner_path,
+            provider="cpu",
+            num_threads=1,
+            sample_rate=16000,
+            feature_dim=80,
+            decoding_method="modified_beam_search",
+            hotwords_file=hotwords_file_path,
+            hotwords_score=hotwords_score,
+            modeling_unit=modeling_unit,
+            bpe_vocab=bpe_vocab_path,
+        )
+
+    # ——— Fall back to the original greedy search (no hotword biasing) ———
     return sherpa_onnx.OnlineRecognizer.from_transducer(
         tokens=tokens_path,
         encoder=encoder_path,
@@ -155,7 +244,7 @@ def create_recognizer(model_id: str, precision: str):
         num_threads=1,
         sample_rate=16000,
         feature_dim=80,
-        decoding_method="greedy_search"
+        decoding_method="greedy_search",
     )
 
 def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
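
The new create_recognizer() path can be exercised on its own. Below is a minimal sketch (not part of the commit), assuming the app package is importable and sherpa-onnx and numpy are installed; the repo ID is just one of the registered models, and the silent buffer only stands in for real 16 kHz speech:

    import numpy as np
    from app.asr_worker import create_recognizer

    # Hotwords plus a positive score switch the recognizer to
    # modified_beam_search with biasing; a score of 0 keeps greedy search.
    recognizer = create_recognizer(
        "csukuangfj/k2fsa-zipformer-bilingual-zh-en-t",
        "int8",
        hotwords=["sherpa", "zipformer"],
        hotwords_score=2.0,
    )

    stream = recognizer.create_stream()
    samples = np.zeros(16000, dtype=np.float32)  # stand-in for 1 s of real speech
    stream.accept_waveform(16000, samples)

    while recognizer.is_ready(stream):
        recognizer.decode_stream(stream)
    print(recognizer.get_result(stream))
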
app/main.py CHANGED
@@ -42,15 +42,27 @@ async def websocket_endpoint(websocket: WebSocket):
                 print(f"[ERROR main] JSON parse failed: {e}")
                 continue
             if config_msg.get("type") == "config":
+                # 1) sample rate
                 orig_sr = int(config_msg["sampleRate"])
                 print(f"[INFO main] Set original sample rate to {orig_sr}")
 
-                # New: dynamic model & precision
-                model_id = config_msg.get("model")
+                # 2) model & precision
+                model_id = config_msg.get("model")
                 precision = config_msg.get("precision")
                 print(f"[INFO main] Selected model: {model_id}, precision: {precision}")
 
-                recognizer = create_recognizer(model_id, precision)
+                # 3) hotwords & boost score
+                hotwords = config_msg.get("hotwords", [])
+                hotwords_score = float(config_msg.get("hotwordsScore", 0.0))
+                print(f"[INFO main] Hotwords: {hotwords}, score: {hotwords_score}")
+
+                # 4) create recognizer with biasing
+                recognizer = create_recognizer(
+                    model_id,
+                    precision,
+                    hotwords=hotwords,
+                    hotwords_score=hotwords_score
+                )
                 stream = recognizer.create_stream()
                 print("[INFO main] WebSocket connection accepted; created a streaming context.")
                 continue
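
For reference, the handler above expects the browser's first WebSocket message to be a JSON config of the shape below. This is a sketch rather than part of the commit: it uses the third-party websockets client, and the localhost:8000 address assumes a default uvicorn setup; the field names match the keys read by the handler.

    import asyncio
    import json
    import websockets

    config = {
        "type": "config",
        "sampleRate": 48000,                    # the browser's actual capture rate
        "model": "csukuangfj/k2fsa-zipformer-bilingual-zh-en-t",
        "precision": "int8",
        "hotwords": ["sherpa", "zipformer"],    # one entry per textarea line
        "hotwordsScore": 2.0,                   # 0 disables biasing
    }

    async def main():
        async with websockets.connect("ws://localhost:8000/ws") as ws:
            await ws.send(json.dumps(config))
            # raw PCM chunks would follow as binary frames

    asyncio.run(main())
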
app/static/index.html CHANGED
@@ -21,6 +21,13 @@
       margin-bottom: 1rem;
       font-size: 2rem;
     }
+
+    /* Added for Hotword Bias status */
+    #hotwordStatus {
+      font-size: 0.9rem;
+      color: #e1b12c;
+      font-weight: bold;
+    }
 
     #vol {
       width: 300px;
@@ -86,11 +93,19 @@
       font-weight: bold;
       color: #2f3640;
     }
-    .controls select {
+    .controls select,
+    .controls input[type="number"],
+    .controls textarea {
       padding: 0.3rem;
       border-radius: 5px;
       border: 1px solid #dcdde1;
       background: white;
+      font-size: 1rem;
+    }
+    .controls textarea {
+      flex: 1;
+      resize: vertical;
+      min-height: 4rem;
     }
 
     .model-info {
@@ -139,6 +154,22 @@
     </select>
   </div>
 
+  <div class="controls">
+    <!-- Hotwords List Input -->
+    <label for="hotwordsList">Hotwords:</label>
+    <textarea id="hotwordsList" placeholder="Enter one hotword per line"></textarea>
+    <!-- Global Boost Score Input -->
+    <label for="boostScore">Boost Score:</label>
+    <input type="number" id="boostScore" min="0" max="10" step="0.1" value="2.0" />
+    <!-- ← NEW button to submit hotword changes -->
+    <button id="applyHotwords">Apply Hotwords</button>
+  </div>
+
+  <!-- ← NEW indicator showing whether biasing is ON or OFF -->
+  <div class="controls">
+    <span id="hotwordStatus">Hotword Bias: Off</span>
+  </div>
+
   <div class="model-info" id="modelInfo">
     Languages: <span id="modelLangs"></span> | Size: <span id="modelSize"></span> MB
   </div>
@@ -178,11 +209,24 @@
    const transcript = document.getElementById("transcript");
    const modelSelect = document.getElementById("modelSelect");
    const precisionSelect = document.getElementById("precisionSelect");
+   const hotwordsList = document.getElementById("hotwordsList");
+   const boostScore = document.getElementById("boostScore");
+   const applyBtn = document.getElementById("applyHotwords");
+   const hotwordStatus = document.getElementById("hotwordStatus");
    const modelLangs = document.getElementById("modelLangs");
    const modelSize = document.getElementById("modelSize");
    const micNameElem = document.getElementById("micName");
    const sampleRateElem = document.getElementById("sampleRate");
 
+   // ← Helper to toggle the status text
+   function updateHotwordStatus() {
+     const enabled = hotwordsList.value.split(/\r?\n/).filter(Boolean).length > 0
+                     && parseFloat(boostScore.value) > 0;
+     hotwordStatus.textContent = enabled
+       ? "Hotword Bias: On"
+       : "Hotword Bias: Off";
+   }
+
    function updateModelInfo() {
      const meta = MODEL_METADATA[modelSelect.value];
      if (Array.isArray(meta.language)) {
@@ -199,7 +243,9 @@
          type: "config",
          sampleRate: orig_sample_rate,
          model: modelSelect.value,
-         precision: precisionSelect.value
+         precision: precisionSelect.value,
+         hotwords: hotwordsList.value.split(/\r?\n/).filter(Boolean),
+         hotwordsScore: parseFloat(boostScore.value)
        }));
      } else {
        console.warn("WebSocket not open yet. Cannot send config.");
@@ -218,7 +264,7 @@
      updateModelInfo();
 
      // Now that we know the sample rate, open the WS
-     ws = new WebSocket(`wss://${location.host}/ws`);
+     ws = new WebSocket(`ws://${location.host}/ws`);
      ws.onopen = () => sendConfig();
      ws.onerror = err => console.error("WebSocket error:", err);
      ws.onclose = () => console.log("WebSocket closed");
@@ -238,8 +284,20 @@
      modelSelect.addEventListener("change", () => {
        updateModelInfo();
        sendConfig();
+       updateHotwordStatus();
+     });
+     precisionSelect.addEventListener("change", () => {
+       sendConfig();
+       updateHotwordStatus();
+     });
+     // hotwordsList.addEventListener("input", sendConfig);
+     // boostScore.addEventListener("input", sendConfig);
+
+     // ← Re-send config & update indicator when the button is clicked
+     applyBtn.addEventListener("click", () => {
+       sendConfig();
+       updateHotwordStatus();
      });
-     precisionSelect.addEventListener("change", sendConfig);
 
      const source = context.createMediaStreamSource(stream);
      const processor = context.createScriptProcessor(4096, 1, 1);