csukuangfj committed on
Commit
3ef3ff3
·
1 Parent(s): 1b4da5c

update model

Browse files
app-vad.js CHANGED
@@ -5,7 +5,6 @@
5
  const startBtn = document.getElementById('startBtn');
6
  const stopBtn = document.getElementById('stopBtn');
7
  const clearBtn = document.getElementById('clearBtn');
8
- const hint = document.getElementById('hint');
9
  const soundClips = document.getElementById('sound-clips');
10
 
11
  let textArea = document.getElementById('results');
@@ -43,19 +42,98 @@ function getDisplayResult() {
43
 
44
 
45
  Module = {};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  Module.onRuntimeInitialized = function() {
47
  console.log('inited!');
48
- hint.innerText = 'Model loaded! Please click start';
49
 
50
  startBtn.disabled = false;
51
 
52
- vad = createVad(Module);
53
  console.log('vad is created!', vad);
54
 
55
  buffer = new CircularBuffer(30 * 16000, Module);
56
  console.log('CircularBuffer is created!', buffer);
57
  };
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  let audioCtx;
60
  let mediaStream;
61
 
 
5
  const startBtn = document.getElementById('startBtn');
6
  const stopBtn = document.getElementById('stopBtn');
7
  const clearBtn = document.getElementById('clearBtn');
 
8
  const soundClips = document.getElementById('sound-clips');
9
 
10
  let textArea = document.getElementById('results');
 
42
 
43
 
44
  Module = {};
45
+
46
// Emscripten hook, see
// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
// Logs every requested path and resolves it by prefixing scriptDirectory
// (which defaults to the empty string when it is not supplied).
Module.locateFile = (path, scriptDirectory = '') => {
  console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);

  const resolvedLocation = scriptDirectory + path;
  return resolvedLocation;
};
51
+
52
// Status hook installed on the Emscripten Module object.
// Mirrors the given status text into the #status element and hides every
// .tab-content element (via the `loading` class) while a status is shown.
//
// status: text to display. The literal 'Running...' is replaced by a
//   friendlier message; an empty string hides #status and reveals the
//   .tab-content elements again.
// NOTE(review): presumably the Emscripten runtime calls this with '' once
//   loading has finished - confirm against the generated glue code.
Module.setStatus = function(status) {
  console.log(`status ${status}`);

  const statusElement = document.getElementById('status');

  // Keep the parameter untouched; derive the text that is actually shown.
  const message =
      status === 'Running...' ? 'Model downloaded. Initializing vad...' : status;
  statusElement.textContent = message;

  const isBusy = message !== '';
  statusElement.style.display = isBusy ? 'block' : 'none';

  // One query for both states: the class is present exactly while busy.
  document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
    tabContentElement.classList.toggle('loading', isBusy);
  });
};
74
+
75
  Module.onRuntimeInitialized = function() {
76
  console.log('inited!');
 
77
 
78
  startBtn.disabled = false;
79
 
80
+ initVad();
81
  console.log('vad is created!', vad);
82
 
83
  buffer = new CircularBuffer(30 * 16000, Module);
84
  console.log('CircularBuffer is created!', buffer);
85
  };
86
 
87
/**
 * Asks the wasm side whether a file exists.
 *
 * Copies `filename` as a NUL-terminated UTF-8 string into a temporary heap
 * allocation, passes that pointer to Module._SherpaOnnxFileExists and always
 * releases the allocation afterwards, even when the native call throws.
 *
 * @param {string} filename Name of the file to look up.
 * @returns {*} Whatever Module._SherpaOnnxFileExists returns; callers in this
 *     file compare it with 1.
 */
function fileExists(filename) {
  // +1 leaves room for the terminating NUL byte.
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  // Named filenamePtr so it cannot be confused with the file-level `buffer`.
  const filenamePtr = Module._malloc(filenameLen);

  try {
    Module.stringToUTF8(filename, filenamePtr, filenameLen);
    return Module._SherpaOnnxFileExists(filenamePtr);
  } finally {
    Module._free(filenamePtr);
  }
}
98
+
99
/**
 * Builds the VAD configuration, selects whichever model file is available
 * and stores the result of createVad(Module, config) in the file-level `vad`.
 *
 * 'silero_vad.onnx' is probed first; 'ten-vad.onnx' is probed only when the
 * silero file is not reported as present. If neither is found, both model
 * paths stay empty.
 */
function initVad() {
  // The two model sections differ only in their window size.
  const modelDefaults = (windowSize) => ({
    model: '',
    threshold: 0.50,
    minSilenceDuration: 0.50,
    minSpeechDuration: 0.25,
    maxSpeechDuration: 20,
    windowSize: windowSize,
  });

  const config = {
    sileroVad: modelDefaults(512),
    tenVad: modelDefaults(256),
    sampleRate: 16000,
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    bufferSizeInSeconds: 30,
  };

  // Ordered by priority; find() stops probing at the first available file.
  const candidates = [
    ['sileroVad', 'silero_vad.onnx'],
    ['tenVad', 'ten-vad.onnx'],
  ];
  const chosen = candidates.find((entry) => fileExists(entry[1]) == 1);
  if (chosen !== undefined) {
    config[chosen[0]].model = chosen[1];
  }

  vad = createVad(Module, config);
}
136
+
137
  let audioCtx;
138
  let mediaStream;
139
 
index.html CHANGED
@@ -11,30 +11,67 @@
11
  textarea {
12
  width:100%;
13
  }
 
 
 
14
  </style>
15
  </head>
16
 
17
- <body>
18
  <h1>
19
  Next-gen Kaldi + WebAssembly<br/>
20
- VAD Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
21
  (with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>)
22
  </h1>
23
 
24
- <div>
25
- <span id="hint">Loading model ... ...</span>
26
- <br/>
27
- <br/>
28
- <button id="startBtn" disabled>Start</button>
29
- <button id="stopBtn" disabled>Stop</button>
30
- <button id="clearBtn">Clear</button>
31
- <br/>
32
- <br/>
33
- <textarea id="results" rows="10" readonly></textarea>
 
 
 
 
 
 
 
 
34
  </div>
35
 
36
- <section flex="1" overflow="auto" id="sound-clips">
37
- </section>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  <script src="sherpa-onnx-vad.js"></script>
40
  <script src="app-vad.js"></script>
 
11
  textarea {
12
  width:100%;
13
  }
14
+ .loading {
15
+ display: none !important;
16
+ }
17
  </style>
18
  </head>
19
 
20
+ <body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
21
  <h1>
22
  Next-gen Kaldi + WebAssembly<br/>
23
+ VAD Demo using <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
24
  (with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>)
25
  </h1>
26
 
27
+ <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
28
+ <div id="status">Loading...</div>
29
+
30
+ <div id="singleAudioContent" class="tab-content loading">
31
+ <div style="display: flex; gap: 1.5rem;">
32
+ <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
33
+ <button id="startBtn" disabled>Start</button>
34
+ <button id="stopBtn" disabled>Stop</button>
35
+ <button id="clearBtn">Clear</button>
36
+ </div>
37
+ </div>
38
+
39
+ <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
40
+ <textarea id="results" rows="10" placeholder="Please click start and speak. Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
41
+ </div>
42
+
43
+ <section flex="1" overflow="auto" id="sound-clips">
44
+ </section>
45
  </div>
46
 
47
+ <!-- Footer Section -->
48
+ <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
49
+ <h3>Description</h3>
50
+ <ul>
51
+ <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
52
+ <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
53
+ <ul>
54
+ <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
55
+ <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
56
+ <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
57
+ </ul>
58
+ </ul>
59
+ <h3>About This Demo</h3>
60
+ <ul>
61
+ <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
62
+ <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
63
+ </ul>
64
+ <h3>Latest Update</h3>
65
+ <ul>
66
+ <li>Update UI.</li>
67
+ <li>First working version.</li>
68
+ </ul>
69
+
70
+ <h3>Acknowledgement</h3>
71
+ <ul>
72
+ <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
73
+ </ul>
74
+ </div>
75
 
76
  <script src="sherpa-onnx-vad.js"></script>
77
  <script src="app-vad.js"></script>
sherpa-onnx-vad.js CHANGED
@@ -7,6 +7,10 @@ function freeConfig(config, Module) {
7
  freeConfig(config.sileroVad, Module)
8
  }
9
 
 
 
 
 
10
 
11
  Module._free(config.ptr);
12
  }
@@ -48,6 +52,42 @@ function initSherpaOnnxSileroVadModelConfig(config, Module) {
48
  }
49
  }
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  function initSherpaOnnxVadModelConfig(config, Module) {
52
  if (!('sileroVad' in config)) {
53
  config.sileroVad = {
@@ -60,10 +100,23 @@ function initSherpaOnnxVadModelConfig(config, Module) {
60
  };
61
  }
62
 
 
 
 
 
 
 
 
 
 
 
 
63
  const sileroVad =
64
  initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);
65
 
66
- const len = sileroVad.len + 4 * 4;
 
 
67
  const ptr = Module._malloc(len);
68
 
69
  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
@@ -86,8 +139,11 @@ function initSherpaOnnxVadModelConfig(config, Module) {
86
  Module.setValue(ptr + offset, config.debug || 0, 'i32');
87
  offset += 4;
88
 
 
 
 
89
  return {
90
- buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad,
91
  }
92
  }
93
 
@@ -101,8 +157,18 @@ function createVad(Module, myConfig) {
101
  windowSize: 512,
102
  };
103
 
 
 
 
 
 
 
 
 
 
104
  let config = {
105
  sileroVad: sileroVad,
 
106
  sampleRate: 16000,
107
  numThreads: 1,
108
  provider: 'cpu',
 
7
  freeConfig(config.sileroVad, Module)
8
  }
9
 
10
+ if ('tenVad' in config) {
11
+ freeConfig(config.tenVad, Module)
12
+ }
13
+
14
 
15
  Module._free(config.ptr);
16
  }
 
52
  }
53
  }
54
 
55
/**
 * Writes a ten-vad model configuration into the wasm heap.
 *
 * Two allocations are made with Module._malloc: one holding the model path as
 * a NUL-terminated UTF-8 string, and one 24-byte record of six 4-byte fields
 * written in this order: model pointer, threshold, minSilenceDuration,
 * minSpeechDuration, windowSize, maxSpeechDuration.
 * NOTE(review): the 4-byte pointer slot presumably assumes wasm32 - confirm.
 *
 * Missing or falsy settings fall back to defaults ('' / 0.5 / 0.5 / 0.25 /
 * 256 / 20); note that an explicit 0 is therefore replaced as well.
 *
 * @param {Object} config ten-vad settings (model, threshold,
 *     minSilenceDuration, minSpeechDuration, windowSize, maxSpeechDuration).
 * @param {Object} Module Emscripten module providing lengthBytesUTF8,
 *     _malloc, stringToUTF8 and setValue.
 * @returns {{buffer: number, ptr: number, len: number}} Pointer to the model
 *     path string, pointer to the record, and the record size in bytes.
 *     Nothing is freed here; the caller owns both allocations.
 */
function initSherpaOnnxTenVadModelConfig(config, Module) {
  const model = config.model || '';

  // +1 leaves room for the terminating NUL byte.
  const modelLen = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 6 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);

  // Declared locally: a bare `offset = 0` creates an implicit global and
  // throws a ReferenceError in strict mode / ES modules.
  let offset = 0;
  Module.setValue(ptr, buffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.windowSize || 256, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.maxSpeechDuration || 20, 'float');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}
90
+
91
  function initSherpaOnnxVadModelConfig(config, Module) {
92
  if (!('sileroVad' in config)) {
93
  config.sileroVad = {
 
100
  };
101
  }
102
 
103
+ if (!('tenVad' in config)) {
104
+ config.tenVad = {
105
+ model: '',
106
+ threshold: 0.50,
107
+ minSilenceDuration: 0.50,
108
+ minSpeechDuration: 0.25,
109
+ windowSize: 256,
110
+ maxSpeechDuration: 20,
111
+ };
112
+ }
113
+
114
  const sileroVad =
115
  initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);
116
 
117
+ const tenVad = initSherpaOnnxTenVadModelConfig(config.tenVad, Module);
118
+
119
+ const len = sileroVad.len + 4 * 4 + tenVad.len;
120
  const ptr = Module._malloc(len);
121
 
122
  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
 
139
  Module.setValue(ptr + offset, config.debug || 0, 'i32');
140
  offset += 4;
141
 
142
+ Module._CopyHeap(tenVad.ptr, tenVad.len, ptr + offset);
143
+ offset += tenVad.len;
144
+
145
  return {
146
+ buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad, tenVad: tenVad
147
  }
148
  }
149
 
 
157
  windowSize: 512,
158
  };
159
 
160
+ const tenVad = {
161
+ model: '',
162
+ threshold: 0.50,
163
+ minSilenceDuration: 0.50,
164
+ minSpeechDuration: 0.25,
165
+ maxSpeechDuration: 20,
166
+ windowSize: 256,
167
+ };
168
+
169
  let config = {
170
  sileroVad: sileroVad,
171
+ tenVad: tenVad,
172
  sampleRate: 16000,
173
  numThreads: 1,
174
  provider: 'cpu',
sherpa-onnx-wasm-main-vad.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f08d12ca25857236f9f06e9353588c54435b07a7e74a7d79ca4a1f8c6918c5d
3
- size 1807857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd37ef028574ef87d799d42c323da6e2218f024b2bb2a83fce733ca95537ac3b
3
+ size 644930
sherpa-onnx-wasm-main-vad.js CHANGED
The diff for this file is too large to render. See raw diff
 
sherpa-onnx-wasm-main-vad.wasm CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c186ecca23100bbb06f3834f112a087f507d1ec83e207cf1dd5c60e6e3a5346
3
- size 10364436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6d196e2afdf9e034b08b6c51c1acdd0f8d88d850d2c87fc092fde37b5404ca
3
+ size 10400397