Spaces:
Running
Running
- Fixes a runtime error caused by the `enableGraphCapture` flag in recent ONNX Runtime Web builds. See parakeet.js library repo for details.
Browse files- package.json +1 -1
- src/App.js +307 -315
package.json
CHANGED
@@ -7,7 +7,7 @@
|
|
7 |
"@testing-library/jest-dom": "^6.6.3",
|
8 |
"@testing-library/react": "^16.3.0",
|
9 |
"@testing-library/user-event": "^13.5.0",
|
10 |
-
"parakeet.js": "^0.0.
|
11 |
"onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4",
|
12 |
"react": "^19.1.0",
|
13 |
"react-dom": "^19.1.0",
|
|
|
7 |
"@testing-library/jest-dom": "^6.6.3",
|
8 |
"@testing-library/react": "^16.3.0",
|
9 |
"@testing-library/user-event": "^13.5.0",
|
10 |
+
"parakeet.js": "^0.0.3",
|
11 |
"onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4",
|
12 |
"react": "^19.1.0",
|
13 |
"react-dom": "^19.1.0",
|
src/App.js
CHANGED
@@ -1,315 +1,307 @@
|
|
1 |
-
import React, { useState, useRef, useEffect } from 'react';
|
2 |
-
import { ParakeetModel, getParakeetModel } from 'parakeet.js';
|
3 |
-
import './App.css';
|
4 |
-
|
5 |
-
export default function App() {
|
6 |
-
const repoId = 'istupakov/parakeet-tdt-0.6b-v2-onnx';
|
7 |
-
const [backend, setBackend] = useState('webgpu-hybrid');
|
8 |
-
const [
|
9 |
-
const [
|
10 |
-
const [
|
11 |
-
const [
|
12 |
-
const [
|
13 |
-
const [
|
14 |
-
const [
|
15 |
-
const [
|
16 |
-
const [
|
17 |
-
const [
|
18 |
-
const [
|
19 |
-
const [
|
20 |
-
const [frameStride, setFrameStride] = useState(1);
|
21 |
-
const [dumpDetail, setDumpDetail] = useState(false);
|
22 |
-
const maxCores = navigator.hardwareConcurrency || 8;
|
23 |
-
const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
|
24 |
-
const modelRef = useRef(null);
|
25 |
-
const fileInputRef = useRef(null);
|
26 |
-
|
27 |
-
// Auto-adjust quant
|
28 |
-
useEffect(() => {
|
29 |
-
if (backend.startsWith('webgpu')) {
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
<
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
<label>
|
163 |
-
|
164 |
-
<select value={
|
165 |
-
<option value="
|
166 |
-
<option value="
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
<option value="
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
<
|
181 |
-
<
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
<option value=
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
<
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
</
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
<
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
{/*
|
278 |
-
{
|
279 |
-
<div className="
|
280 |
-
<
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
<a href="https://www.npmjs.com/package/parakeet.js" target="_blank" rel="noopener noreferrer">
|
309 |
-
npm Package
|
310 |
-
</a>
|
311 |
-
</p>
|
312 |
-
</div>
|
313 |
-
</div>
|
314 |
-
);
|
315 |
-
}
|
|
|
1 |
+
import React, { useState, useRef, useEffect } from 'react';
|
2 |
+
import { ParakeetModel, getParakeetModel } from 'parakeet.js';
|
3 |
+
import './App.css';
|
4 |
+
|
5 |
+
export default function App() {
|
6 |
+
const repoId = 'istupakov/parakeet-tdt-0.6b-v2-onnx';
|
7 |
+
const [backend, setBackend] = useState('webgpu-hybrid');
|
8 |
+
const [encoderQuant, setEncoderQuant] = useState('fp32');
|
9 |
+
const [decoderQuant, setDecoderQuant] = useState('int8');
|
10 |
+
const [preprocessor, setPreprocessor] = useState('nemo128');
|
11 |
+
const [status, setStatus] = useState('Idle');
|
12 |
+
const [progress, setProgress] = useState('');
|
13 |
+
const [progressText, setProgressText] = useState('');
|
14 |
+
const [progressPct, setProgressPct] = useState(null);
|
15 |
+
const [text, setText] = useState('');
|
16 |
+
const [latestMetrics, setLatestMetrics] = useState(null);
|
17 |
+
const [transcriptions, setTranscriptions] = useState([]);
|
18 |
+
const [isTranscribing, setIsTranscribing] = useState(false);
|
19 |
+
const [verboseLog, setVerboseLog] = useState(false);
|
20 |
+
const [frameStride, setFrameStride] = useState(1);
|
21 |
+
const [dumpDetail, setDumpDetail] = useState(false);
|
22 |
+
const maxCores = navigator.hardwareConcurrency || 8;
|
23 |
+
const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
|
24 |
+
const modelRef = useRef(null);
|
25 |
+
const fileInputRef = useRef(null);
|
26 |
+
|
27 |
+
// Auto-adjust quant presets when backend changes
|
28 |
+
useEffect(() => {
|
29 |
+
if (backend.startsWith('webgpu')) {
|
30 |
+
setEncoderQuant('fp32');
|
31 |
+
setDecoderQuant('int8');
|
32 |
+
} else {
|
33 |
+
setEncoderQuant('int8');
|
34 |
+
setDecoderQuant('int8');
|
35 |
+
}
|
36 |
+
}, [backend]);
|
37 |
+
|
38 |
+
async function loadModel() {
|
39 |
+
setStatus('Loading model…');
|
40 |
+
setProgress('');
|
41 |
+
setProgressText('');
|
42 |
+
setProgressPct(0);
|
43 |
+
console.time('LoadModel');
|
44 |
+
|
45 |
+
try {
|
46 |
+
const progressCallback = (p) => setProgress(`${p.file}: ${Math.round(p.loaded/p.total*100)}%`);
|
47 |
+
|
48 |
+
// 1. Download all model files from HuggingFace Hub
|
49 |
+
const modelUrls = await getParakeetModel(repoId, {
|
50 |
+
encoderQuant,
|
51 |
+
decoderQuant,
|
52 |
+
preprocessor,
|
53 |
+
progress: progressCallback
|
54 |
+
});
|
55 |
+
|
56 |
+
// Show compiling sessions stage
|
57 |
+
setStatus('Creating sessions…');
|
58 |
+
setProgressText('Compiling model (this may take ~10 s)…');
|
59 |
+
setProgressPct(null);
|
60 |
+
|
61 |
+
// 2. Create the model instance with all file URLs
|
62 |
+
modelRef.current = await ParakeetModel.fromUrls({
|
63 |
+
...modelUrls.urls,
|
64 |
+
backend,
|
65 |
+
});
|
66 |
+
|
67 |
+
// 3. Warm-up and verify
|
68 |
+
setStatus('Warming up & verifying…');
|
69 |
+
setProgressText('Model ready! Upload an audio file to transcribe.');
|
70 |
+
setProgressPct(null);
|
71 |
+
|
72 |
+
console.timeEnd('LoadModel');
|
73 |
+
setStatus('Model ready ✔');
|
74 |
+
setProgressText('');
|
75 |
+
} catch (e) {
|
76 |
+
console.error(e);
|
77 |
+
setStatus(`Failed: ${e.message}`);
|
78 |
+
setProgress('');
|
79 |
+
}
|
80 |
+
}
|
81 |
+
|
82 |
+
async function transcribeFile(e) {
|
83 |
+
if (!modelRef.current) return alert('Load model first');
|
84 |
+
const file = e.target.files?.[0];
|
85 |
+
if (!file) return;
|
86 |
+
|
87 |
+
setIsTranscribing(true);
|
88 |
+
setStatus(`Transcribing "${file.name}"…`);
|
89 |
+
|
90 |
+
try {
|
91 |
+
const buf = await file.arrayBuffer();
|
92 |
+
const audioCtx = new AudioContext({ sampleRate: 16000 });
|
93 |
+
const decoded = await audioCtx.decodeAudioData(buf);
|
94 |
+
const pcm = decoded.getChannelData(0);
|
95 |
+
|
96 |
+
console.time(`Transcribe-${file.name}`);
|
97 |
+
const res = await modelRef.current.transcribe(pcm, 16_000, {
|
98 |
+
returnTimestamps: true,
|
99 |
+
returnConfidences: true,
|
100 |
+
frameStride
|
101 |
+
});
|
102 |
+
console.timeEnd(`Transcribe-${file.name}`);
|
103 |
+
|
104 |
+
if (dumpDetail) {
|
105 |
+
console.log('[Parakeet] Detailed transcription output', res);
|
106 |
+
}
|
107 |
+
setLatestMetrics(res.metrics);
|
108 |
+
// Add to transcriptions list
|
109 |
+
const newTranscription = {
|
110 |
+
id: Date.now(),
|
111 |
+
filename: file.name,
|
112 |
+
text: res.utterance_text,
|
113 |
+
timestamp: new Date().toLocaleTimeString(),
|
114 |
+
duration: pcm.length / 16000, // duration in seconds
|
115 |
+
wordCount: res.words?.length || 0,
|
116 |
+
confidence: res.confidence_scores?.token_avg ?? res.confidence_scores?.word_avg ?? null,
|
117 |
+
metrics: res.metrics
|
118 |
+
};
|
119 |
+
|
120 |
+
setTranscriptions(prev => [newTranscription, ...prev]);
|
121 |
+
setText(res.utterance_text); // Show latest transcription
|
122 |
+
setStatus('Model ready ✔'); // Ready for next file
|
123 |
+
|
124 |
+
} catch (error) {
|
125 |
+
console.error('Transcription failed:', error);
|
126 |
+
setStatus('Transcription failed');
|
127 |
+
alert(`Failed to transcribe "${file.name}": ${error.message}`);
|
128 |
+
} finally {
|
129 |
+
setIsTranscribing(false);
|
130 |
+
// Clear the file input so the same file can be selected again
|
131 |
+
if (fileInputRef.current) {
|
132 |
+
fileInputRef.current.value = '';
|
133 |
+
}
|
134 |
+
}
|
135 |
+
}
|
136 |
+
|
137 |
+
function clearTranscriptions() {
|
138 |
+
setTranscriptions([]);
|
139 |
+
setText('');
|
140 |
+
}
|
141 |
+
|
142 |
+
return (
|
143 |
+
<div className="app">
|
144 |
+
<h2>🦜 Parakeet.js - HF Spaces Demo</h2>
|
145 |
+
<p>NVIDIA Parakeet speech recognition for the browser using WebGPU/WASM</p>
|
146 |
+
|
147 |
+
<div className="controls">
|
148 |
+
<p>
|
149 |
+
<strong>Model:</strong> {repoId}
|
150 |
+
</p>
|
151 |
+
</div>
|
152 |
+
|
153 |
+
<div className="controls">
|
154 |
+
<label>
|
155 |
+
Backend:
|
156 |
+
<select value={backend} onChange={e=>setBackend(e.target.value)}>
|
157 |
+
<option value="webgpu-hybrid">WebGPU</option>
|
158 |
+
<option value="wasm">WASM (CPU)</option>
|
159 |
+
</select>
|
160 |
+
</label>
|
161 |
+
{' '}
|
162 |
+
<label>
|
163 |
+
Encoder Quant:
|
164 |
+
<select value={encoderQuant} onChange={e=>setEncoderQuant(e.target.value)}>
|
165 |
+
<option value="int8">int8 (faster)</option>
|
166 |
+
<option value="fp32">fp32 (higher quality)</option>
|
167 |
+
</select>
|
168 |
+
</label>
|
169 |
+
{' '}
|
170 |
+
<label>
|
171 |
+
Decoder Quant:
|
172 |
+
<select value={decoderQuant} onChange={e=>setDecoderQuant(e.target.value)}>
|
173 |
+
<option value="int8">int8 (faster)</option>
|
174 |
+
<option value="fp32">fp32 (higher quality)</option>
|
175 |
+
</select>
|
176 |
+
</label>
|
177 |
+
{' '}
|
178 |
+
<label>
|
179 |
+
Preprocessor:
|
180 |
+
<select value={preprocessor} onChange={e=>setPreprocessor(e.target.value)}>
|
181 |
+
<option value="nemo128">nemo128 (default)</option>
|
182 |
+
</select>
|
183 |
+
</label>
|
184 |
+
{' '}
|
185 |
+
<label>
|
186 |
+
Stride:
|
187 |
+
<select value={frameStride} onChange={e=>setFrameStride(Number(e.target.value))}>
|
188 |
+
<option value={1}>1</option>
|
189 |
+
<option value={2}>2</option>
|
190 |
+
<option value={4}>4</option>
|
191 |
+
</select>
|
192 |
+
</label>
|
193 |
+
{' '}
|
194 |
+
<label>
|
195 |
+
<input type="checkbox" checked={verboseLog} onChange={e => setVerboseLog(e.target.checked)} />
|
196 |
+
Verbose Log
|
197 |
+
</label>
|
198 |
+
{' '}
|
199 |
+
<label style={{fontSize:'0.9em'}}>
|
200 |
+
<input type="checkbox" checked={dumpDetail} onChange={e=>setDumpDetail(e.target.checked)} />
|
201 |
+
Dump result to console
|
202 |
+
</label>
|
203 |
+
{(backend === 'wasm') && (
|
204 |
+
<label style={{fontSize:'0.9em'}}>
|
205 |
+
Threads:
|
206 |
+
<input type="number" min="1" max={maxCores} value={cpuThreads} onChange={e=>setCpuThreads(Number(e.target.value))} style={{width:'4rem'}} />
|
207 |
+
</label>
|
208 |
+
)}
|
209 |
+
<button
|
210 |
+
onClick={loadModel}
|
211 |
+
disabled={!status.toLowerCase().includes('fail') && status !== 'Idle'}
|
212 |
+
className="primary"
|
213 |
+
>
|
214 |
+
{status === 'Model ready ✔' ? 'Model Loaded' : 'Load Model'}
|
215 |
+
</button>
|
216 |
+
</div>
|
217 |
+
|
218 |
+
{typeof SharedArrayBuffer === 'undefined' && backend === 'wasm' && (
|
219 |
+
<div style={{
|
220 |
+
marginBottom: '1rem',
|
221 |
+
padding: '0.5rem',
|
222 |
+
backgroundColor: '#fff3cd',
|
223 |
+
border: '1px solid #ffeaa7',
|
224 |
+
borderRadius: '4px',
|
225 |
+
fontSize: '0.9em'
|
226 |
+
}}>
|
227 |
+
⚠️ <strong>Performance Note:</strong> SharedArrayBuffer is not available.
|
228 |
+
WASM will run single-threaded. For better performance, use WebGPU.
|
229 |
+
</div>
|
230 |
+
)}
|
231 |
+
|
232 |
+
<div className="controls">
|
233 |
+
<input
|
234 |
+
ref={fileInputRef}
|
235 |
+
type="file"
|
236 |
+
accept="audio/*"
|
237 |
+
onChange={transcribeFile}
|
238 |
+
disabled={status !== 'Model ready ✔' || isTranscribing}
|
239 |
+
/>
|
240 |
+
{transcriptions.length > 0 && (
|
241 |
+
<button
|
242 |
+
onClick={clearTranscriptions}
|
243 |
+
style={{ marginLeft: '1rem', padding: '0.25rem 0.5rem' }}
|
244 |
+
>
|
245 |
+
Clear History
|
246 |
+
</button>
|
247 |
+
)}
|
248 |
+
</div>
|
249 |
+
|
250 |
+
<p>Status: {status}</p>
|
251 |
+
{progressPct!==null && (
|
252 |
+
<div className="progress-wrapper">
|
253 |
+
<div className="progress-bar"><div style={{ width: `${progressPct}%` }} /></div>
|
254 |
+
<p className="progress-text">{progressText}</p>
|
255 |
+
</div>
|
256 |
+
)}
|
257 |
+
|
258 |
+
{/* Latest transcription */}
|
259 |
+
<div className="controls">
|
260 |
+
<h3>Latest Transcription:</h3>
|
261 |
+
<textarea
|
262 |
+
value={text}
|
263 |
+
readOnly
|
264 |
+
className="textarea"
|
265 |
+
placeholder="Transcribed text will appear here..."
|
266 |
+
/>
|
267 |
+
</div>
|
268 |
+
|
269 |
+
{/* Latest transcription performace info */}
|
270 |
+
{latestMetrics && (
|
271 |
+
<div className="performance">
|
272 |
+
<strong>RTF:</strong> {latestMetrics.rtf?.toFixed(2)}x | Total: {latestMetrics.total_ms} ms<br/>
|
273 |
+
Preprocess {latestMetrics.preprocess_ms} ms · Encode {latestMetrics.encode_ms} ms · Decode {latestMetrics.decode_ms} ms · Tokenize {latestMetrics.tokenize_ms} ms
|
274 |
+
</div>
|
275 |
+
)}
|
276 |
+
|
277 |
+
{/* Transcription history */}
|
278 |
+
{transcriptions.length > 0 && (
|
279 |
+
<div className="history">
|
280 |
+
<h3>Transcription History ({transcriptions.length} files):</h3>
|
281 |
+
<div style={{ maxHeight: '400px', overflowY: 'auto', border: '1px solid #ddd', borderRadius: '4px' }}>
|
282 |
+
{transcriptions.map((trans) => (
|
283 |
+
<div className="history-item" key={trans.id}>
|
284 |
+
<div className="history-meta"><strong>{trans.filename}</strong><span>{trans.timestamp}</span></div>
|
285 |
+
<div className="history-stats">Duration: {trans.duration.toFixed(1)}s | Words: {trans.wordCount}{trans.confidence && ` | Confidence: ${trans.confidence.toFixed(2)}`}{trans.metrics && ` | RTF: ${trans.metrics.rtf?.toFixed(2)}x`}</div>
|
286 |
+
<div className="history-text">{trans.text}</div>
|
287 |
+
</div>
|
288 |
+
))}
|
289 |
+
</div>
|
290 |
+
</div>
|
291 |
+
)}
|
292 |
+
|
293 |
+
<div style={{ marginTop: '2rem', padding: '1rem', backgroundColor: '#f8f9fa', borderRadius: '4px', fontSize: '0.9em' }}>
|
294 |
+
<h4>🔗 Links:</h4>
|
295 |
+
<p>
|
296 |
+
<a href="https://github.com/ysdede/parakeet.js" target="_blank" rel="noopener noreferrer">
|
297 |
+
GitHub Repository
|
298 |
+
</a>
|
299 |
+
{' | '}
|
300 |
+
<a href="https://www.npmjs.com/package/parakeet.js" target="_blank" rel="noopener noreferrer">
|
301 |
+
npm Package
|
302 |
+
</a>
|
303 |
+
</p>
|
304 |
+
</div>
|
305 |
+
</div>
|
306 |
+
);
|
307 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|