Spaces:
Sleeping
Sleeping
| <!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8" /> | |
| <!-- Lay the page out at device width on phones instead of a zoomed-out desktop canvas. --> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <title>🎤 Real-Time ASR Demo</title> | |
| <style> | |
| /* Page shell: a single column centered both vertically and horizontally. */ | |
| body { | |
|   /* border-box keeps the 2rem padding inside min-height; with the default | |
|      content-box the page is always 4rem taller than the viewport and scrolls. */ | |
|   box-sizing: border-box; | |
|   font-family: "Segoe UI", sans-serif; | |
|   background-color: #f5f6fa; | |
|   display: flex; | |
|   flex-direction: column; | |
|   align-items: center; | |
|   justify-content: center; | |
|   min-height: 100vh; | |
|   margin: 0; | |
|   padding: 2rem; | |
|   color: #2f3640; | |
| } | |
| /* Page heading. */ | |
| h1 { | |
|   margin-bottom: 1rem; | |
|   font-size: 2rem; | |
| } | |
| /* Hotword bias indicator text. */ | |
| #hotwordStatus { | |
|   font-size: 0.9rem; | |
|   color: #e1b12c; | |
|   font-weight: bold; | |
| } | |
| /* Volume meter. Native appearance is switched off so the track and the fill | |
|    can be painted through the vendor pseudo-elements below. */ | |
| #vol { | |
|   width: 300px; | |
|   height: 20px; | |
|   margin-bottom: 1rem; | |
|   -webkit-appearance: none; /* older WebKit builds only honor the prefixed form */ | |
|   appearance: none; | |
|   /* Firefox has no track pseudo-element: the track is the element itself, so | |
|      give it the same look as ::-webkit-progress-bar and drop the default border. */ | |
|   border: none; | |
|   border-radius: 8px; | |
|   background-color: #dcdde1; | |
| } | |
| /* WebKit/Blink: track. */ | |
| #vol::-webkit-progress-bar { | |
|   background-color: #dcdde1; | |
|   border-radius: 8px; | |
| } | |
| /* WebKit/Blink: filled part. */ | |
| #vol::-webkit-progress-value { | |
|   background-color: #44bd32; | |
|   border-radius: 8px; | |
|   transition: width 0.2s; | |
| } | |
| /* Firefox: filled part. Kept as a separate rule because an unknown vendor | |
|    pseudo-element in a selector list invalidates the whole rule. */ | |
| #vol::-moz-progress-bar { | |
|   background-color: #44bd32; | |
|   border-radius: 8px; | |
|   transition: width 0.2s; | |
| } | |
| /* Card that holds the transcript caption and the transcript box. */ | |
| .output { | |
|   display: flex; | |
|   flex-direction: column; | |
|   width: 90%; | |
|   max-width: 800px; | |
|   margin-top: 2rem; | |
|   padding: 1rem 1.5rem; | |
|   border-radius: 10px; | |
|   background: white; | |
|   box-shadow: 0 0 10px rgba(0,0,0,0.1); | |
|   text-align: left; | |
| } | |
| /* Scrollable transcript box: flexes inside the card, capped at 300px tall. */ | |
| .transcript-container { | |
|   flex: 1; | |
|   max-height: 300px; | |
|   overflow-y: auto; | |
|   margin-top: 0.5rem; | |
|   padding: 0.5rem; | |
|   border: 1px solid #dcdde1; | |
|   border-radius: 8px; | |
|   background: #fff; | |
|   color: #353b48; | |
|   font-size: 1.1rem; | |
|   white-space: pre-wrap; /* keep line breaks that appear in the text */ | |
| } | |
| /* Muted bold caption placed in front of a value. */ | |
| .label { | |
|   color: #718093; | |
|   font-weight: bold; | |
| } | |
| /* One horizontal row of form controls. */ | |
| .controls { | |
|   display: flex; | |
|   align-items: center; | |
|   gap: 1rem; | |
|   margin-bottom: 1rem; | |
| } | |
| .controls label { | |
|   color: #2f3640; | |
|   font-weight: bold; | |
| } | |
| /* Fixed width for the select boxes and the boost slider. */ | |
| .controls input[type="range"], | |
| .controls select { | |
|   width: 150px; | |
| } | |
| /* Hotword textarea: takes the remaining row width, resizable vertically only. */ | |
| .controls textarea { | |
|   flex: 1; | |
|   min-height: 4rem; | |
|   resize: vertical; | |
|   padding: 0.3rem; | |
|   border: 1px solid #dcdde1; | |
|   border-radius: 5px; | |
|   background: white; | |
|   font-size: 1rem; | |
| } | |
| /* The model and microphone info lines share the same small-text look. */ | |
| .model-info, | |
| .mic-info { | |
|   margin-bottom: 1rem; | |
|   color: #353b48; | |
|   font-size: 0.9rem; | |
| } | |
| /* Values inside the model info line are emphasized. */ | |
| .model-info span { | |
|   font-weight: bold; | |
| } | |
| /* Small gap between a caption and its value in the microphone info line. */ | |
| .mic-info .label { | |
|   margin-right: 0.25rem; | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <h1>🎤 Speak into your microphone</h1> | |
| <!-- Model and precision pickers. Each model option value is used by the page script as a key into MODEL_METADATA and is sent to the server in the "config" message, so values must match those keys exactly. --> | |
| <div class="controls"> | |
| <label for="modelSelect">Model:</label> | |
| <select id="modelSelect"> | |
| <option value="csukuangfj/k2fsa-zipformer-bilingual-zh-en-t">k2fsa-small-bilingual-zh-en</option> | |
| <option value="pfluo/k2fsa-zipformer-chinese-english-mixed">k2fsa-chinese-english-mixed</option> | |
| <option value="k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16">sherpa-onnx-zipformer-korean</option> | |
| <option value="k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12">zipformer-multi-zh-hans</option> | |
| <option value="pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615">icefall-zipformer-wenetspeech</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26">zipformer-en-06-26</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21">zipformer-en-06-21</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21">zipformer-en-02-21</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20">zipformer-zh-en</option> | |
| <option value="shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14">zipformer-fr</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23">zipformer-zh-14M</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17">zipformer-en-20M</option> | |
| <option value="csukuangfj/sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10">zipformer-ar_en_id_ja_ru_th_vi_zh</option> | |
| </select> | |
| <!-- Precision value is sent to the server as the "precision" field of the config message. --> | |
| <label for="precisionSelect">Precision:</label> | |
| <select id="precisionSelect"> | |
| <option value="fp32">FP32</option> | |
| <option value="int8">INT8</option> | |
| </select> | |
| </div> | |
| <div class="controls"> | |
| <!-- Hotwords list: one entry per line; the script drops empty lines before sending. --> | |
| <label for="hotwordsList">Hotwords:</label> | |
| <textarea id="hotwordsList" placeholder="Enter one hotword per line"></textarea> | |
| <!-- Boost score slider; the span inside the label mirrors the slider value. --> | |
| <label for="boostScore">Boost Score: <span id="boostValue">2.0</span></label> | |
| <input type="range" id="boostScore" min="0" max="10" step="0.1" value="2.0" /> | |
| <!-- Applies the hotword settings. Explicit type="button": the default type is "submit", which would submit a form if this row were ever wrapped in one. --> | |
| <button type="button" id="applyHotwords">Apply Hotwords</button> | |
| </div> | |
| <!-- Indicator showing whether biasing is On or Off; role="status" makes the change audible to screen-reader users as a polite live region. --> | |
| <div class="controls"> | |
| <span id="hotwordStatus" role="status">Hotword Bias: Off</span> | |
| </div> | |
| <!-- Languages and size of the selected model; both spans are filled in by the script. --> | |
| <div class="model-info" id="modelInfo"> | |
| Languages: <span id="modelLangs"></span> | Size: <span id="modelSize"></span> MB | |
| </div> | |
| <!-- Capture device label and audio sample rate, filled in once microphone access is granted. --> | |
| <div class="mic-info"> | |
| <span class="label">Microphone:</span> <span id="micName">Detecting...</span><br> | |
| <span class="label">Sample Rate:</span> <span id="sampleRate">-</span> Hz | |
| </div> | |
| <!-- Input level meter, driven by the "volume" field of server messages. It has no visible label, so aria-label supplies its accessible name. --> | |
| <progress id="vol" max="1" value="0" aria-label="Microphone input level"></progress> | |
| <!-- Transcript card; the script replaces the box content with the latest partial result. --> | |
| <div class="output"> | |
| <div><span class="label">Transcript:</span></div> | |
| <div id="transcript" class="transcript-container">...</div> | |
| </div> | |
| <script> | |
| const MODEL_METADATA = { | |
| "csukuangfj/k2fsa-zipformer-bilingual-zh-en-t": { language: ["zh", "en"], size: 115 }, | |
| "pfluo/k2fsa-zipformer-chinese-english-mixed": { language: ["zh", "en"], size: 342 }, | |
| "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": { language: "korean", size: 300 }, | |
| "k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": { language: "zh-Hans", size: 258 }, | |
| "pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": { language: "zh (WenetSpeech)", size: 273 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": { language: "english", size: 340 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": { language: "english", size: 340 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": { language: "english", size: 341 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": { language: ["zh", "en"], size: 342 }, | |
| "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": { language: "french", size: 282 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": { language: "zh", size: 53 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": { language: "en", size: 88 }, | |
| "csukuangfj/sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10": {language: ["ar","en","id","ja","ru","th","vi","zh"], size: 338} | |
| }; | |
| let orig_sample_rate; | |
| let ws; | |
| const vol = document.getElementById("vol"); | |
| const transcript = document.getElementById("transcript"); | |
| const modelSelect = document.getElementById("modelSelect"); | |
| const precisionSelect = document.getElementById("precisionSelect"); | |
| const hotwordsList = document.getElementById("hotwordsList"); | |
| const boostScore = document.getElementById("boostScore"); | |
| const boostValue = document.getElementById("boostValue"); | |
| const applyBtn = document.getElementById("applyHotwords"); | |
| const hotwordStatus = document.getElementById("hotwordStatus"); | |
| const modelLangs = document.getElementById("modelLangs"); | |
| const modelSize = document.getElementById("modelSize"); | |
| const micNameElem = document.getElementById("micName"); | |
| const sampleRateElem = document.getElementById("sampleRate"); | |
| // ← Helper to toggle the status text | |
| function updateHotwordStatus() { | |
| const enabled = hotwordsList.value.split(/\r?\n/).filter(Boolean).length > 0 | |
| && parseFloat(boostScore.value) > 0; | |
| hotwordStatus.textContent = enabled | |
| ? "Hotword Bias: On" | |
| : "Hotword Bias: Off"; | |
| } | |
| function updateModelInfo() { | |
| const meta = MODEL_METADATA[modelSelect.value]; | |
| if (Array.isArray(meta.language)) { | |
| modelLangs.textContent = meta.language.join(", "); | |
| } else { | |
| modelLangs.textContent = meta.language; | |
| } | |
| modelSize.textContent = meta.size; | |
| } | |
| function sendConfig() { | |
| if (ws && ws.readyState === WebSocket.OPEN) { | |
| ws.send(JSON.stringify({ | |
| type: "config", | |
| sampleRate: orig_sample_rate, | |
| model: modelSelect.value, | |
| precision: precisionSelect.value, | |
| hotwords: hotwordsList.value.split(/\r?\n/).filter(Boolean), | |
| hotwordsScore: parseFloat(boostScore.value) | |
| })); | |
| } else { | |
| console.warn("WebSocket not open yet. Cannot send config."); | |
| } | |
| } | |
| navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => { | |
| const context = new AudioContext(); | |
| orig_sample_rate = context.sampleRate; | |
| // Update mic info in UI | |
| const track = stream.getAudioTracks()[0]; | |
| micNameElem.textContent = track.label || 'Unknown'; | |
| sampleRateElem.textContent = orig_sample_rate; | |
| updateModelInfo(); | |
| // Now that we know the sample rate, open the WS | |
| ws = new WebSocket(`wss://${location.host}/ws`); | |
| ws.onopen = () => sendConfig(); | |
| ws.onerror = err => console.error("WebSocket error:", err); | |
| ws.onclose = () => console.log("WebSocket closed"); | |
| ws.onmessage = e => { | |
| const msg = JSON.parse(e.data); | |
| if (msg.volume !== undefined) { | |
| vol.value = Math.min(msg.volume, 1.0); | |
| } | |
| if (msg.partial) { | |
| // replace content… | |
| transcript.textContent = msg.partial; | |
| // …then scroll to bottom | |
| transcript.scrollTop = transcript.scrollHeight; | |
| } | |
| }; | |
| modelSelect.addEventListener("change", () => { | |
| updateModelInfo(); | |
| sendConfig(); | |
| updateHotwordStatus(); | |
| }); | |
| precisionSelect.addEventListener("change", () => { | |
| sendConfig(); | |
| updateHotwordStatus(); | |
| }); | |
| applyBtn.addEventListener("click", () => { | |
| sendConfig(); | |
| updateHotwordStatus(); | |
| }); | |
| // Update boost display and status on slider input | |
| boostScore.addEventListener("input", () => { | |
| boostValue.textContent = boostScore.value; | |
| updateHotwordStatus(); | |
| }); | |
| const source = context.createMediaStreamSource(stream); | |
| const processor = context.createScriptProcessor(4096, 1, 1); | |
| source.connect(processor); | |
| processor.connect(context.destination); | |
| processor.onaudioprocess = e => { | |
| const input = e.inputBuffer.getChannelData(0); | |
| ws.send(new Float32Array(input).buffer); | |
| }; | |
| }); | |
| </script> | |
| </body> | |
| </html> | |