<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Without a viewport declaration mobile browsers lay the page out at desktop width. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>🎤 Real-Time ASR Demo</title>
  <style>
    /* Page shell: a single centered column. */
    body {
      font-family: "Segoe UI", sans-serif;
      background-color: #f5f6fa;
      display: flex;
      flex-direction: column;
      align-items: center;
      justify-content: center;
      min-height: 100vh;
      margin: 0;
      padding: 2rem;
      color: #2f3640;
    }

    h1 {
      margin-bottom: 1rem;
      font-size: 2rem;
    }

    /* Volume meter (<progress id="vol">). */
    #vol {
      width: 300px;
      height: 20px;
      margin-bottom: 1rem;
      /* The prefixed form is still required by older WebKit/Blink builds before
         the ::-webkit-progress-* pseudo-elements below take effect. */
      -webkit-appearance: none;
      appearance: none;
      /* Firefox paints the track on the element itself, so give it the same
         look that ::-webkit-progress-bar gets in WebKit/Blink. */
      border: none;
      border-radius: 8px;
      background-color: #dcdde1;
    }

    /* WebKit/Blink: track. */
    #vol::-webkit-progress-bar {
      background-color: #dcdde1;
      border-radius: 8px;
    }

    /* WebKit/Blink: filled portion. */
    #vol::-webkit-progress-value {
      background-color: #44bd32;
      border-radius: 8px;
      transition: width 0.2s;
    }

    /* Firefox: filled portion. */
    #vol::-moz-progress-bar {
      background-color: #44bd32;
      border-radius: 8px;
      transition: width 0.2s;
    }

    /* Card that holds the partial and final transcripts. */
    .output {
      width: 90%;
      max-width: 800px;
      text-align: left;
      margin-top: 2rem;
      background: white;
      padding: 1rem 1.5rem;
      border-radius: 10px;
      box-shadow: 0 0 10px rgba(0,0,0,0.1);
    }

    .label {
      font-weight: bold;
      color: #718093;
    }

    #partial {
      font-size: 1.25rem;
      color: #353b48;
    }

    #final {
      font-size: 1.4rem;
      color: #e84118;
    }

    /* Row of model / precision pickers. */
    .controls {
      display: flex;
      gap: 1rem;
      margin-bottom: 1rem;
      align-items: center;
    }

    .controls label {
      font-weight: bold;
      color: #2f3640;
    }

    .controls select {
      padding: 0.3rem;
      border-radius: 5px;
      border: 1px solid #dcdde1;
      background: white;
    }

    /* "Languages / Size" line under the pickers. */
    .model-info {
      margin-bottom: 1rem;
      font-size: 0.9rem;
      color: #353b48;
    }

    .model-info span {
      font-weight: bold;
    }
  </style>
</head>
<body>
<h1>🎤 Speak into your microphone</h1>

<!-- Model and precision pickers; the page script looks both up by id. -->
<div class="controls">
  <label for="modelSelect">Model:</label>
  <select id="modelSelect">
    <option value="pfluo/k2fsa-zipformer-chinese-english-mixed">k2fsa-chinese-english-mixed</option>
    <option value="k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16">sherpa-onnx-zipformer-korean</option>
    <option value="k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12">zipformer-multi-zh-hans</option>
    <option value="pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615">icefall-zipformer-wenetspeech</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26">zipformer-en-06-26</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21">zipformer-en-06-21</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21">zipformer-en-02-21</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20">zipformer-zh-en</option>
    <option value="shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14">zipformer-fr</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16">zipformer-small-zh-en</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23">zipformer-zh-14M</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17">zipformer-en-20M</option>
  </select>

  <label for="precisionSelect">Precision:</label>
  <select id="precisionSelect">
    <option value="fp32">FP32</option>
    <option value="int8">INT8</option>
  </select>
</div>

<!-- The two empty spans are filled in by updateModelInfo() in the page script. -->
<div class="model-info" id="modelInfo">
  Languages: <span id="modelLangs"></span> | Size: <span id="modelSize"></span> MB
</div>
<!-- Volume meter driven by the "volume" field of server messages. The aria-label gives
     the bar an accessible name, since it has no visible <label>. -->
<progress id="vol" max="1" value="0" aria-label="Microphone volume"></progress>
<!-- Transcript card: #partial and #final are overwritten by the socket message handler. -->
<div class="output">
  <div>
    <span class="label">Partial:</span>
    <span id="partial">...</span>
  </div>
  <div>
    <span class="label">Final:</span>
    <b id="final">...</b>
  </div>
</div>
<script>
// Display metadata for each selectable model, keyed by the same string used as the
// <option> value in #modelSelect.
//   language: one label, or an array of labels (joined with ", " for display)
//   size:     number rendered in front of the "MB" suffix in the model-info line
const MODEL_METADATA = {
  "pfluo/k2fsa-zipformer-chinese-english-mixed": {
    language: ["zh", "en"],
    size: 342
  },
  "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": {
    language: "korean",
    size: 300
  },
  "k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": {
    language: "zh-Hans",
    size: 258
  },
  "pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": {
    language: "zh (WenetSpeech)",
    size: 273
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": {
    language: "english",
    size: 340
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": {
    language: "english",
    size: 340
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": {
    language: "english",
    size: 341
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": {
    language: ["zh", "en"],
    size: 342
  },
  "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": {
    language: "french",
    size: 282
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16": {
    language: ["zh", "en"],
    size: 112
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": {
    language: "zh",
    size: 53
  },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": {
    language: "en",
    size: 88
  }
};
// Sample rate of the capture AudioContext. Assigned once the microphone stream is
// available and forwarded to the server in the "config" message.
let orig_sample_rate;

// Use the socket scheme that matches how the page itself was loaded: wss:// on HTTPS,
// ws:// on plain HTTP (e.g. a local development server that does not terminate TLS).
const ws = new WebSocket(
  (location.protocol === "https:" ? "wss://" : "ws://") + location.host + "/ws"
);

// DOM handles used by the functions and handlers below.
const vol = document.getElementById("vol");
const partial = document.getElementById("partial");
const finalText = document.getElementById("final");
const modelSelect = document.getElementById("modelSelect");
const precisionSelect = document.getElementById("precisionSelect");
const modelLangs = document.getElementById("modelLangs");
const modelSize = document.getElementById("modelSize");
/**
 * Refresh the "Languages / Size" line from MODEL_METADATA for the model currently
 * selected in #modelSelect.
 *
 * If the selected value has no metadata entry, placeholders are shown instead of
 * throwing, so that a caller which goes on to send the new configuration is not
 * aborted by a purely cosmetic lookup failure.
 */
function updateModelInfo() {
  const meta = MODEL_METADATA[modelSelect.value];
  if (!meta) {
    modelLangs.textContent = "unknown";
    modelSize.textContent = "?";
    return;
  }
  // `language` is either a single label or an array of labels; normalize to an array.
  const langs = Array.isArray(meta.language) ? meta.language : [meta.language];
  modelLangs.textContent = langs.join(", ");
  modelSize.textContent = meta.size;
}
/**
 * Send a "config" message (capture sample rate, selected model, selected precision)
 * to the server as JSON.
 *
 * Does nothing unless the socket is OPEN: WebSocket.send() throws InvalidStateError
 * while the socket is still CONNECTING, and data sent on a closing/closed socket is
 * discarded anyway.
 */
function sendConfig() {
  if (ws.readyState !== WebSocket.OPEN) {
    return;
  }
  const config = {
    type: "config",
    sampleRate: orig_sample_rate,
    model: modelSelect.value,
    precision: precisionSelect.value
  };
  ws.send(JSON.stringify(config));
}
// Show metadata for the initially selected model immediately, and keep it in sync with
// the picker even while the microphone prompt is still open.
updateModelInfo();
modelSelect.addEventListener("change", updateModelInfo);

// Socket handlers are attached up front so that events fired before microphone
// permission is granted are not missed.
ws.onerror = err => console.error("WebSocket error:", err);
ws.onclose = () => console.log("WebSocket closed");
ws.onmessage = e => {
  let msg;
  try {
    msg = JSON.parse(e.data);
  } catch (parseErr) {
    console.error("Ignoring non-JSON message:", parseErr);
    return;
  }
  if (msg === null || typeof msg !== "object") {
    return;
  }
  if (msg.volume !== undefined) {
    // Scale the reported volume by 20 and clamp to the meter's max of 1.
    vol.value = Math.min(msg.volume * 20.0, 1.0);
  }
  if (msg.partial) {
    partial.textContent = msg.partial;
  }
  if (msg.final) {
    finalText.textContent = msg.final;
  }
};

if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
  // navigator.mediaDevices is undefined outside secure contexts and in old browsers.
  partial.textContent = "Microphone capture is not available in this browser or context.";
} else {
  navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
    const context = new AudioContext();
    orig_sample_rate = context.sampleRate;

    // The socket may already have opened while the permission prompt was showing, in
    // which case an onopen handler assigned now would never run.
    if (ws.readyState === WebSocket.OPEN) {
      sendConfig();
    } else {
      ws.onopen = () => sendConfig();
    }

    // Registered after the updateModelInfo listener above, so on a model change the
    // info line is refreshed first and the new config is sent second.
    modelSelect.addEventListener("change", sendConfig);
    precisionSelect.addEventListener("change", sendConfig);

    // NOTE(review): createScriptProcessor is deprecated in favour of AudioWorklet;
    // kept here because a worklet needs a separate module file.
    const source = context.createMediaStreamSource(stream);
    const processor = context.createScriptProcessor(4096, 1, 1);
    source.connect(processor);
    // Presumably connected to the destination so the graph keeps pulling the node and
    // onaudioprocess keeps firing; verify before removing.
    processor.connect(context.destination);

    processor.onaudioprocess = e => {
      // send() throws while the socket is CONNECTING; drop frames until it is OPEN.
      if (ws.readyState !== WebSocket.OPEN) {
        return;
      }
      const input = e.inputBuffer.getChannelData(0);
      // Copy the samples and send them as raw Float32 bytes.
      ws.send(new Float32Array(input).buffer);
    };
  }).catch(err => {
    console.error("Audio capture failed:", err);
    partial.textContent = "Audio capture failed: " + (err && err.message ? err.message : err);
  });
}
</script>
</body>
</html>