Luigi's picture
show model size and supported languages
8e221a6
raw
history blame
7.58 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Without a viewport declaration mobile browsers lay the page out at a
     desktop width and shrink it; this makes the layout follow the device width. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>🎤 Real-Time ASR Demo</title>
<style>
  /* Page shell: children are stacked in a single centered column that is at
     least as tall as the viewport. */
  body {
    font-family: "Segoe UI", sans-serif;
    background-color: #f5f6fa;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    min-height: 100vh;
    margin: 0;
    padding: 2rem;
    color: #2f3640;
  }

  h1 {
    margin-bottom: 1rem;
    font-size: 2rem;
  }

  /* Volume meter (<progress id="vol">).
     `appearance: none` turns off the native look so the vendor pseudo-elements
     below can be styled. Firefox has no separate track pseudo-element: the
     track is the element itself, so the track colour, radius and border reset
     are repeated here to match the WebKit track defined below. */
  #vol {
    width: 300px;
    height: 20px;
    margin-bottom: 1rem;
    appearance: none;
    border: none;
    border-radius: 8px;
    background-color: #dcdde1;
  }

  /* WebKit/Blink: track. */
  #vol::-webkit-progress-bar {
    background-color: #dcdde1;
    border-radius: 8px;
  }

  /* WebKit/Blink: filled portion. */
  #vol::-webkit-progress-value {
    background-color: #44bd32;
    border-radius: 8px;
    transition: width 0.2s;
  }

  /* Firefox: filled portion. Kept as its own rule on purpose: a selector list
     containing a pseudo-element the browser does not recognise invalidates the
     whole rule, so the WebKit and Firefox selectors cannot be merged. */
  #vol::-moz-progress-bar {
    background-color: #44bd32;
    border-radius: 8px;
    transition: width 0.2s;
  }

  /* Transcript card holding the partial and final text. */
  .output {
    width: 90%;
    max-width: 800px;
    text-align: left;
    margin-top: 2rem;
    background: white;
    padding: 1rem 1.5rem;
    border-radius: 10px;
    box-shadow: 0 0 10px rgba(0,0,0,0.1);
  }

  /* Muted caption in front of each transcript line. */
  .label {
    font-weight: bold;
    color: #718093;
  }

  #partial {
    font-size: 1.25rem;
    color: #353b48;
  }

  /* The final text is larger and coloured so it stands out from the partial. */
  #final {
    font-size: 1.4rem;
    color: #e84118;
  }

  /* Row containing the model and precision selectors. */
  .controls {
    display: flex;
    gap: 1rem;
    margin-bottom: 1rem;
    align-items: center;
  }

  .controls label {
    font-weight: bold;
    color: #2f3640;
  }

  .controls select {
    padding: 0.3rem;
    border-radius: 5px;
    border: 1px solid #dcdde1;
    background: white;
  }

  /* "Languages | Size" read-out below the selectors. */
  .model-info {
    margin-bottom: 1rem;
    font-size: 0.9rem;
    color: #353b48;
  }

  .model-info span {
    font-weight: bold;
  }
</style>
</head>
<body>
<h1>🎤 Speak into your microphone</h1>

<!-- Model / precision pickers. Each model option's value is the key the script
     uses to look up MODEL_METADATA and the value it sends in the config message. -->
<div class="controls">
  <label for="modelSelect">Model:</label>
  <select id="modelSelect">
    <option value="pfluo/k2fsa-zipformer-chinese-english-mixed">k2fsa-chinese-english-mixed</option>
    <option value="k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16">sherpa-onnx-zipformer-korean</option>
    <option value="k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12">zipformer-multi-zh-hans</option>
    <option value="pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615">icefall-zipformer-wenetspeech</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26">zipformer-en-06-26</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21">zipformer-en-06-21</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21">zipformer-en-02-21</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20">zipformer-zh-en</option>
    <option value="shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14">zipformer-fr</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16">zipformer-small-zh-en</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23">zipformer-zh-14M</option>
    <option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17">zipformer-en-20M</option>
  </select>
  <label for="precisionSelect">Precision:</label>
  <select id="precisionSelect">
    <option value="fp32">FP32</option>
    <option value="int8">INT8</option>
  </select>
</div>

<!-- Filled in by updateModelInfo() in the script. -->
<div class="model-info" id="modelInfo">
  Languages: <span id="modelLangs"></span> | Size: <span id="modelSize"></span> MB
</div>

<!-- Volume meter driven by the script. A <progress> has no visible label here,
     so it is given an accessible name for assistive technology. -->
<progress id="vol" max="1" value="0" aria-label="Microphone volume"></progress>

<div class="output">
  <div><span class="label">Partial:</span> <span id="partial">...</span></div>
  <!-- Only the final text is a live region so that asynchronously arriving
       results are announced; the partial line is left out on the assumption
       that it changes too frequently to be read aloud usefully. -->
  <div><span class="label">Final:</span> <b id="final" aria-live="polite">...</b></div>
</div>
<script>
// Catalogue of the selectable models, keyed by the <option> value of
// #modelSelect. `language` is either a single string or an array of strings
// (updateModelInfo handles both); `size` is the number shown in front of "MB"
// in the model-info line.
const MODEL_METADATA = {
  "pfluo/k2fsa-zipformer-chinese-english-mixed": { language: ["zh", "en"], size: 342 },
  "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": { language: "korean", size: 300 },
  "k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": { language: "zh-Hans", size: 258 },
  "pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": { language: "zh (WenetSpeech)", size: 273 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": { language: "english", size: 340 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": { language: "english", size: 340 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": { language: "english", size: 341 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": { language: ["zh", "en"], size: 342 },
  "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": { language: "french", size: 282 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16": { language: ["zh", "en"], size: 112 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": { language: "zh", size: 53 },
  "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": { language: "en", size: 88 }
};

// Sample rate of the capture AudioContext. Stays undefined until the
// microphone stream has been obtained; it is included in the config message.
let orig_sample_rate;

// The socket scheme follows the page scheme: an HTTPS page still connects via
// wss:// exactly as before, while a page served over plain HTTP (for example a
// local development server) uses ws:// instead of attempting a TLS handshake
// that such a server cannot answer.
const ws = new WebSocket(
  (location.protocol === "https:" ? "wss://" : "ws://") + location.host + "/ws"
);

// DOM handles used by the rest of the script.
const vol = document.getElementById("vol");
const partial = document.getElementById("partial");
const finalText = document.getElementById("final");
const modelSelect = document.getElementById("modelSelect");
const precisionSelect = document.getElementById("precisionSelect");
const modelLangs = document.getElementById("modelLangs");
const modelSize = document.getElementById("modelSize");
/**
 * Refresh the "Languages" and "Size" read-outs for the model currently chosen
 * in #modelSelect.
 *
 * The selected option's value is looked up in MODEL_METADATA. A `language`
 * given as an array is shown comma-separated; a string is shown as-is.
 *
 * If the selected value has no catalogue entry, placeholders are shown instead
 * of throwing. An exception here would abort the caller before it gets to
 * send the new configuration over the socket.
 */
function updateModelInfo() {
  const meta = MODEL_METADATA[modelSelect.value];
  if (!meta) {
    modelLangs.textContent = "unknown";
    modelSize.textContent = "?";
    return;
  }
  modelLangs.textContent = Array.isArray(meta.language)
    ? meta.language.join(', ')
    : meta.language;
  modelSize.textContent = meta.size;
}
/**
 * Send the current session configuration to the server as a JSON text frame:
 * `{ type: "config", sampleRate, model, precision }`.
 *
 * Nothing is sent unless the socket is open. WebSocket.send() throws an
 * InvalidStateError while the connection is still being established, which
 * could happen if a selector is changed before the handshake completes.
 *
 * Note that JSON.stringify drops properties whose value is undefined, so
 * `sampleRate` is absent from the message if this runs before
 * `orig_sample_rate` has been assigned.
 */
function sendConfig() {
  if (ws.readyState !== WebSocket.OPEN) {
    return;
  }
  ws.send(JSON.stringify({
    type: "config",
    sampleRate: orig_sample_rate,
    model: modelSelect.value,
    precision: precisionSelect.value
  }));
}
// Capture pipeline. After the user grants microphone access this:
//   1. records the AudioContext sample rate,
//   2. sends the initial configuration (and re-sends it when a selector changes),
//   3. streams raw Float32 sample blocks to the server over the WebSocket,
//   4. renders the volume / partial / final fields of the server's JSON replies.
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
  const context = new AudioContext();
  orig_sample_rate = context.sampleRate;

  // Show the model info and push the configuration to the server.
  const announceConfig = () => {
    updateModelInfo();
    sendConfig();
  };

  // The socket is created at page load, while this callback only runs after
  // the permission prompt is answered, so the "open" event has usually fired
  // already. Assigning onopen alone would then never run; announce right away
  // in that case and only wait for the event if the handshake is still pending.
  if (ws.readyState === WebSocket.OPEN) {
    announceConfig();
  } else {
    ws.onopen = announceConfig;
  }

  modelSelect.addEventListener("change", () => {
    updateModelInfo();
    sendConfig();
  });
  precisionSelect.addEventListener("change", sendConfig);

  ws.onerror = err => console.error("WebSocket error:", err);
  ws.onclose = () => console.log("WebSocket closed");

  // Mic -> ScriptProcessorNode (4096-frame buffer, 1 input and 1 output channel).
  const source = context.createMediaStreamSource(stream);
  const processor = context.createScriptProcessor(4096, 1, 1);
  source.connect(processor);
  // NOTE(review): the node is routed to the destination presumably because
  // some browsers only fire onaudioprocess for nodes that reach an output.
  // The handler never writes to the output buffer.
  processor.connect(context.destination);

  processor.onaudioprocess = e => {
    // Drop blocks while the socket is not open: send() throws during the
    // handshake, and a closing/closed socket would discard the data anyway.
    if (ws.readyState !== WebSocket.OPEN) {
      return;
    }
    const input = e.inputBuffer.getChannelData(0);
    // Copy the samples into a fresh buffer and send that copy as a binary frame.
    ws.send(new Float32Array(input).buffer);
  };

  ws.onmessage = e => {
    const msg = JSON.parse(e.data);
    if (msg.volume !== undefined) {
      // Scale the reported volume by 20 and clamp to the meter's max of 1.
      vol.value = Math.min(msg.volume * 20.0, 1.0);
    }
    // Empty or missing text leaves the previous transcript on screen.
    if (msg.partial) {
      partial.textContent = msg.partial;
    }
    if (msg.final) {
      finalText.textContent = msg.final;
    }
  };
}).catch(err => {
  // Reached when permission is denied, no input device exists, or the audio
  // graph could not be set up; previously this was an unhandled rejection.
  console.error("Could not start microphone capture:", err);
});
</script>
</body>
</html>