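<!--
Repository file-view header (not part of the page itself):
avatar caption: Luigi's picture
last commit: add metadata to newly added model (d295c17)
viewer links: raw, history, blame
file size: 11.2 kB
-->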
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<!-- Without a viewport declaration mobile browsers lay the page out at desktop width and scale it down. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>🎤 Real-Time ASR Demo</title>
<style>
/* Page shell: a single centered column that fills at least the viewport height. */
body {
  margin: 0;
  padding: 2rem;
  min-height: 100vh;
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  font-family: "Segoe UI", sans-serif;
  color: #2f3640;
  background-color: #f5f6fa;
}

/* Page heading. */
h1 {
  font-size: 2rem;
  margin-bottom: 1rem;
}
/* Hotword bias On/Off indicator text. */
#hotwordStatus {
  font-weight: bold;
  font-size: 0.9rem;
  color: #e1b12c;
}

/* Volume meter: the native appearance is switched off so the track and fill can be themed. */
#vol {
  appearance: none;
  width: 300px;
  height: 20px;
  margin-bottom: 1rem;
}

/* Track (WebKit/Blink). */
#vol::-webkit-progress-bar {
  border-radius: 8px;
  background-color: #dcdde1;
}

/*
 * Fill. The WebKit and Gecko rules stay separate on purpose: a selector list
 * containing a pseudo-element the browser does not recognise is dropped whole.
 */
#vol::-webkit-progress-value {
  border-radius: 8px;
  background-color: #44bd32;
  transition: width 0.2s;
}

#vol::-moz-progress-bar {
  border-radius: 8px;
  background-color: #44bd32;
  transition: width 0.2s;
}
/* Card that holds the transcript heading and the transcript box. */
.output {
  display: flex;
  flex-direction: column;
  width: 90%;
  max-width: 800px;
  margin-top: 2rem;
  padding: 1rem 1.5rem;
  text-align: left;
  background: white;
  border-radius: 10px;
  box-shadow: 0 0 10px rgba(0,0,0,0.1);
}

/* Transcript box: fills the card, capped in height, scrolls vertically once full. */
.transcript-container {
  flex: 1;
  max-height: 300px;
  margin-top: 0.5rem;
  padding: 0.5rem;
  overflow-y: auto;
  /* Keep the line breaks that arrive in the transcript text. */
  white-space: pre-wrap;
  font-size: 1.1rem;
  color: #353b48;
  background: #fff;
  border: 1px solid #dcdde1;
  border-radius: 8px;
}

/* Muted bold caption used in front of values. */
.label {
  color: #718093;
  font-weight: bold;
}
/* Horizontal toolbar row; children are vertically centered with a fixed gap. */
.controls {
  display: flex;
  align-items: center;
  gap: 1rem;
  margin-bottom: 1rem;
}

.controls label {
  color: #2f3640;
  font-weight: bold;
}

/* Pickers and the boost slider share one fixed width. */
.controls select,
.controls input[type="range"] {
  width: 150px;
}

/* Hotwords box: takes the remaining row width and can only be resized vertically. */
.controls textarea {
  flex: 1;
  min-height: 4rem;
  resize: vertical;
  padding: 0.3rem;
  font-size: 1rem;
  background: white;
  border: 1px solid #dcdde1;
  border-radius: 5px;
}
/* Small informational lines for the selected model and the microphone. */
.model-info,
.mic-info {
  margin-bottom: 1rem;
  font-size: 0.9rem;
  color: #353b48;
}

/* Values filled in by the script are emphasised. */
.model-info span {
  font-weight: bold;
}

/* Small gap between a caption and its value. */
.mic-info .label {
  margin-right: 0.25rem;
}
</style>
</head>
<body>
<h1>🎤 Speak into your microphone</h1>
<!--
Model and precision pickers.
Each model option value is used by the script as the lookup key into MODEL_METADATA
and is sent as the "model" field of the config message, so every value listed here
needs a matching MODEL_METADATA entry. The precision value is sent as the "precision" field.
-->
<div class="controls">
<label for="modelSelect">Model:</label>
<select id="modelSelect">
<option value="csukuangfj/k2fsa-zipformer-bilingual-zh-en-t">k2fsa-small-bilingual-zh-en</option>
<option value="pfluo/k2fsa-zipformer-chinese-english-mixed">k2fsa-chinese-english-mixed</option>
<option value="k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16">sherpa-onnx-zipformer-korean</option>
<option value="k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12">zipformer-multi-zh-hans</option>
<option value="pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615">icefall-zipformer-wenetspeech</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26">zipformer-en-06-26</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21">zipformer-en-06-21</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21">zipformer-en-02-21</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20">zipformer-zh-en</option>
<option value="shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14">zipformer-fr</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23">zipformer-zh-14M</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17">zipformer-en-20M</option>
<option value="csukuangfj/sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10">zipformer-ar_en_id_ja_ru_th_vi_zh</option>
</select>
<label for="precisionSelect">Precision:</label>
<select id="precisionSelect">
<option value="fp32">FP32</option>
<option value="int8">INT8</option>
</select>
</div>
<div class="controls">
  <!-- Hotword list: one entry per line. -->
  <label for="hotwordsList">Hotwords:</label>
  <textarea id="hotwordsList" placeholder="Enter one hotword per line"></textarea>
  <!-- Boost score slider; the span inside the label mirrors the current value. -->
  <label for="boostScore">Boost Score: <span id="boostValue">2.0</span></label>
  <input type="range" id="boostScore" min="0" max="10" step="0.1" value="2.0" />
  <!--
  Sends the current settings to the server. The explicit type keeps this an
  in-page action button even if the markup is later placed inside a form,
  where a button without a type would submit it.
  -->
  <button type="button" id="applyHotwords">Apply Hotwords</button>
</div>
<!-- Indicator showing whether hotword biasing is On or Off; its text is set by updateHotwordStatus() in the script. -->
<div class="controls">
<span id="hotwordStatus">Hotword Bias: Off</span>
</div>
<!-- Languages and size of the selected model; both spans are filled by updateModelInfo() in the script. -->
<div class="model-info" id="modelInfo">
Languages: <span id="modelLangs"></span> | Size: <span id="modelSize"></span> MB
</div>
<!-- Microphone label and audio sample rate; both are filled in once microphone access has been granted. -->
<div class="mic-info">
<span class="label">Microphone:</span> <span id="micName">Detecting...</span><br>
<span class="label">Sample Rate:</span> <span id="sampleRate">-</span> Hz
</div>
<!--
Input level meter (range 0 to 1), driven by the "volume" field of server messages.
The aria-label gives the bar an accessible name; it has no visible label.
-->
<progress id="vol" max="1" value="0" aria-label="Microphone input level"></progress>
<!-- Transcript panel. The text of #transcript is replaced by the script each time a message carrying a "partial" result arrives. -->
<div class="output">
<div><span class="label">Transcript:</span></div>
<div id="transcript" class="transcript-container">...</div>
</div>
<script>
// Display metadata for each selectable model, keyed by the <option> value of #modelSelect.
//   language: a single string or an array of strings (updateModelInfo handles both forms)
//   size:     number shown in the UI in front of the "MB" suffix
// An option without an entry here makes updateModelInfo fail, so keep both lists in step.
const MODEL_METADATA = {
"csukuangfj/k2fsa-zipformer-bilingual-zh-en-t": { language: ["zh", "en"], size: 115 },
"pfluo/k2fsa-zipformer-chinese-english-mixed": { language: ["zh", "en"], size: 342 },
"k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": { language: "korean", size: 300 },
"k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": { language: "zh-Hans", size: 258 },
"pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": { language: "zh (WenetSpeech)", size: 273 },
"csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": { language: "english", size: 340 },
"csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": { language: "english", size: 340 },
"csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": { language: "english", size: 341 },
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": { language: ["zh", "en"], size: 342 },
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": { language: "french", size: 282 },
"csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": { language: "zh", size: 53 },
"csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": { language: "en", size: 88 },
"csukuangfj/sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10": {language: ["ar","en","id","ja","ru","th","vi","zh"], size: 338}
};
// Shared state. Both stay undefined until microphone access has been granted:
// orig_sample_rate is then set from the AudioContext's sampleRate, and ws is
// the WebSocket opened right after that.
let orig_sample_rate;
let ws;
// Handles to the elements the script reads from or writes to.
const vol = document.getElementById("vol");
const transcript = document.getElementById("transcript");
const modelSelect = document.getElementById("modelSelect");
const precisionSelect = document.getElementById("precisionSelect");
const hotwordsList = document.getElementById("hotwordsList");
const boostScore = document.getElementById("boostScore");
const boostValue = document.getElementById("boostValue");
const applyBtn = document.getElementById("applyHotwords");
const hotwordStatus = document.getElementById("hotwordStatus");
const modelLangs = document.getElementById("modelLangs");
const modelSize = document.getElementById("modelSize");
const micNameElem = document.getElementById("micName");
const sampleRateElem = document.getElementById("sampleRate");
/**
 * Parses the hotwords textarea into a clean list: one entry per line, with
 * surrounding whitespace stripped and empty or whitespace-only lines dropped.
 * Used by both the status indicator and the config message so the two always
 * agree on what counts as a hotword.
 *
 * @returns {string[]} Hotwords in the order they were entered.
 */
function readHotwords() {
  return hotwordsList.value
    .split(/\r?\n/)
    .map(line => line.trim())
    .filter(Boolean);
}

/**
 * Refreshes the "Hotword Bias" indicator. Biasing is reported as On only when
 * at least one hotword is entered and the boost score is greater than zero.
 */
function updateHotwordStatus() {
  const enabled = readHotwords().length > 0 && parseFloat(boostScore.value) > 0;
  hotwordStatus.textContent = enabled
    ? "Hotword Bias: On"
    : "Hotword Bias: Off";
}

/**
 * Shows the languages and size of the currently selected model.
 * If the selected option has no MODEL_METADATA entry, placeholders are shown
 * and a warning is logged instead of throwing on the missing entry.
 */
function updateModelInfo() {
  const meta = MODEL_METADATA[modelSelect.value];
  if (!meta) {
    console.warn(`No metadata found for model "${modelSelect.value}".`);
    modelLangs.textContent = "unknown";
    modelSize.textContent = "?";
    return;
  }
  // `language` is either one string or an array of strings; concat turns both into an array.
  modelLangs.textContent = [].concat(meta.language).join(", ");
  modelSize.textContent = meta.size;
}

/**
 * Sends the current settings (sample rate, model, precision, hotwords and
 * boost score) to the server as a JSON "config" message.
 * Does nothing except log a warning while the WebSocket is missing or not open.
 */
function sendConfig() {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    console.warn("WebSocket not open yet. Cannot send config.");
    return;
  }
  ws.send(JSON.stringify({
    type: "config",
    sampleRate: orig_sample_rate,
    model: modelSelect.value,
    precision: precisionSelect.value,
    hotwords: readHotwords(),
    hotwordsScore: parseFloat(boostScore.value)
  }));
}
// Start-up: ask for the microphone, then open the WebSocket, wire up the
// controls and begin streaming audio.
// The call is made inside a promise chain so that a synchronous failure
// (navigator.mediaDevices is undefined outside a secure context) ends up in
// the same rejection handler as a denied permission.
Promise.resolve()
  .then(() => navigator.mediaDevices.getUserMedia({ audio: true }))
  .then(stream => {
    const context = new AudioContext();
    orig_sample_rate = context.sampleRate;

    // Update mic info in UI
    const track = stream.getAudioTracks()[0];
    micNameElem.textContent = (track && track.label) || 'Unknown';
    sampleRateElem.textContent = orig_sample_rate;
    updateModelInfo();

    // Now that we know the sample rate, open the WS. The scheme follows the
    // page: wss on https, ws otherwise, so a plain-http deployment also works.
    const wsScheme = location.protocol === "https:" ? "wss" : "ws";
    ws = new WebSocket(`${wsScheme}://${location.host}/ws`);
    ws.onopen = () => sendConfig();
    ws.onerror = err => console.error("WebSocket error:", err);
    ws.onclose = () => console.log("WebSocket closed");
    ws.onmessage = e => {
      let msg;
      try {
        msg = JSON.parse(e.data);
      } catch (parseErr) {
        // A malformed or non-text frame must not break the message handler.
        console.warn("Ignoring non-JSON WebSocket message:", parseErr);
        return;
      }
      if (!msg || typeof msg !== "object") {
        return;
      }
      if (msg.volume !== undefined) {
        // Clamp to the <progress> element's max of 1.
        vol.value = Math.min(msg.volume, 1.0);
      }
      if (msg.partial) {
        // Replace the content, then keep the newest text in view.
        transcript.textContent = msg.partial;
        transcript.scrollTop = transcript.scrollHeight;
      }
    };

    // Any change to model or precision, or a click on Apply, re-sends the config.
    modelSelect.addEventListener("change", () => {
      updateModelInfo();
      sendConfig();
      updateHotwordStatus();
    });
    precisionSelect.addEventListener("change", () => {
      sendConfig();
      updateHotwordStatus();
    });
    applyBtn.addEventListener("click", () => {
      sendConfig();
      updateHotwordStatus();
    });

    // Update boost display and status on slider input
    boostScore.addEventListener("input", () => {
      boostValue.textContent = boostScore.value;
      updateHotwordStatus();
    });

    // Audio pipeline: microphone -> script processor -> destination.
    // NOTE: ScriptProcessorNode is deprecated in favour of AudioWorklet; it is
    // kept here because a worklet needs its processor in a separate module.
    const source = context.createMediaStreamSource(stream);
    const processor = context.createScriptProcessor(4096, 1, 1);
    source.connect(processor);
    processor.connect(context.destination);
    processor.onaudioprocess = e => {
      // Audio callbacks begin before the socket has finished connecting and
      // continue after it closes; send() throws while CONNECTING, so drop
      // those frames.
      if (ws.readyState !== WebSocket.OPEN) {
        return;
      }
      const input = e.inputBuffer.getChannelData(0);
      // Copy the samples into a fresh buffer and send them as raw float32.
      ws.send(new Float32Array(input).buffer);
    };
  }, err => {
    // Permission denied, no input device, or the API is unavailable.
    console.error("Microphone unavailable:", err);
    micNameElem.textContent = "Unavailable";
  });
</script>
</body>
</html>