digitalhuman / index.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
<style>
/* ... (previous styles remain unchanged) ... */
#model-progress {
width: 100%;
background-color: #444;
border-radius: 5px;
margin-top: 10px;
overflow: hidden;
}
#model-progress-bar {
width: 0;
height: 20px;
background-color: #ffd700;
text-align: center;
line-height: 20px;
color: #1a1a1a;
}
</style>
</head>
<body>
<div id="loading">
<div class="spinner"></div>
</div>
<div class="container">
<h1>Digital Human Voice Chat</h1>
<p class="subtitle">For best results, use headphones.</p>
<div id="chat-container">
<div id="controls">
<button id="startButton" disabled>Begin Call</button>
</div>
<div id="configuration">
<select id="configSelect">
<option value="fastest">Fastest</option>
<option value="balanced">Balanced</option>
<option value="quality">Highest Quality</option>
</select>
<div id="model-info">
TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
</div>
<div id="model-progress">
<div id="model-progress-bar"></div>
</div>
</div>
<div id="visualizer"></div>
<div id="conversation"></div>
</div>
<h2>Logs</h2>
<div id="logs"></div>
<button id="clear-logs">Clear</button>
</div>
<video id="localVideo" autoplay></video>
<video id="remoteVideo" autoplay></video>
<script type="module">
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
env.localModelPath = './models'; // where locally hosted models are looked up
// Configure the ONNX Runtime WASM backend that transformers.js runs on.
env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/';
env.backends.onnx.wasm.simd = true;
env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;
const conversationDiv = document.getElementById('conversation');
const startButton = document.getElementById('startButton');
const visualizer = document.getElementById('visualizer');
const loadingDiv = document.getElementById('loading');
const logsDiv = document.getElementById('logs');
const clearLogsButton = document.getElementById('clear-logs');
const localVideo = document.getElementById('localVideo');
const remoteVideo = document.getElementById('remoteVideo');
const modelProgressBar = document.getElementById('model-progress-bar');
let myvad;
let sttPipeline;
let ttsPipeline;
let llmPipeline;
let audioContext;
let analyser;
let dataArray;
let bars;
let animationId;
let isListening = false;
let microphoneStream;
let isSpeaking = false;
let currentAudioSource = null;
let rtcConnection = null;
let rtcLoopbackConnection = null;
let loopbackStream = new MediaStream();
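// Build the 64-bar frequency visualizer; bar heights are driven by analyser data while audio is active.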
function createVisualizer() {
const barCount = 64;
for (let i = 0; i < barCount; i++) {
const bar = document.createElement('div');
bar.className = 'bar';
visualizer.appendChild(bar);
}
bars = visualizer.getElementsByClassName('bar');
}
function updateVisualizer() {
analyser.getByteFrequencyData(dataArray);
for (let i = 0; i < bars.length; i++) {
const barHeight = dataArray[i] / 2;
bars[i].style.height = barHeight + 'px';
}
animationId = requestAnimationFrame(updateVisualizer); // matches the cancelAnimationFrame calls used to stop the loop
}
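// Load the STT, TTS, and LLM pipelines one after another, reporting overall download progress.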
async function initializePipelines() {
try {
addLog('System: Initializing pipelines...');
const tasks = [
{ name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
{ name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
{ name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
];
for (const [index, task] of tasks.entries()) {
addLog(`System: Loading ${task.name} model...`);
updateProgressBar((index / tasks.length) * 100);
const pipelineInstance = await pipeline(task.task, task.model, {
quantized: true,
// progress_callback receives status objects; use the per-file percentage (0-100) while downloading.
progress_callback: (data) => {
if (data.status === 'progress' && typeof data.progress === 'number') {
updateProgressBar(((index + data.progress / 100) / tasks.length) * 100);
}
}
});
addLog(`System: ${task.name} model loaded successfully.`);
switch (task.name) {
case 'STT':
sttPipeline = pipelineInstance;
break;
case 'TTS':
ttsPipeline = pipelineInstance;
break;
case 'LLM':
llmPipeline = pipelineInstance;
break;
}
}
updateProgressBar(100);
addLog('System: All pipelines initialized successfully.');
startButton.disabled = false;
loadingDiv.style.display = 'none';
} catch (error) {
console.error('Error initializing pipelines:', error);
addLog(`System: Error initializing pipelines: ${error.message}`);
loadingDiv.style.display = 'none';
}
}
function updateProgressBar(percentage) {
modelProgressBar.style.width = `${percentage}%`;
modelProgressBar.textContent = `${Math.round(percentage)}%`;
}
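// One conversational turn: transcribe the user's speech, generate a text reply with the LLM, then synthesize and play it.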
async function processSpeech(audio) {
try {
if (!sttPipeline || !ttsPipeline || !llmPipeline) {
throw new Error('Pipelines not initialized');
}
addLog('System: Processing speech...');
const transcription = await sttPipeline(audio);
addLog(`User: ${transcription.text}`);
addLog('System: Generating LLM response...');
const llmResponse = await llmPipeline(transcription.text, {
max_new_tokens: 50,
temperature: 0.7,
do_sample: true // temperature only takes effect when sampling is enabled
});
const botResponse = llmResponse[0].generated_text.slice(transcription.text.length).trim(); // generated_text echoes the prompt, so keep only the new tokens
addLog(`Bot: ${botResponse}`);
addLog('System: Generating speech from response...');
isSpeaking = true;
const speechOutput = await ttsPipeline(botResponse);
await playAudio(speechOutput.audio, speechOutput.sampling_rate);
isSpeaking = false;
addLog('System: Speech playback complete.');
} catch (error) {
console.error('Error processing speech:', error);
addLog(`System: Error processing speech: ${error.message}`);
}
}
function addLog(message) {
const now = new Date();
const timestamp = now.toLocaleTimeString();
const logMessage = `[${timestamp}] ${message}`;
const messageElement = document.createElement('div');
messageElement.textContent = logMessage;
logsDiv.appendChild(messageElement);
logsDiv.scrollTop = logsDiv.scrollHeight;
console.log(logMessage);
}
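// Wrap the Float32Array produced by the TTS pipeline in an AudioBuffer and play it through the analyser.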
function playAudio(audioArray, sampleRate = 16000) {
return new Promise((resolve) => {
const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
const channelData = audioBuffer.getChannelData(0);
channelData.set(audioArray);
const source = audioContext.createBufferSource();
currentAudioSource = source;
source.buffer = audioBuffer;
source.connect(analyser);
analyser.connect(audioContext.destination);
source.start();
source.onended = () => {
currentAudioSource = null;
resolve();
};
});
}
function stopCurrentAudio() {
if (currentAudioSource) {
currentAudioSource.stop();
currentAudioSource = null;
}
}
async function toggleListening() {
if (isListening) {
await stopListening();
} else {
await startListening();
}
}
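// Acquire the microphone, set up the WebRTC loopback for echo cancellation, then start voice activity detection.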
async function startListening() {
try {
addLog('System: Initializing audio context and stream...');
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
analyser.fftSize = 128;
dataArray = new Uint8Array(analyser.frequencyBinCount);
localVideo.volume = 0;
localVideo.muted = true;
remoteVideo.volume = 0;
remoteVideo.muted = true;
addLog('System: Requesting media stream...');
microphoneStream = await navigator.mediaDevices.getUserMedia({
audio: true,
video: { width: 1, height: 1 }
});
localVideo.srcObject = microphoneStream;
await localVideo.play();
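// Echo-cancellation workaround: route the microphone through a pair of local RTCPeerConnections so the
// browser applies its acoustic echo cancellation to the looped-back stream that feeds the analyser.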
addLog('System: Setting up RTCPeerConnection for echo cancellation...');
const offerOptions = {
offerToReceiveAudio: true,
offerToReceiveVideo: false,
};
rtcConnection = new RTCPeerConnection();
rtcLoopbackConnection = new RTCPeerConnection();
rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));
microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));
const offer = await rtcConnection.createOffer(offerOptions);
await rtcConnection.setLocalDescription(offer);
await rtcLoopbackConnection.setRemoteDescription(offer);
const answer = await rtcLoopbackConnection.createAnswer();
await rtcLoopbackConnection.setLocalDescription(answer);
await rtcConnection.setRemoteDescription(answer);
const source = audioContext.createMediaStreamSource(loopbackStream);
source.connect(analyser);
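// Voice activity detection: speech start interrupts any bot playback, speech end hands the captured audio to processSpeech.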
addLog('System: Initializing voice activity detection...');
myvad = await vad.MicVAD.new({
noiseSuppression: true,
aggressiveness: 3,
onSpeechStart: () => {
addLog('System: Voice activity detected - speech start');
updateVisualizer();
if (isSpeaking) {
addLog('System: User interrupted. Stopping bot speech.');
stopCurrentAudio();
isSpeaking = false;
}
},
onSpeechEnd: (audio) => {
addLog('System: Voice activity detected - speech end');
cancelAnimationFrame(animationId);
processSpeech(audio);
}
});
await myvad.start();
startButton.textContent = 'End Call';
isListening = true;
addLog('System: Listening started successfully.');
} catch (error) {
console.error('Error starting voice activity:', error);
addLog(`System: Error starting voice detection: ${error.message}`);
}
}
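// Tear down VAD, the microphone stream, the audio context, and both peer connections, then reset the UI.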
async function stopListening() {
addLog('System: Stopping listening...');
if (myvad) {
try {
await myvad.destroy();
addLog('System: Voice activity detection stopped.');
} catch (error) {
console.error('Error stopping voice activity:', error);
addLog(`System: Error stopping voice activity: ${error.message}`);
}
myvad = null;
}
if (microphoneStream) {
microphoneStream.getTracks().forEach(track => track.stop());
microphoneStream = null;
addLog('System: Microphone stream stopped.');
}
if (audioContext) {
await audioContext.close();
audioContext = null;
addLog('System: Audio context closed.');
}
if (localVideo) {
localVideo.srcObject = null;
}
if (remoteVideo) {
remoteVideo.srcObject = null;
}
if (rtcConnection) {
rtcConnection.close();
rtcConnection = null;
addLog('System: RTCPeerConnection closed.');
}
if (rtcLoopbackConnection) {
rtcLoopbackConnection.close();
rtcLoopbackConnection = null;
addLog('System: RTCPeerConnection loopback closed.');
}
loopbackStream = new MediaStream();
stopCurrentAudio();
startButton.textContent = 'Begin Call';
isListening = false;
addLog('System: Stopped listening.');
cancelAnimationFrame(animationId);
}
startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', () => {
logsDiv.innerHTML = '';
addLog('System: Logs cleared.');
});
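// Build the visualizer and start loading models as soon as the page loads.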
createVisualizer();
initializePipelines();
</script>
</body>
</html>