<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title> | |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script> | |
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script> | |
<script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script> | |
    <style>
        /* ... (previous styles remain unchanged) ... */
        #model-progress {
            width: 100%;
            background-color: #444;
            border-radius: 5px;
            margin-top: 10px;
            overflow: hidden;
        }
        #model-progress-bar {
            width: 0;
            height: 20px;
            background-color: #ffd700;
            text-align: center;
            line-height: 20px;
            color: #1a1a1a;
        }
    </style>
</head>
<body>
<div id="loading"> | |
<div class="spinner"></div> | |
</div> | |
<div class="container"> | |
<h1>Digital Human Voice Chat</h1> | |
<p class="subtitle">For best results, use headphones.</p> | |
<div id="chat-container"> | |
<div id="controls"> | |
<button id="startButton" disabled>Begin Call</button> | |
</div> | |
<div id="configuration"> | |
<select id="configSelect"> | |
<option value="fastest">Fastest</option> | |
<option value="balanced">Balanced</option> | |
<option value="quality">Highest Quality</option> | |
</select> | |
<div id="model-info"> | |
TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm | |
</div> | |
<div id="model-progress"> | |
<div id="model-progress-bar"></div> | |
</div> | |
</div> | |
<div id="visualizer"></div> | |
<div id="conversation"></div> | |
</div> | |
<h2>Logs</h2> | |
<div id="logs"></div> | |
<button id="clear-logs">Clear</button> | |
</div> | |
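    <!-- Media elements for the WebRTC echo-cancellation loopback; both are kept
         muted so only the processed WebAudio path is audible. -->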
<video id="localVideo" autoplay></video> | |
<video id="remoteVideo" autoplay></video> | |
<script type="module"> | |
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

        // ONNX runtime settings live under env.backends.onnx rather than at the
        // top level of env; WASM is already the default backend in the browser.
        env.localModelPath = './models';
        env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/';
        env.backends.onnx.wasm.simd = true;
        env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;
        const conversationDiv = document.getElementById('conversation');
        const startButton = document.getElementById('startButton');
        const visualizer = document.getElementById('visualizer');
        const loadingDiv = document.getElementById('loading');
        const logsDiv = document.getElementById('logs');
        const clearLogsButton = document.getElementById('clear-logs');
        const localVideo = document.getElementById('localVideo');
        const remoteVideo = document.getElementById('remoteVideo');
        const modelProgressBar = document.getElementById('model-progress-bar');
        let myvad;
        let sttPipeline;
        let ttsPipeline;
        let llmPipeline;
        let audioContext;
        let analyser;
        let dataArray;
        let bars;
        let animationId;
        let isListening = false;
        let microphoneStream;
        let isSpeaking = false;
        let currentAudioSource = null;
        let rtcConnection = null;
        let rtcLoopbackConnection = null;
        let loopbackStream = new MediaStream();
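        // Audio path: microphone -> local RTCPeerConnection loopback -> analyser.
        // The WebRTC round trip applies the browser's echo cancellation, so the
        // VAD is less likely to trigger on the bot's own playback.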
        function createVisualizer() {
            const barCount = 64;
            for (let i = 0; i < barCount; i++) {
                const bar = document.createElement('div');
                bar.className = 'bar';
                visualizer.appendChild(bar);
            }
            bars = visualizer.getElementsByClassName('bar');
        }
        function updateVisualizer() {
            analyser.getByteFrequencyData(dataArray);
            for (let i = 0; i < bars.length; i++) {
                const barHeight = dataArray[i] / 2;
                bars[i].style.height = barHeight + 'px';
            }
            // requestAnimationFrame pairs with the cancelAnimationFrame calls below.
            animationId = requestAnimationFrame(updateVisualizer);
        }
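        // The three models load sequentially; each maps to an equal third of the
        // progress bar, with per-file download progress interpolated in between.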
        async function initializePipelines() {
            try {
                addLog('System: Initializing pipelines...');
                const tasks = [
                    { name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
                    { name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
                    { name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
                ];
                for (const [index, task] of tasks.entries()) {
                    addLog(`System: Loading ${task.name} model...`);
                    updateProgressBar((index / tasks.length) * 100);
                    const pipelineInstance = await pipeline(task.task, task.model, {
                        quantized: true,
                        progress_callback: (data) => {
                            // Only 'progress' events carry a 0-100 percentage.
                            if (data.status === 'progress') {
                                updateProgressBar(((index + data.progress / 100) / tasks.length) * 100);
                            }
                        }
                    });
                    addLog(`System: ${task.name} model loaded successfully.`);
                    switch (task.name) {
                        case 'STT':
                            sttPipeline = pipelineInstance;
                            break;
                        case 'TTS':
                            ttsPipeline = pipelineInstance;
                            break;
                        case 'LLM':
                            llmPipeline = pipelineInstance;
                            break;
                    }
                }
                updateProgressBar(100);
                addLog('System: All pipelines initialized successfully.');
                startButton.disabled = false;
                loadingDiv.style.display = 'none';
            } catch (error) {
                console.error('Error initializing pipelines:', error);
                addLog(`System: Error initializing pipelines: ${error.message}`);
                loadingDiv.style.display = 'none';
            }
        }
        function updateProgressBar(percentage) {
            modelProgressBar.style.width = `${percentage}%`;
            modelProgressBar.textContent = `${Math.round(percentage)}%`;
        }
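        // One conversational turn: transcribe the utterance (STT), generate a
        // short reply (LLM), synthesize it (TTS), and play it back. isSpeaking
        // guards the barge-in handling in onSpeechStart below.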
        async function processSpeech(audio) {
            try {
                if (!sttPipeline || !ttsPipeline || !llmPipeline) {
                    throw new Error('Pipelines not initialized');
                }
                addLog('System: Processing speech...');
                const transcription = await sttPipeline(audio);
                addLog(`User: ${transcription.text}`);
                addLog('System: Generating LLM response...');
                const llmResponse = await llmPipeline(transcription.text, {
                    max_new_tokens: 50,
                    temperature: 0.7,
                    do_sample: true // temperature only has an effect when sampling
                });
                // The text-generation pipeline returns prompt + completion;
                // keep only the newly generated text.
                const botResponse = llmResponse[0].generated_text.slice(transcription.text.length).trim();
                addLog(`Bot: ${botResponse}`);
                addLog('System: Generating speech from response...');
                isSpeaking = true;
                const speechOutput = await ttsPipeline(botResponse);
                await playAudio(speechOutput.audio, speechOutput.sampling_rate);
                isSpeaking = false;
                addLog('System: Speech playback complete.');
            } catch (error) {
                console.error('Error processing speech:', error);
                addLog(`System: Error processing speech: ${error.message}`);
            }
        }
        function addLog(message) {
            const now = new Date();
            const timestamp = now.toLocaleTimeString();
            const logMessage = `[${timestamp}] ${message}`;
            const messageElement = document.createElement('div');
            messageElement.textContent = logMessage;
            logsDiv.appendChild(messageElement);
            logsDiv.scrollTop = logsDiv.scrollHeight;
            console.log(logMessage);
        }
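        // The TTS output is raw Float32Array PCM; it is wrapped in an AudioBuffer
        // and routed through the analyser so the visualizer reacts to bot speech.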
        function playAudio(audioArray, sampleRate = 16000) {
            return new Promise((resolve) => {
                // Build a mono AudioBuffer at the synthesizer's sampling rate.
                const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
                const channelData = audioBuffer.getChannelData(0);
                channelData.set(audioArray);
                const source = audioContext.createBufferSource();
                currentAudioSource = source;
                source.buffer = audioBuffer;
                source.connect(analyser);
                analyser.connect(audioContext.destination);
                source.onended = () => {
                    currentAudioSource = null;
                    resolve();
                };
                source.start();
            });
        }
        function stopCurrentAudio() {
            if (currentAudioSource) {
                currentAudioSource.stop();
                currentAudioSource = null;
            }
        }
        async function toggleListening() {
            if (isListening) {
                await stopListening();
            } else {
                await startListening();
            }
        }
        async function startListening() {
            try {
                addLog('System: Initializing audio context and stream...');
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                analyser = audioContext.createAnalyser();
                analyser.fftSize = 128;
                dataArray = new Uint8Array(analyser.frequencyBinCount);
                localVideo.volume = 0;
                localVideo.muted = true;
                remoteVideo.volume = 0;
                remoteVideo.muted = true;
                addLog('System: Requesting media stream...');
                microphoneStream = await navigator.mediaDevices.getUserMedia({
                    audio: true,
                    video: { width: 1, height: 1 }
                });
                localVideo.srcObject = microphoneStream;
                await localVideo.play();
                addLog('System: Setting up RTCPeerConnection for echo cancellation...');
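                // Loopback trick: feeding the microphone through a pair of local
                // RTCPeerConnections runs it through the browser's WebRTC audio
                // processing (echo cancellation) before it reaches the analyser.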
                const offerOptions = {
                    offerToReceiveAudio: true,
                    offerToReceiveVideo: false,
                };
                rtcConnection = new RTCPeerConnection();
                rtcLoopbackConnection = new RTCPeerConnection();
                rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
                rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
                rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));
                microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));
                const offer = await rtcConnection.createOffer(offerOptions);
                await rtcConnection.setLocalDescription(offer);
                await rtcLoopbackConnection.setRemoteDescription(offer);
                const answer = await rtcLoopbackConnection.createAnswer();
                await rtcLoopbackConnection.setLocalDescription(answer);
                await rtcConnection.setRemoteDescription(answer);
                const source = audioContext.createMediaStreamSource(loopbackStream);
                source.connect(analyser);
                addLog('System: Initializing voice activity detection...');
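                // MicVAD (the Silero-based VAD from @ricky0123/vad-web) captures its
                // own microphone stream; sensitivity is tuned via documented options
                // such as positiveSpeechThreshold and minSpeechFrames if needed.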
                myvad = await vad.MicVAD.new({
                    onSpeechStart: () => {
                        addLog('System: Voice activity detected - speech start');
                        updateVisualizer();
                        // Barge-in: stop bot playback when the user starts talking.
                        if (isSpeaking) {
                            addLog('System: User interrupted. Stopping bot speech.');
                            stopCurrentAudio();
                            isSpeaking = false;
                        }
                    },
                    onSpeechEnd: (audio) => {
                        addLog('System: Voice activity detected - speech end');
                        cancelAnimationFrame(animationId);
                        processSpeech(audio);
                    }
                });
                await myvad.start();
                startButton.textContent = 'End Call';
                isListening = true;
                addLog('System: Listening started successfully.');
            } catch (error) {
                console.error('Error starting voice activity:', error);
                addLog(`System: Error starting voice detection: ${error.message}`);
            }
        }
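        // Teardown: stop playback while the AudioContext is still open, then
        // release the VAD, microphone stream, media elements, and the two
        // peer connections.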
        async function stopListening() {
            addLog('System: Stopping listening...');
            // Stop bot speech first; stopping a source after the context is
            // closed would throw.
            stopCurrentAudio();
            cancelAnimationFrame(animationId);
            if (myvad) {
                try {
                    await myvad.destroy();
                    addLog('System: Voice activity detection stopped.');
                } catch (error) {
                    console.error('Error stopping voice activity:', error);
                    addLog(`System: Error stopping voice activity: ${error.message}`);
                }
                myvad = null;
            }
            if (microphoneStream) {
                microphoneStream.getTracks().forEach(track => track.stop());
                microphoneStream = null;
                addLog('System: Microphone stream stopped.');
            }
            if (audioContext) {
                await audioContext.close();
                audioContext = null;
                addLog('System: Audio context closed.');
            }
            if (localVideo) {
                localVideo.srcObject = null;
            }
            if (remoteVideo) {
                remoteVideo.srcObject = null;
            }
            if (rtcConnection) {
                rtcConnection.close();
                rtcConnection = null;
                addLog('System: RTCPeerConnection closed.');
            }
            if (rtcLoopbackConnection) {
                rtcLoopbackConnection.close();
                rtcLoopbackConnection = null;
                addLog('System: RTCPeerConnection loopback closed.');
            }
            loopbackStream = new MediaStream();
            startButton.textContent = 'Begin Call';
            isListening = false;
            addLog('System: Stopped listening.');
        }
        startButton.addEventListener('click', toggleListening);
        clearLogsButton.addEventListener('click', () => {
            logsDiv.innerHTML = '';
            addLog('System: Logs cleared.');
        });
        createVisualizer();
        initializePipelines();
    </script>
</body>
</html>