<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Voice Chat Bot with Advanced Echo Cancellation and TinyLLM</title> | |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script> | |
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script> | |
<script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script> | |
    <style>
        /* ... (previous styles remain unchanged) ... */
        #model-progress {
            width: 100%;
            background-color: #444;
            border-radius: 5px;
            margin-top: 10px;
            overflow: hidden;
        }
        #model-progress-bar {
            width: 0;
            height: 20px;
            background-color: #ffd700;
            text-align: center;
            line-height: 20px;
            color: #1a1a1a;
        }
    </style>
</head>
<body>
<div id="loading"> | |
<div class="spinner"></div> | |
</div> | |
<div class="container"> | |
<h1>Digital Human Voice Chat</h1> | |
<p class="subtitle">For best results, use headphones.</p> | |
<div id="chat-container"> | |
<div id="controls"> | |
<button id="startButton" disabled>Begin Call</button> | |
</div> | |
<div id="configuration"> | |
<select id="configSelect"> | |
<option value="fastest">Fastest</option> | |
<option value="balanced">Balanced</option> | |
<option value="quality">Highest Quality</option> | |
</select> | |
<div id="model-info"> | |
TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm | |
</div> | |
<div id="model-progress"> | |
<div id="model-progress-bar"></div> | |
</div> | |
</div> | |
<div id="visualizer"></div> | |
<div id="conversation"></div> | |
</div> | |
<h2>Logs</h2> | |
<div id="logs"></div> | |
<button id="clear-logs">Clear</button> | |
</div> | |
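    <!-- Media elements for the WebRTC echo-cancellation loopback; both are kept
         muted so only the processed WebAudio path is audible. -->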
<video id="localVideo" autoplay></video> | |
<video id="remoteVideo" autoplay></video> | |
<script type="module"> | |
        import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

        // ONNX runtime settings live under env.backends.onnx rather than at the
        // top level of env; WASM is already the default backend in the browser.
        env.localModelPath = './models';
        env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/';
        env.backends.onnx.wasm.simd = true;
        env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;
        const conversationDiv = document.getElementById('conversation');
        const startButton = document.getElementById('startButton');
        const visualizer = document.getElementById('visualizer');
        const loadingDiv = document.getElementById('loading');
        const logsDiv = document.getElementById('logs');
        const clearLogsButton = document.getElementById('clear-logs');
        const localVideo = document.getElementById('localVideo');
        const remoteVideo = document.getElementById('remoteVideo');
        const modelProgressBar = document.getElementById('model-progress-bar');
        let myvad;
        let sttPipeline;
        let ttsPipeline;
        let llmPipeline;
        let audioContext;
        let analyser;
        let dataArray;
        let bars;
        let animationId;
        let isListening = false;
        let microphoneStream;
        let isSpeaking = false;
        let currentAudioSource = null;
        let rtcConnection = null;
        let rtcLoopbackConnection = null;
        let loopbackStream = new MediaStream();
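        // Audio path: microphone -> local RTCPeerConnection loopback -> analyser.
        // The WebRTC round trip applies the browser's echo cancellation, so the
        // VAD is less likely to trigger on the bot's own playback.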
        function createVisualizer() {
            const barCount = 64;
            for (let i = 0; i < barCount; i++) {
                const bar = document.createElement('div');
                bar.className = 'bar';
                visualizer.appendChild(bar);
            }
            bars = visualizer.getElementsByClassName('bar');
        }
        function updateVisualizer() {
            analyser.getByteFrequencyData(dataArray);
            for (let i = 0; i < bars.length; i++) {
                const barHeight = dataArray[i] / 2;
                bars[i].style.height = barHeight + 'px';
            }
            // requestAnimationFrame pairs with the cancelAnimationFrame calls below.
            animationId = requestAnimationFrame(updateVisualizer);
        }
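        // The three models load sequentially; each maps to an equal third of the
        // progress bar, with per-file download progress interpolated in between.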
        async function initializePipelines() {
            try {
                addLog('System: Initializing pipelines...');
                const tasks = [
                    { name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
                    { name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
                    { name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
                ];
                for (const [index, task] of tasks.entries()) {
                    addLog(`System: Loading ${task.name} model...`);
                    updateProgressBar((index / tasks.length) * 100);
                    const pipelineInstance = await pipeline(task.task, task.model, {
                        quantized: true,
                        progress_callback: (data) => {
                            // Only 'progress' events carry a 0-100 percentage.
                            if (data.status === 'progress') {
                                updateProgressBar(((index + data.progress / 100) / tasks.length) * 100);
                            }
                        }
                    });
                    addLog(`System: ${task.name} model loaded successfully.`);
                    switch (task.name) {
                        case 'STT':
                            sttPipeline = pipelineInstance;
                            break;
                        case 'TTS':
                            ttsPipeline = pipelineInstance;
                            break;
                        case 'LLM':
                            llmPipeline = pipelineInstance;
                            break;
                    }
                }
                updateProgressBar(100);
                addLog('System: All pipelines initialized successfully.');
                startButton.disabled = false;
                loadingDiv.style.display = 'none';
            } catch (error) {
                console.error('Error initializing pipelines:', error);
                addLog(`System: Error initializing pipelines: ${error.message}`);
                loadingDiv.style.display = 'none';
            }
        }
        function updateProgressBar(percentage) {
            modelProgressBar.style.width = `${percentage}%`;
            modelProgressBar.textContent = `${Math.round(percentage)}%`;
        }
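        // One conversational turn: transcribe the utterance (STT), generate a
        // short reply (LLM), synthesize it (TTS), and play it back. isSpeaking
        // guards the barge-in handling in onSpeechStart below.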
        async function processSpeech(audio) {
            try {
                if (!sttPipeline || !ttsPipeline || !llmPipeline) {
                    throw new Error('Pipelines not initialized');
                }
                addLog('System: Processing speech...');
                const transcription = await sttPipeline(audio);
                addLog(`User: ${transcription.text}`);
                addLog('System: Generating LLM response...');
                const llmResponse = await llmPipeline(transcription.text, {
                    max_new_tokens: 50,
                    temperature: 0.7,
                    do_sample: true // temperature only has an effect when sampling
                });
                // The text-generation pipeline returns prompt + completion;
                // keep only the newly generated text.
                const botResponse = llmResponse[0].generated_text.slice(transcription.text.length).trim();
                addLog(`Bot: ${botResponse}`);
                addLog('System: Generating speech from response...');
                isSpeaking = true;
                const speechOutput = await ttsPipeline(botResponse);
                await playAudio(speechOutput.audio, speechOutput.sampling_rate);
                isSpeaking = false;
                addLog('System: Speech playback complete.');
            } catch (error) {
                console.error('Error processing speech:', error);
                addLog(`System: Error processing speech: ${error.message}`);
            }
        }
        function addLog(message) {
            const now = new Date();
            const timestamp = now.toLocaleTimeString();
            const logMessage = `[${timestamp}] ${message}`;
            const messageElement = document.createElement('div');
            messageElement.textContent = logMessage;
            logsDiv.appendChild(messageElement);
            logsDiv.scrollTop = logsDiv.scrollHeight;
            console.log(logMessage);
        }
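        // The TTS output is raw Float32Array PCM; it is wrapped in an AudioBuffer
        // and routed through the analyser so the visualizer reacts to bot speech.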
        function playAudio(audioArray, sampleRate = 16000) {
            return new Promise((resolve) => {
                // Build a mono AudioBuffer at the synthesizer's sampling rate.
                const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
                const channelData = audioBuffer.getChannelData(0);
                channelData.set(audioArray);
                const source = audioContext.createBufferSource();
                currentAudioSource = source;
                source.buffer = audioBuffer;
                source.connect(analyser);
                analyser.connect(audioContext.destination);
                source.onended = () => {
                    currentAudioSource = null;
                    resolve();
                };
                source.start();
            });
        }
        function stopCurrentAudio() {
            if (currentAudioSource) {
                currentAudioSource.stop();
                currentAudioSource = null;
            }
        }
        async function toggleListening() {
            if (isListening) {
                await stopListening();
            } else {
                await startListening();
            }
        }
        async function startListening() {
            try {
                addLog('System: Initializing audio context and stream...');
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                analyser = audioContext.createAnalyser();
                analyser.fftSize = 128;
                dataArray = new Uint8Array(analyser.frequencyBinCount);
                localVideo.volume = 0;
                localVideo.muted = true;
                remoteVideo.volume = 0;
                remoteVideo.muted = true;
                addLog('System: Requesting media stream...');
                microphoneStream = await navigator.mediaDevices.getUserMedia({
                    audio: true,
                    video: { width: 1, height: 1 }
                });
                localVideo.srcObject = microphoneStream;
                await localVideo.play();
                addLog('System: Setting up RTCPeerConnection for echo cancellation...');
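                // Loopback trick: feeding the microphone through a pair of local
                // RTCPeerConnections runs it through the browser's WebRTC audio
                // processing (echo cancellation) before it reaches the analyser.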
                const offerOptions = {
                    offerToReceiveAudio: true,
                    offerToReceiveVideo: false,
                };
                rtcConnection = new RTCPeerConnection();
                rtcLoopbackConnection = new RTCPeerConnection();
                rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
                rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
                rtcLoopbackConnection.ontrack = e => e.streams[0].getTracks().forEach(track => loopbackStream.addTrack(track));
                microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));
                const offer = await rtcConnection.createOffer(offerOptions);
                await rtcConnection.setLocalDescription(offer);
                await rtcLoopbackConnection.setRemoteDescription(offer);
                const answer = await rtcLoopbackConnection.createAnswer();
                await rtcLoopbackConnection.setLocalDescription(answer);
                await rtcConnection.setRemoteDescription(answer);
                const source = audioContext.createMediaStreamSource(loopbackStream);
                source.connect(analyser);
                addLog('System: Initializing voice activity detection...');
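                // MicVAD (the Silero-based VAD from @ricky0123/vad-web) captures its
                // own microphone stream; sensitivity is tuned via documented options
                // such as positiveSpeechThreshold and minSpeechFrames if needed.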
                myvad = await vad.MicVAD.new({
                    onSpeechStart: () => {
                        addLog('System: Voice activity detected - speech start');
                        updateVisualizer();
                        // Barge-in: stop bot playback when the user starts talking.
                        if (isSpeaking) {
                            addLog('System: User interrupted. Stopping bot speech.');
                            stopCurrentAudio();
                            isSpeaking = false;
                        }
                    },
                    onSpeechEnd: (audio) => {
                        addLog('System: Voice activity detected - speech end');
                        cancelAnimationFrame(animationId);
                        processSpeech(audio);
                    }
                });
                await myvad.start();
                startButton.textContent = 'End Call';
                isListening = true;
                addLog('System: Listening started successfully.');
            } catch (error) {
                console.error('Error starting voice activity:', error);
                addLog(`System: Error starting voice detection: ${error.message}`);
            }
        }
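        // Teardown: stop playback while the AudioContext is still open, then
        // release the VAD, microphone stream, media elements, and the two
        // peer connections.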
        async function stopListening() {
            addLog('System: Stopping listening...');
            // Stop bot speech first; stopping a source after the context is
            // closed would throw.
            stopCurrentAudio();
            cancelAnimationFrame(animationId);
            if (myvad) {
                try {
                    await myvad.destroy();
                    addLog('System: Voice activity detection stopped.');
                } catch (error) {
                    console.error('Error stopping voice activity:', error);
                    addLog(`System: Error stopping voice activity: ${error.message}`);
                }
                myvad = null;
            }
            if (microphoneStream) {
                microphoneStream.getTracks().forEach(track => track.stop());
                microphoneStream = null;
                addLog('System: Microphone stream stopped.');
            }
            if (audioContext) {
                await audioContext.close();
                audioContext = null;
                addLog('System: Audio context closed.');
            }
            if (localVideo) {
                localVideo.srcObject = null;
            }
            if (remoteVideo) {
                remoteVideo.srcObject = null;
            }
            if (rtcConnection) {
                rtcConnection.close();
                rtcConnection = null;
                addLog('System: RTCPeerConnection closed.');
            }
            if (rtcLoopbackConnection) {
                rtcLoopbackConnection.close();
                rtcLoopbackConnection = null;
                addLog('System: RTCPeerConnection loopback closed.');
            }
            loopbackStream = new MediaStream();
            startButton.textContent = 'Begin Call';
            isListening = false;
            addLog('System: Stopped listening.');
        }
        startButton.addEventListener('click', toggleListening);
        clearLogsButton.addEventListener('click', () => {
            logsDiv.innerHTML = '';
            addLog('System: Logs cleared.');
        });
        createVisualizer();
        initializePipelines();
    </script>
</body>
</html>