Spaces: Running
Update index.backup5.html
Browse files
index.backup5.html (CHANGED: +34 -42)
@@ -161,7 +161,7 @@
             <option value="quality">Highest Quality</option>
         </select>
         <div id="model-info">
-            TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM:
+            TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
         </div>
     </div>
     <div id="visualizer"></div>
@@ -179,12 +179,13 @@
 
         env.localModelPath = './models';
 
+        //BELOW 5 statements added by RAHUL
         // Configure environment before initializing pipelines
         env.backends = ['wasm'];
         env.wasm = env.wasm || {};
-        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
-        env.wasm.simd = true;
-        env.numThreads = navigator.hardwareConcurrency || 4;
+        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
+        env.wasm.simd = true; // Enable SIMD if available
+        env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores
 
         const conversationDiv = document.getElementById('conversation');
         const startButton = document.getElementById('startButton');
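Note: this hunk pins the ONNX Runtime WASM settings through top-level env fields. In @xenova/transformers 2.x the same knobs are documented under env.backends.onnx.wasm, so an equivalent setup could look like the minimal sketch below. The sketch is not part of this commit; the CDN URL and thread count are illustrative assumptions.

// Minimal sketch, not part of this commit: WASM tuning via the documented
// env.backends.onnx.wasm path of @xenova/transformers 2.x.
import { env, pipeline } from '@xenova/transformers';

env.localModelPath = './models'; // load models from the local ./models folder
env.backends.onnx.wasm.wasmPaths =
    'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/'; // assumed location of the .wasm binaries
env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;

// Pipelines created after this point pick up the configured backend.
const stt = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true });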
@@ -198,7 +199,6 @@
         let myvad;
         let sttPipeline;
         let ttsPipeline;
-        let llmPipeline;
         let audioContext;
         let analyser;
         let dataArray;
@@ -228,55 +228,46 @@
                 const barHeight = dataArray[i] / 2;
                 bars[i].style.height = barHeight + 'px';
             }
-            animationId = requestAnimationFrame(updateVisualizer);
+            // Use setTimeout instead of requestAnimationFrame to reduce update frequency - RAHUL ATLURY
+            animationId = setTimeout(updateVisualizer, 50); // Update every 50ms - RAHUL ATLURY
+
+            //animationId = requestAnimationFrame(updateVisualizer);
         }
 
+
         async function initializePipelines() {
             try {
-
-
-
-
-
+
+                //sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }); // added , { quantized: true }
+                //ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
+                //    quantized: true, //changed to true - RAHUL ATLURY
+                //});
+
+                [sttPipeline, ttsPipeline] = await Promise.all([
+                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
                 ]);
 
-                addLog('System: Digital Human Voice Chat initialized
+                addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
                 startButton.disabled = false;
                 loadingDiv.style.display = 'none';
             } catch (error) {
                 console.error('Error initializing pipelines:', error);
-                addLog(
+                addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
                 loadingDiv.style.display = 'none';
             }
         }
 
         async function processSpeech(audio) {
             try {
-                if (!sttPipeline || !ttsPipeline || !llmPipeline) {
+                if (!sttPipeline || !ttsPipeline) {
                     throw new Error('Pipelines not initialized');
                 }
 
                 const transcription = await sttPipeline(audio);
                 addLog(`User: ${transcription.text}`);
 
-                const messages = [
-                    { role: 'system', content: 'You are a helpful assistant.' },
-                    { role: 'user', content: transcription.text }
-                ];
-
-                // Apply chat template
-                const text = llmPipeline.tokenizer.apply_chat_template(messages, {
-                    tokenize: false,
-                    add_generation_prompt: true,
-                });
-
-                // Generate text
-                const llmResponse = await llmPipeline(text, {
-                    max_new_tokens: 128,
-                    do_sample: false
-                });
-
-                const botResponse = llmResponse[0].generated_text;
+                const botResponse = `I heard you say: "${transcription.text}".`;
                 addLog(`Bot: ${botResponse}`);
 
                 isSpeaking = true;
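With this hunk the LLM step is stubbed out and the bot simply echoes the transcription, which matches the "LLM: Placeholder" label added to the model-info line. If a language-model reply is reinstated later, a minimal sketch mirroring the removed apply_chat_template flow could look like the following; it is not part of this commit, and the model id is an assumption.

// Minimal sketch, not part of this commit: an LLM reply step using a
// text-generation pipeline, following the same shape as the removed code.
import { pipeline } from '@xenova/transformers';

const llmPipeline = await pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat'); // assumed model id

async function generateReply(userText) {
    const messages = [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: userText }
    ];
    // Apply the model's chat template, then generate greedily
    const prompt = llmPipeline.tokenizer.apply_chat_template(messages, {
        tokenize: false,
        add_generation_prompt: true,
    });
    const output = await llmPipeline(prompt, { max_new_tokens: 128, do_sample: false });
    return output[0].generated_text;
}

processSpeech would then call generateReply(transcription.text) instead of building the placeholder string.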
@@ -285,7 +276,7 @@
                 isSpeaking = false;
             } catch (error) {
                 console.error('Error processing speech:', error);
-                addLog(
+                addLog('System: Error processing speech. Please try again.');
             }
         }
 
@@ -348,9 +339,10 @@
                 remoteVideo.muted = true;
                 document.getElementById('remoteVideo').volume = 0;
 
+                // Request both audio and video streams
                 microphoneStream = await navigator.mediaDevices.getUserMedia({
                     audio: true,
-                    video: { width: 1, height: 1 }
+                    video: { width: 1, height: 1 } // Minimal video for echo cancellation
                 });
 
                 localVideo.srcObject = microphoneStream;
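The hunk only annotates the getUserMedia call; echo cancellation itself is negotiated through the audio track constraints. A minimal sketch of requesting it explicitly with standard MediaTrackConstraints (not part of this commit) would be:

// Minimal sketch, not part of this commit: explicitly requesting echo
// cancellation and noise suppression on the microphone track.
const stream = await navigator.mediaDevices.getUserMedia({
    audio: {
        echoCancellation: true, // let the browser remove played-back audio from the mic signal
        noiseSuppression: true,
        autoGainControl: true
    },
    video: { width: 1, height: 1 } // keep the tiny video track used by the page
});
console.log(stream.getAudioTracks()[0].getSettings()); // verify which constraints were honoured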
@@ -359,6 +351,7 @@
                 console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
                 console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
 
+                // Implement loopback hack for improved echo cancellation
                 const offerOptions = {
                     offerToReceiveAudio: true,
                     offerToReceiveVideo: false,
@@ -381,12 +374,13 @@
                 await rtcLoopbackConnection.setLocalDescription(answer);
                 await rtcConnection.setRemoteDescription(answer);
 
+                // Use the loopback stream for audio processing
                 const source = audioContext.createMediaStreamSource(loopbackStream);
                 source.connect(analyser);
 
                 myvad = await vad.MicVAD.new({
-
-
+                    noiseSuppression: true, ///Added by RAHUL Atlury
+                    aggressiveness: 3, // Higher value for more aggressive detection Added by RAHUL ATLURY
                     onSpeechStart: () => {
                         addLog('--- Voice activity: speech start');
                         updateVisualizer();
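For context on the "loopback hack" these comments refer to: routing the microphone through a local pair of RTCPeerConnections lets the browser's acoustic echo canceller account for the audio the page itself is playing. The sketch below is a self-contained illustration of that pattern, not the file's exact code; the variable names mirror the ones used in the file.

// Minimal sketch, not this file's exact code: a local WebRTC loopback so the
// browser's AEC processes the microphone signal.
const microphoneStream = await navigator.mediaDevices.getUserMedia({ audio: true });

const rtcConnection = new RTCPeerConnection();
const rtcLoopbackConnection = new RTCPeerConnection();
const loopbackStream = new MediaStream(); // receives the echo-cancelled audio

rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(e.candidate);
rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(e.candidate);
rtcLoopbackConnection.ontrack = e => loopbackStream.addTrack(e.track);

microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));

const offer = await rtcConnection.createOffer({ offerToReceiveAudio: true, offerToReceiveVideo: false });
await rtcConnection.setLocalDescription(offer);
await rtcLoopbackConnection.setRemoteDescription(offer);

const answer = await rtcLoopbackConnection.createAnswer();
await rtcLoopbackConnection.setLocalDescription(answer);
await rtcConnection.setRemoteDescription(answer);

// loopbackStream can now feed the AnalyserNode and VAD instead of the raw mic stream.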
@@ -404,12 +398,12 @@
                 });
 
                 await myvad.start();
-
+                startButton.textContent = 'End Call';
                 isListening = true;
                 addLog('System: Listening...');
             } catch (error) {
                 console.error('Error starting voice activity:', error);
-                addLog(
+                addLog('System: Error starting voice detection. Please check your microphone and try again.');
             }
         }
 
@@ -451,7 +445,7 @@
             addLog('System: Stopped listening.');
             cancelAnimationFrame(animationId);
             addLog('System: Microphone closed');
-
+        }
 
         startButton.addEventListener('click', toggleListening);
         clearLogsButton.addEventListener('click', () => {
@@ -462,6 +456,4 @@
         initializePipelines();
     </script>
 </body>
-</html>
-
-
+</html>