atlury committed on
Commit 915dd7f · verified · 1 Parent(s): 553e1ac

Update index.backup5.html

Files changed (1)
  1. index.backup5.html +37 -89
index.backup5.html CHANGED
@@ -161,7 +161,7 @@
   <option value="quality">Highest Quality</option>
   </select>
   <div id="model-info">
-  TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
+  TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/Qwen1.5-0.5B-Chat
   </div>
   </div>
   <div id="visualizer"></div>
@@ -179,13 +179,12 @@
 
   env.localModelPath = './models';
 
-  //BELOW 5 statements added by RAHUL
   // Configure environment before initializing pipelines
   env.backends = ['wasm'];
   env.wasm = env.wasm || {};
-  env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
-  env.wasm.simd = true; // Enable SIMD if available
-  env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores
+  env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
+  env.wasm.simd = true;
+  env.numThreads = navigator.hardwareConcurrency || 4;
 
   const conversationDiv = document.getElementById('conversation');
   const startButton = document.getElementById('startButton');
@@ -199,6 +198,7 @@
   let myvad;
   let sttPipeline;
   let ttsPipeline;
+  let llmPipeline;
   let audioContext;
   let analyser;
   let dataArray;
@@ -228,46 +228,55 @@
   const barHeight = dataArray[i] / 2;
   bars[i].style.height = barHeight + 'px';
   }
-  // Use setTimeout instead of requestAnimationFrame to reduce update frequency - RAHUL ATLURY
-  animationId = setTimeout(updateVisualizer, 50); // Update every 50ms - RAHUL ATLURY
-
-  //animationId = requestAnimationFrame(updateVisualizer);
+  animationId = setTimeout(updateVisualizer, 50);
   }
 
-
   async function initializePipelines() {
   try {
-
-  //sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }); // added , { quantized: true }
-  //ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
-  // quantized: true, //changed to true - RAHUL ATLURY
-  //});
-
-  [sttPipeline, ttsPipeline] = await Promise.all([
-  pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
-  pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
+  addLog('System: Initializing pipelines...');
+  [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
+  pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+  pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
+  pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat', { quantized: true })
   ]);
 
-  addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
+  addLog('System: Digital Human Voice Chat initialized with Qwen1.5-0.5B-Chat. Click "Begin Call" to start.');
   startButton.disabled = false;
   loadingDiv.style.display = 'none';
   } catch (error) {
   console.error('Error initializing pipelines:', error);
-  addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
+  addLog(`System: Error initializing pipelines: ${error.message}`);
   loadingDiv.style.display = 'none';
   }
   }
 
   async function processSpeech(audio) {
   try {
-  if (!sttPipeline || !ttsPipeline) {
+  if (!sttPipeline || !ttsPipeline || !llmPipeline) {
   throw new Error('Pipelines not initialized');
   }
 
   const transcription = await sttPipeline(audio);
   addLog(`User: ${transcription.text}`);
 
-  const botResponse = `I heard you say: "${transcription.text}".`;
+  const messages = [
+  { role: 'system', content: 'You are a helpful assistant.' },
+  { role: 'user', content: transcription.text }
+  ];
+
+  // Apply chat template
+  const text = llmPipeline.tokenizer.apply_chat_template(messages, {
+  tokenize: false,
+  add_generation_prompt: true,
+  });
+
+  // Generate text
+  const llmResponse = await llmPipeline(text, {
+  max_new_tokens: 128,
+  do_sample: false
+  });
+
+  const botResponse = llmResponse[0].generated_text;
   addLog(`Bot: ${botResponse}`);
 
   isSpeaking = true;
@@ -276,7 +285,7 @@
   isSpeaking = false;
   } catch (error) {
   console.error('Error processing speech:', error);
-  addLog('System: Error processing speech. Please try again.');
+  addLog(`System: Error processing speech: ${error.message}`);
   }
   }
 
@@ -339,10 +348,9 @@
   remoteVideo.muted = true;
   document.getElementById('remoteVideo').volume = 0;
 
-  // Request both audio and video streams
   microphoneStream = await navigator.mediaDevices.getUserMedia({
   audio: true,
-  video: { width: 1, height: 1 } // Minimal video for echo cancellation
+  video: { width: 1, height: 1 }
   });
 
   localVideo.srcObject = microphoneStream;
@@ -351,7 +359,6 @@
   console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
   console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
 
-  // Implement loopback hack for improved echo cancellation
   const offerOptions = {
   offerToReceiveAudio: true,
   offerToReceiveVideo: false,
@@ -374,13 +381,12 @@
   await rtcLoopbackConnection.setLocalDescription(answer);
   await rtcConnection.setRemoteDescription(answer);
 
-  // Use the loopback stream for audio processing
   const source = audioContext.createMediaStreamSource(loopbackStream);
   source.connect(analyser);
 
   myvad = await vad.MicVAD.new({
-  noiseSuppression: true, ///Added by RAHUL Atlury
-  aggressiveness: 3, // Higher value for more aggressive detection Added by RAHUL ATLURY
+  noiseSuppression: true,
+  aggressiveness: 3,
   onSpeechStart: () => {
   addLog('--- Voice activity: speech start');
   updateVisualizer();
@@ -398,62 +404,4 @@
   });
 
   await myvad.start();
-  startButton.textContent = 'End Call';
-  isListening = true;
-  addLog('System: Listening...');
-  } catch (error) {
-  console.error('Error starting voice activity:', error);
-  addLog('System: Error starting voice detection. Please check your microphone and try again.');
-  }
-  }
-
-  async function stopListening() {
-  if (myvad) {
-  try {
-  await myvad.destroy();
-  } catch (error) {
-  console.error('Error stopping voice activity:', error);
-  }
-  myvad = null;
-  }
-  if (microphoneStream) {
-  microphoneStream.getTracks().forEach(track => track.stop());
-  microphoneStream = null;
-  }
-  if (audioContext) {
-  await audioContext.close();
-  audioContext = null;
-  }
-  if (localVideo) {
-  localVideo.srcObject = null;
-  }
-  if (remoteVideo) {
-  remoteVideo.srcObject = null;
-  }
-  if (rtcConnection) {
-  rtcConnection.close();
-  rtcConnection = null;
-  }
-  if (rtcLoopbackConnection) {
-  rtcLoopbackConnection.close();
-  rtcLoopbackConnection = null;
-  }
-  loopbackStream = new MediaStream();
-  stopCurrentAudio();
-  startButton.textContent = 'Begin Call';
-  isListening = false;
-  addLog('System: Stopped listening.');
-  cancelAnimationFrame(animationId);
-  addLog('System: Microphone closed');
-  }
-
-  startButton.addEventListener('click', toggleListening);
-  clearLogsButton.addEventListener('click', () => {
-  logsDiv.innerHTML = '';
-  });
-
-  createVisualizer();
-  initializePipelines();
-  </script>
-  </body>
-  </html>
+  startButton.textContent