Spaces: Running
Update index.backup5.html
Browse files
index.backup5.html (CHANGED: +34 -42)
@@ -161,7 +161,7 @@
             <option value="quality">Highest Quality</option>
         </select>
         <div id="model-info">
-            TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM:
+            TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
         </div>
     </div>
     <div id="visualizer"></div>
@@ -179,12 +179,13 @@
 
         env.localModelPath = './models';
 
+        //BELOW 5 statements added by RAHUL
         // Configure environment before initializing pipelines
         env.backends = ['wasm'];
         env.wasm = env.wasm || {};
-        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
-        env.wasm.simd = true;
-        env.numThreads = navigator.hardwareConcurrency || 4;
+        env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/'; // Ensure correct WASM paths
+        env.wasm.simd = true; // Enable SIMD if available
+        env.numThreads = navigator.hardwareConcurrency || 4; // Use available CPU cores
 
         const conversationDiv = document.getElementById('conversation');
         const startButton = document.getElementById('startButton');
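Note: this hunk pins the ONNX Runtime WASM settings through top-level env fields. In @xenova/transformers 2.x the same knobs are documented under env.backends.onnx.wasm, so an equivalent setup could look like the minimal sketch below. The sketch is not part of this commit; the CDN URL and thread count are illustrative assumptions.

// Minimal sketch, not part of this commit: WASM tuning via the documented
// env.backends.onnx.wasm path of @xenova/transformers 2.x.
import { env, pipeline } from '@xenova/transformers';

env.localModelPath = './models'; // load models from the local ./models folder
env.backends.onnx.wasm.wasmPaths =
    'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/'; // assumed location of the .wasm binaries
env.backends.onnx.wasm.numThreads = navigator.hardwareConcurrency || 4;

// Pipelines created after this point pick up the configured backend.
const stt = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true });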
@@ -198,7 +199,6 @@
         let myvad;
         let sttPipeline;
         let ttsPipeline;
-        let llmPipeline;
         let audioContext;
         let analyser;
         let dataArray;
@@ -228,55 +228,46 @@
                 const barHeight = dataArray[i] / 2;
                 bars[i].style.height = barHeight + 'px';
             }
-            animationId = requestAnimationFrame(updateVisualizer);
+            // Use setTimeout instead of requestAnimationFrame to reduce update frequency - RAHUL ATLURY
+            animationId = setTimeout(updateVisualizer, 50); // Update every 50ms - RAHUL ATLURY
+
+            //animationId = requestAnimationFrame(updateVisualizer);
         }
 
+
         async function initializePipelines() {
             try {
-
-
-
-
-
+
+                //sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }); // added , { quantized: true }
+                //ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', {
+                //    quantized: true, //changed to true - RAHUL ATLURY
+                //});
+
+                [sttPipeline, ttsPipeline] = await Promise.all([
+                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
+                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true })
                 ]);
 
-                addLog('System: Digital Human Voice Chat initialized
+                addLog('System: Digital Human Voice Chat initialized. Click "Begin Call" to start.');
                 startButton.disabled = false;
                 loadingDiv.style.display = 'none';
             } catch (error) {
                 console.error('Error initializing pipelines:', error);
-                addLog(
+                addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
                 loadingDiv.style.display = 'none';
             }
         }
 
         async function processSpeech(audio) {
             try {
-                if (!sttPipeline || !ttsPipeline || !llmPipeline) {
+                if (!sttPipeline || !ttsPipeline) {
                     throw new Error('Pipelines not initialized');
                 }
 
                 const transcription = await sttPipeline(audio);
                 addLog(`User: ${transcription.text}`);
 
-                const messages = [
-                    { role: 'system', content: 'You are a helpful assistant.' },
-                    { role: 'user', content: transcription.text }
-                ];
-
-                // Apply chat template
-                const text = llmPipeline.tokenizer.apply_chat_template(messages, {
-                    tokenize: false,
-                    add_generation_prompt: true,
-                });
-
-                // Generate text
-                const llmResponse = await llmPipeline(text, {
-                    max_new_tokens: 128,
-                    do_sample: false
-                });
-
-                const botResponse = llmResponse[0].generated_text;
+                const botResponse = `I heard you say: "${transcription.text}".`;
                 addLog(`Bot: ${botResponse}`);
 
                 isSpeaking = true;
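With this hunk the LLM step is stubbed out and the bot simply echoes the transcription, which matches the "LLM: Placeholder" label added to the model-info line. If a language-model reply is reinstated later, a minimal sketch mirroring the removed apply_chat_template flow could look like the following; it is not part of this commit, and the model id is an assumption.

// Minimal sketch, not part of this commit: an LLM reply step using a
// text-generation pipeline, following the same shape as the removed code.
import { pipeline } from '@xenova/transformers';

const llmPipeline = await pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat'); // assumed model id

async function generateReply(userText) {
    const messages = [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: userText }
    ];
    // Apply the model's chat template, then generate greedily
    const prompt = llmPipeline.tokenizer.apply_chat_template(messages, {
        tokenize: false,
        add_generation_prompt: true,
    });
    const output = await llmPipeline(prompt, { max_new_tokens: 128, do_sample: false });
    return output[0].generated_text;
}

processSpeech would then call generateReply(transcription.text) instead of building the placeholder string.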
@@ -285,7 +276,7 @@
                 isSpeaking = false;
             } catch (error) {
                 console.error('Error processing speech:', error);
-                addLog(
+                addLog('System: Error processing speech. Please try again.');
             }
         }
 
@@ -348,9 +339,10 @@
                 remoteVideo.muted = true;
                 document.getElementById('remoteVideo').volume = 0;
 
+                // Request both audio and video streams
                 microphoneStream = await navigator.mediaDevices.getUserMedia({
                     audio: true,
-                    video: { width: 1, height: 1 }
+                    video: { width: 1, height: 1 } // Minimal video for echo cancellation
                 });
 
                 localVideo.srcObject = microphoneStream;
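The hunk only annotates the getUserMedia call; echo cancellation itself is negotiated through the audio track constraints. A minimal sketch of requesting it explicitly with standard MediaTrackConstraints (not part of this commit) would be:

// Minimal sketch, not part of this commit: explicitly requesting echo
// cancellation and noise suppression on the microphone track.
const stream = await navigator.mediaDevices.getUserMedia({
    audio: {
        echoCancellation: true, // let the browser remove played-back audio from the mic signal
        noiseSuppression: true,
        autoGainControl: true
    },
    video: { width: 1, height: 1 } // keep the tiny video track used by the page
});
console.log(stream.getAudioTracks()[0].getSettings()); // verify which constraints were honoured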
@@ -359,6 +351,7 @@
                 console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
                 console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
 
+                // Implement loopback hack for improved echo cancellation
                 const offerOptions = {
                     offerToReceiveAudio: true,
                     offerToReceiveVideo: false,
@@ -381,12 +374,13 @@
                 await rtcLoopbackConnection.setLocalDescription(answer);
                 await rtcConnection.setRemoteDescription(answer);
 
+                // Use the loopback stream for audio processing
                 const source = audioContext.createMediaStreamSource(loopbackStream);
                 source.connect(analyser);
 
                 myvad = await vad.MicVAD.new({
-
-
+                    noiseSuppression: true, ///Added by RAHUL Atlury
+                    aggressiveness: 3, // Higher value for more aggressive detection Added by RAHUL ATLURY
                     onSpeechStart: () => {
                         addLog('--- Voice activity: speech start');
                         updateVisualizer();
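For context on the "loopback hack" these comments refer to: routing the microphone through a local pair of RTCPeerConnections lets the browser's acoustic echo canceller account for the audio the page itself is playing. The sketch below is a self-contained illustration of that pattern, not the file's exact code; the variable names mirror the ones used in the file.

// Minimal sketch, not this file's exact code: a local WebRTC loopback so the
// browser's AEC processes the microphone signal.
const microphoneStream = await navigator.mediaDevices.getUserMedia({ audio: true });

const rtcConnection = new RTCPeerConnection();
const rtcLoopbackConnection = new RTCPeerConnection();
const loopbackStream = new MediaStream(); // receives the echo-cancelled audio

rtcConnection.onicecandidate = e => e.candidate && rtcLoopbackConnection.addIceCandidate(e.candidate);
rtcLoopbackConnection.onicecandidate = e => e.candidate && rtcConnection.addIceCandidate(e.candidate);
rtcLoopbackConnection.ontrack = e => loopbackStream.addTrack(e.track);

microphoneStream.getTracks().forEach(track => rtcConnection.addTrack(track, microphoneStream));

const offer = await rtcConnection.createOffer({ offerToReceiveAudio: true, offerToReceiveVideo: false });
await rtcConnection.setLocalDescription(offer);
await rtcLoopbackConnection.setRemoteDescription(offer);

const answer = await rtcLoopbackConnection.createAnswer();
await rtcLoopbackConnection.setLocalDescription(answer);
await rtcConnection.setRemoteDescription(answer);

// loopbackStream can now feed the AnalyserNode and VAD instead of the raw mic stream.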
@@ -404,12 +398,12 @@
                 });
 
                 await myvad.start();
-
+                startButton.textContent = 'End Call';
                 isListening = true;
                 addLog('System: Listening...');
             } catch (error) {
                 console.error('Error starting voice activity:', error);
-                addLog(
+                addLog('System: Error starting voice detection. Please check your microphone and try again.');
             }
         }
 
@@ -451,7 +445,7 @@
             addLog('System: Stopped listening.');
             cancelAnimationFrame(animationId);
             addLog('System: Microphone closed');
-
+        }
 
         startButton.addEventListener('click', toggleListening);
         clearLogsButton.addEventListener('click', () => {
@@ -462,6 +456,4 @@
         initializePipelines();
     </script>
 </body>
-</html>
-
-
+</html>