Commit 1f41a8a
1 Parent(s): 330157f

update whisper
backend/services/interview_engine.py
CHANGED
@@ -7,6 +7,7 @@ from langchain_groq import ChatGroq
 import logging
 import tempfile
 import shutil
+import torch

 # Initialize models
 chat_groq_api = os.getenv("GROQ_API_KEY")
@@ -25,7 +26,7 @@ def load_whisper_model():
     global whisper_model
     if whisper_model is None:
         try:
-            device = "cuda" if
+            device = "cuda" if torch.cuda.is_available() else "cpu"
             compute_type = "float16" if device == "cuda" else "int8"
             whisper_model = WhisperModel("base", device=device, compute_type=compute_type)
             logging.info(f"Whisper model loaded on {device} with {compute_type}")
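The only change to load_whisper_model() is that device selection now leans on torch. For reference, here is a minimal, self-contained sketch of the same lazy-loading pattern; the model size ("base") and the float16/int8 split come from the hunk above, while the faster_whisper import and the return value are assumptions about the surrounding module:

import logging

import torch
from faster_whisper import WhisperModel

whisper_model = None  # module-level cache: load the model once, reuse it afterwards


def load_whisper_model():
    """Lazily load faster-whisper, preferring the GPU when one is visible to torch."""
    global whisper_model
    if whisper_model is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # float16 is the usual choice on CUDA; int8 keeps CPU memory and latency down
        compute_type = "float16" if device == "cuda" else "int8"
        whisper_model = WhisperModel("base", device=device, compute_type=compute_type)
        logging.info(f"Whisper model loaded on {device} with {compute_type}")
    return whisper_model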
@@ -175,59 +176,30 @@ def convert_webm_to_wav(webm_path, wav_path):
     return None

 def whisper_stt(audio_path):
-    """Speech-to-text using Faster-Whisper with better error handling"""
     try:
         if not audio_path or not os.path.exists(audio_path):
             logging.error(f"Audio file does not exist: {audio_path}")
             return ""
-
-
-        file_size = os.path.getsize(audio_path)
-        if file_size == 0:
+
+        if os.path.getsize(audio_path) == 0:
             logging.error(f"Audio file is empty: {audio_path}")
             return ""
-
-
-
-
-
-
-
-
-
-        else:
-            logging.warning("Could not convert WebM to WAV, trying with original file")
-
-        model = load_whisper_model()
-
-        # Add timeout and better error handling
-        try:
-            segments, info = model.transcribe(
-                audio_path,
-                language="en",  # Specify language for better performance
-                task="transcribe",
-                vad_filter=True,  # Voice activity detection
-                vad_parameters=dict(min_silence_duration_ms=500)
-            )
-
-            transcript_parts = []
-            for segment in segments:
-                if hasattr(segment, 'text') and segment.text.strip():
-                    transcript_parts.append(segment.text.strip())
-
-            transcript = " ".join(transcript_parts)
-
-            if transcript:
-                logging.info(f"Transcription successful: '{transcript[:100]}...'")
-            else:
-                logging.warning("No speech detected in audio file")
-
-            return transcript.strip()
-
-        except Exception as e:
-            logging.error(f"Error during transcription: {e}")
+
+        # Convert WebM to WAV using ffmpeg (ensure ffmpeg is available)
+        converted_path = audio_path.replace(".webm", ".wav")
+        subprocess.run([
+            "ffmpeg", "-y", "-i", audio_path, "-ar", "16000", "-ac", "1", converted_path
+        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+        if not os.path.exists(converted_path) or os.path.getsize(converted_path) == 0:
+            logging.error(f"Conversion failed or produced empty file: {converted_path}")
             return ""
-
+
+        model = load_whisper_model()
+        segments, _ = model.transcribe(converted_path)
+        transcript = " ".join(segment.text for segment in segments)
+        return transcript.strip()
+
     except Exception as e:
         logging.error(f"Error in STT: {e}")
         return ""
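The rewritten whisper_stt() drops the old in-function try/except around model.transcribe and instead shells out to ffmpeg before transcribing. Two things the hunk itself does not show: subprocess is not imported in this diff, so it presumably already appears near the other imports, and ffmpeg must be available on the PATH of the running Space. Below is a minimal sketch of the same convert-then-transcribe flow, with the model passed in explicitly (the committed code uses the module-level lazy loader instead); the helper name transcribe_webm is illustrative only:

import logging
import os
import subprocess

from faster_whisper import WhisperModel


def transcribe_webm(audio_path, model):
    """Convert a WebM recording to 16 kHz mono WAV with ffmpeg, then transcribe it."""
    if not audio_path or not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
        logging.error(f"Missing or empty audio file: {audio_path}")
        return ""

    # -y overwrites an existing output, -ar 16000 resamples to 16 kHz, -ac 1 downmixes to mono
    converted_path = audio_path.replace(".webm", ".wav")
    subprocess.run(
        ["ffmpeg", "-y", "-i", audio_path, "-ar", "16000", "-ac", "1", converted_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if not os.path.exists(converted_path) or os.path.getsize(converted_path) == 0:
        logging.error(f"Conversion failed or produced empty file: {converted_path}")
        return ""

    # faster-whisper returns a generator of segments plus an info object
    segments, _ = model.transcribe(converted_path)
    return " ".join(segment.text for segment in segments).strip()


if __name__ == "__main__":
    model = WhisperModel("base", device="cpu", compute_type="int8")
    print(transcribe_webm("recording.webm", model))

Compared with the removed version, the new call drops the language="en" hint and the vad_filter settings, so transcription now auto-detects the language and no longer filters long silences. Note also that audio_path.replace(".webm", ".wav") only yields a distinct output path when the upload really is a .webm file, which matches the recording.webm filename used by the front end below.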
backend/templates/interview.html
CHANGED
@@ -695,7 +695,10 @@
             delete options.mimeType;
         }

-        this.mediaRecorder = new MediaRecorder(stream,
+        this.mediaRecorder = new MediaRecorder(stream, {
+            mimeType: 'audio/webm;codecs=opus'
+        });
+
         this.audioChunks = [];

         this.mediaRecorder.ondataavailable = (event) => {
@@ -757,7 +760,8 @@
         console.log('Processing', this.audioChunks.length, 'audio chunks');

         // Create blob from audio chunks
-        const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm
+        const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
+        formData.append('audio', audioBlob, 'recording.webm');

         console.log('Created audio blob:', audioBlob.size, 'bytes');

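On the front end the recorder now pins mimeType to audio/webm;codecs=opus and uploads the blob as recording.webm under the audio form field. The server route that receives that field is not part of this commit; the following is a hypothetical Flask-style sketch (route path, framework, and the whisper_stt import path are all assumptions based on the repo layout) of how such an upload could be written to a temporary .webm file and handed to the engine:

import os
import tempfile

from flask import Flask, jsonify, request

from services.interview_engine import whisper_stt  # assumed import path

app = Flask(__name__)


@app.route("/api/transcribe", methods=["POST"])  # hypothetical route name
def transcribe():
    # The recorder appends the blob under the "audio" key with filename recording.webm
    upload = request.files.get("audio")
    if upload is None:
        return jsonify({"error": "no audio part in request"}), 400

    # Keep the .webm suffix so the ".webm" -> ".wav" replace in whisper_stt works
    fd, webm_path = tempfile.mkstemp(suffix=".webm")
    os.close(fd)
    try:
        upload.save(webm_path)
        transcript = whisper_stt(webm_path)
    finally:
        os.remove(webm_path)

    return jsonify({"transcript": transcript})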