# Source: Hugging Face Space "husseinelsaadi/Codingo" (status: Paused).
# File size: 13,202 bytes.

import os
import json
import asyncio
import edge_tts
from faster_whisper import WhisperModel
from langchain_groq import ChatGroq
import logging
import tempfile
import shutil
import torch

# Print GPU / cuDNN diagnostics once at import time.
#
# Device-specific queries are only issued when CUDA is actually present:
# ``torch.cuda.get_device_name(0)`` raises on CPU-only hosts, which would
# otherwise abort the import of this whole module.
if torch.cuda.is_available():
    print("🔥 CUDA Available")
    print(torch.cuda.get_device_name(0))
    print("cuDNN version:", torch.backends.cudnn.version())
else:
    print("❌ CUDA Not Available")
print("🔥 CUDA:", torch.cuda.is_available())
print("🧠 GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A")
print("💡 cuDNN version:", torch.backends.cudnn.version())
print("💥 cuDNN enabled:", torch.backends.cudnn.is_available())



# Initialize models
chat_groq_api = os.getenv("GROQ_API_KEY")

# Build the real Groq client only when an API key is configured.  Without a
# key (local development, automated tests) or when construction fails, a
# small stub with the same ``invoke`` entry point is installed instead, so
# importing this module never raises because of the LLM backend.
groq_llm = None
if chat_groq_api:
    try:
        groq_llm = ChatGroq(
            api_key=chat_groq_api,
            model_name="llama-3.3-70b-versatile",
            temperature=0.7,
        )
    except Exception as init_error:
        # ``groq_llm`` is still None here, which selects the stub below.
        logging.error(f"Error initializing ChatGroq LLM: {init_error}. Falling back to dummy model.")

if groq_llm is None:
    class DummyGroq:
        """Stand-in language model used when the Groq client is unavailable.

        ``invoke`` answers every prompt with the same canned interview
        question instead of contacting an external service, so the
        interview flow keeps working, just without real LLM behaviour.
        """

        def invoke(self, prompt: str):
            # The prompt is deliberately ignored; a smarter fallback could
            # inspect it to tailor the reply.
            return "Tell me about yourself and why you're interested in this position."

    groq_llm = DummyGroq()

# Initialize Whisper model
#
# Loading the Whisper model can take several seconds on first use because the
# model weights must be downloaded from Hugging Face. This delay can cause
# the API call to ``/api/transcribe_audio`` to appear stuck while the model
# downloads. To mitigate this, we allow the model size to be configured via
# the ``WHISPER_MODEL_NAME`` environment variable and preload the model when
# this module is imported. Using a smaller model (e.g. "tiny" or "base.en")
# reduces download size and inference time considerably.
# Process-wide cache for the WhisperModel instance created by
# ``load_whisper_model``; None until the first successful load.
whisper_model = None


def load_whisper_model():
    """Return the shared faster-whisper model, creating it on first use.

    The model size is read from the ``WHISPER_MODEL_NAME`` environment
    variable (default ``"tiny"``).  CUDA with ``float16`` is used when
    ``torch.cuda.is_available()`` reports a GPU, otherwise CPU with ``int8``.
    If loading on the GPU fails, one retry is made on CPU/int8.

    Returns:
        The cached ``WhisperModel`` instance.

    Raises:
        Exception: whatever ``WhisperModel`` raised when the CPU load fails
            (either as the first attempt or as the fallback).
    """
    global whisper_model
    if whisper_model is not None:
        return whisper_model

    # Default to a lightweight model to keep start-up fast.  The accepted
    # names are whatever the installed faster-whisper version supports
    # (e.g. tiny, base, base.en, small, medium) -- see its documentation.
    model_name = os.getenv("WHISPER_MODEL_NAME", "tiny")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"

    try:
        whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
        logging.info(f"Whisper model '{model_name}' loaded on {device} with {compute_type}")
    except Exception as e:
        logging.error(f"Error loading Whisper model: {e}")
        if device == "cpu":
            # The failed attempt already used the CPU/int8 configuration;
            # repeating the identical call would only fail (and possibly
            # re-download) a second time.
            raise
        # Fallback to CPU
        whisper_model = WhisperModel(model_name, device="cpu", compute_type="int8")
        logging.info(f"Whisper model '{model_name}' loaded on cpu with int8 (fallback)")
    return whisper_model

# Preload the model at import time so the first transcription request does
# not pay the model download/initialisation cost.  The call is unguarded, so
# an exception raised by the loader propagates out of the module import.
load_whisper_model()

def generate_first_question(profile, job):
    """Ask the LLM for an opening interview question.

    ``profile`` is read with ``.get`` for the keys ``skills``, ``experience``
    and ``education``; ``job`` must expose ``role`` and ``company``
    attributes.  Any failure (including a reply shorter than ten characters)
    results in a generic default question, so this function never raises.

    Returns:
        The question text as a stripped string.
    """
    default_question = "Tell me about yourself and why you're interested in this position."
    try:
        prompt = f"""
        You are conducting an interview for a {job.role} position at {job.company}.
        The candidate's profile shows:
        - Skills: {profile.get('skills', [])}
        - Experience: {profile.get('experience', [])}
        - Education: {profile.get('education', [])}
        
        Generate an appropriate opening interview question that is professional and relevant.
        Keep it concise and clear. Respond with ONLY the question text, no additional formatting.
        If the interview is for a technical role, focus on technical skills. Make the question related
        to the job role and the candidate's background and the previous question.
        """

        reply = groq_llm.invoke(prompt)

        # The reply may be a message object exposing ``content``, a plain
        # string, or something else entirely; reduce all three to text.
        if hasattr(reply, 'content'):
            raw_text = reply.content
        elif isinstance(reply, str):
            raw_text = reply
        else:
            raw_text = str(reply)
        question = raw_text.strip()

        # Empty or implausibly short replies are replaced by the default.
        if len(question) < 10:
            question = default_question

        logging.info(f"Generated question: {question}")
        return question

    except Exception as exc:
        logging.error(f"Error generating first question: {exc}")
        return default_question

def edge_tts_to_file_sync(text, output_path, voice="en-US-AriaNeural"):
    """Synthesise ``text`` to an audio file with edge-tts, blocking until done.

    Args:
        text: Text to speak.  Empty or whitespace-only text is rejected.
        output_path: Desired destination file.  If it has no directory part,
            or that directory cannot be created/written, the file is placed
            in ``/tmp/audio`` under the same base name instead.
        voice: edge-tts voice name.

    Returns:
        The path actually written (which may differ from ``output_path``
        after the ``/tmp/audio`` fallback), or ``None`` when synthesis
        failed or produced a file of 1000 bytes or less.  Never raises.
    """
    try:
        # Ensure text is not empty
        if not text or not text.strip():
            logging.error("Empty text provided for TTS")
            return None

        output_path = _prepare_tts_output_path(output_path)

        async def generate_audio():
            try:
                communicate = edge_tts.Communicate(text, voice)
                await communicate.save(output_path)
                logging.info(f"TTS audio saved to: {output_path}")
            except Exception as e:
                logging.error(f"Error in async TTS generation: {e}")
                raise

        # Only the *loop detection* is wrapped in ``except RuntimeError``.
        # Wrapping the synthesis itself would make a RuntimeError raised by
        # the TTS call look like "no event loop" and run the synthesis twice.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            loop_running = False
        else:
            loop_running = True

        if loop_running:
            # A loop is already running in this thread and cannot be
            # re-entered, so run the coroutine on a fresh loop in a worker
            # thread.  The timeout lives inside that loop so it really
            # cancels the work; a timeout on ``future.result`` alone would
            # still block when the executor shuts down.
            import concurrent.futures

            def run_in_thread():
                asyncio.run(asyncio.wait_for(generate_audio(), timeout=30))

            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                executor.submit(run_in_thread).result()
        else:
            # asyncio.run creates and closes its own loop, so no closed loop
            # is left installed as the thread's current loop afterwards.
            asyncio.run(generate_audio())

        # Verify file was created and has content
        if not os.path.exists(output_path):
            logging.error(f"TTS file was not created: {output_path}")
            return None

        file_size = os.path.getsize(output_path)
        if file_size > 1000:  # At least 1KB for a valid audio file
            logging.info(f"TTS file created successfully: {output_path} ({file_size} bytes)")
            return output_path

        logging.error(f"TTS file is too small: {output_path} ({file_size} bytes)")
        return None

    except Exception as e:
        logging.error(f"Error in TTS generation: {e}")
        return None


def _prepare_tts_output_path(output_path):
    """Return a path for ``output_path`` whose directory exists and is writable.

    Falls back to ``/tmp/audio/<basename>`` when the path has no directory
    component or when its directory cannot be created or written to.
    """
    fallback_dir = "/tmp/audio"

    directory = os.path.dirname(output_path)
    if not directory:
        directory = fallback_dir
        output_path = os.path.join(directory, os.path.basename(output_path))

    try:
        # Creating the directory is part of the check, so a directory that
        # cannot be created also triggers the fallback instead of an error.
        os.makedirs(directory, exist_ok=True)
        # A uniquely named probe avoids collisions between concurrent calls
        # in the same process; it is removed automatically on close.
        with tempfile.TemporaryFile(dir=directory):
            pass
        logging.info(f"Directory {directory} is writable")
    except OSError as e:  # PermissionError is a subclass of OSError
        logging.error(f"Directory {directory} is not writable: {e}")
        # Fallback to /tmp
        directory = fallback_dir
        output_path = os.path.join(directory, os.path.basename(output_path))
        os.makedirs(directory, exist_ok=True)

    return output_path

def convert_webm_to_wav(webm_path, wav_path):
    """Transcode ``webm_path`` to a 16 kHz mono WAV at ``wav_path`` via ffmpeg.

    Returns ``wav_path`` when ffmpeg exits with status 0 and the output file
    exists and is non-empty; returns ``None`` in every other case, including
    a missing ffmpeg binary or a run exceeding the 30 second timeout.
    """
    try:
        import subprocess

        ffmpeg_cmd = ['ffmpeg', '-i', webm_path, '-ar', '16000', '-ac', '1', '-y', wav_path]
        completed = subprocess.run(ffmpeg_cmd, capture_output=True, text=True, timeout=30)

        converted = (
            completed.returncode == 0
            and os.path.exists(wav_path)
            and os.path.getsize(wav_path) > 0
        )
        if not converted:
            logging.error(f"FFmpeg conversion failed: {completed.stderr}")
            return None

        logging.info(f"Successfully converted {webm_path} to {wav_path}")
        return wav_path
    except Exception as exc:
        # Covers timeouts, a missing ffmpeg executable, and anything else.
        logging.error(f"Error converting audio: {exc}")
        return None

import subprocess  # top of the file if not already imported

def whisper_stt(audio_path):
    """Transcribe an audio file to text with Faster-Whisper.

    Inputs that do not already carry a ``.wav`` extension are first
    converted with ``convert_webm_to_wav`` into a sibling file that has the
    same stem and a ``.wav`` extension; that file is left on disk.

    Args:
        audio_path: Path of the recorded audio file.

    Returns:
        The stripped transcript, or ``""`` when the input is missing or
        empty, the conversion fails, or any error occurs.  Never raises.
    """
    try:
        if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
            logging.error(f"Audio file is empty or missing: {audio_path}")
            return ""

        # Swap only the real extension.  ``str.replace`` would rewrite every
        # ".webm" occurrence in the path and would leave input and output
        # identical for any other extension.
        stem, extension = os.path.splitext(audio_path)
        if extension.lower() == ".wav":
            # Already a WAV file: transcribe it directly rather than asking
            # ffmpeg to convert a file onto itself.
            wav_path = audio_path
        else:
            wav_path = stem + ".wav"
            # The shared helper checks ffmpeg's exit status and enforces a
            # timeout, so a stale .wav left by an earlier run is never
            # transcribed after a failed conversion.
            if convert_webm_to_wav(audio_path, wav_path) is None:
                logging.error(f"FFmpeg conversion failed or produced empty file: {wav_path}")
                return ""

        model = load_whisper_model()
        segments, _ = model.transcribe(wav_path)
        transcript = " ".join(segment.text for segment in segments)
        return transcript.strip()
    except Exception as e:
        logging.error(f"Error in STT: {e}")
        return ""

def evaluate_answer(question, answer, job_role="Software Developer", seniority="Mid-level"):
    """Ask the LLM to grade a candidate's answer.

    Args:
        question: The interview question that was asked.
        answer: The candidate's answer; empty/whitespace-only answers are
            graded ``"Poor"`` without calling the LLM.
        job_role: Role name inserted into the prompt.
        seniority: Seniority level inserted into the prompt.

    Returns:
        A dict with ``"score"`` (one of ``Poor``, ``Medium``, ``Good``,
        ``Excellent``) and ``"feedback"`` (str).  Unparseable replies fall
        back to ``Medium`` and a default feedback sentence; any exception
        yields ``Medium`` with an "unable to evaluate" message.
    """
    try:
        if not answer or not answer.strip():
            return {
                "score": "Poor",
                "feedback": "No answer provided."
            }

        prompt = f"""
        You are evaluating a candidate's answer for a {seniority} {job_role} position.
        
        Question: {question}
        Candidate Answer: {answer}
        
        Evaluate based on technical correctness, clarity, and relevance.
        Provide a brief evaluation in 1-2 sentences.
        
        Rate the answer as one of: Poor, Medium, Good, Excellent
        
        Respond in this exact format:
        Score: [Poor/Medium/Good/Excellent]
        Feedback: [Your brief feedback here]
        """

        response = groq_llm.invoke(prompt)

        # Handle AIMessage object properly
        if hasattr(response, 'content'):
            response_text = response.content.strip()
        elif isinstance(response, str):
            response_text = response.strip()
        else:
            response_text = str(response).strip()

        score = "Medium"  # default
        feedback = "Good answer, but could be more detailed."  # default

        # Lower-cased lookup so "good", "GOOD" and "Good" all map to "Good".
        canonical_scores = {
            name.lower(): name for name in ("Poor", "Medium", "Good", "Excellent")
        }

        for line in response_text.splitlines():
            # Split on the FIRST colon only, so colons (or a repeated
            # "Feedback:") inside the feedback text are preserved.
            label, separator, value = line.partition(":")
            if not separator:
                continue

            # Tolerate markdown decoration such as "**Score:** Good".
            label = label.strip(" \t*#").lower()
            value = value.strip().lstrip("*").strip()

            if label == "score":
                # Anything that is not one of the four grades (after
                # removing brackets/punctuation) falls back to "Medium".
                score = canonical_scores.get(value.strip(" \t*_`[].").lower(), "Medium")
            elif label == "feedback" and value:
                # An empty "Feedback:" line keeps the current value instead
                # of blanking it.
                feedback = value

        return {
            "score": score,
            "feedback": feedback
        }

    except Exception as e:
        logging.error(f"Error evaluating answer: {e}")
        return {
            "score": "Medium",
            "feedback": "Unable to evaluate answer at this time."
        }