husseinelsaadi committed
Commit 5fc8fb2 · 1 Parent(s): 5b71262

updated eval question

Files changed (1)
  1. app.py +56 -68
app.py CHANGED
@@ -537,79 +537,59 @@ from typing import Dict
 def eval_question_quality(
     question: str,
     job_role: str,
-    seniority: str,
-    judge_pipeline=None,
-    max_retries=1  # Allow at least 1 retry on parse fail
+    seniority: str
 ) -> Dict[str, str]:
-    import time
-    try:
-        # Use provided pipeline or fall back to global
-        if judge_pipeline is None:
-            judge_pipeline = globals().get("judge_pipeline")
-
-        if not judge_pipeline:
-            return {
-                "Score": "Error",
-                "Reasoning": "Judge pipeline not available",
-                "Improvements": "Please provide a valid language model pipeline"
-            }
-
-        prompt = f"""
-        ... (same as your prompt) ...
-        Now evaluate this question:
-        \"{question}\"
-        """
-
-        for attempt in range(max_retries + 1):
-            response = judge_pipeline(
-                prompt,
-                max_new_tokens=512,
-                do_sample=False,
-                temperature=0.1,
-                repetition_penalty=1.2
-            )[0]["generated_text"]
-
-            try:
-                # Fallback to last {...} block
-                match = re.search(r'\{.*\}', response, re.DOTALL)
-                if not match:
-                    raise ValueError("Could not locate JSON structure in model output.")
-                json_str = match.group(0)
-                result = json.loads(json_str)
-
-                # Validate required fields and values
-                required_keys = ["Score", "Reasoning", "Improvements"]
-                valid_scores = {"Poor", "Medium", "Good", "Excellent"}
-                if not all(k in result for k in required_keys):
-                    raise ValueError("Missing required fields.")
-                if result["Score"] not in valid_scores:
-                    raise ValueError("Invalid score value.")
-                return result
-
-            except Exception as e:
-                logging.warning(f"Attempt {attempt+1} JSON parsing failed: {e}")
-                time.sleep(0.2)  # Small delay before retry
-
-        # If all attempts fail, return a default valid dict
-        return {
-            "Score": "Poor",
-            "Reasoning": "The evaluation model failed to produce a valid score, so defaulted to 'Poor'. Check model output and prompt formatting.",
-            "Improvements": [
-                "Ensure the question is clear and role-relevant.",
-                "Double-check prompt and formatting.",
-                "Try rephrasing the question to match rubric."
-            ]
-        }
+    """
+    Evaluate the quality of a generated interview question using Groq LLM.
+    Returns a structured JSON with score, reasoning, and suggestions.
+    """
+    import time, json
+
+    prompt = f"""
+    You are a senior AI hiring expert evaluating the quality of an interview question for a {seniority} {job_role} role.
+
+    Evaluate the question based on:
+    - Relevance to the role and level
+    - Clarity and conciseness
+    - Depth of technical insight
+
+    ---
+    Question: {question}
+    ---
+
+    Respond only with a valid JSON like:
+    {{
+        "Score": "Poor" | "Medium" | "Good" | "Excellent",
+        "Reasoning": "short justification",
+        "Improvements": ["tip1", "tip2"]
+    }}
+    """
+
+    try:
+        start = time.time()
+        response = groq_llm.invoke(prompt)
+        print("⏱️ eval_question_quality duration:", round(time.time() - start, 2), "s")
+
+        # Extract JSON safely
+        start_idx = response.rfind("{")
+        end_idx = response.rfind("}") + 1
+        json_str = response[start_idx:end_idx]
+        result = json.loads(json_str)
+
+        if result.get("Score") in {"Poor", "Medium", "Good", "Excellent"}:
+            return result
+        else:
+            raise ValueError("Invalid Score value in model output")
 
     except Exception as e:
-        logging.error(f"Error in eval_question_quality: {type(e).__name__}: {e}", exc_info=True)
+        print(f"⚠️ eval_question_quality fallback: {e}")
         return {
             "Score": "Poor",
-            "Reasoning": f"Critical error occurred: {str(e)}. Defaulted to 'Poor'.",
+            "Reasoning": "Evaluation failed, using fallback.",
             "Improvements": [
-                "Retry with a different question.",
-                "Check your judge pipeline connection.",
-                "Contact support if this persists."
+                "Ensure the question is relevant and clear.",
+                "Avoid vague or overly generic phrasing.",
+                "Include role-specific context if needed."
             ]
         }
 
@@ -621,9 +601,10 @@ def evaluate_answer(
     seniority: str,
 ) -> Dict[str, str]:
     """
-    Fast and structured answer evaluation using Groq LLM (e.g. Mixtral-8x7b).
+    Fast and structured answer evaluation using Groq LLM (e.g. Mixtral or LLaMA 3).
     """
     import time, json
+    from langchain_core.messages import AIMessage
 
     prompt = f"""
     You are a technical interviewer evaluating a candidate for a {seniority} {job_role} role.
@@ -650,12 +631,19 @@ Respond ONLY with valid JSON in the following format:
 
     try:
         start = time.time()
-        response = groq_llm.invoke(prompt)
+        raw = groq_llm.invoke(prompt)
         print("⏱️ evaluate_answer duration:", round(time.time() - start, 2), "s")
-        print("🔍 Raw Groq Response:\n", response)
-        start_idx = response.rfind("{")
-        end_idx = response.rfind("}") + 1
-        json_str = response[start_idx:end_idx]
+
+        if isinstance(raw, AIMessage):
+            output = raw.content
+        else:
+            output = str(raw)
+
+        print("🔍 Raw Groq Response:\n", output)
+
+        start_idx = output.rfind("{")
+        end_idx = output.rfind("}") + 1
+        json_str = output[start_idx:end_idx]
 
         result = json.loads(json_str)
         if result.get("Score") in {"Poor", "Medium", "Good", "Excellent"}:
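
For reference, a minimal standalone sketch of the slice-and-parse step that both updated functions now rely on: take the substring from the last "{" to the last "}" of the raw reply, json.loads it, and check the Score field. The helper name extract_score_json and the sample reply are illustrative only; the real code inlines this logic and returns a default dict when parsing fails. It assumes the model ends its reply with a single flat JSON object, as the prompts above request.

    import json

    VALID_SCORES = {"Poor", "Medium", "Good", "Excellent"}

    def extract_score_json(output: str) -> dict:
        """Illustrative helper: pull the trailing JSON object out of a raw LLM reply.

        Mirrors the extraction in the updated eval_question_quality / evaluate_answer:
        slice from the last '{' to the last '}' and parse. Assumes the reply ends with
        a single flat JSON object (no nested braces after it).
        """
        start_idx = output.rfind("{")
        end_idx = output.rfind("}") + 1
        if start_idx == -1 or end_idx == 0:
            raise ValueError("No JSON object found in model output")

        result = json.loads(output[start_idx:end_idx])
        if result.get("Score") not in VALID_SCORES:
            raise ValueError("Invalid Score value in model output")
        return result

    # Hypothetical raw reply, used only to exercise the parsing path:
    raw = 'Here is my evaluation:\n{"Score": "Good", "Reasoning": "Clear and role-relevant.", "Improvements": ["Ask a scaling follow-up."]}'
    print(extract_score_json(raw))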
 
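Likewise, a small sketch of the response normalization behind the new isinstance(raw, AIMessage) branch in evaluate_answer: LangChain chat models return an AIMessage whose text lives in .content, while other callables may return a bare string. The to_text helper name is illustrative; the diff above inlines this check directly after groq_llm.invoke(prompt).

    from langchain_core.messages import AIMessage

    def to_text(raw) -> str:
        """Illustrative helper: normalize an LLM response to plain text.

        LangChain chat models (e.g. a Groq-backed chat model) return an AIMessage
        whose text is in .content; anything else is coerced with str(), matching
        the updated evaluate_answer.
        """
        if isinstance(raw, AIMessage):
            return raw.content
        return str(raw)

    # Usage matching the diff (groq_llm and prompt come from the surrounding code):
    # output = to_text(groq_llm.invoke(prompt))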