Update app.py
app.py CHANGED
@@ -471,11 +471,9 @@ def retrieve_interview_data(job_role, all_roles):
     """
     Retrieve all interview Q&A for a given job role.
     Falls back to similar roles if no data found.
-
     Args:
         job_role (str): Input job role (can be misspelled)
        all_roles (list): Full list of available job roles
-
     Returns:
         list: List of QA dicts with keys: 'question', 'answer', 'job_role'
     """
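
The "falls back to similar roles" behavior is not shown in this hunk. A minimal sketch of how such a fallback could work, assuming stdlib fuzzy matching (the helper name is illustrative, not the app's actual code):

from difflib import get_close_matches

def fuzzy_match_role(job_role, all_roles, cutoff=0.6):
    """Return the closest known role for a possibly misspelled input, or None."""
    lowered = {role.lower(): role for role in all_roles}
    matches = get_close_matches(job_role.lower(), list(lowered), n=1, cutoff=cutoff)
    return lowered[matches[0]] if matches else None
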
@@ -646,36 +644,28 @@ def evaluate_answer(
     # Enhanced prompt (your version)
     prompt = f"""
 You are an expert technical interviewer evaluating a candidate's response for a {job_role} position at the {seniority} level.
-
 You are provided with:
 - The question asked
 - The candidate's response
 - A reference answer that represents a high-quality expected answer
-
 Evaluate the candidate's response based on:
 - Technical correctness
 - Clarity and depth of explanation
 - Relevance to the job role and seniority
 - Completeness and structure
-
 Be objective, concise, and use professional language. Be fair but critical.
-
 --------------------------
 Question:
 {question}
-
 Candidate Answer:
 {answer}
-
 Reference Answer:
 {ref_answer}
 --------------------------
-
 Now return your evaluation as a valid JSON object using exactly these keys:
 - "Score": One of ["Poor", "Medium", "Good", "Excellent"]
 - "Reasoning": 2-3 sentence explanation justifying the score, covering clarity, accuracy, completeness, or relevance
 - "Improvements": A list of 2-3 specific and constructive suggestions to help the candidate improve this answer
-
 Example:
 {{
     "Score": "Good",
@@ -686,7 +676,6 @@ Example:
         "Clarify the specific business impact or outcome achieved"
     ]
 }}
-
 Respond only with the JSON:
 """
     for attempt in range(max_retries + 1):
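
The prompt demands a bare JSON object, and the loop that follows retries up to max_retries times, which suggests a parse-and-retry pattern on malformed output. A sketch under that assumption (parse_evaluation is a hypothetical helper, not the app's actual code):

import json

def parse_evaluation(raw_reply):
    """Extract and validate the JSON object from an LLM reply that may carry stray text."""
    start, end = raw_reply.find("{"), raw_reply.rfind("}")
    if start == -1 or end == -1:
        raise ValueError("no JSON object found in reply")
    data = json.loads(raw_reply[start:end + 1])
    # Fail fast if the model dropped a required key, so the caller can retry.
    for key in ("Score", "Reasoning", "Improvements"):
        if key not in data:
            raise ValueError(f"missing key: {key}")
    return data
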
@@ -746,19 +735,16 @@ Respond only with the JSON:
 def generate_reference_answer(question, job_role, seniority):
     """
     Generates a high-quality reference answer using Groq-hosted LLaMA model.
-
     Args:
         question (str): Interview question to answer.
         job_role (str): Target job role (e.g., "Frontend Developer").
         seniority (str): Experience level (e.g., "Mid-Level").
-
     Returns:
         str: Clean, generated reference answer or error message.
     """
     try:
         # Clean, role-specific prompt
         prompt = f"""You are a {seniority} {job_role}.
-
 Q: {question}
 A:"""
 
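
The completion call itself falls outside this hunk. For reference, a Groq-hosted LLaMA model is typically invoked like this, assuming the official groq SDK; the model name is a placeholder, not necessarily what app.py uses:

from groq import Groq

client = Groq()  # reads GROQ_API_KEY from the environment

def complete(prompt, model="llama-3.1-8b-instant"):
    """Send the role-specific Q/A prompt and return the generated answer text."""
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
    )
    return response.choices[0].message.content.strip()
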
@@ -782,11 +768,8 @@ def build_interview_prompt(conversation_history, user_response, context, job_rol
 
     interview_template = """
 You are an AI interviewer conducting a real-time interview for a {job_role} position.
-
 Your objective is to thoroughly evaluate the candidate's suitability for the role using smart, structured, and adaptive questioning.
-
 ---
-
 Interview Rules and Principles:
 - The **baseline difficulty** of questions must match the candidate’s seniority level (e.g., junior, mid-level, senior).
 - Use your judgment to increase difficulty **slightly** if the candidate performs well, or simplify if they struggle — but never drop below the expected baseline for their level.
@@ -795,51 +778,36 @@ def build_interview_prompt(conversation_history, user_response, context, job_rol
 - Adapt your line of questioning gradually and logically based on the **overall flow**, not just the last answer.
 - Include real-world problem-solving scenarios to test how the candidate thinks and behaves practically.
 - You must **lead** the interview and make intelligent decisions about what to ask next.
-
 ---
-
 Context Use:
 {context_instruction}
 Note:
 If no relevant context was retrieved or the previous answer is unclear, you must still generate a thoughtful interview question using your own knowledge. Do not skip generation. Avoid default or fallback responses — always try to generate a meaningful and fair next question.
-
-
 ---
-
 Job Role: {job_role}
 Seniority Level: {seniority}
 Skills Focus: {skills}
 Difficulty Setting: {difficulty} (based on {difficulty_adjustment})
-
 ---
-
 Recent Conversation History:
 {history}
-
 Candidate's Last Response:
 "{user_response}"
-
 Evaluation of Last Response:
 {response_evaluation}
-
 Voice Tone: {voice_label}
-
 ---
 ---
-
 Important:
 If no relevant context was retrieved or the previous answer is unclear or off-topic,
 you must still generate a meaningful and fair interview question using your own knowledge and best practices.
 Do not skip question generation or fall back to default/filler responses.
-
 ---
-
 Guidelines for Next Question:
 - If this is the beginning of the interview, start with a question about the candidate’s background or experience.
 - Base the difficulty primarily on the seniority level, with light adjustment from recent performance.
 - Focus on core skills, real-world applications, and depth of reasoning.
 - Ask only one question. Be clear and concise.
-
 Generate the next interview question now:
 """
 
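
Every brace pair in interview_template is a str.format placeholder, so the function presumably finishes by filling the template. A minimal sketch (variable names beyond the truncated signature are assumptions):

prompt = interview_template.format(
    job_role=job_role,
    seniority=seniority,
    skills=skills,
    difficulty=difficulty,
    difficulty_adjustment=difficulty_adjustment,
    context_instruction=context_instruction,
    history=history,
    user_response=user_response,
    response_evaluation=response_evaluation,
    voice_label=voice_label,
)
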
@@ -967,29 +935,22 @@ def generate_llm_interview_report(
 
     prompt = f"""
 You are a senior technical interviewer at a major tech company.
-
 Write a structured, realistic hiring report for this {seniority} {job_role} interview, using these section scores (scale 1–5, with 5 best):
-
 Section-wise Evaluation
 1. *Problem Solving & Critical Thinking*: {avg_problem}
 2. *Technical Depth & Knowledge*: {avg_tech}
 3. *Communication & Clarity*: {avg_comm}
 4. *Emotional Composure & Confidence*: {avg_emotion}
 5. *Role Relevance*: 5
-
 *Transcript*
 {transcript}
-
 Your report should have the following sections:
-
 1. *Executive Summary* (realistic, hiring-committee style)
 2. *Section-wise Comments* (for each numbered category above, with short paragraph citing specifics)
 3. *Strengths & Weaknesses* (list at least 2 for each)
 4. *Final Verdict*: {verdict}
 5. *Recommendations* (2–3 for future improvement)
-
 Use realistic language. If some sections are N/A or lower than others, comment honestly.
-
 Interview Report:
 """
     # LLM call, or just return prompt for review
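
The interpolated averages presumably come from per-question section scores collected during the interview. A hypothetical sketch of that aggregation (the scoring scheme here is assumed, not taken from app.py):

def section_average(scores):
    """Average a list of 1-5 section scores, or 'N/A' when a section was never scored."""
    return round(sum(scores) / len(scores), 1) if scores else "N/A"

avg_problem = section_average([4, 3, 5])  # e.g. problem-solving scores per question
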
@@ -1226,12 +1187,10 @@ def build_ground_truth(all_roles):
 def evaluate_retrieval(job_role, all_roles, k=10):
     """
     Evaluate retrieval quality using Precision@k, Recall@k, and F1@k.
-
     Args:
         job_role (str): The input job role to search for.
         all_roles (list): List of all available job roles in the system.
         k (int): Top-k retrieved questions to evaluate.
-
     Returns:
         dict: Evaluation metrics including precision, recall, and f1.
     """
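
The metrics named in the docstring follow their standard top-k definitions. A self-contained sketch (the function name is illustrative; app.py's internals may differ):

def precision_recall_f1_at_k(retrieved, relevant, k=10):
    """Compute Precision@k, Recall@k, and F1@k for one query."""
    top_k = retrieved[:k]
    hits = sum(1 for item in top_k if item in relevant)
    precision = hits / len(top_k) if top_k else 0.0
    recall = hits / len(relevant) if relevant else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return {"precision": precision, "recall": recall, "f1": f1}
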
@@ -1764,33 +1723,53 @@ if torch.cuda.is_available():
     torch.cuda.set_device(0)
 else:
     print("⚠️ CUDA not available, using CPU")
-from TTS.api import TTS
 
 def load_models_lazy():
     """Load models only when needed"""
-    global
+    global model_bark, processor_bark, whisper_model
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"🔁 Using device: {device}")
 
-
-
-
-
-
+    if model_bark is None:
+        print("🔁 Loading Bark model...")
+        model_bark = BarkModel.from_pretrained("suno/bark").to(device)
+        print(f"✅ Bark model loaded on {device}")
+
+    if processor_bark is None:
+        print("🔁 Loading Bark processor...")
+        processor_bark = AutoProcessor.from_pretrained("suno/bark")
+        print("✅ Bark processor loaded")
 
     if whisper_model is None:
         print("🔁 Loading Whisper model...")
         whisper_model = whisper.load_model("base", device=device)
         print(f"✅ Whisper model loaded on {device}")
 
-
 def bark_tts_async(text):
-
-
-
-
-
+    """Fully correct async TTS generation with Bark"""
+    def _generate():
+        load_models_lazy()
+        device = next(model_bark.parameters()).device
+        print(f"🔁 Bark model on: {device}")
+        print(f"🎙️ Speaking: {text}")
+
+        # 🧠 Prepare full input using processor (not just input_ids)
+        inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+
+        # ✅ Generate using unpacked args — this includes all required prompt tensors
+        with torch.no_grad():
+            speech_values = model_bark.generate(**inputs)
+
+        # ✅ Convert to audio
+        speech = speech_values.cpu().numpy().squeeze()
+        speech = (speech * 32767).astype(np.int16)
+        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        wavfile.write(temp_wav.name, 22050, speech)
+        return temp_wav.name
+
+    return executor.submit(_generate)
 
 
 def whisper_stt(audio_path):
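
bark_tts_async now returns a concurrent.futures.Future from a module-level executor (assumed to be a ThreadPoolExecutor defined elsewhere in app.py, along with bark_voice_preset, np, wavfile, and tempfile). A sketch of how a caller consumes it:

from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)  # assumed module-level setup

future = bark_tts_async("Tell me about a project you are proud of.")
# ... keep the UI responsive while Bark renders speech in the background ...
wav_path = future.result()  # blocks only when the audio file is actually needed

One caveat worth noting: Bark generates audio at 24 kHz, so writing the samples with a 22050 Hz header will play them back slightly slowed and pitched down.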