Update app.py
app.py
CHANGED
@@ -1448,416 +1448,6 @@ def extract_candidate_details(file_path):
1448       "skills": skills
1449   }
1450
1451 - # import gradio as gr
1452 - # import time
1453 - # import tempfile
1454 - # import numpy as np
1455 - # import scipy.io.wavfile as wavfile
1456 - # import cv2
1457 - # import os
1458 - # import json
1459 - # from moviepy.editor import VideoFileClip
1460 - # import shutil
1461 - # from transformers import BarkModel, AutoProcessor
1462 - # import torch, gc
1463 - # import whisper
1464 - # from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1465 - # import librosa
1466 -
1467 - # import torch
1468 - # print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
1469 - # torch.cuda.empty_cache()
1470 - # gc.collect()
1471 -
1472 -
1473 - # # Bark TTS
1474 - # print("Loading Bark model...")
1475 - # model_bark = BarkModel.from_pretrained("suno/bark")
1476 - # print("✅ Bark model loaded")
1477 -
1478 - # print("Loading Bark processor...")
1479 - # processor_bark = AutoProcessor.from_pretrained("suno/bark")
1480 - # print("✅ Bark processor loaded")
1481 - # print("Moving Bark model to device...")
1482 - # model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1483 - # print("✅ Bark model on device")
1484 - # bark_voice_preset = "v2/en_speaker_6"
1485 -
1486 - # def bark_tts(text):
1487 - # print(f"Synthesizing TTS for: {text}")
1488 - # inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1489 - # inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1490 - # speech_values = model_bark.generate(**inputs)
1491 - # speech = speech_values.cpu().numpy().squeeze()
1492 - # speech = (speech * 32767).astype(np.int16)
1493 - # temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1494 - # wavfile.write(temp_wav.name, 22050, speech)
1495 - # return temp_wav.name
1496 -
1497 - # # Whisper STT
1498 - # print("Loading Whisper model...")
1499 - # whisper_model = whisper.load_model("base", device="cuda")
1500 - # print("✅ Whisper model loaded")
1501 - # def whisper_stt(audio_path):
1502 - # if not audio_path or not os.path.exists(audio_path): return ""
1503 - # result = whisper_model.transcribe(audio_path)
1504 - # return result["text"]
1505 -
1506 -
1507 - # # DeepFace (Video Face Emotion)
1508 - # def ensure_mp4(video_input):
1509 - # # video_input could be a file-like object, a path, or a Gradio temp path
1510 - # if isinstance(video_input, str):
1511 - # input_path = video_input
1512 - # else:
1513 - # # It's a file-like object (rare for Gradio video, but handle it)
1514 - # with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1515 - # temp_in.write(video_input.read())
1516 - # input_path = temp_in.name
1517 -
1518 - # # If already mp4, return as is
1519 - # if input_path.endswith(".mp4"):
1520 - # return input_path
1521 -
1522 - # # Convert to mp4 using moviepy
1523 - # mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1524 - # try:
1525 - # clip = VideoFileClip(input_path)
1526 - # clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1527 - # clip.close()
1528 - # except Exception as e:
1529 - # print("Video conversion failed:", e)
1530 - # # As fallback, just copy original
1531 - # shutil.copy(input_path, mp4_path)
1532 - # return mp4_path
1533 -
1534 - # def analyze_video_emotions(video_input, sample_rate=15):
1535 - # # Convert input to an mp4 file OpenCV can process
1536 - # mp4_path = ensure_mp4(video_input)
1537 - # if not mp4_path or not os.path.exists(mp4_path):
1538 - # return "no_face"
1539 - # cap = cv2.VideoCapture(mp4_path)
1540 - # frame_count = 0
1541 - # emotion_counts = {}
1542 - # while True:
1543 - # ret, frame = cap.read()
1544 - # if not ret: break
1545 - # if frame_count % sample_rate == 0:
1546 - # try:
1547 - # result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1548 - # dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1549 - # emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1550 - # except Exception: pass
1551 - # frame_count += 1
1552 - # cap.release()
1553 - # if not emotion_counts: return "no_face"
1554 - # return max(emotion_counts.items(), key=lambda x: x[1])[0]
1555 -
1556 - # # Original Hugging Face model: HaniaRuby/speech-emotion-recognition-wav2vec2
1557 - # local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"  # Local path to the downloaded model files
1558 - # print("Loading Wav2Vec processor and model...")
1559 - # wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1560 - # wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1561 - # wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1562 - # print("✅ Wav2Vec model loaded")
1563 - # wav2vec_model.eval()
1564 - # voice_label_map = {
1565 - # 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1566 - # 4: 'neutral', 5: 'sad', 6: 'surprise'
1567 - # }
1568 -
1569 -
1570 -
1571 - # def analyze_audio_emotion(audio_path):
1572 - # print(f"Analyzing audio emotion for: {audio_path}")
1573 - # if not audio_path or not os.path.exists(audio_path): return "neutral"
1574 -
1575 - # speech, sr = librosa.load(audio_path, sr=16000)
1576 - # inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1577 -
1578 - # # Move model and inputs to GPU
1579 - # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1580 - # wav2vec_model.to(device)
1581 - # inputs = {k: v.to(device) for k, v in inputs.items()}
1582 -
1583 - # with torch.no_grad():
1584 - # logits = wav2vec_model(**inputs).logits
1585 -
1586 - # probs = torch.nn.functional.softmax(logits, dim=-1)
1587 - # predicted_id = torch.argmax(probs, dim=-1).item()
1588 - # return voice_label_map.get(predicted_id, "neutral")
1589 -
1590 -
1591 - # # --- Effective confidence calculation
1592 - # def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1593 - # emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1594 - # answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1595 - # voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1596 - # avg_emotion = (voice_score + face_score) / 2
1597 - # control_bonus = max(0, answer_score - avg_emotion) * k
1598 - # eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
1599 - # return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
1600 -
1601 - # seniority_mapping = {
1602 - # "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1603 - # }
1604 -
1605 -
1606 - # # --- 2. Gradio App ---
1607 -
1608 - # with gr.Blocks(theme=gr.themes.Soft()) as demo:
1609 - # user_data = gr.State({})
1610 - # interview_state = gr.State({})
1611 - # missing_fields_state = gr.State([])
1612 -
1613 - # # --- UI Layout ---
1614 - # with gr.Column(visible=True) as user_info_section:
1615 - # gr.Markdown("## Candidate Information")
1616 - # cv_file = gr.File(label="Upload CV")
1617 - # job_desc = gr.Textbox(label="Job Description")
1618 - # start_btn = gr.Button("Continue", interactive=False)
1619 -
1620 - # with gr.Column(visible=False) as missing_section:
1621 - # gr.Markdown("## Missing Information")
1622 - # name_in = gr.Textbox(label="Name", visible=False)
1623 - # role_in = gr.Textbox(label="Job Role", visible=False)
1624 - # seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
1625 - # skills_in = gr.Textbox(label="Skills", visible=False)
1626 - # submit_btn = gr.Button("Submit", interactive=False)
1627 -
1628 - # with gr.Column(visible=False) as interview_pre_section:
1629 - # pre_interview_greeting_md = gr.Markdown()
1630 - # start_interview_final_btn = gr.Button("Start Interview")
1631 -
1632 - # with gr.Column(visible=False) as interview_section:
1633 - # gr.Markdown("## Interview in Progress")
1634 - # question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1635 - # question_text = gr.Markdown()
1636 - # user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1637 - # user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
1638 - # stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1639 - # confirm_btn = gr.Button("Confirm Answer")
1640 - # evaluation_display = gr.Markdown()
1641 - # emotion_display = gr.Markdown()
1642 - # interview_summary = gr.Markdown(visible=False)
1643 -
1644 - # # --- UI Logic ---
1645 -
1646 - # def validate_start_btn(cv_file, job_desc):
1647 - # return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1648 - # cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1649 - # job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1650 -
1651 - # def process_and_route_initial(cv_file, job_desc):
1652 - # details = extract_candidate_details(cv_file.name)
1653 - # job_info = extract_job_details(job_desc)
1654 - # data = {
1655 - # "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1656 - # "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1657 - # }
1658 - # missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1659 - # if missing:
1660 - # return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1661 - # else:
1662 - # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1663 - # return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1664 - # start_btn.click(
1665 - # process_and_route_initial,
1666 - # [cv_file, job_desc],
1667 - # [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
1668 - # )
1669 -
1670 - # def show_missing(missing):
1671 - # if missing is None: missing = []
1672 - # return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1673 - # missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1674 -
1675 - # def validate_fields(name, role, seniority, skills, missing):
1676 - # if not missing: return gr.update(interactive=False)
1677 - # all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1678 - # return gr.update(interactive=all_filled)
1679 - # for inp in [name_in, role_in, seniority_in, skills_in]:
1680 - # inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1681 -
1682 - # def complete_manual(data, name, role, seniority, skills):
1683 - # if data["name"].lower() == "unknown": data["name"] = name
1684 - # if data["job_role"].lower() == "unknown": data["job_role"] = role
1685 - # if data["seniority"].lower() == "unknown": data["seniority"] = seniority
1686 - # if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1687 - # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1688 - # return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1689 - # submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1690 -
1691 - # def start_interview(data):
1692 - # # --- Advanced state with full logging ---
1693 - # state = {
1694 - # "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1695 - # "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1696 - # "conversation_history": [],
1697 - # "difficulty_adjustment": None,
1698 - # "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1699 - # "log": []
1700 - # }
1701 - # # --- Optionally: context retrieval here (currently just blank) ---
1702 - # context = ""
1703 - # prompt = build_interview_prompt(
1704 - # conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1705 - # skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1706 - # voice_label="neutral", face_label="neutral"
1707 - # )
1708 - # #here the original one
1709 - # # first_q = groq_llm.predict(prompt)
1710 - # # # Evaluate Q for quality
1711 - # # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1712 - # # state["questions"].append(first_q)
1713 - # # state["question_evaluations"].append(q_eval)
1714 -
1715 - # #here the testing one
1716 - # first_q = groq_llm.predict(prompt)
1717 - # q_eval = {
1718 - # "Score": "N/A",
1719 - # "Reasoning": "Skipped to reduce processing time",
1720 - # "Improvements": []
1721 - # }
1722 - # state["questions"].append(first_q)
1723 - # state["question_evaluations"].append(q_eval)
1724 -
1725 -
1726 - # state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1727 - # audio_path = bark_tts(first_q)
1728 - # # LOG
1729 - # state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1730 - # return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1731 - # start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1732 -
1733 - # def transcribe(audio_path):
1734 - # return whisper_stt(audio_path)
1735 - # user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1736 -
1737 - # def process_answer(transcript, audio_path, video_path, state, data):
1738 - # if not transcript and not video_path:
1739 - # return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1740 - # elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1741 - # state["timings"].append(elapsed)
1742 - # state["answers"].append(transcript)
1743 - # state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1744 -
1745 - # # --- 1. Emotion analysis ---
1746 - # # voice_label = analyze_audio_emotion(audio_path)
1747 - # # face_label = analyze_video_emotions(video_path)
1748 - # # state["voice_labels"].append(voice_label)
1749 - # # state["face_labels"].append(face_label)
1750 -
1751 - # #just for testing
1752 - # voice_label = "neutral"
1753 - # face_label = "neutral"
1754 - # state["voice_labels"].append(voice_label)
1755 - # state["face_labels"].append(face_label)
1756 -
1757 -
1758 -
1759 - # # --- 2. Evaluate previous Q and Answer ---
1760 - # last_q = state["questions"][-1]
1761 - # q_eval = state["question_evaluations"][-1]  # Already in state
1762 - # ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1763 - # answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1764 - # state["answer_evaluations"].append(answer_eval)
1765 - # answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1766 -
1767 - # # --- 3. Adaptive difficulty ---
1768 - # if answer_score == "excellent":
1769 - # state["difficulty_adjustment"] = "harder"
1770 - # elif answer_score in ("medium", "poor"):
1771 - # state["difficulty_adjustment"] = "easier"
1772 - # else:
1773 - # state["difficulty_adjustment"] = None
1774 -
1775 - # # --- 4. Effective confidence ---
1776 - # # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1777 - # # state["effective_confidences"].append(eff_conf)
1778 -
1779 - # #just for testing:
1780 - # eff_conf = {"effective_confidence": 0.6}
1781 - # state["effective_confidences"].append(eff_conf)
1782 -
1783 -
1784 - # # --- LOG ---
1785 - # state["log"].append({
1786 - # "type": "answer",
1787 - # "question": last_q,
1788 - # "answer": transcript,
1789 - # "answer_eval": answer_eval,
1790 - # "ref_answer": ref_answer,
1791 - # "face_label": face_label,
1792 - # "voice_label": voice_label,
1793 - # "effective_confidence": eff_conf,
1794 - # "timing": elapsed,
1795 - # "timestamp": time.time()
1796 - # })
1797 -
1798 - # # --- Next or End ---
1799 - # qidx = state["question_idx"] + 1
1800 - # if qidx >= state["max_questions"]:
1801 - # # Save as JSON (optionally)
1802 - # timestamp = time.strftime("%Y%m%d_%H%M%S")
1803 - # log_file = f"interview_log_{timestamp}.json"
1804 - # with open(log_file, "w", encoding="utf-8") as f:
1805 - # json.dump(state["log"], f, indent=2, ensure_ascii=False)
1806 - # # Report
1807 - # summary = "# Interview Summary\n"
1808 - # for i, q in enumerate(state["questions"]):
1809 - # summary += (f"\n### Q{i + 1}: {q}\n"
1810 - # f"- *Answer*: {state['answers'][i]}\n"
1811 - # f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1812 - # f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1813 - # #also this are removed just for testing :(
1814 - # # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1815 - # # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1816 - # f"- *Time*: {state['timings'][i]}s\n")
1817 - # summary += f"\n\n▶ Full log saved as {log_file}."
1818 - # return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"))
1819 - # else:
1820 - # # --- Build next prompt using adaptive difficulty ---
1821 - # state["question_idx"] = qidx
1822 - # state["q_start_time"] = time.time()
1823 - # context = ""  # You can add your context logic here
1824 - # prompt = build_interview_prompt(
1825 - # conversation_history=state["conversation_history"],
1826 - # user_response=transcript,
1827 - # context=context,
1828 - # job_role=data["job_role"],
1829 - # skills=data["skills"],
1830 - # seniority=data["seniority"],
1831 - # difficulty_adjustment=state["difficulty_adjustment"],
1832 - # face_label=face_label,
1833 - # voice_label=voice_label,
1834 - # effective_confidence=eff_conf
1835 - # )
1836 - # next_q = groq_llm.predict(prompt)
1837 - # # Evaluate Q quality
1838 - # q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1839 - # state["questions"].append(next_q)
1840 - # state["question_evaluations"].append(q_eval)
1841 - # state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1842 - # state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1843 - # audio_path = bark_tts(next_q)
1844 - # # Display evaluations
1845 - # eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1846 - # return (
1847 - # state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1848 - # gr.update(value=None), gr.update(value=None),
1849 - # gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
1850 - # )
1851 - # confirm_btn.click(
1852 - # process_answer,
1853 - # [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1854 - # [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1855 - # ).then(
1856 - # lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1857 - # )
1858 -
1859 - # demo.launch(debug=True)
1860 -
1861   import gradio as gr
1862   import time
1863   import tempfile
@@ -1879,18 +1469,15 @@ print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
1879   torch.cuda.empty_cache()
1880   gc.collect()
1881
1882   # Bark TTS
1883   print("Loading Bark model...")
1884 - model_bark = BarkModel.from_pretrained("suno/bark")
1885   print("✅ Bark model loaded")
1886 -
1887   print("Loading Bark processor...")
1888   processor_bark = AutoProcessor.from_pretrained("suno/bark")
1889   print("✅ Bark processor loaded")
1890 -
1891 - model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1892 - print("✅ Bark model on device")
1893 - bark_voice_preset = "v2/en_speaker_6"
1894
1895   def bark_tts(text):
1896   print(f"Synthesizing TTS for: {text}")
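Note on the hunk above: it folds the separate `.to(device)` call into the `from_pretrained` line and drops the extra status prints, leaving `bark_tts` itself untouched. Below is a minimal sketch of the same synthesis path, assuming the `transformers` Bark API already imported in this file (the name `tts_to_wav` is illustrative, not the app's function); unlike the hard-coded 22050 Hz in the old commented-out version, it reads the rate from `model.generation_config.sample_rate`:

```python
# Minimal sketch of the Bark synthesis path (assumes transformers' BarkModel /
# AutoProcessor as imported in app.py; tts_to_wav is an illustrative name).
import tempfile

import numpy as np
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, BarkModel

device = "cuda" if torch.cuda.is_available() else "cpu"
model = BarkModel.from_pretrained("suno/bark").to(device)
processor = AutoProcessor.from_pretrained("suno/bark")

def tts_to_wav(text: str, voice_preset: str = "v2/en_speaker_5") -> str:
    inputs = processor(text, voice_preset=voice_preset, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}  # same move pattern as the app
    with torch.no_grad():
        audio = model.generate(**inputs).cpu().numpy().squeeze()
    pcm = (audio * 32767).astype(np.int16)       # float [-1, 1] -> 16-bit PCM
    rate = model.generation_config.sample_rate   # Bark's native output rate (24 kHz)
    path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    wavfile.write(path, rate, pcm)
    return path
```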
@@ -1907,110 +1494,24 @@ def bark_tts(text):
1907   print("Loading Whisper model...")
1908   whisper_model = whisper.load_model("base", device="cuda")
1909   print("✅ Whisper model loaded")
1910 -
1911   def whisper_stt(audio_path):
1912 - if not audio_path or not os.path.exists(audio_path):
1913 - return ""
1914   result = whisper_model.transcribe(audio_path)
1915   return result["text"]
1916
1917 -
1918 - # DeepFace (Video Face Emotion)
1919 - def ensure_mp4(video_input):
1920 - if isinstance(video_input, str):
1921 - input_path = video_input
1922 - else:
1923 - with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1924 - temp_in.write(video_input.read())
1925 - input_path = temp_in.name
1926 -
1927 - if input_path.endswith(".mp4"):
1928 - return input_path
1929 -
1930 - mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1931 - try:
1932 - clip = VideoFileClip(input_path)
1933 - clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1934 - clip.close()
1935 - except Exception as e:
1936 - print("Video conversion failed:", e)
1937 - shutil.copy(input_path, mp4_path)
1938 - return mp4_path
1939 -
1940 - def analyze_video_emotions(video_input, sample_rate=15):
1941 - mp4_path = ensure_mp4(video_input)
1942 - if not mp4_path or not os.path.exists(mp4_path):
1943 - return "no_face"
1944 - cap = cv2.VideoCapture(mp4_path)
1945 - frame_count = 0
1946 - emotion_counts = {}
1947 - while True:
1948 - ret, frame = cap.read()
1949 - if not ret: break
1950 - if frame_count % sample_rate == 0:
1951 - try:
1952 - result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1953 - dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1954 - emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1955 - except Exception: pass
1956 - frame_count += 1
1957 - cap.release()
1958 - if not emotion_counts: return "no_face"
1959 - return max(emotion_counts.items(), key=lambda x: x[1])[0]
1960 -
1961 - # Wav2Vec2 model for audio emotion analysis
1962 - local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"
1963 - print("Loading Wav2Vec processor and model...")
1964 - wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1965 - wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1966 - wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1967 - print("✅ Wav2Vec model loaded")
1968 - wav2vec_model.eval()
1969 - voice_label_map = {
1970 - 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1971 - 4: 'neutral', 5: 'sad', 6: 'surprise'
1972 - }
1973 -
1974 - def analyze_audio_emotion(audio_path):
1975 - print(f"Analyzing audio emotion for: {audio_path}")
1976 - if not audio_path or not os.path.exists(audio_path):
1977 - return "neutral"
1978 -
1979 - speech, sr = librosa.load(audio_path, sr=16000)
1980 - inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1981 -
1982 - device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1983 - wav2vec_model.to(device)
1984 - inputs = {k: v.to(device) for k, v in inputs.items()}
1985 -
1986 - with torch.no_grad():
1987 - logits = wav2vec_model(**inputs).logits
1988 -
1989 - probs = torch.nn.functional.softmax(logits, dim=-1)
1990 - predicted_id = torch.argmax(probs, dim=-1).item()
1991 - return voice_label_map.get(predicted_id, "neutral")
1992 -
1993 - # Effective confidence calculation
1994 - def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1995 - emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1996 - answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1997 - voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1998 - avg_emotion = (voice_score + face_score) / 2
1999 - control_bonus = max(0, answer_score - avg_emotion) * k
2000 - eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
2001 - return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
2002   seniority_mapping = {
2003   "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
2004   }
2005
2006 -
2007   with gr.Blocks(theme=gr.themes.Soft()) as demo:
2008   user_data = gr.State({})
2009   interview_state = gr.State({})
2010   missing_fields_state = gr.State([])
2011 - recording_state = gr.State({"is_recording": False})
2012
2013 - # UI Layout
2014   with gr.Column(visible=True) as user_info_section:
2015   gr.Markdown("## Candidate Information")
2016   cv_file = gr.File(label="Upload CV")
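The removed `interpret_confidence` helper above weights the graded answer at 0.5, the voice emotion at 0.22, the face emotion at 0.18, and adds a small "control bonus" when the answer grade exceeds the average emotion score. A worked check of that arithmetic, using values taken straight from the maps in the removed code:

```python
# Worked example of the removed interpret_confidence() weighting.
emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3,
               "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}

voice, face, answer, k = "neutral", "happy", "good", 0.2
voice_score, face_score = emotion_map[voice], emotion_map[face]    # 0.6, 0.9
answer_score = answer_score_map[answer]                            # 0.8
avg_emotion = (voice_score + face_score) / 2                       # 0.75
control_bonus = max(0, answer_score - avg_emotion) * k             # 0.05 * 0.2 = 0.01
eff = 0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus
print(round(eff, 3))  # 0.4 + 0.132 + 0.162 + 0.001 = 0.695
```

Because the fixed weights sum to 0.9 and the bonus term is capped by `k`, the effective confidence never reaches 1.0 even for a top-scored answer.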
@@ -2031,34 +1532,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2031
2032   with gr.Column(visible=False) as interview_section:
2033   gr.Markdown("## Interview in Progress")
2034 - question_audio = gr.Audio(label="Listen
2035   question_text = gr.Markdown()
2036 -
2037 -
2038 -
2039 - record_btn = gr.Button("🎤 Start Recording", variant="primary")
2040 - stop_btn = gr.Button("⏹️ Stop Recording", interactive=False)
2041 -
2042 - # Hidden audio component for recording
2043 - user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Audio Recording", visible=False)
2044 -
2045 - # Video input (keeping for emotion analysis)
2046 - user_video_input = gr.Video(sources=["webcam"], label="Video Recording (for emotion analysis)", visible=False)
2047 -
2048 - # Transcript and confirmation
2049 - stt_transcript = gr.Textbox(label="Transcribed Answer (automatically generated)", interactive=True)
2050 - confirm_btn = gr.Button("Confirm Answer", interactive=False)
2051 -
2052 - # Status and results
2053 - recording_status = gr.Markdown("**Status:** Ready to record")
2054   evaluation_display = gr.Markdown()
2055 - emotion_display = gr.Markdown()
2056   interview_summary = gr.Markdown(visible=False)
2057
2058 - # UI Logic
2059   def validate_start_btn(cv_file, job_desc):
2060   return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
2061 -
2062   cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
2063   job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
2064
@@ -2066,10 +1551,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2066   details = extract_candidate_details(cv_file.name)
2067   job_info = extract_job_details(job_desc)
2068   data = {
2069 - "name": details.get("name", "unknown"),
2070 - "
2071 - "seniority": job_info.get("experience_level", "unknown"),
2072 - "skills": job_info.get("skills", [])
2073   }
2074   missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
2075   if missing:
@@ -2077,7 +1560,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2077   else:
2078   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
2079   return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
2080 -
2081   start_btn.click(
2082   process_and_route_initial,
2083   [cv_file, job_desc],
@@ -2086,23 +1568,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2086
2087   def show_missing(missing):
2088   if missing is None: missing = []
2089 - return
2090 - gr.update(visible="job_role" in missing),
2091 - gr.update(visible="seniority" in missing),
2092 - gr.update(visible="skills" in missing))
2093 -
2094   missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
2095
2096   def validate_fields(name, role, seniority, skills, missing):
2097   if not missing: return gr.update(interactive=False)
2098 - all_filled = all([
2099 - (not ("name" in missing) or bool(name.strip())),
2100 - (not ("job_role" in missing) or bool(role.strip())),
2101 - (not ("seniority" in missing) or bool(seniority)),
2102 - (not ("skills" in missing) or bool(skills.strip()))
2103 - ])
2104   return gr.update(interactive=all_filled)
2105 -
2106   for inp in [name_in, role_in, seniority_in, skills_in]:
2107   inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
2108
@@ -2113,10 +1585,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2113   if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
2114   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
2115   return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
2116 -
2117   submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
2118
2119   def start_interview(data):
2120   state = {
2121   "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
2122   "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
@@ -2125,14 +1597,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2125   "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
2126   "log": []
2127   }
2128   context = ""
2129   prompt = build_interview_prompt(
2130   conversation_history=[], user_response="", context=context, job_role=data["job_role"],
2131   skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
2132   voice_label="neutral", face_label="neutral"
2133   )
2134 -
2135 - #
2136   first_q = groq_llm.predict(prompt)
2137   q_eval = {
2138   "Score": "N/A",
@@ -2141,101 +1620,50 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2141   }
2142   state["questions"].append(first_q)
2143   state["question_evaluations"].append(q_eval)
2144 -
2145
2146 -
2147   audio_path = bark_tts(first_q)
2148 -
2149 - # Log
2150   state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
2151 -
2152 -
2153 - gr.update(visible=False),
2154 - gr.update(visible=True),
2155 - audio_path,
2156 - f"*Question 1:* {first_q}",
2157 - gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2158 -
2159 - start_interview_final_btn.click(
2160 - start_interview,
2161 - [user_data],
2162 - [interview_state, interview_pre_section, interview_section, question_audio, question_text, recording_status]
2163 - )
2164 -
2165 - # Recording functionality
2166 - def start_recording(rec_state):
2167 - rec_state["is_recording"] = True
2168 - return (rec_state,
2169 - gr.update(interactive=False),
2170 - gr.update(interactive=True),
2171 - gr.update(visible=True),
2172 - gr.update(value="**Status:** 🔴 Recording... Click 'Stop Recording' when done"))
2173 -
2174 - record_btn.click(
2175 - start_recording,
2176 - [recording_state],
2177 - [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2178 - )
2179 -
2180 - def stop_recording(rec_state):
2181 - rec_state["is_recording"] = False
2182 - return (rec_state,
2183 - gr.update(interactive=True),
2184 - gr.update(interactive=False),
2185 - gr.update(visible=False),
2186 - gr.update(value="**Status:** Processing audio... Please wait"))
2187 -
2188 - stop_btn.click(
2189 - stop_recording,
2190 - [recording_state],
2191 - [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2192 - )
2193
2194 -
2195 -
2196 -
2197 - return "", gr.update(interactive=False), gr.update(value="**Status:** No audio recorded")
2198 -
2199 - transcript = whisper_stt(audio_path)
2200 - if transcript:
2201 - return (transcript,
2202 - gr.update(interactive=True),
2203 - gr.update(value="**Status:** Audio transcribed! Review and click 'Confirm Answer'"))
2204 - else:
2205 - return ("",
2206 - gr.update(interactive=False),
2207 - gr.update(value="**Status:** Transcription failed. Please try recording again"))
2208 -
2209 - user_audio_input.change(
2210 - transcribe_and_update,
2211 - [user_audio_input],
2212 - [stt_transcript, confirm_btn, recording_status]
2213 - )
2214
2215   def process_answer(transcript, audio_path, video_path, state, data):
2216 - if not transcript:
2217 - return
2218 -
2219   elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
2220   state["timings"].append(elapsed)
2221   state["answers"].append(transcript)
2222   state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
2223
2224 - # Emotion analysis
2225   voice_label = "neutral"
2226   face_label = "neutral"
2227   state["voice_labels"].append(voice_label)
2228   state["face_labels"].append(face_label)
2229
2230 -
2231   last_q = state["questions"][-1]
2232 - q_eval = state["question_evaluations"][-1]
2233   ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
2234   answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
2235   state["answer_evaluations"].append(answer_eval)
2236   answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
2237
2238 - # Adaptive difficulty
2239   if answer_score == "excellent":
2240   state["difficulty_adjustment"] = "harder"
2241   elif answer_score in ("medium", "poor"):
@@ -2243,11 +1671,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2243   else:
2244   state["difficulty_adjustment"] = None
2245
2246 - # Effective confidence
2247   eff_conf = {"effective_confidence": 0.6}
2248   state["effective_confidences"].append(eff_conf)
2249
2250 -
2251   state["log"].append({
2252   "type": "answer",
2253   "question": last_q,
@@ -2261,38 +1694,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2261   "timestamp": time.time()
2262   })
2263
2264 - #
2265   qidx = state["question_idx"] + 1
2266   if qidx >= state["max_questions"]:
2267 - # Save
2268   timestamp = time.strftime("%Y%m%d_%H%M%S")
2269   log_file = f"interview_log_{timestamp}.json"
2270   with open(log_file, "w", encoding="utf-8") as f:
2271   json.dump(state["log"], f, indent=2, ensure_ascii=False)
2272 -
2273 - # Generate summary
2274   summary = "# Interview Summary\n"
2275   for i, q in enumerate(state["questions"]):
2276   summary += (f"\n### Q{i + 1}: {q}\n"
2277   f"- *Answer*: {state['answers'][i]}\n"
2278   f"- *Q Eval*: {state['question_evaluations'][i]}\n"
2279   f"- *A Eval*: {state['answer_evaluations'][i]}\n"
2280   f"- *Time*: {state['timings'][i]}s\n")
2281   summary += f"\n\n▶ Full log saved as {log_file}."
2282 -
2283 - return (state,
2284 - gr.update(visible=True, value=summary),
2285 - gr.update(value=None),
2286 - gr.update(value=None),
2287 - gr.update(value=None),
2288 - gr.update(interactive=False),
2289 - gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
2290 - gr.update(value="**Status:** Interview completed!"))
2291   else:
2292 - #
2293   state["question_idx"] = qidx
2294   state["q_start_time"] = time.time()
2295 - context = ""
2296   prompt = build_interview_prompt(
2297   conversation_history=state["conversation_history"],
2298   user_response=transcript,
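For readers of the saved `interview_log_*.json`: each "answer" entry written by the block above carries the keys logged in `process_answer`. An illustrative entry is sketched below (values are made up, and the nested `answer_eval` shape is assumed to mirror the Score/Reasoning/Improvements pattern used for question evaluations):

```python
# Illustrative shape of one "answer" entry in interview_log_<timestamp>.json.
# Keys mirror the dict appended to state["log"]; values here are invented.
example_entry = {
    "type": "answer",
    "question": "Can you describe a REST API you have designed?",
    "answer": "I built a Flask service that ...",
    "answer_eval": {"Score": "good", "Reasoning": "...", "Improvements": ["..."]},  # assumed shape
    "ref_answer": "A strong answer would cover resource modeling, versioning, ...",
    "face_label": "neutral",
    "voice_label": "neutral",
    "effective_confidence": {"effective_confidence": 0.6},
    "timing": 42.7,          # seconds spent on the question
    "timestamp": 1718032000.123,
}
```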
@@ -2301,34 +1728,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2301   skills=data["skills"],
2302   seniority=data["seniority"],
2303   difficulty_adjustment=state["difficulty_adjustment"],
2304 - face_label=face_label,
2305   voice_label=voice_label,
2306 - effective_confidence=eff_conf
2307   )
2308 -
2309   next_q = groq_llm.predict(prompt)
2310   q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
2311   state["questions"].append(next_q)
2312   state["question_evaluations"].append(q_eval)
2313   state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
2314   state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
2315 -
2316   audio_path = bark_tts(next_q)
2317   eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
2318 -
2319 -
2320 -
2321 -
2322 -
2323 - gr.update(value=""),
2324 - gr.update(interactive=False),
2325 - gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
2326 - gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2327 -
2328   confirm_btn.click(
2329   process_answer,
2330   [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
2331 - [interview_state, interview_summary, question_audio, question_text,
2332   )
2333
2334   demo.launch(debug=True)
1448       "skills": skills
1449   }
1450
1451   import gradio as gr
1452   import time
1453   import tempfile
1469   torch.cuda.empty_cache()
1470   gc.collect()
1471
1472 +
1473   # Bark TTS
1474   print("Loading Bark model...")
1475 + model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
1476   print("✅ Bark model loaded")
1477   print("Loading Bark processor...")
1478   processor_bark = AutoProcessor.from_pretrained("suno/bark")
1479   print("✅ Bark processor loaded")
1480 + bark_voice_preset = "v2/en_speaker_5"
1481
1482   def bark_tts(text):
1483   print(f"Synthesizing TTS for: {text}")
1494   print("Loading Whisper model...")
1495   whisper_model = whisper.load_model("base", device="cuda")
1496   print("✅ Whisper model loaded")
1497   def whisper_stt(audio_path):
1498 + if not audio_path or not os.path.exists(audio_path): return ""
1499   result = whisper_model.transcribe(audio_path)
1500   return result["text"]
1501
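With the Bark and Whisper helpers above both active, a quick round-trip is an easy smoke test; a sketch, assuming the `bark_tts` and `whisper_stt` functions defined in this file and a GPU (Whisper is loaded with `device="cuda"` as written):

```python
# Quick round-trip smoke test using the two helpers defined above.
if __name__ == "__main__":
    sample = "Tell me about a project you are proud of."
    wav_path = bark_tts(sample)      # text -> WAV file on disk
    heard = whisper_stt(wav_path)    # WAV file -> transcript
    print("TTS wrote:", wav_path)
    print("Whisper heard:", heard.strip())
```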
1502   seniority_mapping = {
1503   "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1504   }
1505
1506 +
1507 + # --- 2. Gradio App ---
1508 +
1509   with gr.Blocks(theme=gr.themes.Soft()) as demo:
1510   user_data = gr.State({})
1511   interview_state = gr.State({})
1512   missing_fields_state = gr.State([])
1513
1514 + # --- UI Layout ---
1515   with gr.Column(visible=True) as user_info_section:
1516   gr.Markdown("## Candidate Information")
1517   cv_file = gr.File(label="Upload CV")
1532
1533   with gr.Column(visible=False) as interview_section:
1534   gr.Markdown("## Interview in Progress")
1535 + question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1536   question_text = gr.Markdown()
1537 + user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1538 + stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1539 + confirm_btn = gr.Button("Confirm Answer")
1540   evaluation_display = gr.Markdown()
1541   interview_summary = gr.Markdown(visible=False)
1542
1543 + # --- UI Logic ---
1544 +
1545   def validate_start_btn(cv_file, job_desc):
1546   return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1547   cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1548   job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1549
1551   details = extract_candidate_details(cv_file.name)
1552   job_info = extract_job_details(job_desc)
1553   data = {
1554 + "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1555 + "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1556   }
1557   missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1558   if missing:
1560   else:
1561   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1562   return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1563   start_btn.click(
1564   process_and_route_initial,
1565   [cv_file, job_desc],
1568
1569   def show_missing(missing):
1570   if missing is None: missing = []
1571 + return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1572   missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1573
1574   def validate_fields(name, role, seniority, skills, missing):
1575   if not missing: return gr.update(interactive=False)
1576 + all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1577   return gr.update(interactive=all_filled)
1578   for inp in [name_in, role_in, seniority_in, skills_in]:
1579   inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1580
1585   if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1586   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1587   return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1588   submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1589
1590   def start_interview(data):
1591 + # --- Advanced state with full logging ---
1592   state = {
1593   "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1594   "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1597   "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1598   "log": []
1599   }
1600 + # --- Optionally: context retrieval here (currently just blank) ---
1601   context = ""
1602   prompt = build_interview_prompt(
1603   conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1604   skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1605   voice_label="neutral", face_label="neutral"
1606   )
1607 + #here the original one
1608 + # first_q = groq_llm.predict(prompt)
1609 + # # Evaluate Q for quality
1610 + # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1611 + # state["questions"].append(first_q)
1612 + # state["question_evaluations"].append(q_eval)
1613 +
1614 + #here the testing one
1615   first_q = groq_llm.predict(prompt)
1616   q_eval = {
1617   "Score": "N/A",
1620   }
1621   state["questions"].append(first_q)
1622   state["question_evaluations"].append(q_eval)
1623 +
1624
1625 + state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1626   audio_path = bark_tts(first_q)
1627 + # LOG
1628   state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1629 + return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1630 + start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1631
1632 + def transcribe(audio_path):
1633 + return whisper_stt(audio_path)
1634 + user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1635
1636   def process_answer(transcript, audio_path, video_path, state, data):
1637 + if not transcript and not video_path:
1638 + return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1639   elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1640   state["timings"].append(elapsed)
1641   state["answers"].append(transcript)
1642   state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1643
1644 + # --- 1. Emotion analysis ---
1645 + # voice_label = analyze_audio_emotion(audio_path)
1646 + # face_label = analyze_video_emotions(video_path)
1647 + # state["voice_labels"].append(voice_label)
1648 + # state["face_labels"].append(face_label)
1649 +
1650 + #just for testing
1651   voice_label = "neutral"
1652   face_label = "neutral"
1653   state["voice_labels"].append(voice_label)
1654   state["face_labels"].append(face_label)
1655
1656 +
1657 +
1658 + # --- 2. Evaluate previous Q and Answer ---
1659   last_q = state["questions"][-1]
1660 + q_eval = state["question_evaluations"][-1]  # Already in state
1661   ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1662   answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1663   state["answer_evaluations"].append(answer_eval)
1664   answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1665
1666 + # --- 3. Adaptive difficulty ---
1667   if answer_score == "excellent":
1668   state["difficulty_adjustment"] = "harder"
1669   elif answer_score in ("medium", "poor"):
1671   else:
1672   state["difficulty_adjustment"] = None
1673
1674 + # --- 4. Effective confidence ---
1675 + # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1676 + # state["effective_confidences"].append(eff_conf)
1677 +
1678 + #just for testing:
1679   eff_conf = {"effective_confidence": 0.6}
1680   state["effective_confidences"].append(eff_conf)
1681
1682 +
1683 + # --- LOG ---
1684   state["log"].append({
1685   "type": "answer",
1686   "question": last_q,
1694   "timestamp": time.time()
1695   })
1696
1697 + # --- Next or End ---
1698   qidx = state["question_idx"] + 1
1699   if qidx >= state["max_questions"]:
1700 + # Save as JSON (optionally)
1701   timestamp = time.strftime("%Y%m%d_%H%M%S")
1702   log_file = f"interview_log_{timestamp}.json"
1703   with open(log_file, "w", encoding="utf-8") as f:
1704   json.dump(state["log"], f, indent=2, ensure_ascii=False)
1705 + # Report
1706   summary = "# Interview Summary\n"
1707   for i, q in enumerate(state["questions"]):
1708   summary += (f"\n### Q{i + 1}: {q}\n"
1709   f"- *Answer*: {state['answers'][i]}\n"
1710   f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1711   f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1712 + #also this are removed just for testing :(
1713 + # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1714 + # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1715   f"- *Time*: {state['timings'][i]}s\n")
1716   summary += f"\n\n▶ Full log saved as {log_file}."
1717 + return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"))
1718   else:
1719 + # --- Build next prompt using adaptive difficulty ---
1720   state["question_idx"] = qidx
1721   state["q_start_time"] = time.time()
1722 + context = ""  # You can add your context logic here
1723   prompt = build_interview_prompt(
1724   conversation_history=state["conversation_history"],
1725   user_response=transcript,
1728   skills=data["skills"],
1729   seniority=data["seniority"],
1730   difficulty_adjustment=state["difficulty_adjustment"],
1731   voice_label=voice_label,
1732   )
1733   next_q = groq_llm.predict(prompt)
1734 + # Evaluate Q quality
1735   q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1736   state["questions"].append(next_q)
1737   state["question_evaluations"].append(q_eval)
1738   state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1739   state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1740   audio_path = bark_tts(next_q)
1741 + # Display evaluations
1742   eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1743 + return (
1744 + state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1745 + gr.update(value=None), gr.update(value=None),
1746 + gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
1747 + )
1748   confirm_btn.click(
1749   process_answer,
1750   [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1751 + [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1752 + ).then(
1753 + lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1754   )
1755
1756   demo.launch(debug=True)
1757 +
1758 +
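The wiring this commit restores boils down to two events: the microphone `gr.Audio` component's `.change` fills the transcript box via Whisper, and the confirm button's `.click(...).then(...)` chain processes the answer and then clears the input. A stripped-down, self-contained sketch of that pattern (component and function names here are illustrative, not the app's):

```python
# Minimal sketch of the restored event wiring (names are illustrative).
import gradio as gr

def fake_transcribe(audio_path):
    # Stand-in for whisper_stt(audio_path).
    return f"(transcript of {audio_path})" if audio_path else ""

def handle_answer(transcript):
    # Stand-in for process_answer(...); returns text for a results panel.
    return f"Received answer: {transcript}"

with gr.Blocks() as demo:
    mic = gr.Audio(sources=["microphone"], type="filepath", label="Record Answer")
    transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
    confirm = gr.Button("Confirm Answer")
    result = gr.Markdown()

    # Recording finishes -> transcribe into the textbox.
    mic.change(fake_transcribe, mic, transcript)
    # Confirm -> process the answer, then clear the audio input for the next question.
    confirm.click(handle_answer, transcript, result).then(
        lambda: gr.update(value=None), None, mic
    )

if __name__ == "__main__":
    demo.launch()
```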