husseinelsaadi committed on
Commit c343ad6 · verified · 1 Parent(s): a04c545

Update app.py

Files changed (1)
  1. app.py +78 -654
app.py CHANGED
@@ -1448,416 +1448,6 @@ def extract_candidate_details(file_path):
1448
  "skills": skills
1449
  }
1450
 
1451
- # import gradio as gr
1452
- # import time
1453
- # import tempfile
1454
- # import numpy as np
1455
- # import scipy.io.wavfile as wavfile
1456
- # import cv2
1457
- # import os
1458
- # import json
1459
- # from moviepy.editor import VideoFileClip
1460
- # import shutil
1461
- # from transformers import BarkModel, AutoProcessor
1462
- # import torch, gc
1463
- # import whisper
1464
- # from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1465
- # import librosa
1466
-
1467
- # import torch
1468
- # print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
1469
- # torch.cuda.empty_cache()
1470
- # gc.collect()
1471
-
1472
-
1473
- # # Bark TTS
1474
- # print("🔍 Loading Bark model...")
1475
- # model_bark = BarkModel.from_pretrained("suno/bark")
1476
- # print("✅ Bark model loaded")
1477
-
1478
- # print("🔍 Loading Bark processor...")
1479
- # processor_bark = AutoProcessor.from_pretrained("suno/bark")
1480
- # print("✅ Bark processor loaded")
1481
- # print("🔍 Moving Bark model to device...")
1482
- # model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1483
- # print("✅ Bark model on device")
1484
- # bark_voice_preset = "v2/en_speaker_6"
1485
-
1486
- # def bark_tts(text):
1487
- # print(f"🔍 Synthesizing TTS for: {text}")
1488
- # inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1489
- # inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1490
- # speech_values = model_bark.generate(**inputs)
1491
- # speech = speech_values.cpu().numpy().squeeze()
1492
- # speech = (speech * 32767).astype(np.int16)
1493
- # temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1494
- # wavfile.write(temp_wav.name, 22050, speech)
1495
- # return temp_wav.name
1496
-
1497
- # # Whisper STT
1498
- # print("🔍 Loading Whisper model...")
1499
- # whisper_model = whisper.load_model("base", device="cuda")
1500
- # print("✅ Whisper model loaded")
1501
- # def whisper_stt(audio_path):
1502
- # if not audio_path or not os.path.exists(audio_path): return ""
1503
- # result = whisper_model.transcribe(audio_path)
1504
- # return result["text"]
1505
-
1506
-
1507
- # # DeepFace (Video Face Emotion)
1508
- # def ensure_mp4(video_input):
1509
- # # video_input could be a file-like object, a path, or a Gradio temp path
1510
- # if isinstance(video_input, str):
1511
- # input_path = video_input
1512
- # else:
1513
- # # It's a file-like object (rare for Gradio video, but handle it)
1514
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1515
- # temp_in.write(video_input.read())
1516
- # input_path = temp_in.name
1517
-
1518
- # # If already mp4, return as is
1519
- # if input_path.endswith(".mp4"):
1520
- # return input_path
1521
-
1522
- # # Convert to mp4 using moviepy
1523
- # mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1524
- # try:
1525
- # clip = VideoFileClip(input_path)
1526
- # clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1527
- # clip.close()
1528
- # except Exception as e:
1529
- # print("Video conversion failed:", e)
1530
- # # As fallback, just copy original
1531
- # shutil.copy(input_path, mp4_path)
1532
- # return mp4_path
1533
-
1534
- # def analyze_video_emotions(video_input, sample_rate=15):
1535
- # # Convert input to an mp4 file OpenCV can process
1536
- # mp4_path = ensure_mp4(video_input)
1537
- # if not mp4_path or not os.path.exists(mp4_path):
1538
- # return "no_face"
1539
- # cap = cv2.VideoCapture(mp4_path)
1540
- # frame_count = 0
1541
- # emotion_counts = {}
1542
- # while True:
1543
- # ret, frame = cap.read()
1544
- # if not ret: break
1545
- # if frame_count % sample_rate == 0:
1546
- # try:
1547
- # result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1548
- # dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1549
- # emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1550
- # except Exception: pass
1551
- # frame_count += 1
1552
- # cap.release()
1553
- # if not emotion_counts: return "no_face"
1554
- # return max(emotion_counts.items(), key=lambda x: x[1])[0]
1555
-
1556
- # # Original Hugging Face model: HaniaRuby/speech-emotion-recognition-wav2vec2
1557
- # local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2" # Local path to the downloaded model files
1558
- # print("🔍 Loading Wav2Vec processor and model...")
1559
- # wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1560
- # wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1561
- # wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1562
- # print("✅ Wav2Vec model loaded")
1563
- # wav2vec_model.eval()
1564
- # voice_label_map = {
1565
- # 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1566
- # 4: 'neutral', 5: 'sad', 6: 'surprise'
1567
- # }
1568
-
1569
-
1570
-
1571
- # def analyze_audio_emotion(audio_path):
1572
- # print(f"🔍 Analyzing audio emotion for: {audio_path}")
1573
- # if not audio_path or not os.path.exists(audio_path): return "neutral"
1574
-
1575
- # speech, sr = librosa.load(audio_path, sr=16000)
1576
- # inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1577
-
1578
- # # 🔥 Move model and inputs to GPU
1579
- # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1580
- # wav2vec_model.to(device)
1581
- # inputs = {k: v.to(device) for k, v in inputs.items()}
1582
-
1583
- # with torch.no_grad():
1584
- # logits = wav2vec_model(**inputs).logits
1585
-
1586
- # probs = torch.nn.functional.softmax(logits, dim=-1)
1587
- # predicted_id = torch.argmax(probs, dim=-1).item()
1588
- # return voice_label_map.get(predicted_id, "neutral")
1589
-
1590
-
1591
- # # --- Effective confidence calculation
1592
- # def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1593
- # emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1594
- # answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1595
- # voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1596
- # avg_emotion = (voice_score + face_score) / 2
1597
- # control_bonus = max(0, answer_score - avg_emotion) * k
1598
- # eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
1599
- # return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
1600
-
1601
- # seniority_mapping = {
1602
- # "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1603
- # }
1604
-
1605
-
1606
- # # --- 2. Gradio App ---
1607
-
1608
- # with gr.Blocks(theme=gr.themes.Soft()) as demo:
1609
- # user_data = gr.State({})
1610
- # interview_state = gr.State({})
1611
- # missing_fields_state = gr.State([])
1612
-
1613
- # # --- UI Layout ---
1614
- # with gr.Column(visible=True) as user_info_section:
1615
- # gr.Markdown("## Candidate Information")
1616
- # cv_file = gr.File(label="Upload CV")
1617
- # job_desc = gr.Textbox(label="Job Description")
1618
- # start_btn = gr.Button("Continue", interactive=False)
1619
-
1620
- # with gr.Column(visible=False) as missing_section:
1621
- # gr.Markdown("## Missing Information")
1622
- # name_in = gr.Textbox(label="Name", visible=False)
1623
- # role_in = gr.Textbox(label="Job Role", visible=False)
1624
- # seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
1625
- # skills_in = gr.Textbox(label="Skills", visible=False)
1626
- # submit_btn = gr.Button("Submit", interactive=False)
1627
-
1628
- # with gr.Column(visible=False) as interview_pre_section:
1629
- # pre_interview_greeting_md = gr.Markdown()
1630
- # start_interview_final_btn = gr.Button("Start Interview")
1631
-
1632
- # with gr.Column(visible=False) as interview_section:
1633
- # gr.Markdown("## Interview in Progress")
1634
- # question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1635
- # question_text = gr.Markdown()
1636
- # user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1637
- # user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
1638
- # stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1639
- # confirm_btn = gr.Button("Confirm Answer")
1640
- # evaluation_display = gr.Markdown()
1641
- # emotion_display = gr.Markdown()
1642
- # interview_summary = gr.Markdown(visible=False)
1643
-
1644
- # # --- UI Logic ---
1645
-
1646
- # def validate_start_btn(cv_file, job_desc):
1647
- # return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1648
- # cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1649
- # job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1650
-
1651
- # def process_and_route_initial(cv_file, job_desc):
1652
- # details = extract_candidate_details(cv_file.name)
1653
- # job_info = extract_job_details(job_desc)
1654
- # data = {
1655
- # "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1656
- # "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1657
- # }
1658
- # missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1659
- # if missing:
1660
- # return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1661
- # else:
1662
- # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1663
- # return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1664
- # start_btn.click(
1665
- # process_and_route_initial,
1666
- # [cv_file, job_desc],
1667
- # [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
1668
- # )
1669
-
1670
- # def show_missing(missing):
1671
- # if missing is None: missing = []
1672
- # return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1673
- # missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1674
-
1675
- # def validate_fields(name, role, seniority, skills, missing):
1676
- # if not missing: return gr.update(interactive=False)
1677
- # all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1678
- # return gr.update(interactive=all_filled)
1679
- # for inp in [name_in, role_in, seniority_in, skills_in]:
1680
- # inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1681
-
1682
- # def complete_manual(data, name, role, seniority, skills):
1683
- # if data["name"].lower() == "unknown": data["name"] = name
1684
- # if data["job_role"].lower() == "unknown": data["job_role"] = role
1685
- # if data["seniority"].lower() == "unknown": data["seniority"] = seniority
1686
- # if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1687
- # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1688
- # return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1689
- # submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1690
-
1691
- # def start_interview(data):
1692
- # # --- Advanced state with full logging ---
1693
- # state = {
1694
- # "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1695
- # "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1696
- # "conversation_history": [],
1697
- # "difficulty_adjustment": None,
1698
- # "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1699
- # "log": []
1700
- # }
1701
- # # --- Optionally: context retrieval here (currently just blank) ---
1702
- # context = ""
1703
- # prompt = build_interview_prompt(
1704
- # conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1705
- # skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1706
- # voice_label="neutral", face_label="neutral"
1707
- # )
1708
- # #here the original one
1709
- # # first_q = groq_llm.predict(prompt)
1710
- # # # Evaluate Q for quality
1711
- # # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1712
- # # state["questions"].append(first_q)
1713
- # # state["question_evaluations"].append(q_eval)
1714
-
1715
- # #here the testing one
1716
- # first_q = groq_llm.predict(prompt)
1717
- # q_eval = {
1718
- # "Score": "N/A",
1719
- # "Reasoning": "Skipped to reduce processing time",
1720
- # "Improvements": []
1721
- # }
1722
- # state["questions"].append(first_q)
1723
- # state["question_evaluations"].append(q_eval)
1724
-
1725
-
1726
- # state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1727
- # audio_path = bark_tts(first_q)
1728
- # # LOG
1729
- # state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1730
- # return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1731
- # start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1732
-
1733
- # def transcribe(audio_path):
1734
- # return whisper_stt(audio_path)
1735
- # user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1736
-
1737
- # def process_answer(transcript, audio_path, video_path, state, data):
1738
- # if not transcript and not video_path:
1739
- # return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1740
- # elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1741
- # state["timings"].append(elapsed)
1742
- # state["answers"].append(transcript)
1743
- # state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1744
-
1745
- # # --- 1. Emotion analysis ---
1746
- # # voice_label = analyze_audio_emotion(audio_path)
1747
- # # face_label = analyze_video_emotions(video_path)
1748
- # # state["voice_labels"].append(voice_label)
1749
- # # state["face_labels"].append(face_label)
1750
-
1751
- # #just for testing
1752
- # voice_label = "neutral"
1753
- # face_label = "neutral"
1754
- # state["voice_labels"].append(voice_label)
1755
- # state["face_labels"].append(face_label)
1756
-
1757
-
1758
-
1759
- # # --- 2. Evaluate previous Q and Answer ---
1760
- # last_q = state["questions"][-1]
1761
- # q_eval = state["question_evaluations"][-1] # Already in state
1762
- # ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1763
- # answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1764
- # state["answer_evaluations"].append(answer_eval)
1765
- # answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1766
-
1767
- # # --- 3. Adaptive difficulty ---
1768
- # if answer_score == "excellent":
1769
- # state["difficulty_adjustment"] = "harder"
1770
- # elif answer_score in ("medium", "poor"):
1771
- # state["difficulty_adjustment"] = "easier"
1772
- # else:
1773
- # state["difficulty_adjustment"] = None
1774
-
1775
- # # --- 4. Effective confidence ---
1776
- # # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1777
- # # state["effective_confidences"].append(eff_conf)
1778
-
1779
- # #just for testing:
1780
- # eff_conf = {"effective_confidence": 0.6}
1781
- # state["effective_confidences"].append(eff_conf)
1782
-
1783
-
1784
- # # --- LOG ---
1785
- # state["log"].append({
1786
- # "type": "answer",
1787
- # "question": last_q,
1788
- # "answer": transcript,
1789
- # "answer_eval": answer_eval,
1790
- # "ref_answer": ref_answer,
1791
- # "face_label": face_label,
1792
- # "voice_label": voice_label,
1793
- # "effective_confidence": eff_conf,
1794
- # "timing": elapsed,
1795
- # "timestamp": time.time()
1796
- # })
1797
-
1798
- # # --- Next or End ---
1799
- # qidx = state["question_idx"] + 1
1800
- # if qidx >= state["max_questions"]:
1801
- # # Save as JSON (optionally)
1802
- # timestamp = time.strftime("%Y%m%d_%H%M%S")
1803
- # log_file = f"interview_log_{timestamp}.json"
1804
- # with open(log_file, "w", encoding="utf-8") as f:
1805
- # json.dump(state["log"], f, indent=2, ensure_ascii=False)
1806
- # # Report
1807
- # summary = "# Interview Summary\n"
1808
- # for i, q in enumerate(state["questions"]):
1809
- # summary += (f"\n### Q{i + 1}: {q}\n"
1810
- # f"- *Answer*: {state['answers'][i]}\n"
1811
- # f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1812
- # f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1813
- # #also this are removed just for testing :(
1814
- # # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1815
- # # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1816
- # f"- *Time*: {state['timings'][i]}s\n")
1817
- # summary += f"\n\n⏺ Full log saved as {log_file}."
1818
- # return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected β€” Face: {face_label}, Voice: {voice_label}"))
1819
- # else:
1820
- # # --- Build next prompt using adaptive difficulty ---
1821
- # state["question_idx"] = qidx
1822
- # state["q_start_time"] = time.time()
1823
- # context = "" # You can add your context logic here
1824
- # prompt = build_interview_prompt(
1825
- # conversation_history=state["conversation_history"],
1826
- # user_response=transcript,
1827
- # context=context,
1828
- # job_role=data["job_role"],
1829
- # skills=data["skills"],
1830
- # seniority=data["seniority"],
1831
- # difficulty_adjustment=state["difficulty_adjustment"],
1832
- # face_label=face_label,
1833
- # voice_label=voice_label,
1834
- # effective_confidence=eff_conf
1835
- # )
1836
- # next_q = groq_llm.predict(prompt)
1837
- # # Evaluate Q quality
1838
- # q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1839
- # state["questions"].append(next_q)
1840
- # state["question_evaluations"].append(q_eval)
1841
- # state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1842
- # state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1843
- # audio_path = bark_tts(next_q)
1844
- # # Display evaluations
1845
- # eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1846
- # return (
1847
- # state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1848
- # gr.update(value=None), gr.update(value=None),
1849
- # gr.update(visible=True, value=f"Last Detected β€” Face: {face_label}, Voice: {voice_label}"),
1850
- # )
1851
- # confirm_btn.click(
1852
- # process_answer,
1853
- # [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1854
- # [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1855
- # ).then(
1856
- # lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1857
- # )
1858
-
1859
- # demo.launch(debug=True)
1860
-
1861
  import gradio as gr
1862
  import time
1863
  import tempfile
@@ -1879,18 +1469,15 @@ print(torch.cuda.is_available()) # ✅ Tells you if GPU is available
1879
  torch.cuda.empty_cache()
1880
  gc.collect()
1881
 
 
1882
  # Bark TTS
1883
  print("🔍 Loading Bark model...")
1884
- model_bark = BarkModel.from_pretrained("suno/bark")
1885
  print("✅ Bark model loaded")
1886
-
1887
  print("🔍 Loading Bark processor...")
1888
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1889
  print("✅ Bark processor loaded")
1890
- print("🔍 Moving Bark model to device...")
1891
- model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1892
- print("✅ Bark model on device")
1893
- bark_voice_preset = "v2/en_speaker_6"
1894
 
1895
  def bark_tts(text):
1896
  print(f"🔍 Synthesizing TTS for: {text}")
@@ -1907,110 +1494,24 @@ def bark_tts(text):
1907
  print("🔍 Loading Whisper model...")
1908
  whisper_model = whisper.load_model("base", device="cuda")
1909
  print("✅ Whisper model loaded")
1910
-
1911
  def whisper_stt(audio_path):
1912
- if not audio_path or not os.path.exists(audio_path):
1913
- return ""
1914
  result = whisper_model.transcribe(audio_path)
1915
  return result["text"]
1916
 
1917
- # DeepFace (Video Face Emotion)
1918
- def ensure_mp4(video_input):
1919
- if isinstance(video_input, str):
1920
- input_path = video_input
1921
- else:
1922
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1923
- temp_in.write(video_input.read())
1924
- input_path = temp_in.name
1925
-
1926
- if input_path.endswith(".mp4"):
1927
- return input_path
1928
-
1929
- mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1930
- try:
1931
- clip = VideoFileClip(input_path)
1932
- clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1933
- clip.close()
1934
- except Exception as e:
1935
- print("Video conversion failed:", e)
1936
- shutil.copy(input_path, mp4_path)
1937
- return mp4_path
1938
-
1939
- def analyze_video_emotions(video_input, sample_rate=15):
1940
- mp4_path = ensure_mp4(video_input)
1941
- if not mp4_path or not os.path.exists(mp4_path):
1942
- return "no_face"
1943
- cap = cv2.VideoCapture(mp4_path)
1944
- frame_count = 0
1945
- emotion_counts = {}
1946
- while True:
1947
- ret, frame = cap.read()
1948
- if not ret: break
1949
- if frame_count % sample_rate == 0:
1950
- try:
1951
- result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1952
- dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1953
- emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1954
- except Exception: pass
1955
- frame_count += 1
1956
- cap.release()
1957
- if not emotion_counts: return "no_face"
1958
- return max(emotion_counts.items(), key=lambda x: x[1])[0]
1959
-
1960
- # Wav2Vec2 model for audio emotion analysis
1961
- local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"
1962
- print("🔍 Loading Wav2Vec processor and model...")
1963
- wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1964
- wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1965
- wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1966
- print("✅ Wav2Vec model loaded")
1967
- wav2vec_model.eval()
1968
- voice_label_map = {
1969
- 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1970
- 4: 'neutral', 5: 'sad', 6: 'surprise'
1971
- }
1972
-
1973
- def analyze_audio_emotion(audio_path):
1974
- print(f"🔍 Analyzing audio emotion for: {audio_path}")
1975
- if not audio_path or not os.path.exists(audio_path):
1976
- return "neutral"
1977
-
1978
- speech, sr = librosa.load(audio_path, sr=16000)
1979
- inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1980
-
1981
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1982
- wav2vec_model.to(device)
1983
- inputs = {k: v.to(device) for k, v in inputs.items()}
1984
-
1985
- with torch.no_grad():
1986
- logits = wav2vec_model(**inputs).logits
1987
-
1988
- probs = torch.nn.functional.softmax(logits, dim=-1)
1989
- predicted_id = torch.argmax(probs, dim=-1).item()
1990
- return voice_label_map.get(predicted_id, "neutral")
1991
-
1992
- # Effective confidence calculation
1993
- def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1994
- emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1995
- answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1996
- voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1997
- avg_emotion = (voice_score + face_score) / 2
1998
- control_bonus = max(0, answer_score - avg_emotion) * k
1999
- eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
2000
- return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
2001
-
2002
  seniority_mapping = {
2003
  "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
2004
  }
2005
 
2006
- # Gradio App
 
 
2007
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
2008
  user_data = gr.State({})
2009
  interview_state = gr.State({})
2010
  missing_fields_state = gr.State([])
2011
- recording_state = gr.State({"is_recording": False})
2012
 
2013
- # UI Layout
2014
  with gr.Column(visible=True) as user_info_section:
2015
  gr.Markdown("## Candidate Information")
2016
  cv_file = gr.File(label="Upload CV")
@@ -2031,34 +1532,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2031
 
2032
  with gr.Column(visible=False) as interview_section:
2033
  gr.Markdown("## Interview in Progress")
2034
- question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
2035
  question_text = gr.Markdown()
2036
-
2037
- # Audio recording controls
2038
- with gr.Row():
2039
- record_btn = gr.Button("🎤 Start Recording", variant="primary")
2040
- stop_btn = gr.Button("⏹️ Stop Recording", interactive=False)
2041
-
2042
- # Hidden audio component for recording
2043
- user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Audio Recording", visible=False)
2044
-
2045
- # Video input (keeping for emotion analysis)
2046
- user_video_input = gr.Video(sources=["webcam"], label="Video Recording (for emotion analysis)", visible=False)
2047
-
2048
- # Transcript and confirmation
2049
- stt_transcript = gr.Textbox(label="Transcribed Answer (automatically generated)", interactive=True)
2050
- confirm_btn = gr.Button("Confirm Answer", interactive=False)
2051
-
2052
- # Status and results
2053
- recording_status = gr.Markdown("**Status:** Ready to record")
2054
  evaluation_display = gr.Markdown()
2055
- emotion_display = gr.Markdown()
2056
  interview_summary = gr.Markdown(visible=False)
2057
 
2058
- # UI Logic
 
2059
  def validate_start_btn(cv_file, job_desc):
2060
  return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
2061
-
2062
  cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
2063
  job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
2064
 
@@ -2066,10 +1551,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2066
  details = extract_candidate_details(cv_file.name)
2067
  job_info = extract_job_details(job_desc)
2068
  data = {
2069
- "name": details.get("name", "unknown"),
2070
- "job_role": job_info.get("job_title", "unknown"),
2071
- "seniority": job_info.get("experience_level", "unknown"),
2072
- "skills": job_info.get("skills", [])
2073
  }
2074
  missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
2075
  if missing:
@@ -2077,7 +1560,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2077
  else:
2078
  greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
2079
  return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
2080
-
2081
  start_btn.click(
2082
  process_and_route_initial,
2083
  [cv_file, job_desc],
@@ -2086,23 +1568,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2086
 
2087
  def show_missing(missing):
2088
  if missing is None: missing = []
2089
- return (gr.update(visible="name" in missing),
2090
- gr.update(visible="job_role" in missing),
2091
- gr.update(visible="seniority" in missing),
2092
- gr.update(visible="skills" in missing))
2093
-
2094
  missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
2095
 
2096
  def validate_fields(name, role, seniority, skills, missing):
2097
  if not missing: return gr.update(interactive=False)
2098
- all_filled = all([
2099
- (not ("name" in missing) or bool(name.strip())),
2100
- (not ("job_role" in missing) or bool(role.strip())),
2101
- (not ("seniority" in missing) or bool(seniority)),
2102
- (not ("skills" in missing) or bool(skills.strip()))
2103
- ])
2104
  return gr.update(interactive=all_filled)
2105
-
2106
  for inp in [name_in, role_in, seniority_in, skills_in]:
2107
  inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
2108
 
@@ -2113,10 +1585,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2113
  if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
2114
  greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
2115
  return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
2116
-
2117
  submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
2118
 
2119
  def start_interview(data):
 
2120
  state = {
2121
  "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
2122
  "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
@@ -2125,14 +1597,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2125
  "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
2126
  "log": []
2127
  }
 
2128
  context = ""
2129
  prompt = build_interview_prompt(
2130
  conversation_history=[], user_response="", context=context, job_role=data["job_role"],
2131
  skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
2132
  voice_label="neutral", face_label="neutral"
2133
  )
2134
-
2135
- # Generate first question
 
2136
  first_q = groq_llm.predict(prompt)
2137
  q_eval = {
2138
  "Score": "N/A",
@@ -2141,101 +1620,50 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2141
  }
2142
  state["questions"].append(first_q)
2143
  state["question_evaluations"].append(q_eval)
2144
- state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
2145
 
2146
- # Generate audio for question
2147
  audio_path = bark_tts(first_q)
2148
-
2149
- # Log
2150
  state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
2151
-
2152
- return (state,
2153
- gr.update(visible=False),
2154
- gr.update(visible=True),
2155
- audio_path,
2156
- f"*Question 1:* {first_q}",
2157
- gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2158
-
2159
- start_interview_final_btn.click(
2160
- start_interview,
2161
- [user_data],
2162
- [interview_state, interview_pre_section, interview_section, question_audio, question_text, recording_status]
2163
- )
2164
-
2165
- # Recording functionality
2166
- def start_recording(rec_state):
2167
- rec_state["is_recording"] = True
2168
- return (rec_state,
2169
- gr.update(interactive=False),
2170
- gr.update(interactive=True),
2171
- gr.update(visible=True),
2172
- gr.update(value="**Status:** πŸ”΄ Recording... Click 'Stop Recording' when done"))
2173
-
2174
- record_btn.click(
2175
- start_recording,
2176
- [recording_state],
2177
- [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2178
- )
2179
-
2180
- def stop_recording(rec_state):
2181
- rec_state["is_recording"] = False
2182
- return (rec_state,
2183
- gr.update(interactive=True),
2184
- gr.update(interactive=False),
2185
- gr.update(visible=False),
2186
- gr.update(value="**Status:** Processing audio... Please wait"))
2187
-
2188
- stop_btn.click(
2189
- stop_recording,
2190
- [recording_state],
2191
- [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2192
- )
2193
 
2194
- # Auto-transcription when audio is recorded
2195
- def transcribe_and_update(audio_path):
2196
- if not audio_path:
2197
- return "", gr.update(interactive=False), gr.update(value="**Status:** No audio recorded")
2198
-
2199
- transcript = whisper_stt(audio_path)
2200
- if transcript:
2201
- return (transcript,
2202
- gr.update(interactive=True),
2203
- gr.update(value="**Status:** Audio transcribed! Review and click 'Confirm Answer'"))
2204
- else:
2205
- return ("",
2206
- gr.update(interactive=False),
2207
- gr.update(value="**Status:** Transcription failed. Please try recording again"))
2208
-
2209
- user_audio_input.change(
2210
- transcribe_and_update,
2211
- [user_audio_input],
2212
- [stt_transcript, confirm_btn, recording_status]
2213
- )
2214
 
2215
  def process_answer(transcript, audio_path, video_path, state, data):
2216
- if not transcript:
2217
- return (state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
2218
-
2219
  elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
2220
  state["timings"].append(elapsed)
2221
  state["answers"].append(transcript)
2222
  state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
2223
 
2224
- # Emotion analysis (using testing values for speed)
2225
  voice_label = "neutral"
2226
  face_label = "neutral"
2227
  state["voice_labels"].append(voice_label)
2228
  state["face_labels"].append(face_label)
2229
 
2230
- # Evaluate answer
 
 
2231
  last_q = state["questions"][-1]
2232
- q_eval = state["question_evaluations"][-1]
2233
  ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
2234
  answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
2235
  state["answer_evaluations"].append(answer_eval)
2236
  answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
2237
 
2238
- # Adaptive difficulty
2239
  if answer_score == "excellent":
2240
  state["difficulty_adjustment"] = "harder"
2241
  elif answer_score in ("medium", "poor"):
@@ -2243,11 +1671,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2243
  else:
2244
  state["difficulty_adjustment"] = None
2245
 
2246
- # Effective confidence (testing value)
2247
  eff_conf = {"effective_confidence": 0.6}
2248
  state["effective_confidences"].append(eff_conf)
2249
 
2250
- # Log
 
2251
  state["log"].append({
2252
  "type": "answer",
2253
  "question": last_q,
@@ -2261,38 +1694,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2261
  "timestamp": time.time()
2262
  })
2263
 
2264
- # Check if interview is complete
2265
  qidx = state["question_idx"] + 1
2266
  if qidx >= state["max_questions"]:
2267
- # Save log
2268
  timestamp = time.strftime("%Y%m%d_%H%M%S")
2269
  log_file = f"interview_log_{timestamp}.json"
2270
  with open(log_file, "w", encoding="utf-8") as f:
2271
  json.dump(state["log"], f, indent=2, ensure_ascii=False)
2272
-
2273
- # Generate summary
2274
  summary = "# Interview Summary\n"
2275
  for i, q in enumerate(state["questions"]):
2276
  summary += (f"\n### Q{i + 1}: {q}\n"
2277
  f"- *Answer*: {state['answers'][i]}\n"
2278
  f"- *Q Eval*: {state['question_evaluations'][i]}\n"
2279
  f"- *A Eval*: {state['answer_evaluations'][i]}\n"
2280
  f"- *Time*: {state['timings'][i]}s\n")
2281
  summary += f"\n\n⏺ Full log saved as {log_file}."
2282
-
2283
- return (state,
2284
- gr.update(visible=True, value=summary),
2285
- gr.update(value=None),
2286
- gr.update(value=None),
2287
- gr.update(value=None),
2288
- gr.update(interactive=False),
2289
- gr.update(visible=True, value=f"Last Detected β€” Face: {face_label}, Voice: {voice_label}"),
2290
- gr.update(value="**Status:** Interview completed!"))
2291
  else:
2292
- # Generate next question
2293
  state["question_idx"] = qidx
2294
  state["q_start_time"] = time.time()
2295
- context = ""
2296
  prompt = build_interview_prompt(
2297
  conversation_history=state["conversation_history"],
2298
  user_response=transcript,
@@ -2301,34 +1728,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2301
  skills=data["skills"],
2302
  seniority=data["seniority"],
2303
  difficulty_adjustment=state["difficulty_adjustment"],
2304
- face_label=face_label,
2305
  voice_label=voice_label,
2306
- effective_confidence=eff_conf
2307
  )
2308
-
2309
  next_q = groq_llm.predict(prompt)
 
2310
  q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
2311
  state["questions"].append(next_q)
2312
  state["question_evaluations"].append(q_eval)
2313
  state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
2314
  state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
2315
-
2316
  audio_path = bark_tts(next_q)
 
2317
  eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
2318
-
2319
- return (state,
2320
- gr.update(visible=False),
2321
- audio_path,
2322
- f"*Question {qidx + 1}:* {next_q}",
2323
- gr.update(value=""),
2324
- gr.update(interactive=False),
2325
- gr.update(visible=True, value=f"Last Detected β€” Face: {face_label}, Voice: {voice_label}"),
2326
- gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2327
-
2328
  confirm_btn.click(
2329
  process_answer,
2330
  [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
2331
- [interview_state, interview_summary, question_audio, question_text, stt_transcript, confirm_btn, emotion_display, recording_status]
 
 
2332
  )
2333
 
2334
  demo.launch(debug=True)
1451
  import gradio as gr
1452
  import time
1453
  import tempfile
 
1469
  torch.cuda.empty_cache()
1470
  gc.collect()
1471
 
1472
+
1473
  # Bark TTS
1474
  print("🔍 Loading Bark model...")
1475
+ model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
1476
  print("✅ Bark model loaded")
 
1477
  print("🔍 Loading Bark processor...")
1478
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1479
  print("✅ Bark processor loaded")
1480
+ bark_voice_preset = "v2/en_speaker_5"
 
 
 
1481
 
1482
  def bark_tts(text):
1483
  print(f"🔍 Synthesizing TTS for: {text}")
 
1494
  print("🔍 Loading Whisper model...")
1495
  whisper_model = whisper.load_model("base", device="cuda")
1496
  print("✅ Whisper model loaded")
 
1497
  def whisper_stt(audio_path):
1498
+ if not audio_path or not os.path.exists(audio_path): return ""
 
1499
  result = whisper_model.transcribe(audio_path)
1500
  return result["text"]
1501
 
1502
  seniority_mapping = {
1503
  "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1504
  }
1505
 
1506
+
1507
+ # --- 2. Gradio App ---
1508
+
1509
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
1510
  user_data = gr.State({})
1511
  interview_state = gr.State({})
1512
  missing_fields_state = gr.State([])
 
1513
 
1514
+ # --- UI Layout ---
1515
  with gr.Column(visible=True) as user_info_section:
1516
  gr.Markdown("## Candidate Information")
1517
  cv_file = gr.File(label="Upload CV")
 
1532
 
1533
  with gr.Column(visible=False) as interview_section:
1534
  gr.Markdown("## Interview in Progress")
1535
+ question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1536
  question_text = gr.Markdown()
1537
+ user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1538
+ stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1539
+ confirm_btn = gr.Button("Confirm Answer")
 
1540
  evaluation_display = gr.Markdown()
 
1541
  interview_summary = gr.Markdown(visible=False)
1542
 
1543
+ # --- UI Logic ---
1544
+
1545
  def validate_start_btn(cv_file, job_desc):
1546
  return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
 
1547
  cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1548
  job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1549
 
 
1551
  details = extract_candidate_details(cv_file.name)
1552
  job_info = extract_job_details(job_desc)
1553
  data = {
1554
+ "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1555
+ "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
 
 
1556
  }
1557
  missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1558
  if missing:
 
1560
  else:
1561
  greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1562
  return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
 
1563
  start_btn.click(
1564
  process_and_route_initial,
1565
  [cv_file, job_desc],
 
1568
 
1569
  def show_missing(missing):
1570
  if missing is None: missing = []
1571
+ return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
 
 
 
 
1572
  missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1573
 
1574
  def validate_fields(name, role, seniority, skills, missing):
1575
  if not missing: return gr.update(interactive=False)
1576
+ all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
 
 
 
 
 
1577
  return gr.update(interactive=all_filled)
 
1578
  for inp in [name_in, role_in, seniority_in, skills_in]:
1579
  inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1580
 
 
1585
  if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1586
  greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1587
  return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
 
1588
  submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1589
 
1590
  def start_interview(data):
1591
+ # --- Advanced state with full logging ---
1592
  state = {
1593
  "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1594
  "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
 
1597
  "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1598
  "log": []
1599
  }
1600
+ # --- Optionally: context retrieval here (currently just blank) ---
1601
  context = ""
1602
  prompt = build_interview_prompt(
1603
  conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1604
  skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1605
  voice_label="neutral", face_label="neutral"
1606
  )
1607
+ #here the original one
1608
+ # first_q = groq_llm.predict(prompt)
1609
+ # # Evaluate Q for quality
1610
+ # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1611
+ # state["questions"].append(first_q)
1612
+ # state["question_evaluations"].append(q_eval)
1613
+
1614
+ #here the testing one
1615
  first_q = groq_llm.predict(prompt)
1616
  q_eval = {
1617
  "Score": "N/A",
 
1620
  }
1621
  state["questions"].append(first_q)
1622
  state["question_evaluations"].append(q_eval)
1623
+
1624
 
1625
+ state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1626
  audio_path = bark_tts(first_q)
1627
+ # LOG
 
1628
  state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1629
+ return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1630
+ start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1631
 
1632
+ def transcribe(audio_path):
1633
+ return whisper_stt(audio_path)
1634
+ user_audio_input.change(transcribe, user_audio_input, stt_transcript)
 
1635
 
1636
  def process_answer(transcript, audio_path, video_path, state, data):
1637
+ if not transcript and not video_path:
1638
+ return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
 
1639
  elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1640
  state["timings"].append(elapsed)
1641
  state["answers"].append(transcript)
1642
  state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1643
 
1644
+ # --- 1. Emotion analysis ---
1645
+ # voice_label = analyze_audio_emotion(audio_path)
1646
+ # face_label = analyze_video_emotions(video_path)
1647
+ # state["voice_labels"].append(voice_label)
1648
+ # state["face_labels"].append(face_label)
1649
+
1650
+ #just for testing
1651
  voice_label = "neutral"
1652
  face_label = "neutral"
1653
  state["voice_labels"].append(voice_label)
1654
  state["face_labels"].append(face_label)
1655
 
1656
+
1657
+
1658
+ # --- 2. Evaluate previous Q and Answer ---
1659
  last_q = state["questions"][-1]
1660
+ q_eval = state["question_evaluations"][-1] # Already in state
1661
  ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1662
  answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1663
  state["answer_evaluations"].append(answer_eval)
1664
  answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1665
 
1666
+ # --- 3. Adaptive difficulty ---
1667
  if answer_score == "excellent":
1668
  state["difficulty_adjustment"] = "harder"
1669
  elif answer_score in ("medium", "poor"):
 
1671
  else:
1672
  state["difficulty_adjustment"] = None
1673
 
1674
+ # --- 4. Effective confidence ---
1675
+ # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1676
+ # state["effective_confidences"].append(eff_conf)
1677
+
1678
+ #just for testing:
1679
  eff_conf = {"effective_confidence": 0.6}
1680
  state["effective_confidences"].append(eff_conf)
1681
 
1682
+
1683
+ # --- LOG ---
1684
  state["log"].append({
1685
  "type": "answer",
1686
  "question": last_q,
 
1694
  "timestamp": time.time()
1695
  })
1696
 
1697
+ # --- Next or End ---
1698
  qidx = state["question_idx"] + 1
1699
  if qidx >= state["max_questions"]:
1700
+ # Save as JSON (optionally)
1701
  timestamp = time.strftime("%Y%m%d_%H%M%S")
1702
  log_file = f"interview_log_{timestamp}.json"
1703
  with open(log_file, "w", encoding="utf-8") as f:
1704
  json.dump(state["log"], f, indent=2, ensure_ascii=False)
1705
+ # Report
 
1706
  summary = "# Interview Summary\n"
1707
  for i, q in enumerate(state["questions"]):
1708
  summary += (f"\n### Q{i + 1}: {q}\n"
1709
  f"- *Answer*: {state['answers'][i]}\n"
1710
  f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1711
  f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1712
+ #also this are removed just for testing :(
1713
+ # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1714
+ # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1715
  f"- *Time*: {state['timings'][i]}s\n")
1716
  summary += f"\n\n⏺ Full log saved as {log_file}."
1717
+ return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected β€” Face: {face_label}, Voice: {voice_label}"))
1718
  else:
1719
+ # --- Build next prompt using adaptive difficulty ---
1720
  state["question_idx"] = qidx
1721
  state["q_start_time"] = time.time()
1722
+ context = "" # You can add your context logic here
1723
  prompt = build_interview_prompt(
1724
  conversation_history=state["conversation_history"],
1725
  user_response=transcript,
 
1728
  skills=data["skills"],
1729
  seniority=data["seniority"],
1730
  difficulty_adjustment=state["difficulty_adjustment"],
 
1731
  voice_label=voice_label,
 
1732
  )
 
1733
  next_q = groq_llm.predict(prompt)
1734
+ # Evaluate Q quality
1735
  q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1736
  state["questions"].append(next_q)
1737
  state["question_evaluations"].append(q_eval)
1738
  state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1739
  state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
 
1740
  audio_path = bark_tts(next_q)
1741
+ # Display evaluations
1742
  eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1743
+ return (
1744
+ state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1745
+ gr.update(value=None), gr.update(value=None),
1746
+ gr.update(visible=True, value=f"Last Detected β€” Face: {face_label}, Voice: {voice_label}"),
1747
+ )
 
1748
  confirm_btn.click(
1749
  process_answer,
1750
  [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1751
+ [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1752
+ ).then(
1753
+ lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1754
  )
1755
 
1756
  demo.launch(debug=True)
1757
+
1758
+
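For readers who want to try the retained audio pipeline outside the app, here is a minimal sketch, assuming the suno/bark and Whisper "base" checkpoints can be downloaded. It mirrors the bark_tts and whisper_stt functions, the v2/en_speaker_5 preset, and the 22050 Hz write rate kept by this commit; the CPU fallback and the __main__ demo line are illustrative additions, not part of app.py (which loads Whisper on "cuda" unconditionally).

# Minimal standalone sketch (not part of the commit) of the Bark TTS -> Whisper STT
# round trip kept in app.py after this change.
import tempfile

import numpy as np
import scipy.io.wavfile as wavfile
import torch
import whisper
from transformers import AutoProcessor, BarkModel

device = "cuda" if torch.cuda.is_available() else "cpu"  # assumption: fall back to CPU

model_bark = BarkModel.from_pretrained("suno/bark").to(device)
processor_bark = AutoProcessor.from_pretrained("suno/bark")
bark_voice_preset = "v2/en_speaker_5"  # preset selected in this commit

whisper_model = whisper.load_model("base", device=device)

def bark_tts(text):
    # Synthesize `text` to a 16-bit WAV file and return its path (as in the diff above).
    inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
    inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
    speech = model_bark.generate(**inputs).cpu().numpy().squeeze()
    speech = (speech * 32767).astype(np.int16)
    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    wavfile.write(temp_wav.name, 22050, speech)
    return temp_wav.name

def whisper_stt(audio_path):
    # Transcribe the generated WAV file back to text.
    return whisper_model.transcribe(audio_path)["text"]

if __name__ == "__main__":
    wav_path = bark_tts("Tell me about a recent project you are proud of.")
    print(whisper_stt(wav_path))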