husseinelsaadi committed · Commit cd89a2a · verified · 1 Parent(s): d525d89

Update app.py

Files changed (1)
  1. app.py +346 -155
app.py CHANGED
@@ -774,41 +774,7 @@ A:"""
         logging.error(f"Error generating reference answer: {e}", exc_info=True)
         return "Unable to generate reference answer due to an error"
 
-def interpret_confidence(voice_label, face_label, answer_score_label,k=0.2):
-    # Map expressions to rough numerical confidence levels
-    emotion_map = {
-        "happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4,
-        "angry": 0.3, "disgust": 0.2, "fear": 0.3,
-    }
-
-    answer_score_map = {
-        "excellent": 1.0,
-        "good": 0.8,
-        "medium": 0.6,
-        "poor": 0.3
-    }
-
-    voice_score = emotion_map.get(voice_label, 0.5)
-    face_score = emotion_map.get(face_label, 0.5)
-    answer_score = answer_score_map.get(answer_score_label, 0.5)
-
-    # Adjust weights as needed (emotions may be less reliable than verbal answers)
-    avg_emotion = (voice_score + face_score) /2
-    control_bonus = max(0,answer_score - avg_emotion) *k
-    effective_confidence = (
-        0.5 * answer_score +
-        0.22 * voice_score +
-        0.18 * face_score +
-        0.1 *control_bonus
-    )
-
-    return {
-        "effective_confidence": round(effective_confidence, 3),
-        "answer_score": round(answer_score, 2),
-        "voice_score": round(voice_score, 2),
-        "face_score": round(face_score, 2),
-        "control_bonus": round(control_bonus, 3)
-    }
 
 def build_interview_prompt(conversation_history, user_response, context, job_role, skills, seniority,
  difficulty_adjustment=None, voice_label=None, face_label=None, effective_confidence=None):
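Note: the interpret_confidence() helper removed above blended the verbal answer score with voice/face emotion labels into one weighted value. A minimal standalone sketch of that weighting, kept only for reference (illustrative helper name, reproducing the deleted formula):

# Illustrative sketch of the removed weighting; inputs are the 0–1 scores mapped above.
def effective_confidence(answer_score, voice_score, face_score, k=0.2):
    avg_emotion = (voice_score + face_score) / 2
    control_bonus = max(0, answer_score - avg_emotion) * k
    return round(0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus, 3)

# Example: effective_confidence(0.8, 0.6, 0.6) -> 0.644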
@@ -857,8 +823,6 @@ def build_interview_prompt(conversation_history, user_response, context, job_rol
     {response_evaluation}
 
     Voice Tone: {voice_label}
-    Facial Expression: {face_label}
-    Estimated Confidence Score: {effective_confidence}
 
     ---
     ---
@@ -931,8 +895,6 @@ def build_interview_prompt(conversation_history, user_response, context, job_rol
         user_response=user_response,
         response_evaluation=response_evaluation.strip(),
         voice_label=voice_label or "unknown",
-        face_label=face_label or "unknown",
-        effective_confidence=effective_confidence if effective_confidence is not None else "N/A"
     )
 
     return prompt
@@ -1451,28 +1413,338 @@ def extract_candidate_details(file_path):
         "skills": skills
     }
 
 import gradio as gr
 import time
 import tempfile
 import numpy as np
 import scipy.io.wavfile as wavfile
-import cv2
 import os
 import json
-from moviepy.editor import VideoFileClip
-import shutil
 from transformers import BarkModel, AutoProcessor
 import torch, gc
 import whisper
-from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
-import librosa
 
-import torch
-print(torch.cuda.is_available()) # ✅ Tells you if GPU is available
 torch.cuda.empty_cache()
 gc.collect()
 
-
 # Bark TTS
 print("🔁 Loading Bark model...")
 model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
@@ -1480,20 +1752,13 @@ print("✅ Bark model loaded")
 print("🔁 Loading Bark processor...")
 processor_bark = AutoProcessor.from_pretrained("suno/bark")
 print("✅ Bark processor loaded")
-bark_voice_preset = "v2/en_speaker_5"
 
 def bark_tts(text):
     print(f"🔁 Synthesizing TTS for: {text}")
-
-    # Process the text
     inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
-
-    # Move tensors to device
     input_ids = inputs["input_ids"].to(model_bark.device)
-
     start = time.time()
-
-    # Generate speech with only the required parameters
     with torch.no_grad():
         speech_values = model_bark.generate(
             input_ids=input_ids,
@@ -1501,22 +1766,13 @@ def bark_tts(text):
             fine_temperature=0.4,
             coarse_temperature=0.8
         )
-
     print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
-
-    # Convert to audio
     speech = speech_values.cpu().numpy().squeeze()
     speech = (speech * 32767).astype(np.int16)
-
     temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
     wavfile.write(temp_wav.name, 22050, speech)
-
     return temp_wav.name
 
-
-
-
-
 # Whisper STT
 print("🔁 Loading Whisper model...")
  whisper_model = whisper.load_model("base", device="cuda")
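The Whisper model loaded in the context above is used through the small whisper_stt() wrapper elsewhere in this file. A quick smoke test of that path could look like this (a hedged sketch; "sample_answer.wav" is a hypothetical placeholder file):

# Hypothetical STT check, assuming whisper_model was loaded as above.
text = whisper_model.transcribe("sample_answer.wav")["text"]
print(text)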
@@ -1530,15 +1786,11 @@ seniority_mapping = {
     "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
 }
 
-
-# --- 2. Gradio App ---
-
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     user_data = gr.State({})
     interview_state = gr.State({})
     missing_fields_state = gr.State([])
 
-    # --- UI Layout ---
     with gr.Column(visible=True) as user_info_section:
         gr.Markdown("## Candidate Information")
         cv_file = gr.File(label="Upload CV")
@@ -1567,8 +1819,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         evaluation_display = gr.Markdown()
         interview_summary = gr.Markdown(visible=False)
 
-    # --- UI Logic ---
-
     def validate_start_btn(cv_file, job_desc):
         return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
     cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
@@ -1578,8 +1828,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         details = extract_candidate_details(cv_file.name)
         job_info = extract_job_details(job_desc)
         data = {
-            "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
-            "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
         }
         missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
         if missing:
@@ -1587,11 +1839,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         else:
             greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
             return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
-    start_btn.click(
-        process_and_route_initial,
-        [cv_file, job_desc],
-        [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
-    )
 
     def show_missing(missing):
         if missing is None: missing = []
@@ -1600,7 +1848,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     def validate_fields(name, role, seniority, skills, missing):
         if not missing: return gr.update(interactive=False)
-        all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
         return gr.update(interactive=all_filled)
     for inp in [name_in, role_in, seniority_in, skills_in]:
         inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
@@ -1615,46 +1863,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
 
     def start_interview(data):
-        # --- Advanced state with full logging ---
         state = {
-            "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
-            "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
-            "conversation_history": [],
-            "difficulty_adjustment": None,
-            "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
-            "log": []
         }
-        # --- Optionally: context retrieval here (currently just blank) ---
         context = ""
         prompt = build_interview_prompt(
             conversation_history=[], user_response="", context=context, job_role=data["job_role"],
-            skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
-            voice_label="neutral", face_label="neutral"
         )
-        #here the original one
-        # first_q = groq_llm.predict(prompt)
-        # # Evaluate Q for quality
-        # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
-        # state["questions"].append(first_q)
-        # state["question_evaluations"].append(q_eval)
-
-        #here the testing one
         first_q = groq_llm.predict(prompt)
-        q_eval = {
-            "Score": "N/A",
-            "Reasoning": "Skipped to reduce processing time",
-            "Improvements": []
-        }
         state["questions"].append(first_q)
         state["question_evaluations"].append(q_eval)
-
-
         state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
-        start = time.perf_counter()
         audio_path = bark_tts(first_q)
-        print("⏱️ Bark TTS took", time.perf_counter() - start, "seconds")
-
-        # LOG
         state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
         return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
     start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
@@ -1663,30 +1887,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         return whisper_stt(audio_path)
     user_audio_input.change(transcribe, user_audio_input, stt_transcript)
 
-    def process_answer(transcript, audio_path, video_path, state, data):
         if not transcript:
             return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
-
         elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
         state["timings"].append(elapsed)
         state["answers"].append(transcript)
         state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
 
-        # --- 1. Emotion analysis (simplified for testing) ---
-        voice_label = "neutral"
-        face_label = "neutral"
-        state["voice_labels"].append(voice_label)
-        state["face_labels"].append(face_label)
-
-        # --- 2. Evaluate previous Q and Answer ---
         last_q = state["questions"][-1]
-        q_eval = state["question_evaluations"][-1] # Already in state
         ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
         answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
         state["answer_evaluations"].append(answer_eval)
         answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
 
-        # --- 3. Adaptive difficulty ---
         if answer_score == "excellent":
             state["difficulty_adjustment"] = "harder"
         elif answer_score in ("medium", "poor"):
@@ -1694,33 +1910,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             state["difficulty_adjustment"] = "easier"
         else:
             state["difficulty_adjustment"] = None
 
-        # --- 4. Effective confidence (simplified) ---
-        eff_conf = {"effective_confidence": 0.6}
-        state["effective_confidences"].append(eff_conf)
-
-        # --- LOG ---
         state["log"].append({
-            "type": "answer",
-            "question": last_q,
-            "answer": transcript,
-            "answer_eval": answer_eval,
-            "ref_answer": ref_answer,
-            "face_label": face_label,
-            "voice_label": voice_label,
-            "effective_confidence": eff_conf,
-            "timing": elapsed,
-            "timestamp": time.time()
         })
 
-        # --- Next or End ---
         qidx = state["question_idx"] + 1
         if qidx >= state["max_questions"]:
-            # Save as JSON (optionally)
             timestamp = time.strftime("%Y%m%d_%H%M%S")
             log_file = f"interview_log_{timestamp}.json"
             with open(log_file, "w", encoding="utf-8") as f:
                 json.dump(state["log"], f, indent=2, ensure_ascii=False)
-            # Report
             summary = "# Interview Summary\n"
             for i, q in enumerate(state["questions"]):
                 summary += (f"\n### Q{i + 1}: {q}\n"
@@ -1729,41 +1930,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                             f"- *A Eval*: {state['answer_evaluations'][i]}\n"
                             f"- *Time*: {state['timings'][i]}s\n")
             summary += f"\n\n⏺ Full log saved as {log_file}."
-            return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"))
         else:
-            # --- Build next prompt using adaptive difficulty ---
             state["question_idx"] = qidx
             state["q_start_time"] = time.time()
-            context = "" # You can add your context logic here
             prompt = build_interview_prompt(
                 conversation_history=state["conversation_history"],
-                user_response=transcript,
-                context=context,
-                job_role=data["job_role"],
-                skills=data["skills"],
-                seniority=data["seniority"],
-                difficulty_adjustment=state["difficulty_adjustment"],
-                voice_label=voice_label,
             )
             next_q = groq_llm.predict(prompt)
-            # Evaluate Q quality
             q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
             state["questions"].append(next_q)
             state["question_evaluations"].append(q_eval)
             state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
             state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
             audio_path = bark_tts(next_q)
-            # Display evaluations
-            eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
-            return (
-                state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
-                gr.update(value=None), gr.update(value=None),
-                gr.update(visible=True, value=eval_md),
-            )
-    # Replace your confirm_btn.click with this:
     confirm_btn.click(
         process_answer,
-        [stt_transcript, user_audio_input, None, interview_state, user_data], # Added None for video_path
        [interview_state, interview_summary, question_audio, question_text, user_audio_input, stt_transcript, evaluation_display]
     ).then(
         lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, stt_transcript]
 
+ # import gradio as gr
+ # import time
+ # import tempfile
+ # import numpy as np
+ # import scipy.io.wavfile as wavfile
+ # import os
+ # import json
+ # from transformers import BarkModel, AutoProcessor
+ # import torch, gc
+ # import whisper
+ # from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
+ # import librosa
+
+ # import torch
+ # print(torch.cuda.is_available()) # ✅ Tells you if GPU is available
+ # torch.cuda.empty_cache()
+ # gc.collect()
+
+
+ # # Bark TTS
+ # print("🔁 Loading Bark model...")
+ # model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
+ # print("✅ Bark model loaded")
+ # print("🔁 Loading Bark processor...")
+ # processor_bark = AutoProcessor.from_pretrained("suno/bark")
+ # print("✅ Bark processor loaded")
+ # bark_voice_preset = "v2/en_speaker_5"
+
+ # def bark_tts(text):
+ # print(f"🔁 Synthesizing TTS for: {text}")
+
+ # # Process the text
+ # inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
+
+ # # Move tensors to device
+ # input_ids = inputs["input_ids"].to(model_bark.device)
+
+ # start = time.time()
+
+ # # Generate speech with only the required parameters
+ # with torch.no_grad():
+ # speech_values = model_bark.generate(
+ # input_ids=input_ids,
+ # do_sample=True,
+ # fine_temperature=0.4,
+ # coarse_temperature=0.8
+ # )
+
+ # print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
+
+ # # Convert to audio
+ # speech = speech_values.cpu().numpy().squeeze()
+ # speech = (speech * 32767).astype(np.int16)
+
+ # temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+ # wavfile.write(temp_wav.name, 22050, speech)
+
+ # return temp_wav.name
+
+
+
+
+
+ # # Whisper STT
+ # print("🔁 Loading Whisper model...")
+ # whisper_model = whisper.load_model("base", device="cuda")
+ # print("✅ Whisper model loaded")
+ # def whisper_stt(audio_path):
+ # if not audio_path or not os.path.exists(audio_path): return ""
+ # result = whisper_model.transcribe(audio_path)
+ # return result["text"]
+
+ # seniority_mapping = {
+ # "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
+ # }
+
+
+ # # --- 2. Gradio App ---
+
+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
+ # user_data = gr.State({})
+ # interview_state = gr.State({})
+ # missing_fields_state = gr.State([])
+
+ # # --- UI Layout ---
+ # with gr.Column(visible=True) as user_info_section:
+ # gr.Markdown("## Candidate Information")
+ # cv_file = gr.File(label="Upload CV")
+ # job_desc = gr.Textbox(label="Job Description")
+ # start_btn = gr.Button("Continue", interactive=False)
+
+ # with gr.Column(visible=False) as missing_section:
+ # gr.Markdown("## Missing Information")
+ # name_in = gr.Textbox(label="Name", visible=False)
+ # role_in = gr.Textbox(label="Job Role", visible=False)
+ # seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
+ # skills_in = gr.Textbox(label="Skills", visible=False)
+ # submit_btn = gr.Button("Submit", interactive=False)
+
+ # with gr.Column(visible=False) as interview_pre_section:
+ # pre_interview_greeting_md = gr.Markdown()
+ # start_interview_final_btn = gr.Button("Start Interview")
+
+ # with gr.Column(visible=False) as interview_section:
+ # gr.Markdown("## Interview in Progress")
+ # question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
+ # question_text = gr.Markdown()
+ # user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
+ # stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
+ # confirm_btn = gr.Button("Confirm Answer")
+ # evaluation_display = gr.Markdown()
+ # interview_summary = gr.Markdown(visible=False)
+
+ # # --- UI Logic ---
+
+ # def validate_start_btn(cv_file, job_desc):
+ # return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
+ # cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
+ # job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
+
+ # def process_and_route_initial(cv_file, job_desc):
+ # details = extract_candidate_details(cv_file.name)
+ # job_info = extract_job_details(job_desc)
+ # data = {
+ # "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
+ # "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
+ # }
+ # missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
+ # if missing:
+ # return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
+ # else:
+ # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
+ # return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
+ # start_btn.click(
+ # process_and_route_initial,
+ # [cv_file, job_desc],
+ # [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
+ # )
+
+ # def show_missing(missing):
+ # if missing is None: missing = []
+ # return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
+ # missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
+
+ # def validate_fields(name, role, seniority, skills, missing):
+ # if not missing: return gr.update(interactive=False)
+ # all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
+ # return gr.update(interactive=all_filled)
+ # for inp in [name_in, role_in, seniority_in, skills_in]:
+ # inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
+
+ # def complete_manual(data, name, role, seniority, skills):
+ # if data["name"].lower() == "unknown": data["name"] = name
+ # if data["job_role"].lower() == "unknown": data["job_role"] = role
+ # if data["seniority"].lower() == "unknown": data["seniority"] = seniority
+ # if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
+ # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
+ # return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
+ # submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
+
+ # def start_interview(data):
+ # # --- Advanced state with full logging ---
+ # state = {
+ # "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
+ # "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
+ # "conversation_history": [],
+ # "difficulty_adjustment": None,
+ # "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
+ # "log": []
+ # }
+ # # --- Optionally: context retrieval here (currently just blank) ---
+ # context = ""
+ # prompt = build_interview_prompt(
+ # conversation_history=[], user_response="", context=context, job_role=data["job_role"],
+ # skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
+ # voice_label="neutral", face_label="neutral"
+ # )
+ # #here the original one
+ # # first_q = groq_llm.predict(prompt)
+ # # # Evaluate Q for quality
+ # # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
+ # # state["questions"].append(first_q)
+ # # state["question_evaluations"].append(q_eval)
+
+ # #here the testing one
+ # first_q = groq_llm.predict(prompt)
+ # q_eval = {
+ # "Score": "N/A",
+ # "Reasoning": "Skipped to reduce processing time",
+ # "Improvements": []
+ # }
+ # state["questions"].append(first_q)
+ # state["question_evaluations"].append(q_eval)
+
+
+ # state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
+ # start = time.perf_counter()
+ # audio_path = bark_tts(first_q)
+ # print("⏱️ Bark TTS took", time.perf_counter() - start, "seconds")
+
+ # # LOG
+ # state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
+ # return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
+ # start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
+
+ # def transcribe(audio_path):
+ # return whisper_stt(audio_path)
+ # user_audio_input.change(transcribe, user_audio_input, stt_transcript)
+
+ # def process_answer(transcript, audio_path, state, data):
+ # if not transcript:
+ # return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
+
+ # elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
+ # state["timings"].append(elapsed)
+ # state["answers"].append(transcript)
+ # state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
+
+ # # --- 1. Emotion analysis (simplified for testing) ---
+ # voice_label = "neutral"
+ # face_label = "neutral"
+ # state["voice_labels"].append(voice_label)
+ # state["face_labels"].append(face_label)
+
+ # # --- 2. Evaluate previous Q and Answer ---
+ # last_q = state["questions"][-1]
+ # q_eval = state["question_evaluations"][-1] # Already in state
+ # ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
+ # answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
+ # state["answer_evaluations"].append(answer_eval)
+ # answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
+
+ # # --- 3. Adaptive difficulty ---
+ # if answer_score == "excellent":
+ # state["difficulty_adjustment"] = "harder"
+ # elif answer_score in ("medium", "poor"):
+ # state["difficulty_adjustment"] = "easier"
+ # else:
+ # state["difficulty_adjustment"] = None
+
+ # # --- 4. Effective confidence (simplified) ---
+ # eff_conf = {"effective_confidence": 0.6}
+ # state["effective_confidences"].append(eff_conf)
+
+ # # --- LOG ---
+ # state["log"].append({
+ # "type": "answer",
+ # "question": last_q,
+ # "answer": transcript,
+ # "answer_eval": answer_eval,
+ # "ref_answer": ref_answer,
+ # "face_label": face_label,
+ # "voice_label": voice_label,
+ # "effective_confidence": eff_conf,
+ # "timing": elapsed,
+ # "timestamp": time.time()
+ # })
+
+ # # --- Next or End ---
+ # qidx = state["question_idx"] + 1
+ # if qidx >= state["max_questions"]:
+ # # Save as JSON (optionally)
+ # timestamp = time.strftime("%Y%m%d_%H%M%S")
+ # log_file = f"interview_log_{timestamp}.json"
+ # with open(log_file, "w", encoding="utf-8") as f:
+ # json.dump(state["log"], f, indent=2, ensure_ascii=False)
+ # # Report
+ # summary = "# Interview Summary\n"
+ # for i, q in enumerate(state["questions"]):
+ # summary += (f"\n### Q{i + 1}: {q}\n"
+ # f"- *Answer*: {state['answers'][i]}\n"
+ # f"- *Q Eval*: {state['question_evaluations'][i]}\n"
+ # f"- *A Eval*: {state['answer_evaluations'][i]}\n"
+ # f"- *Time*: {state['timings'][i]}s\n")
+ # summary += f"\n\n⏺ Full log saved as {log_file}."
+ # return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"))
+ # else:
+ # # --- Build next prompt using adaptive difficulty ---
+ # state["question_idx"] = qidx
+ # state["q_start_time"] = time.time()
+ # context = "" # You can add your context logic here
+ # prompt = build_interview_prompt(
+ # conversation_history=state["conversation_history"],
+ # user_response=transcript,
+ # context=context,
+ # job_role=data["job_role"],
+ # skills=data["skills"],
+ # seniority=data["seniority"],
+ # difficulty_adjustment=state["difficulty_adjustment"],
+ # voice_label=voice_label,
+ # )
+ # next_q = groq_llm.predict(prompt)
+ # # Evaluate Q quality
+ # q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
+ # state["questions"].append(next_q)
+ # state["question_evaluations"].append(q_eval)
+ # state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
+ # state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
+ # audio_path = bark_tts(next_q)
+ # # Display evaluations
+ # eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
+ # return (
+ # state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
+ # gr.update(value=None), gr.update(value=None),
+ # gr.update(visible=True, value=eval_md),
+ # )
+ # # Replace your confirm_btn.click with this:
+ # confirm_btn.click(
+ # process_answer,
+ # [stt_transcript, user_audio_input, interview_state, user_data], # Added None for video_path
+ # [interview_state, interview_summary, question_audio, question_text, user_audio_input, stt_transcript, evaluation_display]
+ # ).then(
+ # lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, stt_transcript]
+ # )
+
+ # demo.launch(debug=True)
+
 import gradio as gr
 import time
 import tempfile
 import numpy as np
 import scipy.io.wavfile as wavfile
 import os
 import json
 from transformers import BarkModel, AutoProcessor
 import torch, gc
 import whisper
 
+print(torch.cuda.is_available())
 torch.cuda.empty_cache()
 gc.collect()
 
 # Bark TTS
 print("🔁 Loading Bark model...")
 model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
 print("✅ Bark model loaded")
 print("🔁 Loading Bark processor...")
 processor_bark = AutoProcessor.from_pretrained("suno/bark")
 print("✅ Bark processor loaded")
+bark_voice_preset = "v2/en_speaker_12"
 
 def bark_tts(text):
     print(f"🔁 Synthesizing TTS for: {text}")
     inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
     input_ids = inputs["input_ids"].to(model_bark.device)
     start = time.time()
     with torch.no_grad():
         speech_values = model_bark.generate(
             input_ids=input_ids,
             do_sample=True,
             fine_temperature=0.4,
             coarse_temperature=0.8
         )
     print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
     speech = speech_values.cpu().numpy().squeeze()
     speech = (speech * 32767).astype(np.int16)
     temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
     wavfile.write(temp_wav.name, 22050, speech)
  return temp_wav.name
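# Not part of the commit — a hedged smoke test for bark_tts() above, which returns the
# path of a temporary 22,050 Hz WAV file, assuming the Bark model/processor are loaded.
sample_path = bark_tts("Hello, and welcome to the interview.")
print("Bark wrote:", sample_path)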
 
 # Whisper STT
 print("🔁 Loading Whisper model...")
 whisper_model = whisper.load_model("base", device="cuda")
 ...
 seniority_mapping = {
     "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
 }
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     user_data = gr.State({})
     interview_state = gr.State({})
     missing_fields_state = gr.State([])
 
     with gr.Column(visible=True) as user_info_section:
         gr.Markdown("## Candidate Information")
         cv_file = gr.File(label="Upload CV")
 ...
         evaluation_display = gr.Markdown()
         interview_summary = gr.Markdown(visible=False)
 
     def validate_start_btn(cv_file, job_desc):
         return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
     cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
 ...
         details = extract_candidate_details(cv_file.name)
         job_info = extract_job_details(job_desc)
         data = {
+            "name": details.get("name", "unknown"),
+            "job_role": job_info.get("job_title", "unknown"),
+            "seniority": job_info.get("experience_level", "unknown"),
+            "skills": job_info.get("skills", [])
         }
         missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
         if missing:
             return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
         else:
             greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
             return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
+    start_btn.click(process_and_route_initial, [cv_file, job_desc], [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md])
 
     def show_missing(missing):
         if missing is None: missing = []
 ...
 
     def validate_fields(name, role, seniority, skills, missing):
         if not missing: return gr.update(interactive=False)
+        all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip()))])
         return gr.update(interactive=all_filled)
     for inp in [name_in, role_in, seniority_in, skills_in]:
         inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
 ...
     submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
 
     def start_interview(data):
         state = {
+            "questions": [], "answers": [], "timings": [], "question_evaluations": [], "answer_evaluations": [],
+            "conversation_history": [], "difficulty_adjustment": None, "question_idx": 0, "max_questions": 3,
+            "q_start_time": time.time(), "log": []
         }
         context = ""
         prompt = build_interview_prompt(
             conversation_history=[], user_response="", context=context, job_role=data["job_role"],
+            skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None, voice_label="neutral"
         )
         first_q = groq_llm.predict(prompt)
+        q_eval = {"Score": "N/A", "Reasoning": "Skipped to reduce processing time", "Improvements": []}
         state["questions"].append(first_q)
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
         audio_path = bark_tts(first_q)
         state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
         return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
     start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
 
     def transcribe(audio_path):
         return whisper_stt(audio_path)
     user_audio_input.change(transcribe, user_audio_input, stt_transcript)
 
+    def process_answer(transcript, audio_path, state, data):
         if not transcript:
             return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
+
         elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
         state["timings"].append(elapsed)
         state["answers"].append(transcript)
         state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
 
         last_q = state["questions"][-1]
+        q_eval = state["question_evaluations"][-1]
         ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
         answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
         state["answer_evaluations"].append(answer_eval)
         answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
 
         if answer_score == "excellent":
             state["difficulty_adjustment"] = "harder"
         elif answer_score in ("medium", "poor"):
             state["difficulty_adjustment"] = "easier"
         else:
             state["difficulty_adjustment"] = None
 
         state["log"].append({
+            "type": "answer", "question": last_q, "answer": transcript,
+            "answer_eval": answer_eval, "ref_answer": ref_answer,
+            "timing": elapsed, "timestamp": time.time()
         })
 
         qidx = state["question_idx"] + 1
         if qidx >= state["max_questions"]:
             timestamp = time.strftime("%Y%m%d_%H%M%S")
             log_file = f"interview_log_{timestamp}.json"
             with open(log_file, "w", encoding="utf-8") as f:
                 json.dump(state["log"], f, indent=2, ensure_ascii=False)
             summary = "# Interview Summary\n"
             for i, q in enumerate(state["questions"]):
                 summary += (f"\n### Q{i + 1}: {q}\n"
 ...
                             f"- *A Eval*: {state['answer_evaluations'][i]}\n"
                             f"- *Time*: {state['timings'][i]}s\n")
             summary += f"\n\n⏺ Full log saved as {log_file}."
+            return state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=False)
         else:
             state["question_idx"] = qidx
             state["q_start_time"] = time.time()
+            context = ""
             prompt = build_interview_prompt(
                 conversation_history=state["conversation_history"],
+                user_response=transcript, context=context,
+                job_role=data["job_role"], skills=data["skills"],
+                seniority=data["seniority"], difficulty_adjustment=state["difficulty_adjustment"],
+                voice_label="neutral"
             )
             next_q = groq_llm.predict(prompt)
             q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
             state["questions"].append(next_q)
             state["question_evaluations"].append(q_eval)
             state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
             state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
             audio_path = bark_tts(next_q)
+            eval_md = f"*Last Answer Eval:* {answer_eval}"
+            return state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}", gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=eval_md)
+
     confirm_btn.click(
         process_answer,
+        [stt_transcript, user_audio_input, interview_state, user_data],
         [interview_state, interview_summary, question_audio, question_text, user_audio_input, stt_transcript, evaluation_display]
     ).then(
         lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, stt_transcript]