husseinelsaadi committed
Commit be4261f · verified · 1 Parent(s): f25a98e

Update app.py

Files changed (1):
  1. app.py +437 -377
app.py CHANGED
@@ -1448,43 +1448,435 @@ def extract_candidate_details(file_path):
1448
  "skills": skills
1449
  }
1450
 
1451
  import gradio as gr
1452
  import time
1453
  import tempfile
1454
  import numpy as np
1455
  import scipy.io.wavfile as wavfile
1456
- import cv2
1457
  import os
1458
- import json
1459
- from moviepy.editor import VideoFileClip
1460
- import shutil
1461
- from transformers import BarkModel, AutoProcessor
1462
- import torch, gc
1463
- import whisper
1464
- from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1465
- import librosa
1466
-
1467
  import torch
1468
- print(torch.cuda.is_available()) # ✅ Tells you if GPU is available
1469
- torch.cuda.empty_cache()
1470
- gc.collect()
1471
-
1472
-
1473
- # Bark TTS
1474
- print("🔁 Loading Bark model...")
1475
- model_bark = BarkModel.from_pretrained("suno/bark")
1476
- print("✅ Bark model loaded")
1477
 
1478
- print("🔁 Loading Bark processor...")
 
1479
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1480
- print("✅ Bark processor loaded")
1481
- print("🔁 Moving Bark model to device...")
1482
- model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1483
- print("✅ Bark model on device")
1484
  bark_voice_preset = "v2/en_speaker_6"
1485
 
1486
  def bark_tts(text):
1487
- print(f"🔁 Synthesizing TTS for: {text}")
1488
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1489
  inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1490
  speech_values = model_bark.generate(**inputs)
@@ -1494,366 +1886,34 @@ def bark_tts(text):
1494
  wavfile.write(temp_wav.name, 22050, speech)
1495
  return temp_wav.name
1496
 
1497
- # Whisper STT
1498
- print("🔁 Loading Whisper model...")
1499
- whisper_model = whisper.load_model("base", device="cuda")
1500
- print("✅ Whisper model loaded")
1501
  def whisper_stt(audio_path):
1502
- if not audio_path or not os.path.exists(audio_path): return ""
 
1503
  result = whisper_model.transcribe(audio_path)
1504
  return result["text"]
1505
 
 
1506
 
1507
- # DeepFace (Video Face Emotion)
1508
- def ensure_mp4(video_input):
1509
- # video_input could be a file-like object, a path, or a Gradio temp path
1510
- if isinstance(video_input, str):
1511
- input_path = video_input
1512
- else:
1513
- # It's a file-like object (rare for Gradio video, but handle it)
1514
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1515
- temp_in.write(video_input.read())
1516
- input_path = temp_in.name
1517
-
1518
- # If already mp4, return as is
1519
- if input_path.endswith(".mp4"):
1520
- return input_path
1521
 
1522
- # Convert to mp4 using moviepy
1523
- mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1524
- try:
1525
- clip = VideoFileClip(input_path)
1526
- clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1527
- clip.close()
1528
- except Exception as e:
1529
- print("Video conversion failed:", e)
1530
- # As fallback, just copy original
1531
- shutil.copy(input_path, mp4_path)
1532
- return mp4_path
1533
-
1534
- def analyze_video_emotions(video_input, sample_rate=15):
1535
- # Convert input to an mp4 file OpenCV can process
1536
- mp4_path = ensure_mp4(video_input)
1537
- if not mp4_path or not os.path.exists(mp4_path):
1538
- return "no_face"
1539
- cap = cv2.VideoCapture(mp4_path)
1540
- frame_count = 0
1541
- emotion_counts = {}
1542
- while True:
1543
- ret, frame = cap.read()
1544
- if not ret: break
1545
- if frame_count % sample_rate == 0:
1546
- try:
1547
- result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1548
- dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1549
- emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1550
- except Exception: pass
1551
- frame_count += 1
1552
- cap.release()
1553
- if not emotion_counts: return "no_face"
1554
- return max(emotion_counts.items(), key=lambda x: x[1])[0]
1555
-
1556
- # Original Hugging Face model: HaniaRuby/speech-emotion-recognition-wav2vec2
1557
- local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2" # Local path to the downloaded model files
1558
- print("🔁 Loading Wav2Vec processor and model...")
1559
- wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1560
- wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1561
- wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1562
- print("✅ Wav2Vec model loaded")
1563
- wav2vec_model.eval()
1564
- voice_label_map = {
1565
- 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1566
- 4: 'neutral', 5: 'sad', 6: 'surprise'
1567
- }
1568
 
 
 
1569
 
1570
 
1571
- def analyze_audio_emotion(audio_path):
1572
- print(f"🔁 Analyzing audio emotion for: {audio_path}")
1573
- if not audio_path or not os.path.exists(audio_path): return "neutral"
1574
-
1575
- speech, sr = librosa.load(audio_path, sr=16000)
1576
- inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1577
-
1578
- # 🔥 Move model and inputs to GPU
1579
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1580
- wav2vec_model.to(device)
1581
- inputs = {k: v.to(device) for k, v in inputs.items()}
1582
-
1583
- with torch.no_grad():
1584
- logits = wav2vec_model(**inputs).logits
1585
-
1586
- probs = torch.nn.functional.softmax(logits, dim=-1)
1587
- predicted_id = torch.argmax(probs, dim=-1).item()
1588
- return voice_label_map.get(predicted_id, "neutral")
1589
-
1590
-
1591
- # --- Effective confidence calculation
1592
- def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1593
- emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1594
- answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1595
- voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1596
- avg_emotion = (voice_score + face_score) / 2
1597
- control_bonus = max(0, answer_score - avg_emotion) * k
1598
- eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
1599
- return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
1600
-
1601
- seniority_mapping = {
1602
- "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1603
- }
1604
-
1605
-
1606
- # --- 2. Gradio App ---
1607
-
1608
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
1609
- user_data = gr.State({})
1610
- interview_state = gr.State({})
1611
- missing_fields_state = gr.State([])
1612
-
1613
- # --- UI Layout ---
1614
- with gr.Column(visible=True) as user_info_section:
1615
- gr.Markdown("## Candidate Information")
1616
- cv_file = gr.File(label="Upload CV")
1617
- job_desc = gr.Textbox(label="Job Description")
1618
- start_btn = gr.Button("Continue", interactive=False)
1619
-
1620
- with gr.Column(visible=False) as missing_section:
1621
- gr.Markdown("## Missing Information")
1622
- name_in = gr.Textbox(label="Name", visible=False)
1623
- role_in = gr.Textbox(label="Job Role", visible=False)
1624
- seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
1625
- skills_in = gr.Textbox(label="Skills", visible=False)
1626
- submit_btn = gr.Button("Submit", interactive=False)
1627
-
1628
- with gr.Column(visible=False) as interview_pre_section:
1629
- pre_interview_greeting_md = gr.Markdown()
1630
- start_interview_final_btn = gr.Button("Start Interview")
1631
-
1632
- with gr.Column(visible=False) as interview_section:
1633
- gr.Markdown("## Interview in Progress")
1634
- question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1635
- question_text = gr.Markdown()
1636
- user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1637
- user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
1638
- stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1639
- confirm_btn = gr.Button("Confirm Answer")
1640
- evaluation_display = gr.Markdown()
1641
- emotion_display = gr.Markdown()
1642
- interview_summary = gr.Markdown(visible=False)
1643
-
1644
- # --- UI Logic ---
1645
-
1646
- def validate_start_btn(cv_file, job_desc):
1647
- return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1648
- cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1649
- job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1650
-
1651
- def process_and_route_initial(cv_file, job_desc):
1652
- details = extract_candidate_details(cv_file.name)
1653
- job_info = extract_job_details(job_desc)
1654
- data = {
1655
- "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1656
- "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1657
- }
1658
- missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1659
- if missing:
1660
- return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1661
- else:
1662
- greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1663
- return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1664
- start_btn.click(
1665
- process_and_route_initial,
1666
- [cv_file, job_desc],
1667
- [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
1668
- )
1669
-
1670
- def show_missing(missing):
1671
- if missing is None: missing = []
1672
- return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1673
- missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1674
-
1675
- def validate_fields(name, role, seniority, skills, missing):
1676
- if not missing: return gr.update(interactive=False)
1677
- all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1678
- return gr.update(interactive=all_filled)
1679
- for inp in [name_in, role_in, seniority_in, skills_in]:
1680
- inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1681
-
1682
- def complete_manual(data, name, role, seniority, skills):
1683
- if data["name"].lower() == "unknown": data["name"] = name
1684
- if data["job_role"].lower() == "unknown": data["job_role"] = role
1685
- if data["seniority"].lower() == "unknown": data["seniority"] = seniority
1686
- if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1687
- greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1688
- return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1689
- submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1690
-
1691
- def start_interview(data):
1692
- # --- Advanced state with full logging ---
1693
- state = {
1694
- "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1695
- "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1696
- "conversation_history": [],
1697
- "difficulty_adjustment": None,
1698
- "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1699
- "log": []
1700
- }
1701
- # --- Optionally: context retrieval here (currently just blank) ---
1702
- context = ""
1703
- prompt = build_interview_prompt(
1704
- conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1705
- skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1706
- voice_label="neutral", face_label="neutral"
1707
- )
1708
- #here the original one
1709
- # first_q = groq_llm.predict(prompt)
1710
- # # Evaluate Q for quality
1711
- # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1712
- # state["questions"].append(first_q)
1713
- # state["question_evaluations"].append(q_eval)
1714
-
1715
- #here the testing one
1716
- first_q = groq_llm.predict(prompt)
1717
- q_eval = {
1718
- "Score": "N/A",
1719
- "Reasoning": "Skipped to reduce processing time",
1720
- "Improvements": []
1721
- }
1722
- state["questions"].append(first_q)
1723
- state["question_evaluations"].append(q_eval)
1724
-
1725
-
1726
- state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1727
- audio_path = bark_tts(first_q)
1728
- # LOG
1729
- state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1730
- return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1731
- start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1732
-
1733
- def transcribe(audio_path):
1734
- return whisper_stt(audio_path)
1735
- user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1736
-
1737
- def process_answer(transcript, audio_path, video_path, state, data):
1738
- if not transcript and not video_path:
1739
- return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1740
- elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1741
- state["timings"].append(elapsed)
1742
- state["answers"].append(transcript)
1743
- state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1744
-
1745
- # --- 1. Emotion analysis ---
1746
- # voice_label = analyze_audio_emotion(audio_path)
1747
- # face_label = analyze_video_emotions(video_path)
1748
- # state["voice_labels"].append(voice_label)
1749
- # state["face_labels"].append(face_label)
1750
-
1751
- #just for testing
1752
- voice_label = "neutral"
1753
- face_label = "neutral"
1754
- state["voice_labels"].append(voice_label)
1755
- state["face_labels"].append(face_label)
1756
-
1757
-
1758
-
1759
- # --- 2. Evaluate previous Q and Answer ---
1760
- last_q = state["questions"][-1]
1761
- q_eval = state["question_evaluations"][-1] # Already in state
1762
- ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1763
- answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1764
- state["answer_evaluations"].append(answer_eval)
1765
- answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1766
-
1767
- # --- 3. Adaptive difficulty ---
1768
- if answer_score == "excellent":
1769
- state["difficulty_adjustment"] = "harder"
1770
- elif answer_score in ("medium", "poor"):
1771
- state["difficulty_adjustment"] = "easier"
1772
- else:
1773
- state["difficulty_adjustment"] = None
1774
-
1775
- # --- 4. Effective confidence ---
1776
- # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1777
- # state["effective_confidences"].append(eff_conf)
1778
-
1779
- #just for testing:
1780
- eff_conf = {"effective_confidence": 0.6}
1781
- state["effective_confidences"].append(eff_conf)
1782
-
1783
-
1784
- # --- LOG ---
1785
- state["log"].append({
1786
- "type": "answer",
1787
- "question": last_q,
1788
- "answer": transcript,
1789
- "answer_eval": answer_eval,
1790
- "ref_answer": ref_answer,
1791
- "face_label": face_label,
1792
- "voice_label": voice_label,
1793
- "effective_confidence": eff_conf,
1794
- "timing": elapsed,
1795
- "timestamp": time.time()
1796
- })
1797
-
1798
- # --- Next or End ---
1799
- qidx = state["question_idx"] + 1
1800
- if qidx >= state["max_questions"]:
1801
- # Save as JSON (optionally)
1802
- timestamp = time.strftime("%Y%m%d_%H%M%S")
1803
- log_file = f"interview_log_{timestamp}.json"
1804
- with open(log_file, "w", encoding="utf-8") as f:
1805
- json.dump(state["log"], f, indent=2, ensure_ascii=False)
1806
- # Report
1807
- summary = "# Interview Summary\n"
1808
- for i, q in enumerate(state["questions"]):
1809
- summary += (f"\n### Q{i + 1}: {q}\n"
1810
- f"- *Answer*: {state['answers'][i]}\n"
1811
- f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1812
- f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1813
- #also this are removed just for testing :(
1814
- # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1815
- # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1816
- f"- *Time*: {state['timings'][i]}s\n")
1817
- summary += f"\n\n⏺ Full log saved as {log_file}."
1818
- return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"))
1819
- else:
1820
- # --- Build next prompt using adaptive difficulty ---
1821
- state["question_idx"] = qidx
1822
- state["q_start_time"] = time.time()
1823
- context = "" # You can add your context logic here
1824
- prompt = build_interview_prompt(
1825
- conversation_history=state["conversation_history"],
1826
- user_response=transcript,
1827
- context=context,
1828
- job_role=data["job_role"],
1829
- skills=data["skills"],
1830
- seniority=data["seniority"],
1831
- difficulty_adjustment=state["difficulty_adjustment"],
1832
- face_label=face_label,
1833
- voice_label=voice_label,
1834
- effective_confidence=eff_conf
1835
- )
1836
- next_q = groq_llm.predict(prompt)
1837
- # Evaluate Q quality
1838
- q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1839
- state["questions"].append(next_q)
1840
- state["question_evaluations"].append(q_eval)
1841
- state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1842
- state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1843
- audio_path = bark_tts(next_q)
1844
- # Display evaluations
1845
- eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1846
- return (
1847
- state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1848
- gr.update(value=None), gr.update(value=None),
1849
- gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"),
1850
- )
1851
- confirm_btn.click(
1852
- process_answer,
1853
- [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1854
- [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1855
- ).then(
1856
- lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1857
- )
1858
 
1859
  demo.launch(debug=True)
 
 
1448
  "skills": skills
1449
  }
1450
 
1451
+ # import gradio as gr
1452
+ # import time
1453
+ # import tempfile
1454
+ # import numpy as np
1455
+ # import scipy.io.wavfile as wavfile
1456
+ # import cv2
1457
+ # import os
1458
+ # import json
1459
+ # from moviepy.editor import VideoFileClip
1460
+ # import shutil
1461
+ # from transformers import BarkModel, AutoProcessor
1462
+ # import torch, gc
1463
+ # import whisper
1464
+ # from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1465
+ # import librosa
1466
+
1467
+ # import torch
1468
+ # print(torch.cuda.is_available()) # ✅ Tells you if GPU is available
1469
+ # torch.cuda.empty_cache()
1470
+ # gc.collect()
1471
+
1472
+
1473
+ # # Bark TTS
1474
+ # print("🔁 Loading Bark model...")
1475
+ # model_bark = BarkModel.from_pretrained("suno/bark")
1476
+ # print("✅ Bark model loaded")
1477
+
1478
+ # print("🔁 Loading Bark processor...")
1479
+ # processor_bark = AutoProcessor.from_pretrained("suno/bark")
1480
+ # print("✅ Bark processor loaded")
1481
+ # print("🔁 Moving Bark model to device...")
1482
+ # model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1483
+ # print("✅ Bark model on device")
1484
+ # bark_voice_preset = "v2/en_speaker_6"
1485
+
1486
+ # def bark_tts(text):
1487
+ # print(f"🔁 Synthesizing TTS for: {text}")
1488
+ # inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1489
+ # inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1490
+ # speech_values = model_bark.generate(**inputs)
1491
+ # speech = speech_values.cpu().numpy().squeeze()
1492
+ # speech = (speech * 32767).astype(np.int16)
1493
+ # temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1494
+ # wavfile.write(temp_wav.name, 22050, speech)
1495
+ # return temp_wav.name
1496
+
1497
+ # # Whisper STT
1498
+ # print("🔁 Loading Whisper model...")
1499
+ # whisper_model = whisper.load_model("base", device="cuda")
1500
+ # print("✅ Whisper model loaded")
1501
+ # def whisper_stt(audio_path):
1502
+ # if not audio_path or not os.path.exists(audio_path): return ""
1503
+ # result = whisper_model.transcribe(audio_path)
1504
+ # return result["text"]
1505
+
1506
+
1507
+ # # DeepFace (Video Face Emotion)
1508
+ # def ensure_mp4(video_input):
1509
+ # # video_input could be a file-like object, a path, or a Gradio temp path
1510
+ # if isinstance(video_input, str):
1511
+ # input_path = video_input
1512
+ # else:
1513
+ # # It's a file-like object (rare for Gradio video, but handle it)
1514
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1515
+ # temp_in.write(video_input.read())
1516
+ # input_path = temp_in.name
1517
+
1518
+ # # If already mp4, return as is
1519
+ # if input_path.endswith(".mp4"):
1520
+ # return input_path
1521
+
1522
+ # # Convert to mp4 using moviepy
1523
+ # mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1524
+ # try:
1525
+ # clip = VideoFileClip(input_path)
1526
+ # clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1527
+ # clip.close()
1528
+ # except Exception as e:
1529
+ # print("Video conversion failed:", e)
1530
+ # # As fallback, just copy original
1531
+ # shutil.copy(input_path, mp4_path)
1532
+ # return mp4_path
1533
+
1534
+ # def analyze_video_emotions(video_input, sample_rate=15):
1535
+ # # Convert input to an mp4 file OpenCV can process
1536
+ # mp4_path = ensure_mp4(video_input)
1537
+ # if not mp4_path or not os.path.exists(mp4_path):
1538
+ # return "no_face"
1539
+ # cap = cv2.VideoCapture(mp4_path)
1540
+ # frame_count = 0
1541
+ # emotion_counts = {}
1542
+ # while True:
1543
+ # ret, frame = cap.read()
1544
+ # if not ret: break
1545
+ # if frame_count % sample_rate == 0:
1546
+ # try:
1547
+ # result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1548
+ # dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1549
+ # emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1550
+ # except Exception: pass
1551
+ # frame_count += 1
1552
+ # cap.release()
1553
+ # if not emotion_counts: return "no_face"
1554
+ # return max(emotion_counts.items(), key=lambda x: x[1])[0]
1555
+
1556
+ # # Original Hugging Face model: HaniaRuby/speech-emotion-recognition-wav2vec2
1557
+ # local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2" # Local path to the downloaded model files
1558
+ # print("🔁 Loading Wav2Vec processor and model...")
1559
+ # wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1560
+ # wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1561
+ # wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1562
+ # print("✅ Wav2Vec model loaded")
1563
+ # wav2vec_model.eval()
1564
+ # voice_label_map = {
1565
+ # 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1566
+ # 4: 'neutral', 5: 'sad', 6: 'surprise'
1567
+ # }
1568
+
1569
+
1570
+
1571
+ # def analyze_audio_emotion(audio_path):
1572
+ # print(f"🔁 Analyzing audio emotion for: {audio_path}")
1573
+ # if not audio_path or not os.path.exists(audio_path): return "neutral"
1574
+
1575
+ # speech, sr = librosa.load(audio_path, sr=16000)
1576
+ # inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1577
+
1578
+ # # 🔥 Move model and inputs to GPU
1579
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1580
+ # wav2vec_model.to(device)
1581
+ # inputs = {k: v.to(device) for k, v in inputs.items()}
1582
+
1583
+ # with torch.no_grad():
1584
+ # logits = wav2vec_model(**inputs).logits
1585
+
1586
+ # probs = torch.nn.functional.softmax(logits, dim=-1)
1587
+ # predicted_id = torch.argmax(probs, dim=-1).item()
1588
+ # return voice_label_map.get(predicted_id, "neutral")
1589
+
1590
+
1591
+ # # --- Effective confidence calculation
1592
+ # def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1593
+ # emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1594
+ # answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1595
+ # voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1596
+ # avg_emotion = (voice_score + face_score) / 2
1597
+ # control_bonus = max(0, answer_score - avg_emotion) * k
1598
+ # eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
1599
+ # return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
1600
+
1601
+ # seniority_mapping = {
1602
+ # "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1603
+ # }
1604
+
1605
+
1606
+ # # --- 2. Gradio App ---
1607
+
1608
+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
1609
+ # user_data = gr.State({})
1610
+ # interview_state = gr.State({})
1611
+ # missing_fields_state = gr.State([])
1612
+
1613
+ # # --- UI Layout ---
1614
+ # with gr.Column(visible=True) as user_info_section:
1615
+ # gr.Markdown("## Candidate Information")
1616
+ # cv_file = gr.File(label="Upload CV")
1617
+ # job_desc = gr.Textbox(label="Job Description")
1618
+ # start_btn = gr.Button("Continue", interactive=False)
1619
+
1620
+ # with gr.Column(visible=False) as missing_section:
1621
+ # gr.Markdown("## Missing Information")
1622
+ # name_in = gr.Textbox(label="Name", visible=False)
1623
+ # role_in = gr.Textbox(label="Job Role", visible=False)
1624
+ # seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
1625
+ # skills_in = gr.Textbox(label="Skills", visible=False)
1626
+ # submit_btn = gr.Button("Submit", interactive=False)
1627
+
1628
+ # with gr.Column(visible=False) as interview_pre_section:
1629
+ # pre_interview_greeting_md = gr.Markdown()
1630
+ # start_interview_final_btn = gr.Button("Start Interview")
1631
+
1632
+ # with gr.Column(visible=False) as interview_section:
1633
+ # gr.Markdown("## Interview in Progress")
1634
+ # question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1635
+ # question_text = gr.Markdown()
1636
+ # user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1637
+ # user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
1638
+ # stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1639
+ # confirm_btn = gr.Button("Confirm Answer")
1640
+ # evaluation_display = gr.Markdown()
1641
+ # emotion_display = gr.Markdown()
1642
+ # interview_summary = gr.Markdown(visible=False)
1643
+
1644
+ # # --- UI Logic ---
1645
+
1646
+ # def validate_start_btn(cv_file, job_desc):
1647
+ # return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1648
+ # cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1649
+ # job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1650
+
1651
+ # def process_and_route_initial(cv_file, job_desc):
1652
+ # details = extract_candidate_details(cv_file.name)
1653
+ # job_info = extract_job_details(job_desc)
1654
+ # data = {
1655
+ # "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1656
+ # "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1657
+ # }
1658
+ # missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1659
+ # if missing:
1660
+ # return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1661
+ # else:
1662
+ # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1663
+ # return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1664
+ # start_btn.click(
1665
+ # process_and_route_initial,
1666
+ # [cv_file, job_desc],
1667
+ # [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
1668
+ # )
1669
+
1670
+ # def show_missing(missing):
1671
+ # if missing is None: missing = []
1672
+ # return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1673
+ # missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1674
+
1675
+ # def validate_fields(name, role, seniority, skills, missing):
1676
+ # if not missing: return gr.update(interactive=False)
1677
+ # all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1678
+ # return gr.update(interactive=all_filled)
1679
+ # for inp in [name_in, role_in, seniority_in, skills_in]:
1680
+ # inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1681
+
1682
+ # def complete_manual(data, name, role, seniority, skills):
1683
+ # if data["name"].lower() == "unknown": data["name"] = name
1684
+ # if data["job_role"].lower() == "unknown": data["job_role"] = role
1685
+ # if data["seniority"].lower() == "unknown": data["seniority"] = seniority
1686
+ # if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1687
+ # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1688
+ # return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1689
+ # submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1690
+
1691
+ # def start_interview(data):
1692
+ # # --- Advanced state with full logging ---
1693
+ # state = {
1694
+ # "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1695
+ # "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1696
+ # "conversation_history": [],
1697
+ # "difficulty_adjustment": None,
1698
+ # "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1699
+ # "log": []
1700
+ # }
1701
+ # # --- Optionally: context retrieval here (currently just blank) ---
1702
+ # context = ""
1703
+ # prompt = build_interview_prompt(
1704
+ # conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1705
+ # skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1706
+ # voice_label="neutral", face_label="neutral"
1707
+ # )
1708
+ # #here the original one
1709
+ # # first_q = groq_llm.predict(prompt)
1710
+ # # # Evaluate Q for quality
1711
+ # # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1712
+ # # state["questions"].append(first_q)
1713
+ # # state["question_evaluations"].append(q_eval)
1714
+
1715
+ # #here the testing one
1716
+ # first_q = groq_llm.predict(prompt)
1717
+ # q_eval = {
1718
+ # "Score": "N/A",
1719
+ # "Reasoning": "Skipped to reduce processing time",
1720
+ # "Improvements": []
1721
+ # }
1722
+ # state["questions"].append(first_q)
1723
+ # state["question_evaluations"].append(q_eval)
1724
+
1725
+
1726
+ # state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1727
+ # audio_path = bark_tts(first_q)
1728
+ # # LOG
1729
+ # state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1730
+ # return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1731
+ # start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1732
+
1733
+ # def transcribe(audio_path):
1734
+ # return whisper_stt(audio_path)
1735
+ # user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1736
+
1737
+ # def process_answer(transcript, audio_path, video_path, state, data):
1738
+ # if not transcript and not video_path:
1739
+ # return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1740
+ # elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1741
+ # state["timings"].append(elapsed)
1742
+ # state["answers"].append(transcript)
1743
+ # state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1744
+
1745
+ # # --- 1. Emotion analysis ---
1746
+ # # voice_label = analyze_audio_emotion(audio_path)
1747
+ # # face_label = analyze_video_emotions(video_path)
1748
+ # # state["voice_labels"].append(voice_label)
1749
+ # # state["face_labels"].append(face_label)
1750
+
1751
+ # #just for testing
1752
+ # voice_label = "neutral"
1753
+ # face_label = "neutral"
1754
+ # state["voice_labels"].append(voice_label)
1755
+ # state["face_labels"].append(face_label)
1756
+
1757
+
1758
+
1759
+ # # --- 2. Evaluate previous Q and Answer ---
1760
+ # last_q = state["questions"][-1]
1761
+ # q_eval = state["question_evaluations"][-1] # Already in state
1762
+ # ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1763
+ # answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1764
+ # state["answer_evaluations"].append(answer_eval)
1765
+ # answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1766
+
1767
+ # # --- 3. Adaptive difficulty ---
1768
+ # if answer_score == "excellent":
1769
+ # state["difficulty_adjustment"] = "harder"
1770
+ # elif answer_score in ("medium", "poor"):
1771
+ # state["difficulty_adjustment"] = "easier"
1772
+ # else:
1773
+ # state["difficulty_adjustment"] = None
1774
+
1775
+ # # --- 4. Effective confidence ---
1776
+ # # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1777
+ # # state["effective_confidences"].append(eff_conf)
1778
+
1779
+ # #just for testing:
1780
+ # eff_conf = {"effective_confidence": 0.6}
1781
+ # state["effective_confidences"].append(eff_conf)
1782
+
1783
+
1784
+ # # --- LOG ---
1785
+ # state["log"].append({
1786
+ # "type": "answer",
1787
+ # "question": last_q,
1788
+ # "answer": transcript,
1789
+ # "answer_eval": answer_eval,
1790
+ # "ref_answer": ref_answer,
1791
+ # "face_label": face_label,
1792
+ # "voice_label": voice_label,
1793
+ # "effective_confidence": eff_conf,
1794
+ # "timing": elapsed,
1795
+ # "timestamp": time.time()
1796
+ # })
1797
+
1798
+ # # --- Next or End ---
1799
+ # qidx = state["question_idx"] + 1
1800
+ # if qidx >= state["max_questions"]:
1801
+ # # Save as JSON (optionally)
1802
+ # timestamp = time.strftime("%Y%m%d_%H%M%S")
1803
+ # log_file = f"interview_log_{timestamp}.json"
1804
+ # with open(log_file, "w", encoding="utf-8") as f:
1805
+ # json.dump(state["log"], f, indent=2, ensure_ascii=False)
1806
+ # # Report
1807
+ # summary = "# Interview Summary\n"
1808
+ # for i, q in enumerate(state["questions"]):
1809
+ # summary += (f"\n### Q{i + 1}: {q}\n"
1810
+ # f"- *Answer*: {state['answers'][i]}\n"
1811
+ # f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1812
+ # f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1813
+ # #also this are removed just for testing :(
1814
+ # # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1815
+ # # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1816
+ # f"- *Time*: {state['timings'][i]}s\n")
1817
+ # summary += f"\n\n⏺ Full log saved as {log_file}."
1818
+ # return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"))
1819
+ # else:
1820
+ # # --- Build next prompt using adaptive difficulty ---
1821
+ # state["question_idx"] = qidx
1822
+ # state["q_start_time"] = time.time()
1823
+ # context = "" # You can add your context logic here
1824
+ # prompt = build_interview_prompt(
1825
+ # conversation_history=state["conversation_history"],
1826
+ # user_response=transcript,
1827
+ # context=context,
1828
+ # job_role=data["job_role"],
1829
+ # skills=data["skills"],
1830
+ # seniority=data["seniority"],
1831
+ # difficulty_adjustment=state["difficulty_adjustment"],
1832
+ # face_label=face_label,
1833
+ # voice_label=voice_label,
1834
+ # effective_confidence=eff_conf
1835
+ # )
1836
+ # next_q = groq_llm.predict(prompt)
1837
+ # # Evaluate Q quality
1838
+ # q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1839
+ # state["questions"].append(next_q)
1840
+ # state["question_evaluations"].append(q_eval)
1841
+ # state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1842
+ # state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1843
+ # audio_path = bark_tts(next_q)
1844
+ # # Display evaluations
1845
+ # eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1846
+ # return (
1847
+ # state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1848
+ # gr.update(value=None), gr.update(value=None),
1849
+ # gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"),
1850
+ # )
1851
+ # confirm_btn.click(
1852
+ # process_answer,
1853
+ # [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1854
+ # [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1855
+ # ).then(
1856
+ # lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1857
+ # )
1858
+
1859
+ # demo.launch(debug=True)
1860
+
1861
  import gradio as gr
1862
  import time
1863
  import tempfile
1864
  import numpy as np
1865
  import scipy.io.wavfile as wavfile
 
1866
  import os
1867
  import torch
1868
+ import whisper
1869
+ from transformers import BarkModel, AutoProcessor
1870
 
1871
+ # Initialize Bark (TTS)
1872
+ model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
1873
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1874
  bark_voice_preset = "v2/en_speaker_6"
1875
 
1876
+ # Initialize Whisper (STT)
1877
+ whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
1878
+
1879
  def bark_tts(text):
 
1880
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1881
  inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1882
  speech_values = model_bark.generate(**inputs)
 
1886
  wavfile.write(temp_wav.name, 22050, speech)
1887
  return temp_wav.name
1888
 
1889
  def whisper_stt(audio_path):
1890
+ if not audio_path or not os.path.exists(audio_path):
1891
+ return ""
1892
  result = whisper_model.transcribe(audio_path)
1893
  return result["text"]
1894
 
1895
+ # Dummy Groq API stub (replace with actual logic)
1896
+ def groq_llm_predict(prompt):
1897
+ return f"[Mock Question] Based on: {prompt}" # Replace with groq_llm.predict(prompt)
1898
 
1899
+ def interview_loop(state, audio_path):
1900
+ transcript = whisper_stt(audio_path)
1901
+ state["conversation"].append({"role": "Candidate", "content": transcript})
1902
 
1903
+ prompt = "\n".join([f"{turn['role']}: {turn['content']}" for turn in state["conversation"]])
1904
+ next_q = groq_llm_predict(prompt)
1905
+ state["conversation"].append({"role": "Interviewer", "content": next_q})
1906
 
1907
+ audio_out = bark_tts(next_q)
1908
+ return state, audio_out, transcript
1909
 
1910
+ with gr.Blocks() as demo:
1911
+ state = gr.State({"conversation": []})
1912
+ question_audio = gr.Audio(label="Interviewer's Question", interactive=False, autoplay=True)
1913
+ user_audio_input = gr.Audio(source="microphone", type="filepath", label="Your Answer")
1914
+ transcript_box = gr.Textbox(label="Transcript", interactive=False)
1915
 
1916
+ user_audio_input.change(interview_loop, [state, user_audio_input], [state, question_audio, transcript_box])
1917
 
1918
  demo.launch(debug=True)
1919
+
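
For quick local verification of the simplified loop added at the bottom of this commit, here is a minimal, model-free sketch. It relies only on the shapes defined in the new code (`interview_loop` returning `(state, audio_out, transcript)` and a conversation list of role/content dicts); the stub names `fake_stt` and `fake_tts` and the injectable `stt`/`tts` parameters are illustrative additions, not part of the commit, and stand in for the real Whisper/Bark calls so no GPU or model downloads are needed.

```python
# Minimal sketch: one turn of the commit's simplified interview loop with the
# heavy models stubbed out. fake_stt / fake_tts and the stt / tts parameters
# are illustrative, not part of app.py.

def fake_stt(audio_path):
    # Stands in for whisper_stt(); returns a canned transcript.
    return "I have three years of Python experience."

def fake_tts(text):
    # Stands in for bark_tts(); returns a pretend path, nothing is written.
    return f"/tmp/fake_{abs(hash(text))}.wav"

def groq_llm_predict(prompt):
    # Same mock shape as the stub added in this commit.
    return f"[Mock Question] Based on: {prompt[-60:]}"

def interview_loop(state, audio_path, stt=fake_stt, tts=fake_tts):
    # Mirrors the commit's interview_loop, with STT/TTS injectable for testing.
    transcript = stt(audio_path)
    state["conversation"].append({"role": "Candidate", "content": transcript})

    prompt = "\n".join(f"{turn['role']}: {turn['content']}"
                       for turn in state["conversation"])
    next_q = groq_llm_predict(prompt)
    state["conversation"].append({"role": "Interviewer", "content": next_q})

    return state, tts(next_q), transcript

if __name__ == "__main__":
    state = {"conversation": []}
    state, audio_out, transcript = interview_loop(state, "answer.wav")
    print(transcript)                 # candidate turn from the stubbed STT
    print(state["conversation"][-1])  # interviewer turn from the mock LLM
    print(audio_out)                  # path the TTS stub pretends to produce
```

One detail worth double-checking: the commented-out UI builds its microphone input as `gr.Audio(sources=["microphone"], ...)` (Gradio 4.x signature), while the new block uses `gr.Audio(source="microphone", ...)` (Gradio 3.x signature); only one spelling is valid for whichever Gradio version the Space pins.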