husseinelsaadi committed on
Commit a04c545 · verified · 1 Parent(s): 57677b5

Update app.py

Files changed (1)
  1. app.py +411 -195
app.py CHANGED
@@ -1857,34 +1857,42 @@ def extract_candidate_details(file_path):
1857
  # )
1858
 
1859
  # demo.launch(debug=True)
 
1860
  import gradio as gr
1861
  import time
1862
  import tempfile
1863
  import numpy as np
1864
  import scipy.io.wavfile as wavfile
 
1865
  import os
1866
  import json
1867
- import torch
1868
- import whisper
1869
  from transformers import BarkModel, AutoProcessor
1870
 
1871
- # Clear GPU memory
 
1872
  torch.cuda.empty_cache()
 
1873
 
1874
- # Load only essential models
1875
  print("πŸ” Loading Bark model...")
1876
  model_bark = BarkModel.from_pretrained("suno/bark")
1877
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
 
 
1878
  model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
 
1879
  bark_voice_preset = "v2/en_speaker_6"
1880
- print("✅ Bark model loaded")
1881
-
1882
- print("🔍 Loading Whisper model...")
1883
- whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
1884
- print("✅ Whisper model loaded")
1885
 
1886
  def bark_tts(text):
1887
- """Convert text to speech using Bark"""
1888
  print(f"🔍 Synthesizing TTS for: {text}")
1889
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1890
  inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
@@ -1895,224 +1903,432 @@ def bark_tts(text):
1895
  wavfile.write(temp_wav.name, 22050, speech)
1896
  return temp_wav.name
1897
 
1898
  def whisper_stt(audio_path):
1899
- """Convert speech to text using Whisper"""
1900
- if not audio_path or not os.path.exists(audio_path):
1901
  return ""
1902
  result = whisper_model.transcribe(audio_path)
1903
  return result["text"]
1904
 
1905
- # Simple predefined questions for quick interview
1906
- INTERVIEW_QUESTIONS = [
1907
- "Tell me about yourself and your background.",
1908
- "What are your greatest strengths and how do they relate to this role?"
1909
- ]
1910
 
1911
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
1912
- # State variables
1913
- interview_state = gr.State({"current_question": 0, "answers": [], "started": False})
1914
 
1915
  # UI Layout
1916
- with gr.Column(visible=True) as start_section:
1917
- gr.Markdown("## Quick Interview Setup")
1918
- name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
1919
- role_input = gr.Textbox(label="Job Role", placeholder="e.g., Software Engineer")
1920
- start_btn = gr.Button("Start Interview", variant="primary")
1921
-
1922
  with gr.Column(visible=False) as interview_section:
1923
  gr.Markdown("## Interview in Progress")
1924
  question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
1925
  question_text = gr.Markdown()
1926
 
1927
- # Audio input for user response
1928
- user_audio_input = gr.Audio(
1929
- sources=["microphone"],
1930
- type="filepath",
1931
- label="Record Your Answer"
1932
- )
1933
 
1934
- # Transcription display
1935
- transcript_display = gr.Textbox(
1936
- label="Your Answer (Transcribed)",
1937
- interactive=False
1938
- )
1939
 
1940
- # Control buttons
1941
- with gr.Row():
1942
- submit_answer_btn = gr.Button("Submit Answer", variant="primary")
1943
- next_question_btn = gr.Button("Next Question", visible=False)
1944
 
1945
- # Progress indicator
1946
- progress_text = gr.Markdown()
 
1947
 
1948
- # Interview summary
 
 
 
1949
  interview_summary = gr.Markdown(visible=False)
1950
 
1951
- # Event handlers
1952
- def start_interview(name, role):
1953
- """Initialize interview with first question"""
1954
- if not name.strip() or not role.strip():
1955
- return (
1956
- gr.update(),
1957
- gr.update(),
1958
- gr.update(),
1959
- gr.update(),
1960
- gr.update(value="Please fill in both name and role fields."),
1961
- {"current_question": 0, "answers": [], "started": False, "name": "", "role": ""}
1962
- )
1963
-
1964
- # Generate first question audio
1965
- question_text = INTERVIEW_QUESTIONS[0]
1966
- audio_path = bark_tts(question_text)
1967
-
1968
- state = {
1969
- "current_question": 0,
1970
- "answers": [],
1971
- "started": True,
1972
- "name": name,
1973
- "role": role
1974
  }
1975
-
1976
- return (
1977
- gr.update(visible=False), # start_section
1978
- gr.update(visible=True), # interview_section
1979
- audio_path, # question_audio
1980
- f"**Question 1:** {question_text}", # question_text
1981
- gr.update(value=f"Question 1 of {len(INTERVIEW_QUESTIONS)}"), # progress_text
1982
- state
1983
- )
1984
 
1985
- def transcribe_audio(audio_path):
1986
- """Transcribe user's audio input"""
1987
- if not audio_path:
1988
- return ""
1989
- return whisper_stt(audio_path)
 
1990
 
1991
- def submit_answer(transcript, state):
1992
- """Process submitted answer and prepare for next question"""
1993
- if not transcript.strip():
1994
- return (
1995
- gr.update(),
1996
- gr.update(),
1997
- gr.update(),
1998
- gr.update(),
1999
- gr.update(value="Please record an answer before submitting."),
2000
- gr.update(),
2001
- state
2002
- )
2003
-
2004
- # Store the answer
2005
- state["answers"].append({
2006
- "question": INTERVIEW_QUESTIONS[state["current_question"]],
2007
- "answer": transcript,
2008
- "timestamp": time.time()
2009
- })
2010
-
2011
- current_q = state["current_question"]
2012
-
2013
- # Check if this was the last question
2014
- if current_q >= len(INTERVIEW_QUESTIONS) - 1:
2015
- # Interview complete
2016
- summary = generate_interview_summary(state)
2017
- return (
2018
- gr.update(visible=False), # submit_answer_btn
2019
- gr.update(visible=False), # next_question_btn
2020
- gr.update(value=None), # question_audio
2021
- gr.update(value="**Interview Complete!**"), # question_text
2022
- gr.update(value="Interview finished. Thank you!"), # progress_text
2023
- gr.update(visible=True, value=summary), # interview_summary
2024
- state
2025
- )
2026
- else:
2027
- # Show next question button
2028
- return (
2029
- gr.update(visible=False), # submit_answer_btn
2030
- gr.update(visible=True), # next_question_btn
2031
- gr.update(), # question_audio
2032
- gr.update(), # question_text
2033
- gr.update(value=f"Answer submitted! Ready for question {current_q + 2}?"), # progress_text
2034
- gr.update(), # interview_summary
2035
- state
2036
- )
2037
 
2038
- def next_question(state):
2039
- """Move to next question"""
2040
- state["current_question"] += 1
2041
- current_q = state["current_question"]
2042
-
2043
- if current_q < len(INTERVIEW_QUESTIONS):
2044
- question_text = INTERVIEW_QUESTIONS[current_q]
2045
- audio_path = bark_tts(question_text)
2046
-
2047
- return (
2048
- gr.update(visible=True), # submit_answer_btn
2049
- gr.update(visible=False), # next_question_btn
2050
- audio_path, # question_audio
2051
- f"**Question {current_q + 1}:** {question_text}", # question_text
2052
- gr.update(value=f"Question {current_q + 1} of {len(INTERVIEW_QUESTIONS)}"), # progress_text
2053
- gr.update(value=None), # user_audio_input (clear)
2054
- gr.update(value=""), # transcript_display (clear)
2055
- state
2056
- )
2057
- else:
2058
- # This shouldn't happen, but handle gracefully
2059
- return (
2060
- gr.update(),
2061
- gr.update(),
2062
- gr.update(),
2063
- gr.update(value="Interview Complete!"),
2064
- gr.update(value="All questions answered."),
2065
- gr.update(),
2066
- gr.update(),
2067
- state
2068
- )
2069
 
2070
- def generate_interview_summary(state):
2071
- """Generate final interview summary"""
2072
- summary = f"# Interview Summary\n\n"
2073
- summary += f"**Candidate:** {state['name']}\n"
2074
- summary += f"**Role:** {state['role']}\n"
2075
- summary += f"**Date:** {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
2076
 
2077
- for i, qa in enumerate(state['answers']):
2078
- summary += f"### Question {i + 1}\n"
2079
- summary += f"**Q:** {qa['question']}\n\n"
2080
- summary += f"**A:** {qa['answer']}\n\n"
2081
- summary += "---\n\n"
2082
 
2083
- # Save to file
2084
- timestamp = time.strftime("%Y%m%d_%H%M%S")
2085
- log_file = f"interview_log_{timestamp}.json"
2086
- with open(log_file, "w", encoding="utf-8") as f:
2087
- json.dump(state, f, indent=2, ensure_ascii=False)
2088
 
2089
- summary += f"*Interview log saved as {log_file}*"
2090
- return summary
2091
 
2092
- # Wire up events
2093
- start_btn.click(
2094
- start_interview,
2095
- inputs=[name_input, role_input],
2096
- outputs=[start_section, interview_section, question_audio, question_text, progress_text, interview_state]
2097
  )
2098
 
2099
- user_audio_input.change(
2100
- transcribe_audio,
2101
- inputs=[user_audio_input],
2102
- outputs=[transcript_display]
2103
  )
2104
 
2105
- submit_answer_btn.click(
2106
- submit_answer,
2107
- inputs=[transcript_display, interview_state],
2108
- outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, interview_summary, interview_state]
2109
  )
2110
 
2111
- next_question_btn.click(
2112
- next_question,
2113
- inputs=[interview_state],
2114
- outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, user_audio_input, transcript_display, interview_state]
2115
  )
2116
 
2117
- if __name__ == "__main__":
2118
- demo.launch(debug=True)
 
1857
  # )
1858
 
1859
  # demo.launch(debug=True)
1860
+
1861
  import gradio as gr
1862
  import time
1863
  import tempfile
1864
  import numpy as np
1865
  import scipy.io.wavfile as wavfile
1866
+ import cv2
1867
  import os
1868
  import json
1869
+ from moviepy.editor import VideoFileClip
1870
+ import shutil
1871
  from transformers import BarkModel, AutoProcessor
1872
+ import torch, gc
1873
+ import whisper
1874
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1875
+ import librosa
1876
 
1877
+ import torch
1878
+ print(torch.cuda.is_available()) # ✅ Tells you if GPU is available
1879
  torch.cuda.empty_cache()
1880
+ gc.collect()
1881
 
1882
+ # Bark TTS
1883
  print("πŸ” Loading Bark model...")
1884
  model_bark = BarkModel.from_pretrained("suno/bark")
1885
+ print("✅ Bark model loaded")
1886
+
1887
+ print("🔍 Loading Bark processor...")
1888
  processor_bark = AutoProcessor.from_pretrained("suno/bark")
1889
+ print("✅ Bark processor loaded")
1890
+ print("🔍 Moving Bark model to device...")
1891
  model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1892
+ print("✅ Bark model on device")
1893
  bark_voice_preset = "v2/en_speaker_6"
1894
 
1895
  def bark_tts(text):
 
1896
  print(f"🔍 Synthesizing TTS for: {text}")
1897
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1898
  inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
 
1903
  wavfile.write(temp_wav.name, 22050, speech)
1904
  return temp_wav.name
1905
 
1906
+ # Whisper STT
1907
+ print("🔍 Loading Whisper model...")
1908
+ whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
1909
+ print("✅ Whisper model loaded")
1910
+
1911
  def whisper_stt(audio_path):
1912
+ if not audio_path or not os.path.exists(audio_path):
 
1913
  return ""
1914
  result = whisper_model.transcribe(audio_path)
1915
  return result["text"]
1916
 
1917
+ # DeepFace (Video Face Emotion)
1918
+ def ensure_mp4(video_input):
1919
+ if isinstance(video_input, str):
1920
+ input_path = video_input
1921
+ else:
1922
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1923
+ temp_in.write(video_input.read())
1924
+ input_path = temp_in.name
1925
 
1926
+ if input_path.endswith(".mp4"):
1927
+ return input_path
1928
+
1929
+ mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1930
+ try:
1931
+ clip = VideoFileClip(input_path)
1932
+ clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1933
+ clip.close()
1934
+ except Exception as e:
1935
+ print("Video conversion failed:", e)
1936
+ shutil.copy(input_path, mp4_path)
1937
+ return mp4_path
1938
+
1939
+ def analyze_video_emotions(video_input, sample_rate=15):
1940
+ mp4_path = ensure_mp4(video_input)
1941
+ if not mp4_path or not os.path.exists(mp4_path):
1942
+ return "no_face"
1943
+ cap = cv2.VideoCapture(mp4_path)
1944
+ frame_count = 0
1945
+ emotion_counts = {}
1946
+ while True:
1947
+ ret, frame = cap.read()
1948
+ if not ret: break
1949
+ if frame_count % sample_rate == 0:
1950
+ try:
1951
+ result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1952
+ dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1953
+ emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1954
+ except Exception: pass
1955
+ frame_count += 1
1956
+ cap.release()
1957
+ if not emotion_counts: return "no_face"
1958
+ return max(emotion_counts.items(), key=lambda x: x[1])[0]
1959
+
1960
+ # Wav2Vec2 model for audio emotion analysis
1961
+ local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"
1962
+ print("🔍 Loading Wav2Vec processor and model...")
1963
+ wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1964
+ wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1965
+ wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1966
+ print("✅ Wav2Vec model loaded")
1967
+ wav2vec_model.eval()
1968
+ voice_label_map = {
1969
+ 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1970
+ 4: 'neutral', 5: 'sad', 6: 'surprise'
1971
+ }
1972
+
1973
+ def analyze_audio_emotion(audio_path):
1974
+ print(f"🔍 Analyzing audio emotion for: {audio_path}")
1975
+ if not audio_path or not os.path.exists(audio_path):
1976
+ return "neutral"
1977
 
1978
+ speech, sr = librosa.load(audio_path, sr=16000)
1979
+ inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1980
+
1981
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1982
+ wav2vec_model.to(device)
1983
+ inputs = {k: v.to(device) for k, v in inputs.items()}
1984
+
1985
+ with torch.no_grad():
1986
+ logits = wav2vec_model(**inputs).logits
1987
+
1988
+ probs = torch.nn.functional.softmax(logits, dim=-1)
1989
+ predicted_id = torch.argmax(probs, dim=-1).item()
1990
+ return voice_label_map.get(predicted_id, "neutral")
1991
+
1992
+ # Effective confidence calculation
1993
+ def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1994
+ emotion_map = {"happy": 0.9, "neutral": 0.6, "surprise": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1995
+ answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1996
+ voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1997
+ avg_emotion = (voice_score + face_score) / 2
1998
+ control_bonus = max(0, answer_score - avg_emotion) * k
1999
+ eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
2000
+ return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
2001
+
2002
+ seniority_mapping = {
2003
+ "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
2004
+ }
2005
+
2006
+ # Gradio App
2007
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2008
+ user_data = gr.State({})
2009
+ interview_state = gr.State({})
2010
+ missing_fields_state = gr.State([])
2011
+ recording_state = gr.State({"is_recording": False})
2012
+
2013
  # UI Layout
2014
+ with gr.Column(visible=True) as user_info_section:
2015
+ gr.Markdown("## Candidate Information")
2016
+ cv_file = gr.File(label="Upload CV")
2017
+ job_desc = gr.Textbox(label="Job Description")
2018
+ start_btn = gr.Button("Continue", interactive=False)
2019
+
2020
+ with gr.Column(visible=False) as missing_section:
2021
+ gr.Markdown("## Missing Information")
2022
+ name_in = gr.Textbox(label="Name", visible=False)
2023
+ role_in = gr.Textbox(label="Job Role", visible=False)
2024
+ seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
2025
+ skills_in = gr.Textbox(label="Skills", visible=False)
2026
+ submit_btn = gr.Button("Submit", interactive=False)
2027
+
2028
+ with gr.Column(visible=False) as interview_pre_section:
2029
+ pre_interview_greeting_md = gr.Markdown()
2030
+ start_interview_final_btn = gr.Button("Start Interview")
2031
+
2032
  with gr.Column(visible=False) as interview_section:
2033
  gr.Markdown("## Interview in Progress")
2034
  question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
2035
  question_text = gr.Markdown()
2036
 
2037
+ # Audio recording controls
2038
+ with gr.Row():
2039
+ record_btn = gr.Button("🎤 Start Recording", variant="primary")
2040
+ stop_btn = gr.Button("⏹️ Stop Recording", interactive=False)
 
 
2041
 
2042
+ # Hidden audio component for recording
2043
+ user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Audio Recording", visible=False)
2044
 
2045
+ # Video input (keeping for emotion analysis)
2046
+ user_video_input = gr.Video(sources=["webcam"], label="Video Recording (for emotion analysis)", visible=False)
 
 
2047
 
2048
+ # Transcript and confirmation
2049
+ stt_transcript = gr.Textbox(label="Transcribed Answer (automatically generated)", interactive=True)
2050
+ confirm_btn = gr.Button("Confirm Answer", interactive=False)
2051
 
2052
+ # Status and results
2053
+ recording_status = gr.Markdown("**Status:** Ready to record")
2054
+ evaluation_display = gr.Markdown()
2055
+ emotion_display = gr.Markdown()
2056
  interview_summary = gr.Markdown(visible=False)
2057
+
2058
+ # UI Logic
2059
+ def validate_start_btn(cv_file, job_desc):
2060
+ return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
2061
 
2062
+ cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
2063
+ job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
2064
+
2065
+ def process_and_route_initial(cv_file, job_desc):
2066
+ details = extract_candidate_details(cv_file.name)
2067
+ job_info = extract_job_details(job_desc)
2068
+ data = {
2069
+ "name": details.get("name", "unknown"),
2070
+ "job_role": job_info.get("job_title", "unknown"),
2071
+ "seniority": job_info.get("experience_level", "unknown"),
2072
+ "skills": job_info.get("skills", [])
2073
  }
2074
+ missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
2075
+ if missing:
2076
+ return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
2077
+ else:
2078
+ greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
2079
+ return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
2080
 
2081
+ start_btn.click(
2082
+ process_and_route_initial,
2083
+ [cv_file, job_desc],
2084
+ [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
2085
+ )
2086
+
2087
+ def show_missing(missing):
2088
+ if missing is None: missing = []
2089
+ return (gr.update(visible="name" in missing),
2090
+ gr.update(visible="job_role" in missing),
2091
+ gr.update(visible="seniority" in missing),
2092
+ gr.update(visible="skills" in missing))
2093
 
2094
+ missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
2095
+
2096
+ def validate_fields(name, role, seniority, skills, missing):
2097
+ if not missing: return gr.update(interactive=False)
2098
+ all_filled = all([
2099
+ (not ("name" in missing) or bool(name.strip())),
2100
+ (not ("job_role" in missing) or bool(role.strip())),
2101
+ (not ("seniority" in missing) or bool(seniority)),
2102
+ (not ("skills" in missing) or bool(skills.strip()))
2103
+ ])
2104
+ return gr.update(interactive=all_filled)
2105
 
2106
+ for inp in [name_in, role_in, seniority_in, skills_in]:
2107
+ inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
2108
+
2109
+ def complete_manual(data, name, role, seniority, skills):
2110
+ if data["name"].lower() == "unknown": data["name"] = name
2111
+ if data["job_role"].lower() == "unknown": data["job_role"] = role
2112
+ if data["seniority"].lower() == "unknown": data["seniority"] = seniority
2113
+ if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
2114
+ greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
2115
+ return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
2116
 
2117
+ submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
2118
+
2119
+ def start_interview(data):
2120
+ state = {
2121
+ "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
2122
+ "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
2123
+ "conversation_history": [],
2124
+ "difficulty_adjustment": None,
2125
+ "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
2126
+ "log": []
2127
+ }
2128
+ context = ""
2129
+ prompt = build_interview_prompt(
2130
+ conversation_history=[], user_response="", context=context, job_role=data["job_role"],
2131
+ skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
2132
+ voice_label="neutral", face_label="neutral"
2133
+ )
2134
 
2135
+ # Generate first question
2136
+ first_q = groq_llm.predict(prompt)
2137
+ q_eval = {
2138
+ "Score": "N/A",
2139
+ "Reasoning": "Skipped to reduce processing time",
2140
+ "Improvements": []
2141
+ }
2142
+ state["questions"].append(first_q)
2143
+ state["question_evaluations"].append(q_eval)
2144
+ state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
2145
+
2146
+ # Generate audio for question
2147
+ audio_path = bark_tts(first_q)
2148
 
2149
+ # Log
2150
+ state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
2151
 
2152
+ return (state,
2153
+ gr.update(visible=False),
2154
+ gr.update(visible=True),
2155
+ audio_path,
2156
+ f"*Question 1:* {first_q}",
2157
+ gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2158
 
2159
+ start_interview_final_btn.click(
2160
+ start_interview,
2161
+ [user_data],
2162
+ [interview_state, interview_pre_section, interview_section, question_audio, question_text, recording_status]
 
2163
  )
2164
+
2165
+ # Recording functionality
2166
+ def start_recording(rec_state):
2167
+ rec_state["is_recording"] = True
2168
+ return (rec_state,
2169
+ gr.update(interactive=False),
2170
+ gr.update(interactive=True),
2171
+ gr.update(visible=True),
2172
+ gr.update(value="**Status:** 🔴 Recording... Click 'Stop Recording' when done"))
2173
 
2174
+ record_btn.click(
2175
+ start_recording,
2176
+ [recording_state],
2177
+ [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2178
  )
2179
+
2180
+ def stop_recording(rec_state):
2181
+ rec_state["is_recording"] = False
2182
+ return (rec_state,
2183
+ gr.update(interactive=True),
2184
+ gr.update(interactive=False),
2185
+ gr.update(visible=False),
2186
+ gr.update(value="**Status:** Processing audio... Please wait"))
2187
 
2188
+ stop_btn.click(
2189
+ stop_recording,
2190
+ [recording_state],
2191
+ [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2192
  )
2193
+
2194
+ # Auto-transcription when audio is recorded
2195
+ def transcribe_and_update(audio_path):
2196
+ if not audio_path:
2197
+ return "", gr.update(interactive=False), gr.update(value="**Status:** No audio recorded")
2198
+
2199
+ transcript = whisper_stt(audio_path)
2200
+ if transcript:
2201
+ return (transcript,
2202
+ gr.update(interactive=True),
2203
+ gr.update(value="**Status:** Audio transcribed! Review and click 'Confirm Answer'"))
2204
+ else:
2205
+ return ("",
2206
+ gr.update(interactive=False),
2207
+ gr.update(value="**Status:** Transcription failed. Please try recording again"))
2208
+
2209
+ user_audio_input.change(
2210
+ transcribe_and_update,
2211
+ [user_audio_input],
2212
+ [stt_transcript, confirm_btn, recording_status]
2213
+ )
2214
+
2215
+ def process_answer(transcript, audio_path, video_path, state, data):
2216
+ if not transcript:
2217
+ return (state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
2218
+
2219
+ elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
2220
+ state["timings"].append(elapsed)
2221
+ state["answers"].append(transcript)
2222
+ state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
2223
+
2224
+ # Emotion analysis (using testing values for speed)
2225
+ voice_label = "neutral"
2226
+ face_label = "neutral"
2227
+ state["voice_labels"].append(voice_label)
2228
+ state["face_labels"].append(face_label)
2229
+
2230
+ # Evaluate answer
2231
+ last_q = state["questions"][-1]
2232
+ q_eval = state["question_evaluations"][-1]
2233
+ ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
2234
+ answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
2235
+ state["answer_evaluations"].append(answer_eval)
2236
+ answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
2237
+
2238
+ # Adaptive difficulty
2239
+ if answer_score == "excellent":
2240
+ state["difficulty_adjustment"] = "harder"
2241
+ elif answer_score in ("medium", "poor"):
2242
+ state["difficulty_adjustment"] = "easier"
2243
+ else:
2244
+ state["difficulty_adjustment"] = None
2245
+
2246
+ # Effective confidence (testing value)
2247
+ eff_conf = {"effective_confidence": 0.6}
2248
+ state["effective_confidences"].append(eff_conf)
2249
+
2250
+ # Log
2251
+ state["log"].append({
2252
+ "type": "answer",
2253
+ "question": last_q,
2254
+ "answer": transcript,
2255
+ "answer_eval": answer_eval,
2256
+ "ref_answer": ref_answer,
2257
+ "face_label": face_label,
2258
+ "voice_label": voice_label,
2259
+ "effective_confidence": eff_conf,
2260
+ "timing": elapsed,
2261
+ "timestamp": time.time()
2262
+ })
2263
+
2264
+ # Check if interview is complete
2265
+ qidx = state["question_idx"] + 1
2266
+ if qidx >= state["max_questions"]:
2267
+ # Save log
2268
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
2269
+ log_file = f"interview_log_{timestamp}.json"
2270
+ with open(log_file, "w", encoding="utf-8") as f:
2271
+ json.dump(state["log"], f, indent=2, ensure_ascii=False)
2272
+
2273
+ # Generate summary
2274
+ summary = "# Interview Summary\n"
2275
+ for i, q in enumerate(state["questions"]):
2276
+ summary += (f"\n### Q{i + 1}: {q}\n"
2277
+ f"- *Answer*: {state['answers'][i]}\n"
2278
+ f"- *Q Eval*: {state['question_evaluations'][i]}\n"
2279
+ f"- *A Eval*: {state['answer_evaluations'][i]}\n"
2280
+ f"- *Time*: {state['timings'][i]}s\n")
2281
+ summary += f"\n\n⏺ Full log saved as {log_file}."
2282
+
2283
+ return (state,
2284
+ gr.update(visible=True, value=summary),
2285
+ gr.update(value=None),
2286
+ gr.update(value=None),
2287
+ gr.update(value=None),
2288
+ gr.update(interactive=False),
2289
+ gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"),
2290
+ gr.update(value="**Status:** Interview completed!"))
2291
+ else:
2292
+ # Generate next question
2293
+ state["question_idx"] = qidx
2294
+ state["q_start_time"] = time.time()
2295
+ context = ""
2296
+ prompt = build_interview_prompt(
2297
+ conversation_history=state["conversation_history"],
2298
+ user_response=transcript,
2299
+ context=context,
2300
+ job_role=data["job_role"],
2301
+ skills=data["skills"],
2302
+ seniority=data["seniority"],
2303
+ difficulty_adjustment=state["difficulty_adjustment"],
2304
+ face_label=face_label,
2305
+ voice_label=voice_label,
2306
+ effective_confidence=eff_conf
2307
+ )
2308
+
2309
+ next_q = groq_llm.predict(prompt)
2310
+ q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
2311
+ state["questions"].append(next_q)
2312
+ state["question_evaluations"].append(q_eval)
2313
+ state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
2314
+ state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
2315
+
2316
+ audio_path = bark_tts(next_q)
2317
+ eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
2318
+
2319
+ return (state,
2320
+ gr.update(visible=False),
2321
+ audio_path,
2322
+ f"*Question {qidx + 1}:* {next_q}",
2323
+ gr.update(value=""),
2324
+ gr.update(interactive=False),
2325
+ gr.update(visible=True, value=f"Last Detected — Face: {face_label}, Voice: {voice_label}"),
2326
+ gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2327
 
2328
+ confirm_btn.click(
2329
+ process_answer,
2330
+ [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
2331
+ [interview_state, interview_summary, question_audio, question_text, stt_transcript, confirm_btn, emotion_display, recording_status]
2332
  )
2333
 
2334
+ demo.launch(debug=True)