Update app.py
app.py CHANGED
@@ -1857,34 +1857,42 @@ def extract_candidate_details(file_path):
1857   # )
1858
1859   # demo.launch(debug=True)
1860   import gradio as gr
1861   import time
1862   import tempfile
1863   import numpy as np
1864   import scipy.io.wavfile as wavfile
1865   import os
1866   import json
1867 - import
1868 - import
1869   from transformers import BarkModel, AutoProcessor
1870
1871 -
1872   torch.cuda.empty_cache()
1873
1874 - #
1875   print("🔄 Loading Bark model...")
1876   model_bark = BarkModel.from_pretrained("suno/bark")
1877   processor_bark = AutoProcessor.from_pretrained("suno/bark")
1878   model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1879   bark_voice_preset = "v2/en_speaker_6"
1880 - print("✅ Bark model loaded")
1881 -
1882 - print("🔄 Loading Whisper model...")
1883 - whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
1884 - print("✅ Whisper model loaded")
1885
1886   def bark_tts(text):
1887 -     """Convert text to speech using Bark"""
1888       print(f"🔊 Synthesizing TTS for: {text}")
1889       inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1890       inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
@@ -1895,224 +1903,432 @@ def bark_tts(text):
1895       wavfile.write(temp_wav.name, 22050, speech)
1896       return temp_wav.name
1897
1898   def whisper_stt(audio_path):
1899 -
1900 -     if not audio_path or not os.path.exists(audio_path):
1901           return ""
1902       result = whisper_model.transcribe(audio_path)
1903       return result["text"]
1904
1905 - #
1906 -
1907 -
1908 -
1909 -
1910
1911 -
1912 -
1913 -
1914
1915       # UI Layout
1916 -     with gr.Column(visible=True) as
1917 -         gr.Markdown("##
1918 -
1919 -
1920 -         start_btn = gr.Button("
1921 -
1922       with gr.Column(visible=False) as interview_section:
1923           gr.Markdown("## Interview in Progress")
1924           question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
1925           question_text = gr.Markdown()
1926
1927 -         # Audio
1928 -
1929 -
1930 -
1931 -             label="Record Your Answer"
1932 -         )
1933
1934 -         #
1935 -
1936 -             label="Your Answer (Transcribed)",
1937 -             interactive=False
1938 -         )
1939
1940 -         #
1941 -
1942 -         submit_answer_btn = gr.Button("Submit Answer", variant="primary")
1943 -         next_question_btn = gr.Button("Next Question", visible=False)
1944
1945 -         #
1946 -
1947
1948 -     #
1949           interview_summary = gr.Markdown(visible=False)
1950
1951 -
1952 -
1953 -
1954 -
1955 -
1956 -
1957 -
1958 -
1959 -
1960 -
1961 -
1962 -         )
1963 -
1964 -         # Generate first question audio
1965 -         question_text = INTERVIEW_QUESTIONS[0]
1966 -         audio_path = bark_tts(question_text)
1967 -
1968 -         state = {
1969 -             "current_question": 0,
1970 -             "answers": [],
1971 -             "started": True,
1972 -             "name": name,
1973 -             "role": role
1974           }
1975 -
1976 -
1977 -             gr.update(visible=False),
1978 -
1979 -
1980 -
1981 -             gr.update(value=f"Question 1 of {len(INTERVIEW_QUESTIONS)}"), # progress_text
1982 -             state
1983 -         )
1984
1985 -
1986 -
1987 -
1988 -
1989 -
1990
1991 -
1992 -
1993 -
1994 -
1995 -
1996 -
1997 -
1998 -
1999 -
2000 -
2001 -
2002 -         )
2003 -
2004 -         # Store the answer
2005 -         state["answers"].append({
2006 -             "question": INTERVIEW_QUESTIONS[state["current_question"]],
2007 -             "answer": transcript,
2008 -             "timestamp": time.time()
2009 -         })
2010 -
2011 -         current_q = state["current_question"]
2012 -
2013 -         # Check if this was the last question
2014 -         if current_q >= len(INTERVIEW_QUESTIONS) - 1:
2015 -             # Interview complete
2016 -             summary = generate_interview_summary(state)
2017 -             return (
2018 -                 gr.update(visible=False), # submit_answer_btn
2019 -                 gr.update(visible=False), # next_question_btn
2020 -                 gr.update(value=None), # question_audio
2021 -                 gr.update(value="**Interview Complete!**"), # question_text
2022 -                 gr.update(value="Interview finished. Thank you!"), # progress_text
2023 -                 gr.update(visible=True, value=summary), # interview_summary
2024 -                 state
2025 -             )
2026 -         else:
2027 -             # Show next question button
2028 -             return (
2029 -                 gr.update(visible=False), # submit_answer_btn
2030 -                 gr.update(visible=True), # next_question_btn
2031 -                 gr.update(), # question_audio
2032 -                 gr.update(), # question_text
2033 -                 gr.update(value=f"Answer submitted! Ready for question {current_q + 2}?"), # progress_text
2034 -                 gr.update(), # interview_summary
2035 -                 state
2036 -             )
2037
2038 -
2039 -
2040 -
2041 -
2042 -
2043 -         if
2044 -
2045 -
2046 -
2047 -
2048 -             gr.update(visible=True), # submit_answer_btn
2049 -             gr.update(visible=False), # next_question_btn
2050 -             audio_path, # question_audio
2051 -             f"**Question {current_q + 1}:** {question_text}", # question_text
2052 -             gr.update(value=f"Question {current_q + 1} of {len(INTERVIEW_QUESTIONS)}"), # progress_text
2053 -             gr.update(value=None), # user_audio_input (clear)
2054 -             gr.update(value=""), # transcript_display (clear)
2055 -             state
2056 -         )
2057 -         else:
2058 -             # This shouldn't happen, but handle gracefully
2059 -             return (
2060 -                 gr.update(),
2061 -                 gr.update(),
2062 -                 gr.update(),
2063 -                 gr.update(value="Interview Complete!"),
2064 -                 gr.update(value="All questions answered."),
2065 -                 gr.update(),
2066 -                 gr.update(),
2067 -                 state
2068 -             )
2069
2070 -
2071 -
2072 -
2073 -
2074 -
2075 -
2076
2077 -
2078 -
2079 -
2080 -
2081 -
2082
2083 -     #
2084 -     timestamp
2085 -     log_file = f"interview_log_{timestamp}.json"
2086 -     with open(log_file, "w", encoding="utf-8") as f:
2087 -         json.dump(state, f, indent=2, ensure_ascii=False)
2088
2089 -
2090 -
2091
2092 -
2093 -
2094 -
2095 -
2096 -         outputs=[start_section, interview_section, question_audio, question_text, progress_text, interview_state]
2097       )
2098
2099 -
2100 -
2101 -
2102 -
2103       )
2104
2105 -
2106 -
2107 -
2108 -
2109       )
2110
2111 -
2112 -
2113 -
2114 -
2115       )
2116
2117 -
2118 - demo.launch(debug=True)
1857   # )
1858
1859   # demo.launch(debug=True)
1860 +
1861   import gradio as gr
1862   import time
1863   import tempfile
1864   import numpy as np
1865   import scipy.io.wavfile as wavfile
1866 + import cv2
1867   import os
1868   import json
1869 + from moviepy.editor import VideoFileClip
1870 + import shutil
1871   from transformers import BarkModel, AutoProcessor
1872 + import torch, gc
1873 + import whisper
1874 + from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1875 + import librosa
1876
1877 + import torch
1878 + print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
1879   torch.cuda.empty_cache()
1880 + gc.collect()
1881
1882 + # Bark TTS
1883   print("🔄 Loading Bark model...")
1884   model_bark = BarkModel.from_pretrained("suno/bark")
1885 + print("✅ Bark model loaded")
1886 +
1887 + print("🔄 Loading Bark processor...")
1888   processor_bark = AutoProcessor.from_pretrained("suno/bark")
1889 + print("✅ Bark processor loaded")
1890 + print("🔄 Moving Bark model to device...")
1891   model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1892 + print("✅ Bark model on device")
1893   bark_voice_preset = "v2/en_speaker_6"
1894
1895   def bark_tts(text):
1896       print(f"🔊 Synthesizing TTS for: {text}")
1897       inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1898       inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1903       wavfile.write(temp_wav.name, 22050, speech)
1904       return temp_wav.name
1905
1906 + # Whisper STT
1907 + print("🔄 Loading Whisper model...")
1908 + whisper_model = whisper.load_model("base", device="cuda")
1909 + print("✅ Whisper model loaded")
1910 +
1911   def whisper_stt(audio_path):
1912 +     if not audio_path or not os.path.exists(audio_path):
1913           return ""
1914       result = whisper_model.transcribe(audio_path)
1915       return result["text"]
1916
1917 + # DeepFace (Video Face Emotion)
1918 + def ensure_mp4(video_input):
1919 +     if isinstance(video_input, str):
1920 +         input_path = video_input
1921 +     else:
1922 +         with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1923 +             temp_in.write(video_input.read())
1924 +             input_path = temp_in.name
1925
1926 +     if input_path.endswith(".mp4"):
1927 +         return input_path
1928 +
1929 +     mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1930 +     try:
1931 +         clip = VideoFileClip(input_path)
1932 +         clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1933 +         clip.close()
1934 +     except Exception as e:
1935 +         print("Video conversion failed:", e)
1936 +         shutil.copy(input_path, mp4_path)
1937 +     return mp4_path
1938 +
1939 + def analyze_video_emotions(video_input, sample_rate=15):
1940 +     mp4_path = ensure_mp4(video_input)
1941 +     if not mp4_path or not os.path.exists(mp4_path):
1942 +         return "no_face"
1943 +     cap = cv2.VideoCapture(mp4_path)
1944 +     frame_count = 0
1945 +     emotion_counts = {}
1946 +     while True:
1947 +         ret, frame = cap.read()
1948 +         if not ret: break
1949 +         if frame_count % sample_rate == 0:
1950 +             try:
1951 +                 result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1952 +                 dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1953 +                 emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1954 +             except Exception: pass
1955 +         frame_count += 1
1956 +     cap.release()
1957 +     if not emotion_counts: return "no_face"
1958 +     return max(emotion_counts.items(), key=lambda x: x[1])[0]
1959 +
1960 + # Wav2Vec2 model for audio emotion analysis
1961 + local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"
1962 + print("🔄 Loading Wav2Vec processor and model...")
1963 + wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1964 + wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1965 + wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1966 + print("✅ Wav2Vec model loaded")
1967 + wav2vec_model.eval()
1968 + voice_label_map = {
1969 +     0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1970 +     4: 'neutral', 5: 'sad', 6: 'surprise'
1971 + }
1972 +
1973 + def analyze_audio_emotion(audio_path):
1974 +     print(f"🔄 Analyzing audio emotion for: {audio_path}")
1975 +     if not audio_path or not os.path.exists(audio_path):
1976 +         return "neutral"
1977
1978 +     speech, sr = librosa.load(audio_path, sr=16000)
1979 +     inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1980 +
1981 +     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1982 +     wav2vec_model.to(device)
1983 +     inputs = {k: v.to(device) for k, v in inputs.items()}
1984 +
1985 +     with torch.no_grad():
1986 +         logits = wav2vec_model(**inputs).logits
1987 +
1988 +     probs = torch.nn.functional.softmax(logits, dim=-1)
1989 +     predicted_id = torch.argmax(probs, dim=-1).item()
1990 +     return voice_label_map.get(predicted_id, "neutral")
1991 +
1992 + # Effective confidence calculation
1993 + def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1994 +     emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1995 +     answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1996 +     voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1997 +     avg_emotion = (voice_score + face_score) / 2
1998 +     control_bonus = max(0, answer_score - avg_emotion) * k
1999 +     eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
2000 +     return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
2001 +
2002 + seniority_mapping = {
2003 +     "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
2004 + }
2005 +
2006 + # Gradio App
2007 + with gr.Blocks(theme=gr.themes.Soft()) as demo:
2008 +     user_data = gr.State({})
2009 +     interview_state = gr.State({})
2010 +     missing_fields_state = gr.State([])
2011 +     recording_state = gr.State({"is_recording": False})
2012 +
2013       # UI Layout
2014 +     with gr.Column(visible=True) as user_info_section:
2015 +         gr.Markdown("## Candidate Information")
2016 +         cv_file = gr.File(label="Upload CV")
2017 +         job_desc = gr.Textbox(label="Job Description")
2018 +         start_btn = gr.Button("Continue", interactive=False)
2019 +
2020 +     with gr.Column(visible=False) as missing_section:
2021 +         gr.Markdown("## Missing Information")
2022 +         name_in = gr.Textbox(label="Name", visible=False)
2023 +         role_in = gr.Textbox(label="Job Role", visible=False)
2024 +         seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
2025 +         skills_in = gr.Textbox(label="Skills", visible=False)
2026 +         submit_btn = gr.Button("Submit", interactive=False)
2027 +
2028 +     with gr.Column(visible=False) as interview_pre_section:
2029 +         pre_interview_greeting_md = gr.Markdown()
2030 +         start_interview_final_btn = gr.Button("Start Interview")
2031 +
2032       with gr.Column(visible=False) as interview_section:
2033           gr.Markdown("## Interview in Progress")
2034           question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
2035           question_text = gr.Markdown()
2036
2037 +         # Audio recording controls
2038 +         with gr.Row():
2039 +             record_btn = gr.Button("🎤 Start Recording", variant="primary")
2040 +             stop_btn = gr.Button("⏹️ Stop Recording", interactive=False)
2041
2042 +         # Hidden audio component for recording
2043 +         user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Audio Recording", visible=False)
2044
2045 +         # Video input (keeping for emotion analysis)
2046 +         user_video_input = gr.Video(sources=["webcam"], label="Video Recording (for emotion analysis)", visible=False)
2047
2048 +         # Transcript and confirmation
2049 +         stt_transcript = gr.Textbox(label="Transcribed Answer (automatically generated)", interactive=True)
2050 +         confirm_btn = gr.Button("Confirm Answer", interactive=False)
2051
2052 +         # Status and results
2053 +         recording_status = gr.Markdown("**Status:** Ready to record")
2054 +         evaluation_display = gr.Markdown()
2055 +         emotion_display = gr.Markdown()
2056           interview_summary = gr.Markdown(visible=False)
2057 +
2058 +     # UI Logic
2059 +     def validate_start_btn(cv_file, job_desc):
2060 +         return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
2061
2062 +     cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
2063 +     job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
2064 +
2065 +     def process_and_route_initial(cv_file, job_desc):
2066 +         details = extract_candidate_details(cv_file.name)
2067 +         job_info = extract_job_details(job_desc)
2068 +         data = {
2069 +             "name": details.get("name", "unknown"),
2070 +             "job_role": job_info.get("job_title", "unknown"),
2071 +             "seniority": job_info.get("experience_level", "unknown"),
2072 +             "skills": job_info.get("skills", [])
2073           }
2074 +         missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
2075 +         if missing:
2076 +             return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
2077 +         else:
2078 +             greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
2079 +             return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
2080
2081 +     start_btn.click(
2082 +         process_and_route_initial,
2083 +         [cv_file, job_desc],
2084 +         [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
2085 +     )
2086 +
2087 +     def show_missing(missing):
2088 +         if missing is None: missing = []
2089 +         return (gr.update(visible="name" in missing),
2090 +                 gr.update(visible="job_role" in missing),
2091 +                 gr.update(visible="seniority" in missing),
2092 +                 gr.update(visible="skills" in missing))
2093
2094 +     missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
2095 +
2096 +     def validate_fields(name, role, seniority, skills, missing):
2097 +         if not missing: return gr.update(interactive=False)
2098 +         all_filled = all([
2099 +             (not ("name" in missing) or bool(name.strip())),
2100 +             (not ("job_role" in missing) or bool(role.strip())),
2101 +             (not ("seniority" in missing) or bool(seniority)),
2102 +             (not ("skills" in missing) or bool(skills.strip()))
2103 +         ])
2104 +         return gr.update(interactive=all_filled)
2105
2106 +     for inp in [name_in, role_in, seniority_in, skills_in]:
2107 +         inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
2108 +
2109 +     def complete_manual(data, name, role, seniority, skills):
2110 +         if data["name"].lower() == "unknown": data["name"] = name
2111 +         if data["job_role"].lower() == "unknown": data["job_role"] = role
2112 +         if data["seniority"].lower() == "unknown": data["seniority"] = seniority
2113 +         if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
2114 +         greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
2115 +         return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
2116
2117 +     submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
2118 +
2119 +     def start_interview(data):
2120 +         state = {
2121 +             "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
2122 +             "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
2123 +             "conversation_history": [],
2124 +             "difficulty_adjustment": None,
2125 +             "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
2126 +             "log": []
2127 +         }
2128 +         context = ""
2129 +         prompt = build_interview_prompt(
2130 +             conversation_history=[], user_response="", context=context, job_role=data["job_role"],
2131 +             skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
2132 +             voice_label="neutral", face_label="neutral"
2133 +         )
2134
2135 +         # Generate first question
2136 +         first_q = groq_llm.predict(prompt)
2137 +         q_eval = {
2138 +             "Score": "N/A",
2139 +             "Reasoning": "Skipped to reduce processing time",
2140 +             "Improvements": []
2141 +         }
2142 +         state["questions"].append(first_q)
2143 +         state["question_evaluations"].append(q_eval)
2144 +         state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
2145 +
2146 +         # Generate audio for question
2147 +         audio_path = bark_tts(first_q)
2148
2149 +         # Log
2150 +         state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
2151
2152 +         return (state,
2153 +                 gr.update(visible=False),
2154 +                 gr.update(visible=True),
2155 +                 audio_path,
2156 +                 f"*Question 1:* {first_q}",
2157 +                 gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2158
2159 +     start_interview_final_btn.click(
2160 +         start_interview,
2161 +         [user_data],
2162 +         [interview_state, interview_pre_section, interview_section, question_audio, question_text, recording_status]
2163       )
2164 +
2165 +     # Recording functionality
2166 +     def start_recording(rec_state):
2167 +         rec_state["is_recording"] = True
2168 +         return (rec_state,
2169 +                 gr.update(interactive=False),
2170 +                 gr.update(interactive=True),
2171 +                 gr.update(visible=True),
2172 +                 gr.update(value="**Status:** 🔴 Recording... Click 'Stop Recording' when done"))
2173
2174 +     record_btn.click(
2175 +         start_recording,
2176 +         [recording_state],
2177 +         [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2178       )
2179 +
2180 +     def stop_recording(rec_state):
2181 +         rec_state["is_recording"] = False
2182 +         return (rec_state,
2183 +                 gr.update(interactive=True),
2184 +                 gr.update(interactive=False),
2185 +                 gr.update(visible=False),
2186 +                 gr.update(value="**Status:** Processing audio... Please wait"))
2187
2188 +     stop_btn.click(
2189 +         stop_recording,
2190 +         [recording_state],
2191 +         [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2192       )
2193 +
2194 +     # Auto-transcription when audio is recorded
2195 +     def transcribe_and_update(audio_path):
2196 +         if not audio_path:
2197 +             return "", gr.update(interactive=False), gr.update(value="**Status:** No audio recorded")
2198 +
2199 +         transcript = whisper_stt(audio_path)
2200 +         if transcript:
2201 +             return (transcript,
2202 +                     gr.update(interactive=True),
2203 +                     gr.update(value="**Status:** Audio transcribed! Review and click 'Confirm Answer'"))
2204 +         else:
2205 +             return ("",
2206 +                     gr.update(interactive=False),
2207 +                     gr.update(value="**Status:** Transcription failed. Please try recording again"))
2208 +
2209 +     user_audio_input.change(
2210 +         transcribe_and_update,
2211 +         [user_audio_input],
2212 +         [stt_transcript, confirm_btn, recording_status]
2213 +     )
2214 +
2215 +     def process_answer(transcript, audio_path, video_path, state, data):
2216 +         if not transcript:
2217 +             return (state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
2218 +
2219 +         elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
2220 +         state["timings"].append(elapsed)
2221 +         state["answers"].append(transcript)
2222 +         state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
2223 +
2224 +         # Emotion analysis (using testing values for speed)
2225 +         voice_label = "neutral"
2226 +         face_label = "neutral"
2227 +         state["voice_labels"].append(voice_label)
2228 +         state["face_labels"].append(face_label)
2229 +
2230 +         # Evaluate answer
2231 +         last_q = state["questions"][-1]
2232 +         q_eval = state["question_evaluations"][-1]
2233 +         ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
2234 +         answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
2235 +         state["answer_evaluations"].append(answer_eval)
2236 +         answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
2237 +
2238 +         # Adaptive difficulty
2239 +         if answer_score == "excellent":
2240 +             state["difficulty_adjustment"] = "harder"
2241 +         elif answer_score in ("medium", "poor"):
2242 +             state["difficulty_adjustment"] = "easier"
2243 +         else:
2244 +             state["difficulty_adjustment"] = None
2245 +
2246 +         # Effective confidence (testing value)
2247 +         eff_conf = {"effective_confidence": 0.6}
2248 +         state["effective_confidences"].append(eff_conf)
2249 +
2250 +         # Log
2251 +         state["log"].append({
2252 +             "type": "answer",
2253 +             "question": last_q,
2254 +             "answer": transcript,
2255 +             "answer_eval": answer_eval,
2256 +             "ref_answer": ref_answer,
2257 +             "face_label": face_label,
2258 +             "voice_label": voice_label,
2259 +             "effective_confidence": eff_conf,
2260 +             "timing": elapsed,
2261 +             "timestamp": time.time()
2262 +         })
2263 +
2264 +         # Check if interview is complete
2265 +         qidx = state["question_idx"] + 1
2266 +         if qidx >= state["max_questions"]:
2267 +             # Save log
2268 +             timestamp = time.strftime("%Y%m%d_%H%M%S")
2269 +             log_file = f"interview_log_{timestamp}.json"
2270 +             with open(log_file, "w", encoding="utf-8") as f:
2271 +                 json.dump(state["log"], f, indent=2, ensure_ascii=False)
2272 +
2273 +             # Generate summary
2274 +             summary = "# Interview Summary\n"
2275 +             for i, q in enumerate(state["questions"]):
2276 +                 summary += (f"\n### Q{i + 1}: {q}\n"
2277 +                             f"- *Answer*: {state['answers'][i]}\n"
2278 +                             f"- *Q Eval*: {state['question_evaluations'][i]}\n"
2279 +                             f"- *A Eval*: {state['answer_evaluations'][i]}\n"
2280 +                             f"- *Time*: {state['timings'][i]}s\n")
2281 +             summary += f"\n\nFull log saved as {log_file}."
2282 +
2283 +             return (state,
2284 +                     gr.update(visible=True, value=summary),
2285 +                     gr.update(value=None),
2286 +                     gr.update(value=None),
2287 +                     gr.update(value=None),
2288 +                     gr.update(interactive=False),
2289 +                     gr.update(visible=True, value=f"Last Detected → Face: {face_label}, Voice: {voice_label}"),
2290 +                     gr.update(value="**Status:** Interview completed!"))
2291 +         else:
2292 +             # Generate next question
2293 +             state["question_idx"] = qidx
2294 +             state["q_start_time"] = time.time()
2295 +             context = ""
2296 +             prompt = build_interview_prompt(
2297 +                 conversation_history=state["conversation_history"],
2298 +                 user_response=transcript,
2299 +                 context=context,
2300 +                 job_role=data["job_role"],
2301 +                 skills=data["skills"],
2302 +                 seniority=data["seniority"],
2303 +                 difficulty_adjustment=state["difficulty_adjustment"],
2304 +                 face_label=face_label,
2305 +                 voice_label=voice_label,
2306 +                 effective_confidence=eff_conf
2307 +             )
2308 +
2309 +             next_q = groq_llm.predict(prompt)
2310 +             q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
2311 +             state["questions"].append(next_q)
2312 +             state["question_evaluations"].append(q_eval)
2313 +             state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
2314 +             state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
2315 +
2316 +             audio_path = bark_tts(next_q)
2317 +             eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
2318 +
2319 +             return (state,
2320 +                     gr.update(visible=False),
2321 +                     audio_path,
2322 +                     f"*Question {qidx + 1}:* {next_q}",
2323 +                     gr.update(value=""),
2324 +                     gr.update(interactive=False),
2325 +                     gr.update(visible=True, value=f"Last Detected → Face: {face_label}, Voice: {voice_label}"),
2326 +                     gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2327
2328 +     confirm_btn.click(
2329 +         process_answer,
2330 +         [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
2331 +         [interview_state, interview_summary, question_audio, question_text, stt_transcript, confirm_btn, emotion_display, recording_status]
2332       )
2333
2334 + demo.launch(debug=True)
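
For reference, the scoring rule added in interpret_confidence can be checked by hand. The sketch below only restates the maps and weights from the committed code above and works through one illustrative case (neutral voice, happy face, a "good" answer); the chosen labels are an example, not values produced by the app.

# Standalone sketch of the effective-confidence formula (same maps and weights
# as interpret_confidence above); the example labels are illustrative only.
emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4,
               "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}

voice_score = emotion_map["neutral"]         # 0.6
face_score = emotion_map["happy"]            # 0.9
answer_score = answer_score_map["good"]      # 0.8

avg_emotion = (voice_score + face_score) / 2              # 0.75
control_bonus = max(0, answer_score - avg_emotion) * 0.2  # 0.01 (default k = 0.2)
eff_conf = 0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus
print(round(eff_conf, 3))  # 0.695

In this weighting the answer quality dominates (0.5), voice and face emotion contribute 0.22 and 0.18, and the small control bonus only applies when the answer score exceeds the average detected emotion.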
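A quick smoke test of the new speech path is also possible, assuming the definitions above (bark_tts, whisper_stt, analyze_audio_emotion) are already loaded in an interactive session with the models initialized; the prompt string is arbitrary.

# Hypothetical interactive check: synthesize a question with Bark, transcribe it
# back with Whisper, and classify the voice emotion of the same clip.
wav_path = bark_tts("Tell me about a challenging project you have worked on.")
print("TTS written to:", wav_path)
print("Round-trip transcript:", whisper_stt(wav_path))
print("Detected voice emotion:", analyze_audio_emotion(wav_path))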