Update app.py
app.py
CHANGED
@@ -1448,416 +1448,6 @@ def extract_candidate_details(file_path):
1448       "skills": skills
1449   }
1450
1451 - # import gradio as gr
1452 - # import time
1453 - # import tempfile
1454 - # import numpy as np
1455 - # import scipy.io.wavfile as wavfile
1456 - # import cv2
1457 - # import os
1458 - # import json
1459 - # from moviepy.editor import VideoFileClip
1460 - # import shutil
1461 - # from transformers import BarkModel, AutoProcessor
1462 - # import torch, gc
1463 - # import whisper
1464 - # from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
1465 - # import librosa
1466 -
1467 - # import torch
1468 - # print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
1469 - # torch.cuda.empty_cache()
1470 - # gc.collect()
1471 -
1472 -
1473 - # # Bark TTS
1474 - # print("Loading Bark model...")
1475 - # model_bark = BarkModel.from_pretrained("suno/bark")
1476 - # print("✅ Bark model loaded")
1477 -
1478 - # print("Loading Bark processor...")
1479 - # processor_bark = AutoProcessor.from_pretrained("suno/bark")
1480 - # print("✅ Bark processor loaded")
1481 - # print("Moving Bark model to device...")
1482 - # model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1483 - # print("✅ Bark model on device")
1484 - # bark_voice_preset = "v2/en_speaker_6"
1485 -
1486 - # def bark_tts(text):
1487 - # print(f"Synthesizing TTS for: {text}")
1488 - # inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1489 - # inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
1490 - # speech_values = model_bark.generate(**inputs)
1491 - # speech = speech_values.cpu().numpy().squeeze()
1492 - # speech = (speech * 32767).astype(np.int16)
1493 - # temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1494 - # wavfile.write(temp_wav.name, 22050, speech)
1495 - # return temp_wav.name
1496 -
1497 - # # Whisper STT
1498 - # print("Loading Whisper model...")
1499 - # whisper_model = whisper.load_model("base", device="cuda")
1500 - # print("✅ Whisper model loaded")
1501 - # def whisper_stt(audio_path):
1502 - # if not audio_path or not os.path.exists(audio_path): return ""
1503 - # result = whisper_model.transcribe(audio_path)
1504 - # return result["text"]
1505 -
1506 -
1507 - # # DeepFace (Video Face Emotion)
1508 - # def ensure_mp4(video_input):
1509 - # # video_input could be a file-like object, a path, or a Gradio temp path
1510 - # if isinstance(video_input, str):
1511 - # input_path = video_input
1512 - # else:
1513 - # # It's a file-like object (rare for Gradio video, but handle it)
1514 - # with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1515 - # temp_in.write(video_input.read())
1516 - # input_path = temp_in.name
1517 -
1518 - # # If already mp4, return as is
1519 - # if input_path.endswith(".mp4"):
1520 - # return input_path
1521 -
1522 - # # Convert to mp4 using moviepy
1523 - # mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1524 - # try:
1525 - # clip = VideoFileClip(input_path)
1526 - # clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1527 - # clip.close()
1528 - # except Exception as e:
1529 - # print("Video conversion failed:", e)
1530 - # # As fallback, just copy original
1531 - # shutil.copy(input_path, mp4_path)
1532 - # return mp4_path
1533 -
1534 - # def analyze_video_emotions(video_input, sample_rate=15):
1535 - # # Convert input to an mp4 file OpenCV can process
1536 - # mp4_path = ensure_mp4(video_input)
1537 - # if not mp4_path or not os.path.exists(mp4_path):
1538 - # return "no_face"
1539 - # cap = cv2.VideoCapture(mp4_path)
1540 - # frame_count = 0
1541 - # emotion_counts = {}
1542 - # while True:
1543 - # ret, frame = cap.read()
1544 - # if not ret: break
1545 - # if frame_count % sample_rate == 0:
1546 - # try:
1547 - # result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1548 - # dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1549 - # emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1550 - # except Exception: pass
1551 - # frame_count += 1
1552 - # cap.release()
1553 - # if not emotion_counts: return "no_face"
1554 - # return max(emotion_counts.items(), key=lambda x: x[1])[0]
1555 -
1556 - # # Original Hugging Face model: HaniaRuby/speech-emotion-recognition-wav2vec2
1557 - # local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"  # Local path to the downloaded model files
1558 - # print("Loading Wav2Vec processor and model...")
1559 - # wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1560 - # wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1561 - # wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1562 - # print("✅ Wav2Vec model loaded")
1563 - # wav2vec_model.eval()
1564 - # voice_label_map = {
1565 - # 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1566 - # 4: 'neutral', 5: 'sad', 6: 'surprise'
1567 - # }
1568 -
1569 -
1570 -
1571 - # def analyze_audio_emotion(audio_path):
1572 - # print(f"Analyzing audio emotion for: {audio_path}")
1573 - # if not audio_path or not os.path.exists(audio_path): return "neutral"
1574 -
1575 - # speech, sr = librosa.load(audio_path, sr=16000)
1576 - # inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1577 -
1578 - # # Move model and inputs to GPU
1579 - # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1580 - # wav2vec_model.to(device)
1581 - # inputs = {k: v.to(device) for k, v in inputs.items()}
1582 -
1583 - # with torch.no_grad():
1584 - # logits = wav2vec_model(**inputs).logits
1585 -
1586 - # probs = torch.nn.functional.softmax(logits, dim=-1)
1587 - # predicted_id = torch.argmax(probs, dim=-1).item()
1588 - # return voice_label_map.get(predicted_id, "neutral")
1589 -
1590 -
1591 - # # --- Effective confidence calculation
1592 - # def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1593 - # emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1594 - # answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1595 - # voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1596 - # avg_emotion = (voice_score + face_score) / 2
1597 - # control_bonus = max(0, answer_score - avg_emotion) * k
1598 - # eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
1599 - # return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
1600 -
1601 - # seniority_mapping = {
1602 - # "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1603 - # }
1604 -
1605 -
1606 - # # --- 2. Gradio App ---
1607 -
1608 - # with gr.Blocks(theme=gr.themes.Soft()) as demo:
1609 - # user_data = gr.State({})
1610 - # interview_state = gr.State({})
1611 - # missing_fields_state = gr.State([])
1612 -
1613 - # # --- UI Layout ---
1614 - # with gr.Column(visible=True) as user_info_section:
1615 - # gr.Markdown("## Candidate Information")
1616 - # cv_file = gr.File(label="Upload CV")
1617 - # job_desc = gr.Textbox(label="Job Description")
1618 - # start_btn = gr.Button("Continue", interactive=False)
1619 -
1620 - # with gr.Column(visible=False) as missing_section:
1621 - # gr.Markdown("## Missing Information")
1622 - # name_in = gr.Textbox(label="Name", visible=False)
1623 - # role_in = gr.Textbox(label="Job Role", visible=False)
1624 - # seniority_in = gr.Dropdown(list(seniority_mapping.keys()), label="Seniority", visible=False)
1625 - # skills_in = gr.Textbox(label="Skills", visible=False)
1626 - # submit_btn = gr.Button("Submit", interactive=False)
1627 -
1628 - # with gr.Column(visible=False) as interview_pre_section:
1629 - # pre_interview_greeting_md = gr.Markdown()
1630 - # start_interview_final_btn = gr.Button("Start Interview")
1631 -
1632 - # with gr.Column(visible=False) as interview_section:
1633 - # gr.Markdown("## Interview in Progress")
1634 - # question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1635 - # question_text = gr.Markdown()
1636 - # user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1637 - # user_video_input = gr.Video(sources=["webcam"], label="2. Record Video Answer")
1638 - # stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1639 - # confirm_btn = gr.Button("Confirm Answer")
1640 - # evaluation_display = gr.Markdown()
1641 - # emotion_display = gr.Markdown()
1642 - # interview_summary = gr.Markdown(visible=False)
1643 -
1644 - # # --- UI Logic ---
1645 -
1646 - # def validate_start_btn(cv_file, job_desc):
1647 - # return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1648 - # cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1649 - # job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1650 -
1651 - # def process_and_route_initial(cv_file, job_desc):
1652 - # details = extract_candidate_details(cv_file.name)
1653 - # job_info = extract_job_details(job_desc)
1654 - # data = {
1655 - # "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1656 - # "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1657 - # }
1658 - # missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1659 - # if missing:
1660 - # return data, missing, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
1661 - # else:
1662 - # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1663 - # return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1664 - # start_btn.click(
1665 - # process_and_route_initial,
1666 - # [cv_file, job_desc],
1667 - # [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md]
1668 - # )
1669 -
1670 - # def show_missing(missing):
1671 - # if missing is None: missing = []
1672 - # return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1673 - # missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1674 -
1675 - # def validate_fields(name, role, seniority, skills, missing):
1676 - # if not missing: return gr.update(interactive=False)
1677 - # all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1678 - # return gr.update(interactive=all_filled)
1679 - # for inp in [name_in, role_in, seniority_in, skills_in]:
1680 - # inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1681 -
1682 - # def complete_manual(data, name, role, seniority, skills):
1683 - # if data["name"].lower() == "unknown": data["name"] = name
1684 - # if data["job_role"].lower() == "unknown": data["job_role"] = role
1685 - # if data["seniority"].lower() == "unknown": data["seniority"] = seniority
1686 - # if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1687 - # greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1688 - # return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1689 - # submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1690 -
1691 - # def start_interview(data):
1692 - # # --- Advanced state with full logging ---
1693 - # state = {
1694 - # "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1695 - # "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1696 - # "conversation_history": [],
1697 - # "difficulty_adjustment": None,
1698 - # "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1699 - # "log": []
1700 - # }
1701 - # # --- Optionally: context retrieval here (currently just blank) ---
1702 - # context = ""
1703 - # prompt = build_interview_prompt(
1704 - # conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1705 - # skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1706 - # voice_label="neutral", face_label="neutral"
1707 - # )
1708 - # #here the original one
1709 - # # first_q = groq_llm.predict(prompt)
1710 - # # # Evaluate Q for quality
1711 - # # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1712 - # # state["questions"].append(first_q)
1713 - # # state["question_evaluations"].append(q_eval)
1714 -
1715 - # #here the testing one
1716 - # first_q = groq_llm.predict(prompt)
1717 - # q_eval = {
1718 - # "Score": "N/A",
1719 - # "Reasoning": "Skipped to reduce processing time",
1720 - # "Improvements": []
1721 - # }
1722 - # state["questions"].append(first_q)
1723 - # state["question_evaluations"].append(q_eval)
1724 -
1725 -
1726 - # state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1727 - # audio_path = bark_tts(first_q)
1728 - # # LOG
1729 - # state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1730 - # return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1731 - # start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1732 -
1733 - # def transcribe(audio_path):
1734 - # return whisper_stt(audio_path)
1735 - # user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1736 -
1737 - # def process_answer(transcript, audio_path, video_path, state, data):
1738 - # if not transcript and not video_path:
1739 - # return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1740 - # elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1741 - # state["timings"].append(elapsed)
1742 - # state["answers"].append(transcript)
1743 - # state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1744 -
1745 - # # --- 1. Emotion analysis ---
1746 - # # voice_label = analyze_audio_emotion(audio_path)
1747 - # # face_label = analyze_video_emotions(video_path)
1748 - # # state["voice_labels"].append(voice_label)
1749 - # # state["face_labels"].append(face_label)
1750 -
1751 - # #just for testing
1752 - # voice_label = "neutral"
1753 - # face_label = "neutral"
1754 - # state["voice_labels"].append(voice_label)
1755 - # state["face_labels"].append(face_label)
1756 -
1757 -
1758 -
1759 - # # --- 2. Evaluate previous Q and Answer ---
1760 - # last_q = state["questions"][-1]
1761 - # q_eval = state["question_evaluations"][-1]  # Already in state
1762 - # ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1763 - # answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1764 - # state["answer_evaluations"].append(answer_eval)
1765 - # answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1766 -
1767 - # # --- 3. Adaptive difficulty ---
1768 - # if answer_score == "excellent":
1769 - # state["difficulty_adjustment"] = "harder"
1770 - # elif answer_score in ("medium", "poor"):
1771 - # state["difficulty_adjustment"] = "easier"
1772 - # else:
1773 - # state["difficulty_adjustment"] = None
1774 -
1775 - # # --- 4. Effective confidence ---
1776 - # # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1777 - # # state["effective_confidences"].append(eff_conf)
1778 -
1779 - # #just for testing:
1780 - # eff_conf = {"effective_confidence": 0.6}
1781 - # state["effective_confidences"].append(eff_conf)
1782 -
1783 -
1784 - # # --- LOG ---
1785 - # state["log"].append({
1786 - # "type": "answer",
1787 - # "question": last_q,
1788 - # "answer": transcript,
1789 - # "answer_eval": answer_eval,
1790 - # "ref_answer": ref_answer,
1791 - # "face_label": face_label,
1792 - # "voice_label": voice_label,
1793 - # "effective_confidence": eff_conf,
1794 - # "timing": elapsed,
1795 - # "timestamp": time.time()
1796 - # })
1797 -
1798 - # # --- Next or End ---
1799 - # qidx = state["question_idx"] + 1
1800 - # if qidx >= state["max_questions"]:
1801 - # # Save as JSON (optionally)
1802 - # timestamp = time.strftime("%Y%m%d_%H%M%S")
1803 - # log_file = f"interview_log_{timestamp}.json"
1804 - # with open(log_file, "w", encoding="utf-8") as f:
1805 - # json.dump(state["log"], f, indent=2, ensure_ascii=False)
1806 - # # Report
1807 - # summary = "# Interview Summary\n"
1808 - # for i, q in enumerate(state["questions"]):
1809 - # summary += (f"\n### Q{i + 1}: {q}\n"
1810 - # f"- *Answer*: {state['answers'][i]}\n"
1811 - # f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1812 - # f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1813 - # #also this are removed just for testing :(
1814 - # # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1815 - # # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1816 - # f"- *Time*: {state['timings'][i]}s\n")
1817 - # summary += f"\n\n▶ Full log saved as {log_file}."
1818 - # return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"))
1819 - # else:
1820 - # # --- Build next prompt using adaptive difficulty ---
1821 - # state["question_idx"] = qidx
1822 - # state["q_start_time"] = time.time()
1823 - # context = ""  # You can add your context logic here
1824 - # prompt = build_interview_prompt(
1825 - # conversation_history=state["conversation_history"],
1826 - # user_response=transcript,
1827 - # context=context,
1828 - # job_role=data["job_role"],
1829 - # skills=data["skills"],
1830 - # seniority=data["seniority"],
1831 - # difficulty_adjustment=state["difficulty_adjustment"],
1832 - # face_label=face_label,
1833 - # voice_label=voice_label,
1834 - # effective_confidence=eff_conf
1835 - # )
1836 - # next_q = groq_llm.predict(prompt)
1837 - # # Evaluate Q quality
1838 - # q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1839 - # state["questions"].append(next_q)
1840 - # state["question_evaluations"].append(q_eval)
1841 - # state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1842 - # state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1843 - # audio_path = bark_tts(next_q)
1844 - # # Display evaluations
1845 - # eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1846 - # return (
1847 - # state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1848 - # gr.update(value=None), gr.update(value=None),
1849 - # gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
1850 - # )
1851 - # confirm_btn.click(
1852 - # process_answer,
1853 - # [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1854 - # [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1855 - # ).then(
1856 - # lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1857 - # )
1858 -
1859 - # demo.launch(debug=True)
1860 -
1861   import gradio as gr
1862   import time
1863   import tempfile
@@ -1879,18 +1469,15 @@ print(torch.cuda.is_available())  # ✅ Tells you if GPU is available
1879   torch.cuda.empty_cache()
1880   gc.collect()
1881
1882   # Bark TTS
1883   print("Loading Bark model...")
1884 - model_bark = BarkModel.from_pretrained("suno/bark")
1885   print("✅ Bark model loaded")
1886 -
1887   print("Loading Bark processor...")
1888   processor_bark = AutoProcessor.from_pretrained("suno/bark")
1889   print("✅ Bark processor loaded")
1890 -
1891 - model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
1892 - print("✅ Bark model on device")
1893 - bark_voice_preset = "v2/en_speaker_6"
1894
1895   def bark_tts(text):
1896   print(f"Synthesizing TTS for: {text}")
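Note on the hunk above: it folds the separate `.to(device)` call into the `from_pretrained` line and drops the extra status prints, leaving `bark_tts` itself untouched. Below is a minimal sketch of the same synthesis path, assuming the `transformers` Bark API already imported in this file (the name `tts_to_wav` is illustrative, not the app's function); unlike the hard-coded 22050 Hz in the old commented-out version, it reads the rate from `model.generation_config.sample_rate`:

```python
# Minimal sketch of the Bark synthesis path (assumes transformers' BarkModel /
# AutoProcessor as imported in app.py; tts_to_wav is an illustrative name).
import tempfile

import numpy as np
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, BarkModel

device = "cuda" if torch.cuda.is_available() else "cpu"
model = BarkModel.from_pretrained("suno/bark").to(device)
processor = AutoProcessor.from_pretrained("suno/bark")

def tts_to_wav(text: str, voice_preset: str = "v2/en_speaker_5") -> str:
    inputs = processor(text, voice_preset=voice_preset, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}  # same move pattern as the app
    with torch.no_grad():
        audio = model.generate(**inputs).cpu().numpy().squeeze()
    pcm = (audio * 32767).astype(np.int16)       # float [-1, 1] -> 16-bit PCM
    rate = model.generation_config.sample_rate   # Bark's native output rate (24 kHz)
    path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    wavfile.write(path, rate, pcm)
    return path
```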
@@ -1907,110 +1494,24 @@ def bark_tts(text):
1907   print("Loading Whisper model...")
1908   whisper_model = whisper.load_model("base", device="cuda")
1909   print("✅ Whisper model loaded")
1910 -
1911   def whisper_stt(audio_path):
1912 - if not audio_path or not os.path.exists(audio_path):
1913 - return ""
1914   result = whisper_model.transcribe(audio_path)
1915   return result["text"]
1916
1917 -
1918 - # DeepFace (Video Face Emotion)
1919 - def ensure_mp4(video_input):
1920 - if isinstance(video_input, str):
1921 - input_path = video_input
1922 - else:
1923 - with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_in:
1924 - temp_in.write(video_input.read())
1925 - input_path = temp_in.name
1926 -
1927 - if input_path.endswith(".mp4"):
1928 - return input_path
1929 -
1930 - mp4_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
1931 - try:
1932 - clip = VideoFileClip(input_path)
1933 - clip.write_videofile(mp4_path, codec="libx264", audio=False, verbose=False, logger=None)
1934 - clip.close()
1935 - except Exception as e:
1936 - print("Video conversion failed:", e)
1937 - shutil.copy(input_path, mp4_path)
1938 - return mp4_path
1939 -
1940 - def analyze_video_emotions(video_input, sample_rate=15):
1941 - mp4_path = ensure_mp4(video_input)
1942 - if not mp4_path or not os.path.exists(mp4_path):
1943 - return "no_face"
1944 - cap = cv2.VideoCapture(mp4_path)
1945 - frame_count = 0
1946 - emotion_counts = {}
1947 - while True:
1948 - ret, frame = cap.read()
1949 - if not ret: break
1950 - if frame_count % sample_rate == 0:
1951 - try:
1952 - result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
1953 - dominant = result[0]["dominant_emotion"] if isinstance(result, list) else result["dominant_emotion"]
1954 - emotion_counts[dominant] = emotion_counts.get(dominant, 0) + 1
1955 - except Exception: pass
1956 - frame_count += 1
1957 - cap.release()
1958 - if not emotion_counts: return "no_face"
1959 - return max(emotion_counts.items(), key=lambda x: x[1])[0]
1960 -
1961 - # Wav2Vec2 model for audio emotion analysis
1962 - local_wav2vec_model_path = "HaniaRuby/speech-emotion-recognition-wav2vec2"
1963 - print("Loading Wav2Vec processor and model...")
1964 - wav2vec_processor = Wav2Vec2Processor.from_pretrained(local_wav2vec_model_path)
1965 - wav2vec_model = Wav2Vec2ForSequenceClassification.from_pretrained(local_wav2vec_model_path)
1966 - wav2vec_model = wav2vec_model.to("cuda" if torch.cuda.is_available() else "cpu")
1967 - print("✅ Wav2Vec model loaded")
1968 - wav2vec_model.eval()
1969 - voice_label_map = {
1970 - 0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
1971 - 4: 'neutral', 5: 'sad', 6: 'surprise'
1972 - }
1973 -
1974 - def analyze_audio_emotion(audio_path):
1975 - print(f"Analyzing audio emotion for: {audio_path}")
1976 - if not audio_path or not os.path.exists(audio_path):
1977 - return "neutral"
1978 -
1979 - speech, sr = librosa.load(audio_path, sr=16000)
1980 - inputs = wav2vec_processor(speech, sampling_rate=16000, return_tensors="pt")
1981 -
1982 - device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1983 - wav2vec_model.to(device)
1984 - inputs = {k: v.to(device) for k, v in inputs.items()}
1985 -
1986 - with torch.no_grad():
1987 - logits = wav2vec_model(**inputs).logits
1988 -
1989 - probs = torch.nn.functional.softmax(logits, dim=-1)
1990 - predicted_id = torch.argmax(probs, dim=-1).item()
1991 - return voice_label_map.get(predicted_id, "neutral")
1992 -
1993 - # Effective confidence calculation
1994 - def interpret_confidence(voice_label, face_label, answer_score_label, k=0.2):
1995 - emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3, "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
1996 - answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}
1997 - voice_score, face_score, answer_score = emotion_map.get(voice_label, 0.5), emotion_map.get(face_label, 0.5), answer_score_map.get(answer_score_label, 0.5)
1998 - avg_emotion = (voice_score + face_score) / 2
1999 - control_bonus = max(0, answer_score - avg_emotion) * k
2000 - eff_conf = (0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus)
2001 - return {"effective_confidence": round(eff_conf, 3), "answer_score": round(answer_score, 2), "voice_score": round(voice_score, 2), "face_score": round(face_score, 2), "control_bonus": round(control_bonus, 3)}
2002   seniority_mapping = {
2003   "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
2004   }
2005
2006 -
2007   with gr.Blocks(theme=gr.themes.Soft()) as demo:
2008   user_data = gr.State({})
2009   interview_state = gr.State({})
2010   missing_fields_state = gr.State([])
2011 - recording_state = gr.State({"is_recording": False})
2012
2013 - # UI Layout
2014   with gr.Column(visible=True) as user_info_section:
2015   gr.Markdown("## Candidate Information")
2016   cv_file = gr.File(label="Upload CV")
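The removed `interpret_confidence` helper above weights the graded answer at 0.5, the voice emotion at 0.22, the face emotion at 0.18, and adds a small "control bonus" when the answer grade exceeds the average emotion score. A worked check of that arithmetic, using values taken straight from the maps in the removed code:

```python
# Worked example of the removed interpret_confidence() weighting.
emotion_map = {"happy": 0.9, "neutral": 0.6, "surprised": 0.7, "sad": 0.4, "angry": 0.3,
               "disgust": 0.2, "fear": 0.3, "no_face": 0.5, "unknown": 0.5}
answer_score_map = {"excellent": 1.0, "good": 0.8, "medium": 0.6, "poor": 0.3}

voice, face, answer, k = "neutral", "happy", "good", 0.2
voice_score, face_score = emotion_map[voice], emotion_map[face]    # 0.6, 0.9
answer_score = answer_score_map[answer]                            # 0.8
avg_emotion = (voice_score + face_score) / 2                       # 0.75
control_bonus = max(0, answer_score - avg_emotion) * k             # 0.05 * 0.2 = 0.01
eff = 0.5 * answer_score + 0.22 * voice_score + 0.18 * face_score + 0.1 * control_bonus
print(round(eff, 3))  # 0.4 + 0.132 + 0.162 + 0.001 = 0.695
```

Because the fixed weights sum to 0.9 and the bonus term is capped by `k`, the effective confidence never reaches 1.0 even for a top-scored answer.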
@@ -2031,34 +1532,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2031
2032   with gr.Column(visible=False) as interview_section:
2033   gr.Markdown("## Interview in Progress")
2034 - question_audio = gr.Audio(label="Listen
2035   question_text = gr.Markdown()
2036 -
2037 -
2038 -
2039 - record_btn = gr.Button("🎤 Start Recording", variant="primary")
2040 - stop_btn = gr.Button("⏹️ Stop Recording", interactive=False)
2041 -
2042 - # Hidden audio component for recording
2043 - user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Audio Recording", visible=False)
2044 -
2045 - # Video input (keeping for emotion analysis)
2046 - user_video_input = gr.Video(sources=["webcam"], label="Video Recording (for emotion analysis)", visible=False)
2047 -
2048 - # Transcript and confirmation
2049 - stt_transcript = gr.Textbox(label="Transcribed Answer (automatically generated)", interactive=True)
2050 - confirm_btn = gr.Button("Confirm Answer", interactive=False)
2051 -
2052 - # Status and results
2053 - recording_status = gr.Markdown("**Status:** Ready to record")
2054   evaluation_display = gr.Markdown()
2055 - emotion_display = gr.Markdown()
2056   interview_summary = gr.Markdown(visible=False)
2057
2058 - # UI Logic
2059   def validate_start_btn(cv_file, job_desc):
2060   return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
2061 -
2062   cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
2063   job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
2064
@@ -2066,10 +1551,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2066   details = extract_candidate_details(cv_file.name)
2067   job_info = extract_job_details(job_desc)
2068   data = {
2069 - "name": details.get("name", "unknown"),
2070 - "
2071 - "seniority": job_info.get("experience_level", "unknown"),
2072 - "skills": job_info.get("skills", [])
2073   }
2074   missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
2075   if missing:
@@ -2077,7 +1560,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2077   else:
2078   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
2079   return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
2080 -
2081   start_btn.click(
2082   process_and_route_initial,
2083   [cv_file, job_desc],
@@ -2086,23 +1568,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2086
2087   def show_missing(missing):
2088   if missing is None: missing = []
2089 - return
2090 - gr.update(visible="job_role" in missing),
2091 - gr.update(visible="seniority" in missing),
2092 - gr.update(visible="skills" in missing))
2093 -
2094   missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
2095
2096   def validate_fields(name, role, seniority, skills, missing):
2097   if not missing: return gr.update(interactive=False)
2098 - all_filled = all([
2099 - (not ("name" in missing) or bool(name.strip())),
2100 - (not ("job_role" in missing) or bool(role.strip())),
2101 - (not ("seniority" in missing) or bool(seniority)),
2102 - (not ("skills" in missing) or bool(skills.strip()))
2103 - ])
2104   return gr.update(interactive=all_filled)
2105 -
2106   for inp in [name_in, role_in, seniority_in, skills_in]:
2107   inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
2108
@@ -2113,10 +1585,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2113   if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
2114   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
2115   return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
2116 -
2117   submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
2118
2119   def start_interview(data):
2120   state = {
2121   "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
2122   "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
@@ -2125,14 +1597,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2125   "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
2126   "log": []
2127   }
2128   context = ""
2129   prompt = build_interview_prompt(
2130   conversation_history=[], user_response="", context=context, job_role=data["job_role"],
2131   skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
2132   voice_label="neutral", face_label="neutral"
2133   )
2134 -
2135 - #
2136   first_q = groq_llm.predict(prompt)
2137   q_eval = {
2138   "Score": "N/A",
@@ -2141,101 +1620,50 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2141   }
2142   state["questions"].append(first_q)
2143   state["question_evaluations"].append(q_eval)
2144 -
2145
2146 -
2147   audio_path = bark_tts(first_q)
2148 -
2149 - # Log
2150   state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
2151 -
2152 -
2153 - gr.update(visible=False),
2154 - gr.update(visible=True),
2155 - audio_path,
2156 - f"*Question 1:* {first_q}",
2157 - gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2158 -
2159 - start_interview_final_btn.click(
2160 - start_interview,
2161 - [user_data],
2162 - [interview_state, interview_pre_section, interview_section, question_audio, question_text, recording_status]
2163 - )
2164 -
2165 - # Recording functionality
2166 - def start_recording(rec_state):
2167 - rec_state["is_recording"] = True
2168 - return (rec_state,
2169 - gr.update(interactive=False),
2170 - gr.update(interactive=True),
2171 - gr.update(visible=True),
2172 - gr.update(value="**Status:** 🔴 Recording... Click 'Stop Recording' when done"))
2173 -
2174 - record_btn.click(
2175 - start_recording,
2176 - [recording_state],
2177 - [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2178 - )
2179 -
2180 - def stop_recording(rec_state):
2181 - rec_state["is_recording"] = False
2182 - return (rec_state,
2183 - gr.update(interactive=True),
2184 - gr.update(interactive=False),
2185 - gr.update(visible=False),
2186 - gr.update(value="**Status:** Processing audio... Please wait"))
2187 -
2188 - stop_btn.click(
2189 - stop_recording,
2190 - [recording_state],
2191 - [recording_state, record_btn, stop_btn, user_audio_input, recording_status]
2192 - )
2193
2194 -
2195 -
2196 -
2197 - return "", gr.update(interactive=False), gr.update(value="**Status:** No audio recorded")
2198 -
2199 - transcript = whisper_stt(audio_path)
2200 - if transcript:
2201 - return (transcript,
2202 - gr.update(interactive=True),
2203 - gr.update(value="**Status:** Audio transcribed! Review and click 'Confirm Answer'"))
2204 - else:
2205 - return ("",
2206 - gr.update(interactive=False),
2207 - gr.update(value="**Status:** Transcription failed. Please try recording again"))
2208 -
2209 - user_audio_input.change(
2210 - transcribe_and_update,
2211 - [user_audio_input],
2212 - [stt_transcript, confirm_btn, recording_status]
2213 - )
2214
2215   def process_answer(transcript, audio_path, video_path, state, data):
2216 - if not transcript:
2217 - return
2218 -
2219   elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
2220   state["timings"].append(elapsed)
2221   state["answers"].append(transcript)
2222   state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
2223
2224 - # Emotion analysis
2225   voice_label = "neutral"
2226   face_label = "neutral"
2227   state["voice_labels"].append(voice_label)
2228   state["face_labels"].append(face_label)
2229
2230 -
2231   last_q = state["questions"][-1]
2232 - q_eval = state["question_evaluations"][-1]
2233   ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
2234   answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
2235   state["answer_evaluations"].append(answer_eval)
2236   answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
2237
2238 - # Adaptive difficulty
2239   if answer_score == "excellent":
2240   state["difficulty_adjustment"] = "harder"
2241   elif answer_score in ("medium", "poor"):
@@ -2243,11 +1671,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2243   else:
2244   state["difficulty_adjustment"] = None
2245
2246 - # Effective confidence
2247   eff_conf = {"effective_confidence": 0.6}
2248   state["effective_confidences"].append(eff_conf)
2249
2250 -
2251   state["log"].append({
2252   "type": "answer",
2253   "question": last_q,
@@ -2261,38 +1694,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2261   "timestamp": time.time()
2262   })
2263
2264 - #
2265   qidx = state["question_idx"] + 1
2266   if qidx >= state["max_questions"]:
2267 - # Save
2268   timestamp = time.strftime("%Y%m%d_%H%M%S")
2269   log_file = f"interview_log_{timestamp}.json"
2270   with open(log_file, "w", encoding="utf-8") as f:
2271   json.dump(state["log"], f, indent=2, ensure_ascii=False)
2272 -
2273 - # Generate summary
2274   summary = "# Interview Summary\n"
2275   for i, q in enumerate(state["questions"]):
2276   summary += (f"\n### Q{i + 1}: {q}\n"
2277   f"- *Answer*: {state['answers'][i]}\n"
2278   f"- *Q Eval*: {state['question_evaluations'][i]}\n"
2279   f"- *A Eval*: {state['answer_evaluations'][i]}\n"
2280   f"- *Time*: {state['timings'][i]}s\n")
2281   summary += f"\n\n▶ Full log saved as {log_file}."
2282 -
2283 - return (state,
2284 - gr.update(visible=True, value=summary),
2285 - gr.update(value=None),
2286 - gr.update(value=None),
2287 - gr.update(value=None),
2288 - gr.update(interactive=False),
2289 - gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
2290 - gr.update(value="**Status:** Interview completed!"))
2291   else:
2292 - #
2293   state["question_idx"] = qidx
2294   state["q_start_time"] = time.time()
2295 - context = ""
2296   prompt = build_interview_prompt(
2297   conversation_history=state["conversation_history"],
2298   user_response=transcript,
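For readers of the saved `interview_log_*.json`: each "answer" entry written by the block above carries the keys logged in `process_answer`. An illustrative entry is sketched below (values are made up, and the nested `answer_eval` shape is assumed to mirror the Score/Reasoning/Improvements pattern used for question evaluations):

```python
# Illustrative shape of one "answer" entry in interview_log_<timestamp>.json.
# Keys mirror the dict appended to state["log"]; values here are invented.
example_entry = {
    "type": "answer",
    "question": "Can you describe a REST API you have designed?",
    "answer": "I built a Flask service that ...",
    "answer_eval": {"Score": "good", "Reasoning": "...", "Improvements": ["..."]},  # assumed shape
    "ref_answer": "A strong answer would cover resource modeling, versioning, ...",
    "face_label": "neutral",
    "voice_label": "neutral",
    "effective_confidence": {"effective_confidence": 0.6},
    "timing": 42.7,          # seconds spent on the question
    "timestamp": 1718032000.123,
}
```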
@@ -2301,34 +1728,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
2301   skills=data["skills"],
2302   seniority=data["seniority"],
2303   difficulty_adjustment=state["difficulty_adjustment"],
2304 - face_label=face_label,
2305   voice_label=voice_label,
2306 - effective_confidence=eff_conf
2307   )
2308 -
2309   next_q = groq_llm.predict(prompt)
2310   q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
2311   state["questions"].append(next_q)
2312   state["question_evaluations"].append(q_eval)
2313   state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
2314   state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
2315 -
2316   audio_path = bark_tts(next_q)
2317   eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
2318 -
2319 -
2320 -
2321 -
2322 -
2323 - gr.update(value=""),
2324 - gr.update(interactive=False),
2325 - gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
2326 - gr.update(value="**Status:** Listen to the question, then click 'Start Recording' to answer"))
2327 -
2328   confirm_btn.click(
2329   process_answer,
2330   [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
2331 - [interview_state, interview_summary, question_audio, question_text,
2332   )
2333
2334   demo.launch(debug=True)
1448       "skills": skills
1449   }
1450
1451   import gradio as gr
1452   import time
1453   import tempfile
1469   torch.cuda.empty_cache()
1470   gc.collect()
1471
1472 +
1473   # Bark TTS
1474   print("Loading Bark model...")
1475 + model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
1476   print("✅ Bark model loaded")
1477   print("Loading Bark processor...")
1478   processor_bark = AutoProcessor.from_pretrained("suno/bark")
1479   print("✅ Bark processor loaded")
1480 + bark_voice_preset = "v2/en_speaker_5"
1481
1482   def bark_tts(text):
1483   print(f"Synthesizing TTS for: {text}")
1494   print("Loading Whisper model...")
1495   whisper_model = whisper.load_model("base", device="cuda")
1496   print("✅ Whisper model loaded")
1497   def whisper_stt(audio_path):
1498 + if not audio_path or not os.path.exists(audio_path): return ""
1499   result = whisper_model.transcribe(audio_path)
1500   return result["text"]
1501
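With the Bark and Whisper helpers above both active, a quick round-trip is an easy smoke test; a sketch, assuming the `bark_tts` and `whisper_stt` functions defined in this file and a GPU (Whisper is loaded with `device="cuda"` as written):

```python
# Quick round-trip smoke test using the two helpers defined above.
if __name__ == "__main__":
    sample = "Tell me about a project you are proud of."
    wav_path = bark_tts(sample)      # text -> WAV file on disk
    heard = whisper_stt(wav_path)    # WAV file -> transcript
    print("TTS wrote:", wav_path)
    print("Whisper heard:", heard.strip())
```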
1502   seniority_mapping = {
1503   "Entry-level": 1, "Junior": 2, "Mid-Level": 3, "Senior": 4, "Lead": 5
1504   }
1505
1506 +
1507 + # --- 2. Gradio App ---
1508 +
1509   with gr.Blocks(theme=gr.themes.Soft()) as demo:
1510   user_data = gr.State({})
1511   interview_state = gr.State({})
1512   missing_fields_state = gr.State([])
1513
1514 + # --- UI Layout ---
1515   with gr.Column(visible=True) as user_info_section:
1516   gr.Markdown("## Candidate Information")
1517   cv_file = gr.File(label="Upload CV")
1532
1533   with gr.Column(visible=False) as interview_section:
1534   gr.Markdown("## Interview in Progress")
1535 + question_audio = gr.Audio(label="Listen", interactive=False, autoplay=True)
1536   question_text = gr.Markdown()
1537 + user_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="1. Record Audio Answer")
1538 + stt_transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
1539 + confirm_btn = gr.Button("Confirm Answer")
1540   evaluation_display = gr.Markdown()
1541   interview_summary = gr.Markdown(visible=False)
1542
1543 + # --- UI Logic ---
1544 +
1545   def validate_start_btn(cv_file, job_desc):
1546   return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
1547   cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
1548   job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
1549
1551   details = extract_candidate_details(cv_file.name)
1552   job_info = extract_job_details(job_desc)
1553   data = {
1554 + "name": details.get("name", "unknown"), "job_role": job_info.get("job_title", "unknown"),
1555 + "seniority": job_info.get("experience_level", "unknown"), "skills": job_info.get("skills", [])
1556   }
1557   missing = [k for k, v in data.items() if (isinstance(v, str) and v.lower() == "unknown") or not v]
1558   if missing:
1560   else:
1561   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
1562   return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
1563   start_btn.click(
1564   process_and_route_initial,
1565   [cv_file, job_desc],
1568
1569   def show_missing(missing):
1570   if missing is None: missing = []
1571 + return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
1572   missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
1573
1574   def validate_fields(name, role, seniority, skills, missing):
1575   if not missing: return gr.update(interactive=False)
1576 + all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip())),])
1577   return gr.update(interactive=all_filled)
1578   for inp in [name_in, role_in, seniority_in, skills_in]:
1579   inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
1580
1585   if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
1586   greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
1587   return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
1588   submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
1589
1590   def start_interview(data):
1591 + # --- Advanced state with full logging ---
1592   state = {
1593   "questions": [], "answers": [], "face_labels": [], "voice_labels": [], "timings": [],
1594   "question_evaluations": [], "answer_evaluations": [], "effective_confidences": [],
1597   "question_idx": 0, "max_questions": 3, "q_start_time": time.time(),
1598   "log": []
1599   }
1600 + # --- Optionally: context retrieval here (currently just blank) ---
1601   context = ""
1602   prompt = build_interview_prompt(
1603   conversation_history=[], user_response="", context=context, job_role=data["job_role"],
1604   skills=data["skills"], seniority=data["seniority"], difficulty_adjustment=None,
1605   voice_label="neutral", face_label="neutral"
1606   )
1607 + #here the original one
1608 + # first_q = groq_llm.predict(prompt)
1609 + # # Evaluate Q for quality
1610 + # q_eval = eval_question_quality(first_q, data["job_role"], data["seniority"], None)
1611 + # state["questions"].append(first_q)
1612 + # state["question_evaluations"].append(q_eval)
1613 +
1614 + #here the testing one
1615   first_q = groq_llm.predict(prompt)
1616   q_eval = {
1617   "Score": "N/A",
1620   }
1621   state["questions"].append(first_q)
1622   state["question_evaluations"].append(q_eval)
1623 +
1624
1625 + state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
1626   audio_path = bark_tts(first_q)
1627 + # LOG
1628   state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
1629 + return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
1630 + start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
1631
1632 + def transcribe(audio_path):
1633 + return whisper_stt(audio_path)
1634 + user_audio_input.change(transcribe, user_audio_input, stt_transcript)
1635
1636   def process_answer(transcript, audio_path, video_path, state, data):
1637 + if not transcript and not video_path:
1638 + return state, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1639   elapsed = round(time.time() - state.get("q_start_time", time.time()), 2)
1640   state["timings"].append(elapsed)
1641   state["answers"].append(transcript)
1642   state["conversation_history"].append({'role': 'Candidate', 'content': transcript})
1643
1644 + # --- 1. Emotion analysis ---
1645 + # voice_label = analyze_audio_emotion(audio_path)
1646 + # face_label = analyze_video_emotions(video_path)
1647 + # state["voice_labels"].append(voice_label)
1648 + # state["face_labels"].append(face_label)
1649 +
1650 + #just for testing
1651   voice_label = "neutral"
1652   face_label = "neutral"
1653   state["voice_labels"].append(voice_label)
1654   state["face_labels"].append(face_label)
1655
1656 +
1657 +
1658 + # --- 2. Evaluate previous Q and Answer ---
1659   last_q = state["questions"][-1]
1660 + q_eval = state["question_evaluations"][-1]  # Already in state
1661   ref_answer = generate_reference_answer(last_q, data["job_role"], data["seniority"])
1662   answer_eval = evaluate_answer(last_q, transcript, ref_answer, data["job_role"], data["seniority"], None)
1663   state["answer_evaluations"].append(answer_eval)
1664   answer_score = answer_eval.get("Score", "medium") if answer_eval else "medium"
1665
1666 + # --- 3. Adaptive difficulty ---
1667   if answer_score == "excellent":
1668   state["difficulty_adjustment"] = "harder"
1669   elif answer_score in ("medium", "poor"):
1671   else:
1672   state["difficulty_adjustment"] = None
1673
1674 + # --- 4. Effective confidence ---
1675 + # eff_conf = interpret_confidence(voice_label, face_label, answer_score)
1676 + # state["effective_confidences"].append(eff_conf)
1677 +
1678 + #just for testing:
1679   eff_conf = {"effective_confidence": 0.6}
1680   state["effective_confidences"].append(eff_conf)
1681
1682 +
1683 + # --- LOG ---
1684   state["log"].append({
1685   "type": "answer",
1686   "question": last_q,
1694   "timestamp": time.time()
1695   })
1696
1697 + # --- Next or End ---
1698   qidx = state["question_idx"] + 1
1699   if qidx >= state["max_questions"]:
1700 + # Save as JSON (optionally)
1701   timestamp = time.strftime("%Y%m%d_%H%M%S")
1702   log_file = f"interview_log_{timestamp}.json"
1703   with open(log_file, "w", encoding="utf-8") as f:
1704   json.dump(state["log"], f, indent=2, ensure_ascii=False)
1705 + # Report
1706   summary = "# Interview Summary\n"
1707   for i, q in enumerate(state["questions"]):
1708   summary += (f"\n### Q{i + 1}: {q}\n"
1709   f"- *Answer*: {state['answers'][i]}\n"
1710   f"- *Q Eval*: {state['question_evaluations'][i]}\n"
1711   f"- *A Eval*: {state['answer_evaluations'][i]}\n"
1712 + #also this are removed just for testing :(
1713 + # f"- *Face Emotion: {state['face_labels'][i]}, **Voice Emotion*: {state['voice_labels'][i]}\n"
1714 + # f"- *Effective Confidence*: {state['effective_confidences'][i]['effective_confidence']}\n"
1715   f"- *Time*: {state['timings'][i]}s\n")
1716   summary += f"\n\n▶ Full log saved as {log_file}."
1717 + return (state, gr.update(visible=True, value=summary), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"))
1718   else:
1719 + # --- Build next prompt using adaptive difficulty ---
1720   state["question_idx"] = qidx
1721   state["q_start_time"] = time.time()
1722 + context = ""  # You can add your context logic here
1723   prompt = build_interview_prompt(
1724   conversation_history=state["conversation_history"],
1725   user_response=transcript,
1728   skills=data["skills"],
1729   seniority=data["seniority"],
1730   difficulty_adjustment=state["difficulty_adjustment"],
1731   voice_label=voice_label,
1732   )
1733   next_q = groq_llm.predict(prompt)
1734 + # Evaluate Q quality
1735   q_eval = eval_question_quality(next_q, data["job_role"], data["seniority"], None)
1736   state["questions"].append(next_q)
1737   state["question_evaluations"].append(q_eval)
1738   state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
1739   state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
1740   audio_path = bark_tts(next_q)
1741 + # Display evaluations
1742   eval_md = f"*Last Answer Eval:* {answer_eval}\n\n*Effective Confidence:* {eff_conf}"
1743 + return (
1744 + state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}",
1745 + gr.update(value=None), gr.update(value=None),
1746 + gr.update(visible=True, value=f"Last Detected - Face: {face_label}, Voice: {voice_label}"),
1747 + )
1748   confirm_btn.click(
1749   process_answer,
1750   [stt_transcript, user_audio_input, user_video_input, interview_state, user_data],
1751 + [interview_state, interview_summary, question_audio, question_text, user_audio_input, user_video_input, emotion_display]
1752 + ).then(
1753 + lambda: (gr.update(value=None), gr.update(value=None)), None, [user_audio_input, user_video_input]
1754   )
1755
1756   demo.launch(debug=True)
1757 +
1758 +
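The wiring this commit restores boils down to two events: the microphone `gr.Audio` component's `.change` fills the transcript box via Whisper, and the confirm button's `.click(...).then(...)` chain processes the answer and then clears the input. A stripped-down, self-contained sketch of that pattern (component and function names here are illustrative, not the app's):

```python
# Minimal sketch of the restored event wiring (names are illustrative).
import gradio as gr

def fake_transcribe(audio_path):
    # Stand-in for whisper_stt(audio_path).
    return f"(transcript of {audio_path})" if audio_path else ""

def handle_answer(transcript):
    # Stand-in for process_answer(...); returns text for a results panel.
    return f"Received answer: {transcript}"

with gr.Blocks() as demo:
    mic = gr.Audio(sources=["microphone"], type="filepath", label="Record Answer")
    transcript = gr.Textbox(label="Transcribed Answer (edit if needed)")
    confirm = gr.Button("Confirm Answer")
    result = gr.Markdown()

    # Recording finishes -> transcribe into the textbox.
    mic.change(fake_transcribe, mic, transcript)
    # Confirm -> process the answer, then clear the audio input for the next question.
    confirm.click(handle_answer, transcript, result).then(
        lambda: gr.update(value=None), None, mic
    )

if __name__ == "__main__":
    demo.launch()
```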