husseinelsaadi committed
Commit 4a764f7 · verified · 1 Parent(s): cd89a2a

Update app.py

Files changed (1): app.py (+114, -39)
app.py CHANGED
@@ -1740,45 +1740,72 @@ import json
 from transformers import BarkModel, AutoProcessor
 import torch, gc
 import whisper
+import asyncio
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 print(torch.cuda.is_available())
 torch.cuda.empty_cache()
 gc.collect()
 
-# Bark TTS
-print("🔍 Loading Bark model...")
-model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
-print("✅ Bark model loaded")
-print("🔍 Loading Bark processor...")
-processor_bark = AutoProcessor.from_pretrained("suno/bark")
-print("✅ Bark processor loaded")
-bark_voice_preset = "v2/en_speaker_12"
-
-def bark_tts(text):
-    print(f"🔍 Synthesizing TTS for: {text}")
-    inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
-    input_ids = inputs["input_ids"].to(model_bark.device)
-    start = time.time()
-    with torch.no_grad():
-        speech_values = model_bark.generate(
-            input_ids=input_ids,
-            do_sample=True,
-            fine_temperature=0.4,
-            coarse_temperature=0.8
-        )
-    print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
-    speech = speech_values.cpu().numpy().squeeze()
-    speech = (speech * 32767).astype(np.int16)
-    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-    wavfile.write(temp_wav.name, 22050, speech)
-    return temp_wav.name
-
-# Whisper STT
-print("🔍 Loading Whisper model...")
-whisper_model = whisper.load_model("base", device="cuda")
-print("✅ Whisper model loaded")
+# Global variables for lazy loading
+model_bark = None
+processor_bark = None
+whisper_model = None
+bark_voice_preset = "v2/en_speaker_9"
+
+# Thread pool for async operations
+executor = ThreadPoolExecutor(max_workers=2)
+
+def load_models_lazy():
+    """Load models only when needed"""
+    global model_bark, processor_bark, whisper_model
+
+    if model_bark is None:
+        print("🔍 Loading Bark model...")
+        model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
+        print("✅ Bark model loaded")
+
+    if processor_bark is None:
+        print("🔍 Loading Bark processor...")
+        processor_bark = AutoProcessor.from_pretrained("suno/bark")
+        print("✅ Bark processor loaded")
+
+    if whisper_model is None:
+        print("🔍 Loading Whisper model...")
+        whisper_model = whisper.load_model("base", device="cuda")
+        print("✅ Whisper model loaded")
+
+def bark_tts_async(text):
+    """Async TTS generation"""
+    def _generate():
+        load_models_lazy()  # Load only when needed
+        print(f"🔍 Synthesizing TTS for: {text}")
+        inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
+        input_ids = inputs["input_ids"].to(model_bark.device)
+        start = time.time()
+        with torch.no_grad():
+            speech_values = model_bark.generate(
+                input_ids=input_ids,
+                do_sample=True,
+                fine_temperature=0.4,
+                coarse_temperature=0.8
+            )
+        print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
+        speech = speech_values.cpu().numpy().squeeze()
+        speech = (speech * 32767).astype(np.int16)
+        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        wavfile.write(temp_wav.name, 22050, speech)
+        return temp_wav.name
+
+    return executor.submit(_generate)
+
 def whisper_stt(audio_path):
-    if not audio_path or not os.path.exists(audio_path): return ""
+    """Lazy loading whisper STT"""
+    if not audio_path or not os.path.exists(audio_path):
+        return ""
+
+    load_models_lazy()  # Load only when needed
     result = whisper_model.transcribe(audio_path)
     return result["text"]
 
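Side note on the hunk above: `threading` is imported but `load_models_lazy` takes no lock, so with `max_workers=2` two executor jobs could pass the `is None` check together and load a model twice. A minimal stand-alone sketch of the same lazy-load + thread-pool pattern with a lock added (all names here are illustrative, not from app.py):

```python
import threading
from concurrent.futures import ThreadPoolExecutor

_model = None
_model_lock = threading.Lock()
executor = ThreadPoolExecutor(max_workers=2)

def load_model_lazy():
    """Load the stand-in 'model' once, on first use; the lock makes check-then-load atomic."""
    global _model
    with _model_lock:
        if _model is None:
            print("loading model...")   # happens once, on the first call only
            _model = object()           # stand-in for an expensive from_pretrained() call
    return _model

def synth_async(text):
    """Submit a slow job to the pool; the caller gets a concurrent.futures.Future immediately."""
    def _generate():
        model = load_model_lazy()
        return f"fake audio for {text!r}"  # stand-in for writing a .wav file
    return executor.submit(_generate)

future = synth_async("hello")
print(future.result())  # .result() blocks this caller only, not the UI thread
```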
@@ -1790,6 +1817,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     user_data = gr.State({})
     interview_state = gr.State({})
     missing_fields_state = gr.State([])
+    tts_future = gr.State(None)  # Store async TTS future
 
     with gr.Column(visible=True) as user_info_section:
         gr.Markdown("## Candidate Information")
@@ -1808,6 +1836,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Column(visible=False) as interview_pre_section:
         pre_interview_greeting_md = gr.Markdown()
        start_interview_final_btn = gr.Button("Start Interview")
+        loading_status = gr.Markdown("", visible=False)
 
     with gr.Column(visible=False) as interview_section:
         gr.Markdown("## Interview in Progress")
@@ -1821,6 +1850,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     def validate_start_btn(cv_file, job_desc):
         return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
+
     cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
     job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
 
@@ -1839,17 +1869,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         else:
             greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
             return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
+
     start_btn.click(process_and_route_initial, [cv_file, job_desc], [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md])
 
     def show_missing(missing):
         if missing is None: missing = []
         return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
+
     missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
 
     def validate_fields(name, role, seniority, skills, missing):
         if not missing: return gr.update(interactive=False)
         all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip()))])
         return gr.update(interactive=all_filled)
+
     for inp in [name_in, role_in, seniority_in, skills_in]:
         inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
 
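Side note: each clause in `validate_fields` is the implication "required → filled" written as `(not required) or filled`. A tiny stand-alone illustration with made-up values:

```python
# Illustration only; `missing` and the field values are made up.
missing = ["name", "skills"]
fields = {"name": "Ada", "job_role": "", "seniority": None, "skills": " "}

# A field passes if it is not required (not in `missing`) or has real content.
all_filled = all(
    (key not in missing) or bool(str(value or "").strip())
    for key, value in fields.items()
)
print(all_filled)  # False: "skills" is required but only whitespace
```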
@@ -1860,14 +1893,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
         greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
         return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
+
     submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
 
-    def start_interview(data):
+    def start_interview_immediate(data):
+        """Start interview immediately, begin TTS generation in background"""
         state = {
             "questions": [], "answers": [], "timings": [], "question_evaluations": [], "answer_evaluations": [],
             "conversation_history": [], "difficulty_adjustment": None, "question_idx": 0, "max_questions": 3,
             "q_start_time": time.time(), "log": []
         }
+
+        # Generate question text first (fast)
         context = ""
         prompt = build_interview_prompt(
             conversation_history=[], user_response="", context=context, job_role=data["job_role"],
@@ -1875,16 +1912,50 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         )
         first_q = groq_llm.predict(prompt)
         q_eval = {"Score": "N/A", "Reasoning": "Skipped to reduce processing time", "Improvements": []}
+
         state["questions"].append(first_q)
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
-        audio_path = bark_tts(first_q)
         state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
-        return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
-    start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
+
+        # Start TTS generation in background
+        tts_future_obj = bark_tts_async(first_q)
+
+        # Return immediately with loading message
+        return (state, tts_future_obj,
+                gr.update(visible=False),
+                gr.update(visible=True),
+                gr.update(visible=True, value="🔄 Generating audio..."),
+                gr.update(value=None),
+                f"*Question 1:* {first_q}")
+
+    def check_tts_ready(state, tts_future_obj):
+        """Check if TTS is ready and update audio"""
+        if tts_future_obj and tts_future_obj.done():
+            try:
+                audio_path = tts_future_obj.result()
+                return gr.update(value=audio_path), gr.update(visible=False), None
+            except Exception as e:
+                print(f"TTS Error: {e}")
+                return gr.update(value=None), gr.update(value=f"Error generating audio: {e}"), None
+        else:
+            return gr.update(), gr.update(), tts_future_obj
+
+    start_interview_final_btn.click(
+        start_interview_immediate,
+        [user_data],
+        [interview_state, tts_future, interview_pre_section, interview_section, loading_status, question_audio, question_text]
+    ).then(
+        # Check TTS status every 500ms
+        check_tts_ready,
+        [interview_state, tts_future],
+        [question_audio, loading_status, tts_future],
+        every=0.5
+    )
 
     def transcribe(audio_path):
         return whisper_stt(audio_path)
+
     user_audio_input.change(transcribe, user_audio_input, stt_transcript)
 
     def process_answer(transcript, audio_path, state, data):
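Side note: the `.then(check_tts_ready, ..., every=0.5)` chain re-runs `check_tts_ready` on a timer until the background future reports `done()`, at which point `result()` is safe to call. The same pattern stripped of Gradio, stdlib only (path and timings made up):

```python
import time
from concurrent.futures import ThreadPoolExecutor

def slow_tts_job():
    """Stand-in for Bark generation: sleeps, then 'returns' an audio path."""
    time.sleep(1.5)
    return "/tmp/question.wav"  # made-up path

with ThreadPoolExecutor(max_workers=1) as pool:
    future = pool.submit(slow_tts_job)
    while not future.done():        # non-blocking check, like check_tts_ready's future.done()
        print("🔄 Generating audio...")
        time.sleep(0.5)             # the every=0.5 cadence from the commit
    try:
        print("ready:", future.result())  # safe now; re-raises if the job raised
    except Exception as e:
        print(f"TTS Error: {e}")          # mirrors the except branch above
```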
@@ -1948,7 +2019,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
         state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
-        audio_path = bark_tts(next_q)
+
+        # Generate TTS asynchronously for next question too
+        audio_future = bark_tts_async(next_q)
+        # For now, we'll wait for it (you can make this async too)
+        audio_path = audio_future.result()
+
         eval_md = f"*Last Answer Eval:* {answer_eval}"
         return state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}", gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=eval_md)
 
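Side note: unlike the first question, this path blocks on `audio_future.result()` with no timeout, so a hung Bark generation would hang the answer handler. `Future.result` accepts a `timeout`; a hedged sketch of a bounded wait (the 120 s figure is an assumption, not from the commit):

```python
import concurrent.futures

def wait_for_audio(audio_future, timeout_s=120):
    """Bound the blocking wait; return None instead of hanging forever."""
    try:
        return audio_future.result(timeout=timeout_s)  # raises TimeoutError when late
    except concurrent.futures.TimeoutError:
        print("⏱️ TTS timed out; continuing without audio")
        return None
```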
@@ -1962,4 +2038,3 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 demo.launch(debug=True)
 
-