Update app.py
app.py CHANGED
@@ -1857,26 +1857,35 @@ def extract_candidate_details(file_path):
 # )
 
 # demo.launch(debug=True)
-
 import gradio as gr
 import time
 import tempfile
 import numpy as np
 import scipy.io.wavfile as wavfile
 import os
+import json
 import torch
 import whisper
 from transformers import BarkModel, AutoProcessor
 
-#
-
+# Clear GPU memory
+torch.cuda.empty_cache()
+
+# Load only essential models
+print("🔁 Loading Bark model...")
+model_bark = BarkModel.from_pretrained("suno/bark")
 processor_bark = AutoProcessor.from_pretrained("suno/bark")
+model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
 bark_voice_preset = "v2/en_speaker_6"
+print("✅ Bark model loaded")
 
-
+print("🔁 Loading Whisper model...")
 whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
+print("✅ Whisper model loaded")
 
 def bark_tts(text):
+    """Convert text to speech using Bark"""
+    print(f"🔁 Synthesizing TTS for: {text}")
     inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
     inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
     speech_values = model_bark.generate(**inputs)
@@ -1887,33 +1896,223 @@ def bark_tts(text):
     return temp_wav.name
 
 def whisper_stt(audio_path):
+    """Convert speech to text using Whisper"""
     if not audio_path or not os.path.exists(audio_path):
         return ""
     result = whisper_model.transcribe(audio_path)
     return result["text"]
 
-#
-
-
-
-
-    transcript = whisper_stt(audio_path)
-    state["conversation"].append({"role": "Candidate", "content": transcript})
-
-    prompt = "\n".join([f"{turn['role']}: {turn['content']}" for turn in state["conversation"]])
-    next_q = groq_llm_predict(prompt)
-    state["conversation"].append({"role": "Interviewer", "content": next_q})
-
-    audio_out = bark_tts(next_q)
-    return state, audio_out, transcript
-
-with gr.Blocks() as demo:
-    state = gr.State({"conversation": []})
-    question_audio = gr.Audio(label="Interviewer's Question", interactive=False, autoplay=True)
-    user_audio_input = gr.Audio(source="microphone", type="filepath", label="Your Answer")
-    transcript_box = gr.Textbox(label="Transcript", interactive=False)
-
-    user_audio_input.change(interview_loop, [state, user_audio_input], [state, question_audio, transcript_box])
+# Simple predefined questions for quick interview
+INTERVIEW_QUESTIONS = [
+    "Tell me about yourself and your background.",
+    "What are your greatest strengths and how do they relate to this role?"
+]
 
-
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # State variables
+    interview_state = gr.State({"current_question": 0, "answers": [], "started": False})
+
+    # UI Layout
+    with gr.Column(visible=True) as start_section:
+        gr.Markdown("## Quick Interview Setup")
+        name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
+        role_input = gr.Textbox(label="Job Role", placeholder="e.g., Software Engineer")
+        start_btn = gr.Button("Start Interview", variant="primary")
+
+    with gr.Column(visible=False) as interview_section:
+        gr.Markdown("## Interview in Progress")
+        question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
+        question_text = gr.Markdown()
+
+        # Audio input for user response
+        user_audio_input = gr.Audio(
+            sources=["microphone"],
+            type="filepath",
+            label="Record Your Answer"
+        )
+
+        # Transcription display
+        transcript_display = gr.Textbox(
+            label="Your Answer (Transcribed)",
+            interactive=False
+        )
+
+        # Control buttons
+        with gr.Row():
+            submit_answer_btn = gr.Button("Submit Answer", variant="primary")
+            next_question_btn = gr.Button("Next Question", visible=False)
+
+        # Progress indicator
+        progress_text = gr.Markdown()
+
+        # Interview summary
+        interview_summary = gr.Markdown(visible=False)
+
+    # Event handlers
+    def start_interview(name, role):
+        """Initialize interview with first question"""
+        if not name.strip() or not role.strip():
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Please fill in both name and role fields."),
+                {"current_question": 0, "answers": [], "started": False, "name": "", "role": ""}
+            )
+
+        # Generate first question audio
+        question_text = INTERVIEW_QUESTIONS[0]
+        audio_path = bark_tts(question_text)
+
+        state = {
+            "current_question": 0,
+            "answers": [],
+            "started": True,
+            "name": name,
+            "role": role
+        }
+
+        return (
+            gr.update(visible=False),  # start_section
+            gr.update(visible=True),  # interview_section
+            audio_path,  # question_audio
+            f"**Question 1:** {question_text}",  # question_text
+            gr.update(value=f"Question 1 of {len(INTERVIEW_QUESTIONS)}"),  # progress_text
+            state
+        )
+
+    def transcribe_audio(audio_path):
+        """Transcribe user's audio input"""
+        if not audio_path:
+            return ""
+        return whisper_stt(audio_path)
+
+    def submit_answer(transcript, state):
+        """Process submitted answer and prepare for next question"""
+        if not transcript.strip():
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Please record an answer before submitting."),
+                gr.update(),
+                state
+            )
+
+        # Store the answer
+        state["answers"].append({
+            "question": INTERVIEW_QUESTIONS[state["current_question"]],
+            "answer": transcript,
+            "timestamp": time.time()
+        })
+
+        current_q = state["current_question"]
+
+        # Check if this was the last question
+        if current_q >= len(INTERVIEW_QUESTIONS) - 1:
+            # Interview complete
+            summary = generate_interview_summary(state)
+            return (
+                gr.update(visible=False),  # submit_answer_btn
+                gr.update(visible=False),  # next_question_btn
+                gr.update(value=None),  # question_audio
+                gr.update(value="**Interview Complete!**"),  # question_text
+                gr.update(value="Interview finished. Thank you!"),  # progress_text
+                gr.update(visible=True, value=summary),  # interview_summary
+                state
+            )
+        else:
+            # Show next question button
+            return (
+                gr.update(visible=False),  # submit_answer_btn
+                gr.update(visible=True),  # next_question_btn
+                gr.update(),  # question_audio
+                gr.update(),  # question_text
+                gr.update(value=f"Answer submitted! Ready for question {current_q + 2}?"),  # progress_text
+                gr.update(),  # interview_summary
+                state
+            )
+
+    def next_question(state):
+        """Move to next question"""
+        state["current_question"] += 1
+        current_q = state["current_question"]
+
+        if current_q < len(INTERVIEW_QUESTIONS):
+            question_text = INTERVIEW_QUESTIONS[current_q]
+            audio_path = bark_tts(question_text)
+
+            return (
+                gr.update(visible=True),  # submit_answer_btn
+                gr.update(visible=False),  # next_question_btn
+                audio_path,  # question_audio
+                f"**Question {current_q + 1}:** {question_text}",  # question_text
+                gr.update(value=f"Question {current_q + 1} of {len(INTERVIEW_QUESTIONS)}"),  # progress_text
+                gr.update(value=None),  # user_audio_input (clear)
+                gr.update(value=""),  # transcript_display (clear)
+                state
+            )
+        else:
+            # This shouldn't happen, but handle gracefully
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Interview Complete!"),
+                gr.update(value="All questions answered."),
+                gr.update(),
+                gr.update(),
+                state
+            )
+
+    def generate_interview_summary(state):
+        """Generate final interview summary"""
+        summary = f"# Interview Summary\n\n"
+        summary += f"**Candidate:** {state['name']}\n"
+        summary += f"**Role:** {state['role']}\n"
+        summary += f"**Date:** {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+
+        for i, qa in enumerate(state['answers']):
+            summary += f"### Question {i + 1}\n"
+            summary += f"**Q:** {qa['question']}\n\n"
+            summary += f"**A:** {qa['answer']}\n\n"
+            summary += "---\n\n"
+
+        # Save to file
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        log_file = f"interview_log_{timestamp}.json"
+        with open(log_file, "w", encoding="utf-8") as f:
+            json.dump(state, f, indent=2, ensure_ascii=False)
+
+        summary += f"*Interview log saved as {log_file}*"
+        return summary
+
+    # Wire up events
+    start_btn.click(
+        start_interview,
+        inputs=[name_input, role_input],
+        outputs=[start_section, interview_section, question_audio, question_text, progress_text, interview_state]
+    )
+
+    user_audio_input.change(
+        transcribe_audio,
+        inputs=[user_audio_input],
+        outputs=[transcript_display]
+    )
+
+    submit_answer_btn.click(
+        submit_answer,
+        inputs=[transcript_display, interview_state],
+        outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, interview_summary, interview_state]
+    )
+
+    next_question_btn.click(
+        next_question,
+        inputs=[interview_state],
+        outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, user_audio_input, transcript_display, interview_state]
+    )
 
+if __name__ == "__main__":
+    demo.launch(debug=True)