"""Gradio interview-practice app.

Records a spoken answer, transcribes it with Whisper and then either
detects the speaker's emotion (non-technical tab) or scores the answer's
relevance and confidence with two fine-tuned BERT classifiers
(technical tab).
"""

import logging
from typing import Optional, Tuple

import gradio as gr
import torch
import whisper
from transformers import BertForSequenceClassification, BertTokenizer, pipeline

from app.questions import get_question

logger = logging.getLogger(__name__)

# Load models
whisper_model = whisper.load_model("small")

confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/final_confidence')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/final_confidence')
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')

# The classifiers are only ever used for inference, so switch them to eval
# mode once here instead of on every request.
confidence_model.eval()
context_model.eval()

emotion_pipe = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    top_k=1,
)

# Emotion map with labels and emojis
# NOTE(review): labels missing from this map are shown as "Unknown";
# confirm the keys against the label set the emotion model actually emits.
interview_emotion_map = {
    "joy": ("Confident", "🙂"),
    "fear": ("Nervous", "😨"),
    "sadness": ("Uncertain", "🙁"),
    "anger": ("Frustrated", "😠"),
    "surprise": ("Curious", "😮"),
    "neutral": ("Calm", "😐"),
    "disgust": ("Disengaged", "😒"),
}

# Static question sets
technical_questions = [get_question(i) for i in range(6)]
non_technical_questions = [
    "Tell me about yourself.",
    "What are your strengths and weaknesses?",
    "Where do you see yourself in 5 years?",
    "How do you handle stress or pressure?",
    "Describe a time you faced a conflict and how you resolved it.",
    "What motivates you to do your best?",
]

# Index trackers
# NOTE(review): these are module-level, so every session served by this
# process advances the same counters; consider per-session gr.State.
current_tech_index = 0
current_non_tech_index = 0


def _positive_class_probability(model, tokenizer, question, answer) -> float:
    """Return the softmax probability of class index 1 for a question/answer pair."""
    inputs = tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.softmax(logits, dim=-1)[0, 1].item()


# Relevance prediction
def predict_relevance(question, answer, threshold=0.5):
    """Classify whether ``answer`` addresses ``question``.

    Args:
        question: The interview question text.
        answer: The transcribed answer text.
        threshold: Class-1 probability that must be exceeded to count as
            relevant.

    Returns:
        ``"Relevant"`` or ``"Irrelevant"``; a blank answer is always
        ``"Irrelevant"`` and skips the model.
    """
    if not answer.strip():
        return "Irrelevant"
    score = _positive_class_probability(context_model, context_tokenizer, question, answer)
    return "Relevant" if score > threshold else "Irrelevant"


# Confidence prediction
def predict_confidence(question, answer, threshold=0.4):
    """Classify whether ``answer`` sounds confident.

    Args:
        question: The interview question text.
        answer: The transcribed answer text.
        threshold: Class-1 probability that must be exceeded to count as
            confident.

    Returns:
        ``"Confident"`` or ``"Not Confident"``; a blank answer is always
        ``"Not Confident"`` and skips the model.
    """
    if not answer.strip():
        return "Not Confident"
    score = _positive_class_probability(confidence_model, confidence_tokenizer, question, answer)
    return "Confident" if score > threshold else "Not Confident"


# Emotion detection
def detect_emotion(answer):
    """Map the top emotion predicted for ``answer`` to an interview label.

    Returns:
        A ``(label, emoji)`` tuple from ``interview_emotion_map``,
        ``("Unknown", "❓")`` for an unmapped label, or ``("No Answer", "")``
        when the answer is blank.
    """
    if not answer.strip():
        return "No Answer", ""
    top = emotion_pipe(answer)
    # Unwrap whatever list nesting the pipeline returns until the single
    # {"label": ..., "score": ...} prediction is reached, so this does not
    # depend on one exact output shape.
    while isinstance(top, (list, tuple)):
        top = top[0]
    return interview_emotion_map.get(top["label"].lower(), ("Unknown", "❓"))


# Question navigation (non-tech)
def show_non_tech_question():
    """Return the non-technical question at the current index."""
    return non_technical_questions[current_non_tech_index]


def next_non_tech_question():
    """Advance to the next non-technical question, wrapping at the end.

    Returns:
        ``(question, "", "")`` - the new question plus empty strings that
        clear the transcript and emotion boxes.
    """
    global current_non_tech_index
    current_non_tech_index = (current_non_tech_index + 1) % len(non_technical_questions)
    return non_technical_questions[current_non_tech_index], "", ""


# Question navigation (tech)
def show_tech_question():
    """Return the technical question at the current index."""
    return technical_questions[current_tech_index]


def next_tech_question():
    """Advance to the next technical question, wrapping at the end.

    Returns:
        ``(question, "", "", "")`` - the new question plus empty strings that
        clear the transcript, context and confidence boxes.
    """
    global current_tech_index
    current_tech_index = (current_tech_index + 1) % len(technical_questions)
    return technical_questions[current_tech_index], "", "", ""


def _transcribe(audio_path: str) -> str:
    """Transcribe the audio file at ``audio_path`` and return the stripped text.

    Uses ``transcribe`` rather than ``pad_or_trim`` + ``decode``: the latter
    only looks at the first 30 seconds, which silently cut off longer
    answers. ``fp16=False`` keeps full-precision decoding as before.
    """
    result = whisper_model.transcribe(audio_path, fp16=False)
    return result["text"].strip()


# Transcribe + analyze (non-technical)
def transcribe_and_analyze_non_tech(audio: Optional[str], question: str) -> Tuple[str, str]:
    """Transcribe a recorded answer and detect its emotion.

    Args:
        audio: Path to the recorded audio file, or ``None`` when the
            recording has been cleared.
        question: The current question; unused here but supplied by the UI
            wiring.

    Returns:
        ``(transcript, "<emotion> <emoji>")``, two empty strings when there
        is no recording, or ``("Error: ...", "❓")`` on failure.
    """
    if not audio:
        # Clearing the recorder also fires the change event; just blank the outputs.
        return "", ""
    try:
        text = _transcribe(audio)
        emotion_text, emoji = detect_emotion(text)
        return text, f"{emotion_text} {emoji}"
    except Exception as e:  # UI boundary: show the failure instead of crashing the handler
        logger.exception("Non-technical answer analysis failed")
        return f"Error: {e}", "❓"


# Transcribe + analyze (technical)
def transcribe_and_analyze_tech(audio: Optional[str], question: str) -> Tuple[str, str, str]:
    """Transcribe a recorded answer and score its relevance and confidence.

    Args:
        audio: Path to the recorded audio file, or ``None`` when the
            recording has been cleared.
        question: The question the answer is scored against.

    Returns:
        ``(transcript, relevance, confidence)``, three empty strings when
        there is no recording, or ``("Error: ...", "", "")`` on failure.
    """
    if not audio:
        # Clearing the recorder also fires the change event; just blank the outputs.
        return "", "", ""
    try:
        text = _transcribe(audio)
        return text, predict_relevance(question, text), predict_confidence(question, text)
    except Exception as e:  # UI boundary: show the failure instead of crashing the handler
        logger.exception("Technical answer analysis failed")
        return f"Error: {e}", "", ""


# UI layout
with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
    # NOTE(review): the title carries no markup; if heading tags were
    # intended here, restore them.
    gr.HTML("\n\nINTERVIEW PREPARATION MODEL\n\n")

    with gr.Tabs():
        # NON-TECHNICAL TAB
        with gr.Tab("Non-Technical"):
            gr.Markdown("### Emotional Context Analysis (🧠 + 😊)")
            q1 = gr.Textbox(label="Interview Question", value=show_non_tech_question(), interactive=False)
            a1 = gr.Audio(type="filepath", label="Record Your Answer")
            t1 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
            e1 = gr.Textbox(label="Detected Emotion", interactive=False)
            a1.change(
                fn=transcribe_and_analyze_non_tech,
                inputs=[a1, q1],
                outputs=[t1, e1],
            )
            btn1 = gr.Button("Next Question")
            btn1.click(
                fn=next_non_tech_question,
                inputs=[],
                outputs=[q1, t1, e1],
            )

        # TECHNICAL TAB
        with gr.Tab("Technical"):
            gr.Markdown("### Technical Question Analysis (🎓 + 🤖)")
            q2 = gr.Textbox(label="Interview Question", value=show_tech_question(), interactive=False)
            a2 = gr.Audio(type="filepath", label="Record Your Answer")
            t2 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
            c2 = gr.Textbox(label="Context Analysis", interactive=False)
            f2 = gr.Textbox(label="Confidence Analysis", interactive=False)
            a2.change(
                fn=transcribe_and_analyze_tech,
                inputs=[a2, q2],
                outputs=[t2, c2, f2],
            )
            btn2 = gr.Button("Next Question")
            btn2.click(
                fn=next_tech_question,
                inputs=[],
                outputs=[q2, t2, c2, f2],
            )

# Launched at module level, as before, so running the file starts the app.
demo.launch(share=True)