import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline

from app.questions import get_question

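# Load all models once at startup: Whisper for speech-to-text, two fine-tuned
# BERT checkpoints for answer relevance and answer confidence, and a DistilBERT
# emotion classifier (top_k=1 keeps only the best-scoring label).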
whisper_model = whisper.load_model("small")
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/final_confidence')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/final_confidence')
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
emotion_pipe = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=1)

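# Map raw labels from the emotion classifier to interview-friendly terms.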
interview_emotion_map = {
    "joy": ("Confident", "😄"),
    "fear": ("Nervous", "😨"),
    "sadness": ("Uncertain", "😟"),
    "anger": ("Frustrated", "😠"),
    "surprise": ("Curious", "😮"),
    "neutral": ("Calm", "😐"),
    "disgust": ("Disengaged", "😒"),
}

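# Question banks: technical questions come from the app.questions module;
# non-technical (behavioral) questions are hard-coded below.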
technical_questions = [get_question(i) for i in range(6)]

non_technical_questions = [
    "Tell me about yourself.",
    "What are your strengths and weaknesses?",
    "Where do you see yourself in 5 years?",
    "How do you handle stress or pressure?",
    "Describe a time you faced a conflict and how you resolved it.",
    "What motivates you to do your best?",
]

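# Index of the question currently displayed in each tab.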
current_tech_index = 0
current_non_tech_index = 0

def predict_relevance(question, answer):
    """Classify whether an answer is relevant to the question asked."""
    if not answer.strip():
        return "Irrelevant"
    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()
    with torch.no_grad():
        outputs = context_model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
    return "Relevant" if probs[0, 1].item() > 0.5 else "Irrelevant"

def predict_confidence(question, answer, threshold=0.4):
    """Classify whether an answer sounds confident; `threshold` is the
    minimum probability required for the positive class."""
    if not answer.strip():
        return "Not Confident"
    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()
    with torch.no_grad():
        outputs = confidence_model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
    return "Confident" if probs[0, 1].item() > threshold else "Not Confident"

def detect_emotion(answer):
    """Return an (emotion, emoji) pair for the transcribed answer."""
    if not answer.strip():
        return "No Answer", ""
    result = emotion_pipe(answer)
    label = result[0][0]["label"].lower()
    return interview_emotion_map.get(label, ("Unknown", "❓"))

def show_non_tech_question():
    return non_technical_questions[current_non_tech_index]


def next_non_tech_question():
    """Advance to the next behavioral question and clear previous outputs."""
    global current_non_tech_index
    current_non_tech_index = (current_non_tech_index + 1) % len(non_technical_questions)
    return non_technical_questions[current_non_tech_index], "", ""


def show_tech_question():
    return technical_questions[current_tech_index]


def next_tech_question():
    """Advance to the next technical question and clear previous outputs."""
    global current_tech_index
    current_tech_index = (current_tech_index + 1) % len(technical_questions)
    return technical_questions[current_tech_index], "", "", ""

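# Transcription helpers: both decode the recording with Whisper, then run the
# tab-specific analyses on the resulting transcript.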
def transcribe_and_analyze_non_tech(audio, question):
    # The change event also fires when the recording is cleared (audio=None).
    if audio is None:
        return "", ""
    try:
        audio_data = whisper.load_audio(audio)
        audio_data = whisper.pad_or_trim(audio_data)
        mel = whisper.log_mel_spectrogram(audio_data).to(whisper_model.device)
        result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
        text = result.text
        emotion_text, emoji = detect_emotion(text)
        return text, f"{emotion_text} {emoji}"
    except Exception as e:
        return f"Error: {e}", "⚠️"

def transcribe_and_analyze_tech(audio, question):
    # The change event also fires when the recording is cleared (audio=None).
    if audio is None:
        return "", "", ""
    try:
        audio_data = whisper.load_audio(audio)
        audio_data = whisper.pad_or_trim(audio_data)
        mel = whisper.log_mel_spectrogram(audio_data).to(whisper_model.device)
        result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
        text = result.text
        return text, predict_relevance(question, text), predict_confidence(question, text)
    except Exception as e:
        return f"Error: {e}", "", ""

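# Two-tab UI: the non-technical tab reports emotion; the technical tab reports
# answer relevance and confidence.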
with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
    gr.HTML("<h1 style='text-align: center; font-size: 32px;'>INTERVIEW PREPARATION MODEL</h1>")

    with gr.Tabs():

with gr.Tab("Non-Technical"): |
|
gr.Markdown("### Emotional Context Analysis (๐ง + ๐)") |
|
q1 = gr.Textbox(label="Interview Question", value=show_non_tech_question(), interactive=False) |
|
a1 = gr.Audio(type="filepath", label="Record Your Answer") |
|
t1 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4) |
|
e1 = gr.Textbox(label="Detected Emotion", interactive=False) |
|
|
|
a1.change( |
|
fn=transcribe_and_analyze_non_tech, |
|
inputs=[a1, q1], |
|
outputs=[t1, e1] |
|
) |
|
|
|
btn1 = gr.Button("Next Question") |
|
btn1.click( |
|
fn=next_non_tech_question, |
|
inputs=[], |
|
outputs=[q1, t1, e1] |
|
) |
|
|
|
|
|
with gr.Tab("Technical"): |
|
gr.Markdown("### Technical Question Analysis (๐ + ๐ค)") |
|
q2 = gr.Textbox(label="Interview Question", value=show_tech_question(), interactive=False) |
|
a2 = gr.Audio(type="filepath", label="Record Your Answer") |
|
t2 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4) |
|
c2 = gr.Textbox(label="Context Analysis", interactive=False) |
|
f2 = gr.Textbox(label="Confidence Analysis", interactive=False) |
|
|
|
a2.change( |
|
fn=transcribe_and_analyze_tech, |
|
inputs=[a2, q2], |
|
outputs=[t2, c2, f2] |
|
) |
|
|
|
btn2 = gr.Button("Next Question") |
|
btn2.click( |
|
fn=next_tech_question, |
|
inputs=[], |
|
outputs=[q2, t2, c2, f2] |
|
) |
|

demo.launch(share=True)