"""Interview preparation demo: transcribes a spoken answer with Whisper and
scores it for relevance and confidence with BERT-based classifiers, served
through a Gradio interface."""

import os

# Install/upgrade dependencies at startup before they are imported
os.system("pip install --upgrade openai-whisper torch")
os.system("pip install --upgrade transformers")

import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from app.questions import get_question


# Speech-to-text model for transcribing recorded answers
whisper_model = whisper.load_model("small")

# BERT classifier used to judge how confident an answer sounds
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/Confidence')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/Confidence')

# BERT classifier used to judge whether an answer is relevant to the question
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')


def predict_relevance(question, answer):
    """Classify whether the transcribed answer is relevant to the question."""
    if not answer.strip():
        return "Irrelevant"

    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()

    with torch.no_grad():
        outputs = context_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)

    # Probability of the "relevant" class (index 1)
    threshold = 0.5
    relevant_prob = probabilities[0, 1]

    if relevant_prob > threshold:
        return "Relevant"
    else:
        return "Irrelevant"


def predict_confidence(question, answer):
    """Classify whether the transcribed answer sounds confident."""
    if not answer.strip():
        return "Not Confident"

    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()

    with torch.no_grad():
        outputs = confidence_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)

    # Debug output
    print("Logits:", logits)
    print("Probabilities:", probabilities)

    # Probability of the "confident" class (index 1)
    confidence_prob = probabilities[0, 1]

    return "Confident" if confidence_prob > 0.40 else "Not Confident"


def fetch_questions():
    """Load the first six questions from the question bank."""
    return [get_question(i) for i in range(6)]


questions = fetch_questions()
current_question_index = 0


def show_question():
    """Return the question at the current index."""
    return questions[current_question_index]


def next_question():
    """Advance to the next question and clear the previous answer and results."""
    global current_question_index
    current_question_index = (current_question_index + 1) % len(questions)
    return show_question(), None, "", "", ""


def transcribe_and_analyze(audio, question):
    """Transcribe the recorded answer and run relevance and confidence analysis."""
    try:
        # Load and preprocess the recording for Whisper
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

        # Decode with fp16 disabled so the model also runs on CPU
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(whisper_model, mel, options)
        transcribed_text = result.text

        # Score the transcription against the current question
        context_result = predict_relevance(question, transcribed_text)
        confidence_result = predict_confidence(question, transcribed_text)

        return transcribed_text, context_result, confidence_result
    except Exception as e:
        return f"Error: {str(e)}", "", ""
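

# Gradio interface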
with gr.Blocks() as demo:

    gr.HTML('''
    <style>
        body {
            background-color: #f0f0f0;
        }
        #title {
            color: grey;
            font-size: 30px;
            text-align: center;
            margin-bottom: 20px;
        }
        .transcribe-btn, .next-btn {
            background-color: #4CAF50;
            color: white;
            font-size: 16px;
            padding: 10px 20px;
            border-radius: 5px;
            cursor: pointer;
            margin-top: 10px;
        }
        .transcribe-btn:hover, .next-btn:hover {
            background-color: #45a049;
        }
        #question-box {
            font-size: 20px;
            color: #555;
            text-align: center;
        }
        #text-box {
            font-size: 18px;
            color: #333;
        }
        #context-box, #confidence-box {
            font-size: 18px;
            color: #333;
        }
    </style>
    ''')

    gr.Markdown("<h1 id='title'>INTERVIEW PREPARATION MODEL</h1>")

    with gr.Row():
        question_display = gr.Textbox(label="Interview Question", value=show_question(), interactive=False, elem_id="question-box")

    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Record Your Answer")

    with gr.Row():
        transcribed_text = gr.Textbox(label="Your Answer (Transcription)", interactive=False, lines=5, elem_id="text-box")

    with gr.Row():
        context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False, elem_id="context-box")

    with gr.Row():
        confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False, elem_id="confidence-box")
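
    # When a new recording is provided, transcribe it and run both analyses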
    audio_input.change(fn=transcribe_and_analyze,
                       inputs=[audio_input, question_display],
                       outputs=[transcribed_text, context_analysis_result, confidence_analysis_result])

    with gr.Row():
        next_button = gr.Button("Next Question", elem_classes="next-btn")
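
    # Advance to the next question and clear the recording and result fields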
    next_button.click(next_question,
                      outputs=[question_display, audio_input, transcribed_text, context_analysis_result, confidence_analysis_result])


demo.launch(share=True)