import os
# Install/upgrade runtime dependencies when the app starts
os.system("pip install --upgrade openai-whisper torch")
os.system("pip install --upgrade transformers")
import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from app.questions import get_question
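# NOTE (assumption): app/questions.py ships with this Space and exposes
# get_question(index) -> str; fetch_questions() below pulls indices 0-5 from it.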
# Load Whisper model
whisper_model = whisper.load_model("small")
# Load the pre-trained model and tokenizer for confidence analysis
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/Confidence_model3')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/Confidence_model3')
# Load pre-trained context analysis model (BERT-based)
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
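# NOTE (assumption): both fine-tuned BERT heads are binary classifiers and label
# index 1 is treated as the positive class ("Relevant" / "Confident"), which is
# what the probabilities[0, 1] lookups below rely on.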
# Function to perform context analysis using the BERT model
def predict_relevance(question, answer):
    if not answer.strip():  # Check for empty answers
        return "Irrelevant"
    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()
    with torch.no_grad():
        outputs = context_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
    threshold = 0.5
    relevant_prob = probabilities[0, 1]  # Probability of the relevant class
    if relevant_prob > threshold:
        return "Relevant"
    else:
        return "Irrelevant"
# Confidence analysis: classify whether the answer sounds confident
def predict_confidence(question, answer):
    if not answer.strip():
        return "Not Confident"
    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()
    with torch.no_grad():
        outputs = confidence_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
    print("Logits:", logits)  # Debug output
    print("Probabilities:", probabilities)
    confidence_prob = probabilities[0, 1]  # Probability of the confident class
    # Threshold slightly below 0.5 so borderline answers still count as confident
    return "Confident" if confidence_prob > 0.475 else "Not Confident"
# Questions come from app/questions.py
def fetch_questions():
    return [get_question(i) for i in range(6)]

questions = fetch_questions()
current_question_index = 0

# Function to show the current question
def show_question():
    global current_question_index
    question = questions[current_question_index]
    return question

# Function to move to the next question and clear the previous answer/results
def next_question():
    global current_question_index
    current_question_index = (current_question_index + 1) % len(questions)
    return show_question(), None, "", "", ""
# Function to transcribe the audio input and perform both context and confidence analyses
def transcribe_and_analyze(audio, question):
    try:
        # Load the audio and fit it to Whisper's 30-second input window
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
        options = whisper.DecodingOptions(fp16=False)  # fp16=False keeps decoding in FP32 (safe on CPU)
        result = whisper.decode(whisper_model, mel, options)
        transcribed_text = result.text

        # Perform context and confidence analysis on the transcription
        context_result = predict_relevance(question, transcribed_text)
        confidence_result = predict_confidence(question, transcribed_text)
        return transcribed_text, context_result, confidence_result
    except Exception as e:
        return f"Error: {str(e)}", "", ""
# Gradio interface
with gr.Blocks() as demo:
    gr.HTML('''
    <style>
        body {
            background-color: #f0f0f0;
        }
        #title {
            color: grey;
            font-size: 30px;
            text-align: center;
            margin-bottom: 20px;
        }
        .transcribe-btn, .next-btn {
            background-color: #4CAF50;
            color: white;
            font-size: 16px;
            padding: 10px 20px;
            border-radius: 5px;
            cursor: pointer;
            margin-top: 10px;
        }
        .transcribe-btn:hover, .next-btn:hover {
            background-color: #45a049;
        }
        #question-box {
            font-size: 20px;
            color: #555;
            text-align: center;
        }
        #text-box {
            font-size: 18px;
            color: #333;
        }
        #context-box, #confidence-box {
            font-size: 18px;
            color: #333;
        }
    </style>
    ''')

    # Title
    gr.Markdown("<h1 id='title'>INTERVIEW PREPARATION MODEL</h1>")

    # Question display
    with gr.Row():
        question_display = gr.Textbox(label="Interview Question", value=show_question(), interactive=False, elem_id="question-box")

    # Audio input and transcription section
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Record Your Answer")

    # Separate text boxes for the transcribed text, context, and confidence analysis
    with gr.Row():
        transcribed_text = gr.Textbox(label="Your Answer (Transcription)", interactive=False, lines=5, elem_id="text-box")
    with gr.Row():
        context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False, elem_id="context-box")
    with gr.Row():
        confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False, elem_id="confidence-box")

    # Automatically transcribe the audio and analyze context and confidence when audio is provided
    audio_input.change(fn=transcribe_and_analyze,
                       inputs=[audio_input, question_display],
                       outputs=[transcribed_text, context_analysis_result, confidence_analysis_result])

    # Button to get the next question
    with gr.Row():
        next_button = gr.Button("Next Question", elem_classes="next-btn")

    # Clear audio, transcription, context, and confidence when moving to the next question
    next_button.click(next_question,
                      outputs=[question_display, audio_input, transcribed_text, context_analysis_result, confidence_analysis_result])

# Launch app
demo.launch(share=True)