import os

# Install/upgrade dependencies at startup. This is convenient on hosted
# notebooks but slow and fragile in production; pinning these in a
# requirements.txt is the more robust option.
os.system("pip install --upgrade openai-whisper torch")
os.system("pip install --upgrade transformers")
import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from app.questions import get_question
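# Note: app.questions is an external module (not shown in this file);
# get_question(i) is assumed to return the question string at index i.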
# Load Whisper model
whisper_model = whisper.load_model("small")
# Load the pre-trained model and tokenizer for confidence analysis
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/Confidence')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/Confidence')
# Load pre-trained context analysis model (BERT-based)
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
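# Both classifiers are assumed to be binary heads where class index 1 is the
# positive label ("relevant" / "confident"); the softmax indexing below relies
# on that convention.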
# Function to perform context analysis using the BERT model
def predict_relevance(question, answer):
    if not answer.strip():  # Check for empty answers
        return "Irrelevant"
    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()
    with torch.no_grad():
        outputs = context_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
    threshold = 0.5
    relevant_prob = probabilities[0, 1]  # Probability of the "relevant" class
    return "Relevant" if relevant_prob > threshold else "Irrelevant"
# Confidence analysis using the fine-tuned BERT model
def predict_confidence(question, answer):
    if not answer.strip():  # Check for empty answers
        return "Not Confident"
    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()
    with torch.no_grad():
        outputs = confidence_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
    print("Logits:", logits)  # Debug output
    print("Probabilities:", probabilities)
    confidence_prob = probabilities[0, 1]  # Probability of the "confident" class
    # Lower threshold (0.40 instead of 0.50) to favour the "Confident" label
    return "Confident" if confidence_prob > 0.40 else "Not Confident"
# Questions from questions.py
def fetch_questions():
    return [get_question(i) for i in range(6)]

questions = fetch_questions()
current_question_index = 0
# Function to show the current question
def show_question():
    return questions[current_question_index]

# Function to move to the next question, clearing the audio, transcription,
# and analysis boxes
def next_question():
    global current_question_index
    current_question_index = (current_question_index + 1) % len(questions)
    return show_question(), None, "", "", ""
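# Caveat: current_question_index is module-level state, so every connected
# Gradio session shares (and advances) the same question pointer.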
# Function to transcribe the audio input and perform both context and confidence analyses
def transcribe_and_analyze(audio, question):
    try:
        # Load and process audio using Whisper; pad_or_trim clips to Whisper's
        # 30-second window, so only the first 30 seconds are transcribed
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
        options = whisper.DecodingOptions(fp16=False)  # fp16=False keeps decoding CPU-safe
        result = whisper.decode(whisper_model, mel, options)
        # Get the transcribed text
        transcribed_text = result.text
        # Perform context and confidence analysis
        context_result = predict_relevance(question, transcribed_text)
        confidence_result = predict_confidence(question, transcribed_text)
        return transcribed_text, context_result, confidence_result
    except Exception as e:
        return f"Error: {str(e)}", "", ""
# Gradio interface
with gr.Blocks() as demo:
    gr.HTML('''
    <style>
        body {
            background-color: #f0f0f0;
        }
        #title {
            color: grey;
            font-size: 30px;
            text-align: center;
            margin-bottom: 20px;
        }
        .transcribe-btn, .next-btn {
            background-color: #4CAF50;
            color: white;
            font-size: 16px;
            padding: 10px 20px;
            border-radius: 5px;
            cursor: pointer;
            margin-top: 10px;
        }
        .transcribe-btn:hover, .next-btn:hover {
            background-color: #45a049;
        }
        #question-box {
            font-size: 20px;
            color: #555;
            text-align: center;
        }
        #text-box {
            font-size: 18px;
            color: #333;
        }
        #context-box, #confidence-box {
            font-size: 18px;
            color: #333;
        }
    </style>
    ''')
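    # Design note: gr.Blocks(css=...) would also accept this stylesheet directly;
    # injecting a <style> tag through gr.HTML works but is an informal alternative.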
    # Title
    gr.Markdown("<h1 id='title'>INTERVIEW PREPARATION MODEL</h1>")

    # Question display
    with gr.Row():
        question_display = gr.Textbox(label="Interview Question", value=show_question(), interactive=False, elem_id="question-box")

    # Audio input and transcription section
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Record Your Answer")

    # Separate text boxes for the transcribed text, context, and confidence analyses
    with gr.Row():
        transcribed_text = gr.Textbox(label="Your Answer (Transcription)", interactive=False, lines=5, elem_id="text-box")
    with gr.Row():
        context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False, elem_id="context-box")
    with gr.Row():
        confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False, elem_id="confidence-box")

    # Automatically transcribe the audio and analyze context and confidence when audio is provided
    audio_input.change(fn=transcribe_and_analyze,
                       inputs=[audio_input, question_display],
                       outputs=[transcribed_text, context_analysis_result, confidence_analysis_result])

    # Button to get the next question
    with gr.Row():
        next_button = gr.Button("Next Question", elem_classes="next-btn")

    # Clear audio, transcription, context, and confidence when moving to the next question
    next_button.click(next_question,
                      outputs=[question_display, audio_input, transcribed_text, context_analysis_result, confidence_analysis_result])

# Launch app
demo.launch(share=True)
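# share=True additionally exposes the app through a temporary public Gradio
# link; pass share=False (or omit it) to serve on localhost only.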