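"""Interview preparation demo.

Records a spoken answer in a Gradio UI, transcribes it with Whisper, and runs two
BERT classifiers to rate the answer's relevance to the question and its confidence.
"""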
import os
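# Install/upgrade runtime dependencies (Whisper, PyTorch, Transformers) before importing them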
os.system("pip install --upgrade openai-whisper torch")
os.system("pip install --upgrade transformers")
import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification
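# get_question(i) is assumed to return the i-th interview question as a string (module not shown here)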
from app.questions import get_question

# Load Whisper model
whisper_model = whisper.load_model("small")

# Load the pre-trained model and tokenizer for confidence analysis
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/Confidence_model3')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/Confidence_model3')

# Load pre-trained context analysis model (BERT-based)
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
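
# Both classifiers are binary; index 1 of the output logits is treated as the positive class
# ("Relevant" / "Confident") in the prediction functions below.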

# Function to perform context analysis using the BERT model
def predict_relevance(question, answer):
    if not answer.strip():  # Check for empty answers
        return "Irrelevant"
    
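    # Encode the question and answer together as a single BERT sentence pair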
    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()

    with torch.no_grad():
        outputs = context_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
        threshold = 0.5
        relevant_prob = probabilities[0, 1]  # Probability for relevant class
    
    if relevant_prob > threshold:
        return "Relevant"
    else:
        return "Irrelevant"

# Function to perform confidence analysis using the BERT model
def predict_confidence(question, answer):
    if not answer.strip():  
        return "Not Confident"

    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()

    with torch.no_grad():
        outputs = confidence_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
        
        print("Logits:", logits)
        print("Probabilities:", probabilities)
        
        confidence_prob = probabilities[0, 1]  # Probability for Confident class

        # Threshold slightly below 0.5 so borderline answers lean toward "Confident"
        return "Confident" if confidence_prob > 0.475 else "Not Confident"

# Load the six interview questions defined in app/questions.py
def fetch_questions():
    return [get_question(i) for i in range(6)]

questions = fetch_questions()
current_question_index = 0

# Function to show the current question
def show_question():
    return questions[current_question_index]

# Function to move to the next question
def next_question():
    global current_question_index
    current_question_index = (current_question_index + 1) % len(questions)
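    # None clears the audio widget; empty strings clear the transcription and analysis boxes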
    return show_question(), None, "", "", ""

# Function to transcribe the audio input and perform both context and confidence analyses
def transcribe_and_analyze(audio, question):
    try:
        # Load and process audio using Whisper
        audio = whisper.load_audio(audio)
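        # pad_or_trim crops or pads the waveform to Whisper's fixed 30-second context window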
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
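        # fp16=False decodes in float32 (half precision is not supported on CPU)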
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(whisper_model, mel, options)
        
        # Get the transcribed text
        transcribed_text = result.text

        # Perform context and confidence analysis
        context_result = predict_relevance(question, transcribed_text)
        confidence_result = predict_confidence(question, transcribed_text)

        # Return the results
        return transcribed_text, context_result, confidence_result
    except Exception as e:
        return f"Error: {str(e)}", "", ""

# Build the Gradio interface
with gr.Blocks() as demo:
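    # Inline CSS for the page; components below reference these IDs/classes via elem_id / elem_classes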
    
    gr.HTML('''
    <style>
        body { 
            background-color: #f0f0f0; 
        }
        #title { 
            color: grey; 
            font-size: 30px; 
            text-align: center; 
            margin-bottom: 20px; 
        }
        .transcribe-btn, .next-btn { 
            background-color: #4CAF50; 
            color: white; 
            font-size: 16px; 
            padding: 10px 20px; 
            border-radius: 5px; 
            cursor: pointer; 
            margin-top: 10px;
        }
        .transcribe-btn:hover, .next-btn:hover { 
            background-color: #45a049; 
        }
        #question-box { 
            font-size: 20px; 
            color: #555; 
            text-align: center; 
        }
        #text-box {
            font-size: 18px;
            color: #333;
        }
        #context-box, #confidence-box {
            font-size: 18px;
            color: #333;
        }
    </style>
    ''')

    # Title
    gr.Markdown("<h1 id='title'>INTERVIEW PREPARATION MODEL</h1>")

    # Question display
    with gr.Row():
        question_display = gr.Textbox(label="Interview Question", value=show_question(), interactive=False, elem_id="question-box")

    # Audio input and transcription section
    with gr.Row():
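        # type="filepath" passes the recording to transcribe_and_analyze as a temporary file path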
        audio_input = gr.Audio(type="filepath", label="Record Your Answer")

    # Separate text boxes for the transcribed text, context, and confidence analysis
    with gr.Row():
        transcribed_text = gr.Textbox(label="Your Answer (Transcription)", interactive=False, lines=5, elem_id="text-box")
    
    with gr.Row():
        context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False, elem_id="context-box")
    
    with gr.Row():
        confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False, elem_id="confidence-box")
    
    # Automatically transcribe the audio and analyze context and confidence when audio is provided
    audio_input.change(fn=transcribe_and_analyze, 
                       inputs=[audio_input, question_display], 
                       outputs=[transcribed_text, context_analysis_result, confidence_analysis_result])

    # Button to get the next question
    with gr.Row():    
        next_button = gr.Button("Next Question", elem_classes="next-btn")
        
        # Clear audio, transcription, context, and confidence when moving to the next question
        next_button.click(next_question, 
                          outputs=[question_display, audio_input, transcribed_text, context_analysis_result, confidence_analysis_result])

# Launch the app (share=True also creates a temporary public link)
demo.launch(share=True)