RiteshAkhade committed (verified)
Commit 9c1e852 · 1 Parent(s): b48f4c6

Update app.py

Files changed (1)
  1. app.py +123 -144
app.py CHANGED
@@ -1,180 +1,159 @@
- import os
- os.system("pip install --upgrade openai-whisper torch")
- os.system("pip install --upgrade transformers")
  import whisper
  import gradio as gr
  import torch
- from transformers import BertTokenizer, BertForSequenceClassification
  from app.questions import get_question

- # Load Whisper model
  whisper_model = whisper.load_model("small")
-
- # Load the pre-trained model and tokenizer for confidence analysis
- confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/Confidence')
- confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/Confidence')
-
- # Load pre-trained context analysis model (BERT-based)
  context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
  context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
-
- # Function to perform context analysis using the BERT model
  def predict_relevance(question, answer):
-     if not answer.strip():  # Check for empty answers
          return "Irrelevant"
-
      inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
      context_model.eval()
-
      with torch.no_grad():
          outputs = context_model(**inputs)
-         logits = outputs.logits
-         probabilities = torch.softmax(logits, dim=-1)
-     threshold = 0.5
-     relevant_prob = probabilities[0, 1]  # Probability for relevant class
-
-     if relevant_prob > threshold:
-         return "Relevant"
-     else:
-         return "Irrelevant"

- # confidence analysis
  def predict_confidence(question, answer, threshold=0.4):
-     if not isinstance(answer, str) or not answer.strip():
          return "Not Confident"
-
-     # Tokenize input
      inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
-
-     # Set model to evaluation mode
      confidence_model.eval()
-
      with torch.no_grad():
          outputs = confidence_model(**inputs)
-         logits = outputs.logits
-         probabilities = torch.softmax(logits, dim=-1)
-
-     confidence_prob = probabilities[0, 1].item()  # Extract probability for "Confident" class
-
-
-     return "Confident" if confidence_prob > threshold else "Not Confident"
-
- # Questions from questions.py
- def fetch_questions():
-     return [get_question(i) for i in range(6)]
-
- questions = fetch_questions()
- current_question_index = 0
-
- # Function to show the current question
- def show_question():
-     global current_question_index
-     question = questions[current_question_index]
-     return question
-
- # Function to move to the next question
- def next_question():
-     global current_question_index
-     current_question_index = (current_question_index + 1) % len(questions)
-     return show_question(), None, "", "", ""
-
- # Function to transcribe the audio input and perform both context and confidence analyses
- def transcribe_and_analyze(audio, question):
      try:
-         # Load and process audio using Whisper
          audio = whisper.load_audio(audio)
          audio = whisper.pad_or_trim(audio)
          mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
-         options = whisper.DecodingOptions(fp16=False)
-         result = whisper.decode(whisper_model, mel, options)
-
-         # Get the transcribed text
          transcribed_text = result.text

-         # Perform context and confidence analysis
          context_result = predict_relevance(question, transcribed_text)
          confidence_result = predict_confidence(question, transcribed_text)
-
-         # Return the results
          return transcribed_text, context_result, confidence_result
      except Exception as e:
          return f"Error: {str(e)}", "", ""

- # Gradio interface
- with gr.Blocks() as demo:
-
-     gr.HTML('''
-     <style>
-     body {
-         background-color: #f0f0f0;
-     }
-     #title {
-         color: grey;
-         font-size: 30px;
-         text-align: center;
-         margin-bottom: 20px;
-     }
-     .transcribe-btn, .next-btn {
-         background-color: #4CAF50;
-         color: white;
-         font-size: 16px;
-         padding: 10px 20px;
-         border-radius: 5px;
-         cursor: pointer;
-         margin-top: 10px;
-     }
-     .transcribe-btn:hover, .next-btn:hover {
-         background-color: #45a049;
-     }
-     #question-box {
-         font-size: 20px;
-         color: #555;
-         text-align: center;
-     }
-     #text-box {
-         font-size: 18px;
-         color: #333;
-     }
-     #context-box, #confidence-box {
-         font-size: 18px;
-         color: #333;
-     }
-     </style>
-     ''')
-
-     # Title
-     gr.Markdown("<h1 id='title'>INTERVIEW PREPARATION MODEL</h1>")
-
-     # Question display
-     with gr.Row():
-         question_display = gr.Textbox(label="Interview Question", value=show_question(), interactive=False, elem_id="question-box")
-
-     # Audio input and transcription section
-     with gr.Row():
-         audio_input = gr.Audio(type="filepath", label="Record Your Answer")
-
-     # Separate text boxes for the transcribed text, context, and confidence analysis
-     with gr.Row():
-         transcribed_text = gr.Textbox(label="Your Answer (Transcription)", interactive=False, lines=5, elem_id="text-box")
-
-     with gr.Row():
-         context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False, elem_id="context-box")
-
-     with gr.Row():
-         confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False, elem_id="confidence-box")
-
-     # Automatically transcribe the audio and analyze context and confidence when audio is provided
-     audio_input.change(fn=transcribe_and_analyze,
-                        inputs=[audio_input, question_display],
-                        outputs=[transcribed_text, context_analysis_result, confidence_analysis_result])
-
-     # Button to get the next question
-     with gr.Row():
-         next_button = gr.Button("Next Question", elem_classes="next-btn")

-     # Clear audio, transcription, context, and confidence when moving to the next question
-     next_button.click(next_question,
-                       outputs=[question_display, audio_input, transcribed_text, context_analysis_result, confidence_analysis_result])

- # Launch app
  demo.launch(share=True)

  import whisper
  import gradio as gr
  import torch
+ from transformers import BertTokenizer, BertForSequenceClassification, pipeline
  from app.questions import get_question

+ # Load models
  whisper_model = whisper.load_model("small")
+ confidence_model = BertForSequenceClassification.from_pretrained('/home/ghost/LLM/confidence_model1')
+ confidence_tokenizer = BertTokenizer.from_pretrained('/home/ghost/LLM/confidence_tokenizer1')
  context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
  context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
+ emotion_pipe = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=1)
+
+ # Emotion map with labels and emojis
+ interview_emotion_map = {
+     "joy": ("Confident", "🙂"),
+     "fear": ("Nervous", "😨"),
+     "sadness": ("Uncertain", "🙁"),
+     "anger": ("Frustrated", "😠"),
+     "surprise": ("Curious", "😮"),
+     "neutral": ("Calm", "😐"),
+     "disgust": ("Disengaged", "😒"),
+ }
+
+ # Static question sets
+ technical_questions = [get_question(i) for i in range(6)]
+ non_technical_questions = [
+     "Tell me about yourself.",
+     "What are your strengths and weaknesses?",
+     "Where do you see yourself in 5 years?",
+     "How do you handle stress or pressure?",
+     "Describe a time you faced a conflict and how you resolved it.",
+     "What motivates you to do your best?"
+ ]
+
+ # Index trackers
+ current_tech_index = 0
+ current_non_tech_index = 0
+
+ # Relevance prediction
  def predict_relevance(question, answer):
+     if not answer.strip():
          return "Irrelevant"
      inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
      context_model.eval()
      with torch.no_grad():
          outputs = context_model(**inputs)
+         probabilities = torch.softmax(outputs.logits, dim=-1)
+     return "Relevant" if probabilities[0, 1] > 0.5 else "Irrelevant"

+ # Confidence prediction
  def predict_confidence(question, answer, threshold=0.4):
+     if not isinstance(answer, str) or not answer.strip():
          return "Not Confident"
      inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
      confidence_model.eval()
      with torch.no_grad():
          outputs = confidence_model(**inputs)
+         probabilities = torch.softmax(outputs.logits, dim=-1)
+     return "Confident" if probabilities[0, 1].item() > threshold else "Not Confident"
+
+ # Emotion detection
+ def detect_emotion(answer):
+     if not answer.strip():
+         return "No Answer", ""
+     result = emotion_pipe(answer)
+     label = result[0][0]["label"].lower()
+     emotion_text, emoji = interview_emotion_map.get(label, ("Unknown", "❓"))
+     return emotion_text, emoji
+
+ # Question navigation (non-tech)
+ def show_non_tech_question():
+     global current_non_tech_index
+     return non_technical_questions[current_non_tech_index]
+
+ def next_non_tech_question():
+     global current_non_tech_index
+     current_non_tech_index = (current_non_tech_index + 1) % len(non_technical_questions)
+     return non_technical_questions[current_non_tech_index], None, "", ""
+
+ # Question navigation (tech)
+ def show_tech_question():
+     global current_tech_index
+     return technical_questions[current_tech_index]
+
+ def next_tech_question():
+     global current_tech_index
+     current_tech_index = (current_tech_index + 1) % len(technical_questions)
+     return technical_questions[current_tech_index], None, "", "", ""
+
+ # Transcribe + analyze (non-technical)
+ def transcribe_and_analyze_non_tech(audio, question):
      try:
          audio = whisper.load_audio(audio)
          audio = whisper.pad_or_trim(audio)
          mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+         result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
          transcribed_text = result.text
+         emotion_text, emoji = detect_emotion(transcribed_text)
+         return transcribed_text, f"{emotion_text} {emoji}"
+     except Exception as e:
+         return f"Error: {str(e)}", "❓"

+ # Transcribe + analyze (technical)
+ def transcribe_and_analyze_tech(audio, question):
+     try:
+         audio = whisper.load_audio(audio)
+         audio = whisper.pad_or_trim(audio)
+         mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+         result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
+         transcribed_text = result.text
          context_result = predict_relevance(question, transcribed_text)
          confidence_result = predict_confidence(question, transcribed_text)
          return transcribed_text, context_result, confidence_result
      except Exception as e:
          return f"Error: {str(e)}", "", ""

+ # UI layout
+ with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
+     gr.HTML("<h1 style='text-align: center; font-size: 32px;'>INTERVIEW PREPARATION MODEL</h1>")
+
+     with gr.Tabs():

+         # NON-TECHNICAL TAB
+         with gr.Tab("Non-Technical"):
+             gr.Markdown("### Emotional Context Analysis (🧠 + 😊)")
+             question_display_1 = gr.Textbox(label="Interview Question", value=show_non_tech_question(), interactive=False)
+             audio_input_1 = gr.Audio(type="filepath", label="Record Your Answer")
+             transcribed_text_1 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
+             emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)
+
+             audio_input_1.change(fn=transcribe_and_analyze_non_tech,
+                                  inputs=[audio_input_1, question_display_1],
+                                  outputs=[transcribed_text_1, emotion_output])
+
+             next_button_1 = gr.Button("Next Question")
+             next_button_1.click(fn=next_non_tech_question,
+                                 outputs=[question_display_1, audio_input_1, transcribed_text_1, emotion_output])
+
+         # TECHNICAL TAB
+         with gr.Tab("Technical"):
+             gr.Markdown("### Technical Question Analysis (🎓 + 🤖)")
+             question_display_2 = gr.Textbox(label="Interview Question", value=show_tech_question(), interactive=False)
+             audio_input_2 = gr.Audio(type="filepath", label="Record Your Answer")
+             transcribed_text_2 = gr.Textbox(label="Transcribed Answer", interactive=False, lines=4)
+             context_analysis_result = gr.Textbox(label="Context Analysis", interactive=False)
+             confidence_analysis_result = gr.Textbox(label="Confidence Analysis", interactive=False)
+
+             audio_input_2.change(fn=transcribe_and_analyze_tech,
+                                  inputs=[audio_input_2, question_display_2],
+                                  outputs=[transcribed_text_2, context_analysis_result, confidence_analysis_result])
+
+             next_button_2 = gr.Button("Next Question")
+             next_button_2.click(fn=next_tech_question,
+                                 outputs=[question_display_2, audio_input_2, transcribed_text_2,
+                                          context_analysis_result, confidence_analysis_result])

  demo.launch(share=True)
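
Both transcribe_and_analyze_* functions in the updated file share the same openai-whisper decoding steps. A minimal standalone sketch of that flow, assuming a hypothetical local file sample.wav (not part of the repo), can help check the transcription path outside Gradio:

import whisper

# Load the same checkpoint the app uses ("small" is downloaded on first run).
model = whisper.load_model("small")

# "sample.wav" is a hypothetical path; substitute any local recording.
audio = whisper.load_audio("sample.wav")
audio = whisper.pad_or_trim(audio)                      # trim/pad to 30 seconds
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# fp16=False matches the app and avoids half-precision issues on CPU.
result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
print(result.text)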
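
detect_emotion indexes the pipeline output as result[0][0]["label"], which assumes that with top_k=1 the transformers text-classification pipeline returns one ranked list per input; the exact nesting has varied across transformers versions. A small sketch, under that assumption, for inspecting the output shape before wiring it into the app:

from transformers import pipeline

emotion_pipe = pipeline("text-classification",
                        model="bhadresh-savani/distilbert-base-uncased-emotion",
                        top_k=1)

result = emotion_pipe("I think I handled that question fairly well.")
print(result)  # inspect the actual nesting on your installed version

# Assumption: [[{"label": ..., "score": ...}]] for a single string input;
# fall back if the pipeline returns a flat list of dicts instead.
top = result[0][0] if isinstance(result[0], list) else result[0]
print(top["label"].lower(), round(top["score"], 3))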