MaroofTechSorcerer committed on
Commit 6d401a4 · verified · 1 Parent(s): 0879e44

Update app.py

Files changed (1): app.py (+426, -103)
app.py CHANGED
Old version (removed lines are marked with -):

@@ -1,53 +1,76 @@
import os
import streamlit as st
import tempfile
-import whisper
-from transformers import pipeline
-import plotly.express as px
import torch
import logging
import warnings
-import shutil

# Suppress warnings for a clean console
logging.getLogger("torch").setLevel(logging.CRITICAL)
logging.getLogger("transformers").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
-torch.device("cpu")

# Set Streamlit app layout
st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")

# Interface design
st.title("🎙️ Voice Based Sentiment Analysis")
-st.write("Detect emotions, sentiment, and sarcasm from your voice with high accuracy.")
-
-# Sidebar for file upload
-st.sidebar.title("Audio Input")
-st.sidebar.write("Upload a WAV file for transcription and detailed analysis.")
-audio_file = st.sidebar.file_uploader("Choose an audio file", type=["wav"], help="Supports WAV format only.")
-upload_button = st.sidebar.button("Analyze", help="Click to process the uploaded audio.")
-
-# Check if FFmpeg is available
-def check_ffmpeg():
-    return shutil.which("ffmpeg") is not None

# Emotion Detection Function
@st.cache_resource
def get_emotion_classifier():
-    emotion_model = "bhadresh-savani/distilbert-base-uncased-emotion"
-    return pipeline("text-classification", model=emotion_model, top_k=None, device=-1)

def perform_emotion_detection(text):
    try:
        emotion_classifier = get_emotion_classifier()
        emotion_results = emotion_classifier(text)[0]
-        emotion_map = {"anger": "😡", "fear": "😨", "joy": "😊", "love": "❤️", "sadness": "😢", "surprise": "😲"}
        emotions_dict = {result['label']: result['score'] for result in emotion_results}
        top_emotion = max(emotions_dict, key=emotions_dict.get)
-        sentiment_map = {"joy": "POSITIVE", "love": "POSITIVE", "anger": "NEGATIVE", "fear": "NEGATIVE", "sadness": "NEGATIVE", "surprise": "NEUTRAL"}
-        sentiment = sentiment_map.get(top_emotion, "NEUTRAL")
        return emotions_dict, top_emotion, emotion_map, sentiment
    except Exception as e:
        st.error(f"Emotion detection failed: {str(e)}")
@@ -56,8 +79,10 @@ def perform_emotion_detection(text):
# Sarcasm Detection Function
@st.cache_resource
def get_sarcasm_classifier():
-    sarcasm_model = "cardiffnlp/twitter-roberta-base-irony"
-    return pipeline("text-classification", model=sarcasm_model, device=-1)

def perform_sarcasm_detection(text):
    try:
@@ -70,98 +95,396 @@ def perform_sarcasm_detection(text):
        st.error(f"Sarcasm detection failed: {str(e)}")
        return False, 0.0

-# Transcription Function with Whisper
@st.cache_resource
-def get_whisper_model():
-    return whisper.load_model("base")
-
-def transcribe_audio(audio_file):
-    if not check_ffmpeg():
-        st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg and add it to your system PATH.")
-        st.markdown("**Instructions to install FFmpeg on Windows:**\n"
-                    "1. Download FFmpeg from [https://www.gyan.dev/ffmpeg/builds/](https://www.gyan.dev/ffmpeg/builds/) (e.g., `ffmpeg-release-essentials.zip`).\n"
-                    "2. Extract the ZIP to a folder (e.g., `C:\\ffmpeg`).\n"
-                    "3. Add `C:\\ffmpeg\\bin` to your system PATH:\n"
-                    " - Right-click 'This PC' > 'Properties' > 'Advanced system settings' > 'Environment Variables'.\n"
-                    " - Under 'System variables', edit 'Path' and add the new path.\n"
-                    "4. Restart your terminal and rerun the app.")
-        return ""

    try:
-        model = get_whisper_model()
-        # Save uploaded file to a temporary location
        temp_dir = tempfile.gettempdir()
-        temp_file_path = os.path.join(temp_dir, "temp_audio.wav")
        with open(temp_file_path, "wb") as f:
            f.write(audio_file.getvalue())

-        # Verify file exists
-        if not os.path.exists(temp_file_path):
-            st.error(f"Temporary file not created at {temp_file_path}. Check write permissions.")
-            return ""

-        # Transcribe using Whisper
-        result = model.transcribe(temp_file_path)

-        # Clean up temporary file
-        if os.path.exists(temp_file_path):
-            os.remove(temp_file_path)
-        return result["text"]
    except Exception as e:
-        st.error(f"Transcription failed: {str(e)}")
-        return ""

# Main App Logic
def main():
-    if audio_file and upload_button:
-        st.audio(audio_file.getvalue(), format='audio/wav')
-        st.caption("🎧 Uploaded Audio Playback")
-
-        with st.spinner('Analyzing audio with advanced precision...'):
-            transcribed_text = transcribe_audio(audio_file)
-            if not transcribed_text:
-                return
-
-            emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
-            is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
-
-            st.header("Transcribed Text")
-            st.text_area("Text", transcribed_text, height=150, disabled=True, help="The audio converted to text.")
-
-            st.header("Analysis Results")
-            col1, col2 = st.columns([1, 2])
-
-            with col1:
-                st.subheader("Sentiment")
-                sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
-                st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
-                st.info("Sentiment reflects the dominant emotion’s tone.")
-
-                st.subheader("Sarcasm")
-                sarcasm_icon = "😏" if is_sarcastic else "😐"
-                sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
-                st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
-                st.info("Score indicates sarcasm confidence (0 to 1).")
-
-            with col2:
-                st.subheader("Emotions")
-                if emotions_dict:
-                    st.markdown(f"**Dominant:** {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
-                    sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
-                    emotions = [e[0] for e in sorted_emotions]
-                    scores = [e[1] for e in sorted_emotions]
-                    fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
-                                 title="Emotion Distribution", color=emotions,
-                                 color_discrete_sequence=px.colors.qualitative.Pastel1)
-                    fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
-                    st.plotly_chart(fig, use_container_width=True)
-                else:
-                    st.write("No emotions detected.")
-
-            st.info("Emotions drive sentiment here. Sarcasm is analyzed separately for accuracy.")
-
-    elif upload_button and not audio_file:
-        st.sidebar.error("Please upload an audio file first!")

if __name__ == "__main__":
-    main()
 
New version (added lines are marked with +):

+
import os
import streamlit as st
import tempfile
import torch
+import transformers
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+import plotly.express as px
import logging
import warnings
+import whisper
+from pydub import AudioSegment
+import time
+import base64
+import io
+import streamlit.components.v1 as components

# Suppress warnings for a clean console
logging.getLogger("torch").setLevel(logging.CRITICAL)
logging.getLogger("transformers").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+# Check if CUDA is available, otherwise use CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")

# Set Streamlit app layout
st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")

# Interface design
st.title("🎙️ Voice Based Sentiment Analysis")
+st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")

# Emotion Detection Function
@st.cache_resource
def get_emotion_classifier():
+    tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions", use_fast=True)
+    model = AutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
+    model = model.to(device)
+    return pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None, device=-1 if device.type == "cpu" else 0)

def perform_emotion_detection(text):
    try:
        emotion_classifier = get_emotion_classifier()
        emotion_results = emotion_classifier(text)[0]
+
+        emotion_map = {
+            "admiration": "🤩", "amusement": "😄", "anger": "😡", "annoyance": "😒",
+            "approval": "👍", "caring": "🤗", "confusion": "😕", "curiosity": "🧐",
+            "desire": "😍", "disappointment": "😞", "disapproval": "👎", "disgust": "🤢",
+            "embarrassment": "😳", "excitement": "🤩", "fear": "😨", "gratitude": "🙏",
+            "grief": "😢", "joy": "😊", "love": "❤️", "nervousness": "😰",
+            "optimism": "🌈", "pride": "😌", "realization": "💡", "relief": "😌",
+            "remorse": "😔", "sadness": "😭", "surprise": "😲", "neutral": "😐"
+        }
+
+        positive_emotions = ["admiration", "amusement", "approval", "caring", "desire",
+                             "excitement", "gratitude", "joy", "love", "optimism", "pride", "relief"]
+        negative_emotions = ["anger", "annoyance", "disappointment", "disapproval", "disgust",
+                             "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"]
+        neutral_emotions = ["confusion", "curiosity", "realization", "surprise", "neutral"]
+
        emotions_dict = {result['label']: result['score'] for result in emotion_results}
        top_emotion = max(emotions_dict, key=emotions_dict.get)
+
+        if top_emotion in positive_emotions:
+            sentiment = "POSITIVE"
+        elif top_emotion in negative_emotions:
+            sentiment = "NEGATIVE"
+        else:
+            sentiment = "NEUTRAL"
+
        return emotions_dict, top_emotion, emotion_map, sentiment
    except Exception as e:
        st.error(f"Emotion detection failed: {str(e)}")
 
# Sarcasm Detection Function
@st.cache_resource
def get_sarcasm_classifier():
+    tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)
+    model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
+    model = model.to(device)
+    return pipeline("text-classification", model=model, tokenizer=tokenizer, device=-1 if device.type == "cpu" else 0)

def perform_sarcasm_detection(text):
    try:
 
        st.error(f"Sarcasm detection failed: {str(e)}")
        return False, 0.0

+# Validate audio quality
+def validate_audio(audio_path):
+    try:
+        sound = AudioSegment.from_file(audio_path)
+        if sound.dBFS < -50:
+            st.warning("Audio volume is too low. Please record or upload a louder audio.")
+            return False
+        if len(sound) < 1000:  # Less than 1 second
+            st.warning("Audio is too short. Please record a longer audio.")
+            return False
+        return True
+    except:
+        st.error("Invalid or corrupted audio file.")
+        return False
+
+# Speech Recognition with Whisper
@st.cache_resource
+def load_whisper_model():
+    # Use 'large-v3' for maximum accuracy
+    model = whisper.load_model("large-v3")
+    return model

+def transcribe_audio(audio_path, show_alternative=False):
+    try:
+        st.write(f"Processing audio file: {audio_path}")
+        sound = AudioSegment.from_file(audio_path)
+        st.write(f"Audio duration: {len(sound)/1000:.2f}s, Sample rate: {sound.frame_rate}, Channels: {sound.channels}")
+
+        # Convert to WAV format (16kHz, mono) for Whisper
+        temp_wav_path = os.path.join(tempfile.gettempdir(), "temp_converted.wav")
+        sound = sound.set_frame_rate(16000)
+        sound = sound.set_channels(1)
+        sound.export(temp_wav_path, format="wav")
+
+        # Load Whisper model
+        model = load_whisper_model()
+
+        # Transcribe audio
+        result = model.transcribe(temp_wav_path, language="en")
+        main_text = result["text"].strip()
+
+        # Clean up
+        if os.path.exists(temp_wav_path):
+            os.remove(temp_wav_path)
+
+        # Whisper doesn't provide alternatives, so return empty list
+        if show_alternative:
+            return main_text, []
+        return main_text
+    except Exception as e:
+        st.error(f"Transcription failed: {str(e)}")
+        return "", [] if show_alternative else ""
+
+# Function to handle uploaded audio files
+def process_uploaded_audio(audio_file):
+    if not audio_file:
+        return None
+
    try:
        temp_dir = tempfile.gettempdir()
+        temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.wav")
+
        with open(temp_file_path, "wb") as f:
            f.write(audio_file.getvalue())
+
+        if not validate_audio(temp_file_path):
+            return None
+
+        return temp_file_path
+    except Exception as e:
+        st.error(f"Error processing uploaded audio: {str(e)}")
+        return None
+
+# Show model information
+def show_model_info():
+    st.sidebar.header("🧠 About the Models")
+
+    model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
+
+    with model_tabs[0]:
+        st.markdown("""
+        **Emotion Model**: SamLowe/roberta-base-go_emotions
+        - Fine-tuned on GoEmotions dataset (58k Reddit comments, 27 emotions)
+        - Architecture: RoBERTa base
+        - Micro-F1: 0.46
+        [🔍 Model Hub](https://huggingface.co/SamLowe/roberta-base-go_emotions)
+        """)
+
+    with model_tabs[1]:
+        st.markdown("""
+        **Sarcasm Model**: cardiffnlp/twitter-roberta-base-irony
+        - Trained on SemEval-2018 Task 3 (Twitter irony dataset)
+        - Architecture: RoBERTa base
+        - F1-score: 0.705
+        [🔍 Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
+        """)
+
+    with model_tabs[2]:
+        st.markdown("""
+        **Speech Recognition**: OpenAI Whisper (large-v3)
+        - State-of-the-art model for speech-to-text
+        - Accuracy: ~5-10% WER on clean English audio
+        - Robust to noise, accents, and varied conditions
+        - Runs locally, no internet required
+        **Tips**: Use good mic, reduce noise, speak clearly
+        [🔍 Model Details](https://github.com/openai/whisper)
+        """)
+
+# Custom audio recorder using HTML/JS
+def custom_audio_recorder():
+    audio_recorder_html = """
+    <script>
+    var audioRecorder = {
+        audioBlobs: [],
+        mediaRecorder: null,
+        streamBeingCaptured: null,
+        start: function() {
+            if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
+                return Promise.reject(new Error('mediaDevices API or getUserMedia method is not supported in this browser.'));
+            }
+            else {
+                return navigator.mediaDevices.getUserMedia({ audio: true })
+                    .then(stream => {
+                        audioRecorder.streamBeingCaptured = stream;
+                        audioRecorder.mediaRecorder = new MediaRecorder(stream);
+                        audioRecorder.audioBlobs = [];
+
+                        audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
+                            audioRecorder.audioBlobs.push(event.data);
+                        });
+
+                        audioRecorder.mediaRecorder.start();
+                    });
+            }
+        },
+        stop: function() {
+            return new Promise(resolve => {
+                let mimeType = audioRecorder.mediaRecorder.mimeType;
+
+                audioRecorder.mediaRecorder.addEventListener("stop", () => {
+                    let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
+                    resolve(audioBlob);
+                });
+
+                audioRecorder.mediaRecorder.stop();
+
+                audioRecorder.stopStream();
+                audioRecorder.resetRecordingProperties();
+            });
+        },
+        stopStream: function() {
+            audioRecorder.streamBeingCaptured.getTracks()
+                .forEach(track => track.stop());
+        },
+        resetRecordingProperties: function() {
+            audioRecorder.mediaRecorder = null;
+            audioRecorder.streamBeingCaptured = null;
+        }
+    }
+
+    var isRecording = false;
+    var recordButton = document.getElementById('record-button');
+    var audioElement = document.getElementById('audio-playback');
+    var audioData = document.getElementById('audio-data');
+
+    function toggleRecording() {
+        if (!isRecording) {
+            audioRecorder.start()
+                .then(() => {
+                    isRecording = true;
+                    recordButton.textContent = 'Stop Recording';
+                    recordButton.classList.add('recording');
+                })
+                .catch(error => {
+                    alert('Error starting recording: ' + error.message);
+                });
+        } else {
+            audioRecorder.stop()
+                .then(audioBlob => {
+                    const audioUrl = URL.createObjectURL(audioBlob);
+                    audioElement.src = audioUrl;
+
+                    const reader = new FileReader();
+                    reader.readAsDataURL(audioBlob);
+                    reader.onloadend = function() {
+                        const base64data = reader.result;
+                        audioData.value = base64data;
+                        const streamlitMessage = {type: "streamlit:setComponentValue", value: base64data};
+                        window.parent.postMessage(streamlitMessage, "*");
+                    }
+
+                    isRecording = false;
+                    recordButton.textContent = 'Start Recording';
+                    recordButton.classList.remove('recording');
+                });
+        }
+    }
+
+    document.addEventListener('DOMContentLoaded', function() {
+        recordButton = document.getElementById('record-button');
+        audioElement = document.getElementById('audio-playback');
+        audioData = document.getElementById('audio-data');

+        recordButton.addEventListener('click', toggleRecording);
+    });
+    </script>
+
+    <div class="audio-recorder-container">
+        <button id="record-button" class="record-button">Start Recording</button>
+        <audio id="audio-playback" controls style="display:block; margin-top:10px;"></audio>
+        <input type="hidden" id="audio-data" name="audio-data">
+    </div>
+
+    <style>
+    .audio-recorder-container {
+        display: flex;
+        flex-direction: column;
+        align-items: center;
+        padding: 20px;
+    }
+    .record-button {
+        background-color: #f63366;
+        color: white;
+        border: none;
+        padding: 10px 20px;
+        border-radius: 5px;
+        cursor: pointer;
+        font-size: 16px;
+    }
+    .record-button.recording {
+        background-color: #ff0000;
+        animation: pulse 1.5s infinite;
+    }
+    @keyframes pulse {
+        0% { opacity: 1; }
+        50% { opacity: 0.7; }
+        100% { opacity: 1; }
+    }
+    </style>
+    """
+
+    return components.html(audio_recorder_html, height=150)
+
+# Function to display analysis results
+def display_analysis_results(transcribed_text):
+    emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
+    is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
+
+    st.header("Transcribed Text")
+    st.text_area("Text", transcribed_text, height=150, disabled=True, help="The audio converted to text.")
+
+    confidence_score = min(0.95, max(0.70, len(transcribed_text.split()) / 50))
+    st.caption(f"Transcription confidence: {confidence_score:.2f}")

+    st.header("Analysis Results")
+    col1, col2 = st.columns([1, 2])
+
+    with col1:
+        st.subheader("Sentiment")
+        sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
+        st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
+        st.info("Sentiment reflects the dominant emotion's tone.")
+
+        st.subheader("Sarcasm")
+        sarcasm_icon = "😏" if is_sarcastic else "😐"
+        sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
+        st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
+        st.info("Score indicates sarcasm confidence (0 to 1).")
+
+    with col2:
+        st.subheader("Emotions")
+        if emotions_dict:
+            st.markdown(f"**Dominant:** {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
+            sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
+            top_emotions = sorted_emotions[:8]
+            emotions = [e[0] for e in top_emotions]
+            scores = [e[1] for e in top_emotions]
+            fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
+                         title="Top Emotions Distribution", color=emotions,
+                         color_discrete_sequence=px.colors.qualitative.Bold)
+            fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
+            st.plotly_chart(fig, use_container_width=True)
+        else:
+            st.write("No emotions detected.")
+
+    with st.expander("Analysis Details", expanded=False):
+        st.write("""
+        **How this works:**
+        1. **Speech Recognition**: Audio transcribed using OpenAI Whisper (large-v3)
+        2. **Emotion Analysis**: RoBERTa model trained on GoEmotions (27 emotions)
+        3. **Sentiment Analysis**: Derived from dominant emotion
+        4. **Sarcasm Detection**: RoBERTa model for irony detection
+        **Accuracy depends on**:
+        - Audio quality
+        - Speech clarity
+        - Background noise
+        - Speech patterns
+        """)
+
+# Process base64 audio data
+def process_base64_audio(base64_data):
+    try:
+        base64_binary = base64_data.split(',')[1]
+        binary_data = base64.b64decode(base64_binary)

+        temp_dir = tempfile.gettempdir()
+        temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")
+
+        with open(temp_file_path, "wb") as f:
+            f.write(binary_data)
+
+        if not validate_audio(temp_file_path):
+            return None
+
+        return temp_file_path
    except Exception as e:
+        st.error(f"Error processing audio data: {str(e)}")
+        return None

# Main App Logic
def main():
+    tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙️ Record Audio"])
+
+    with tab1:
+        st.header("Upload an Audio File")
+        audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"],
+                                      help="Upload an audio file for analysis")
+
+        if audio_file:
+            st.audio(audio_file.getvalue())
+            st.caption("🎧 Uploaded Audio Playback")
+
+            upload_button = st.button("Analyze Upload", key="analyze_upload")
+
+            if upload_button:
+                with st.spinner('Analyzing audio with advanced precision...'):
+                    temp_audio_path = process_uploaded_audio(audio_file)
+                    if temp_audio_path:
+                        main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)
+
+                        if main_text:
+                            if alternatives:
+                                with st.expander("Alternative transcriptions detected", expanded=False):
+                                    for i, alt in enumerate(alternatives[:3], 1):
+                                        st.write(f"{i}. {alt}")
+
+                            display_analysis_results(main_text)
+                        else:
+                            st.error("Could not transcribe the audio. Please try again with clearer audio.")
+
+                        if os.path.exists(temp_audio_path):
+                            os.remove(temp_audio_path)
+
+    with tab2:
+        st.header("Record Your Voice")
+        st.write("Use the recorder below to analyze your speech in real-time.")
+
+        st.subheader("Browser-Based Recorder")
+        st.write("Click the button below to start/stop recording.")
+
+        audio_data = custom_audio_recorder()
+
+        if audio_data:
+            analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
+
+            if analyze_rec_button:
+                with st.spinner("Processing your recording..."):
+                    temp_audio_path = process_base64_audio(audio_data)
+
+                    if temp_audio_path:
+                        transcribed_text = transcribe_audio(temp_audio_path)
+
+                        if transcribed_text:
+                            display_analysis_results(transcribed_text)
+                        else:
+                            st.error("Could not transcribe the audio. Please try speaking more clearly.")
+
+                        if os.path.exists(temp_audio_path):
+                            os.remove(temp_audio_path)
+
+        st.subheader("Manual Text Input")
+        st.write("If recording doesn't work, you can type your text here:")
+
+        manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
+        analyze_text_button = st.button("Analyze Text", key="analyze_manual")
+
+        if analyze_text_button and manual_text:
+            display_analysis_results(manual_text)
+
+    show_model_info()

if __name__ == "__main__":
+    main()
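
A note on the data shape the new perform_emotion_detection indexes into: with top_k=None, the Hugging Face text-classification pipeline returns one ranked list of {'label': ..., 'score': ...} dicts per input, so classifier(text)[0] is the full label distribution (the 27 GoEmotions labels plus neutral) for a single string, and the commit buckets the top-scoring label into POSITIVE/NEGATIVE/NEUTRAL. Below is a minimal CPU-only sketch of that flow outside Streamlit, not the app itself; it assumes transformers is installed and the SamLowe/roberta-base-go_emotions checkpoint can be downloaded, the sample sentence is illustrative, and the exact return nesting can differ across transformers versions.

from transformers import pipeline

# device=-1 keeps inference on CPU, mirroring the commit's fallback when CUDA is absent
clf = pipeline(
    "text-classification",
    model="SamLowe/roberta-base-go_emotions",  # same checkpoint as the commit
    top_k=None,                                # return scores for every label
    device=-1,
)

scores = clf("Honestly, this made my day.")[0]   # list of {'label', 'score'} dicts
top = max(scores, key=lambda s: s["score"])      # dominant emotion, as in the app

# Coarse sentiment bucketing using the commit's positive/negative label lists
positive = {"admiration", "amusement", "approval", "caring", "desire", "excitement",
            "gratitude", "joy", "love", "optimism", "pride", "relief"}
negative = {"anger", "annoyance", "disappointment", "disapproval", "disgust",
            "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"}

if top["label"] in positive:
    sentiment = "POSITIVE"
elif top["label"] in negative:
    sentiment = "NEGATIVE"
else:
    sentiment = "NEUTRAL"

print(top["label"], round(top["score"], 3), sentiment)

On the sample sentence this should print a joy-adjacent label with POSITIVE, which is the same path the app takes before rendering the Plotly bar chart.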