MaroofTechSorcerer committed on
Commit
d8a1b1b
·
verified ·
1 Parent(s): a781ade

Update app.py

Files changed (1)
  1. app.py +145 -529
app.py CHANGED
@@ -13,10 +13,8 @@ import time
13
  import base64
14
  import io
15
  import streamlit.components.v1 as components
16
- import functools
17
- import threading
18
- from typing import Dict, Tuple, List, Any, Optional, Union
19
  from concurrent.futures import ThreadPoolExecutor
 
20
  import numpy as np
21
 
22
  # Suppress warnings for a clean console
@@ -42,7 +40,7 @@ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
42
 
43
  # Interface design
44
  st.title("🎙 Voice Based Sentiment Analysis")
45
- st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")
46
 
47
  # Emotion Detection Function with optimizations
48
  @st.cache_resource
@@ -53,16 +51,15 @@ def get_emotion_classifier():
53
  model_max_length=512)
54
  model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
55
  model = model.to(device)
56
- model.eval() # Set model to evaluation mode for better inference performance
57
 
58
- # Use batch_size for faster processing when appropriate
59
  classifier = pipeline("text-classification",
60
  model=model,
61
  tokenizer=tokenizer,
62
- top_k=None,
63
  device=0 if torch.cuda.is_available() else -1)
64
 
65
- # Verify the model is working with a test
66
  test_result = classifier("I am happy today")
67
  print(f"Emotion classifier test: {test_result}")
68
 
@@ -72,11 +69,10 @@ def get_emotion_classifier():
72
  st.error(f"Failed to load emotion model. Please check logs.")
73
  return None
74
 
75
- # Cache emotion results to prevent recomputation
76
- @st.cache_data(ttl=600) # Cache for 10 minutes
77
  def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
78
  try:
79
- # Handle empty or very short text
80
  if not text or len(text.strip()) < 3:
81
  return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
82
 
@@ -85,18 +81,8 @@ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[st
85
  st.error("Emotion classifier not available.")
86
  return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
87
 
88
- # Chunk long text for better processing
89
- max_chunk_size = 512
90
- if len(text) > max_chunk_size:
91
- chunks = [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
92
- all_results = []
93
- for chunk in chunks:
94
- chunk_results = emotion_classifier(chunk)
95
- all_results.extend(chunk_results)
96
- # Aggregate results across chunks
97
- emotion_results = [result[0] for result in all_results]
98
- else:
99
- emotion_results = emotion_classifier(text)[0]
100
 
101
  emotion_map = {
102
  "joy": "😊", "anger": "😑", "disgust": "🀒", "fear": "😨",
@@ -108,56 +94,30 @@ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[st
108
  neutral_emotions = ["surprise", "neutral"]
109
 
110
  # Process results
111
- emotions_dict = {}
112
- for result in emotion_results:
113
- if isinstance(result, dict) and 'label' in result and 'score' in result:
114
- # If we have multiple chunks, average the scores
115
- if result['label'] in emotions_dict:
116
- emotions_dict[result['label']] = (emotions_dict[result['label']] + result['score']) / 2
117
- else:
118
- emotions_dict[result['label']] = result['score']
119
- else:
120
- print(f"Invalid result format: {result}")
121
-
122
- if not emotions_dict:
123
- st.error("No valid emotions detected.")
124
- return {}, "neutral", emotion_map, "NEUTRAL"
125
 
126
- # Filter out very low probability emotions (improved threshold)
127
- filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.05}
128
 
129
  if not filtered_emotions:
130
  filtered_emotions = emotions_dict
131
 
132
- # Get top emotion
133
- top_emotion = max(filtered_emotions, key=filtered_emotions.get)
134
- top_score = filtered_emotions[top_emotion]
 
 
 
135
 
136
- # Determine sentiment with improved logic
137
- if top_emotion in positive_emotions:
 
 
138
  sentiment = "POSITIVE"
139
  elif top_emotion in negative_emotions:
140
  sentiment = "NEGATIVE"
141
  else:
142
- # Better handling of mixed emotions
143
- competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
144
-
145
- if len(competing_emotions) > 1:
146
- # If top two emotions are close in score
147
- if (competing_emotions[1][1] > 0.8 * competing_emotions[0][1]):
148
- # Check if second emotion changes sentiment classification
149
- second_emotion = competing_emotions[1][0]
150
- if second_emotion in positive_emotions:
151
- sentiment = "POSITIVE" if top_emotion not in negative_emotions else "MIXED"
152
- elif second_emotion in negative_emotions:
153
- sentiment = "NEGATIVE" if top_emotion not in positive_emotions else "MIXED"
154
- else:
155
- sentiment = "NEUTRAL"
156
- else:
157
- # Stick with top emotion for sentiment
158
- sentiment = "NEUTRAL"
159
- else:
160
- sentiment = "NEUTRAL"
161
 
162
  return emotions_dict, top_emotion, emotion_map, sentiment
163
  except Exception as e:
@@ -165,7 +125,7 @@ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[st
165
  print(f"Exception in emotion detection: {str(e)}")
166
  return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
167
 
168
- # Sarcasm Detection Function with optimizations
169
  @st.cache_resource
170
  def get_sarcasm_classifier():
171
  try:
@@ -174,7 +134,7 @@ def get_sarcasm_classifier():
174
  model_max_length=512)
175
  model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
176
  model = model.to(device)
177
- model.eval() # Set to evaluation mode
178
 
179
  classifier = pipeline("text-classification",
180
  model=model,
@@ -191,8 +151,7 @@ def get_sarcasm_classifier():
191
  st.error(f"Failed to load sarcasm model. Please check logs.")
192
  return None
193
 
194
- # Cache sarcasm results
195
- @st.cache_data(ttl=600) # Cache for 10 minutes
196
  def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
197
  try:
198
  if not text or len(text.strip()) < 3:
@@ -203,46 +162,19 @@ def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
203
  st.error("Sarcasm classifier not available.")
204
  return False, 0.0
205
 
206
- # Handle long text by chunking
207
- max_chunk_size = 512
208
- if len(text) > max_chunk_size:
209
- chunks = [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
210
- # Process chunks and average results
211
- sarcasm_scores = []
212
- for chunk in chunks:
213
- result = sarcasm_classifier(chunk)[0]
214
- is_chunk_sarcastic = result['label'] == "LABEL_1"
215
- sarcasm_score = result['score'] if is_chunk_sarcastic else 1 - result['score']
216
- sarcasm_scores.append((is_chunk_sarcastic, sarcasm_score))
217
-
218
- # Average sarcasm scores
219
- total_sarcasm_score = sum(score for _, score in sarcasm_scores)
220
- avg_sarcasm_score = total_sarcasm_score / len(sarcasm_scores)
221
- # Count sarcastic chunks
222
- sarcastic_chunks = sum(1 for is_sarcastic, _ in sarcasm_scores if is_sarcastic)
223
-
224
- # If majority of chunks are sarcastic, classify as sarcastic
225
- is_sarcastic = sarcastic_chunks > len(chunks) / 2
226
- return is_sarcastic, avg_sarcasm_score
227
- else:
228
- # Process normally for short text
229
- result = sarcasm_classifier(text)[0]
230
- is_sarcastic = result['label'] == "LABEL_1"
231
- sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
232
- return is_sarcastic, sarcasm_score
233
  except Exception as e:
234
  st.error(f"Sarcasm detection failed: {str(e)}")
235
  return False, 0.0
236
 
237
- # Validate audio quality - optimized
238
  def validate_audio(audio_path: str) -> bool:
239
  try:
240
  sound = AudioSegment.from_file(audio_path)
241
- # Improved audio validation
242
- if sound.dBFS < -50: # Slightly relaxed threshold
243
- st.warning("Audio volume is low. Please record or upload a louder audio for better results.")
244
- return len(sound) > 500 # Still process if at least 0.5 seconds
245
- if len(sound) < 500: # Less than 0.5 second
246
  st.warning("Audio is very short. Longer audio provides better analysis.")
247
  return False
248
  return True
@@ -250,12 +182,11 @@ def validate_audio(audio_path: str) -> bool:
250
  st.error(f"Invalid or corrupted audio file: {str(e)}")
251
  return False
252
 
253
- # Speech Recognition with Whisper - optimized for speed
254
  @st.cache_resource
255
  def load_whisper_model():
256
  try:
257
- # Use medium model for better speed/accuracy balance
258
- model = whisper.load_model("medium")
259
  return model
260
  except Exception as e:
261
  print(f"Error loading Whisper model: {str(e)}")
@@ -263,31 +194,26 @@ def load_whisper_model():
263
  return None
264
 
265
  @st.cache_data
266
- def transcribe_audio(audio_path: str, show_alternative: bool = False) -> Union[str, Tuple[str, List[str]]]:
267
  try:
268
- st.write(f"Processing audio file...")
269
  sound = AudioSegment.from_file(audio_path)
270
- st.write(f"Audio duration: {len(sound) / 1000:.2f}s")
271
-
272
  # Convert to WAV format (16kHz, mono) for Whisper
273
  temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_converted_{int(time.time())}.wav")
274
- # Optimize audio for speech recognition
275
- sound = sound.set_frame_rate(16000) # 16kHz is optimal for Whisper
276
- sound = sound.set_channels(1)
277
  sound.export(temp_wav_path, format="wav")
278
 
279
  # Load model
280
  model = load_whisper_model()
281
  if model is None:
282
- return "", [] if show_alternative else ""
283
 
284
  # Transcribe with optimized settings
285
  result = model.transcribe(
286
  temp_wav_path,
287
  language="en",
288
  task="transcribe",
289
- fp16=torch.cuda.is_available(), # Use fp16 if GPU available
290
- beam_size=5 # Slightly larger beam size for better accuracy
291
  )
292
 
293
  main_text = result["text"].strip()
@@ -296,48 +222,29 @@ def transcribe_audio(audio_path: str, show_alternative: bool = False) -> Union[s
296
  if os.path.exists(temp_wav_path):
297
  os.remove(temp_wav_path)
298
 
299
- # Return results
300
- if show_alternative and "segments" in result:
301
- # Create alternative texts by combining segments differently
302
- segments = result["segments"]
303
- if len(segments) > 1:
304
- alternatives = []
305
- # Create up to 3 alternatives by varying confidence thresholds
306
- for conf in [0.5, 0.7, 0.9]:
307
- alt_text = " ".join(seg["text"] for seg in segments if seg["no_speech_prob"] < conf)
308
- if alt_text and alt_text != main_text:
309
- alternatives.append(alt_text)
310
- return main_text, alternatives[:3] # Limit to 3 alternatives
311
-
312
- return (main_text, []) if show_alternative else main_text
313
  except Exception as e:
314
  st.error(f"Transcription failed: {str(e)}")
315
- return "", [] if show_alternative else ""
316
 
317
- # Process uploaded audio files - optimized
318
  def process_uploaded_audio(audio_file) -> Optional[str]:
319
  if not audio_file:
320
  return None
321
 
322
  try:
323
  temp_dir = tempfile.gettempdir()
324
-
325
- # Extract extension more safely
326
- filename = audio_file.name
327
- ext = filename.split('.')[-1].lower() if '.' in filename else ''
328
-
329
  if ext not in ['wav', 'mp3', 'ogg', 'm4a', 'flac']:
330
  st.error("Unsupported audio format. Please upload WAV, MP3, OGG, M4A, or FLAC.")
331
  return None
332
 
333
  temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")
334
-
335
  with open(temp_file_path, "wb") as f:
336
  f.write(audio_file.getvalue())
337
 
338
  if not validate_audio(temp_file_path):
339
- # We'll still try to process even if validation fails
340
- st.warning("Audio may not be optimal quality, but we'll try to process it anyway.")
341
 
342
  return temp_file_path
343
  except Exception as e:
@@ -347,40 +254,36 @@ def process_uploaded_audio(audio_file) -> Optional[str]:
347
  # Show model information
348
  def show_model_info():
349
  st.sidebar.header("🧠 About the Models")
350
-
351
  model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
352
 
353
  with model_tabs[0]:
354
  st.markdown("""
355
  *Emotion Model*: distilbert-base-uncased-emotion
356
- - Fine-tuned for six emotions (joy, anger, disgust, fear, sadness, surprise)
357
  - Architecture: DistilBERT base
358
- - High accuracy for basic emotion classification
359
  [πŸ” Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
360
  """)
361
 
362
  with model_tabs[1]:
363
  st.markdown("""
364
  *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
365
- - Trained on SemEval-2018 Task 3 (Twitter irony dataset)
366
  - Architecture: RoBERTa base
367
- - F1-score: 0.705
368
  [πŸ” Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
369
  """)
370
 
371
  with model_tabs[2]:
372
  st.markdown("""
373
- *Speech Recognition*: OpenAI Whisper (medium model)
374
- - Optimized for speed and accuracy
375
- - Performs well even with background noise and varied accents
376
- - Runs locally, no internet required
377
- *Tips*: Use good mic, reduce noise, speak clearly
378
  [πŸ” Model Details](https://github.com/openai/whisper)
379
  """)
380
 
381
- # Custom audio recorder using HTML/JS - optimized for better user experience
382
  def custom_audio_recorder():
383
- st.warning("Browser-based recording requires microphone access and a modern browser. If recording fails, try uploading an audio file instead.")
384
  audio_recorder_html = """
385
  <script>
386
  var audioRecorder = {
@@ -388,119 +291,49 @@ def custom_audio_recorder():
388
  mediaRecorder: null,
389
  streamBeingCaptured: null,
390
  isRecording: false,
391
- recordingTimer: null,
392
- recordingDuration: 0,
393
 
394
  start: function() {
395
  if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
396
- document.getElementById('status-message').textContent = "Recording not supported in this browser";
397
- return Promise.reject(new Error('mediaDevices API or getUserMedia method is not supported in this browser.'));
398
  }
399
- else {
400
- return navigator.mediaDevices.getUserMedia({
401
- audio: {
402
- echoCancellation: true,
403
- noiseSuppression: true,
404
- autoGainControl: true
405
- }
406
- })
407
- .then(stream => {
408
- audioRecorder.streamBeingCaptured = stream;
409
-
410
- // Create audio context for visualization
411
- const audioContext = new (window.AudioContext || window.webkitAudioContext)();
412
- const source = audioContext.createMediaStreamSource(stream);
413
- const analyser = audioContext.createAnalyser();
414
- analyser.fftSize = 256;
415
- source.connect(analyser);
416
-
417
- // Start monitoring audio levels
418
- const bufferLength = analyser.frequencyBinCount;
419
- const dataArray = new Uint8Array(bufferLength);
420
-
421
- function updateMeter() {
422
- if (!audioRecorder.isRecording) return;
423
-
424
- analyser.getByteFrequencyData(dataArray);
425
- let sum = 0;
426
- for(let i = 0; i < bufferLength; i++) {
427
- sum += dataArray[i];
428
- }
429
- const average = sum / bufferLength;
430
-
431
- // Update volume meter
432
- const meter = document.getElementById('volume-meter');
433
- if (meter) {
434
- const height = Math.min(100, average * 2);
435
- meter.style.height = height + '%';
436
- }
437
-
438
- requestAnimationFrame(updateMeter);
439
- }
440
-
441
- // Setup media recorder with better settings
442
- audioRecorder.mediaRecorder = new MediaRecorder(stream, {
443
- mimeType: 'audio/webm;codecs=opus',
444
- audioBitsPerSecond: 128000
445
- });
446
-
447
- audioRecorder.audioBlobs = [];
448
- audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
449
- audioRecorder.audioBlobs.push(event.data);
450
- });
451
-
452
- // Start the recording and visualization
453
- audioRecorder.mediaRecorder.start(100);
454
- audioRecorder.isRecording = true;
455
-
456
- // Start timer
457
- audioRecorder.recordingDuration = 0;
458
- audioRecorder.recordingTimer = setInterval(() => {
459
- audioRecorder.recordingDuration += 1;
460
- const timerDisplay = document.getElementById('recording-timer');
461
- if (timerDisplay) {
462
- const minutes = Math.floor(audioRecorder.recordingDuration / 60);
463
- const seconds = audioRecorder.recordingDuration % 60;
464
- timerDisplay.textContent = `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
465
- }
466
- }, 1000);
467
-
468
- updateMeter();
469
- document.getElementById('status-message').textContent = "Recording...";
470
  });
471
- }
 
 
 
 
 
 
 
472
  },
473
 
474
  stop: function() {
475
  return new Promise(resolve => {
476
  let mimeType = audioRecorder.mediaRecorder.mimeType;
477
-
478
  audioRecorder.mediaRecorder.addEventListener("stop", () => {
479
  let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
480
  resolve(audioBlob);
481
  audioRecorder.isRecording = false;
482
  document.getElementById('status-message').textContent = "Recording stopped";
483
-
484
- // Stop the timer
485
- if (audioRecorder.recordingTimer) {
486
- clearInterval(audioRecorder.recordingTimer);
487
- }
488
  });
489
-
490
  audioRecorder.mediaRecorder.stop();
491
- audioRecorder.stopStream();
492
- audioRecorder.resetRecordingProperties();
 
493
  });
494
- },
495
-
496
- stopStream: function() {
497
- audioRecorder.streamBeingCaptured.getTracks()
498
- .forEach(track => track.stop());
499
- },
500
-
501
- resetRecordingProperties: function() {
502
- audioRecorder.mediaRecorder = null;
503
- audioRecorder.streamBeingCaptured = null;
504
  }
505
  }
506
 
@@ -509,8 +342,6 @@ def custom_audio_recorder():
509
  function toggleRecording() {
510
  var recordButton = document.getElementById('record-button');
511
  var statusMessage = document.getElementById('status-message');
512
- var volumeMeter = document.getElementById('volume-meter');
513
- var recordingTimer = document.getElementById('recording-timer');
514
 
515
  if (!isRecording) {
516
  audioRecorder.start()
@@ -518,8 +349,6 @@ def custom_audio_recorder():
518
  isRecording = true;
519
  recordButton.textContent = 'Stop Recording';
520
  recordButton.classList.add('recording');
521
- volumeMeter.style.display = 'block';
522
- recordingTimer.style.display = 'block';
523
  })
524
  .catch(error => {
525
  statusMessage.textContent = 'Error: ' + error.message;
@@ -545,8 +374,6 @@ def custom_audio_recorder():
545
  isRecording = false;
546
  recordButton.textContent = 'Start Recording';
547
  recordButton.classList.remove('recording');
548
- volumeMeter.style.display = 'none';
549
- volumeMeter.style.height = '0%';
550
  });
551
  }
552
  }
@@ -560,14 +387,6 @@ def custom_audio_recorder():
560
  <div class="audio-recorder-container">
561
  <button id="record-button" class="record-button">Start Recording</button>
562
  <div id="status-message" class="status-message">Ready to record</div>
563
-
564
- <div class="recording-info">
565
- <div class="volume-meter-container">
566
- <div id="volume-meter" class="volume-meter"></div>
567
- </div>
568
- <div id="recording-timer" class="recording-timer">00:00</div>
569
- </div>
570
-
571
  <audio id="audio-playback" controls style="display:none; margin-top:10px; width:100%;"></audio>
572
  <input type="hidden" id="audio-data" name="audio-data">
573
  </div>
@@ -593,7 +412,6 @@ def custom_audio_recorder():
593
  font-size: 16px;
594
  font-weight: bold;
595
  transition: all 0.3s ease;
596
- box-shadow: 0 2px 5px rgba(0,0,0,0.2);
597
  }
598
 
599
  .record-button:hover {
@@ -612,41 +430,6 @@ def custom_audio_recorder():
612
  color: #666;
613
  }
614
 
615
- .recording-info {
616
- display: flex;
617
- align-items: center;
618
- margin-top: 15px;
619
- width: 100%;
620
- justify-content: center;
621
- }
622
-
623
- .volume-meter-container {
624
- width: 20px;
625
- height: 60px;
626
- background-color: #ddd;
627
- border-radius: 3px;
628
- overflow: hidden;
629
- position: relative;
630
- }
631
-
632
- .volume-meter {
633
- width: 100%;
634
- height: 0%;
635
- background-color: #f63366;
636
- position: absolute;
637
- bottom: 0;
638
- transition: height 0.1s ease;
639
- display: none;
640
- }
641
-
642
- .recording-timer {
643
- margin-left: 15px;
644
- font-family: monospace;
645
- font-size: 18px;
646
- color: #f63366;
647
- display: none;
648
- }
649
-
650
  @keyframes pulse {
651
  0% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0.7); }
652
  50% { opacity: 0.8; box-shadow: 0 0 0 10px rgba(255,0,0,0); }
@@ -655,35 +438,21 @@ def custom_audio_recorder():
655
  </style>
656
  """
657
 
658
- return components.html(audio_recorder_html, height=220)
659
 
660
- # Function to display analysis results - optimized
661
- def display_analysis_results(transcribed_text):
662
  st.session_state.debug_info = st.session_state.get('debug_info', [])
663
- st.session_state.debug_info.append(f"Processing text: {transcribed_text[:50]}...")
664
- st.session_state.debug_info = st.session_state.debug_info[-100:] # Keep last 100 entries
665
-
666
- # Run emotion and sarcasm detection in parallel
667
- with ThreadPoolExecutor(max_workers=2) as executor:
668
- emotion_future = executor.submit(perform_emotion_detection, transcribed_text)
669
- sarcasm_future = executor.submit(perform_sarcasm_detection, transcribed_text)
670
-
671
- emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
672
- is_sarcastic, sarcasm_score = sarcasm_future.result()
673
-
674
- # Add results to debug info
675
- st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}")
676
- st.session_state.debug_info.append(f"Sarcasm: {is_sarcastic}, Score: {sarcasm_score:.3f}")
677
 
678
  st.header("Transcribed Text")
679
- st.text_area("Text", transcribed_text, height=120, disabled=True,
680
- help="The audio converted to text. The text was processed for emotion and sentiment analysis.")
681
 
682
- # Improved confidence estimation
683
- words = transcribed_text.split()
684
- word_count = len(words)
685
  confidence_score = min(0.98, max(0.75, 0.75 + (word_count / 100) * 0.2))
686
-
687
  st.caption(f"Estimated transcription confidence: {confidence_score:.2f}")
688
 
689
  st.header("Analysis Results")
@@ -693,171 +462,59 @@ def display_analysis_results(transcribed_text):
693
  st.subheader("Sentiment")
694
  sentiment_icon = "πŸ‘" if sentiment == "POSITIVE" else "πŸ‘Ž" if sentiment == "NEGATIVE" else "πŸ”„" if sentiment == "MIXED" else "😐"
695
  st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
696
- st.info("Sentiment reflects the dominant emotion's tone and context.")
697
 
698
  st.subheader("Sarcasm")
699
  sarcasm_icon = "😏" if is_sarcastic else "😐"
700
  sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
701
  st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
702
-
703
- # More informative sarcasm info
704
- if is_sarcastic:
705
- if sarcasm_score > 0.8:
706
- st.info("High confidence in sarcasm detection.")
707
- else:
708
- st.info("Moderate confidence in sarcasm detection.")
709
- else:
710
- st.info("No clear indicators of sarcasm found.")
711
 
712
  with col2:
713
  st.subheader("Emotions")
714
  if emotions_dict:
715
- st.markdown(
716
- f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
717
 
718
- # Enhanced visualization
719
  sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
720
- significant_emotions = [(e, s) for e, s in sorted_emotions if s > 0.05] # Only show significant emotions
721
 
722
  if significant_emotions:
723
  emotions = [e[0] for e in significant_emotions]
724
  scores = [e[1] for e in significant_emotions]
725
-
726
- # Use a color scale that helps distinguish emotions better
727
  fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
728
  title="Emotion Distribution", color=emotions,
729
  color_discrete_sequence=px.colors.qualitative.Bold)
730
-
731
- fig.update_layout(
732
- yaxis_range=[0, 1],
733
- showlegend=False,
734
- title_font_size=14,
735
- margin=dict(l=20, r=20, t=40, b=20),
736
- xaxis_title="Emotion",
737
- yaxis_title="Confidence Score",
738
- bargap=0.3
739
- )
740
-
741
- # Add horizontal reference line for minimal significance
742
- fig.add_shape(
743
- type="line",
744
- x0=-0.5,
745
- x1=len(emotions) - 0.5,
746
- y0=0.1,
747
- y1=0.1,
748
- line=dict(color="gray", width=1, dash="dot")
749
- )
750
-
751
  st.plotly_chart(fig, use_container_width=True)
752
  else:
753
  st.write("No significant emotions detected.")
754
  else:
755
  st.write("No emotions detected.")
756
 
757
- # Expert analysis section
758
- with st.expander("Expert Analysis", expanded=False):
759
- col1, col2 = st.columns(2)
760
-
761
- with col1:
762
- st.subheader("Emotion Insights")
763
- # Provide more insightful analysis based on emotion combinations
764
- if emotions_dict:
765
- top_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)[:3]
766
-
767
- if len(top_emotions) >= 2:
768
- emotion1, score1 = top_emotions[0]
769
- emotion2, score2 = top_emotions[1]
770
-
771
- if score2 > 0.7 * score1: # If second emotion is close to first
772
- st.markdown(f"**Mixed emotional state detected:** {emotion_map.get(emotion1, '')} {emotion1} + {emotion_map.get(emotion2, '')} {emotion2}")
773
-
774
- # Analyze specific combinations
775
- if (emotion1 == "joy" and emotion2 == "surprise") or (emotion1 == "surprise" and emotion2 == "joy"):
776
- st.write("πŸ’‘ This indicates excitement or delight")
777
- elif (emotion1 == "sadness" and emotion2 == "anger") or (emotion1 == "anger" and emotion2 == "sadness"):
778
- st.write("πŸ’‘ This suggests frustration or disappointment")
779
- elif (emotion1 == "fear" and emotion2 == "surprise") or (emotion1 == "surprise" and emotion2 == "fear"):
780
- st.write("πŸ’‘ This indicates shock or alarm")
781
- else:
782
- st.markdown(f"**Clear emotional state:** {emotion_map.get(emotion1, '')} {emotion1}")
783
- else:
784
- st.write("Single dominant emotion detected.")
785
- else:
786
- st.write("No significant emotional patterns detected.")
787
-
788
- with col2:
789
- st.subheader("Context Analysis")
790
- # Analyze the context based on combination of sentiment and sarcasm
791
- if is_sarcastic and sentiment == "POSITIVE":
792
- st.markdown("⚠️ **Potential Negative Connotation:** The positive sentiment might be misleading due to detected sarcasm.")
793
- elif is_sarcastic and sentiment == "NEGATIVE":
794
- st.markdown("⚠️ **Complex Expression:** Negative sentiment combined with sarcasm may indicate frustrated humor or ironic criticism.")
795
- elif sentiment == "MIXED":
796
- st.markdown("πŸ”„ **Ambivalent Message:** The content expresses mixed or conflicting emotions.")
797
- elif sentiment == "POSITIVE" and sarcasm_score > 0.3:
798
- st.markdown("⚠️ **Moderate Sarcasm Indicators:** The positive sentiment might be qualified by subtle sarcasm.")
799
- elif sentiment == "NEGATIVE" and not is_sarcastic:
800
- st.markdown("πŸ‘Ž **Clear Negative Expression:** The content expresses genuine negative sentiment without sarcasm.")
801
- elif sentiment == "POSITIVE" and not is_sarcastic:
802
- st.markdown("πŸ‘ **Clear Positive Expression:** The content expresses genuine positive sentiment without sarcasm.")
803
-
804
  # Debug expander
805
  with st.expander("Debug Information", expanded=False):
806
- st.write("Debugging information for troubleshooting:")
807
  for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
808
  st.text(f"{i + 1}. {debug_line}")
809
  if emotions_dict:
810
  st.write("Raw emotion scores:")
811
  for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
812
- if score > 0.01: # Only show non-negligible scores
813
  st.text(f"{emotion}: {score:.4f}")
814
 
815
- # Analysis details expander
816
- with st.expander("Analysis Details", expanded=False):
817
- st.write("""
818
- *How this works:*
819
- 1. *Speech Recognition*: Audio transcribed using OpenAI Whisper
820
- 2. *Emotion Analysis*: DistilBERT model trained for six emotions
821
- 3. *Sentiment Analysis*: Derived from dominant emotion
822
- 4. *Sarcasm Detection*: RoBERTa model for irony detection
823
- *Accuracy depends on*:
824
- - Audio quality
825
- - Speech clarity
826
- - Background noise
827
- - Speech patterns
828
- """)
829
-
830
- # Process base64 audio data - optimized
831
  def process_base64_audio(base64_data):
832
  try:
833
- # Ensure we have proper base64 data
834
  if not base64_data or not isinstance(base64_data, str) or not base64_data.startswith('data:'):
835
  st.error("Invalid audio data received")
836
  return None
837
 
838
- # Extract the base64 binary part
839
- try:
840
- base64_binary = base64_data.split(',')[1]
841
- except IndexError:
842
- st.error("Invalid base64 data format")
843
- return None
844
-
845
- # Decode the binary data
846
- try:
847
- binary_data = base64.b64decode(base64_binary)
848
- except Exception as e:
849
- st.error(f"Failed to decode base64 data: {str(e)}")
850
- return None
851
 
852
- # Create a temporary file
853
- temp_dir = tempfile.gettempdir()
854
- temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")
855
-
856
- # Write the binary data to the file
857
  with open(temp_file_path, "wb") as f:
858
  f.write(binary_data)
859
 
860
- # Validate the audio file
861
  if not validate_audio(temp_file_path):
862
  st.warning("Audio quality may not be optimal, but we'll try to process it.")
863
 
@@ -866,154 +523,113 @@ def process_base64_audio(base64_data):
866
  st.error(f"Error processing audio data: {str(e)}")
867
  return None
868
 
869
- # Preload models in background to improve performance
870
  def preload_models():
871
  threading.Thread(target=load_whisper_model).start()
872
  threading.Thread(target=get_emotion_classifier).start()
873
  threading.Thread(target=get_sarcasm_classifier).start()
874
 
875
- # Main App Logic - optimized
876
  def main():
877
- # Initialize session state
878
  if 'debug_info' not in st.session_state:
879
  st.session_state.debug_info = []
880
  if 'models_loaded' not in st.session_state:
881
  st.session_state.models_loaded = False
882
 
883
- # Preload models in background
884
  if not st.session_state.models_loaded:
885
  preload_models()
886
  st.session_state.models_loaded = True
887
-
888
- # Create tabs
889
  tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
890
 
891
  with tab1:
892
  st.header("Upload an Audio File")
893
- audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg", "m4a", "flac"],
894
- help="Upload an audio file for sentiment analysis (WAV, MP3, OGG, M4A, FLAC)")
895
 
896
  if audio_file:
897
  st.audio(audio_file.getvalue())
898
- st.caption("🎧 Uploaded Audio Playback")
899
-
900
- # Add a placeholder for progress updates
901
- progress_placeholder = st.empty()
902
-
903
- # Add analyze button
904
  upload_button = st.button("Analyze Upload", key="analyze_upload")
905
 
906
  if upload_button:
907
- # Show progress bar
908
- progress_bar = progress_placeholder.progress(0, text="Preparing audio...")
909
-
910
- # Process audio
911
  temp_audio_path = process_uploaded_audio(audio_file)
912
 
913
  if temp_audio_path:
914
- # Update progress
915
- progress_bar.progress(25, text="Transcribing audio...")
916
-
917
- # Transcribe audio
918
- main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)
919
-
920
- if main_text:
921
- # Update progress
922
- progress_bar.progress(60, text="Analyzing sentiment and emotions...")
923
-
924
- # Display alternatives if available
925
- if alternatives:
926
- with st.expander("Alternative transcriptions detected", expanded=False):
927
- for i, alt in enumerate(alternatives[:3], 1):
928
- st.write(f"{i}. {alt}")
929
-
930
- # Final analysis
931
- progress_bar.progress(90, text="Finalizing results...")
932
- display_analysis_results(main_text)
933
-
934
- # Complete progress
935
- progress_bar.progress(100, text="Analysis complete!")
936
- progress_placeholder.empty()
937
- else:
938
- progress_placeholder.empty()
939
- st.error("Could not transcribe the audio. Please try again with clearer audio.")
940
 
941
- # Clean up temp file
942
  if os.path.exists(temp_audio_path):
943
  os.remove(temp_audio_path)
944
  else:
945
- progress_placeholder.empty()
946
- st.error("Could not process the audio file. Please try a different file.")
947
 
948
  with tab2:
949
  st.header("Record Your Voice")
950
- st.write("Use the recorder below to analyze your speech in real-time.")
951
-
952
- # Browser recorder
953
- st.subheader("Browser-Based Recorder")
954
- st.write("Click the button below to start/stop recording.")
955
-
956
  audio_data = custom_audio_recorder()
957
 
958
  if audio_data:
959
- # Add a placeholder for progress updates
960
- progress_placeholder = st.empty()
961
-
962
- # Add analyze button
963
  analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
964
 
965
  if analyze_rec_button:
966
- # Show progress bar
967
- progress_bar = progress_placeholder.progress(0, text="Processing recording...")
968
-
969
- # Process the recording
970
  temp_audio_path = process_base64_audio(audio_data)
971
 
972
  if temp_audio_path:
973
- # Update progress
974
- progress_bar.progress(30, text="Transcribing speech...")
975
-
976
- # Transcribe the audio
977
- transcribed_text = transcribe_audio(temp_audio_path)
978
979
  if transcribed_text:
980
- # Update progress
981
- progress_bar.progress(70, text="Analyzing sentiment and emotions...")
982
-
983
- # Display the results
984
- display_analysis_results(transcribed_text)
985
-
986
- # Complete progress
987
- progress_bar.progress(100, text="Analysis complete!")
988
- progress_placeholder.empty()
989
  else:
990
- progress_placeholder.empty()
991
- st.error("Could not transcribe the audio. Please try speaking more clearly.")
992
 
993
- # Clean up temp file
994
  if os.path.exists(temp_audio_path):
995
  os.remove(temp_audio_path)
996
  else:
997
- progress_placeholder.empty()
998
- st.error("Could not process the recording. Please try again.")
999
 
1000
- # Text input option
1001
  st.subheader("Manual Text Input")
1002
- st.write("If recording doesn't work, you can type your text here:")
1003
-
1004
- manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
1005
  analyze_text_button = st.button("Analyze Text", key="analyze_manual")
1006
 
1007
  if analyze_text_button and manual_text:
1008
- with st.spinner("Analyzing text..."):
1009
- display_analysis_results(manual_text)
1010
 
1011
- # Show model information
1012
  show_model_info()
1013
-
1014
- # Add a small footer with version info
1015
  st.sidebar.markdown("---")
1016
- st.sidebar.caption("Voice Sentiment Analysis v2.0")
1017
  st.sidebar.caption("Optimized for speed and accuracy")
1018
 
1019
  if __name__ == "__main__":
 
13
  import base64
14
  import io
15
  import streamlit.components.v1 as components
16
  from concurrent.futures import ThreadPoolExecutor
17
+ from typing import Dict, Tuple, List, Any, Optional, Union
18
  import numpy as np
19
 
20
  # Suppress warnings for a clean console
 
40
 
41
  # Interface design
42
  st.title("🎙 Voice Based Sentiment Analysis")
43
+ st.write("Detect emotions, sentiment, and sarcasm from your voice with fast and accurate processing.")
44
 
45
  # Emotion Detection Function with optimizations
46
  @st.cache_resource
 
51
  model_max_length=512)
52
  model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
53
  model = model.to(device)
54
+ model.eval()
55
 
 
56
  classifier = pipeline("text-classification",
57
  model=model,
58
  tokenizer=tokenizer,
59
+ return_all_scores=True,
60
  device=0 if torch.cuda.is_available() else -1)
61
 
62
+ # Test the model
63
  test_result = classifier("I am happy today")
64
  print(f"Emotion classifier test: {test_result}")
65
 
 
69
  st.error(f"Failed to load emotion model. Please check logs.")
70
  return None
71
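For reference, a minimal sketch (not part of the commit) of the output shape this pipeline produces with return_all_scores=True and how perform_emotion_detection below consumes it; the model name comes from the file above, the example scores are illustrative only:

from transformers import pipeline

# Hypothetical standalone usage; the app builds the same pipeline inside get_emotion_classifier().
clf = pipeline("text-classification",
               model="bhadresh-savani/distilbert-base-uncased-emotion",
               return_all_scores=True)  # newer transformers releases prefer top_k=None, which the old code used

results = clf("I am happy today")
# One entry per input text, each a list of {'label', 'score'} dicts covering all six emotions, e.g.
# [[{'label': 'joy', 'score': 0.99}, {'label': 'sadness', 'score': 0.003}, ...]]
emotions_dict = {e['label']: e['score'] for e in results[0]}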
 
72
+ # Cache emotion results
73
+ @st.cache_data(ttl=600)
74
  def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
75
  try:
 
76
  if not text or len(text.strip()) < 3:
77
  return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
78
 
 
81
  st.error("Emotion classifier not available.")
82
  return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
83
 
84
+ # Process text directly (skip chunking for speed)
85
+ emotion_results = emotion_classifier(text)
86
 
87
  emotion_map = {
88
  "joy": "😊", "anger": "😑", "disgust": "🀒", "fear": "😨",
 
94
  neutral_emotions = ["surprise", "neutral"]
95
 
96
  # Process results
97
+ emotions_dict = {emotion['label']: emotion['score'] for emotion in emotion_results[0]}
98
 
99
+ # Filter emotions with a lower threshold
100
+ filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01} # Lowered from 0.05
101
 
102
  if not filtered_emotions:
103
  filtered_emotions = emotions_dict
104
 
105
+ # Check for mixed emotions
106
+ sorted_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)
107
+ if len(sorted_emotions) > 1 and sorted_emotions[1][1] > 0.8 * sorted_emotions[0][1]:
108
+ top_emotion = "MIXED"
109
+ else:
110
+ top_emotion = sorted_emotions[0][0]
111
 
112
+ # Determine sentiment
113
+ if top_emotion == "MIXED":
114
+ sentiment = "MIXED"
115
+ elif top_emotion in positive_emotions:
116
  sentiment = "POSITIVE"
117
  elif top_emotion in negative_emotions:
118
  sentiment = "NEGATIVE"
119
  else:
120
+ sentiment = "NEUTRAL"
121
 
122
  return emotions_dict, top_emotion, emotion_map, sentiment
123
  except Exception as e:
 
125
  print(f"Exception in emotion detection: {str(e)}")
126
  return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
127
 
128
+ # Sarcasm Detection Function
129
  @st.cache_resource
130
  def get_sarcasm_classifier():
131
  try:
 
134
  model_max_length=512)
135
  model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
136
  model = model.to(device)
137
+ model.eval()
138
 
139
  classifier = pipeline("text-classification",
140
  model=model,
 
151
  st.error(f"Failed to load sarcasm model. Please check logs.")
152
  return None
153
 
154
+ @st.cache_data(ttl=600)
 
155
  def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
156
  try:
157
  if not text or len(text.strip()) < 3:
 
162
  st.error("Sarcasm classifier not available.")
163
  return False, 0.0
164
 
165
+ result = sarcasm_classifier(text)[0]
166
+ is_sarcastic = result['label'] == "LABEL_1"
167
+ sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
168
+ return is_sarcastic, sarcasm_score
169
  except Exception as e:
170
  st.error(f"Sarcasm detection failed: {str(e)}")
171
  return False, 0.0
172
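A minimal sketch (not part of the commit) of the label convention the sarcasm path relies on; it assumes the cardiffnlp/twitter-roberta-base-irony checkpoint keeps its default LABEL_0 / LABEL_1 ids, with LABEL_1 meaning irony, exactly as the code above does:

from transformers import pipeline

irony = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-irony")

result = irony("Oh great, another Monday.")[0]  # e.g. {'label': 'LABEL_1', 'score': ...}
is_sarcastic = result['label'] == "LABEL_1"
# Report the probability of the sarcastic class either way, mirroring perform_sarcasm_detection.
sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']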
 
173
+ # Validate audio quality (streamlined for speed)
174
  def validate_audio(audio_path: str) -> bool:
175
  try:
176
  sound = AudioSegment.from_file(audio_path)
177
+ if len(sound) < 300: # Relaxed to 0.3s
178
  st.warning("Audio is very short. Longer audio provides better analysis.")
179
  return False
180
  return True
 
182
  st.error(f"Invalid or corrupted audio file: {str(e)}")
183
  return False
184
 
185
+ # Speech Recognition with Whisper
186
  @st.cache_resource
187
  def load_whisper_model():
188
  try:
189
+ model = whisper.load_model("base") # Fastest model for quick transcription
 
190
  return model
191
  except Exception as e:
192
  print(f"Error loading Whisper model: {str(e)}")
 
194
  return None
195
 
196
  @st.cache_data
197
+ def transcribe_audio(audio_path: str) -> str:
198
  try:
 
199
  sound = AudioSegment.from_file(audio_path)
 
 
200
  # Convert to WAV format (16kHz, mono) for Whisper
201
  temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_converted_{int(time.time())}.wav")
202
+ sound = sound.set_frame_rate(16000).set_channels(1)
 
 
203
  sound.export(temp_wav_path, format="wav")
204
 
205
  # Load model
206
  model = load_whisper_model()
207
  if model is None:
208
+ return ""
209
 
210
  # Transcribe with optimized settings
211
  result = model.transcribe(
212
  temp_wav_path,
213
  language="en",
214
  task="transcribe",
215
+ fp16=torch.cuda.is_available(),
216
+ beam_size=3 # Reduced for speed
217
  )
218
 
219
  main_text = result["text"].strip()
 
222
  if os.path.exists(temp_wav_path):
223
  os.remove(temp_wav_path)
224
 
225
+ return main_text
226
  except Exception as e:
227
  st.error(f"Transcription failed: {str(e)}")
228
+ return ""
229
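For context, a minimal standalone sketch (not part of the commit) of the transcription call above; it assumes the openai-whisper package and ffmpeg are installed, and "speech.wav" is a placeholder for the 16 kHz mono file the app writes with pydub:

import torch
import whisper

model = whisper.load_model("base")  # the commit switches from "medium" to "base" for speed
result = model.transcribe(
    "speech.wav",
    language="en",
    task="transcribe",
    fp16=torch.cuda.is_available(),
    beam_size=3,
)
print(result["text"].strip())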
 
230
+ # Process uploaded audio files
231
  def process_uploaded_audio(audio_file) -> Optional[str]:
232
  if not audio_file:
233
  return None
234
 
235
  try:
236
  temp_dir = tempfile.gettempdir()
237
+ ext = audio_file.name.split('.')[-1].lower() if '.' in audio_file.name else ''
238
  if ext not in ['wav', 'mp3', 'ogg', 'm4a', 'flac']:
239
  st.error("Unsupported audio format. Please upload WAV, MP3, OGG, M4A, or FLAC.")
240
  return None
241
 
242
  temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")
 
243
  with open(temp_file_path, "wb") as f:
244
  f.write(audio_file.getvalue())
245
 
246
  if not validate_audio(temp_file_path):
247
+ st.warning("Audio may not be optimal, but we'll try to process it.")
 
248
 
249
  return temp_file_path
250
  except Exception as e:
 
254
  # Show model information
255
  def show_model_info():
256
  st.sidebar.header("🧠 About the Models")
 
257
  model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
258
 
259
  with model_tabs[0]:
260
  st.markdown("""
261
  *Emotion Model*: distilbert-base-uncased-emotion
262
+ - Detects joy, anger, disgust, fear, sadness, surprise
263
  - Architecture: DistilBERT base
 
264
  [πŸ” Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
265
  """)
266
 
267
  with model_tabs[1]:
268
  st.markdown("""
269
  *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
270
+ - Trained on Twitter irony dataset
271
  - Architecture: RoBERTa base
 
272
  [πŸ” Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
273
  """)
274
 
275
  with model_tabs[2]:
276
  st.markdown("""
277
+ *Speech Recognition*: OpenAI Whisper (base model)
278
+ - Optimized for speed
279
+ - Handles varied accents
280
+ *Tips*: Use good mic, reduce noise
 
281
  [πŸ” Model Details](https://github.com/openai/whisper)
282
  """)
283
 
284
+ # Custom audio recorder
285
  def custom_audio_recorder():
286
+ st.warning("Browser-based recording requires microphone access. If recording fails, try uploading an audio file.")
287
  audio_recorder_html = """
288
  <script>
289
  var audioRecorder = {
 
291
  mediaRecorder: null,
292
  streamBeingCaptured: null,
293
  isRecording: false,
 
 
294
 
295
  start: function() {
296
  if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
297
+ document.getElementById('status-message').textContent = "Recording not supported";
298
+ return Promise.reject(new Error('mediaDevices API not supported'));
299
  }
300
+ return navigator.mediaDevices.getUserMedia({
301
+ audio: {
302
+ echoCancellation: true,
303
+ noiseSuppression: true,
304
+ autoGainControl: true
305
+ }
306
+ })
307
+ .then(stream => {
308
+ audioRecorder.streamBeingCaptured = stream;
309
+ audioRecorder.mediaRecorder = new MediaRecorder(stream, {
310
+ mimeType: 'audio/webm;codecs=opus',
311
+ audioBitsPerSecond: 128000
312
  });
313
+ audioRecorder.audioBlobs = [];
314
+ audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
315
+ audioRecorder.audioBlobs.push(event.data);
316
+ });
317
+ audioRecorder.mediaRecorder.start(100);
318
+ audioRecorder.isRecording = true;
319
+ document.getElementById('status-message').textContent = "Recording...";
320
+ });
321
  },
322
 
323
  stop: function() {
324
  return new Promise(resolve => {
325
  let mimeType = audioRecorder.mediaRecorder.mimeType;
 
326
  audioRecorder.mediaRecorder.addEventListener("stop", () => {
327
  let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
328
  resolve(audioBlob);
329
  audioRecorder.isRecording = false;
330
  document.getElementById('status-message').textContent = "Recording stopped";
331
  });
 
332
  audioRecorder.mediaRecorder.stop();
333
+ audioRecorder.streamBeingCaptured.getTracks().forEach(track => track.stop());
334
+ audioRecorder.mediaRecorder = null;
335
+ audioRecorder.streamBeingCaptured = null;
336
  });
337
  }
338
  }
339
 
 
342
  function toggleRecording() {
343
  var recordButton = document.getElementById('record-button');
344
  var statusMessage = document.getElementById('status-message');
 
 
345
 
346
  if (!isRecording) {
347
  audioRecorder.start()
 
349
  isRecording = true;
350
  recordButton.textContent = 'Stop Recording';
351
  recordButton.classList.add('recording');
 
 
352
  })
353
  .catch(error => {
354
  statusMessage.textContent = 'Error: ' + error.message;
 
374
  isRecording = false;
375
  recordButton.textContent = 'Start Recording';
376
  recordButton.classList.remove('recording');
 
 
377
  });
378
  }
379
  }
 
387
  <div class="audio-recorder-container">
388
  <button id="record-button" class="record-button">Start Recording</button>
389
  <div id="status-message" class="status-message">Ready to record</div>
 
 
 
 
 
 
 
 
390
  <audio id="audio-playback" controls style="display:none; margin-top:10px; width:100%;"></audio>
391
  <input type="hidden" id="audio-data" name="audio-data">
392
  </div>
 
412
  font-size: 16px;
413
  font-weight: bold;
414
  transition: all 0.3s ease;
 
415
  }
416
 
417
  .record-button:hover {
 
430
  color: #666;
431
  }
432

433
  @keyframes pulse {
434
  0% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0.7); }
435
  50% { opacity: 0.8; box-shadow: 0 0 0 10px rgba(255,0,0,0); }
 
438
  </style>
439
  """
440
 
441
+ return components.html(audio_recorder_html, height=150)
442
 
443
+ # Display analysis results
444
+ def display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score):
445
  st.session_state.debug_info = st.session_state.get('debug_info', [])
446
+ st.session_state.debug_info.append(f"Text: {transcribed_text[:50]}...")
447
+ st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}, Sarcasm: {is_sarcastic}")
448
+ st.session_state.debug_info = st.session_state.debug_info[-100:]
449
 
450
  st.header("Transcribed Text")
451
+ st.text_area("Text", transcribed_text, height=100, disabled=True)
 
452
 
453
+ # Confidence estimation
454
+ word_count = len(transcribed_text.split())
 
455
  confidence_score = min(0.98, max(0.75, 0.75 + (word_count / 100) * 0.2))
 
456
  st.caption(f"Estimated transcription confidence: {confidence_score:.2f}")
457
 
458
  st.header("Analysis Results")
 
462
  st.subheader("Sentiment")
463
  sentiment_icon = "πŸ‘" if sentiment == "POSITIVE" else "πŸ‘Ž" if sentiment == "NEGATIVE" else "πŸ”„" if sentiment == "MIXED" else "😐"
464
  st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
 
465
 
466
  st.subheader("Sarcasm")
467
  sarcasm_icon = "😏" if is_sarcastic else "😐"
468
  sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
469
  st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
 
 
 
 
 
 
 
 
 
470
 
471
  with col2:
472
  st.subheader("Emotions")
473
  if emotions_dict:
474
+ st.markdown(f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
 
475
 
 
476
  sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
477
+ significant_emotions = [(e, s) for e, s in sorted_emotions if s > 0.01]
478
 
479
  if significant_emotions:
480
  emotions = [e[0] for e in significant_emotions]
481
  scores = [e[1] for e in significant_emotions]
 
 
482
  fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
483
  title="Emotion Distribution", color=emotions,
484
  color_discrete_sequence=px.colors.qualitative.Bold)
485
+ fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14,
486
+ margin=dict(l=20, r=20, t=40, b=20), bargap=0.3)
487
  st.plotly_chart(fig, use_container_width=True)
488
  else:
489
  st.write("No significant emotions detected.")
490
  else:
491
  st.write("No emotions detected.")
492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  # Debug expander
494
  with st.expander("Debug Information", expanded=False):
495
+ st.write("Debugging information:")
496
  for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
497
  st.text(f"{i + 1}. {debug_line}")
498
  if emotions_dict:
499
  st.write("Raw emotion scores:")
500
  for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
501
+ if score > 0.01:
502
  st.text(f"{emotion}: {score:.4f}")
503
 
504
+ # Process base64 audio data
505
  def process_base64_audio(base64_data):
506
  try:
 
507
  if not base64_data or not isinstance(base64_data, str) or not base64_data.startswith('data:'):
508
  st.error("Invalid audio data received")
509
  return None
510
 
511
+ base64_binary = base64_data.split(',')[1]
512
+ binary_data = base64.b64decode(base64_binary)
513
+ temp_file_path = os.path.join(tempfile.gettempdir(), f"recording_{int(time.time())}.wav")
514

515
  with open(temp_file_path, "wb") as f:
516
  f.write(binary_data)
517
 
 
518
  if not validate_audio(temp_file_path):
519
  st.warning("Audio quality may not be optimal, but we'll try to process it.")
520
 
 
523
  st.error(f"Error processing audio data: {str(e)}")
524
  return None
525
 
526
+ # Preload models in background
527
  def preload_models():
528
  threading.Thread(target=load_whisper_model).start()
529
  threading.Thread(target=get_emotion_classifier).start()
530
  threading.Thread(target=get_sarcasm_classifier).start()
531
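A minimal sketch (not part of the commit) of the background-preloading pattern used here; it depends on import threading being available at module level and on the @st.cache_resource loaders defined above, so the threads only warm the cache. daemon=True is an assumption added for illustration:

import threading

def preload_models():
    # Each target is a cached loader: the first call does the heavy download/initialisation,
    # later calls from the main script simply reuse the cached object.
    for loader in (load_whisper_model, get_emotion_classifier, get_sarcasm_classifier):
        threading.Thread(target=loader, daemon=True).start()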
 
532
+ # Main App Logic
533
  def main():
 
534
  if 'debug_info' not in st.session_state:
535
  st.session_state.debug_info = []
536
  if 'models_loaded' not in st.session_state:
537
  st.session_state.models_loaded = False
538
 
 
539
  if not st.session_state.models_loaded:
540
  preload_models()
541
  st.session_state.models_loaded = True
542
+
 
543
  tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
544
 
545
  with tab1:
546
  st.header("Upload an Audio File")
547
+ audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg", "m4a", "flac"])
 
548
 
549
  if audio_file:
550
  st.audio(audio_file.getvalue())
551
  upload_button = st.button("Analyze Upload", key="analyze_upload")
552
 
553
  if upload_button:
554
+ progress_bar = st.progress(0, text="Preparing audio...")
 
 
 
555
  temp_audio_path = process_uploaded_audio(audio_file)
556
 
557
  if temp_audio_path:
558
+ progress_bar.progress(25, text="Processing in parallel...")
559
 
560
+ with ThreadPoolExecutor(max_workers=3) as executor:
561
+ transcribe_future = executor.submit(transcribe_audio, temp_audio_path)
562
+ emotion_future = executor.submit(perform_emotion_detection, transcribe_future.result())
563
+ sarcasm_future = executor.submit(perform_sarcasm_detection, transcribe_future.result())
564
+
565
+ transcribed_text = transcribe_future.result()
566
+ emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
567
+ is_sarcastic, sarcasm_score = sarcasm_future.result()
568
+
569
+ progress_bar.progress(90, text="Finalizing results...")
570
+ if transcribed_text:
571
+ display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
572
+ else:
573
+ st.error("Could not transcribe the audio. Try clearer audio.")
574
+
575
+ progress_bar.progress(100, text="Analysis complete!")
576
  if os.path.exists(temp_audio_path):
577
  os.remove(temp_audio_path)
578
  else:
579
+ st.error("Could not process the audio file.")
 
580
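One note on the parallel block above, with a hypothetical equivalent spelled out (not part of the commit): transcribe_future.result() is evaluated while the arguments for executor.submit are built, so transcription finishes before the emotion and sarcasm tasks are submitted; only those two text analyses actually run concurrently.

from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=3) as executor:
    # Blocks here until transcription is done...
    transcribed_text = executor.submit(transcribe_audio, temp_audio_path).result()
    # ...then the two text analyses overlap.
    emotion_future = executor.submit(perform_emotion_detection, transcribed_text)
    sarcasm_future = executor.submit(perform_sarcasm_detection, transcribed_text)
    emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
    is_sarcastic, sarcasm_score = sarcasm_future.result()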
 
581
  with tab2:
582
  st.header("Record Your Voice")
 
 
 
 
 
 
583
  audio_data = custom_audio_recorder()
584
 
585
  if audio_data:
586
  analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
587
 
588
  if analyze_rec_button:
589
+ progress_bar = st.progress(0, text="Processing recording...")
 
 
 
590
  temp_audio_path = process_base64_audio(audio_data)
591
 
592
  if temp_audio_path:
593
+ progress_bar.progress(30, text="Processing in parallel...")
594
 
595
+ with ThreadPoolExecutor(max_workers=3) as executor:
596
+ transcribe_future = executor.submit(transcribe_audio, temp_audio_path)
597
+ emotion_future = executor.submit(perform_emotion_detection, transcribe_future.result())
598
+ sarcasm_future = executor.submit(perform_sarcasm_detection, transcribe_future.result())
599
+
600
+ transcribed_text = transcribe_future.result()
601
+ emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
602
+ is_sarcastic, sarcasm_score = sarcasm_future.result()
603
+
604
+ progress_bar.progress(90, text="Finalizing results...")
605
  if transcribed_text:
606
+ display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
607
  else:
608
+ st.error("Could not transcribe the audio. Speak clearly.")
 
609
 
610
+ progress_bar.progress(100, text="Analysis complete!")
611
  if os.path.exists(temp_audio_path):
612
  os.remove(temp_audio_path)
613
  else:
614
+ st.error("Could not process the recording.")
 
615
 
 
616
  st.subheader("Manual Text Input")
617
+ manual_text = st.text_area("Enter text to analyze:", placeholder="Type text to analyze...")
 
 
618
  analyze_text_button = st.button("Analyze Text", key="analyze_manual")
619
 
620
  if analyze_text_button and manual_text:
621
+ with ThreadPoolExecutor(max_workers=2) as executor:
622
+ emotion_future = executor.submit(perform_emotion_detection, manual_text)
623
+ sarcasm_future = executor.submit(perform_sarcasm_detection, manual_text)
624
+
625
+ emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
626
+ is_sarcastic, sarcasm_score = sarcasm_future.result()
627
+
628
+ display_analysis_results(manual_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
629
 
 
630
  show_model_info()
 
 
631
  st.sidebar.markdown("---")
632
+ st.sidebar.caption("Voice Sentiment Analysis v2.1")
633
  st.sidebar.caption("Optimized for speed and accuracy")
634
 
635
  if __name__ == "__main__":