MaroofTechSorcerer committed on
Commit 06ac263 · verified · 1 Parent(s): d8a1b1b

Update app.py

Files changed (1)
  1. app.py +235 -282
app.py CHANGED
@@ -13,9 +13,6 @@ import time
13
  import base64
14
  import io
15
  import streamlit.components.v1 as components
16
- from concurrent.futures import ThreadPoolExecutor
17
- from typing import Dict, Tuple, List, Any, Optional, Union
18
- import numpy as np
19
 
20
  # Suppress warnings for a clean console
21
  logging.getLogger("torch").setLevel(logging.CRITICAL)
@@ -23,14 +20,6 @@ logging.getLogger("transformers").setLevel(logging.CRITICAL)
23
  warnings.filterwarnings("ignore")
24
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
 
26
- # Check if NumPy is available
27
- try:
28
- test_array = np.array([1, 2, 3])
29
- torch.from_numpy(test_array)
30
- except Exception as e:
31
- st.error(f"NumPy is not available or incompatible with PyTorch: {str(e)}. Ensure 'numpy' is in requirements.txt and reinstall dependencies.")
32
- st.stop()
33
-
34
  # Check if CUDA is available, otherwise use CPU
35
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
  print(f"Using device: {device}")
@@ -40,26 +29,23 @@ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
40
 
41
  # Interface design
42
st.title("🎙 Voice Based Sentiment Analysis")
43
- st.write("Detect emotions, sentiment, and sarcasm from your voice with fast and accurate processing.")
44
 
45
- # Emotion Detection Function with optimizations
46
  @st.cache_resource
47
  def get_emotion_classifier():
48
  try:
49
- tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion",
50
- use_fast=True,
51
- model_max_length=512)
52
  model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
53
  model = model.to(device)
54
- model.eval()
55
-
56
  classifier = pipeline("text-classification",
57
  model=model,
58
  tokenizer=tokenizer,
59
- return_all_scores=True,
60
  device=0 if torch.cuda.is_available() else -1)
61
 
62
- # Test the model
63
  test_result = classifier("I am happy today")
64
  print(f"Emotion classifier test: {test_result}")
65
 
@@ -69,79 +55,98 @@ def get_emotion_classifier():
69
  st.error(f"Failed to load emotion model. Please check logs.")
70
  return None
71
 
72
- # Cache emotion results
73
- @st.cache_data(ttl=600)
74
- def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
75
  try:
76
  if not text or len(text.strip()) < 3:
77
- return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
78
 
79
  emotion_classifier = get_emotion_classifier()
80
  if emotion_classifier is None:
81
  st.error("Emotion classifier not available.")
82
- return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
83
 
84
- # Process text directly (skip chunking for speed)
85
emotion_results = emotion_classifier(text)
86
87
  emotion_map = {
88
  "joy": "😊", "anger": "😑", "disgust": "🀒", "fear": "😨",
89
- "sadness": "😭", "surprise": "😲", "neutral": "😐"
90
  }
91
-
92
  positive_emotions = ["joy"]
93
  negative_emotions = ["anger", "disgust", "fear", "sadness"]
94
- neutral_emotions = ["surprise", "neutral"]
95
 
96
- # Process results
97
- emotions_dict = {emotion['label']: emotion['score'] for emotion in emotion_results[0]}
 
98
 
99
- # Filter emotions with a lower threshold
100
- filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01} # Lowered from 0.05
101
 
102
  if not filtered_emotions:
103
  filtered_emotions = emotions_dict
104
 
105
- # Check for mixed emotions
106
- sorted_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)
107
- if len(sorted_emotions) > 1 and sorted_emotions[1][1] > 0.8 * sorted_emotions[0][1]:
108
- top_emotion = "MIXED"
109
- else:
110
- top_emotion = sorted_emotions[0][0]
111
 
112
- # Determine sentiment
113
- if top_emotion == "MIXED":
114
- sentiment = "MIXED"
115
- elif top_emotion in positive_emotions:
116
  sentiment = "POSITIVE"
117
  elif top_emotion in negative_emotions:
118
  sentiment = "NEGATIVE"
119
  else:
120
- sentiment = "NEUTRAL"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  return emotions_dict, top_emotion, emotion_map, sentiment
123
  except Exception as e:
124
  st.error(f"Emotion detection failed: {str(e)}")
125
  print(f"Exception in emotion detection: {str(e)}")
126
- return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
127
 
128
  # Sarcasm Detection Function
129
  @st.cache_resource
130
  def get_sarcasm_classifier():
131
  try:
132
- tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony",
133
- use_fast=True,
134
- model_max_length=512)
135
  model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
136
  model = model.to(device)
137
- model.eval()
138
-
139
- classifier = pipeline("text-classification",
140
- model=model,
141
- tokenizer=tokenizer,
142
  device=0 if torch.cuda.is_available() else -1)
143
 
144
- # Test the model
145
  test_result = classifier("This is totally amazing")
146
  print(f"Sarcasm classifier test: {test_result}")
147
 
@@ -151,8 +156,7 @@ def get_sarcasm_classifier():
151
  st.error(f"Failed to load sarcasm model. Please check logs.")
152
  return None
153
 
154
- @st.cache_data(ttl=600)
155
- def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
156
  try:
157
  if not text or len(text.strip()) < 3:
158
  return False, 0.0
@@ -170,82 +174,84 @@ def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
170
  st.error(f"Sarcasm detection failed: {str(e)}")
171
  return False, 0.0
172
 
173
- # Validate audio quality (streamlined for speed)
174
- def validate_audio(audio_path: str) -> bool:
175
  try:
176
  sound = AudioSegment.from_file(audio_path)
177
- if len(sound) < 300: # Relaxed to 0.3s
178
- st.warning("Audio is very short. Longer audio provides better analysis.")
 
 
 
179
  return False
180
  return True
181
- except Exception as e:
182
- st.error(f"Invalid or corrupted audio file: {str(e)}")
183
  return False
184
 
185
  # Speech Recognition with Whisper
186
  @st.cache_resource
187
  def load_whisper_model():
188
  try:
189
- model = whisper.load_model("base") # Fastest model for quick transcription
190
  return model
191
  except Exception as e:
192
  print(f"Error loading Whisper model: {str(e)}")
193
  st.error(f"Failed to load Whisper model. Please check logs.")
194
  return None
195
 
196
- @st.cache_data
197
- def transcribe_audio(audio_path: str) -> str:
198
  try:
 
199
sound = AudioSegment.from_file(audio_path)
200
  # Convert to WAV format (16kHz, mono) for Whisper
201
- temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_converted_{int(time.time())}.wav")
202
- sound = sound.set_frame_rate(16000).set_channels(1)
 
203
  sound.export(temp_wav_path, format="wav")
204
 
205
- # Load model
206
  model = load_whisper_model()
207
- if model is None:
208
- return ""
209
-
210
- # Transcribe with optimized settings
211
- result = model.transcribe(
212
- temp_wav_path,
213
- language="en",
214
- task="transcribe",
215
- fp16=torch.cuda.is_available(),
216
- beam_size=3 # Reduced for speed
217
- )
218
-
219
  main_text = result["text"].strip()
220
 
221
  # Clean up
222
  if os.path.exists(temp_wav_path):
223
os.remove(temp_wav_path)
224
225
  return main_text
226
  except Exception as e:
227
  st.error(f"Transcription failed: {str(e)}")
228
- return ""
229
 
230
- # Process uploaded audio files
231
- def process_uploaded_audio(audio_file) -> Optional[str]:
232
  if not audio_file:
233
  return None
234
 
235
  try:
236
  temp_dir = tempfile.gettempdir()
237
- ext = audio_file.name.split('.')[-1].lower() if '.' in audio_file.name else ''
238
- if ext not in ['wav', 'mp3', 'ogg', 'm4a', 'flac']:
239
- st.error("Unsupported audio format. Please upload WAV, MP3, OGG, M4A, or FLAC.")
 
240
  return None
241
-
242
  temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")
 
243
  with open(temp_file_path, "wb") as f:
244
  f.write(audio_file.getvalue())
245
 
246
  if not validate_audio(temp_file_path):
247
- st.warning("Audio may not be optimal, but we'll try to process it.")
248
-
249
  return temp_file_path
250
  except Exception as e:
251
  st.error(f"Error processing uploaded audio: {str(e)}")
@@ -254,95 +260,90 @@ def process_uploaded_audio(audio_file) -> Optional[str]:
254
  # Show model information
255
  def show_model_info():
256
st.sidebar.header("🧠 About the Models")
 
257
  model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
258
 
259
  with model_tabs[0]:
260
  st.markdown("""
261
  *Emotion Model*: distilbert-base-uncased-emotion
262
- - Detects joy, anger, disgust, fear, sadness, surprise
263
  - Architecture: DistilBERT base
 
264
[🔍 Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
265
  """)
266
 
267
  with model_tabs[1]:
268
  st.markdown("""
269
  *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
270
- - Trained on Twitter irony dataset
271
  - Architecture: RoBERTa base
 
272
[🔍 Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
273
  """)
274
 
275
  with model_tabs[2]:
276
  st.markdown("""
277
- *Speech Recognition*: OpenAI Whisper (base model)
278
- - Optimized for speed
279
- - Handles varied accents
280
- *Tips*: Use good mic, reduce noise
 
 
281
[🔍 Model Details](https://github.com/openai/whisper)
282
  """)
283
 
284
- # Custom audio recorder
285
  def custom_audio_recorder():
286
- st.warning("Browser-based recording requires microphone access. If recording fails, try uploading an audio file.")
287
  audio_recorder_html = """
288
  <script>
289
  var audioRecorder = {
290
  audioBlobs: [],
291
  mediaRecorder: null,
292
  streamBeingCaptured: null,
293
- isRecording: false,
294
-
295
  start: function() {
296
  if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
297
- document.getElementById('status-message').textContent = "Recording not supported";
298
- return Promise.reject(new Error('mediaDevices API not supported'));
299
  }
300
- return navigator.mediaDevices.getUserMedia({
301
- audio: {
302
- echoCancellation: true,
303
- noiseSuppression: true,
304
- autoGainControl: true
305
- }
306
- })
307
- .then(stream => {
308
- audioRecorder.streamBeingCaptured = stream;
309
- audioRecorder.mediaRecorder = new MediaRecorder(stream, {
310
- mimeType: 'audio/webm;codecs=opus',
311
- audioBitsPerSecond: 128000
312
- });
313
- audioRecorder.audioBlobs = [];
314
- audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
315
- audioRecorder.audioBlobs.push(event.data);
316
- });
317
- audioRecorder.mediaRecorder.start(100);
318
- audioRecorder.isRecording = true;
319
- document.getElementById('status-message').textContent = "Recording...";
320
- });
321
  },
322
-
323
  stop: function() {
324
  return new Promise(resolve => {
325
  let mimeType = audioRecorder.mediaRecorder.mimeType;
326
  audioRecorder.mediaRecorder.addEventListener("stop", () => {
327
  let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
328
  resolve(audioBlob);
329
- audioRecorder.isRecording = false;
330
- document.getElementById('status-message').textContent = "Recording stopped";
331
  });
332
  audioRecorder.mediaRecorder.stop();
333
- audioRecorder.streamBeingCaptured.getTracks().forEach(track => track.stop());
334
- audioRecorder.mediaRecorder = null;
335
- audioRecorder.streamBeingCaptured = null;
336
});
337
  }
338
  }
339
-
340
  var isRecording = false;
341
-
 
 
342
  function toggleRecording() {
343
- var recordButton = document.getElementById('record-button');
344
- var statusMessage = document.getElementById('status-message');
345
-
346
  if (!isRecording) {
347
  audioRecorder.start()
348
  .then(() => {
@@ -351,286 +352,238 @@ def custom_audio_recorder():
351
  recordButton.classList.add('recording');
352
  })
353
  .catch(error => {
354
- statusMessage.textContent = 'Error: ' + error.message;
355
  });
356
  } else {
357
  audioRecorder.stop()
358
  .then(audioBlob => {
359
  const audioUrl = URL.createObjectURL(audioBlob);
360
- var audioElement = document.getElementById('audio-playback');
361
  audioElement.src = audioUrl;
362
- audioElement.style.display = 'block';
363
-
364
  const reader = new FileReader();
365
  reader.readAsDataURL(audioBlob);
366
  reader.onloadend = function() {
367
  const base64data = reader.result;
368
- var audioData = document.getElementById('audio-data');
369
  audioData.value = base64data;
370
  const streamlitMessage = {type: "streamlit:setComponentValue", value: base64data};
371
  window.parent.postMessage(streamlitMessage, "*");
372
  }
373
-
374
  isRecording = false;
375
  recordButton.textContent = 'Start Recording';
376
  recordButton.classList.remove('recording');
377
  });
378
  }
379
  }
380
-
381
  document.addEventListener('DOMContentLoaded', function() {
382
- var recordButton = document.getElementById('record-button');
 
 
383
  recordButton.addEventListener('click', toggleRecording);
384
  });
385
  </script>
386
-
387
  <div class="audio-recorder-container">
388
  <button id="record-button" class="record-button">Start Recording</button>
389
- <div id="status-message" class="status-message">Ready to record</div>
390
- <audio id="audio-playback" controls style="display:none; margin-top:10px; width:100%;"></audio>
391
  <input type="hidden" id="audio-data" name="audio-data">
392
  </div>
393
-
394
  <style>
395
  .audio-recorder-container {
396
  display: flex;
397
  flex-direction: column;
398
  align-items: center;
399
- padding: 15px;
400
- border-radius: 8px;
401
- background-color: #f7f7f7;
402
- box-shadow: 0 2px 5px rgba(0,0,0,0.1);
403
  }
404
-
405
  .record-button {
406
  background-color: #f63366;
407
  color: white;
408
  border: none;
409
- padding: 12px 24px;
410
- border-radius: 24px;
411
  cursor: pointer;
412
  font-size: 16px;
413
- font-weight: bold;
414
- transition: all 0.3s ease;
415
- }
416
-
417
- .record-button:hover {
418
- background-color: #e62958;
419
- transform: translateY(-2px);
420
  }
421
-
422
  .record-button.recording {
423
  background-color: #ff0000;
424
  animation: pulse 1.5s infinite;
425
  }
426
-
427
- .status-message {
428
- margin-top: 10px;
429
- font-size: 14px;
430
- color: #666;
431
- }
432
-
433
  @keyframes pulse {
434
- 0% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0.7); }
435
- 50% { opacity: 0.8; box-shadow: 0 0 0 10px rgba(255,0,0,0); }
436
- 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0); }
437
  }
438
  </style>
439
  """
440
 
441
  return components.html(audio_recorder_html, height=150)
442
 
443
- # Display analysis results
444
- def display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score):
445
  st.session_state.debug_info = st.session_state.get('debug_info', [])
446
- st.session_state.debug_info.append(f"Text: {transcribed_text[:50]}...")
447
- st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}, Sarcasm: {is_sarcastic}")
448
- st.session_state.debug_info = st.session_state.debug_info[-100:]
449
 
450
  st.header("Transcribed Text")
451
- st.text_area("Text", transcribed_text, height=100, disabled=True)
452
 
453
- # Confidence estimation
454
- word_count = len(transcribed_text.split())
455
- confidence_score = min(0.98, max(0.75, 0.75 + (word_count / 100) * 0.2))
456
- st.caption(f"Estimated transcription confidence: {confidence_score:.2f}")
457
 
458
  st.header("Analysis Results")
459
  col1, col2 = st.columns([1, 2])
460
 
461
  with col1:
462
  st.subheader("Sentiment")
463
- sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "🔄" if sentiment == "MIXED" else "😐"
464
- st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
 
465
 
466
  st.subheader("Sarcasm")
467
sarcasm_icon = "😏" if is_sarcastic else "😐"
468
  sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
469
- st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
 
470
 
471
  with col2:
472
  st.subheader("Emotions")
473
  if emotions_dict:
474
- st.markdown(f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
475
-
476
  sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
477
- significant_emotions = [(e, s) for e, s in sorted_emotions if s > 0.01]
478
-
479
- if significant_emotions:
480
- emotions = [e[0] for e in significant_emotions]
481
- scores = [e[1] for e in significant_emotions]
482
- fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
483
- title="Emotion Distribution", color=emotions,
484
- color_discrete_sequence=px.colors.qualitative.Bold)
485
- fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14,
486
- margin=dict(l=20, r=20, t=40, b=20), bargap=0.3)
487
- st.plotly_chart(fig, use_container_width=True)
488
- else:
489
- st.write("No significant emotions detected.")
490
  else:
491
  st.write("No emotions detected.")
492
 
493
- # Debug expander
494
  with st.expander("Debug Information", expanded=False):
495
- st.write("Debugging information:")
496
  for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
497
  st.text(f"{i + 1}. {debug_line}")
498
  if emotions_dict:
499
  st.write("Raw emotion scores:")
500
  for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
501
- if score > 0.01:
502
  st.text(f"{emotion}: {score:.4f}")
503
504
  # Process base64 audio data
505
  def process_base64_audio(base64_data):
506
  try:
507
- if not base64_data or not isinstance(base64_data, str) or not base64_data.startswith('data:'):
508
- st.error("Invalid audio data received")
509
- return None
510
-
511
  base64_binary = base64_data.split(',')[1]
512
  binary_data = base64.b64decode(base64_binary)
513
- temp_file_path = os.path.join(tempfile.gettempdir(), f"recording_{int(time.time())}.wav")
 
 
514
 
515
  with open(temp_file_path, "wb") as f:
516
  f.write(binary_data)
517
 
518
  if not validate_audio(temp_file_path):
519
- st.warning("Audio quality may not be optimal, but we'll try to process it.")
520
-
521
  return temp_file_path
522
  except Exception as e:
523
  st.error(f"Error processing audio data: {str(e)}")
524
  return None
525
 
526
- # Preload models in background
527
- def preload_models():
528
- threading.Thread(target=load_whisper_model).start()
529
- threading.Thread(target=get_emotion_classifier).start()
530
- threading.Thread(target=get_sarcasm_classifier).start()
531
-
532
  # Main App Logic
533
  def main():
534
  if 'debug_info' not in st.session_state:
535
  st.session_state.debug_info = []
536
- if 'models_loaded' not in st.session_state:
537
- st.session_state.models_loaded = False
538
-
539
- if not st.session_state.models_loaded:
540
- preload_models()
541
- st.session_state.models_loaded = True
542
 
543
tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
544
 
545
  with tab1:
546
  st.header("Upload an Audio File")
547
- audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg", "m4a", "flac"])
 
548
 
549
  if audio_file:
550
  st.audio(audio_file.getvalue())
 
 
551
  upload_button = st.button("Analyze Upload", key="analyze_upload")
552
 
553
  if upload_button:
554
- progress_bar = st.progress(0, text="Preparing audio...")
555
- temp_audio_path = process_uploaded_audio(audio_file)
556
-
557
- if temp_audio_path:
558
- progress_bar.progress(25, text="Processing in parallel...")
559
-
560
- with ThreadPoolExecutor(max_workers=3) as executor:
561
- transcribe_future = executor.submit(transcribe_audio, temp_audio_path)
562
- emotion_future = executor.submit(perform_emotion_detection, transcribe_future.result())
563
- sarcasm_future = executor.submit(perform_sarcasm_detection, transcribe_future.result())
564
-
565
- transcribed_text = transcribe_future.result()
566
- emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
567
- is_sarcastic, sarcasm_score = sarcasm_future.result()
568
-
569
- progress_bar.progress(90, text="Finalizing results...")
570
- if transcribed_text:
571
- display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
572
- else:
573
- st.error("Could not transcribe the audio. Try clearer audio.")
574
-
575
- progress_bar.progress(100, text="Analysis complete!")
576
- if os.path.exists(temp_audio_path):
577
- os.remove(temp_audio_path)
578
- else:
579
- st.error("Could not process the audio file.")
580
 
581
  with tab2:
582
  st.header("Record Your Voice")
 
 
 
 
 
583
  audio_data = custom_audio_recorder()
584
 
585
  if audio_data:
586
  analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
587
 
588
  if analyze_rec_button:
589
- progress_bar = st.progress(0, text="Processing recording...")
590
- temp_audio_path = process_base64_audio(audio_data)
591
 
592
- if temp_audio_path:
593
- progress_bar.progress(30, text="Processing in parallel...")
594
 
595
- with ThreadPoolExecutor(max_workers=3) as executor:
596
- transcribe_future = executor.submit(transcribe_audio, temp_audio_path)
597
- emotion_future = executor.submit(perform_emotion_detection, transcribe_future.result())
598
- sarcasm_future = executor.submit(perform_sarcasm_detection, transcribe_future.result())
599
 
600
- transcribed_text = transcribe_future.result()
601
- emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
602
- is_sarcastic, sarcasm_score = sarcasm_future.result()
603
-
604
- progress_bar.progress(90, text="Finalizing results...")
605
- if transcribed_text:
606
- display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
607
- else:
608
- st.error("Could not transcribe the audio. Speak clearly.")
609
-
610
- progress_bar.progress(100, text="Analysis complete!")
611
- if os.path.exists(temp_audio_path):
612
- os.remove(temp_audio_path)
613
- else:
614
- st.error("Could not process the recording.")
615
 
616
  st.subheader("Manual Text Input")
617
- manual_text = st.text_area("Enter text to analyze:", placeholder="Type text to analyze...")
 
 
618
  analyze_text_button = st.button("Analyze Text", key="analyze_manual")
619
 
620
  if analyze_text_button and manual_text:
621
- with ThreadPoolExecutor(max_workers=2) as executor:
622
- emotion_future = executor.submit(perform_emotion_detection, manual_text)
623
- sarcasm_future = executor.submit(perform_sarcasm_detection, manual_text)
624
-
625
- emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
626
- is_sarcastic, sarcasm_score = sarcasm_future.result()
627
-
628
- display_analysis_results(manual_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
629
 
630
  show_model_info()
631
- st.sidebar.markdown("---")
632
- st.sidebar.caption("Voice Sentiment Analysis v2.1")
633
- st.sidebar.caption("Optimized for speed and accuracy")
634
 
635
  if __name__ == "__main__":
636
  main()
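The most consequential change across these hunks is how the emotion pipeline is built and read: `return_all_scores=True` is dropped in favour of `top_k=None`, and the scores are then taken from `emotion_results[0]` before being turned into a dict. The following is only a hedged, standalone sketch of consuming that output (the exact nesting of the pipeline's return value varies between transformers releases, so both shapes are handled here):

```python
from transformers import pipeline

# Sketch only: same emotion model as in app.py, used outside the Streamlit app.
classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    top_k=None,  # replaces the deprecated return_all_scores=True
)

results = classifier("I am happy today")
# Depending on the installed transformers version this is either
# [{'label': ..., 'score': ...}, ...] or [[{'label': ..., 'score': ...}, ...]].
if results and isinstance(results[0], list):
    results = results[0]
emotions_dict = {r["label"]: r["score"] for r in results}
print(max(emotions_dict, key=emotions_dict.get))  # "joy" is the expected top label here
```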
 
13
  import base64
14
  import io
15
import streamlit.components.v1 as components
16
 
17
  # Suppress warnings for a clean console
18
  logging.getLogger("torch").setLevel(logging.CRITICAL)
 
20
  warnings.filterwarnings("ignore")
21
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
22
23
  # Check if CUDA is available, otherwise use CPU
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
  print(f"Using device: {device}")
 
29
 
30
  # Interface design
31
st.title("🎙 Voice Based Sentiment Analysis")
32
+ st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")
33
 
34
+ # Emotion Detection Function
35
  @st.cache_resource
36
  def get_emotion_classifier():
37
  try:
38
+ tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion", use_fast=True)
 
 
39
  model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
40
  model = model.to(device)
41
+
 
42
  classifier = pipeline("text-classification",
43
  model=model,
44
  tokenizer=tokenizer,
45
+ top_k=None,
46
  device=0 if torch.cuda.is_available() else -1)
47
 
48
+ # Add a verification test to make sure the model is working
49
  test_result = classifier("I am happy today")
50
  print(f"Emotion classifier test: {test_result}")
51
 
 
55
  st.error(f"Failed to load emotion model. Please check logs.")
56
  return None
57
 
58
+ def perform_emotion_detection(text):
 
 
59
  try:
60
  if not text or len(text.strip()) < 3:
61
+ return {}, "neutral", {}, "NEUTRAL"
62
 
63
  emotion_classifier = get_emotion_classifier()
64
  if emotion_classifier is None:
65
  st.error("Emotion classifier not available.")
66
+ return {}, "neutral", {}, "NEUTRAL"
67
 
 
68
  emotion_results = emotion_classifier(text)
69
+ print(f"Raw emotion classifier output: {emotion_results}")
70
+ if not emotion_results or not isinstance(emotion_results, list) or not emotion_results[0]:
71
+ st.error("Emotion classifier returned invalid or empty results.")
72
+ return {}, "neutral", {}, "NEUTRAL"
73
 
74
+ # Access the first inner list, which contains the emotion dictionaries
75
+ emotion_results = emotion_results[0]
76
  emotion_map = {
77
  "joy": "😊", "anger": "😑", "disgust": "🀒", "fear": "😨",
78
+ "sadness": "😭", "surprise": "😲"
79
  }
 
80
  positive_emotions = ["joy"]
81
  negative_emotions = ["anger", "disgust", "fear", "sadness"]
82
+ neutral_emotions = ["surprise"]
83
+
84
+ emotions_dict = {}
85
+ for result in emotion_results:
86
+ if isinstance(result, dict) and 'label' in result and 'score' in result:
87
+ emotions_dict[result['label']] = result['score']
88
+ else:
89
+ print(f"Invalid result format: {result}")
90
 
91
+ if not emotions_dict:
92
+ st.error("No valid emotions detected.")
93
+ return {}, "neutral", {}, "NEUTRAL"
94
 
95
+ filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
 
96
 
97
  if not filtered_emotions:
98
  filtered_emotions = emotions_dict
99
 
100
+ top_emotion = max(filtered_emotions, key=filtered_emotions.get)
101
+ top_score = filtered_emotions[top_emotion]
102
 
103
+ if top_emotion in positive_emotions:
104
  sentiment = "POSITIVE"
105
  elif top_emotion in negative_emotions:
106
  sentiment = "NEGATIVE"
107
  else:
108
+ competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
109
+ if len(competing_emotions) > 1:
110
+ if (competing_emotions[0][0] in neutral_emotions and
111
+ competing_emotions[1][0] not in neutral_emotions and
112
+ competing_emotions[1][1] > 0.7 * competing_emotions[0][1]):
113
+ top_emotion = competing_emotions[1][0]
114
+ if top_emotion in positive_emotions:
115
+ sentiment = "POSITIVE"
116
+ elif top_emotion in negative_emotions:
117
+ sentiment = "NEGATIVE"
118
+ else:
119
+ sentiment = "NEUTRAL"
120
+ else:
121
+ sentiment = "NEUTRAL"
122
+ else:
123
+ sentiment = "NEUTRAL"
124
+
125
+ print(f"Text: {text[:50]}...")
126
+ print(f"Top 3 emotions: {sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]}")
127
+ print(f"Selected top emotion: {top_emotion} ({filtered_emotions.get(top_emotion, 0):.3f})")
128
+ print(f"Sentiment determined: {sentiment}")
129
+ print(f"All emotions detected: {emotions_dict}")
130
+ print(f"Filtered emotions: {filtered_emotions}")
131
+ print(f"Emotion classification threshold: 0.01")
132
 
133
  return emotions_dict, top_emotion, emotion_map, sentiment
134
  except Exception as e:
135
  st.error(f"Emotion detection failed: {str(e)}")
136
  print(f"Exception in emotion detection: {str(e)}")
137
+ return {}, "neutral", {}, "NEUTRAL"
138
 
139
  # Sarcasm Detection Function
140
  @st.cache_resource
141
  def get_sarcasm_classifier():
142
  try:
143
+ tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)
 
 
144
  model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
145
  model = model.to(device)
146
+ classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
147
  device=0 if torch.cuda.is_available() else -1)
148
 
149
+ # Add a verification test to ensure the model is working
150
  test_result = classifier("This is totally amazing")
151
  print(f"Sarcasm classifier test: {test_result}")
152
 
 
156
  st.error(f"Failed to load sarcasm model. Please check logs.")
157
  return None
158
 
159
+ def perform_sarcasm_detection(text):
 
160
  try:
161
  if not text or len(text.strip()) < 3:
162
  return False, 0.0
 
174
  st.error(f"Sarcasm detection failed: {str(e)}")
175
  return False, 0.0
176
 
177
+ # Validate audio quality
178
+ def validate_audio(audio_path):
179
  try:
180
  sound = AudioSegment.from_file(audio_path)
181
+ if sound.dBFS < -55:
182
+ st.warning("Audio volume is too low. Please record or upload a louder audio.")
183
+ return False
184
+ if len(sound) < 1000: # Less than 1 second
185
+ st.warning("Audio is too short. Please record a longer audio.")
186
  return False
187
  return True
188
+ except:
189
+ st.error("Invalid or corrupted audio file.")
190
  return False
191
 
192
  # Speech Recognition with Whisper
193
  @st.cache_resource
194
  def load_whisper_model():
195
  try:
196
+ model = whisper.load_model("large-v3")
197
  return model
198
  except Exception as e:
199
  print(f"Error loading Whisper model: {str(e)}")
200
  st.error(f"Failed to load Whisper model. Please check logs.")
201
  return None
202
 
203
+ def transcribe_audio(audio_path, show_alternative=False):
 
204
  try:
205
+ st.write(f"Processing audio file: {audio_path}")
206
  sound = AudioSegment.from_file(audio_path)
207
+ st.write(
208
+ f"Audio duration: {len(sound) / 1000:.2f}s, Sample rate: {sound.frame_rate}, Channels: {sound.channels}")
209
+
210
  # Convert to WAV format (16kHz, mono) for Whisper
211
+ temp_wav_path = os.path.join(tempfile.gettempdir(), "temp_converted.wav")
212
+ sound = sound.set_frame_rate(22050)
213
+ sound = sound.set_channels(1)
214
  sound.export(temp_wav_path, format="wav")
215
 
216
+ # Load Whisper model
217
  model = load_whisper_model()
218
+
219
+ # Transcribe audio
220
+ result = model.transcribe(temp_wav_path, language="en")
221
  main_text = result["text"].strip()
222
 
223
  # Clean up
224
  if os.path.exists(temp_wav_path):
225
  os.remove(temp_wav_path)
226
 
227
+ # Whisper doesn't provide alternatives, so return empty list
228
+ if show_alternative:
229
+ return main_text, []
230
  return main_text
231
  except Exception as e:
232
  st.error(f"Transcription failed: {str(e)}")
233
+ return "", [] if show_alternative else ""
234
 
235
+ # Function to handle uploaded audio files
236
+ def process_uploaded_audio(audio_file):
237
  if not audio_file:
238
  return None
239
 
240
  try:
241
  temp_dir = tempfile.gettempdir()
242
+
243
+ ext = audio_file.name.split('.')[-1].lower()
244
+ if ext not in ['wav', 'mp3', 'ogg']:
245
+ st.error("Unsupported audio format. Please upload WAV, MP3, or OGG.")
246
  return None
 
247
  temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")
248
+
249
  with open(temp_file_path, "wb") as f:
250
  f.write(audio_file.getvalue())
251
 
252
  if not validate_audio(temp_file_path):
253
+ return None
254
+
255
  return temp_file_path
256
  except Exception as e:
257
  st.error(f"Error processing uploaded audio: {str(e)}")
 
260
  # Show model information
261
  def show_model_info():
262
st.sidebar.header("🧠 About the Models")
263
+
264
  model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
265
 
266
  with model_tabs[0]:
267
  st.markdown("""
268
  *Emotion Model*: distilbert-base-uncased-emotion
269
+ - Fine-tuned for six emotions (joy, anger, disgust, fear, sadness, surprise)
270
  - Architecture: DistilBERT base
271
+ - High accuracy for basic emotion classification
272
[🔍 Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
273
  """)
274
 
275
  with model_tabs[1]:
276
  st.markdown("""
277
  *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
278
+ - Trained on SemEval-2018 Task 3 (Twitter irony dataset)
279
  - Architecture: RoBERTa base
280
+ - F1-score: 0.705
281
[🔍 Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
282
  """)
283
 
284
  with model_tabs[2]:
285
  st.markdown("""
286
+ *Speech Recognition*: OpenAI Whisper (large-v3)
287
+ - State-of-the-art model for speech-to-text
288
+ - Accuracy: ~5-10% WER on clean English audio
289
+ - Robust to noise, accents, and varied conditions
290
+ - Runs locally, no internet required
291
+ *Tips*: Use good mic, reduce noise, speak clearly
292
[🔍 Model Details](https://github.com/openai/whisper)
293
  """)
294
 
295
+ # Custom audio recorder using HTML/JS
296
  def custom_audio_recorder():
297
+ st.warning("Browser-based recording requires microphone access and a modern browser. If recording fails, try uploading an audio file instead.")
298
  audio_recorder_html = """
299
  <script>
300
  var audioRecorder = {
301
  audioBlobs: [],
302
  mediaRecorder: null,
303
  streamBeingCaptured: null,
 
 
304
  start: function() {
305
  if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
306
+ return Promise.reject(new Error('mediaDevices API or getUserMedia method is not supported in this browser.'));
307
+ }
308
+ else {
309
+ return navigator.mediaDevices.getUserMedia({ audio: true })
310
+ .then(stream => {
311
+ audioRecorder.streamBeingCaptured = stream;
312
+ audioRecorder.mediaRecorder = new MediaRecorder(stream);
313
+ audioRecorder.audioBlobs = [];
314
+ audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
315
+ audioRecorder.audioBlobs.push(event.data);
316
+ });
317
+ audioRecorder.mediaRecorder.start();
318
+ });
319
}
320
  },
 
321
  stop: function() {
322
  return new Promise(resolve => {
323
  let mimeType = audioRecorder.mediaRecorder.mimeType;
324
  audioRecorder.mediaRecorder.addEventListener("stop", () => {
325
  let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
326
  resolve(audioBlob);
 
 
327
  });
328
  audioRecorder.mediaRecorder.stop();
329
+ audioRecorder.stopStream();
330
+ audioRecorder.resetRecordingProperties();
 
331
  });
332
+ },
333
+ stopStream: function() {
334
+ audioRecorder.streamBeingCaptured.getTracks()
335
+ .forEach(track => track.stop());
336
+ },
337
+ resetRecordingProperties: function() {
338
+ audioRecorder.mediaRecorder = null;
339
+ audioRecorder.streamBeingCaptured = null;
340
  }
341
  }
 
342
  var isRecording = false;
343
+ var recordButton = document.getElementById('record-button');
344
+ var audioElement = document.getElementById('audio-playback');
345
+ var audioData = document.getElementById('audio-data');
346
function toggleRecording() {
347
  if (!isRecording) {
348
  audioRecorder.start()
349
  .then(() => {
 
352
  recordButton.classList.add('recording');
353
  })
354
  .catch(error => {
355
+ alert('Error starting recording: ' + error.message);
356
  });
357
  } else {
358
  audioRecorder.stop()
359
  .then(audioBlob => {
360
  const audioUrl = URL.createObjectURL(audioBlob);
 
361
  audioElement.src = audioUrl;
 
 
362
  const reader = new FileReader();
363
  reader.readAsDataURL(audioBlob);
364
  reader.onloadend = function() {
365
  const base64data = reader.result;
 
366
  audioData.value = base64data;
367
  const streamlitMessage = {type: "streamlit:setComponentValue", value: base64data};
368
  window.parent.postMessage(streamlitMessage, "*");
369
  }
 
370
  isRecording = false;
371
  recordButton.textContent = 'Start Recording';
372
  recordButton.classList.remove('recording');
373
  });
374
  }
375
  }
 
376
  document.addEventListener('DOMContentLoaded', function() {
377
+ recordButton = document.getElementById('record-button');
378
+ audioElement = document.getElementById('audio-playback');
379
+ audioData = document.getElementById('audio-data');
380
  recordButton.addEventListener('click', toggleRecording);
381
  });
382
  </script>
 
383
  <div class="audio-recorder-container">
384
  <button id="record-button" class="record-button">Start Recording</button>
385
+ <audio id="audio-playback" controls style="display:block; margin-top:10px;"></audio>
 
386
  <input type="hidden" id="audio-data" name="audio-data">
387
  </div>
 
388
  <style>
389
  .audio-recorder-container {
390
  display: flex;
391
  flex-direction: column;
392
  align-items: center;
393
+ padding: 20px;
394
  }
 
395
  .record-button {
396
  background-color: #f63366;
397
  color: white;
398
  border: none;
399
+ padding: 10px 20px;
400
+ border-radius: 5px;
401
  cursor: pointer;
402
font-size: 16px;
403
  }
 
404
  .record-button.recording {
405
  background-color: #ff0000;
406
  animation: pulse 1.5s infinite;
407
}
408
  @keyframes pulse {
409
+ 0% { opacity: 1; }
410
+ 50% { opacity: 0.7; }
411
+ 100% { opacity: 1; }
412
  }
413
  </style>
414
  """
415
 
416
  return components.html(audio_recorder_html, height=150)
417
 
418
+ # Function to display analysis results
419
+ def display_analysis_results(transcribed_text):
420
  st.session_state.debug_info = st.session_state.get('debug_info', [])
421
+ st.session_state.debug_info.append(f"Processing text: {transcribed_text[:50]}...")
422
+ st.session_state.debug_info = st.session_state.debug_info[-100:] # Keep last 100 entries
423
+
424
+ emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
425
+ is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
426
+
427
+ # Add results to debug info
428
+ st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}")
429
+ st.session_state.debug_info.append(f"Sarcasm: {is_sarcastic}, Score: {sarcasm_score:.3f}")
430
 
431
  st.header("Transcribed Text")
432
+ st.text_area("Text", transcribed_text, height=150, disabled=True, help="The audio converted to text.")
433
 
434
+ confidence_score = min(0.95, max(0.70, len(transcribed_text.split()) / 50))
435
+ st.caption(f"Estimated transcription confidence: {confidence_score:.2f} (based on text length)")
 
 
436
 
437
  st.header("Analysis Results")
438
  col1, col2 = st.columns([1, 2])
439
 
440
  with col1:
441
  st.subheader("Sentiment")
442
+ sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
443
+ st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
444
+ st.info("Sentiment reflects the dominant emotion's tone.")
445
 
446
  st.subheader("Sarcasm")
447
sarcasm_icon = "😏" if is_sarcastic else "😐"
448
sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
449
+ st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
450
+ st.info("Score indicates sarcasm confidence (0 to 1).")
451
 
452
  with col2:
453
  st.subheader("Emotions")
454
  if emotions_dict:
455
+ st.markdown(
456
+ f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
457
  sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
458
+ top_emotions = sorted_emotions[:8]
459
+ emotions = [e[0] for e in top_emotions]
460
+ scores = [e[1] for e in top_emotions]
461
+ fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
462
+ title="Top Emotions Distribution", color=emotions,
463
+ color_discrete_sequence=px.colors.qualitative.Bold)
464
+ fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
465
+ st.plotly_chart(fig, use_container_width=True)
466
  else:
467
  st.write("No emotions detected.")
468
 
 
469
  with st.expander("Debug Information", expanded=False):
470
+ st.write("Debugging information for troubleshooting:")
471
  for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
472
  st.text(f"{i + 1}. {debug_line}")
473
  if emotions_dict:
474
  st.write("Raw emotion scores:")
475
  for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
476
+ if score > 0.01: # Only show non-negligible scores
477
  st.text(f"{emotion}: {score:.4f}")
478
 
479
+ with st.expander("Analysis Details", expanded=False):
480
+ st.write("""
481
+ *How this works:*
482
+ 1. *Speech Recognition*: Audio transcribed using OpenAI Whisper (large-v3)
483
+ 2. *Emotion Analysis*: DistilBERT model trained for six emotions
484
+ 3. *Sentiment Analysis*: Derived from dominant emotion
485
+ 4. *Sarcasm Detection*: RoBERTa model for irony detection
486
+ *Accuracy depends on*:
487
+ - Audio quality
488
+ - Speech clarity
489
+ - Background noise
490
+ - Speech patterns
491
+ """)
492
+
493
  # Process base64 audio data
494
  def process_base64_audio(base64_data):
495
try:
496
  base64_binary = base64_data.split(',')[1]
497
  binary_data = base64.b64decode(base64_binary)
498
+
499
+ temp_dir = tempfile.gettempdir()
500
+ temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")
501
 
502
  with open(temp_file_path, "wb") as f:
503
  f.write(binary_data)
504
 
505
  if not validate_audio(temp_file_path):
506
+ return None
507
+
508
  return temp_file_path
509
  except Exception as e:
510
  st.error(f"Error processing audio data: {str(e)}")
511
  return None
512
513
  # Main App Logic
514
  def main():
515
  if 'debug_info' not in st.session_state:
516
st.session_state.debug_info = []
517
 
518
tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
519
 
520
  with tab1:
521
  st.header("Upload an Audio File")
522
+ audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"],
523
+ help="Upload an audio file for analysis")
524
 
525
  if audio_file:
526
  st.audio(audio_file.getvalue())
527
+ st.caption("🎧 Uploaded Audio Playback")
528
+
529
  upload_button = st.button("Analyze Upload", key="analyze_upload")
530
 
531
  if upload_button:
532
+ with st.spinner('Analyzing audio with advanced precision...'):
533
+ temp_audio_path = process_uploaded_audio(audio_file)
534
+ if temp_audio_path:
535
+ main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)
536
+
537
+ if main_text:
538
+ if alternatives:
539
+ with st.expander("Alternative transcriptions detected", expanded=False):
540
+ for i, alt in enumerate(alternatives[:3], 1):
541
+ st.write(f"{i}. {alt}")
542
+
543
+ display_analysis_results(main_text)
544
+ else:
545
+ st.error("Could not transcribe the audio. Please try again with clearer audio.")
546
+
547
+ if os.path.exists(temp_audio_path):
548
+ os.remove(temp_audio_path)
549
 
550
  with tab2:
551
  st.header("Record Your Voice")
552
+ st.write("Use the recorder below to analyze your speech in real-time.")
553
+
554
+ st.subheader("Browser-Based Recorder")
555
+ st.write("Click the button below to start/stop recording.")
556
+
557
  audio_data = custom_audio_recorder()
558
 
559
  if audio_data:
560
  analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
561
 
562
  if analyze_rec_button:
563
+ with st.spinner("Processing your recording..."):
564
+ temp_audio_path = process_base64_audio(audio_data)
565
 
566
+ if temp_audio_path:
567
+ transcribed_text = transcribe_audio(temp_audio_path)
568
 
569
+ if transcribed_text:
570
+ display_analysis_results(transcribed_text)
571
+ else:
572
+ st.error("Could not transcribe the audio. Please try speaking more clearly.")
573
 
574
+ if os.path.exists(temp_audio_path):
575
+ os.remove(temp_audio_path)
576
 
577
  st.subheader("Manual Text Input")
578
+ st.write("If recording doesn't work, you can type your text here:")
579
+
580
+ manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
581
  analyze_text_button = st.button("Analyze Text", key="analyze_manual")
582
 
583
  if analyze_text_button and manual_text:
584
+ display_analysis_results(manual_text)
585
 
586
show_model_info()
587
 
588
  if __name__ == "__main__":
589
  main()
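For reference, the recording path added in this commit (browser recorder → base64 data URL → temp file → pydub → WAV for Whisper) can be condensed into a standalone sketch. The helper name and return convention below are illustrative, not part of the commit; pydub needs ffmpeg available, as in the app:

```python
import base64
import os
import tempfile
import time

from pydub import AudioSegment  # requires ffmpeg, same as the app


def data_url_to_wav(base64_data: str) -> str:
    """Sketch of what process_base64_audio and transcribe_audio do together:
    decode the recorder's data URL, save it, and normalise to mono WAV."""
    raw = base64.b64decode(base64_data.split(",")[1])
    tmp_in = os.path.join(tempfile.gettempdir(), f"recording_{int(time.time())}")
    with open(tmp_in, "wb") as f:
        f.write(raw)
    sound = AudioSegment.from_file(tmp_in)  # ffmpeg detects the real container
    sound = sound.set_frame_rate(22050).set_channels(1)  # rate used in the commit
    wav_path = tmp_in + ".wav"
    sound.export(wav_path, format="wav")
    return wav_path
```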