MaroofTechSorcerer committed on
Commit 9e1cb2f · verified · 1 Parent(s): 1cec378

Update app.py

Files changed (1)
  1. app.py +829 -149
app.py CHANGED
@@ -13,7 +13,9 @@ import time
import base64
import io
import streamlit.components.v1 as components
- import numpy as np

# Suppress warnings for a clean console
logging.getLogger("torch").setLevel(logging.CRITICAL)
@@ -21,14 +23,6 @@ logging.getLogger("transformers").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

- # Check if NumPy is available
- try:
-     test_array = np.array([1, 2, 3])
-     torch.from_numpy(test_array)
- except Exception as e:
-     st.error(f"NumPy is not available or incompatible with PyTorch: {str(e)}. Ensure 'numpy' is in requirements.txt and reinstall dependencies.")
-     st.stop()
-
# Check if CUDA is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
@@ -38,293 +32,979 @@ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")

# Interface design
st.title("🎙 Voice Based Sentiment Analysis")
- st.write("Detect emotions, sentiment, and sarcasm from your voice with optimized speed and accuracy using OpenAI Whisper.")

- # Emotion Detection Function
@st.cache_resource
def get_emotion_classifier():
    try:
-       tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion", use_fast=True)
-       model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion").to(device)
-       if torch.cuda.is_available():
-           model = model.half()  # Use fp16 on GPU
        classifier = pipeline("text-classification",
                              model=model,
                              tokenizer=tokenizer,
                              top_k=None,
                              device=0 if torch.cuda.is_available() else -1)
        return classifier
    except Exception as e:
-       st.error(f"Failed to load emotion model: {str(e)}")
        return None

- def perform_emotion_detection(text):
    try:
        if not text or len(text.strip()) < 3:
-           return {}, "neutral", {}, "NEUTRAL"
        emotion_classifier = get_emotion_classifier()
-       if not emotion_classifier:
-           return {}, "neutral", {}, "NEUTRAL"
-       emotion_results = emotion_classifier(text)[0]
        emotion_map = {
            "joy": "😊", "anger": "😡", "disgust": "🤢", "fear": "😨",
-           "sadness": "😭", "surprise": "😲"
        }
        positive_emotions = ["joy"]
        negative_emotions = ["anger", "disgust", "fear", "sadness"]
-       neutral_emotions = ["surprise"]
-       emotions_dict = {result['label']: result['score'] for result in emotion_results}
-       filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
        if not filtered_emotions:
            filtered_emotions = emotions_dict
        top_emotion = max(filtered_emotions, key=filtered_emotions.get)
        if top_emotion in positive_emotions:
            sentiment = "POSITIVE"
        elif top_emotion in negative_emotions:
            sentiment = "NEGATIVE"
        else:
-           sentiment = "NEUTRAL"
        return emotions_dict, top_emotion, emotion_map, sentiment
    except Exception as e:
        st.error(f"Emotion detection failed: {str(e)}")
-       return {}, "neutral", {}, "NEUTRAL"

- # Sarcasm Detection Function
@st.cache_resource
def get_sarcasm_classifier():
    try:
-       tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)
-       model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony").to(device)
-       if torch.cuda.is_available():
-           model = model.half()  # Use fp16 on GPU
-       classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                              device=0 if torch.cuda.is_available() else -1)
        return classifier
    except Exception as e:
-       st.error(f"Failed to load sarcasm model: {str(e)}")
        return None

- def perform_sarcasm_detection(text):
    try:
        if not text or len(text.strip()) < 3:
            return False, 0.0
        sarcasm_classifier = get_sarcasm_classifier()
-       if not sarcasm_classifier:
            return False, 0.0
-       result = sarcasm_classifier(text)[0]
-       is_sarcastic = result['label'] == "LABEL_1"
-       sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
-       return is_sarcastic, sarcasm_score
    except Exception as e:
        st.error(f"Sarcasm detection failed: {str(e)}")
        return False, 0.0

- # Validate audio quality
- def validate_audio(audio_path):
    try:
        sound = AudioSegment.from_file(audio_path)
-       if sound.dBFS < -55:
-           st.warning("Audio volume is too low.")
-           return False
-       if len(sound) < 1000:
-           st.warning("Audio is too short.")
            return False
        return True
    except Exception as e:
-       st.error(f"Invalid audio file: {str(e)}")
        return False

- # Speech Recognition with Whisper
@st.cache_resource
def load_whisper_model():
    try:
-       model = whisper.load_model("base").to(device)
        return model
    except Exception as e:
-       st.error(f"Failed to load Whisper model: {str(e)}")
        return None

- def transcribe_audio(audio_path):
-   temp_wav_path = None
    try:
-       sound = AudioSegment.from_file(audio_path).set_frame_rate(16000).set_channels(1)
-       temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_{int(time.time())}.wav")
        sound.export(temp_wav_path, format="wav")
        model = load_whisper_model()
-       if not model:
-           return ""
-       result = model.transcribe(temp_wav_path, language="en", fp16=torch.cuda.is_available())
-       return result["text"].strip()
    except Exception as e:
        st.error(f"Transcription failed: {str(e)}")
-       return ""
-   finally:
-       if temp_wav_path and os.path.exists(temp_wav_path):
-           os.remove(temp_wav_path)

- # Process uploaded audio files
- def process_uploaded_audio(audio_file):
    if not audio_file:
        return None
-   temp_file_path = None
    try:
-       ext = audio_file.name.split('.')[-1].lower()
-       if ext not in ['wav', 'mp3', 'ogg']:
-           st.error("Unsupported audio format. Use WAV, MP3, or OGG.")
            return None
-       temp_file_path = os.path.join(tempfile.gettempdir(), f"uploaded_{int(time.time())}.{ext}")
        with open(temp_file_path, "wb") as f:
            f.write(audio_file.getvalue())
        if not validate_audio(temp_file_path):
-           return None
        return temp_file_path
    except Exception as e:
        st.error(f"Error processing uploaded audio: {str(e)}")
        return None
-   finally:
-       if temp_file_path and os.path.exists(temp_file_path):
-           os.remove(temp_file_path)

# Show model information
def show_model_info():
    st.sidebar.header("🧠 About the Models")
-   with st.sidebar.expander("Model Details"):
        st.markdown("""
-       - *Emotion*: DistilBERT (bhadresh-savani/distilbert-base-uncased-emotion)
-       - *Sarcasm*: RoBERTa (cardiffnlp/twitter-roberta-base-irony)
-       - *Speech*: OpenAI Whisper (base)
        """)

- # Custom audio recorder
def custom_audio_recorder():
-   st.warning("Recording requires microphone access and a modern browser.")
    audio_recorder_html = """
    <script>
-   let recorder, stream;
-   async function startRecording() {
-       try {
-           stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-           recorder = new MediaRecorder(stream);
-           const chunks = [];
-           recorder.ondataavailable = e => chunks.push(e.data);
-           recorder.onstop = () => {
-               const blob = new Blob(chunks, { type: 'audio/wav' });
-               const reader = new FileReader();
-               reader.onloadend = () => {
-                   window.parent.postMessage({type: "streamlit:setComponentValue", value: reader.result}, "*");
-               };
-               reader.readAsDataURL(blob);
-               stream.getTracks().forEach(track => track.stop());
-           };
-           recorder.start();
-           document.getElementById('record-btn').textContent = 'Stop Recording';
-       } catch (e) { alert('Recording failed: ' + e.message); }
-   }
-   function stopRecording() {
-       recorder.stop();
-       document.getElementById('record-btn').textContent = 'Start Recording';
    }
    function toggleRecording() {
-       if (!recorder || recorder.state === 'inactive') startRecording();
-       else stopRecording();
    }
    </script>
-   <button id="record-btn" onclick="toggleRecording()">Start Recording</button>
    <style>
-   #record-btn {
        background-color: #f63366;
        color: white;
        border: none;
-       padding: 10px 20px;
-       border-radius: 5px;
        cursor: pointer;
    }
-   #record-btn:hover {
        background-color: #ff0000;
    }
    </style>
    """
-   return components.html(audio_recorder_html, height=100)

- # Display analysis results
def display_analysis_results(transcribed_text):
-   emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
-   is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
    st.header("Analysis Results")
-   st.text_area("Transcribed Text", transcribed_text, height=100, disabled=True)
    col1, col2 = st.columns([1, 2])
    with col1:
        st.subheader("Sentiment")
-       sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
-       st.markdown(f"{sentiment_icon} {sentiment} (Based on {top_emotion})")
        st.subheader("Sarcasm")
        sarcasm_icon = "😏" if is_sarcastic else "😐"
-       st.markdown(f"{sarcasm_icon} {'Detected' if is_sarcastic else 'Not Detected'} (Score: {sarcasm_score:.2f})")
    with col2:
        st.subheader("Emotions")
        if emotions_dict:
-           st.markdown(f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
-           fig = px.bar(x=list(emotions_dict.keys()), y=list(emotions_dict.values()),
-                        labels={'x': 'Emotion', 'y': 'Score'}, title="Emotion Distribution")
-           st.plotly_chart(fig, use_container_width=True)
        else:
            st.write("No emotions detected.")

- # Process base64 audio
def process_base64_audio(base64_data):
-   temp_file_path = None
    try:
-       audio_bytes = base64.b64decode(base64_data.split(',')[1])
-       temp_file_path = os.path.join(tempfile.gettempdir(), f"rec_{int(time.time())}.wav")
        with open(temp_file_path, "wb") as f:
-           f.write(audio_bytes)
        if not validate_audio(temp_file_path):
-           return None
        return temp_file_path
    except Exception as e:
-       st.error(f"Error processing recorded audio: {str(e)}")
        return None
-   finally:
-       if temp_file_path and os.path.exists(temp_file_path):
-           os.remove(temp_file_path)

- # Main App Logic
def main():
    if 'debug_info' not in st.session_state:
        st.session_state.debug_info = []
    tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
    with tab1:
        st.header("Upload an Audio File")
-       audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"])
        if audio_file:
            st.audio(audio_file.getvalue())
-           if st.button("Analyze Upload", key="analyze_upload"):
-               with st.spinner("Analyzing audio..."):
-                   temp_audio_path = process_uploaded_audio(audio_file)
-                   if temp_audio_path:
-                       transcribed_text = transcribe_audio(temp_audio_path)
-                       if transcribed_text:
-                           display_analysis_results(transcribed_text)
-                       else:
-                           st.error("Could not transcribe audio. Try clearer audio.")
    with tab2:
        st.header("Record Your Voice")
        st.subheader("Browser-Based Recorder")
        audio_data = custom_audio_recorder()
-       if audio_data and st.button("Analyze Recording", key="analyze_rec"):
-           with st.spinner("Processing recording..."):
                temp_audio_path = process_base64_audio(audio_data)
                if temp_audio_path:
                    transcribed_text = transcribe_audio(temp_audio_path)
                    if transcribed_text:
                        display_analysis_results(transcribed_text)
                    else:
-                       st.error("Could not transcribe audio. Speak clearly.")
        st.subheader("Manual Text Input")
-       manual_text = st.text_area("Enter text to analyze:", placeholder="Type your text...")
-       if st.button("Analyze Text", key="analyze_manual") and manual_text:
-           display_analysis_results(manual_text)
    show_model_info()

if __name__ == "__main__":
    main()
13
  import base64
14
  import io
15
  import streamlit.components.v1 as components
16
+ import functools
17
+ import threading
18
+ from typing import Dict, Tuple, List, Any, Optional, Union
+ from concurrent.futures import ThreadPoolExecutor
19
 
20
  # Suppress warnings for a clean console
21
  logging.getLogger("torch").setLevel(logging.CRITICAL)
 
23
  warnings.filterwarnings("ignore")
24
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
25

26
  # Check if CUDA is available, otherwise use CPU
27
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
  print(f"Using device: {device}")
 
32
 
33
  # Interface design
34
st.title("🎙 Voice Based Sentiment Analysis")
35
+ st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")
36
 
37
+ # Emotion Detection Function with optimizations
38
  @st.cache_resource
39
  def get_emotion_classifier():
40
  try:
41
+ tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion",
42
+ use_fast=True,
43
+ model_max_length=512)
44
+ model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
45
+ model = model.to(device)
46
+ model.eval() # Set model to evaluation mode for better inference performance
47
+
48
+ # Use batch_size for faster processing when appropriate
49
  classifier = pipeline("text-classification",
50
  model=model,
51
  tokenizer=tokenizer,
52
  top_k=None,
53
  device=0 if torch.cuda.is_available() else -1)
54
+
55
+ # Verify the model is working with a test
56
+ test_result = classifier("I am happy today")
57
+ print(f"Emotion classifier test: {test_result}")
58
+
59
  return classifier
60
  except Exception as e:
61
+ print(f"Error loading emotion model: {str(e)}")
62
+ st.error(f"Failed to load emotion model. Please check logs.")
63
  return None
64
 
65
+ # Cache emotion results to prevent recomputation
66
+ @st.cache_data(ttl=600) # Cache for 10 minutes
67
+ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
68
  try:
69
+ # Handle empty or very short text
70
  if not text or len(text.strip()) < 3:
71
+ return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
72
+
73
  emotion_classifier = get_emotion_classifier()
74
+ if emotion_classifier is None:
75
+ st.error("Emotion classifier not available.")
76
+ return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
77
+
78
+ # Chunk long text for better processing
79
+ max_chunk_size = 512
80
+ if len(text) > max_chunk_size:
81
+ chunks = [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
82
+ all_results = []
83
+ for chunk in chunks:
84
+ chunk_results = emotion_classifier(chunk)
85
+ all_results.extend(chunk_results)
86
+ # Aggregate results across chunks
87
+ emotion_results = [result[0] for result in all_results]
88
+ else:
89
+ emotion_results = emotion_classifier(text)[0]
90
+
91
  emotion_map = {
92
  "joy": "😊", "anger": "😑", "disgust": "🀒", "fear": "😨",
93
+ "sadness": "😭", "surprise": "😲", "neutral": "😐"
94
  }
95
+
96
  positive_emotions = ["joy"]
97
  negative_emotions = ["anger", "disgust", "fear", "sadness"]
98
+ neutral_emotions = ["surprise", "neutral"]
99
+
100
+ # Process results
101
+ emotions_dict = {}
102
+ for result in emotion_results:
103
+ if isinstance(result, dict) and 'label' in result and 'score' in result:
104
+ # If we have multiple chunks, average the scores
105
+ if result['label'] in emotions_dict:
106
+ emotions_dict[result['label']] = (emotions_dict[result['label']] + result['score']) / 2
107
+ else:
108
+ emotions_dict[result['label']] = result['score']
109
+ else:
110
+ print(f"Invalid result format: {result}")
111
+
112
+ if not emotions_dict:
113
+ st.error("No valid emotions detected.")
114
+ return {}, "neutral", emotion_map, "NEUTRAL"
115
+
116
+ # Filter out very low probability emotions (improved threshold)
117
+ filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.05}
118
+
119
  if not filtered_emotions:
120
  filtered_emotions = emotions_dict
121
+
122
+ # Get top emotion
123
  top_emotion = max(filtered_emotions, key=filtered_emotions.get)
124
+ top_score = filtered_emotions[top_emotion]
125
+
126
+ # Determine sentiment with improved logic
127
  if top_emotion in positive_emotions:
128
  sentiment = "POSITIVE"
129
  elif top_emotion in negative_emotions:
130
  sentiment = "NEGATIVE"
131
  else:
132
+ # Better handling of mixed emotions
133
+ competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
134
+
135
+ if len(competing_emotions) > 1:
136
+ # If top two emotions are close in score
137
+ if (competing_emotions[1][1] > 0.8 * competing_emotions[0][1]):
138
+ # Check if second emotion changes sentiment classification
139
+ second_emotion = competing_emotions[1][0]
140
+ if second_emotion in positive_emotions:
141
+ sentiment = "POSITIVE" if top_emotion not in negative_emotions else "MIXED"
142
+ elif second_emotion in negative_emotions:
143
+ sentiment = "NEGATIVE" if top_emotion not in positive_emotions else "MIXED"
144
+ else:
145
+ sentiment = "NEUTRAL"
146
+ else:
147
+ # Stick with top emotion for sentiment
148
+ sentiment = "NEUTRAL"
149
+ else:
150
+ sentiment = "NEUTRAL"
151
+
152
  return emotions_dict, top_emotion, emotion_map, sentiment
153
  except Exception as e:
154
  st.error(f"Emotion detection failed: {str(e)}")
155
+ print(f"Exception in emotion detection: {str(e)}")
156
+ return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
157
 
158
+ # Sarcasm Detection Function with optimizations
159
  @st.cache_resource
160
  def get_sarcasm_classifier():
161
  try:
162
+ tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony",
163
+ use_fast=True,
164
+ model_max_length=512)
165
+ model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
166
+ model = model.to(device)
167
+ model.eval() # Set to evaluation mode
168
+
169
+ classifier = pipeline("text-classification",
170
+ model=model,
171
+ tokenizer=tokenizer,
172
  device=0 if torch.cuda.is_available() else -1)
173
+
174
+ # Test the model
175
+ test_result = classifier("This is totally amazing")
176
+ print(f"Sarcasm classifier test: {test_result}")
177
+
178
  return classifier
179
  except Exception as e:
180
+ print(f"Error loading sarcasm model: {str(e)}")
181
+ st.error(f"Failed to load sarcasm model. Please check logs.")
182
  return None
183
 
184
+ # Cache sarcasm results
185
+ @st.cache_data(ttl=600) # Cache for 10 minutes
186
+ def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
187
  try:
188
  if not text or len(text.strip()) < 3:
189
  return False, 0.0
190
+
191
  sarcasm_classifier = get_sarcasm_classifier()
192
+ if sarcasm_classifier is None:
193
+ st.error("Sarcasm classifier not available.")
194
  return False, 0.0
195
+
196
+ # Handle long text by chunking
197
+ max_chunk_size = 512
198
+ if len(text) > max_chunk_size:
199
+ chunks = [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
200
+ # Process chunks and average results
201
+ sarcasm_scores = []
202
+ for chunk in chunks:
203
+ result = sarcasm_classifier(chunk)[0]
204
+ is_chunk_sarcastic = result['label'] == "LABEL_1"
205
+ sarcasm_score = result['score'] if is_chunk_sarcastic else 1 - result['score']
206
+ sarcasm_scores.append((is_chunk_sarcastic, sarcasm_score))
207
+
208
+ # Average sarcasm scores
209
+ total_sarcasm_score = sum(score for _, score in sarcasm_scores)
210
+ avg_sarcasm_score = total_sarcasm_score / len(sarcasm_scores)
211
+ # Count sarcastic chunks
212
+ sarcastic_chunks = sum(1 for is_sarcastic, _ in sarcasm_scores if is_sarcastic)
213
+
214
+ # If majority of chunks are sarcastic, classify as sarcastic
215
+ is_sarcastic = sarcastic_chunks > len(chunks) / 2
216
+ return is_sarcastic, avg_sarcasm_score
217
+ else:
218
+ # Process normally for short text
219
+ result = sarcasm_classifier(text)[0]
220
+ is_sarcastic = result['label'] == "LABEL_1"
221
+ sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
222
+ return is_sarcastic, sarcasm_score
223
  except Exception as e:
224
  st.error(f"Sarcasm detection failed: {str(e)}")
225
  return False, 0.0
226
 
227
+ # Validate audio quality - optimized
228
+ def validate_audio(audio_path: str) -> bool:
229
  try:
230
  sound = AudioSegment.from_file(audio_path)
231
+ # Improved audio validation
232
+ if sound.dBFS < -50: # Slightly relaxed threshold
233
+ st.warning("Audio volume is low. Please record or upload a louder audio for better results.")
234
+ return len(sound) > 500 # Still process if at least 0.5 seconds
235
+ if len(sound) < 500: # Less than 0.5 second
236
+ st.warning("Audio is very short. Longer audio provides better analysis.")
237
  return False
238
  return True
239
  except Exception as e:
240
+ st.error(f"Invalid or corrupted audio file: {str(e)}")
241
  return False
242
 
243
+ # Speech Recognition with Whisper - optimized for speed
244
  @st.cache_resource
245
  def load_whisper_model():
246
  try:
247
+ # Use medium model for better speed/accuracy balance
248
+ model = whisper.load_model("medium")
249
  return model
250
  except Exception as e:
251
+ print(f"Error loading Whisper model: {str(e)}")
252
+ st.error(f"Failed to load Whisper model. Please check logs.")
253
  return None
254
 
255
+ @st.cache_data
256
+ def transcribe_audio(audio_path: str, show_alternative: bool = False) -> Union[str, Tuple[str, List[str]]]:
257
  try:
258
+ st.write(f"Processing audio file...")
259
+ sound = AudioSegment.from_file(audio_path)
260
+ st.write(f"Audio duration: {len(sound) / 1000:.2f}s")
261
+
262
+ # Convert to WAV format (16kHz, mono) for Whisper
263
+ temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_converted_{int(time.time())}.wav")
264
+ # Optimize audio for speech recognition
265
+ sound = sound.set_frame_rate(16000) # 16kHz is optimal for Whisper
266
+ sound = sound.set_channels(1)
267
  sound.export(temp_wav_path, format="wav")
268
+
269
+ # Load model
270
  model = load_whisper_model()
271
+ if model is None:
272
+ return "", [] if show_alternative else ""
273
+
274
+ # Transcribe with optimized settings
275
+ result = model.transcribe(
276
+ temp_wav_path,
277
+ language="en",
278
+ task="transcribe",
279
+ fp16=torch.cuda.is_available(), # Use fp16 if GPU available
280
+ beam_size=5 # Slightly larger beam size for better accuracy
281
+ )
282
+
283
+ main_text = result["text"].strip()
284
+
285
+ # Clean up
286
+ if os.path.exists(temp_wav_path):
287
+ os.remove(temp_wav_path)
288
+
289
+ # Return results
290
+ if show_alternative and "segments" in result:
291
+ # Create alternative texts by combining segments differently
292
+ segments = result["segments"]
293
+ if len(segments) > 1:
294
+ alternatives = []
295
+ # Create up to 3 alternatives by varying confidence thresholds
296
+ for conf in [0.5, 0.7, 0.9]:
297
+ alt_text = " ".join(seg["text"] for seg in segments if seg["no_speech_prob"] < conf)
298
+ if alt_text and alt_text != main_text:
299
+ alternatives.append(alt_text)
300
+ return main_text, alternatives[:3] # Limit to 3 alternatives
301
+
302
+ return (main_text, []) if show_alternative else main_text
303
  except Exception as e:
304
  st.error(f"Transcription failed: {str(e)}")
305
+ return "", [] if show_alternative else ""
 
 
 
306
 
307
+ # Process uploaded audio files - optimized
308
+ def process_uploaded_audio(audio_file) -> Optional[str]:
309
  if not audio_file:
310
  return None
311
+
312
  try:
313
+ temp_dir = tempfile.gettempdir()
314
+
315
+ # Extract extension more safely
316
+ filename = audio_file.name
317
+ ext = filename.split('.')[-1].lower() if '.' in filename else ''
318
+
319
+ if ext not in ['wav', 'mp3', 'ogg', 'm4a', 'flac']:
320
+ st.error("Unsupported audio format. Please upload WAV, MP3, OGG, M4A, or FLAC.")
321
  return None
322
+
323
+ temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")
324
+
325
  with open(temp_file_path, "wb") as f:
326
  f.write(audio_file.getvalue())
327
+
328
  if not validate_audio(temp_file_path):
329
+ # We'll still try to process even if validation fails
330
+ st.warning("Audio may not be optimal quality, but we'll try to process it anyway.")
331
+
332
  return temp_file_path
333
  except Exception as e:
334
  st.error(f"Error processing uploaded audio: {str(e)}")
335
  return None
 
 
 
336
 
337
  # Show model information
338
  def show_model_info():
339
  st.sidebar.header("🧠 About the Models")
340
+
341
+ model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
342
+
343
+ with model_tabs[0]:
344
+ st.markdown("""
345
+ *Emotion Model*: distilbert-base-uncased-emotion
346
+ - Fine-tuned for six emotions (joy, anger, disgust, fear, sadness, surprise)
347
+ - Architecture: DistilBERT base
348
+ - High accuracy for basic emotion classification
349
+ [πŸ” Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
350
+ """)
351
+
352
+ with model_tabs[1]:
353
+ st.markdown("""
354
+ *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
355
+ - Trained on SemEval-2018 Task 3 (Twitter irony dataset)
356
+ - Architecture: RoBERTa base
357
+ - F1-score: 0.705
358
+ [πŸ” Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
359
+ """)
360
+
361
+ with model_tabs[2]:
362
  st.markdown("""
363
+ *Speech Recognition*: OpenAI Whisper (medium model)
364
+ - Optimized for speed and accuracy
365
+ - Performs well even with background noise and varied accents
366
+ - Runs locally, no internet required
367
+ *Tips*: Use good mic, reduce noise, speak clearly
368
+ [πŸ” Model Details](https://github.com/openai/whisper)
369
  """)
370
 
371
+ # Custom audio recorder using HTML/JS - optimized for better user experience
372
  def custom_audio_recorder():
373
+ st.warning("Browser-based recording requires microphone access and a modern browser. If recording fails, try uploading an audio file instead.")
374
  audio_recorder_html = """
375
  <script>
376
+ var audioRecorder = {
377
+ audioBlobs: [],
378
+ mediaRecorder: null,
379
+ streamBeingCaptured: null,
380
+ isRecording: false,
381
+ recordingTimer: null,
382
+ recordingDuration: 0,
383
+
384
+ start: function() {
385
+ if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
386
+ document.getElementById('status-message').textContent = "Recording not supported in this browser";
387
+ return Promise.reject(new Error('mediaDevices API or getUserMedia method is not supported in this browser.'));
388
+ }
389
+ else {
390
+ return navigator.mediaDevices.getUserMedia({
391
+ audio: {
392
+ echoCancellation: true,
393
+ noiseSuppression: true,
394
+ autoGainControl: true
395
+ }
396
+ })
397
+ .then(stream => {
398
+ audioRecorder.streamBeingCaptured = stream;
399
+
400
+ // Create audio context for visualization
401
+ const audioContext = new (window.AudioContext || window.webkitAudioContext)();
402
+ const source = audioContext.createMediaStreamSource(stream);
403
+ const analyser = audioContext.createAnalyser();
404
+ analyser.fftSize = 256;
405
+ source.connect(analyser);
406
+
407
+ // Start monitoring audio levels
408
+ const bufferLength = analyser.frequencyBinCount;
409
+ const dataArray = new Uint8Array(bufferLength);
410
+
411
+ function updateMeter() {
412
+ if (!audioRecorder.isRecording) return;
413
+
414
+ analyser.getByteFrequencyData(dataArray);
415
+ let sum = 0;
416
+ for(let i = 0; i < bufferLength; i++) {
417
+ sum += dataArray[i];
418
+ }
419
+ const average = sum / bufferLength;
420
+
421
+ // Update volume meter
422
+ const meter = document.getElementById('volume-meter');
423
+ if (meter) {
424
+ const height = Math.min(100, average * 2);
425
+ meter.style.height = height + '%';
426
+ }
427
+
428
+ requestAnimationFrame(updateMeter);
429
+ }
430
+
431
+ // Setup media recorder with better settings
432
+ audioRecorder.mediaRecorder = new MediaRecorder(stream, {
433
+ mimeType: 'audio/webm;codecs=opus',
434
+ audioBitsPerSecond: 128000
435
+ });
436
+
437
+ audioRecorder.audioBlobs = [];
438
+ audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
439
+ audioRecorder.audioBlobs.push(event.data);
440
+ });
441
+
442
+ // Start the recording and visualization
443
+ audioRecorder.mediaRecorder.start(100);
444
+ audioRecorder.isRecording = true;
445
+
446
+ // Start timer
447
+ audioRecorder.recordingDuration = 0;
448
+ audioRecorder.recordingTimer = setInterval(() => {
449
+ audioRecorder.recordingDuration += 1;
450
+ const timerDisplay = document.getElementById('recording-timer');
451
+ if (timerDisplay) {
452
+ const minutes = Math.floor(audioRecorder.recordingDuration / 60);
453
+ const seconds = audioRecorder.recordingDuration % 60;
454
+ timerDisplay.textContent = `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
455
+ }
456
+ }, 1000);
457
+
458
+ updateMeter();
459
+ document.getElementById('status-message').textContent = "Recording...";
460
+ });
461
+ }
462
+ },
463
+
464
+ stop: function() {
465
+ return new Promise(resolve => {
466
+ let mimeType = audioRecorder.mediaRecorder.mimeType;
467
+
468
+ audioRecorder.mediaRecorder.addEventListener("stop", () => {
469
+ let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
470
+ resolve(audioBlob);
471
+ audioRecorder.isRecording = false;
472
+ document.getElementById('status-message').textContent = "Recording stopped";
473
+
474
+ // Stop the timer
475
+ if (audioRecorder.recordingTimer) {
476
+ clearInterval(audioRecorder.recordingTimer);
477
+ }
478
+ });
479
+
480
+ audioRecorder.mediaRecorder.stop();
481
+ audioRecorder.stopStream();
482
+ audioRecorder.resetRecordingProperties();
483
+ });
484
+ },
485
+
486
+ stopStream: function() {
487
+ audioRecorder.streamBeingCaptured.getTracks()
488
+ .forEach(track => track.stop());
489
+ },
490
+
491
+ resetRecordingProperties: function() {
492
+ audioRecorder.mediaRecorder = null;
493
+ audioRecorder.streamBeingCaptured = null;
494
+ }
495
  }
496
+
497
+ var isRecording = false;
498
+
499
  function toggleRecording() {
500
+ var recordButton = document.getElementById('record-button');
501
+ var statusMessage = document.getElementById('status-message');
502
+ var volumeMeter = document.getElementById('volume-meter');
503
+ var recordingTimer = document.getElementById('recording-timer');
504
+
505
+ if (!isRecording) {
506
+ audioRecorder.start()
507
+ .then(() => {
508
+ isRecording = true;
509
+ recordButton.textContent = 'Stop Recording';
510
+ recordButton.classList.add('recording');
511
+ volumeMeter.style.display = 'block';
512
+ recordingTimer.style.display = 'block';
513
+ })
514
+ .catch(error => {
515
+ statusMessage.textContent = 'Error: ' + error.message;
516
+ });
517
+ } else {
518
+ audioRecorder.stop()
519
+ .then(audioBlob => {
520
+ const audioUrl = URL.createObjectURL(audioBlob);
521
+ var audioElement = document.getElementById('audio-playback');
522
+ audioElement.src = audioUrl;
523
+ audioElement.style.display = 'block';
524
+
525
+ const reader = new FileReader();
526
+ reader.readAsDataURL(audioBlob);
527
+ reader.onloadend = function() {
528
+ const base64data = reader.result;
529
+ var audioData = document.getElementById('audio-data');
530
+ audioData.value = base64data;
531
+ const streamlitMessage = {type: "streamlit:setComponentValue", value: base64data};
532
+ window.parent.postMessage(streamlitMessage, "*");
533
+ }
534
+
535
+ isRecording = false;
536
+ recordButton.textContent = 'Start Recording';
537
+ recordButton.classList.remove('recording');
538
+ volumeMeter.style.display = 'none';
539
+ volumeMeter.style.height = '0%';
540
+ });
541
+ }
542
  }
543
+
544
+ document.addEventListener('DOMContentLoaded', function() {
545
+ var recordButton = document.getElementById('record-button');
546
+ recordButton.addEventListener('click', toggleRecording);
547
+ });
548
  </script>
549
+
550
+ <div class="audio-recorder-container">
551
+ <button id="record-button" class="record-button">Start Recording</button>
552
+ <div id="status-message" class="status-message">Ready to record</div>
553
+
554
+ <div class="recording-info">
555
+ <div class="volume-meter-container">
556
+ <div id="volume-meter" class="volume-meter"></div>
557
+ </div>
558
+ <div id="recording-timer" class="recording-timer">00:00</div>
559
+ </div>
560
+
561
+ <audio id="audio-playback" controls style="display:none; margin-top:10px; width:100%;"></audio>
562
+ <input type="hidden" id="audio-data" name="audio-data">
563
+ </div>
564
+
565
  <style>
566
+ .audio-recorder-container {
567
+ display: flex;
568
+ flex-direction: column;
569
+ align-items: center;
570
+ padding: 15px;
571
+ border-radius: 8px;
572
+ background-color: #f7f7f7;
573
+ box-shadow: 0 2px 5px rgba(0,0,0,0.1);
574
+ }
575
+
576
+ .record-button {
577
  background-color: #f63366;
578
  color: white;
579
  border: none;
580
+ padding: 12px 24px;
581
+ border-radius: 24px;
582
  cursor: pointer;
583
+ font-size: 16px;
584
+ font-weight: bold;
585
+ transition: all 0.3s ease;
586
+ box-shadow: 0 2px 5px rgba(0,0,0,0.2);
587
  }
588
+
589
+ .record-button:hover {
590
+ background-color: #e62958;
591
+ transform: translateY(-2px);
592
+ }
593
+
594
+ .record-button.recording {
595
  background-color: #ff0000;
596
+ animation: pulse 1.5s infinite;
597
+ }
598
+
599
+ .status-message {
600
+ margin-top: 10px;
601
+ font-size: 14px;
602
+ color: #666;
603
+ }
604
+
605
+ .recording-info {
606
+ display: flex;
607
+ align-items: center;
608
+ margin-top: 15px;
609
+ width: 100%;
610
+ justify-content: center;
611
+ }
612
+
613
+ .volume-meter-container {
614
+ width: 20px;
615
+ height: 60px;
616
+ background-color: #ddd;
617
+ border-radius: 3px;
618
+ overflow: hidden;
619
+ position: relative;
620
+ }
621
+
622
+ .volume-meter {
623
+ width: 100%;
624
+ height: 0%;
625
+ background-color: #f63366;
626
+ position: absolute;
627
+ bottom: 0;
628
+ transition: height 0.1s ease;
629
+ display: none;
630
+ }
631
+
632
+ .recording-timer {
633
+ margin-left: 15px;
634
+ font-family: monospace;
635
+ font-size: 18px;
636
+ color: #f63366;
637
+ display: none;
638
+ }
639
+
640
+ @keyframes pulse {
641
+ 0% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0.7); }
642
+ 50% { opacity: 0.8; box-shadow: 0 0 0 10px rgba(255,0,0,0); }
643
+ 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0); }
644
  }
645
  </style>
646
  """
 
647
 
648
+ return components.html(audio_recorder_html, height=220)
649
+
650
+ # Function to display analysis results - optimized
651
  def display_analysis_results(transcribed_text):
652
+ st.session_state.debug_info = st.session_state.get('debug_info', [])
653
+ st.session_state.debug_info.append(f"Processing text: {transcribed_text[:50]}...")
654
+ st.session_state.debug_info = st.session_state.debug_info[-100:] # Keep last 100 entries
655
+
656
+ # Run emotion and sarcasm detection in parallel
657
+ with ThreadPoolExecutor(max_workers=2) as executor:
658
+ emotion_future = executor.submit(perform_emotion_detection, transcribed_text)
659
+ sarcasm_future = executor.submit(perform_sarcasm_detection, transcribed_text)
660
+
661
+ emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
662
+ is_sarcastic, sarcasm_score = sarcasm_future.result()
663
+
664
+ # Add results to debug info
665
+ st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}")
666
+ st.session_state.debug_info.append(f"Sarcasm: {is_sarcastic}, Score: {sarcasm_score:.3f}")
667
+
668
+ st.header("Transcribed Text")
669
+ st.text_area("Text", transcribed_text, height=120, disabled=True,
670
+ help="The audio converted to text. The text was processed for emotion and sentiment analysis.")
671
+
672
+ # Improved confidence estimation
673
+ words = transcribed_text.split()
674
+ word_count = len(words)
675
+ confidence_score = min(0.98, max(0.75, 0.75 + (word_count / 100) * 0.2))
676
+
677
+ st.caption(f"Estimated transcription confidence: {confidence_score:.2f}")
678
+
679
  st.header("Analysis Results")
 
680
  col1, col2 = st.columns([1, 2])
681
+
682
  with col1:
683
  st.subheader("Sentiment")
684
+ sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "🔄" if sentiment == "MIXED" else "😐"
685
+ st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
686
+ st.info("Sentiment reflects the dominant emotion's tone and context.")
687
+
688
  st.subheader("Sarcasm")
689
sarcasm_icon = "😏" if is_sarcastic else "😐"
690
+ sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
691
+ st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
692
+
693
+ # More informative sarcasm info
694
+ if is_sarcastic:
695
+ if sarcasm_score > 0.8:
696
+ st.info("High confidence in sarcasm detection.")
697
+ else:
698
+ st.info("Moderate confidence in sarcasm detection.")
699
+ else:
700
+ st.info("No clear indicators of sarcasm found.")
701
+
702
  with col2:
703
  st.subheader("Emotions")
704
  if emotions_dict:
705
+ st.markdown(
706
+ f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
707
+
708
+ # Enhanced visualization
709
+ sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
710
+ significant_emotions = [(e, s) for e, s in sorted_emotions if s > 0.05] # Only show significant emotions
711
+
712
+ if significant_emotions:
713
+ emotions = [e[0] for e in significant_emotions]
714
+ scores = [e[1] for e in significant_emotions]
715
+
716
+ # Use a color scale that helps distinguish emotions better
717
+ fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
718
+ title="Emotion Distribution", color=emotions,
719
+ color_discrete_sequence=px.colors.qualitative.Bold)
720
+
721
+ fig.update_layout(
722
+ yaxis_range=[0, 1],
723
+ showlegend=False,
724
+ title_font_size=14,
725
+ margin=dict(l=20, r=20, t=40, b=20),
726
+ xaxis_title="Emotion",
727
+ yaxis_title="Confidence Score",
728
+ bargap=0.3
729
+ )
730
+
731
+ # Add horizontal reference line for minimal significance
732
+ fig.add_shape(
733
+ type="line",
734
+ x0=-0.5,
735
+ x1=len(emotions) - 0.5,
736
+ y0=0.1,
737
+ y1=0.1,
738
+ line=dict(color="gray", width=1, dash="dot")
739
+ )
740
+
741
+ st.plotly_chart(fig, use_container_width=True)
742
+ else:
743
+ st.write("No significant emotions detected.")
744
  else:
745
  st.write("No emotions detected.")
746
 
747
+ # Expert analysis section (new feature while maintaining UI)
748
+ with st.expander("Expert Analysis", expanded=False):
749
+ col1, col2 = st.columns(2)
750
+
751
+ with col1:
752
+ st.subheader("Emotion Insights")
753
+ # Provide more insightful analysis based on emotion combinations
754
+ if emotions_dict:
755
+ top_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)[:3]
756
+
757
+ if len(top_emotions) >= 2:
758
+ emotion1, score1 = top_emotions[0]
759
+ emotion2, score2 = top_emotions[1]
760
+
761
+ if score2 > 0.7 * score1: # If second emotion is close to first
762
+ st.markdown(f"**Mixed emotional state detected:** {emotion_map.get(emotion1, '')} {emotion1} + {emotion_map.get(emotion2, '')} {emotion2}")
763
+
764
+ # Analyze specific combinations
765
+ if (emotion1 == "joy" and emotion2 == "surprise") or (emotion1 == "surprise" and emotion2 == "joy"):
766
+ st.write("πŸ’‘ This indicates excitement or delight")
767
+ elif (emotion1 == "sadness" and emotion2 == "anger") or (emotion1 == "anger" and emotion2 == "sadness"):
768
+ st.write("πŸ’‘ This suggests frustration or disappointment")
769
+ elif (emotion1 == "fear" and emotion2 == "surprise") or (emotion1 == "surprise" and emotion2 == "fear"):
770
+ st.write("πŸ’‘ This indicates shock or alarm")
771
+ else:
772
+ st.markdown(f"**Clear emotional state:** {emotion_map.get(emotion1, '')} {emotion1}")
773
+ else:
774
+ st.write("Single dominant emotion detected.")
775
+ else:
776
+ st.write("No significant emotional patterns detected.")
777
+
778
+ with col2:
779
+ st.subheader("Context Analysis")
780
+ # Analyze the context based on combination of sentiment and sarcasm
781
+ if is_sarcastic and sentiment == "POSITIVE":
782
+ st.markdown("⚠️ **Potential Negative Connotation:** The positive sentiment might be misleading due to detected sarcasm.")
783
+ elif is_sarcastic and sentiment == "NEGATIVE":
784
+ st.markdown("⚠️ **Complex Expression:** Negative sentiment combined with sarcasm may indicate frustrated humor or ironic criticism.")
785
+ elif sentiment == "MIXED":
786
+ st.markdown("πŸ”„ **Ambivalent Message:** The content expresses mixed or conflicting emotions.")
787
+ elif sentiment == "POSITIVE" and sarcasm_score > 0.3:
788
+ st.markdown("⚠️ **Moderate Sarcasm Indicators:** The positive sentiment might be qualified by subtle sarcasm.")
789
+ elif sentiment == "NEGATIVE" and not is_sarcastic:
790
+ st.markdown("πŸ‘Ž **Clear Negative Expression:** The content expresses genuine negative sentiment without sarcasm.")
791
+ elif sentiment == "POSITIVE" and not is_sarcastic:
792
+ st.markdown("πŸ‘ **Clear Positive Expression:** The content expresses genuine positive sentiment without sarcasm.")
793
+
794
+ # Original debug expander (maintained from original code)
795
+ with st.expander("Debug Information", expanded=False):
796
+ st.write("Debugging information for troubleshooting:")
797
+ for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
798
+ st.text(f"{i + 1}. {debug_line}")
799
+ if emotions_dict:
800
+ st.write("Raw emotion scores:")
801
+ for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
802
+ if score > 0.01: # Only show non-negligible scores
803
+ st.text(f"{emotion}: {score:.4f}")
804
+
805
+ # Original analysis details expander (maintained from original code)
806
+ with st.expander("Analysis Details", expanded=False):
807
+ st.write("""
808
+ *How this works:*
809
+ 1. *Speech Recognition*: Audio transcribed using OpenAI Whisper
810
+ 2. *Emotion Analysis*: DistilBERT model trained for six emotions
811
+ 3. *Sentiment Analysis*: Derived from dominant emotion
812
+ 4. *Sarcasm Detection*: RoBERTa model for irony detection
813
+ *Accuracy depends on*:
814
+ - Audio quality
815
+ - Speech clarity
816
+ - Background noise
817
+ - Speech patterns
818
+ """)
819
+
820
+ # Process base64 audio data - optimized
821
  def process_base64_audio(base64_data):
 
822
  try:
823
+ # Ensure we have proper base64 data
824
+ if not base64_data or not isinstance(base64_data, str) or not base64_data.startswith('data:'):
825
+ st.error("Invalid audio data received")
826
+ return None
827
+
828
+ # Extract the base64 binary part
829
+ try:
830
+ base64_binary = base64_data.split(',')[1]
831
+ except IndexError:
832
+ st.error("Invalid base64 data format")
833
+ return None
834
+
835
+ # Decode the binary data
836
+ try:
837
+ binary_data = base64.b64decode(base64_binary)
838
+ except Exception as e:
839
+ st.error(f"Failed to decode base64 data: {str(e)}")
840
+ return None
841
+
842
+ # Create a temporary file
843
+ temp_dir = tempfile.gettempdir()
844
+ temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")
845
+
846
+ # Write the binary data to the file
847
  with open(temp_file_path, "wb") as f:
848
+ f.write(binary_data)
849
+
850
+ # Validate the audio file
851
  if not validate_audio(temp_file_path):
852
+ st.warning("Audio quality may not be optimal, but we'll try to process it.")
853
+
854
  return temp_file_path
855
  except Exception as e:
856
+ st.error(f"Error processing audio data: {str(e)}")
857
  return None
 
 
 
858
 
859
+ # Preload models in background to improve performance
860
+ def preload_models():
861
+ threading.Thread(target=load_whisper_model).start()
862
+ threading.Thread(target=get_emotion_classifier).start()
863
+ threading.Thread(target=get_sarcasm_classifier).start()
864
+
865
+ # Main App Logic - optimized
866
  def main():
867
+ # Initialize session state
868
  if 'debug_info' not in st.session_state:
869
  st.session_state.debug_info = []
870
+ if 'models_loaded' not in st.session_state:
871
+ st.session_state.models_loaded = False
872
+
873
+ # Preload models in background
874
+ if not st.session_state.models_loaded:
875
+ preload_models()
876
+ st.session_state.models_loaded = True
877
+
878
+ # Create tabs
879
tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
880
+
881
  with tab1:
882
  st.header("Upload an Audio File")
883
+ audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg", "m4a", "flac"],
884
+ help="Upload an audio file for sentiment analysis (WAV, MP3, OGG, M4A, FLAC)")
885
+
886
  if audio_file:
887
  st.audio(audio_file.getvalue())
888
+ st.caption("🎧 Uploaded Audio Playback")
889
+
890
+ # Add a placeholder for progress updates
891
+ progress_placeholder = st.empty()
892
+
893
+ # Add analyze button
894
+ upload_button = st.button("Analyze Upload", key="analyze_upload")
895
+
896
+ if upload_button:
897
+ # Show progress bar
898
+ progress_bar = progress_placeholder.progress(0, text="Preparing audio...")
899
+
900
+ # Process audio
901
+ temp_audio_path = process_uploaded_audio(audio_file)
902
+
903
+ if temp_audio_path:
904
+ # Update progress
905
+ progress_bar.progress(25, text="Transcribing audio...")
906
+
907
+ # Transcribe audio
908
+ main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)
909
+
910
+ if main_text:
911
+ # Update progress
912
+ progress_bar.progress(60, text="Analyzing sentiment and emotions...")
913
+
914
+ # Display alternatives if available
915
+ if alternatives:
916
+ with st.expander("Alternative transcriptions detected", expanded=False):
917
+ for i, alt in enumerate(alternatives[:3], 1):
918
+ st.write(f"{i}. {alt}")
919
+
920
+ # Final analysis
921
+ progress_bar.progress(90, text="Finalizing results...")
922
+ display_analysis_results(main_text)
923
+
924
+ # Complete progress
925
+ progress_bar.progress(100, text="Analysis complete!")
926
+ progress_placeholder.empty()
927
+ else:
928
+ progress_placeholder.empty()
929
+ st.error("Could not transcribe the audio. Please try again with clearer audio.")
930
+
931
+ # Clean up temp file
932
+ if os.path.exists(temp_audio_path):
933
+ os.remove(temp_audio_path)
934
+ else:
935
+ progress_placeholder.empty()
936
+ st.error("Could not process the audio file. Please try a different file.")
937
+
938
  with tab2:
939
  st.header("Record Your Voice")
940
+ st.write("Use the recorder below to analyze your speech in real-time.")
941
+
942
+ # Browser recorder
943
  st.subheader("Browser-Based Recorder")
944
+ st.write("Click the button below to start/stop recording.")
945
+
946
  audio_data = custom_audio_recorder()
947
+
948
+ if audio_data:
949
+ # Add a placeholder for progress updates
950
+ progress_placeholder = st.empty()
951
+
952
+ # Add analyze button
953
+ analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
954
+
955
+ if analyze_rec_button:
956
+ # Show progress bar
957
+ progress_bar = progress_placeholder.progress(0, text="Processing recording...")
958
+
959
+ # Process the recording
960
  temp_audio_path = process_base64_audio(audio_data)
961
+
962
  if temp_audio_path:
963
+ # Update progress
964
+ progress_bar.progress(30, text="Transcribing speech...")
965
+
966
+ # Transcribe the audio
967
  transcribed_text = transcribe_audio(temp_audio_path)
968
+
969
  if transcribed_text:
970
+ # Update progress
971
+ progress_bar.progress(70, text="Analyzing sentiment and emotions...")
972
+
973
+ # Display the results
974
  display_analysis_results(transcribed_text)
975
+
976
+ # Complete progress
977
+ progress_bar.progress(100, text="Analysis complete!")
978
+ progress_placeholder.empty()
979
  else:
980
+ progress_placeholder.empty()
981
+ st.error("Could not transcribe the audio. Please try speaking more clearly.")
982
+
983
+ # Clean up temp file
984
+ if os.path.exists(temp_audio_path):
985
+ os.remove(temp_audio_path)
986
+ else:
987
+ progress_placeholder.empty()
988
+ st.error("Could not process the recording. Please try again.")
989
+
990
+ # Text input option
991
  st.subheader("Manual Text Input")
992
+ st.write("If recording doesn't work, you can type your text here:")
993
+
994
+ manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
995
+ analyze_text_button = st.button("Analyze Text", key="analyze_manual")
996
+
997
+ if analyze_text_button and manual_text:
998
+ with st.spinner("Analyzing text..."):
999
+ display_analysis_results(manual_text)
1000
+
1001
+ # Show model information
1002
  show_model_info()
1003
+
1004
+ # Add a small footer with version info
1005
+ st.sidebar.markdown("---")
1006
+ st.sidebar.caption("Voice Sentiment Analysis v2.0")
1007
+ st.sidebar.caption("Optimized for speed and accuracy")
1008
 
1009
  if __name__ == "__main__":
1010
  main()
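
Reviewer note on the updated file: app.py now leans on streamlit, torch, transformers, openai-whisper (imported as whisper), pydub, and plotly; those imports live in the unchanged top of the file rather than in this diff. A quick way to confirm a Space or local environment has everything the new code needs is a small import smoke test. The snippet below is a hypothetical helper written for this review, not a file in the repository, and the module list is inferred from the code above.

# smoke_test.py - hypothetical helper, not part of this commit
# Checks that the third-party modules the updated app.py relies on can be imported.
import importlib

REQUIRED_MODULES = [
    "streamlit",     # UI framework (st.*)
    "torch",         # device selection and fp16 inference
    "transformers",  # AutoTokenizer, AutoModelForSequenceClassification, pipeline
    "whisper",       # openai-whisper speech-to-text
    "pydub",         # AudioSegment audio conversion
    "plotly",        # plotly.express emotion bar chart
]

missing = []
for name in REQUIRED_MODULES:
    try:
        importlib.import_module(name)
    except ImportError:
        missing.append(name)

print("missing modules:", ", ".join(missing) if missing else "none")

If any of these packages are absent, app.py fails at its top-level imports before the cached model loaders and their error handling ever run.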