Update app.py

app.py CHANGED
@@ -31,23 +31,19 @@ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
 st.title("🎤 Voice Based Sentiment Analysis")
 st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")
 
-
-# Emotion Detection Function
 # Emotion Detection Function
 @st.cache_resource
 def get_emotion_classifier():
     try:
-        tokenizer = AutoTokenizer.from_pretrained("
-        model = AutoModelForSequenceClassification.from_pretrained("
+        tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion", use_fast=True)
+        model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
         model = model.to(device)
 
-        # Changed from device=-1 if device.type == "cpu" else 0
-        # to ensure proper device selection
         classifier = pipeline("text-classification",
-                              …
+                              model=model,
+                              tokenizer=tokenizer,
+                              top_k=None,
+                              device=0 if torch.cuda.is_available() else -1)
 
         # Add a verification test to make sure the model is working
         test_result = classifier("I am happy today")
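Note on the new pipeline call: with top_k=None a text-classification pipeline scores every label, and a single input string typically comes back wrapped in a batch list. A minimal standalone sketch of consuming that output (the model name matches the diff; the defensive nesting check is an assumption about the installed transformers version):

import torch
from transformers import pipeline

classifier = pipeline("text-classification",
                      model="bhadresh-savani/distilbert-base-uncased-emotion",
                      top_k=None,
                      device=0 if torch.cuda.is_available() else -1)

results = classifier("I am happy today")
# Usually [[{'label': 'joy', 'score': ...}, ...]]; flatten defensively.
scores = results[0] if results and isinstance(results[0], list) else results
emotions = {r["label"]: r["score"] for r in scores}
print(max(emotions, key=emotions.get))  # expected: joy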
@@ -57,65 +53,54 @@ def get_emotion_classifier():
     except Exception as e:
         print(f"Error loading emotion model: {str(e)}")
         st.error(f"Failed to load emotion model. Please check logs.")
-        # Return a basic fallback that won't crash
         return None
 
-
 def perform_emotion_detection(text):
     try:
         if not text or len(text.strip()) < 3:
             return {}, "neutral", {}, "NEUTRAL"
 
         emotion_classifier = get_emotion_classifier()
-        …
+        if emotion_classifier is None:
+            st.error("Emotion classifier not available.")
+            return {}, "neutral", {}, "NEUTRAL"
+
+        emotion_results = emotion_classifier(text)
+        print(f"Raw emotion classifier output: {emotion_results}")
+        if not emotion_results or not isinstance(emotion_results, list):
+            st.error("Emotion classifier returned invalid results.")
+            return {}, "neutral", {}, "NEUTRAL"
 
         emotion_map = {
-            "admiration": "😊", "amusement": "😄", "anger": "😡", "annoyance": "😠",
-            "approval": "👍", "caring": "🤗", "confusion": "😕", "curiosity": "🤔",
-            "desire": "😍", "disappointment": "😞", "disapproval": "😒", "disgust": "🤢",
-            "embarrassment": "😳", "excitement": "🤩", "fear": "😨", "gratitude": "🙏",
-            "grief": "😢", "joy": "😄", "love": "❤", "nervousness": "😰",
-            "optimism": "🙂", "pride": "😌", "realization": "💡", "relief": "😅",
-            "remorse": "😔", "sadness": "😭", "surprise": "😲", "neutral": "😐"
+            "joy": "😊", "anger": "😡", "disgust": "🤢", "fear": "😨",
+            "sadness": "😢", "surprise": "😲"
         }
-        positive_emotions = ["admiration", "amusement", "approval", "caring", "desire",
-                             "excitement", "gratitude", "joy", "love", "optimism", "pride", "relief"]
-        negative_emotions = ["anger", "annoyance", "disappointment", "disapproval", "disgust",
-                             "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"]
-        neutral_emotions = ["confusion", "curiosity", "realization", "surprise", "neutral"]
-
-        # Fix 1: Create a clean emotions dictionary from results
+        positive_emotions = ["joy"]
+        negative_emotions = ["anger", "disgust", "fear", "sadness"]
+        neutral_emotions = ["surprise"]
 
         emotions_dict = {}
         for result in emotion_results:
             emotions_dict[result['label']] = result['score']
 
-        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.02}
+        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
 
-        # If filtered dictionary is empty, fall back to original
         if not filtered_emotions:
             filtered_emotions = emotions_dict
 
-        # Fix 3: Make sure we properly find the top emotion
         top_emotion = max(filtered_emotions, key=filtered_emotions.get)
         top_score = filtered_emotions[top_emotion]
 
-        # Fix 4: More robust sentiment assignment
         if top_emotion in positive_emotions:
             sentiment = "POSITIVE"
         elif top_emotion in negative_emotions:
             sentiment = "NEGATIVE"
         else:
-            # If the top emotion is neutral but there are strong competing emotions, use them
             competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
 
-            # Check if there's a close second non-neutral emotion
             if len(competing_emotions) > 1:
                 if (competing_emotions[0][0] in neutral_emotions and
                         competing_emotions[1][0] not in neutral_emotions and
-                        competing_emotions[1][1] > 0.
-                    # Use the second strongest emotion instead
+                        competing_emotions[1][1] > 0.7 * competing_emotions[0][1]):
                     top_emotion = competing_emotions[1][0]
                     if top_emotion in positive_emotions:
                         sentiment = "POSITIVE"
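The promoted-runner-up rule above, isolated as a toy function with made-up scores (the 0.7 factor matches the new code; the example values are purely illustrative):

def resolve_top_emotion(filtered_emotions, neutral_emotions):
    # Rank the top 3 emotions; if the leader is neutral-ish but a
    # non-neutral runner-up scores within 70% of it, promote the runner-up.
    ranked = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
    top_emotion = ranked[0][0]
    if (len(ranked) > 1
            and ranked[0][0] in neutral_emotions
            and ranked[1][0] not in neutral_emotions
            and ranked[1][1] > 0.7 * ranked[0][1]):
        top_emotion = ranked[1][0]
    return top_emotion

print(resolve_top_emotion({"surprise": 0.40, "fear": 0.35, "joy": 0.10},
                          ["surprise"]))  # fear, since 0.35 > 0.7 * 0.40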
@@ -128,15 +113,13 @@ def perform_emotion_detection(text):
             else:
                 sentiment = "NEUTRAL"
 
-        # Log for debugging
         print(f"Text: {text[:50]}...")
         print(f"Top 3 emotions: {sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]}")
        print(f"Selected top emotion: {top_emotion} ({filtered_emotions.get(top_emotion, 0):.3f})")
         print(f"Sentiment determined: {sentiment}")
-
-        print(f"All emotions detected: {filtered_emotions}")
+        print(f"All emotions detected: {emotions_dict}")
         print(f"Filtered emotions: {filtered_emotions}")
-        print(f"Emotion classification threshold: 0.
+        print(f"Emotion classification threshold: 0.01")
 
         return emotions_dict, top_emotion, emotion_map, sentiment
     except Exception as e:
@@ -144,7 +127,6 @@ def perform_emotion_detection(text):
         print(f"Exception in emotion detection: {str(e)}")
         return {}, "neutral", {}, "NEUTRAL"
 
-
 # Sarcasm Detection Function
 @st.cache_resource
 def get_sarcasm_classifier():
@@ -153,7 +135,7 @@ def get_sarcasm_classifier():
         model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
         model = model.to(device)
         classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
-                              …
+                              device=0 if torch.cuda.is_available() else -1)
 
         # Add a verification test to ensure the model is working
         test_result = classifier("This is totally amazing")
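For reference, a sketch of reading a sarcasm verdict out of this classifier's output. The label name depends on the config revision (TweetEval-era checkpoints of the irony model emit LABEL_0/LABEL_1, with LABEL_1 mapping to irony), so both spellings are treated as positive here; the sample dict is mocked:

def interpret_irony(output):
    # Map one {"label", "score"} pipeline result to (is_sarcastic, confidence).
    return output["label"] in ("irony", "LABEL_1"), output["score"]

print(interpret_irony({"label": "LABEL_1", "score": 0.87}))  # (True, 0.87)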
@@ -165,7 +147,6 @@ def get_sarcasm_classifier():
         st.error(f"Failed to load sarcasm model. Please check logs.")
         return None
 
-
 def perform_sarcasm_detection(text):
     try:
         if not text or len(text.strip()) < 3:
@@ -184,7 +165,6 @@ def perform_sarcasm_detection(text):
         st.error(f"Sarcasm detection failed: {str(e)}")
         return False, 0.0
 
-
 # Validate audio quality
 def validate_audio(audio_path):
     try:
@@ -200,9 +180,7 @@ def validate_audio(audio_path):
         st.error("Invalid or corrupted audio file.")
         return False
 
-
 # Speech Recognition with Whisper
-# @st.cache_resource
 @st.cache_resource
 def load_whisper_model():
     try:
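The loader's body is outside this diff; a minimal cached-loader sketch, assuming the openai-whisper package and the large-v3 checkpoint named elsewhere in the app:

import torch
import whisper  # openai-whisper package
import streamlit as st

@st.cache_resource
def load_whisper_model():
    # Cache the model across Streamlit reruns; fall back to CPU without a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return whisper.load_model("large-v3", device=device)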
@@ -245,7 +223,6 @@ def transcribe_audio(audio_path, show_alternative=False):
         st.error(f"Transcription failed: {str(e)}")
         return "", [] if show_alternative else ""
 
-
 # Function to handle uploaded audio files
 def process_uploaded_audio(audio_file):
     if not audio_file:
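Worth noting about the error return kept as context above: the conditional expression binds only to the second tuple element, so this path returns a 2-tuple regardless of show_alternative:

def on_error(show_alternative: bool):
    # Same shape as the diff's return: parses as ("", ([] if ... else "")).
    return "", [] if show_alternative else ""

print(on_error(True))   # ('', [])
print(on_error(False))  # ('', '') -- still a tuple, not a bare string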
@@ -271,7 +248,6 @@ def process_uploaded_audio(audio_file):
         st.error(f"Error processing uploaded audio: {str(e)}")
         return None
 
-
 # Show model information
 def show_model_info():
     st.sidebar.header("🧠 About the Models")
@@ -280,11 +256,11 @@ def show_model_info():
 
     with model_tabs[0]:
         st.markdown("""
-        *Emotion Model*:
-        - Fine-tuned
-        - Architecture:
-        -
-        [🔗 Model Hub](https://huggingface.co/
+        *Emotion Model*: distilbert-base-uncased-emotion
+        - Fine-tuned for six emotions (joy, anger, disgust, fear, sadness, surprise)
+        - Architecture: DistilBERT base
+        - High accuracy for basic emotion classification
+        [🔗 Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
         """)
 
     with model_tabs[1]:
@@ -307,7 +283,6 @@ def show_model_info():
         [🔗 Model Details](https://github.com/openai/whisper)
         """)
 
-
 # Custom audio recorder using HTML/JS
 def custom_audio_recorder():
     st.warning("Browser-based recording requires microphone access and a modern browser. If recording fails, try uploading an audio file instead.")
@@ -440,10 +415,8 @@ def custom_audio_recorder():
 
     return components.html(audio_recorder_html, height=150)
 
-
 # Function to display analysis results
 def display_analysis_results(transcribed_text):
-    # Fix 5: Add debugging to track what's happening
     st.session_state.debug_info = st.session_state.get('debug_info', [])
     st.session_state.debug_info.append(f"Processing text: {transcribed_text[:50]}...")
     st.session_state.debug_info = st.session_state.debug_info[-100:]  # Keep last 100 entries
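The session-state lines above use a fetch-or-init plus truncate idiom to keep a bounded debug log; the same pattern in isolation, with synthetic entries:

history = []
for i in range(250):
    history.append(f"event {i}")
    history = history[-100:]  # retain only the newest 100 entries
print(len(history), history[0])  # 100 event 150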
@@ -493,7 +466,6 @@ def display_analysis_results(transcribed_text):
     else:
         st.write("No emotions detected.")
 
-    # Fix 6: Add debug expander for troubleshooting
     with st.expander("Debug Information", expanded=False):
         st.write("Debugging information for troubleshooting:")
         for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
@@ -508,7 +480,7 @@ def display_analysis_results(transcribed_text):
     st.write("""
     *How this works:*
     1. *Speech Recognition*: Audio transcribed using OpenAI Whisper (large-v3)
-    2. *Emotion Analysis*:
+    2. *Emotion Analysis*: DistilBERT model trained for six emotions
     3. *Sentiment Analysis*: Derived from dominant emotion
     4. *Sarcasm Detection*: RoBERTa model for irony detection
     *Accuracy depends on*:
@@ -518,7 +490,6 @@ def display_analysis_results(transcribed_text):
     - Speech patterns
     """)
 
-
 # Process base64 audio data
 def process_base64_audio(base64_data):
     try:
@@ -539,10 +510,8 @@ def process_base64_audio(base64_data):
         st.error(f"Error processing audio data: {str(e)}")
         return None
 
-
 # Main App Logic
 def main():
-    # Fix 7: Initialize session state for debugging
     if 'debug_info' not in st.session_state:
         st.session_state.debug_info = []
 
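process_base64_audio's body sits outside this diff; a hypothetical sketch of the usual shape of such a helper (the data-URL header stripping and the .wav suffix are assumptions, not taken from the source):

import base64
import tempfile

def decode_base64_audio(base64_data: str) -> str:
    # Browser recorders usually post "data:audio/...;base64,<payload>".
    payload = base64_data.split(",", 1)[-1]
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(base64.b64decode(payload))
        return f.name  # path for downstream transcription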
@@ -616,6 +585,5 @@ def main():
 
     show_model_info()
 
-
 if __name__ == "__main__":
     main()