Update app.py
app.py CHANGED
@@ -13,9 +13,6 @@ import time
 import numpy as np
 import librosa
 import subprocess
-import pyaudio
-import wave
-import io
 
 # Suppress warnings for a clean console
 logging.getLogger("torch").setLevel(logging.CRITICAL)
@@ -32,7 +29,7 @@ st.set_page_config(layout="wide", page_title="Advanced Voice Emotion Analyzer")
 
 # Interface design
 st.title("🎙️ Advanced Voice Emotion Analyzer")
-st.write("Analyze all emotions from audio
+st.write("Analyze all 27 emotions from uploaded audio with enhanced detection to avoid neutral defaults.")
 
 # Audio Preprocessing
 def make_audio_scarier(audio_path, output_path):
@@ -83,14 +80,27 @@ def perform_audio_emotion_detection(audio_path):
         audio_emotions = ["neutral", "happy", "sad", "angry", "fearful", "surprise", "disgust"]
         emotion_dict = {emotion: float(scores[i]) for i, emotion in enumerate(audio_emotions)}
         top_emotion = audio_emotions[np.argmax(scores)]
-        #
+        # Enhanced boosting based on audio features
         features = extract_audio_features(audio_path)
         if features.get("pitch_mean", 0) < 200 and features.get("energy_mean", 0) > 0.1 and features.get("zcr_mean", 0) > 0.1:
-            emotion_dict["fearful"] = min(1.0, emotion_dict.get("fearful", 0) + 0.
+            emotion_dict["fearful"] = min(1.0, emotion_dict.get("fearful", 0) + 0.4)  # Increased boost
             top_emotion = "fearful" if emotion_dict["fearful"] > emotion_dict[top_emotion] else top_emotion
-        elif features.get("energy_mean", 0) > 0.
-            emotion_dict["angry"] = min(1.0, emotion_dict.get("angry", 0) + 0.
+        elif features.get("energy_mean", 0) > 0.25:  # Stricter threshold
+            emotion_dict["angry"] = min(1.0, emotion_dict.get("angry", 0) + 0.35)
             top_emotion = "angry" if emotion_dict["angry"] > emotion_dict[top_emotion] else top_emotion
+        elif features.get("pitch_mean", 0) > 500 and features.get("energy_mean", 0) < 0.05:
+            emotion_dict["sad"] = min(1.0, emotion_dict.get("sad", 0) + 0.3)
+            top_emotion = "sad" if emotion_dict["sad"] > emotion_dict[top_emotion] else top_emotion
+        elif features.get("energy_mean", 0) > 0.15 and features.get("pitch_mean", 0) > 300:
+            emotion_dict["happy"] = min(1.0, emotion_dict.get("happy", 0) + 0.3)
+            top_emotion = "happy" if emotion_dict["happy"] > emotion_dict[top_emotion] else top_emotion
+        elif features.get("zcr_mean", 0) > 0.15 and features.get("energy_mean", 0) > 0.1:
+            emotion_dict["surprise"] = min(1.0, emotion_dict.get("surprise", 0) + 0.25)
+            top_emotion = "surprise" if emotion_dict["surprise"] > emotion_dict[top_emotion] else top_emotion
+        # Fallback to avoid neutral if score is low
+        if emotion_dict["neutral"] > 0.5 and max([v for k, v in emotion_dict.items() if k != "neutral"]) > 0.3:
+            emotion_dict["neutral"] = max(0.0, emotion_dict["neutral"] - 0.2)  # Reduce neutral weight
+            top_emotion = max(emotion_dict, key=emotion_dict.get)
         return emotion_dict, top_emotion
     except Exception as e:
         st.error(f"Audio emotion detection failed: {str(e)}")
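Because the added branches form an if/elif chain, at most one boost fires per clip, and the order (fearful, then angry, sad, happy, surprise) decides which rule wins when several thresholds are met. Below is a compact, standalone paraphrase of the same logic so the thresholds can be sanity-checked outside the app; the helper name rank_boost and the sample numbers are illustrative, not part of the commit.

    # Standalone paraphrase of the boost chain in this hunk; rank_boost,
    # the sample scores, and the feature values are hypothetical.
    def rank_boost(scores, features):
        scores = dict(scores)  # work on a copy
        pitch = features.get("pitch_mean", 0)
        energy = features.get("energy_mean", 0)
        zcr = features.get("zcr_mean", 0)

        if pitch < 200 and energy > 0.1 and zcr > 0.1:
            scores["fearful"] = min(1.0, scores.get("fearful", 0) + 0.4)
        elif energy > 0.25:
            scores["angry"] = min(1.0, scores.get("angry", 0) + 0.35)
        elif pitch > 500 and energy < 0.05:
            scores["sad"] = min(1.0, scores.get("sad", 0) + 0.3)
        elif energy > 0.15 and pitch > 300:
            scores["happy"] = min(1.0, scores.get("happy", 0) + 0.3)
        elif zcr > 0.15 and energy > 0.1:
            scores["surprise"] = min(1.0, scores.get("surprise", 0) + 0.25)

        # Damp a dominant neutral when a competing emotion is already strong.
        if scores.get("neutral", 0) > 0.5 and max(v for k, v in scores.items() if k != "neutral") > 0.3:
            scores["neutral"] = max(0.0, scores["neutral"] - 0.2)

        return scores, max(scores, key=scores.get)

    # Low pitch with noticeable energy and ZCR: the fear branch fires.
    print(rank_boost({"neutral": 0.6, "fearful": 0.35},
                     {"pitch_mean": 150, "energy_mean": 0.2, "zcr_mean": 0.12}))

For this sample clip the fear branch applies (pitch 150 < 200, energy 0.2 > 0.1, ZCR 0.12 > 0.1): fearful rises to 0.75, neutral is damped to 0.4, and fearful becomes the top emotion instead of neutral.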
@@ -114,6 +124,10 @@ def perform_text_emotion_detection(text):
                     "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"]
         emotions_dict = {result['label']: result['score'] for result in results if result['label'] in emotions}
         top_emotion = max(emotions_dict, key=emotions_dict.get)
+        # Reduce neutral influence if other emotions are strong
+        if emotions_dict.get("neutral", 0) > 0.5 and max([v for k, v in emotions_dict.items() if k != "neutral"]) > 0.4:
+            emotions_dict["neutral"] = max(0.0, emotions_dict["neutral"] - 0.15)
+            top_emotion = max(emotions_dict, key=emotions_dict.get)
         return emotions_dict, top_emotion
     except Exception as e:
         st.error(f"Text emotion detection failed: {str(e)}")
@@ -169,50 +183,12 @@ def transcribe_audio(audio_path):
         st.error(f"Transcription failed: {str(e)}")
         return ""
 
-# Python Audio Recording
-def record_audio():
-    CHUNK = 1024
-    FORMAT = pyaudio.paInt16
-    CHANNELS = 1
-    RATE = 16000
-    RECORD_SECONDS = st.slider("Recording duration (seconds)", 1, 30, 5)
-
-    p = pyaudio.PyAudio()
-    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
-
-    if st.button("Start Recording"):
-        st.write("Recording...")
-        frames = []
-        for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
-            data = stream.read(CHUNK)
-            frames.append(data)
-        st.write("Recording finished.")
-
-        stream.stop_stream()
-        stream.close()
-        p.terminate()
-
-        temp_file_path = os.path.join(tempfile.gettempdir(), f"recorded_audio_{int(time.time())}.wav")
-        wf = wave.open(temp_file_path, 'wb')
-        wf.setnchannels(CHANNELS)
-        wf.setsampwidth(p.get_sample_size(FORMAT))
-        wf.setframerate(RATE)
-        wf.writeframes(b''.join(frames))
-        wf.close()
-
-        return temp_file_path
-    return None
-
 # Process Audio Files
 def process_audio_file(audio_data):
     temp_dir = tempfile.gettempdir()
     temp_file_path = os.path.join(temp_dir, f"audio_{int(time.time())}.wav")
     with open(temp_file_path, "wb") as f:
-
-            with open(audio_data, "rb") as f_audio:
-                f.write(f_audio.read())
-        else:
-            f.write(audio_data.getvalue())
+        f.write(audio_data.getvalue())
     if not validate_audio(temp_file_path):
         return None
     return temp_file_path
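With the PyAudio recording path removed, process_audio_file no longer needs to branch on whether it received a filesystem path or an in-memory upload: it simply calls getvalue(), which Streamlit's UploadedFile objects provide. A rough way to exercise the simplified function outside the Streamlit UI, assuming the module is importable as app and using io.BytesIO to stand in for an upload (the file name is hypothetical):

    import io
    from app import process_audio_file  # assumption: importing app runs the Streamlit script in bare mode

    with open("sample.wav", "rb") as f:          # any local WAV file
        fake_upload = io.BytesIO(f.read())       # io.BytesIO also exposes getvalue()

    temp_path = process_audio_file(fake_upload)  # writes a temp .wav, then validates it
    print(temp_path)                             # None if validate_audio() rejected the file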
@@ -230,6 +206,7 @@ def display_analysis_results(audio_path):
     audio_emotions, audio_top_emotion = perform_audio_emotion_detection(processed_audio_path)
     st.subheader("Audio-Based Emotion")
     st.write(f"**Dominant Emotion:** {audio_top_emotion} (Score: {audio_emotions.get(audio_top_emotion, 0):.3f})")
+    st.write("Audio Emotions:", audio_emotions)  # Debug output
 
     # Transcription and text emotion detection
     transcribed_text = transcribe_audio(processed_audio_path)
@@ -238,6 +215,7 @@ def display_analysis_results(audio_path):
     if transcribed_text:
         text_emotions, text_top_emotion = perform_text_emotion_detection(transcribed_text)
         st.write(f"**Text-Based Dominant Emotion:** {text_top_emotion} (Score: {text_emotions.get(text_top_emotion, 0):.3f})")
+        st.write("Text Emotions:", text_emotions)  # Debug output
 
         # Combine emotions (prioritize audio, map to 27 emotions)
         emotion_map = {
@@ -256,7 +234,12 @@ def display_analysis_results(audio_path):
         for text_emotion, score in text_emotions.items():
             combined_emotions[text_emotion] = combined_emotions.get(text_emotion, 0) + score * 0.3
 
+        # Avoid neutral if other emotions are competitive
         top_emotion = max(combined_emotions, key=combined_emotions.get)
+        if combined_emotions["neutral"] > 0.5 and max([v for k, v in combined_emotions.items() if k != "neutral"]) > 0.4:
+            combined_emotions["neutral"] = max(0.0, combined_emotions["neutral"] - 0.25)  # Stronger reduction
+            top_emotion = max(combined_emotions, key=combined_emotions.get)
+
         sentiment = "POSITIVE" if top_emotion in ["admiration", "amusement", "approval", "caring", "desire", "excitement",
                                                   "gratitude", "joy", "love", "optimism", "pride", "relief"] else "NEGATIVE" if top_emotion in ["anger", "annoyance", "disappointment", "disapproval", "disgust", "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"] else "NEUTRAL"
 
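The neutral damping on the combined scores is easiest to see with numbers: when neutral leads but another emotion clears the 0.4 bar, the 0.25 reduction is enough to flip the ranking. A tiny worked example with hypothetical values:

    combined = {"neutral": 0.55, "joy": 0.45}
    if combined["neutral"] > 0.5 and max(v for k, v in combined.items() if k != "neutral") > 0.4:
        combined["neutral"] = max(0.0, combined["neutral"] - 0.25)   # 0.55 -> 0.30
    print(max(combined, key=combined.get))                           # "joy" now wins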
@@ -287,7 +270,7 @@ def display_analysis_results(audio_path):
     st.write(f"**Audio Features:** {extract_audio_features(processed_audio_path)}")
     st.write("""
     **How it works:**
-    - Audio Emotion: Wav2Vec2 detects 7 emotions
+    - Audio Emotion: Wav2Vec2 detects 7 emotions with feature-based boosts.
     - Transcription: Whisper converts audio to text.
     - Text Emotion: RoBERTa refines 27 emotions from text.
     - Sarcasm: Analyzes text for irony.
@@ -301,25 +284,13 @@ def display_analysis_results(audio_path):
 
 # Main App Logic
 def main():
-
-
-
-
-        audio_file = st.file_uploader("Upload audio (wav, mp3, ogg)", type=["wav", "mp3", "ogg"])
-        if audio_file:
-            temp_audio_path = process_audio_file(audio_file)
-            if temp_audio_path:
-                if st.button("Analyze Upload"):
-                    with st.spinner("Analyzing..."):
-                        display_analysis_results(temp_audio_path)
-
-    with tab2:
-        st.header("Record Your Voice")
-        st.write("Record audio to analyze emotions in real-time.")
-        temp_audio_path = record_audio()
+    st.header("Upload Audio File")
+    audio_file = st.file_uploader("Upload audio (wav, mp3, ogg)", type=["wav", "mp3", "ogg"])
+    if audio_file:
+        temp_audio_path = process_audio_file(audio_file)
         if temp_audio_path:
-            if st.button("Analyze
-                with st.spinner("
+            if st.button("Analyze Audio"):
+                with st.spinner("Analyzing..."):
                     display_analysis_results(temp_audio_path)
 
     st.sidebar.header("About")
@@ -329,6 +300,7 @@ def main():
     - Text: SamLowe/roberta-base-go_emotions (27 emotions)
     - Sarcasm: cardiffnlp/twitter-roberta-base-irony
     - Speech: OpenAI Whisper (large-v3)
+    **Note:** Recording is not supported on Hugging Face Spaces; use uploaded files.
     """)
 
 if __name__ == "__main__":