MaroofTechSorcerer committed on
Commit f6d1ff0 · verified · 1 Parent(s): 48302e7

Update app.py

Files changed (1)
  1. app.py +227 -446
app.py CHANGED
@@ -3,16 +3,19 @@ import streamlit as st
  import tempfile
  import torch
  import transformers
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
  import plotly.express as px
  import logging
  import warnings
  import whisper
  from pydub import AudioSegment
  import time
- import base64
  import io
- import streamlit.components.v1 as components

  # Suppress warnings for a clean console
  logging.getLogger("torch").setLevel(logging.CRITICAL)
@@ -25,100 +28,98 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(f"Using device: {device}")

  # Set Streamlit app layout
- st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")

  # Interface design
- st.title("🎙️ Voice Based Sentiment Analysis")
- st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")

- # Emotion Detection Function
  @st.cache_resource
- def get_emotion_classifier():
      tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions", use_fast=True)
      model = AutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
      model = model.to(device)
      return pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None, device=-1 if device.type == "cpu" else 0)

- def perform_emotion_detection(text):
      try:
-         if not text or len(text.strip()) < 3:
-             return {}, "neutral", {}, "NEUTRAL"
-
-         emotion_classifier = get_emotion_classifier()
-         emotion_results = emotion_classifier(text)[0]
-
-         emotion_map = {
-             "admiration": "🤩", "amusement": "😄", "anger": "😡", "annoyance": "😒",
-             "approval": "👍", "caring": "🤗", "confusion": "😕", "curiosity": "🧐",
-             "desire": "😍", "disappointment": "😞", "disapproval": "👎", "disgust": "🤢",
-             "embarrassment": "😳", "excitement": "🤩", "fear": "😨", "gratitude": "🙏",
-             "grief": "😢", "joy": "😊", "love": "❤️", "nervousness": "😰",
-             "optimism": "🌈", "pride": "😌", "realization": "💡", "relief": "😌",
-             "remorse": "😔", "sadness": "😭", "surprise": "😲", "neutral": "😐"
-         }
-
-         positive_emotions = ["admiration", "amusement", "approval", "caring", "desire",
-                              "excitement", "gratitude", "joy", "love", "optimism", "pride", "relief"]
-         negative_emotions = ["anger", "annoyance", "disappointment", "disapproval", "disgust",
-                              "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"]
-         neutral_emotions = ["confusion", "curiosity", "realization", "surprise", "neutral"]
-
-         # Fix 1: Create a clean emotions dictionary from results
-         emotions_dict = {}
-         for result in emotion_results:
-             emotions_dict[result['label']] = result['score']
-
-         # Fix 2: Filter out very low scores (below threshold)
-         filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.05}
-
-         # If filtered dictionary is empty, fall back to original
-         if not filtered_emotions:
-             filtered_emotions = emotions_dict
-
-         # Fix 3: Make sure we properly find the top emotion
-         top_emotion = max(filtered_emotions, key=filtered_emotions.get)
-         top_score = filtered_emotions[top_emotion]
-
-         # Fix 4: More robust sentiment assignment
-         if top_emotion in positive_emotions:
-             sentiment = "POSITIVE"
-         elif top_emotion in negative_emotions:
-             sentiment = "NEGATIVE"
-         else:
-             # If the top emotion is neutral but there are strong competing emotions, use them
-             competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
-
-             # Check if there's a close second non-neutral emotion
-             if len(competing_emotions) > 1:
-                 if (competing_emotions[0][0] in neutral_emotions and
-                     competing_emotions[1][0] not in neutral_emotions and
-                     competing_emotions[1][1] > 0.7 * competing_emotions[0][1]):
-                     # Use the second strongest emotion instead
-                     top_emotion = competing_emotions[1][0]
-                     if top_emotion in positive_emotions:
-                         sentiment = "POSITIVE"
-                     elif top_emotion in negative_emotions:
-                         sentiment = "NEGATIVE"
-                     else:
-                         sentiment = "NEUTRAL"
-                 else:
-                     sentiment = "NEUTRAL"
-             else:
-                 sentiment = "NEUTRAL"
-
-         # Log for debugging
-         print(f"Text: {text[:50]}...")
-         print(f"Top 3 emotions: {sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]}")
-         print(f"Selected top emotion: {top_emotion} ({filtered_emotions.get(top_emotion, 0):.3f})")
-         print(f"Sentiment determined: {sentiment}")
-
-         return emotions_dict, top_emotion, emotion_map, sentiment
      except Exception as e:
-         st.error(f"Emotion detection failed: {str(e)}")
-         print(f"Exception in emotion detection: {str(e)}")
-         return {}, "neutral", {}, "NEUTRAL"

- # Sarcasm Detection Function
  @st.cache_resource
  def get_sarcasm_classifier():
      tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)
@@ -128,11 +129,8 @@ def get_sarcasm_classifier():

  def perform_sarcasm_detection(text):
      try:
-         if not text or len(text.strip()) < 3:
-             return False, 0.0
-
-         sarcasm_classifier = get_sarcasm_classifier()
-         result = sarcasm_classifier(text)[0]
          is_sarcastic = result['label'] == "LABEL_1"
          sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
          return is_sarcastic, sarcasm_score
@@ -140,415 +138,198 @@ def perform_sarcasm_detection(text):
          st.error(f"Sarcasm detection failed: {str(e)}")
          return False, 0.0

- # Validate audio quality
  def validate_audio(audio_path):
      try:
          sound = AudioSegment.from_file(audio_path)
-         if sound.dBFS < -50:
-             st.warning("Audio volume is too low. Please record or upload a louder audio.")
-             return False
-         if len(sound) < 1000: # Less than 1 second
-             st.warning("Audio is too short. Please record a longer audio.")
              return False
          return True
-     except:
          st.error("Invalid or corrupted audio file.")
          return False

  # Speech Recognition with Whisper
  @st.cache_resource
  def load_whisper_model():
-     # Use 'large-v3' for maximum accuracy
-     model = whisper.load_model("large-v3")
-     return model

- def transcribe_audio(audio_path, show_alternative=False):
      try:
-         st.write(f"Processing audio file: {audio_path}")
          sound = AudioSegment.from_file(audio_path)
-         st.write(f"Audio duration: {len(sound)/1000:.2f}s, Sample rate: {sound.frame_rate}, Channels: {sound.channels}")
-
-         # Convert to WAV format (16kHz, mono) for Whisper
          temp_wav_path = os.path.join(tempfile.gettempdir(), "temp_converted.wav")
-         sound = sound.set_frame_rate(16000)
-         sound = sound.set_channels(1)
          sound.export(temp_wav_path, format="wav")
-
-         # Load Whisper model
          model = load_whisper_model()
-
-         # Transcribe audio
          result = model.transcribe(temp_wav_path, language="en")
-         main_text = result["text"].strip()
-
-         # Clean up
-         if os.path.exists(temp_wav_path):
-             os.remove(temp_wav_path)
-
-         # Whisper doesn't provide alternatives, so return empty list
-         if show_alternative:
-             return main_text, []
-         return main_text
      except Exception as e:
          st.error(f"Transcription failed: {str(e)}")
-         return "", [] if show_alternative else ""

- # Function to handle uploaded audio files
- def process_uploaded_audio(audio_file):
-     if not audio_file:
-         return None

-     try:
-         temp_dir = tempfile.gettempdir()
-         temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.wav")

-         with open(temp_file_path, "wb") as f:
-             f.write(audio_file.getvalue())
-
-         if not validate_audio(temp_file_path):
-             return None
-
          return temp_file_path
-     except Exception as e:
-         st.error(f"Error processing uploaded audio: {str(e)}")
          return None

- # Show model information
- def show_model_info():
-     st.sidebar.header("🧠 About the Models")
-
-     model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
-
-     with model_tabs[0]:
-         st.markdown("""
-         **Emotion Model**: SamLowe/roberta-base-go_emotions
-         - Fine-tuned on GoEmotions dataset (58k Reddit comments, 27 emotions)
-         - Architecture: RoBERTa base
-         - Micro-F1: 0.46
-         [🔍 Model Hub](https://huggingface.co/SamLowe/roberta-base-go_emotions)
-         """)
-
-     with model_tabs[1]:
-         st.markdown("""
-         **Sarcasm Model**: cardiffnlp/twitter-roberta-base-irony
-         - Trained on SemEval-2018 Task 3 (Twitter irony dataset)
-         - Architecture: RoBERTa base
-         - F1-score: 0.705
-         [🔍 Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
-         """)
-
-     with model_tabs[2]:
-         st.markdown("""
-         **Speech Recognition**: OpenAI Whisper (large-v3)
-         - State-of-the-art model for speech-to-text
-         - Accuracy: ~5-10% WER on clean English audio
-         - Robust to noise, accents, and varied conditions
-         - Runs locally, no internet required
-         **Tips**: Use good mic, reduce noise, speak clearly
-         [🔍 Model Details](https://github.com/openai/whisper)
-         """)

- # Custom audio recorder using HTML/JS
- def custom_audio_recorder():
-     audio_recorder_html = """
-     <script>
-     var audioRecorder = {
-         audioBlobs: [],
-         mediaRecorder: null,
-         streamBeingCaptured: null,
-         start: function() {
-             if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
-                 return Promise.reject(new Error('mediaDevices API or getUserMedia method is not supported in this browser.'));
-             }
-             else {
-                 return navigator.mediaDevices.getUserMedia({ audio: true })
-                     .then(stream => {
-                         audioRecorder.streamBeingCaptured = stream;
-                         audioRecorder.mediaRecorder = new MediaRecorder(stream);
-                         audioRecorder.audioBlobs = [];
-
-                         audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
-                             audioRecorder.audioBlobs.push(event.data);
-                         });
-
-                         audioRecorder.mediaRecorder.start();
-                     });
-             }
-         },
-         stop: function() {
-             return new Promise(resolve => {
-                 let mimeType = audioRecorder.mediaRecorder.mimeType;
-
-                 audioRecorder.mediaRecorder.addEventListener("stop", () => {
-                     let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
-                     resolve(audioBlob);
-                 });
-
-                 audioRecorder.mediaRecorder.stop();
-
-                 audioRecorder.stopStream();
-                 audioRecorder.resetRecordingProperties();
-             });
-         },
-         stopStream: function() {
-             audioRecorder.streamBeingCaptured.getTracks()
-                 .forEach(track => track.stop());
-         },
-         resetRecordingProperties: function() {
-             audioRecorder.mediaRecorder = null;
-             audioRecorder.streamBeingCaptured = null;
-         }
-     }
-     var isRecording = false;
-     var recordButton = document.getElementById('record-button');
-     var audioElement = document.getElementById('audio-playback');
-     var audioData = document.getElementById('audio-data');
-
-     function toggleRecording() {
-         if (!isRecording) {
-             audioRecorder.start()
-                 .then(() => {
-                     isRecording = true;
-                     recordButton.textContent = 'Stop Recording';
-                     recordButton.classList.add('recording');
-                 })
-                 .catch(error => {
-                     alert('Error starting recording: ' + error.message);
-                 });
-         } else {
-             audioRecorder.stop()
-                 .then(audioBlob => {
-                     const audioUrl = URL.createObjectURL(audioBlob);
-                     audioElement.src = audioUrl;
-
-                     const reader = new FileReader();
-                     reader.readAsDataURL(audioBlob);
-                     reader.onloadend = function() {
-                         const base64data = reader.result;
-                         audioData.value = base64data;
-                         const streamlitMessage = {type: "streamlit:setComponentValue", value: base64data};
-                         window.parent.postMessage(streamlitMessage, "*");
-                     }
-
-                     isRecording = false;
-                     recordButton.textContent = 'Start Recording';
-                     recordButton.classList.remove('recording');
-                 });
-         }
-     }
-     document.addEventListener('DOMContentLoaded', function() {
-         recordButton = document.getElementById('record-button');
-         audioElement = document.getElementById('audio-playback');
-         audioData = document.getElementById('audio-data');
-
-         recordButton.addEventListener('click', toggleRecording);
-     });
-     </script>
-     <div class="audio-recorder-container">
-         <button id="record-button" class="record-button">Start Recording</button>
-         <audio id="audio-playback" controls style="display:block; margin-top:10px;"></audio>
-         <input type="hidden" id="audio-data" name="audio-data">
-     </div>
-     <style>
-     .audio-recorder-container {
-         display: flex;
-         flex-direction: column;
-         align-items: center;
-         padding: 20px;
-     }
-     .record-button {
-         background-color: #f63366;
-         color: white;
-         border: none;
-         padding: 10px 20px;
-         border-radius: 5px;
-         cursor: pointer;
-         font-size: 16px;
-     }
-     .record-button.recording {
-         background-color: #ff0000;
-         animation: pulse 1.5s infinite;
-     }
-     @keyframes pulse {
-         0% { opacity: 1; }
-         50% { opacity: 0.7; }
-         100% { opacity: 1; }
-     }
-     </style>
-     """
-
-     return components.html(audio_recorder_html, height=150)

- # Function to display analysis results
- def display_analysis_results(transcribed_text):
-     # Fix 5: Add debugging to track what's happening
-     st.session_state.debug_info = st.session_state.get('debug_info', [])
-     st.session_state.debug_info.append(f"Processing text: {transcribed_text[:50]}...")
-
-     emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
-     is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
-
-     # Add results to debug info
-     st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}")
-     st.session_state.debug_info.append(f"Sarcasm: {is_sarcastic}, Score: {sarcasm_score:.3f}")

-     st.header("Transcribed Text")
-     st.text_area("Text", transcribed_text, height=150, disabled=True, help="The audio converted to text.")

-     confidence_score = min(0.95, max(0.70, len(transcribed_text.split()) / 50))
-     st.caption(f"Transcription confidence: {confidence_score:.2f}")

-     st.header("Analysis Results")
-     col1, col2 = st.columns([1, 2])

      with col1:
          st.subheader("Sentiment")
          sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
          st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
-         st.info("Sentiment reflects the dominant emotion's tone.")
-
          st.subheader("Sarcasm")
          sarcasm_icon = "😏" if is_sarcastic else "😐"
-         sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
-         st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
-         st.info("Score indicates sarcasm confidence (0 to 1).")

      with col2:
-         st.subheader("Emotions")
-         if emotions_dict:
-             st.markdown(f"**Dominant:** {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
-             sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
-             top_emotions = sorted_emotions[:8]
-             emotions = [e[0] for e in top_emotions]
-             scores = [e[1] for e in top_emotions]
-             fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
-                          title="Top Emotions Distribution", color=emotions,
-                          color_discrete_sequence=px.colors.qualitative.Bold)
-             fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
-             st.plotly_chart(fig, use_container_width=True)
-         else:
-             st.write("No emotions detected.")
-
-     # Fix 6: Add debug expander for troubleshooting
-     with st.expander("Debug Information", expanded=False):
-         st.write("Debugging information for troubleshooting:")
-         for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
-             st.text(f"{i+1}. {debug_line}")
-         if emotions_dict:
-             st.write("Raw emotion scores:")
-             for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
-                 if score > 0.01: # Only show non-negligible scores
-                     st.text(f"{emotion}: {score:.4f}")
-
-     with st.expander("Analysis Details", expanded=False):
          st.write("""
-         **How this works:**
-         1. **Speech Recognition**: Audio transcribed using OpenAI Whisper (large-v3)
-         2. **Emotion Analysis**: RoBERTa model trained on GoEmotions (27 emotions)
-         3. **Sentiment Analysis**: Derived from dominant emotion
-         4. **Sarcasm Detection**: RoBERTa model for irony detection
-         **Accuracy depends on**:
-         - Audio quality
-         - Speech clarity
-         - Background noise
-         - Speech patterns
          """)

- # Process base64 audio data
- def process_base64_audio(base64_data):
-     try:
-         base64_binary = base64_data.split(',')[1]
-         binary_data = base64.b64decode(base64_binary)
-
-         temp_dir = tempfile.gettempdir()
-         temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")
-
-         with open(temp_file_path, "wb") as f:
-             f.write(binary_data)
-
-         if not validate_audio(temp_file_path):
-             return None
-
-         return temp_file_path
-     except Exception as e:
-         st.error(f"Error processing audio data: {str(e)}")
-         return None

  # Main App Logic
  def main():
-     # Fix 7: Initialize session state for debugging
-     if 'debug_info' not in st.session_state:
-         st.session_state.debug_info = []
-
      tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙️ Record Audio"])

      with tab1:
-         st.header("Upload an Audio File")
-         audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"],
-                                       help="Upload an audio file for analysis")
-
          if audio_file:
-             st.audio(audio_file.getvalue())
-             st.caption("🎧 Uploaded Audio Playback")
-
-             upload_button = st.button("Analyze Upload", key="analyze_upload")
-
-             if upload_button:
-                 with st.spinner('Analyzing audio with advanced precision...'):
-                     temp_audio_path = process_uploaded_audio(audio_file)
-                     if temp_audio_path:
-                         main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)
-
-                         if main_text:
-                             if alternatives:
-                                 with st.expander("Alternative transcriptions detected", expanded=False):
-                                     for i, alt in enumerate(alternatives[:3], 1):
-                                         st.write(f"{i}. {alt}")
-
-                             display_analysis_results(main_text)
-                         else:
-                             st.error("Could not transcribe the audio. Please try again with clearer audio.")
-
-                         if os.path.exists(temp_audio_path):
-                             os.remove(temp_audio_path)
-
      with tab2:
          st.header("Record Your Voice")
-         st.write("Use the recorder below to analyze your speech in real-time.")
-
-         st.subheader("Browser-Based Recorder")
-         st.write("Click the button below to start/stop recording.")
-
-         audio_data = custom_audio_recorder()
-
-         if audio_data:
-             analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
-
-             if analyze_rec_button:
-                 with st.spinner("Processing your recording..."):
-                     temp_audio_path = process_base64_audio(audio_data)
-
-                     if temp_audio_path:
-                         transcribed_text = transcribe_audio(temp_audio_path)
-
-                         if transcribed_text:
-                             display_analysis_results(transcribed_text)
-                         else:
-                             st.error("Could not transcribe the audio. Please try speaking more clearly.")
-
-                         if os.path.exists(temp_audio_path):
-                             os.remove(temp_audio_path)
-
-         st.subheader("Manual Text Input")
-         st.write("If recording doesn't work, you can type your text here:")
-
-         manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
-         analyze_text_button = st.button("Analyze Text", key="analyze_manual")
-
-         if analyze_text_button and manual_text:
-             display_analysis_results(manual_text)
-
-     show_model_info()

  if __name__ == "__main__":
      main()
 
  import tempfile
  import torch
  import transformers
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
  import plotly.express as px
  import logging
  import warnings
  import whisper
  from pydub import AudioSegment
  import time
+ import numpy as np
+ import librosa
+ import subprocess
+ import pyaudio
+ import wave
  import io

  # Suppress warnings for a clean console
  logging.getLogger("torch").setLevel(logging.CRITICAL)
  print(f"Using device: {device}")

  # Set Streamlit app layout
+ st.set_page_config(layout="wide", page_title="Advanced Voice Emotion Analyzer")

  # Interface design
+ st.title("🎙️ Advanced Voice Emotion Analyzer")
+ st.write("Analyze all emotions from audio using hybrid ML models, ensuring accurate detection across 27 emotions.")

+ # Audio Preprocessing
+ def make_audio_scarier(audio_path, output_path):
+     try:
+         commands = [
+             f"ffmpeg -i {audio_path} -af 'asetrate=44100*0.8,aresample=44100' temp1.wav",
+             f"ffmpeg -i temp1.wav -af 'reverb=0.8:0.2:0.5:0.5:0.5:0.5' temp2.wav",
+             f"ffmpeg -i temp2.wav -af 'atempo=1.2' {output_path}"
+         ]
+         for cmd in commands:
+             subprocess.run(cmd, shell=True, check=True)
+         for temp_file in ["temp1.wav", "temp2.wav"]:
+             if os.path.exists(temp_file):
+                 os.remove(temp_file)
+     except Exception as e:
+         st.error(f"Audio processing failed: {str(e)}")
+         raise
+
+ # Audio Feature Extraction
+ def extract_audio_features(audio_path):
+     try:
+         y, sr = librosa.load(audio_path, sr=16000)
+         pitch_mean = np.mean(librosa.piptrack(y=y, sr=sr)[0][librosa.piptrack(y=y, sr=sr)[0] > 0]) if np.any(librosa.piptrack(y=y, sr=sr)[0] > 0) else 0
+         energy_mean = np.mean(librosa.feature.rms(y=y))
+         zcr_mean = np.mean(librosa.feature.zero_crossing_rate(y))
+         return {"pitch_mean": pitch_mean, "energy_mean": energy_mean, "zcr_mean": zcr_mean}
+     except Exception as e:
+         st.error(f"Audio feature extraction failed: {str(e)}")
+         return {}
+
+ # Audio Emotion Classification with Wav2Vec2
+ @st.cache_resource
+ def get_audio_emotion_classifier():
+     processor = Wav2Vec2Processor.from_pretrained("superb/wav2vec2-base-superb-er")
+     model = Wav2Vec2ForSequenceClassification.from_pretrained("superb/wav2vec2-base-superb-er")
+     model = model.to(device)
+     return processor, model
+
+ def perform_audio_emotion_detection(audio_path):
+     try:
+         processor, model = get_audio_emotion_classifier()
+         waveform, sample_rate = librosa.load(audio_path, sr=16000)
+         inputs = processor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
+         inputs = {k: v.to(device) for k, v in inputs.items()}
+         with torch.no_grad():
+             logits = model(**inputs).logits
+         scores = torch.softmax(logits, dim=1).detach().cpu().numpy()[0]
+         audio_emotions = ["neutral", "happy", "sad", "angry", "fearful", "surprise", "disgust"]
+         emotion_dict = {emotion: float(scores[i]) for i, emotion in enumerate(audio_emotions)}
+         top_emotion = audio_emotions[np.argmax(scores)]
+         # Boost emotions for audio characteristics
+         features = extract_audio_features(audio_path)
+         if features.get("pitch_mean", 0) < 200 and features.get("energy_mean", 0) > 0.1 and features.get("zcr_mean", 0) > 0.1:
+             emotion_dict["fearful"] = min(1.0, emotion_dict.get("fearful", 0) + 0.3)
+             top_emotion = "fearful" if emotion_dict["fearful"] > emotion_dict[top_emotion] else top_emotion
+         elif features.get("energy_mean", 0) > 0.2:
+             emotion_dict["angry"] = min(1.0, emotion_dict.get("angry", 0) + 0.2)
+             top_emotion = "angry" if emotion_dict["angry"] > emotion_dict[top_emotion] else top_emotion
+         return emotion_dict, top_emotion
+     except Exception as e:
+         st.error(f"Audio emotion detection failed: {str(e)}")
+         return {}, "unknown"
+
+ # Text Emotion Classification with RoBERTa
  @st.cache_resource
+ def get_text_emotion_classifier():
      tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions", use_fast=True)
      model = AutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
      model = model.to(device)
      return pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None, device=-1 if device.type == "cpu" else 0)

+ def perform_text_emotion_detection(text):
      try:
+         classifier = get_text_emotion_classifier()
+         results = classifier(text)[0]
+         emotions = ["admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion",
+                     "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment",
+                     "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism",
+                     "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"]
+         emotions_dict = {result['label']: result['score'] for result in results if result['label'] in emotions}
+         top_emotion = max(emotions_dict, key=emotions_dict.get)
+         return emotions_dict, top_emotion
      except Exception as e:
+         st.error(f"Text emotion detection failed: {str(e)}")
+         return {}, "unknown"

+ # Sarcasm Detection
  @st.cache_resource
  def get_sarcasm_classifier():
      tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)

  def perform_sarcasm_detection(text):
      try:
+         classifier = get_sarcasm_classifier()
+         result = classifier(text)[0]
          is_sarcastic = result['label'] == "LABEL_1"
          sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
          return is_sarcastic, sarcasm_score
          st.error(f"Sarcasm detection failed: {str(e)}")
          return False, 0.0

+ # Validate Audio
  def validate_audio(audio_path):
      try:
          sound = AudioSegment.from_file(audio_path)
+         if sound.dBFS < -50 or len(sound) < 1000:
+             st.warning("Audio volume too low or too short. Please use a louder, longer audio.")
              return False
          return True
+     except Exception:
          st.error("Invalid or corrupted audio file.")
          return False

  # Speech Recognition with Whisper
  @st.cache_resource
  def load_whisper_model():
+     return whisper.load_model("large-v3")

+ def transcribe_audio(audio_path):
      try:
          sound = AudioSegment.from_file(audio_path)
          temp_wav_path = os.path.join(tempfile.gettempdir(), "temp_converted.wav")
+         sound = sound.set_frame_rate(16000).set_channels(1)
          sound.export(temp_wav_path, format="wav")
          model = load_whisper_model()
          result = model.transcribe(temp_wav_path, language="en")
+         os.remove(temp_wav_path)
+         return result["text"].strip()
      except Exception as e:
          st.error(f"Transcription failed: {str(e)}")
+         return ""

+ # Python Audio Recording
+ def record_audio():
+     CHUNK = 1024
+     FORMAT = pyaudio.paInt16
+     CHANNELS = 1
+     RATE = 16000
+     RECORD_SECONDS = st.slider("Recording duration (seconds)", 1, 30, 5)
+
+     p = pyaudio.PyAudio()
+     stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

+     if st.button("Start Recording"):
+         st.write("Recording...")
+         frames = []
+         for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
+             data = stream.read(CHUNK)
+             frames.append(data)
+         st.write("Recording finished.")
+
+         stream.stop_stream()
+         stream.close()
+         p.terminate()
+
+         temp_file_path = os.path.join(tempfile.gettempdir(), f"recorded_audio_{int(time.time())}.wav")
+         wf = wave.open(temp_file_path, 'wb')
+         wf.setnchannels(CHANNELS)
+         wf.setsampwidth(p.get_sample_size(FORMAT))
+         wf.setframerate(RATE)
+         wf.writeframes(b''.join(frames))
+         wf.close()

          return temp_file_path
+     return None
+
+ # Process Audio Files
+ def process_audio_file(audio_data):
+     temp_dir = tempfile.gettempdir()
+     temp_file_path = os.path.join(temp_dir, f"audio_{int(time.time())}.wav")
+     with open(temp_file_path, "wb") as f:
+         if isinstance(audio_data, str):
+             with open(audio_data, "rb") as f_audio:
+                 f.write(f_audio.read())
+         else:
+             f.write(audio_data.getvalue())
+     if not validate_audio(temp_file_path):
          return None
+     return temp_file_path

+ # Display Results
+ def display_analysis_results(audio_path):
+     st.header("Audio Analysis")
+     st.audio(audio_path)

+     # Preprocess audio
+     processed_audio_path = os.path.join(tempfile.gettempdir(), f"processed_{int(time.time())}.wav")
+     make_audio_scarier(audio_path, processed_audio_path)

+     # Audio emotion detection
+     audio_emotions, audio_top_emotion = perform_audio_emotion_detection(processed_audio_path)
+     st.subheader("Audio-Based Emotion")
+     st.write(f"**Dominant Emotion:** {audio_top_emotion} (Score: {audio_emotions.get(audio_top_emotion, 0):.3f})")

+     # Transcription and text emotion detection
+     transcribed_text = transcribe_audio(processed_audio_path)
+     st.subheader("Transcribed Text")
+     st.text_area("Text", transcribed_text, height=100, disabled=True)
+     if transcribed_text:
+         text_emotions, text_top_emotion = perform_text_emotion_detection(transcribed_text)
+         st.write(f"**Text-Based Dominant Emotion:** {text_top_emotion} (Score: {text_emotions.get(text_top_emotion, 0):.3f})")

+     # Combine emotions (prioritize audio, map to 27 emotions)
+     emotion_map = {
+         "neutral": "neutral", "happy": "joy", "sad": "sadness", "angry": "anger",
+         "fearful": "fear", "surprise": "surprise", "disgust": "disgust"
+     }
+     combined_emotions = {emotion: 0 for emotion in ["admiration", "amusement", "anger", "annoyance", "approval", "caring",
+                                                     "confusion", "curiosity", "desire", "disappointment", "disapproval",
+                                                     "disgust", "embarrassment", "excitement", "fear", "gratitude",
+                                                     "grief", "joy", "love", "nervousness", "optimism", "pride",
+                                                     "realization", "relief", "remorse", "sadness", "surprise", "neutral"]}
+     for audio_emotion, score in audio_emotions.items():
+         mapped_emotion = emotion_map.get(audio_emotion, "neutral")
+         combined_emotions[mapped_emotion] = max(combined_emotions[mapped_emotion], score * 0.7)
+     if transcribed_text:
+         for text_emotion, score in text_emotions.items():
+             combined_emotions[text_emotion] = combined_emotions.get(text_emotion, 0) + score * 0.3

+     top_emotion = max(combined_emotions, key=combined_emotions.get)
+     sentiment = "POSITIVE" if top_emotion in ["admiration", "amusement", "approval", "caring", "desire", "excitement",
+                                               "gratitude", "joy", "love", "optimism", "pride", "relief"] else "NEGATIVE" if top_emotion in ["anger", "annoyance", "disappointment", "disapproval", "disgust", "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"] else "NEUTRAL"
+
+     # Sarcasm detection
+     is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text) if transcribed_text else (False, 0.0)

+     # Display results
+     col1, col2 = st.columns([1, 2])
      with col1:
          st.subheader("Sentiment")
          sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
          st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
          st.subheader("Sarcasm")
          sarcasm_icon = "😏" if is_sarcastic else "😐"
+         st.markdown(f"**{sarcasm_icon} {'Detected' if is_sarcastic else 'Not Detected'}** (Score: {sarcasm_score:.3f})")

      with col2:
+         st.subheader("Emotion Distribution")
+         sorted_emotions = sorted(combined_emotions.items(), key=lambda x: x[1], reverse=True)[:10]
+         emotions, scores = zip(*sorted_emotions)
+         fig = px.bar(x=list(emotions), y=list(scores), labels={'x': 'Emotion', 'y': 'Score'},
+                      title="Top Emotion Scores", color=list(emotions),
+                      color_discrete_sequence=px.colors.qualitative.Bold)
+         fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
+         st.plotly_chart(fig, use_container_width=True)
+
+     with st.expander("Details"):
+         st.write(f"**Audio Features:** {extract_audio_features(processed_audio_path)}")
          st.write("""
+         **How it works:**
+         - Audio Emotion: Wav2Vec2 detects 7 emotions from audio.
+         - Transcription: Whisper converts audio to text.
+         - Text Emotion: RoBERTa refines 27 emotions from text.
+         - Sarcasm: Analyzes text for irony.
+         **Accuracy depends on:** Audio quality, clarity, and noise.
          """)

+     # Clean up
+     for path in [audio_path, processed_audio_path]:
+         if os.path.exists(path):
+             os.remove(path)

  # Main App Logic
  def main():
      tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙️ Record Audio"])

      with tab1:
+         st.header("Upload Audio File")
+         audio_file = st.file_uploader("Upload audio (wav, mp3, ogg)", type=["wav", "mp3", "ogg"])
          if audio_file:
+             temp_audio_path = process_audio_file(audio_file)
+             if temp_audio_path:
+                 if st.button("Analyze Upload"):
+                     with st.spinner("Analyzing..."):
+                         display_analysis_results(temp_audio_path)
+
      with tab2:
          st.header("Record Your Voice")
+         st.write("Record audio to analyze emotions in real-time.")
+         temp_audio_path = record_audio()
+         if temp_audio_path:
+             if st.button("Analyze Recording"):
+                 with st.spinner("Processing..."):
+                     display_analysis_results(temp_audio_path)

+     st.sidebar.header("About")
+     st.sidebar.write("""
+     **Models Used:**
+     - Audio: superb/wav2vec2-base-superb-er (7 emotions)
+     - Text: SamLowe/roberta-base-go_emotions (27 emotions)
+     - Sarcasm: cardiffnlp/twitter-roberta-base-irony
+     - Speech: OpenAI Whisper (large-v3)
+     """)

  if __name__ == "__main__":
      main()
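
For reference, here is a minimal standalone sketch (not part of the commit) of the audio/text fusion step that the new display_analysis_results performs: the 7 Wav2Vec2 labels are mapped onto the 27 GoEmotions labels with a 0.7 weight and the text scores are added with a 0.3 weight, as in the added code above. The label lists, mapping, and weights are taken from the diff; the sample scores at the end are made up for illustration.

# Sketch of the hybrid emotion fusion from the new display_analysis_results.
# GO_EMOTIONS, AUDIO_TO_GO, and the 0.7/0.3 weights come from the diff; the inputs are invented.

GO_EMOTIONS = ["admiration", "amusement", "anger", "annoyance", "approval", "caring",
               "confusion", "curiosity", "desire", "disappointment", "disapproval",
               "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
               "joy", "love", "nervousness", "optimism", "pride", "realization",
               "relief", "remorse", "sadness", "surprise", "neutral"]

AUDIO_TO_GO = {"neutral": "neutral", "happy": "joy", "sad": "sadness", "angry": "anger",
               "fearful": "fear", "surprise": "surprise", "disgust": "disgust"}

def fuse_emotions(audio_scores, text_scores):
    # Start every GoEmotions label at 0, take the strongest mapped audio evidence (weight 0.7),
    # then add weighted text evidence (weight 0.3), mirroring the logic in the diff.
    combined = {label: 0.0 for label in GO_EMOTIONS}
    for label, score in audio_scores.items():
        mapped = AUDIO_TO_GO.get(label, "neutral")
        combined[mapped] = max(combined[mapped], score * 0.7)
    for label, score in text_scores.items():
        combined[label] = combined.get(label, 0.0) + score * 0.3
    top = max(combined, key=combined.get)
    return top, combined

if __name__ == "__main__":
    # Made-up scores: an angry-sounding clip whose transcript reads as annoyance/anger.
    top, scores = fuse_emotions({"angry": 0.6, "neutral": 0.3, "sad": 0.1},
                                {"anger": 0.4, "annoyance": 0.5, "neutral": 0.1})
    print(top)  # -> "anger"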