Update app.py
app.py
CHANGED
@@ -13,10 +13,8 @@ import time
 import base64
 import io
 import streamlit.components.v1 as components
-import functools
-import threading
-from typing import Dict, Tuple, List, Any, Optional, Union
 from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, Tuple, List, Any, Optional, Union
 import numpy as np
 
 # Suppress warnings for a clean console
@@ -42,7 +40,7 @@ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
 
 # Interface design
 st.title("🎤 Voice Based Sentiment Analysis")
-st.write("Detect emotions, sentiment, and sarcasm from your voice with
+st.write("Detect emotions, sentiment, and sarcasm from your voice with fast and accurate processing.")
 
 # Emotion Detection Function with optimizations
 @st.cache_resource
@@ -53,16 +51,15 @@ def get_emotion_classifier():
                                                   model_max_length=512)
         model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
         model = model.to(device)
-        model.eval()
+        model.eval()
 
-        # Use batch_size for faster processing when appropriate
         classifier = pipeline("text-classification",
                               model=model,
                               tokenizer=tokenizer,
-
+                              return_all_scores=True,
                               device=0 if torch.cuda.is_available() else -1)
 
-        #
+        # Test the model
         test_result = classifier("I am happy today")
         print(f"Emotion classifier test: {test_result}")
 
@@ -72,11 +69,10 @@ def get_emotion_classifier():
         st.error(f"Failed to load emotion model. Please check logs.")
         return None
 
-# Cache emotion results
-@st.cache_data(ttl=600)
+# Cache emotion results
+@st.cache_data(ttl=600)
 def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
     try:
-        # Handle empty or very short text
         if not text or len(text.strip()) < 3:
             return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
@@ -85,18 +81,8 @@ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
             st.error("Emotion classifier not available.")
             return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
 
-        #
-
-        if len(text) > max_chunk_size:
-            chunks = [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
-            all_results = []
-            for chunk in chunks:
-                chunk_results = emotion_classifier(chunk)
-                all_results.extend(chunk_results)
-            # Aggregate results across chunks
-            emotion_results = [result[0] for result in all_results]
-        else:
-            emotion_results = emotion_classifier(text)[0]
+        # Process text directly (skip chunking for speed)
+        emotion_results = emotion_classifier(text)
 
         emotion_map = {
             "joy": "😊", "anger": "😡", "disgust": "🤢", "fear": "😨",
@@ -108,56 +94,30 @@ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
         neutral_emotions = ["surprise", "neutral"]
 
         # Process results
-        emotions_dict = {}
-        for result in emotion_results:
-            if isinstance(result, dict) and 'label' in result and 'score' in result:
-                # If we have multiple chunks, average the scores
-                if result['label'] in emotions_dict:
-                    emotions_dict[result['label']] = (emotions_dict[result['label']] + result['score']) / 2
-                else:
-                    emotions_dict[result['label']] = result['score']
-            else:
-                print(f"Invalid result format: {result}")
-
-        if not emotions_dict:
-            st.error("No valid emotions detected.")
-            return {}, "neutral", emotion_map, "NEUTRAL"
+        emotions_dict = {emotion['label']: emotion['score'] for emotion in emotion_results[0]}
 
-        # Filter
-        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.05}
+        # Filter emotions with a lower threshold
+        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}  # Lowered from 0.05
 
         if not filtered_emotions:
             filtered_emotions = emotions_dict
 
-        #
-
-
+        # Check for mixed emotions
+        sorted_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)
+        if len(sorted_emotions) > 1 and sorted_emotions[1][1] > 0.8 * sorted_emotions[0][1]:
+            top_emotion = "MIXED"
+        else:
+            top_emotion = sorted_emotions[0][0]
 
-        # Determine sentiment
-        if top_emotion in positive_emotions:
+        # Determine sentiment
+        if top_emotion == "MIXED":
+            sentiment = "MIXED"
+        elif top_emotion in positive_emotions:
             sentiment = "POSITIVE"
         elif top_emotion in negative_emotions:
             sentiment = "NEGATIVE"
         else:
-
-            competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]
-
-            if len(competing_emotions) > 1:
-                # If top two emotions are close in score
-                if (competing_emotions[1][1] > 0.8 * competing_emotions[0][1]):
-                    # Check if second emotion changes sentiment classification
-                    second_emotion = competing_emotions[1][0]
-                    if second_emotion in positive_emotions:
-                        sentiment = "POSITIVE" if top_emotion not in negative_emotions else "MIXED"
-                    elif second_emotion in negative_emotions:
-                        sentiment = "NEGATIVE" if top_emotion not in positive_emotions else "MIXED"
-                    else:
-                        sentiment = "NEUTRAL"
-                else:
-                    # Stick with top emotion for sentiment
-                    sentiment = "NEUTRAL"
-            else:
-                sentiment = "NEUTRAL"
+            sentiment = "NEUTRAL"
 
         return emotions_dict, top_emotion, emotion_map, sentiment
     except Exception as e:
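A note on the shape change in the hunk above: with `return_all_scores=True`, a transformers text-classification pipeline returns one list of label/score dicts per input, which is why the rewritten code indexes `emotion_results[0]` before building the dict. A minimal sketch of that contract (model name as in this commit; the printed scores are illustrative):

```python
from transformers import pipeline

# return_all_scores=True makes the pipeline emit every label's score,
# not just the winning one; for a single input string the result is a
# one-element list containing a list of {'label', 'score'} dicts.
clf = pipeline("text-classification",
               model="bhadresh-savani/distilbert-base-uncased-emotion",
               return_all_scores=True)

results = clf("I am happy today")
# results ~ [[{'label': 'joy', 'score': 0.99}, {'label': 'anger', 'score': 0.001}, ...]]

emotions_dict = {r['label']: r['score'] for r in results[0]}
top_emotion = max(emotions_dict, key=emotions_dict.get)
```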
@@ -165,7 +125,7 @@ def perform_emotion_detection(text: str) -> Tuple[Dict[str, float], str, Dict[str, str], str]:
         print(f"Exception in emotion detection: {str(e)}")
         return {}, "neutral", {"neutral": "😐"}, "NEUTRAL"
 
-# Sarcasm Detection Function
+# Sarcasm Detection Function
 @st.cache_resource
 def get_sarcasm_classifier():
     try:
@@ -174,7 +134,7 @@ def get_sarcasm_classifier():
                                                   model_max_length=512)
         model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
         model = model.to(device)
-        model.eval()
+        model.eval()
 
         classifier = pipeline("text-classification",
                               model=model,
@@ -191,8 +151,7 @@ def get_sarcasm_classifier():
         st.error(f"Failed to load sarcasm model. Please check logs.")
         return None
 
-
-@st.cache_data(ttl=600) # Cache for 10 minutes
+@st.cache_data(ttl=600)
 def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
     try:
         if not text or len(text.strip()) < 3:
@@ -203,46 +162,19 @@ def perform_sarcasm_detection(text: str) -> Tuple[bool, float]:
             st.error("Sarcasm classifier not available.")
             return False, 0.0
 
-
-
-        if
-
-            # Process chunks and average results
-            sarcasm_scores = []
-            for chunk in chunks:
-                result = sarcasm_classifier(chunk)[0]
-                is_chunk_sarcastic = result['label'] == "LABEL_1"
-                sarcasm_score = result['score'] if is_chunk_sarcastic else 1 - result['score']
-                sarcasm_scores.append((is_chunk_sarcastic, sarcasm_score))
-
-            # Average sarcasm scores
-            total_sarcasm_score = sum(score for _, score in sarcasm_scores)
-            avg_sarcasm_score = total_sarcasm_score / len(sarcasm_scores)
-            # Count sarcastic chunks
-            sarcastic_chunks = sum(1 for is_sarcastic, _ in sarcasm_scores if is_sarcastic)
-
-            # If majority of chunks are sarcastic, classify as sarcastic
-            is_sarcastic = sarcastic_chunks > len(chunks) / 2
-            return is_sarcastic, avg_sarcasm_score
-        else:
-            # Process normally for short text
-            result = sarcasm_classifier(text)[0]
-            is_sarcastic = result['label'] == "LABEL_1"
-            sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
-            return is_sarcastic, sarcasm_score
+        result = sarcasm_classifier(text)[0]
+        is_sarcastic = result['label'] == "LABEL_1"
+        sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
+        return is_sarcastic, sarcasm_score
     except Exception as e:
         st.error(f"Sarcasm detection failed: {str(e)}")
         return False, 0.0
 
-# Validate audio quality
+# Validate audio quality (streamlined for speed)
 def validate_audio(audio_path: str) -> bool:
     try:
         sound = AudioSegment.from_file(audio_path)
-        #
-        if sound.dBFS < -50:  # Slightly relaxed threshold
-            st.warning("Audio volume is low. Please record or upload a louder audio for better results.")
-            return len(sound) > 500  # Still process if at least 0.5 seconds
-        if len(sound) < 500:  # Less than 0.5 second
+        if len(sound) < 300:  # Relaxed to 0.3s
             st.warning("Audio is very short. Longer audio provides better analysis.")
             return False
         return True
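The score normalization kept in `perform_sarcasm_detection` is worth spelling out: the pipeline reports the winning label's confidence, so the code flips it (`1 - score`) when the winner is the non-irony label, yielding a single probability-of-sarcasm scale. A small sketch of that mapping (label scheme as used in this commit; the sample inputs are illustrative):

```python
def sarcasm_probability(raw: dict) -> tuple:
    # raw is one pipeline result, e.g. {'label': 'LABEL_1', 'score': 0.91}
    is_sarcastic = raw['label'] == "LABEL_1"  # LABEL_1 = irony class in this commit
    # Convert "confidence in the winning label" into "probability of sarcasm"
    score = raw['score'] if is_sarcastic else 1 - raw['score']
    return is_sarcastic, score

print(sarcasm_probability({'label': 'LABEL_1', 'score': 0.91}))  # (True, 0.91)
print(sarcasm_probability({'label': 'LABEL_0', 'score': 0.91}))  # (False, ~0.09)
```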
@@ -250,12 +182,11 @@ def validate_audio(audio_path: str) -> bool:
         st.error(f"Invalid or corrupted audio file: {str(e)}")
         return False
 
-# Speech Recognition with Whisper
+# Speech Recognition with Whisper
 @st.cache_resource
 def load_whisper_model():
     try:
-        #
-        model = whisper.load_model("medium")
+        model = whisper.load_model("base")  # Fastest model for quick transcription
         return model
     except Exception as e:
         print(f"Error loading Whisper model: {str(e)}")
@@ -263,31 +194,26 @@ def load_whisper_model():
         return None
 
 @st.cache_data
-def transcribe_audio(audio_path: str, show_alternative: bool = False) -> Union[str, Tuple[str, List[str]]]:
+def transcribe_audio(audio_path: str) -> str:
     try:
-        st.write(f"Processing audio file...")
         sound = AudioSegment.from_file(audio_path)
-        st.write(f"Audio duration: {len(sound) / 1000:.2f}s")
-
         # Convert to WAV format (16kHz, mono) for Whisper
         temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_converted_{int(time.time())}.wav")
-
-        sound = sound.set_frame_rate(16000)  # 16kHz is optimal for Whisper
-        sound = sound.set_channels(1)
+        sound = sound.set_frame_rate(16000).set_channels(1)
         sound.export(temp_wav_path, format="wav")
 
         # Load model
         model = load_whisper_model()
         if model is None:
-            return ""
+            return ""
 
         # Transcribe with optimized settings
         result = model.transcribe(
             temp_wav_path,
             language="en",
             task="transcribe",
-            fp16=torch.cuda.is_available(),
-            beam_size=
+            fp16=torch.cuda.is_available(),
+            beam_size=3  # Reduced for speed
         )
 
         main_text = result["text"].strip()
@@ -296,48 +222,29 @@ def transcribe_audio(audio_path: str, show_alternative: bool = False) -> Union[str, Tuple[str, List[str]]]:
         if os.path.exists(temp_wav_path):
             os.remove(temp_wav_path)
 
-
-        if show_alternative and "segments" in result:
-            # Create alternative texts by combining segments differently
-            segments = result["segments"]
-            if len(segments) > 1:
-                alternatives = []
-                # Create up to 3 alternatives by varying confidence thresholds
-                for conf in [0.5, 0.7, 0.9]:
-                    alt_text = " ".join(seg["text"] for seg in segments if seg["no_speech_prob"] < conf)
-                    if alt_text and alt_text != main_text:
-                        alternatives.append(alt_text)
-                return main_text, alternatives[:3]  # Limit to 3 alternatives
-
-        return (main_text, []) if show_alternative else main_text
+        return main_text
     except Exception as e:
         st.error(f"Transcription failed: {str(e)}")
-        return ""
+        return ""
 
-# Process uploaded audio files
+# Process uploaded audio files
 def process_uploaded_audio(audio_file) -> Optional[str]:
     if not audio_file:
         return None
 
     try:
         temp_dir = tempfile.gettempdir()
-
-        # Extract extension more safely
-        filename = audio_file.name
-        ext = filename.split('.')[-1].lower() if '.' in filename else ''
-
+        ext = audio_file.name.split('.')[-1].lower() if '.' in audio_file.name else ''
         if ext not in ['wav', 'mp3', 'ogg', 'm4a', 'flac']:
             st.error("Unsupported audio format. Please upload WAV, MP3, OGG, M4A, or FLAC.")
             return None
 
         temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")
-
         with open(temp_file_path, "wb") as f:
             f.write(audio_file.getvalue())
 
         if not validate_audio(temp_file_path):
-
-            st.warning("Audio may not be optimal quality, but we'll try to process it anyway.")
+            st.warning("Audio may not be optimal, but we'll try to process it.")
 
         return temp_file_path
     except Exception as e:
@@ -347,40 +254,36 @@ def process_uploaded_audio(audio_file) -> Optional[str]:
 # Show model information
 def show_model_info():
     st.sidebar.header("🧠 About the Models")
-
     model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])
 
     with model_tabs[0]:
         st.markdown("""
         *Emotion Model*: distilbert-base-uncased-emotion
-        -
+        - Detects joy, anger, disgust, fear, sadness, surprise
         - Architecture: DistilBERT base
-        - High accuracy for basic emotion classification
         [🔗 Model Hub](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
         """)
 
     with model_tabs[1]:
         st.markdown("""
         *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
-        - Trained on
+        - Trained on Twitter irony dataset
         - Architecture: RoBERTa base
-        - F1-score: 0.705
         [🔗 Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
         """)
 
     with model_tabs[2]:
         st.markdown("""
-        *Speech Recognition*: OpenAI Whisper (
-        - Optimized for speed
-        -
-
-        *Tips*: Use good mic, reduce noise, speak clearly
+        *Speech Recognition*: OpenAI Whisper (base model)
+        - Optimized for speed
+        - Handles varied accents
+        *Tips*: Use good mic, reduce noise
         [🔗 Model Details](https://github.com/openai/whisper)
         """)
 
-# Custom audio recorder
+# Custom audio recorder
 def custom_audio_recorder():
-    st.warning("Browser-based recording requires microphone access
+    st.warning("Browser-based recording requires microphone access. If recording fails, try uploading an audio file.")
     audio_recorder_html = """
     <script>
     var audioRecorder = {
@@ -388,119 +291,49 @@ def custom_audio_recorder():
        mediaRecorder: null,
        streamBeingCaptured: null,
        isRecording: false,
-       recordingTimer: null,
-       recordingDuration: 0,
 
        start: function() {
            if (!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)) {
-               document.getElementById('status-message').textContent = "Recording not supported
-               return Promise.reject(new Error('mediaDevices API
+               document.getElementById('status-message').textContent = "Recording not supported";
+               return Promise.reject(new Error('mediaDevices API not supported'));
            }
-               const audioContext = new (window.AudioContext || window.webkitAudioContext)();
-               const source = audioContext.createMediaStreamSource(stream);
-               const analyser = audioContext.createAnalyser();
-               analyser.fftSize = 256;
-               source.connect(analyser);
-
-               // Start monitoring audio levels
-               const bufferLength = analyser.frequencyBinCount;
-               const dataArray = new Uint8Array(bufferLength);
-
-               function updateMeter() {
-                   if (!audioRecorder.isRecording) return;
-
-                   analyser.getByteFrequencyData(dataArray);
-                   let sum = 0;
-                   for(let i = 0; i < bufferLength; i++) {
-                       sum += dataArray[i];
-                   }
-                   const average = sum / bufferLength;
-
-                   // Update volume meter
-                   const meter = document.getElementById('volume-meter');
-                   if (meter) {
-                       const height = Math.min(100, average * 2);
-                       meter.style.height = height + '%';
-                   }
-
-                   requestAnimationFrame(updateMeter);
-               }
-
-               // Setup media recorder with better settings
-               audioRecorder.mediaRecorder = new MediaRecorder(stream, {
-                   mimeType: 'audio/webm;codecs=opus',
-                   audioBitsPerSecond: 128000
-               });
-
-               audioRecorder.audioBlobs = [];
-               audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
-                   audioRecorder.audioBlobs.push(event.data);
-               });
-
-               // Start the recording and visualization
-               audioRecorder.mediaRecorder.start(100);
-               audioRecorder.isRecording = true;
-
-               // Start timer
-               audioRecorder.recordingDuration = 0;
-               audioRecorder.recordingTimer = setInterval(() => {
-                   audioRecorder.recordingDuration += 1;
-                   const timerDisplay = document.getElementById('recording-timer');
-                   if (timerDisplay) {
-                       const minutes = Math.floor(audioRecorder.recordingDuration / 60);
-                       const seconds = audioRecorder.recordingDuration % 60;
-                       timerDisplay.textContent = `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
-                   }
-               }, 1000);
-
-               updateMeter();
-               document.getElementById('status-message').textContent = "Recording...";
+           return navigator.mediaDevices.getUserMedia({
+               audio: {
+                   echoCancellation: true,
+                   noiseSuppression: true,
+                   autoGainControl: true
+               }
+           })
+           .then(stream => {
+               audioRecorder.streamBeingCaptured = stream;
+               audioRecorder.mediaRecorder = new MediaRecorder(stream, {
+                   mimeType: 'audio/webm;codecs=opus',
+                   audioBitsPerSecond: 128000
+               });
+               audioRecorder.audioBlobs = [];
+               audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
+                   audioRecorder.audioBlobs.push(event.data);
+               });
+               audioRecorder.mediaRecorder.start(100);
+               audioRecorder.isRecording = true;
+               document.getElementById('status-message').textContent = "Recording...";
            });
        },
 
        stop: function() {
            return new Promise(resolve => {
                let mimeType = audioRecorder.mediaRecorder.mimeType;
-
                audioRecorder.mediaRecorder.addEventListener("stop", () => {
                    let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
                    resolve(audioBlob);
                    audioRecorder.isRecording = false;
                    document.getElementById('status-message').textContent = "Recording stopped";
-
-                   // Stop the timer
-                   if (audioRecorder.recordingTimer) {
-                       clearInterval(audioRecorder.recordingTimer);
-                   }
                });
-
                audioRecorder.mediaRecorder.stop();
-               audioRecorder.stopStream();
-               audioRecorder.resetRecordingProperties();
+               audioRecorder.streamBeingCaptured.getTracks().forEach(track => track.stop());
+               audioRecorder.mediaRecorder = null;
+               audioRecorder.streamBeingCaptured = null;
            });
-       },
-
-       stopStream: function() {
-           audioRecorder.streamBeingCaptured.getTracks()
-               .forEach(track => track.stop());
-       },
-
-       resetRecordingProperties: function() {
-           audioRecorder.mediaRecorder = null;
-           audioRecorder.streamBeingCaptured = null;
        }
    }
@@ -509,8 +342,6 @@ def custom_audio_recorder():
    function toggleRecording() {
        var recordButton = document.getElementById('record-button');
        var statusMessage = document.getElementById('status-message');
-       var volumeMeter = document.getElementById('volume-meter');
-       var recordingTimer = document.getElementById('recording-timer');
 
        if (!isRecording) {
            audioRecorder.start()
@@ -518,8 +349,6 @@ def custom_audio_recorder():
                isRecording = true;
                recordButton.textContent = 'Stop Recording';
                recordButton.classList.add('recording');
-               volumeMeter.style.display = 'block';
-               recordingTimer.style.display = 'block';
            })
            .catch(error => {
                statusMessage.textContent = 'Error: ' + error.message;
@@ -545,8 +374,6 @@ def custom_audio_recorder():
                isRecording = false;
                recordButton.textContent = 'Start Recording';
                recordButton.classList.remove('recording');
-               volumeMeter.style.display = 'none';
-               volumeMeter.style.height = '0%';
            });
        }
    }
@@ -560,14 +387,6 @@ def custom_audio_recorder():
    <div class="audio-recorder-container">
        <button id="record-button" class="record-button">Start Recording</button>
        <div id="status-message" class="status-message">Ready to record</div>
-
-       <div class="recording-info">
-           <div class="volume-meter-container">
-               <div id="volume-meter" class="volume-meter"></div>
-           </div>
-           <div id="recording-timer" class="recording-timer">00:00</div>
-       </div>
-
        <audio id="audio-playback" controls style="display:none; margin-top:10px; width:100%;"></audio>
        <input type="hidden" id="audio-data" name="audio-data">
    </div>
@@ -593,7 +412,6 @@ def custom_audio_recorder():
        font-size: 16px;
        font-weight: bold;
        transition: all 0.3s ease;
-       box-shadow: 0 2px 5px rgba(0,0,0,0.2);
    }
 
    .record-button:hover {
@@ -612,41 +430,6 @@ def custom_audio_recorder():
        color: #666;
    }
 
-   .recording-info {
-       display: flex;
-       align-items: center;
-       margin-top: 15px;
-       width: 100%;
-       justify-content: center;
-   }
-
-   .volume-meter-container {
-       width: 20px;
-       height: 60px;
-       background-color: #ddd;
-       border-radius: 3px;
-       overflow: hidden;
-       position: relative;
-   }
-
-   .volume-meter {
-       width: 100%;
-       height: 0%;
-       background-color: #f63366;
-       position: absolute;
-       bottom: 0;
-       transition: height 0.1s ease;
-       display: none;
-   }
-
-   .recording-timer {
-       margin-left: 15px;
-       font-family: monospace;
-       font-size: 18px;
-       color: #f63366;
-       display: none;
-   }
-
    @keyframes pulse {
        0% { opacity: 1; box-shadow: 0 0 0 0 rgba(255,0,0,0.7); }
        50% { opacity: 0.8; box-shadow: 0 0 0 10px rgba(255,0,0,0); }
@@ -655,35 +438,21 @@ def custom_audio_recorder():
    </style>
    """
 
-   return components.html(audio_recorder_html, height=
+   return components.html(audio_recorder_html, height=150)
 
-#
-def display_analysis_results(transcribed_text):
+# Display analysis results
+def display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score):
    st.session_state.debug_info = st.session_state.get('debug_info', [])
-   st.session_state.debug_info.append(f"
-   st.session_state.debug_info
-
-   # Run emotion and sarcasm detection in parallel
-   with ThreadPoolExecutor(max_workers=2) as executor:
-       emotion_future = executor.submit(perform_emotion_detection, transcribed_text)
-       sarcasm_future = executor.submit(perform_sarcasm_detection, transcribed_text)
-
-       emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
-       is_sarcastic, sarcasm_score = sarcasm_future.result()
-
-   # Add results to debug info
-   st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}")
-   st.session_state.debug_info.append(f"Sarcasm: {is_sarcastic}, Score: {sarcasm_score:.3f}")
+   st.session_state.debug_info.append(f"Text: {transcribed_text[:50]}...")
+   st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}, Sarcasm: {is_sarcastic}")
+   st.session_state.debug_info = st.session_state.debug_info[-100:]
 
    st.header("Transcribed Text")
-   st.text_area("Text", transcribed_text, height=
-                help="The audio converted to text. The text was processed for emotion and sentiment analysis.")
+   st.text_area("Text", transcribed_text, height=100, disabled=True)
 
-   #
-   words = transcribed_text.split()
-   word_count = len(words)
+   # Confidence estimation
+   word_count = len(transcribed_text.split())
    confidence_score = min(0.98, max(0.75, 0.75 + (word_count / 100) * 0.2))
-
    st.caption(f"Estimated transcription confidence: {confidence_score:.2f}")
 
    st.header("Analysis Results")
@@ -693,171 +462,59 @@ def display_analysis_results(transcribed_text):
        st.subheader("Sentiment")
        sentiment_icon = "😊" if sentiment == "POSITIVE" else "😞" if sentiment == "NEGATIVE" else "😕" if sentiment == "MIXED" else "😐"
        st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
-       st.info("Sentiment reflects the dominant emotion's tone and context.")
 
        st.subheader("Sarcasm")
        sarcasm_icon = "😏" if is_sarcastic else "🙂"
        sarcasm_text = "Detected" if is_sarcastic else "Not Detected"
        st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
-
-       # More informative sarcasm info
-       if is_sarcastic:
-           if sarcasm_score > 0.8:
-               st.info("High confidence in sarcasm detection.")
-           else:
-               st.info("Moderate confidence in sarcasm detection.")
-       else:
-           st.info("No clear indicators of sarcasm found.")
 
    with col2:
        st.subheader("Emotions")
        if emotions_dict:
-           st.markdown(
-               f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
+           st.markdown(f"*Dominant:* {emotion_map.get(top_emotion, '❓')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
 
-           # Enhanced visualization
            sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
-           significant_emotions = [(e, s) for e, s in sorted_emotions if s > 0.
+           significant_emotions = [(e, s) for e, s in sorted_emotions if s > 0.01]
 
            if significant_emotions:
                emotions = [e[0] for e in significant_emotions]
                scores = [e[1] for e in significant_emotions]
-
-               # Use a color scale that helps distinguish emotions better
                fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
                             title="Emotion Distribution", color=emotions,
                             color_discrete_sequence=px.colors.qualitative.Bold)
-
-               fig.update_layout(
-                   yaxis_range=[0, 1],
-                   showlegend=False,
-                   title_font_size=14,
-                   margin=dict(l=20, r=20, t=40, b=20),
-                   xaxis_title="Emotion",
-                   yaxis_title="Confidence Score",
-                   bargap=0.3
-               )
-
-               # Add horizontal reference line for minimal significance
-               fig.add_shape(
-                   type="line",
-                   x0=-0.5,
-                   x1=len(emotions) - 0.5,
-                   y0=0.1,
-                   y1=0.1,
-                   line=dict(color="gray", width=1, dash="dot")
-               )
-
+               fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14,
+                                 margin=dict(l=20, r=20, t=40, b=20), bargap=0.3)
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.write("No significant emotions detected.")
        else:
            st.write("No emotions detected.")
 
-   # Expert analysis section
-   with st.expander("Expert Analysis", expanded=False):
-       col1, col2 = st.columns(2)
-
-       with col1:
-           st.subheader("Emotion Insights")
-           # Provide more insightful analysis based on emotion combinations
-           if emotions_dict:
-               top_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)[:3]
-
-               if len(top_emotions) >= 2:
-                   emotion1, score1 = top_emotions[0]
-                   emotion2, score2 = top_emotions[1]
-
-                   if score2 > 0.7 * score1:  # If second emotion is close to first
-                       st.markdown(f"**Mixed emotional state detected:** {emotion_map.get(emotion1, '')} {emotion1} + {emotion_map.get(emotion2, '')} {emotion2}")
-
-                       # Analyze specific combinations
-                       if (emotion1 == "joy" and emotion2 == "surprise") or (emotion1 == "surprise" and emotion2 == "joy"):
-                           st.write("💡 This indicates excitement or delight")
-                       elif (emotion1 == "sadness" and emotion2 == "anger") or (emotion1 == "anger" and emotion2 == "sadness"):
-                           st.write("💡 This suggests frustration or disappointment")
-                       elif (emotion1 == "fear" and emotion2 == "surprise") or (emotion1 == "surprise" and emotion2 == "fear"):
-                           st.write("💡 This indicates shock or alarm")
-                   else:
-                       st.markdown(f"**Clear emotional state:** {emotion_map.get(emotion1, '')} {emotion1}")
-               else:
-                   st.write("Single dominant emotion detected.")
-           else:
-               st.write("No significant emotional patterns detected.")
-
-       with col2:
-           st.subheader("Context Analysis")
-           # Analyze the context based on combination of sentiment and sarcasm
-           if is_sarcastic and sentiment == "POSITIVE":
-               st.markdown("⚠️ **Potential Negative Connotation:** The positive sentiment might be misleading due to detected sarcasm.")
-           elif is_sarcastic and sentiment == "NEGATIVE":
-               st.markdown("⚠️ **Complex Expression:** Negative sentiment combined with sarcasm may indicate frustrated humor or ironic criticism.")
-           elif sentiment == "MIXED":
-               st.markdown("😕 **Ambivalent Message:** The content expresses mixed or conflicting emotions.")
-           elif sentiment == "POSITIVE" and sarcasm_score > 0.3:
-               st.markdown("⚠️ **Moderate Sarcasm Indicators:** The positive sentiment might be qualified by subtle sarcasm.")
-           elif sentiment == "NEGATIVE" and not is_sarcastic:
-               st.markdown("😞 **Clear Negative Expression:** The content expresses genuine negative sentiment without sarcasm.")
-           elif sentiment == "POSITIVE" and not is_sarcastic:
-               st.markdown("😊 **Clear Positive Expression:** The content expresses genuine positive sentiment without sarcasm.")
-
    # Debug expander
    with st.expander("Debug Information", expanded=False):
-       st.write("Debugging information
+       st.write("Debugging information:")
        for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
            st.text(f"{i + 1}. {debug_line}")
        if emotions_dict:
            st.write("Raw emotion scores:")
            for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
-               if score > 0.01:
+               if score > 0.01:
                    st.text(f"{emotion}: {score:.4f}")
 
-
-   with st.expander("Analysis Details", expanded=False):
-       st.write("""
-       *How this works:*
-       1. *Speech Recognition*: Audio transcribed using OpenAI Whisper
-       2. *Emotion Analysis*: DistilBERT model trained for six emotions
-       3. *Sentiment Analysis*: Derived from dominant emotion
-       4. *Sarcasm Detection*: RoBERTa model for irony detection
-       *Accuracy depends on*:
-       - Audio quality
-       - Speech clarity
-       - Background noise
-       - Speech patterns
-       """)
-
-# Process base64 audio data - optimized
+# Process base64 audio data
 def process_base64_audio(base64_data):
    try:
-       # Ensure we have proper base64 data
        if not base64_data or not isinstance(base64_data, str) or not base64_data.startswith('data:'):
            st.error("Invalid audio data received")
            return None
 
-       try:
-           base64_binary = base64_data.split(',')[1]
-       except IndexError:
-           st.error("Invalid base64 data format")
-           return None
-
-       # Decode the binary data
-       try:
-           binary_data = base64.b64decode(base64_binary)
-       except Exception as e:
-           st.error(f"Failed to decode base64 data: {str(e)}")
-           return None
+       base64_binary = base64_data.split(',')[1]
+       binary_data = base64.b64decode(base64_binary)
+       temp_file_path = os.path.join(tempfile.gettempdir(), f"recording_{int(time.time())}.wav")
 
-       # Create a temporary file
-       temp_dir = tempfile.gettempdir()
-       temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")
-
-       # Write the binary data to the file
        with open(temp_file_path, "wb") as f:
            f.write(binary_data)
 
-       # Validate the audio file
        if not validate_audio(temp_file_path):
            st.warning("Audio quality may not be optimal, but we'll try to process it.")
 
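`process_base64_audio` consumes a data URL from the recorder widget (`data:audio/webm;codecs=opus;base64,<payload>` or similar), so the split on the first comma separates the MIME header from the base64 payload. A minimal sketch of that round trip, with an illustrative payload in place of real recorder output:

```python
import base64

# Illustrative data URL; a real one comes from the browser recorder widget.
data_url = "data:audio/wav;base64," + base64.b64encode(b"RIFF....WAVE").decode()

header, payload = data_url.split(",", 1)  # header: "data:audio/wav;base64"
binary = base64.b64decode(payload)        # raw bytes, ready to write to a temp file
assert binary.startswith(b"RIFF")
```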
@@ -866,154 +523,113 @@ def process_base64_audio(base64_data):
        st.error(f"Error processing audio data: {str(e)}")
        return None
 
-# Preload models in background
+# Preload models in background
 def preload_models():
    threading.Thread(target=load_whisper_model).start()
    threading.Thread(target=get_emotion_classifier).start()
    threading.Thread(target=get_sarcasm_classifier).start()
 
-# Main App Logic
+# Main App Logic
 def main():
-   # Initialize session state
    if 'debug_info' not in st.session_state:
        st.session_state.debug_info = []
    if 'models_loaded' not in st.session_state:
        st.session_state.models_loaded = False
 
-   # Preload models in background
    if not st.session_state.models_loaded:
        preload_models()
        st.session_state.models_loaded = True
-
-   # Create tabs
+
    tab1, tab2 = st.tabs(["📁 Upload Audio", "🎙 Record Audio"])
 
    with tab1:
        st.header("Upload an Audio File")
-       audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg", "m4a", "flac"],
-                                     help="Upload an audio file for sentiment analysis (WAV, MP3, OGG, M4A, FLAC)")
+       audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg", "m4a", "flac"])
 
        if audio_file:
            st.audio(audio_file.getvalue())
-           st.caption("🎧 Uploaded Audio Playback")
-
-           # Add a placeholder for progress updates
-           progress_placeholder = st.empty()
-
-           # Add analyze button
            upload_button = st.button("Analyze Upload", key="analyze_upload")
 
            if upload_button:
-
-               progress_bar = progress_placeholder.progress(0, text="Preparing audio...")
-
-               # Process audio
+               progress_bar = st.progress(0, text="Preparing audio...")
                temp_audio_path = process_uploaded_audio(audio_file)
 
                if temp_audio_path:
-
-                   progress_bar.progress(25, text="Transcribing audio...")
-
-                   # Transcribe audio
-                   main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)
-
-                   if main_text:
-                       # Update progress
-                       progress_bar.progress(60, text="Analyzing sentiment and emotions...")
-
-                       # Display alternatives if available
-                       if alternatives:
-                           with st.expander("Alternative transcriptions detected", expanded=False):
-                               for i, alt in enumerate(alternatives[:3], 1):
-                                   st.write(f"{i}. {alt}")
-
-                       # Final analysis
-                       progress_bar.progress(90, text="Finalizing results...")
-                       display_analysis_results(main_text)
-
-                       # Complete progress
-                       progress_bar.progress(100, text="Analysis complete!")
-                       progress_placeholder.empty()
-                   else:
-                       progress_placeholder.empty()
-                       st.error("Could not transcribe the audio. Please try again with clearer audio.")
-
+                   progress_bar.progress(25, text="Processing in parallel...")
+
+                   with ThreadPoolExecutor(max_workers=3) as executor:
+                       transcribe_future = executor.submit(transcribe_audio, temp_audio_path)
+                       emotion_future = executor.submit(perform_emotion_detection, transcribe_future.result())
+                       sarcasm_future = executor.submit(perform_sarcasm_detection, transcribe_future.result())
+
+                       transcribed_text = transcribe_future.result()
+                       emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
+                       is_sarcastic, sarcasm_score = sarcasm_future.result()
+
+                   progress_bar.progress(90, text="Finalizing results...")
+                   if transcribed_text:
+                       display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
+                   else:
+                       st.error("Could not transcribe the audio. Try clearer audio.")
+
+                   progress_bar.progress(100, text="Analysis complete!")
                    if os.path.exists(temp_audio_path):
                        os.remove(temp_audio_path)
                else:
-
-                   st.error("Could not process the audio file. Please try a different file.")
+                   st.error("Could not process the audio file.")
 
    with tab2:
        st.header("Record Your Voice")
-       st.write("Use the recorder below to analyze your speech in real-time.")
-
-       # Browser recorder
-       st.subheader("Browser-Based Recorder")
-       st.write("Click the button below to start/stop recording.")
-
        audio_data = custom_audio_recorder()
 
        if audio_data:
-           # Add a placeholder for progress updates
-           progress_placeholder = st.empty()
-
-           # Add analyze button
            analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")
 
            if analyze_rec_button:
-
-               progress_bar = progress_placeholder.progress(0, text="Processing recording...")
-
-               # Process the recording
+               progress_bar = st.progress(0, text="Processing recording...")
               temp_audio_path = process_base64_audio(audio_data)
 
                if temp_audio_path:
-
-                   progress_bar.progress(30, text="Transcribing speech...")
-
-                   # Transcribe the audio
-                   transcribed_text = transcribe_audio(temp_audio_path)
-
+                   progress_bar.progress(30, text="Processing in parallel...")
+
+                   with ThreadPoolExecutor(max_workers=3) as executor:
+                       transcribe_future = executor.submit(transcribe_audio, temp_audio_path)
+                       emotion_future = executor.submit(perform_emotion_detection, transcribe_future.result())
+                       sarcasm_future = executor.submit(perform_sarcasm_detection, transcribe_future.result())
+
+                       transcribed_text = transcribe_future.result()
+                       emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
+                       is_sarcastic, sarcasm_score = sarcasm_future.result()
+
+                   progress_bar.progress(90, text="Finalizing results...")
                    if transcribed_text:
-
-                       progress_bar.progress(70, text="Analyzing sentiment and emotions...")
-
-                       # Display the results
-                       display_analysis_results(transcribed_text)
-
-                       # Complete progress
-                       progress_bar.progress(100, text="Analysis complete!")
-                       progress_placeholder.empty()
+                       display_analysis_results(transcribed_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
                    else:
-
-                       st.error("Could not transcribe the audio. Please try speaking more clearly.")
+                       st.error("Could not transcribe the audio. Speak clearly.")
 
+                   progress_bar.progress(100, text="Analysis complete!")
                    if os.path.exists(temp_audio_path):
                        os.remove(temp_audio_path)
                else:
-
-                   st.error("Could not process the recording. Please try again.")
+                   st.error("Could not process the recording.")
 
-       # Text input option
        st.subheader("Manual Text Input")
-       st.
-
-       manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
+       manual_text = st.text_area("Enter text to analyze:", placeholder="Type text to analyze...")
        analyze_text_button = st.button("Analyze Text", key="analyze_manual")
 
        if analyze_text_button and manual_text:
-           with
+           with ThreadPoolExecutor(max_workers=2) as executor:
+               emotion_future = executor.submit(perform_emotion_detection, manual_text)
+               sarcasm_future = executor.submit(perform_sarcasm_detection, manual_text)
+
+               emotions_dict, top_emotion, emotion_map, sentiment = emotion_future.result()
+               is_sarcastic, sarcasm_score = sarcasm_future.result()
+
+               display_analysis_results(manual_text, emotions_dict, top_emotion, emotion_map, sentiment, is_sarcastic, sarcasm_score)
 
-       # Show model information
        show_model_info()
-
-       # Add a small footer with version info
        st.sidebar.markdown("---")
-       st.sidebar.caption("Voice Sentiment Analysis v2.
+       st.sidebar.caption("Voice Sentiment Analysis v2.1")
        st.sidebar.caption("Optimized for speed and accuracy")
 
 if __name__ == "__main__":
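One behavioral detail of the parallel path added above: `transcribe_future.result()` is called while the analysis tasks are being submitted, so transcription necessarily completes first; only the emotion and sarcasm passes then run concurrently with each other. A minimal sketch of that dependency shape, with hypothetical stand-in functions in place of the app's model calls:

```python
from concurrent.futures import ThreadPoolExecutor

def transcribe(path: str) -> str:        # stand-in for transcribe_audio
    return "i am happy today"

def emotions(text: str) -> dict:         # stand-in for perform_emotion_detection
    return {"joy": 0.9}

def sarcasm(text: str) -> bool:          # stand-in for perform_sarcasm_detection
    return False

with ThreadPoolExecutor(max_workers=3) as executor:
    t = executor.submit(transcribe, "clip.wav")
    text = t.result()                    # blocks: analyses start only after transcription
    e = executor.submit(emotions, text)  # these two run concurrently with each other
    s = executor.submit(sarcasm, text)
    print(e.result(), s.result())
```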