Update app.py
app.py
CHANGED
@@ -1,4 +1,3 @@
import os
import streamlit as st
import tempfile
@@ -32,24 +31,46 @@ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
st.title("π Voice Based Sentiment Analysis")
st.write("Detect emotions, sentiment, and sarcasm from your voice with state-of-the-art accuracy using OpenAI Whisper.")


# Emotion Detection Function
@st.cache_resource
def get_emotion_classifier():
    try:
        # `device` is expected to be defined in the unchanged lines of this file (not shown in this diff).
        tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions", use_fast=True)
        model = AutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
        model = model.to(device)

        # Changed from device=-1 if device.type == "cpu" else 0
        # to ensure proper device selection
        classifier = pipeline("text-classification",
                              model=model,
                              tokenizer=tokenizer,
                              top_k=None,
                              device=0 if torch.cuda.is_available() else -1)

        # Add a verification test to make sure the model is working
        test_result = classifier("I am happy today")
        print(f"Emotion classifier test: {test_result}")

        return classifier
    except Exception as e:
        print(f"Error loading emotion model: {str(e)}")
        st.error("Failed to load emotion model. Please check logs.")
        # Return a basic fallback that won't crash
        return None


def perform_emotion_detection(text):
    try:
        if not text or len(text.strip()) < 3:
            return {}, "neutral", {}, "NEUTRAL"

        emotion_classifier = get_emotion_classifier()
        if emotion_classifier is None:
            # Mirror the sarcasm path: bail out cleanly if the cached model failed to load.
            st.error("Emotion classifier not available.")
            return {}, "neutral", {}, "NEUTRAL"
        emotion_results = emotion_classifier(text)[0]

        emotion_map = {
            "admiration": "π€©", "amusement": "π", "anger": "π‘", "annoyance": "π",
            "approval": "π", "caring": "π€", "confusion": "π", "curiosity": "π§",
            "desire": "π", "disappointment": "π", "disapproval": "π", "disgust": "π€’",
            "embarrassment": "π³", "excitement": "π€©", "fear": "π¨", "gratitude": "π",
@@ -57,29 +78,29 @@ def perform_emotion_detection(text):
            "optimism": "π", "pride": "π", "realization": "π‘", "relief": "π",
            "remorse": "π", "sadness": "π", "surprise": "π²", "neutral": "π"
        }

        positive_emotions = ["admiration", "amusement", "approval", "caring", "desire",
                             "excitement", "gratitude", "joy", "love", "optimism", "pride", "relief"]
        negative_emotions = ["anger", "annoyance", "disappointment", "disapproval", "disgust",
                             "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"]
        neutral_emotions = ["confusion", "curiosity", "realization", "surprise", "neutral"]

        # Fix 1: Create a clean emotions dictionary from results
        emotions_dict = {}
        for result in emotion_results:
            emotions_dict[result['label']] = result['score']

        # Fix 2: Filter out very low scores (below threshold)
        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.02}

        # If filtered dictionary is empty, fall back to original
        if not filtered_emotions:
            filtered_emotions = emotions_dict

        # Fix 3: Make sure we properly find the top emotion
        top_emotion = max(filtered_emotions, key=filtered_emotions.get)
        top_score = filtered_emotions[top_emotion]

        # Fix 4: More robust sentiment assignment
        if top_emotion in positive_emotions:
            sentiment = "POSITIVE"
@@ -88,12 +109,12 @@ def perform_emotion_detection(text):
        else:
            # If the top emotion is neutral but there are strong competing emotions, use them
            competing_emotions = sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]

            # Check if there's a close second non-neutral emotion
            if len(competing_emotions) > 1:
                if (competing_emotions[0][0] in neutral_emotions and
                        competing_emotions[1][0] not in neutral_emotions and
                        competing_emotions[1][1] > 0.5 * competing_emotions[0][1]):
                    # Use the second strongest emotion instead
                    top_emotion = competing_emotions[1][0]
                    if top_emotion in positive_emotions:
@@ -106,33 +127,55 @@ def perform_emotion_detection(text):
                        sentiment = "NEUTRAL"
                else:
                    sentiment = "NEUTRAL"

        # Log for debugging
        print(f"Text: {text[:50]}...")
        print(f"Top 3 emotions: {sorted(filtered_emotions.items(), key=lambda x: x[1], reverse=True)[:3]}")
        print(f"Selected top emotion: {top_emotion} ({filtered_emotions.get(top_emotion, 0):.3f})")
        print(f"Sentiment determined: {sentiment}")

        print(f"All emotions detected: {emotions_dict}")
        print(f"Filtered emotions: {filtered_emotions}")
        print("Emotion classification threshold: 0.02")

        return emotions_dict, top_emotion, emotion_map, sentiment
    except Exception as e:
        st.error(f"Emotion detection failed: {str(e)}")
        print(f"Exception in emotion detection: {str(e)}")
        return {}, "neutral", {}, "NEUTRAL"


# Sarcasm Detection Function
@st.cache_resource
def get_sarcasm_classifier():
    try:
        tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)
        model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
        model = model.to(device)
        classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                              device=0 if torch.cuda.is_available() else -1)

        # Add a verification test to ensure the model is working
        test_result = classifier("This is totally amazing")
        print(f"Sarcasm classifier test: {test_result}")

        return classifier
    except Exception as e:
        print(f"Error loading sarcasm model: {str(e)}")
        st.error("Failed to load sarcasm model. Please check logs.")
        return None


def perform_sarcasm_detection(text):
    try:
        if not text or len(text.strip()) < 3:
            return False, 0.0

        sarcasm_classifier = get_sarcasm_classifier()
        if sarcasm_classifier is None:
            st.error("Sarcasm classifier not available.")
            return False, 0.0

        result = sarcasm_classifier(text)[0]
        # For cardiffnlp/twitter-roberta-base-irony, LABEL_1 corresponds to the irony class.
        is_sarcastic = result['label'] == "LABEL_1"
        sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
@@ -141,11 +184,12 @@ def perform_sarcasm_detection(text):
        st.error(f"Sarcasm detection failed: {str(e)}")
        return False, 0.0


# Validate audio quality
def validate_audio(audio_path):
    try:
        sound = AudioSegment.from_file(audio_path)
        if sound.dBFS < -55:
            st.warning("Audio volume is too low. Please record or upload a louder audio.")
            return False
        if len(sound) < 1000:  # Less than 1 second
@@ -156,36 +200,43 @@ def validate_audio(audio_path):
        st.error("Invalid or corrupted audio file.")
        return False


# Speech Recognition with Whisper
@st.cache_resource
def load_whisper_model():
    try:
        model = whisper.load_model("large-v3")
        return model
    except Exception as e:
        print(f"Error loading Whisper model: {str(e)}")
        st.error("Failed to load Whisper model. Please check logs.")
        return None

def transcribe_audio(audio_path, show_alternative=False):
    try:
        st.write(f"Processing audio file: {audio_path}")
        sound = AudioSegment.from_file(audio_path)
        st.write(
            f"Audio duration: {len(sound) / 1000:.2f}s, Sample rate: {sound.frame_rate}, Channels: {sound.channels}")

        # Convert to mono WAV for Whisper (Whisper resamples to 16 kHz itself when loading the file)
        temp_wav_path = os.path.join(tempfile.gettempdir(), "temp_converted.wav")
        sound = sound.set_frame_rate(22050)
        sound = sound.set_channels(1)
        sound.export(temp_wav_path, format="wav")

        # Load Whisper model
        model = load_whisper_model()

        # Transcribe audio
        result = model.transcribe(temp_wav_path, language="en")
        main_text = result["text"].strip()

        # Clean up
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)

        # Whisper doesn't provide alternatives, so return empty list
        if show_alternative:
            return main_text, []
@@ -194,32 +245,39 @@ def transcribe_audio(audio_path, show_alternative=False):
        st.error(f"Transcription failed: {str(e)}")
        return ("", []) if show_alternative else ""


# Function to handle uploaded audio files
def process_uploaded_audio(audio_file):
    if not audio_file:
        return None

    try:
        temp_dir = tempfile.gettempdir()

        ext = audio_file.name.split('.')[-1].lower()
        if ext not in ['wav', 'mp3', 'ogg']:
            st.error("Unsupported audio format. Please upload WAV, MP3, or OGG.")
            return None
        temp_file_path = os.path.join(temp_dir, f"uploaded_audio_{int(time.time())}.{ext}")

        with open(temp_file_path, "wb") as f:
            f.write(audio_file.getvalue())

        if not validate_audio(temp_file_path):
            return None

        return temp_file_path
    except Exception as e:
        st.error(f"Error processing uploaded audio: {str(e)}")
        return None


# Show model information
def show_model_info():
    st.sidebar.header("π§ About the Models")

    model_tabs = st.sidebar.tabs(["Emotion", "Sarcasm", "Speech"])

    with model_tabs[0]:
        st.markdown("""
        *Emotion Model*: SamLowe/roberta-base-go_emotions
@@ -228,7 +286,7 @@ def show_model_info():
        - Micro-F1: 0.46
        [π Model Hub](https://huggingface.co/SamLowe/roberta-base-go_emotions)
        """)

    with model_tabs[1]:
        st.markdown("""
        *Sarcasm Model*: cardiffnlp/twitter-roberta-base-irony
@@ -237,7 +295,7 @@ def show_model_info():
        - F1-score: 0.705
        [π Model Hub](https://huggingface.co/cardiffnlp/twitter-roberta-base-irony)
        """)

    with model_tabs[2]:
        st.markdown("""
        *Speech Recognition*: OpenAI Whisper (large-v3)
@@ -249,8 +307,10 @@ def show_model_info():
        [π Model Details](https://github.com/openai/whisper)
        """)


# Custom audio recorder using HTML/JS
def custom_audio_recorder():
    st.warning("Browser-based recording requires microphone access and a modern browser. If recording fails, try uploading an audio file instead.")
    audio_recorder_html = """
    <script>
    var audioRecorder = {
@@ -267,11 +327,11 @@ def custom_audio_recorder():
                audioRecorder.streamBeingCaptured = stream;
                audioRecorder.mediaRecorder = new MediaRecorder(stream);
                audioRecorder.audioBlobs = [];

                audioRecorder.mediaRecorder.addEventListener("dataavailable", event => {
                    audioRecorder.audioBlobs.push(event.data);
                });

                audioRecorder.mediaRecorder.start();
            });
        }
@@ -279,14 +339,14 @@ def custom_audio_recorder():
        stop: function() {
            return new Promise(resolve => {
                let mimeType = audioRecorder.mediaRecorder.mimeType;

                audioRecorder.mediaRecorder.addEventListener("stop", () => {
                    let audioBlob = new Blob(audioRecorder.audioBlobs, { type: mimeType });
                    resolve(audioBlob);
                });

                audioRecorder.mediaRecorder.stop();

                audioRecorder.stopStream();
                audioRecorder.resetRecordingProperties();
            });
@@ -304,7 +364,7 @@ def custom_audio_recorder():
    var recordButton = document.getElementById('record-button');
    var audioElement = document.getElementById('audio-playback');
    var audioData = document.getElementById('audio-data');

    function toggleRecording() {
        if (!isRecording) {
            audioRecorder.start()
@@ -321,7 +381,7 @@ def custom_audio_recorder():
                .then(audioBlob => {
                    const audioUrl = URL.createObjectURL(audioBlob);
                    audioElement.src = audioUrl;

                    const reader = new FileReader();
                    reader.readAsDataURL(audioBlob);
                    reader.onloadend = function() {
@@ -330,7 +390,7 @@ def custom_audio_recorder():
                        const streamlitMessage = {type: "streamlit:setComponentValue", value: base64data};
                        window.parent.postMessage(streamlitMessage, "*");
                    }

                    isRecording = false;
                    recordButton.textContent = 'Start Recording';
                    recordButton.classList.remove('recording');
@@ -341,7 +401,7 @@ def custom_audio_recorder():
        recordButton = document.getElementById('record-button');
        audioElement = document.getElementById('audio-playback');
        audioData = document.getElementById('audio-data');

        recordButton.addEventListener('click', toggleRecording);
    });
    </script>
@@ -377,18 +437,20 @@ def custom_audio_recorder():
    }
    </style>
    """

    return components.html(audio_recorder_html, height=150)


# Function to display analysis results
def display_analysis_results(transcribed_text):
    # Fix 5: Add debugging to track what's happening
    st.session_state.debug_info = st.session_state.get('debug_info', [])
    st.session_state.debug_info.append(f"Processing text: {transcribed_text[:50]}...")
    st.session_state.debug_info = st.session_state.debug_info[-100:]  # Keep last 100 entries

    emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
    is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)

    # Add results to debug info
    st.session_state.debug_info.append(f"Top emotion: {top_emotion}, Sentiment: {sentiment}")
    st.session_state.debug_info.append(f"Sarcasm: {is_sarcastic}, Score: {sarcasm_score:.3f}")
@@ -397,7 +459,7 @@ def display_analysis_results(transcribed_text):
    st.text_area("Text", transcribed_text, height=150, disabled=True, help="The audio converted to text.")

    confidence_score = min(0.95, max(0.70, len(transcribed_text.split()) / 50))
    st.caption(f"Estimated transcription confidence: {confidence_score:.2f} (based on text length)")

    st.header("Analysis Results")
    col1, col2 = st.columns([1, 2])
@@ -417,13 +479,14 @@ def display_analysis_results(transcribed_text):
    with col2:
        st.subheader("Emotions")
        if emotions_dict:
            st.markdown(
                f"*Dominant:* {emotion_map.get(top_emotion, 'β')} {top_emotion.capitalize()} (Score: {emotions_dict[top_emotion]:.3f})")
            sorted_emotions = sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True)
            top_emotions = sorted_emotions[:8]
            emotions = [e[0] for e in top_emotions]
            scores = [e[1] for e in top_emotions]
            fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'},
                         title="Top Emotions Distribution", color=emotions,
                         color_discrete_sequence=px.colors.qualitative.Bold)
            fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
            st.plotly_chart(fig, use_container_width=True)
@@ -434,7 +497,7 @@ def display_analysis_results(transcribed_text):
    with st.expander("Debug Information", expanded=False):
        st.write("Debugging information for troubleshooting:")
        for i, debug_line in enumerate(st.session_state.debug_info[-10:]):
            st.text(f"{i + 1}. {debug_line}")
        if emotions_dict:
            st.write("Raw emotion scores:")
            for emotion, score in sorted(emotions_dict.items(), key=lambda x: x[1], reverse=True):
@@ -455,101 +518,104 @@ def display_analysis_results(transcribed_text):
        - Speech patterns
        """)


# Process base64 audio data
def process_base64_audio(base64_data):
    try:
        base64_binary = base64_data.split(',')[1]
        binary_data = base64.b64decode(base64_binary)

        temp_dir = tempfile.gettempdir()
        temp_file_path = os.path.join(temp_dir, f"recording_{int(time.time())}.wav")

        with open(temp_file_path, "wb") as f:
            f.write(binary_data)

        if not validate_audio(temp_file_path):
            return None

        return temp_file_path
    except Exception as e:
        st.error(f"Error processing audio data: {str(e)}")
        return None


# Main App Logic
def main():
    # Fix 7: Initialize session state for debugging
    if 'debug_info' not in st.session_state:
        st.session_state.debug_info = []

    tab1, tab2 = st.tabs(["π Upload Audio", "π Record Audio"])

    with tab1:
        st.header("Upload an Audio File")
        audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"],
                                      help="Upload an audio file for analysis")

        if audio_file:
            st.audio(audio_file.getvalue())
            st.caption("π§ Uploaded Audio Playback")

            upload_button = st.button("Analyze Upload", key="analyze_upload")

            if upload_button:
                with st.spinner('Analyzing audio with advanced precision...'):
                    temp_audio_path = process_uploaded_audio(audio_file)
                    if temp_audio_path:
                        main_text, alternatives = transcribe_audio(temp_audio_path, show_alternative=True)

                        if main_text:
                            if alternatives:
                                with st.expander("Alternative transcriptions detected", expanded=False):
                                    for i, alt in enumerate(alternatives[:3], 1):
                                        st.write(f"{i}. {alt}")

                            display_analysis_results(main_text)
                        else:
                            st.error("Could not transcribe the audio. Please try again with clearer audio.")

                        if os.path.exists(temp_audio_path):
                            os.remove(temp_audio_path)

    with tab2:
        st.header("Record Your Voice")
        st.write("Use the recorder below to analyze your speech in real-time.")

        st.subheader("Browser-Based Recorder")
        st.write("Click the button below to start/stop recording.")

        audio_data = custom_audio_recorder()

        if audio_data:
            analyze_rec_button = st.button("Analyze Recording", key="analyze_rec")

            if analyze_rec_button:
                with st.spinner("Processing your recording..."):
                    temp_audio_path = process_base64_audio(audio_data)

                    if temp_audio_path:
                        transcribed_text = transcribe_audio(temp_audio_path)

                        if transcribed_text:
                            display_analysis_results(transcribed_text)
                        else:
                            st.error("Could not transcribe the audio. Please try speaking more clearly.")

                        if os.path.exists(temp_audio_path):
                            os.remove(temp_audio_path)

        st.subheader("Manual Text Input")
        st.write("If recording doesn't work, you can type your text here:")

        manual_text = st.text_area("Enter text to analyze:", placeholder="Type what you want to analyze...")
        analyze_text_button = st.button("Analyze Text", key="analyze_manual")

        if analyze_text_button and manual_text:
            display_analysis_results(manual_text)

    show_model_info()


if __name__ == "__main__":
    main()
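As a quick sanity check of the helpers this commit touches, the snippet below is a minimal sketch (not part of the commit) that calls the updated text-analysis functions directly. It assumes the file above is saved as app.py and importable, that the SamLowe/roberta-base-go_emotions and cardiffnlp/twitter-roberta-base-irony models can be downloaded, and that streamlit, transformers, torch, openai-whisper, pydub, and plotly are installed (ffmpeg is needed for the audio paths). The script name, sample sentence, and printed values are illustrative only.

# sanity_check.py - illustrative only; the filename and sample text are placeholders
from app import perform_emotion_detection, perform_sarcasm_detection

sample = "Oh great, another Monday morning."

# perform_emotion_detection returns (all_scores, top_emotion, emoji_map, sentiment)
emotions, top_emotion, emoji_map, sentiment = perform_emotion_detection(sample)
# perform_sarcasm_detection returns (is_sarcastic, sarcasm_score)
is_sarcastic, sarcasm_score = perform_sarcasm_detection(sample)

print(f"Top emotion: {top_emotion} ({sentiment})")
print(f"Sarcastic: {is_sarcastic} (score {sarcasm_score:.3f})")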