Spaces:

MaroofTechSorcerer
/

Voice_Based_Sentiment_Analysis_with_Sarcasm_Detection

Running

Voice_Based_Sentiment_Analysis_with_Sarcasm_Detection

File size: 7,324 Bytes

3cf77dc

import os
import streamlit as st
import tempfile
import whisper
from transformers import pipeline
import plotly.express as px
import torch
import logging
import warnings
import shutil

# Suppress warnings for a clean console: only CRITICAL records from the
# "torch" and "transformers" loggers get through, and every Python warning
# is filtered out.
logging.getLogger("torch").setLevel(logging.CRITICAL)
logging.getLogger("transformers").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore")
# Turn off the Hugging Face tokenizers library's internal parallelism.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# NOTE: there is deliberately no global "use the CPU" statement here. A bare
# `torch.device("cpu")` expression only constructs a device object and throws
# it away; CPU execution is requested where each pipeline is built
# (`device=-1`).

# Page configuration: wide layout and the browser-tab title
st.set_page_config(page_title="Voice Based Sentiment Analysis", layout="wide")

# Page heading and tagline
st.title("🎙️ Voice Based Sentiment Analysis")
st.write("Detect emotions, sentiment, and sarcasm from your voice with high accuracy.")

# Sidebar: WAV uploader plus the button that triggers the analysis.
# `audio_file` and `upload_button` stay module-level names read by main().
with st.sidebar:
    st.title("Audio Input")
    st.write("Upload a WAV file for transcription and detailed analysis.")
    audio_file = st.file_uploader(
        "Choose an audio file",
        type=["wav"],
        help="Supports WAV format only.",
    )
    upload_button = st.button("Analyze", help="Click to process the uploaded audio.")

# FFmpeg availability probe
def check_ffmpeg():
    """Report whether an ``ffmpeg`` executable can be found on the PATH.

    Returns:
        bool: True when ``shutil.which("ffmpeg")`` resolves to a path,
        False when it returns None.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    return ffmpeg_path is not None

# Emotion Detection: model loader
@st.cache_resource
def get_emotion_classifier():
    """Build the emotion text-classification pipeline.

    The function is decorated with ``st.cache_resource`` so Streamlit can
    hand back the same pipeline object instead of rebuilding it.

    Returns:
        A ``transformers`` text-classification pipeline for
        ``bhadresh-savani/distilbert-base-uncased-emotion``, created with
        ``top_k=None`` and ``device=-1``.
    """
    return pipeline(
        task="text-classification",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
        top_k=None,
        device=-1,
    )

def perform_emotion_detection(text):
    """Score the emotions in ``text`` and derive a coarse sentiment from them.

    Args:
        text: The transcribed utterance to analyse.

    Returns:
        tuple: ``(emotions_dict, top_emotion, emotion_map, sentiment)`` where
        ``emotions_dict`` maps each emotion label to its score,
        ``top_emotion`` is the highest-scoring label, ``emotion_map`` maps
        labels to emoji, and ``sentiment`` is ``"POSITIVE"``, ``"NEGATIVE"``
        or ``"NEUTRAL"`` (``"NEUTRAL"`` for any label missing from the
        sentiment table). If anything raises, the error is shown with
        ``st.error`` and ``({}, "unknown", {}, "UNKNOWN")`` is returned.
    """
    # Static lookup tables; kept outside the try block because building them
    # cannot fail.
    emotion_map = {"anger": "😡", "fear": "😨", "joy": "😊", "love": "❤️", "sadness": "😢", "surprise": "😲"}
    sentiment_map = {"joy": "POSITIVE", "love": "POSITIVE", "anger": "NEGATIVE", "fear": "NEGATIVE", "sadness": "NEGATIVE", "surprise": "NEUTRAL"}

    try:
        emotion_classifier = get_emotion_classifier()
        # Long transcripts can exceed the model's 512-token input limit and
        # make the pipeline raise; truncate instead so the analysis still
        # runs on the leading part of the text.
        emotion_results = emotion_classifier(text, truncation=True, max_length=512)[0]
        emotions_dict = {result['label']: result['score'] for result in emotion_results}
        top_emotion = max(emotions_dict, key=emotions_dict.get)
        sentiment = sentiment_map.get(top_emotion, "NEUTRAL")
        return emotions_dict, top_emotion, emotion_map, sentiment
    except Exception as e:
        # UI boundary: report the problem to the user rather than crash the app.
        st.error(f"Emotion detection failed: {str(e)}")
        return {}, "unknown", {}, "UNKNOWN"

# Sarcasm Detection: model loader
@st.cache_resource
def get_sarcasm_classifier():
    """Build the irony/sarcasm text-classification pipeline.

    The function is decorated with ``st.cache_resource`` so Streamlit can
    hand back the same pipeline object instead of rebuilding it.

    Returns:
        A ``transformers`` text-classification pipeline for
        ``cardiffnlp/twitter-roberta-base-irony``, created with ``device=-1``.
    """
    return pipeline(
        task="text-classification",
        model="cardiffnlp/twitter-roberta-base-irony",
        device=-1,
    )

def perform_sarcasm_detection(text):
    """Decide whether ``text`` is sarcastic and how confident the model is.

    Args:
        text: The transcribed utterance to analyse.

    Returns:
        tuple: ``(is_sarcastic, sarcasm_score)``. ``is_sarcastic`` is True
        when the winning label is the positive (irony) class.
        ``sarcasm_score`` is the winning label's score when that label is the
        positive class, otherwise one minus that score. If anything raises,
        the error is shown with ``st.error`` and ``(False, 0.0)`` is returned.
    """
    # The positive class shows up as the generic "LABEL_1" when the model
    # config carries no label names and as "irony" when it does. Accept both
    # so detection does not silently turn off with a different config
    # revision. NOTE(review): confirm the label set of the deployed checkpoint.
    positive_labels = {"label_1", "irony"}

    try:
        sarcasm_classifier = get_sarcasm_classifier()
        # Long transcripts can exceed the model's 512-token input limit and
        # make the pipeline raise; truncate instead of failing. max_length is
        # given explicitly rather than relying on the tokenizer's default.
        result = sarcasm_classifier(text, truncation=True, max_length=512)[0]
        is_sarcastic = str(result['label']).lower() in positive_labels
        sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
        return is_sarcastic, sarcasm_score
    except Exception as e:
        # UI boundary: report the problem to the user rather than crash the app.
        st.error(f"Sarcasm detection failed: {str(e)}")
        return False, 0.0

# Transcription with Whisper: model loader
@st.cache_resource
def get_whisper_model():
    """Load the Whisper ``"base"`` speech-to-text model.

    The function is decorated with ``st.cache_resource`` so Streamlit can
    hand back the same model object instead of reloading it.

    Returns:
        The object returned by ``whisper.load_model("base")``.
    """
    model_size = "base"
    return whisper.load_model(model_size)

def _show_ffmpeg_install_help():
    """Show the "FFmpeg missing" error and the Windows installation steps."""
    st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg and add it to your system PATH.")
    st.markdown("**Instructions to install FFmpeg on Windows:**\n"
                "1. Download FFmpeg from [https://www.gyan.dev/ffmpeg/builds/](https://www.gyan.dev/ffmpeg/builds/) (e.g., `ffmpeg-release-essentials.zip`).\n"
                "2. Extract the ZIP to a folder (e.g., `C:\\ffmpeg`).\n"
                "3. Add `C:\\ffmpeg\\bin` to your system PATH:\n"
                "   - Right-click 'This PC' > 'Properties' > 'Advanced system settings' > 'Environment Variables'.\n"
                "   - Under 'System variables', edit 'Path' and add the new path.\n"
                "4. Restart your terminal and rerun the app.")


def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file to text with Whisper.

    The upload is written to a uniquely named temporary ``.wav`` file, passed
    to the Whisper model by path, and the temporary file is removed afterwards
    whether or not transcription succeeded.

    Args:
        audio_file: The uploaded file object; its bytes are read with
            ``getvalue()``.

    Returns:
        str: The ``"text"`` entry of Whisper's result, or ``""`` when FFmpeg
        is unavailable or any step raises (the problem is reported through
        ``st.error``).
    """
    if not check_ffmpeg():
        _show_ffmpeg_install_help()
        return ""

    temp_file_path = None
    try:
        model = get_whisper_model()

        # A unique name per call keeps concurrent sessions from overwriting
        # each other's audio (a fixed "temp_audio.wav" in the shared temp
        # directory would collide). delete=False and leaving the `with` block
        # before transcribing mean the file is closed when it is reopened by
        # path, which Windows requires.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(audio_file.getvalue())

        # Transcribe using Whisper
        result = model.transcribe(temp_file_path)
        return result["text"]
    except Exception as e:
        # UI boundary: report the problem to the user rather than crash the app.
        st.error(f"Transcription failed: {str(e)}")
        return ""
    finally:
        # Always clean up, including when transcription raised.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                # Best-effort: a leftover temp file must not mask the result.
                pass

# Main App Logic
def main():
    """Run one analysis pass and render the results.

    Nothing happens until the "Analyze" button has been pressed. Pressing it
    without an upload shows a sidebar error. Otherwise the audio is played
    back, transcribed, analysed for emotion and sarcasm, and the results are
    laid out in two columns. An empty transcription ends the pass early.
    """
    if upload_button and not audio_file:
        st.sidebar.error("Please upload an audio file first!")
        return
    if not (audio_file and upload_button):
        return

    st.audio(audio_file.getvalue(), format='audio/wav')
    st.caption("🎧 Uploaded Audio Playback")

    with st.spinner('Analyzing audio with advanced precision...'):
        transcribed_text = transcribe_audio(audio_file)
        if not transcribed_text:
            return

        emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
        is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)

    st.header("Transcribed Text")
    st.text_area("Text", transcribed_text, height=150, disabled=True, help="The audio converted to text.")

    st.header("Analysis Results")
    summary_col, chart_col = st.columns([1, 2])

    # Left column: sentiment and sarcasm verdicts.
    with summary_col:
        st.subheader("Sentiment")
        sentiment_icon = {"POSITIVE": "👍", "NEGATIVE": "👎"}.get(sentiment, "😐")
        st.markdown(f"**{sentiment_icon} {sentiment.capitalize()}** (Based on {top_emotion})")
        st.info("Sentiment reflects the dominant emotion’s tone.")

        st.subheader("Sarcasm")
        if is_sarcastic:
            sarcasm_icon, sarcasm_text = "😏", "Detected"
        else:
            sarcasm_icon, sarcasm_text = "😐", "Not Detected"
        st.markdown(f"**{sarcasm_icon} {sarcasm_text}** (Score: {sarcasm_score:.3f})")
        st.info("Score indicates sarcasm confidence (0 to 1).")

    # Right column: dominant emotion and the full score distribution.
    with chart_col:
        st.subheader("Emotions")
        if not emotions_dict:
            st.write("No emotions detected.")
        else:
            dominant_icon = emotion_map.get(top_emotion, '❓')
            dominant_score = emotions_dict[top_emotion]
            st.markdown(f"**Dominant:** {dominant_icon} {top_emotion.capitalize()} (Score: {dominant_score:.3f})")

            # Highest score first, so the bars read left to right by strength.
            emotions = sorted(emotions_dict, key=emotions_dict.get, reverse=True)
            scores = [emotions_dict[label] for label in emotions]
            fig = px.bar(
                x=emotions,
                y=scores,
                labels={'x': 'Emotion', 'y': 'Score'},
                title="Emotion Distribution",
                color=emotions,
                color_discrete_sequence=px.colors.qualitative.Pastel1,
            )
            fig.update_layout(yaxis_range=[0, 1], showlegend=False, title_font_size=14)
            st.plotly_chart(fig, use_container_width=True)

    st.info("Emotions drive sentiment here. Sarcasm is analyzed separately for accuracy.")


if __name__ == "__main__":
    main()