Update app.py
app.py (changed)
@@ -23,16 +23,12 @@ except ImportError:
     USE_TORCHAUDIO = False
     st.warning("torchaudio not found. Using pydub (slower). Install torchaudio: pip install torchaudio")
 
-# Suppress warnings
+# Suppress warnings and set logging
 logging.getLogger("torch").setLevel(logging.ERROR)
 logging.getLogger("transformers").setLevel(logging.ERROR)
 warnings.filterwarnings("ignore")
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-# Device setup
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-st.write(f"Using device: {device}")
-
 # Streamlit config
 st.set_page_config(layout="wide", page_title="Voice Sentiment Analysis")
 st.title("π Voice Sentiment Analysis")
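
This hunk removes the CUDA probe entirely, so `device` no longer exists as a global and every later reference to it has to go as well; the following hunks switch the pipelines to `device=-1` for the same reason. If GPU support were ever reintroduced, a guarded index along these lines (a sketch, not part of this commit; `device_index` is a hypothetical name) would preserve the CPU default:

    import torch

    # transformers' pipeline() takes an integer device index, so map
    # "CUDA available" to GPU 0 and everything else to -1 (CPU),
    # rather than keeping a torch.device object around.
    device_index = 0 if torch.cuda.is_available() else -1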
@@ -42,19 +38,20 @@ st.markdown("Fast, accurate detection of emotions, sentiment, and sarcasm from v
 @st.cache_resource
 def load_models():
     try:
+        # Load Whisper model with CPU optimization
         whisper_model = whisper.load_model("base")
 
+        # Load emotion detection model
         emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
         emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
-        emotion_model = emotion_model.to(device).half()
         emotion_classifier = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer,
-                                      top_k=None, device
+                                      top_k=None, device=-1)  # CPU only
 
+        # Load sarcasm detection model
         sarcasm_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
         sarcasm_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
-        sarcasm_model = sarcasm_model.to(device).half()
         sarcasm_classifier = pipeline("text-classification", model=sarcasm_model, tokenizer=sarcasm_tokenizer,
-                                      device
+                                      device=-1)  # CPU only
 
         return whisper_model, emotion_classifier, sarcasm_classifier
     except Exception as e:
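
`device=-1` pins a transformers pipeline to CPU, and dropping the `.to(device).half()` calls matters on that path: fp16 weights are primarily a GPU optimization, and many CPU kernels either lack half-precision support or run slower with it. A minimal standalone sketch of the pattern this hunk adopts (same model name as the diff; loading by name here for brevity, where the diff builds the model and tokenizer objects first):

    from transformers import pipeline

    # CPU-only text classification; top_k=None returns scores for every label.
    emotion_classifier = pipeline(
        "text-classification",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
        top_k=None,
        device=-1,  # -1 = CPU; a non-negative index would select a GPU
    )
    for item in emotion_classifier("this is wonderful")[0]:
        print(item["label"], round(item["score"], 3))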
@@ -72,7 +69,7 @@ async def perform_emotion_detection(text):
         results = emotion_classifier(text)[0]
         emotions_dict = {r['label']: r['score'] for r in results}
         filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
-        top_emotion = max(filtered_emotions, key=filtered_emotions.get)
+        top_emotion = max(filtered_emotions, key=filtered_emotions.get, default="neutral")
 
         positive_emotions = ["joy"]
         negative_emotions = ["anger", "disgust", "fear", "sadness"]
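
The `default=` argument is what prevents a crash here: if every emotion scores at or below 0.01, `filtered_emotions` is empty, and `max()` over an empty iterable raises `ValueError` unless a default is supplied. For example:

    scores = {}  # every label filtered out by the 0.01 threshold
    try:
        max(scores, key=scores.get)  # raises ValueError on an empty dict
    except ValueError:
        pass
    print(max(scores, key=scores.get, default="neutral"))  # -> neutral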
@@ -131,16 +128,16 @@ def transcribe_audio(audio_path):
             waveform, sample_rate = torchaudio.load(audio_path)
             if sample_rate != 16000:
                 resampler = torchaudio.transforms.Resample(sample_rate, 16000)
-                waveform
+                waveform = resampler(waveform)
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                 torchaudio.save(temp_file.name, waveform, 16000)
-                result = whisper_model.transcribe(temp_file.name, language="en")
+                result = whisper_model.transcribe(temp_file.name, language="en", no_speech_threshold=0.6)
         else:
             sound = AudioSegment.from_file(audio_path)
             sound = sound.set_frame_rate(16000).set_channels(1)
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                 sound.export(temp_file.name, format="wav")
-                result = whisper_model.transcribe(temp_file.name, language="en")
+                result = whisper_model.transcribe(temp_file.name, language="en", no_speech_threshold=0.6)
         os.remove(temp_file.name)
         return result["text"].strip()
     except Exception as e:
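
The first change fixes a real bug: the old line was a bare `waveform` expression, so the resampler's output was discarded and the original-rate audio was written out. `Resample` is a callable module whose return value must be reassigned, roughly (hypothetical file paths for illustration):

    import torchaudio

    waveform, sample_rate = torchaudio.load("input.wav")
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
        waveform = resampler(waveform)  # reassign; calling it alone does nothing
    torchaudio.save("resampled.wav", waveform, 16000)

The `no_speech_threshold=0.6` added to both `transcribe()` calls matches openai-whisper's default of 0.6, so it mostly makes the silence-skipping behavior explicit.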
@@ -168,6 +165,9 @@ def process_uploaded_audio(audio_file):
 # Process base64 audio
 def process_base64_audio(base64_data):
     try:
+        if not base64_data.startswith("data:audio"):
+            st.error("Invalid audio data.")
+            return None
         base64_binary = base64_data.split(',')[1]
         binary_data = base64.b64decode(base64_binary)
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
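
The guard matters because `split(',')[1]` on a string with no comma raises `IndexError`, which the function's generic exception handler would turn into an unhelpful message; checking for a `data:audio...;base64,`-style data URL up front rejects bad input with a clear error instead. In isolation the decode path looks like this (a sketch; `decode_audio_data_url` and the payload are hypothetical):

    import base64

    def decode_audio_data_url(data_url):
        # Browser-recorded audio arrives as "data:audio/wav;base64,<payload>".
        if not data_url.startswith("data:audio"):
            return None  # not an audio data URL
        return base64.b64decode(data_url.split(",")[1])

    payload = "data:audio/wav;base64," + base64.b64encode(b"RIFF....WAVE").decode()
    print(len(decode_audio_data_url(payload)))  # 12 bytes back out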
@@ -339,5 +339,4 @@ def main():
         display_analysis_results(manual_text)
 
 if __name__ == "__main__":
-    main()
-    torch.cuda.empty_cache()
+    main()
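
Dropping `torch.cuda.empty_cache()` is consistent with the CPU-only direction of the commit: the call only releases memory held by the CUDA caching allocator and does nothing when CUDA is never initialized, and placed after `main()` in a Streamlit script it would in any case only run after all the work of a rerun has finished.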