# Streamlit demo: voice emotion detection with emoji output.
# (Removed non-code page-header residue — "Spaces:" / "Runtime error" —
# left over from copy-pasting this file out of the hosting page.)
import librosa
import numpy as np
import streamlit as st
import torch
import torchaudio
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
# Emoji shown next to each detected emotion label.  Lookups go through
# EMOTION_EMOJI.get(emotion, "") at display time, so labels missing here
# simply render without an emoji.
# NOTE(review): the ehcalabres checkpoint is reported to emit more labels
# than these four (e.g. "fearful", "surprised", "disgust", "calm") —
# confirm against model.config.id2label and extend this map if desired.
EMOTION_EMOJI = {
    "angry": "😠",
    "happy": "😄",
    "sad": "😢",
    "neutral": "😐",
}
# Hub id of the speech-emotion-recognition checkpoint used below.
MODEL_NAME = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"


@st.cache_resource
def _load_model():
    """Load and return ``(processor, model)`` for the emotion checkpoint.

    Streamlit re-executes the whole script on every UI interaction;
    ``st.cache_resource`` ensures the large checkpoint is downloaded and
    instantiated only once per process instead of on every rerun.
    """
    processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
    model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME)
    model.eval()  # inference only — disable dropout etc.
    return processor, model


# Module-level names kept identical to the original flat script so the
# inference code below is unchanged.
processor, model = _load_model()
# ---------------------------------------------------------------------------
# UI: upload a WAV file, run it through the wav2vec2 emotion classifier, and
# display the predicted label with a matching emoji.
# ---------------------------------------------------------------------------
st.title("🎙️ Voice Emotion Detector with Emoji")

uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"])

if uploaded_file is not None:
    st.audio(uploaded_file, format="audio/wav")

    # st.audio may have read the file-like object; rewind so torchaudio
    # sees the full stream from the beginning.
    uploaded_file.seek(0)
    speech_array, sampling_rate = torchaudio.load(uploaded_file)

    # Downmix multi-channel (e.g. stereo) audio to mono so the squeeze
    # below yields a 1-D waveform, which is what the processor expects.
    if speech_array.dim() > 1 and speech_array.size(0) > 1:
        speech_array = speech_array.mean(dim=0, keepdim=True)

    # The wav2vec2 checkpoint was trained on 16 kHz audio; resample if needed.
    if sampling_rate != 16000:
        speech_array = torchaudio.transforms.Resample(
            orig_freq=sampling_rate, new_freq=16000
        )(speech_array)
    speech = speech_array.squeeze().numpy()

    inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # argmax over the label dimension (batch size is 1 here); the explicit
    # dim avoids relying on the implicit flatten of torch.argmax(logits).
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    emotion = model.config.id2label[predicted_class_id]

    st.markdown(f"### Emotion Detected: **{emotion}** {EMOTION_EMOJI.get(emotion, '')}")