asad231 committed (verified)
Commit 9a97fa7 · 1 Parent(s): b431434

Update app.py

Files changed (1)
  1. app.py +37 -29
app.py CHANGED
@@ -1,39 +1,47 @@
 import gradio as gr
-import tensorflow as tf
+import torch
+import torchaudio
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
 import numpy as np
-import librosa
 
-# 1. Load your trained model (must be in the same folder, named model.h5)
-model = tf.keras.models.load_model("model.h5")
+# Load model and processor
+model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
 
-# 2. Define labels & emojis (match your model’s output classes)
-EMOTIONS = ["Neutral", "Calm", "Happy", "Sad", "Angry", "Fearful", "Disgust", "Surprised"]
-EMOJI_MAP = {
-    "Neutral": "😐", "Calm": "😌", "Happy": "😄", "Sad": "😢",
-    "Angry": "😠", "Fearful": "😨", "Disgust": "🤢", "Surprised": "😲"
+# Emotion labels and emojis
+id2label = {
+    0: "angry 😠",
+    1: "calm 😌",
+    2: "happy 😄",
+    3: "sad 😢"
 }
 
-def predict_emotion(audio_path):
-    # Load & preprocess audio
-    y, sr = librosa.load(audio_path, sr=22050)
-    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
-    features = np.mean(mfcc.T, axis=0).reshape(1, -1)
-
-    # Run model
-    preds = model.predict(features)
-    idx = np.argmax(preds, axis=1)[0]
-    label = EMOTIONS[idx]
-    emoji = EMOJI_MAP[label]
-    return f"{label} {emoji}"
-
-# 3. Build Gradio Interface
-demo = gr.Interface(
+# Audio processing and prediction
+def predict_emotion(audio):
+    if audio is None:
+        return "No audio provided"
+
+    speech_array, sampling_rate = torchaudio.load(audio)
+    if sampling_rate != 16000:
+        resampler = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)
+        speech_array = resampler(speech_array)
+
+    input_values = processor(speech_array.squeeze(), return_tensors="pt", sampling_rate=16000).input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+
+    predicted_id = torch.argmax(logits, dim=-1).item()
+    return f"Detected Emotion: {id2label[predicted_id]}"
+
+# Gradio UI
+app = gr.Interface(
     fn=predict_emotion,
-    inputs=gr.Audio(source="upload", type="filepath", label="Upload a .wav file"),
-    outputs=gr.Text(label="Predicted Emotion"),
-    title="🎤 Voice Emotion AI",
-    description="Upload a voice clip (.wav) to detect the speaker’s emotion."
+    inputs=gr.Audio(source="upload", type="filepath", label="Upload or Record Audio"),
+    outputs=gr.Textbox(label="Detected Emotion with Emoji"),
+    title="🎙️ Voice Emotion Detector with Emoji",
+    description="Upload or record your voice. The model will detect your emotion and display an emoji."
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    app.launch()
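
A caveat about the added code: the hard-coded id2label dictionary covers only indices 0 through 3, while the ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition checkpoint stores its own label set in its config, so any predicted index outside that range would raise a KeyError in the f-string lookup. A minimal sketch of reading the labels from the checkpoint instead, assuming the config's id2label mapping is populated; the EMOJIS table below is illustrative and not part of this commit:

from transformers import Wav2Vec2ForSequenceClassification

model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Illustrative emoji table keyed by label name; labels missing from it
# simply fall back to plain text rather than raising a KeyError.
EMOJIS = {
    "angry": "😠", "calm": "😌", "happy": "😄", "sad": "😢",
    "disgust": "🤢", "fearful": "😨", "neutral": "😐", "surprised": "😲"
}

def label_for(predicted_id: int) -> str:
    # model.config.id2label is the integer-to-label mapping stored with the checkpoint
    label = model.config.id2label[predicted_id]
    return f"{label} {EMOJIS.get(label, '')}".strip()

Separately, gr.Audio(source="upload", ...) is the Gradio 3.x signature; Gradio 4.x renamed the parameter to sources (a list), so the Space would need to pin a compatible Gradio version for this constructor call to run unchanged.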