asad231 committed (verified)
Commit 9a97fa7 · 1 Parent(s): b431434

Update app.py

Files changed (1)
  1. app.py +37 -29
app.py CHANGED
@@ -1,39 +1,47 @@
 import gradio as gr
-import tensorflow as tf
+import torch
+import torchaudio
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
 import numpy as np
-import librosa
 
-# 1. Load your trained model (must be in the same folder, named model.h5)
-model = tf.keras.models.load_model("model.h5")
+# Load model and processor
+model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
 
-# 2. Define labels & emojis (match your model’s output classes)
-EMOTIONS = ["Neutral", "Calm", "Happy", "Sad", "Angry", "Fearful", "Disgust", "Surprised"]
-EMOJI_MAP = {
-    "Neutral": "😐", "Calm": "😌", "Happy": "😄", "Sad": "😢",
-    "Angry": "😠", "Fearful": "😨", "Disgust": "🤢", "Surprised": "😲"
+# Emotion labels and emojis
+id2label = {
+    0: "angry 😠",
+    1: "calm 😌",
+    2: "happy 😄",
+    3: "sad 😢"
 }
 
-def predict_emotion(audio_path):
-    # Load & preprocess audio
-    y, sr = librosa.load(audio_path, sr=22050)
-    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
-    features = np.mean(mfcc.T, axis=0).reshape(1, -1)
-
-    # Run model
-    preds = model.predict(features)
-    idx = np.argmax(preds, axis=1)[0]
-    label = EMOTIONS[idx]
-    emoji = EMOJI_MAP[label]
-    return f"{label} {emoji}"
-
-# 3. Build Gradio Interface
-demo = gr.Interface(
+# Audio processing and prediction
+def predict_emotion(audio):
+    if audio is None:
+        return "No audio provided"
+
+    speech_array, sampling_rate = torchaudio.load(audio)
+    if sampling_rate != 16000:
+        resampler = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)
+        speech_array = resampler(speech_array)
+
+    input_values = processor(speech_array.squeeze(), return_tensors="pt", sampling_rate=16000).input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+
+    predicted_id = torch.argmax(logits, dim=-1).item()
+    return f"Detected Emotion: {id2label[predicted_id]}"
+
+# Gradio UI
+app = gr.Interface(
     fn=predict_emotion,
-    inputs=gr.Audio(source="upload", type="filepath", label="Upload a .wav file"),
-    outputs=gr.Text(label="Predicted Emotion"),
-    title="🎤 Voice Emotion AI",
-    description="Upload a voice clip (.wav) to detect the speaker’s emotion."
+    inputs=gr.Audio(source="upload", type="filepath", label="Upload or Record Audio"),
+    outputs=gr.Textbox(label="Detected Emotion with Emoji"),
+    title="🎙️ Voice Emotion Detector with Emoji",
+    description="Upload or record your voice. The model will detect your emotion and display an emoji."
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    app.launch()
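
A caveat about the added code: the hard-coded id2label dictionary covers only indices 0 through 3, while the ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition checkpoint stores its own label set in its config, so any predicted index outside that range would raise a KeyError in the f-string lookup. A minimal sketch of reading the labels from the checkpoint instead, assuming the config's id2label mapping is populated; the EMOJIS table below is illustrative and not part of this commit:

from transformers import Wav2Vec2ForSequenceClassification

model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Illustrative emoji table keyed by label name; labels missing from it
# simply fall back to plain text rather than raising a KeyError.
EMOJIS = {
    "angry": "😠", "calm": "😌", "happy": "😄", "sad": "😢",
    "disgust": "🤢", "fearful": "😨", "neutral": "😐", "surprised": "😲"
}

def label_for(predicted_id: int) -> str:
    # model.config.id2label is the integer-to-label mapping stored with the checkpoint
    label = model.config.id2label[predicted_id]
    return f"{label} {EMOJIS.get(label, '')}".strip()

Separately, gr.Audio(source="upload", ...) is the Gradio 3.x signature; Gradio 4.x renamed the parameter to sources (a list), so the Space would need to pin a compatible Gradio version for this constructor call to run unchanged.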