Diggz10 committed
Commit 0ea75df · verified · 1 Parent(s): b6e3c2e

Update app.py

Files changed (1): app.py +27 -24
app.py CHANGED
@@ -1,21 +1,19 @@
 import gradio as gr
 from transformers import pipeline
-import librosa
-import numpy as np
 import soundfile as sf
 import os
 
 # --- Model Loading ---
-# We'll use the pipeline abstraction from transformers for simplicity.
-# This model is specifically designed for audio classification (emotion detection).
-# It will automatically handle the loading of the model and its preprocessor.
-classifier = pipeline("audio-classification", model="mrm8488/Emotion-detection-from-audio-files")
-
-# --- Emotion Labels Mapping (Optional, for clearer output) ---
-# The model outputs raw labels, we can define a more readable mapping if needed
-# For this specific model, the labels are already pretty clear.
-# Example labels from the model's page: 'anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'
-
+# We switched to 'superb/wav2vec2-base-superb-er' as it's a well-established and public model for emotion recognition.
+# This should resolve the download issues encountered previously.
+try:
+    classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
+except Exception as e:
+    # If there's an error during model loading, we can display it in the Gradio interface
+    # This helps in debugging issues directly on the Hugging Face Space.
+    def error_fn(audio_file):
+        return {"error": f"Failed to load the model. Please check the logs. Error: {str(e)}"}
+    classifier = None
 
 # --- Prediction Function ---
 def predict_emotion(audio_file):
@@ -23,16 +21,17 @@ def predict_emotion(audio_file):
     Predicts emotions from an audio file.
 
     Args:
-        audio_file (str or np.ndarray): Path to the audio file or a numpy array
-                                        (if using microphone input directly).
-                                        Gradio's Audio component usually provides
-                                        a file path for file uploads or a tuple
-                                        (samplerate, audio_array) for microphone.
+        audio_file (str or tuple): Path to the audio file (from upload) or a tuple
+                                   (samplerate, audio_array) from microphone input.
     Returns:
         dict: A dictionary of emotion labels and their probabilities.
     """
+    # Handle case where the model failed to load
+    if classifier is None:
+        return {"error": "The AI model could not be loaded. The application cannot start."}
+
     if audio_file is None:
-        return {"error": "No audio input provided."}
+        return {"error": "No audio input provided. Please upload a file or record."}
 
     # Gradio's Audio component can return a path to a temp file for file uploads,
     # or a tuple (samplerate, numpy_array) for microphone input.
@@ -47,14 +46,14 @@ def predict_emotion(audio_file):
         sf.write(temp_audio_path, audio_array, sample_rate)
         audio_path = temp_audio_path
     else:
-        return {"error": "Invalid audio input format."}
+        return {"error": f"Invalid audio input format: {type(audio_file)}"}
 
     try:
         # Perform inference
-        results = classifier(audio_path)
+        results = classifier(audio_path, top_k=5) # top_k ensures we get all relevant emotion scores
 
         # Process results into a dictionary for better display
-        emotion_scores = {item['label']: item['score'] for item in results}
+        emotion_scores = {item['label']: round(item['score'], 3) for item in results}
 
         return emotion_scores
     except Exception as e:
@@ -69,10 +68,14 @@ def predict_emotion(audio_file):
 # Define the Gradio interface
 iface = gr.Interface(
     fn=predict_emotion,
-    inputs=gr.Audio(type="filepath", label="Upload Audio or Record with Microphone", sources=["microphone", "file"]),
-    outputs=gr.Label(num_top_classes=7, label="Emotion Probabilities"), # Adjust num_top_classes based on model's output labels
+    inputs=gr.Audio(sources=["microphone", "file"], type="filepath", label="Upload Audio or Record with Microphone"),
+    outputs=gr.Label(num_top_classes=5, label="Emotion Probabilities"), # This model has 4 emotions + 'no-emotion'
     title="AI Audio Emotion Detector",
-    description="Upload an audio file or record your voice to detect emotions like anger, disgust, fear, happiness, neutral, sadness, and surprise."
+    description="Upload an audio file or record your voice to detect emotions. This model is trained to recognize 'anger', 'happiness', 'neutral', 'sadness', and 'no-emotion'.",
+    examples=[
+        # You can add example audio files to your Hugging Face Space and reference them here.
+        # For now, we'll leave this empty.
+    ]
 )
 
 # Launch the Gradio app
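
The core of this commit is the swap to the superb/wav2vec2-base-superb-er checkpoint. As a minimal sketch for exercising that pipeline outside the Gradio app (assuming transformers and torch are installed, ffmpeg is available for decoding, and sample.wav is a hypothetical local speech clip):

# Minimal smoke test of the checkpoint this commit switches to, independent of the UI.
# Assumes transformers + torch are installed; decoding a file path relies on ffmpeg.
from transformers import pipeline

classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")

# The pipeline returns a list of {'label': ..., 'score': ...} dicts, highest score
# first; a top_k larger than the model's label count is clamped by the pipeline.
results = classifier("sample.wav", top_k=5)
print({item["label"]: round(item["score"], 3) for item in results})

This checkpoint reports short label codes (e.g. 'neu', 'hap', 'ang', 'sad'), which are the keys the gr.Label output will display.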