Diggz10 committed on
Commit cae86cc · verified · 1 Parent(s): a4da09e

Update app.py

Files changed (1)
  1. app.py +80 -0
app.py CHANGED
@@ -0,0 +1,80 @@
+ import gradio as gr
+ from transformers import pipeline
+ import librosa
+ import numpy as np
+ import soundfile as sf
+ import os
+
+ # --- Model Loading ---
+ # We'll use the pipeline abstraction from transformers for simplicity.
+ # This model is specifically designed for audio classification (emotion detection).
+ # It will automatically handle the loading of the model and its preprocessor.
+ classifier = pipeline("audio-classification", model="mrm8488/Emotion-detection-from-audio-files")
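+ # Note: the first call downloads the weights from the Hugging Face Hub and
+ # caches them locally; each prediction returns a list of
+ # {'label': str, 'score': float} dicts sorted by descending score.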
+
+ # --- Emotion Labels Mapping (Optional, for clearer output) ---
+ # The model outputs raw labels; we could define a more readable mapping if needed.
+ # For this specific model, the labels are already clear.
+ # Example labels from the model's page: 'anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'
+
+
+ # --- Prediction Function ---
+ def predict_emotion(audio_file):
+     """
+     Predicts emotions from an audio file.
+
+     Args:
+         audio_file (str or tuple): Path to the audio file, or a
+             (sample_rate, audio_array) tuple. With type="filepath" (as
+             configured below), Gradio provides a file path for both file
+             uploads and microphone recordings; the tuple form is handled
+             as a fallback for type="numpy".
+
+     Returns:
+         dict: A dictionary of emotion labels and their probabilities.
+     """
+     if audio_file is None:
+         return {"error": "No audio input provided."}
+
+     # With type="filepath", Gradio's Audio component passes a temp-file path for
+     # both uploads and microphone recordings; the tuple branch covers the
+     # (samplerate, numpy_array) form produced by type="numpy".
+     if isinstance(audio_file, str):
+         # Handle a file path (file upload or microphone via type="filepath")
+         audio_path = audio_file
+     elif isinstance(audio_file, tuple):
+         # Handle raw microphone input as (samplerate, numpy_array)
+         sample_rate, audio_array = audio_file
+         # Save the numpy array to a temporary WAV file, as the pipeline expects
+         # a file path or raw bytes rather than a bare array
+         temp_audio_path = "temp_audio_from_mic.wav"
+         sf.write(temp_audio_path, audio_array, sample_rate)
+         audio_path = temp_audio_path
+     else:
+         return {"error": "Invalid audio input format."}
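+     # Note: given a file path, the pipeline decodes the audio with ffmpeg and
+     # resamples it to the feature extractor's expected rate, so no manual
+     # resampling (e.g. with librosa) is needed here.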
+
+     try:
+         # Perform inference; top_k=7 requests scores for all seven emotion
+         # labels (the pipeline returns only its top 5 predictions by default)
+         results = classifier(audio_path, top_k=7)
+
+         # Process results into a dictionary for better display
+         emotion_scores = {item['label']: item['score'] for item in results}
+
+         return emotion_scores
+     except Exception as e:
+         return {"error": f"An error occurred during prediction: {str(e)}"}
+     finally:
+         # Clean up the temporary file if one was created
+         if 'temp_audio_path' in locals() and os.path.exists(temp_audio_path):
+             os.remove(temp_audio_path)
+
+
+ # --- Gradio Interface ---
+ # Define the Gradio interface
+ iface = gr.Interface(
+     fn=predict_emotion,
+     inputs=gr.Audio(type="filepath", label="Upload Audio or Record with Microphone", sources=["microphone", "upload"]),
+     outputs=gr.Label(num_top_classes=7, label="Emotion Probabilities"),  # Matches the model's seven output labels
+     title="AI Audio Emotion Detector",
+     description="Upload an audio file or record your voice to detect emotions like anger, disgust, fear, happiness, neutral, sadness, and surprise."
+ )
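+ # gr.Label accepts the dict of {label: probability} returned by predict_emotion
+ # directly and renders the top classes as confidence bars.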
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     iface.launch()
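
For reference, app.py imports gradio, transformers, librosa, numpy, and soundfile, and the transformers pipeline needs a backend such as torch, so the Space's requirements.txt would need to list roughly the following (a minimal sketch inferred from the imports, not taken from the repository; versions unpinned):

gradio
transformers
torch
librosa
numpy
soundfile

ffmpeg must also be available on the system for the pipeline's audio decoding.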