import os

import gradio as gr
import soundfile as sf
from transformers import pipeline
# --- Model Loading ---
# We'll use the pipeline abstraction from transformers for simplicity.
# This model is designed for audio classification (emotion detection);
# the pipeline automatically loads both the model and its preprocessor.
classifier = pipeline("audio-classification", model="mrm8488/Emotion-detection-from-audio-files")
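# The one-liner above is roughly equivalent to loading the pieces explicitly.
# A sketch, assuming the checkpoint ships a compatible feature extractor
# (AutoFeatureExtractor / AutoModelForAudioClassification are standard
# transformers APIs):
#   from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
#   extractor = AutoFeatureExtractor.from_pretrained("mrm8488/Emotion-detection-from-audio-files")
#   model = AutoModelForAudioClassification.from_pretrained("mrm8488/Emotion-detection-from-audio-files")
#   classifier = pipeline("audio-classification", model=model, feature_extractor=extractor)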
# --- Emotion Labels Mapping (Optional, for clearer output) ---
# The model outputs raw labels; we could define a more readable mapping, but
# for this model the labels are already clear. Labels from the model's page:
# 'anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'
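# If friendlier display names were ever wanted, a mapping like the following
# would do (illustrative only, unused below; keys must match the model's
# label strings exactly):
#   LABEL_DISPLAY = {
#       "anger": "Angry",
#       "disgust": "Disgusted",
#       "fear": "Fearful",
#       "happiness": "Happy",
#       "neutral": "Neutral",
#       "sadness": "Sad",
#       "surprise": "Surprised",
#   }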
# --- Prediction Function ---
def predict_emotion(audio_file):
    """
    Predicts emotions from an audio file.

    Args:
        audio_file (str or tuple): Path to an audio file. With
            gr.Audio(type="filepath"), Gradio passes a temp-file path for
            both file uploads and microphone recordings. The tuple form
            (sample_rate, numpy_array) only occurs with type="numpy" and
            is handled here defensively.

    Returns:
        dict: A dictionary of emotion labels and their probabilities.
    """
    if audio_file is None:
        return {"error": "No audio input provided."}

    if isinstance(audio_file, str):
        # File path: the normal case with type="filepath".
        audio_path = audio_file
    elif isinstance(audio_file, tuple):
        # Defensive branch for type="numpy" input: (sample_rate, numpy_array).
        # Save the array to a temporary WAV file, since the pipeline expects
        # a file path or raw bytes.
        sample_rate, audio_array = audio_file
        temp_audio_path = "temp_audio_from_mic.wav"
        sf.write(temp_audio_path, audio_array, sample_rate)
        audio_path = temp_audio_path
    else:
        return {"error": "Invalid audio input format."}
    try:
        # Perform inference; results is a list of {"label", "score"} dicts.
        results = classifier(audio_path)
        # Convert to {label: score} for display in gr.Label.
        emotion_scores = {item["label"]: item["score"] for item in results}
        return emotion_scores
    except Exception as e:
        return {"error": f"An error occurred during prediction: {str(e)}"}
    finally:
        # Clean up the temporary file if one was created.
        if "temp_audio_path" in locals() and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
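# Quick local smoke test; a sketch, assuming some "sample.wav" exists next to
# this script (not part of the repo):
#   print(predict_emotion("sample.wav"))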
# --- Gradio Interface ---
iface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(type="filepath", label="Upload Audio or Record with Microphone", sources=["microphone", "upload"]),
    outputs=gr.Label(num_top_classes=7, label="Emotion Probabilities"),  # 7 matches the model's label set
    title="AI Audio Emotion Detector",
    description="Upload an audio file or record your voice to detect emotions like anger, disgust, fear, happiness, neutral, sadness, and surprise.",
)
# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
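# Tip: when running locally, iface.launch(share=True) creates a temporary
# public URL; on Hugging Face Spaces the plain launch() above is all you need.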