# Source: Hugging Face Space "Diggz10/emotiondetector1" (status: Running)
# File size: 3,500 bytes
# Commit hashes shown in the page's blame column: cae86cc, 0ea75df, a391cc5

import os
import tempfile

import gradio as gr
import soundfile as sf
from transformers import pipeline

# --- Model Loading ---
# The audio-classification pipeline is built once at import time from the
# public 'superb/wav2vec2-base-superb-er' checkpoint.
#
# Module-level names set here:
#   classifier        -- the loaded pipeline, or None when loading failed.
#   MODEL_LOAD_ERROR  -- None on success, otherwise the failure text.
#   error_fn          -- defined only when loading failed; returns the failure
#                        as an {"error": ...} dict. It is not referenced by the
#                        rest of the visible code.
MODEL_LOAD_ERROR = None
try:
    classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
except Exception as e:
    # Python unbinds `e` as soon as this except clause ends, so a function
    # that reads `e` later would raise NameError. Capture the text now.
    MODEL_LOAD_ERROR = str(e)
    # The user-facing message points at the logs, so make sure the failure
    # is actually written there instead of being silently swallowed.
    print(f"Model loading failed: {e!r}")

    def error_fn(audio_file):
        """Return the model-loading failure as an error dict (input is ignored)."""
        return {"error": f"Failed to load the model. Please check the logs. Error: {MODEL_LOAD_ERROR}"}

    classifier = None

# --- Prediction Function ---
def _write_temp_wav(sample_rate, audio_array):
    """Write samples to a uniquely named temporary WAV file and return its path."""
    # mkstemp picks a unique name, so concurrent requests never share (or
    # delete) each other's file the way a fixed filename would.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # soundfile opens the file again by path
    try:
        sf.write(temp_path, audio_array, sample_rate)
    except Exception:
        # Do not leave an orphaned file behind when writing fails.
        os.remove(temp_path)
        raise
    return temp_path


def predict_emotion(audio_file):
    """
    Predicts emotions from an audio file.

    Args:
        audio_file (str or tuple): Path to an audio file, or a tuple
                                   (samplerate, audio_array) of raw samples.
    Returns:
        dict: Emotion labels mapped to scores rounded to 3 decimals. On any
              failure (model not loaded, missing or unsupported input,
              inference error) a single-entry dict {"error": message} is
              returned instead.
    """
    # Handle case where the model failed to load
    if classifier is None:
        return {"error": "The AI model could not be loaded. The application cannot start."}

    if audio_file is None:
        return {"error": "No audio input provided. Please upload a file or record."}

    # Stays None unless this call creates a temporary file that must be
    # removed afterwards.
    temp_audio_path = None

    if isinstance(audio_file, str):
        # Already a path on disk; use it directly.
        audio_path = audio_file
    elif isinstance(audio_file, tuple):
        # Raw samples: persist them to a WAV file and pass its path to the classifier.
        sample_rate, audio_array = audio_file
        temp_audio_path = _write_temp_wav(sample_rate, audio_array)
        audio_path = temp_audio_path
    else:
        return {"error": f"Invalid audio input format: {type(audio_file)}"}

    try:
        # Ask for up to five classes from the classifier.
        results = classifier(audio_path, top_k=5)

        # Flatten the list of {'label', 'score'} items into label -> score.
        return {item['label']: round(item['score'], 3) for item in results}
    except Exception as e:
        return {"error": f"An error occurred during prediction: {str(e)}"}
    finally:
        # Remove the temporary file only if this call created one.
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)


# --- Gradio Interface ---
# Input widget: the user may record from the microphone or upload a file;
# type="filepath" requests the audio as a file path.
_audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Upload Audio or Record with Microphone",
)

# Output widget: shows at most five classes.
# NOTE(review): the description below names five classes -- confirm this
# matches the label set the model actually emits.
_emotion_output = gr.Label(num_top_classes=5, label="Emotion Probabilities")

iface = gr.Interface(
    fn=predict_emotion,
    inputs=_audio_input,
    outputs=_emotion_output,
    title="AI Audio Emotion Detector",
    description="Upload an audio file or record your voice to detect emotions. This model is trained to recognize 'anger', 'happiness', 'neutral', 'sadness', and 'no-emotion'.",
    # No example clips are bundled yet; add audio files to the Space and
    # list them here to offer them as examples.
    examples=[],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()