import gradio as gr
import numpy as np
import cv2
import pandas as pd
from datetime import datetime
import time
import librosa
from python_speech_features import mfcc
import onnxruntime as ort
import requests
import os
from sklearn.preprocessing import StandardScaler
import joblib

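# Download the pretrained FER+ facial-emotion ONNX model on first run and cache it locally.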
MODEL_URL = "https://github.com/onnx/models/raw/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx"
MODEL_PATH = "emotion-ferplus-8.onnx"

if not os.path.exists(MODEL_PATH):
    print("Downloading emotion recognition model...")
    response = requests.get(MODEL_URL)
    response.raise_for_status()  # fail loudly rather than saving an error page as the model file
    with open(MODEL_PATH, "wb") as f:
        f.write(response.content)

emotion_session = ort.InferenceSession(MODEL_PATH)
emotion_labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']


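# Lightweight voice-emotion stand-in: it summarizes MFCC features and applies crude
# heuristic thresholds rather than a trained classifier.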
class VoiceEmotionClassifier:
    def __init__(self):
        self.scaler = StandardScaler()  # placeholder; never fitted on training data

    def extract_features(self, audio):
        sr, y = audio
        y = y.astype(np.float32)

        # Gradio supplies stereo audio as (samples, channels); average across
        # channels to obtain a mono signal.
        if len(y.shape) > 1:
            y = np.mean(y, axis=1)

        if sr != 16000:
            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
            sr = 16000

        mfcc_features = mfcc(y, sr, numcep=13)
        return np.mean(mfcc_features, axis=0)

    def predict(self, audio):
        try:
            features = self.extract_features(audio).reshape(1, -1)
            # The scaler is never fitted, so calling transform() here would raise
            # NotFittedError; use the raw MFCC means with heuristic thresholds instead.
            if features[0, 0] > 0.5:
                return "happy", [{"label": "happy", "score": 0.8}]
            elif features[0, 0] < -0.5:
                return "sad", [{"label": "sad", "score": 0.7}]
            else:
                return "neutral", [{"label": "neutral", "score": 0.9}]
        except Exception as e:
            print(f"Voice analysis error: {e}")
            return "neutral", [{"label": "neutral", "score": 1.0}]


voice_classifier = VoiceEmotionClassifier()

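# Shared state for the app: latest readings, a rolling history, and a throttle timestamp.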
emotion_history = []
current_emotions = {"face": "neutral", "voice": "neutral"}
last_update_time = time.time()


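# Face pipeline: a Haar cascade finds the face, the FER+ model classifies the cropped region.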
def analyze_face(frame):
    """Analyze facial expressions in the frame using the FER+ ONNX model"""
    try:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        if len(faces) > 0:
            # Crop the first detected face and match the model's expected input:
            # a (1, 1, 64, 64) float32 tensor of raw grayscale values (0-255, not rescaled).
            x, y, w, h = faces[0]
            face_roi = gray[y:y+h, x:x+w]
            face_roi = cv2.resize(face_roi, (64, 64))
            face_roi = face_roi.astype('float32')
            face_roi = np.expand_dims(face_roi, axis=0)
            face_roi = np.expand_dims(face_roi, axis=0)

            input_name = emotion_session.get_inputs()[0].name
            output_name = emotion_session.get_outputs()[0].name
            results = emotion_session.run([output_name], {input_name: face_roi})[0]

            # The model outputs raw scores; apply softmax to turn them into probabilities.
            scores = results[0]
            exp_scores = np.exp(scores - np.max(scores))
            emotion_probs = exp_scores / exp_scores.sum()
            dominant_emotion = emotion_labels[np.argmax(emotion_probs)]

            emotions = {label: float(prob) for label, prob in zip(emotion_labels, emotion_probs)}
            return dominant_emotion, emotions

        return "neutral", {label: 0.0 for label in emotion_labels}
    except Exception as e:
        print(f"Face analysis error: {e}")
        return "neutral", {label: 0.0 for label in emotion_labels}


def analyze_voice(audio):
    """Analyze voice tone from audio"""
    return voice_classifier.predict(audio)


def update_emotion_history(face_emotion, voice_emotion):
    """Update the emotion history and current emotions"""
    global current_emotions, emotion_history, last_update_time

    current_time = datetime.now().strftime("%H:%M:%S")

    current_emotions = {
        "face": face_emotion,
        "voice": voice_emotion,
        "timestamp": current_time
    }

    # Only append to the history every 5 seconds so the timeline stays readable.
    if (time.time() - last_update_time) > 5 or not emotion_history:
        emotion_history.append({
            "timestamp": current_time,
            "face": face_emotion,
            "voice": voice_emotion
        })
        last_update_time = time.time()

    # Keep only the 20 most recent entries.
    if len(emotion_history) > 20:
        emotion_history = emotion_history[-20:]


def get_emotion_timeline():
    """Create a timeline DataFrame for display"""
    if not emotion_history:
        return pd.DataFrame(columns=["Time", "Facial Emotion", "Voice Emotion"])

    df = pd.DataFrame(emotion_history)
    df = df.rename(columns={
        "timestamp": "Time",
        "face": "Facial Emotion",
        "voice": "Voice Emotion"
    })
    return df


def get_practitioner_advice(face_emotion, voice_emotion):
    """Generate suggestions based on detected emotions"""
    advice = []

    if face_emotion in ["sad", "fear"]:
        advice.append("Patient appears distressed. Consider speaking more slowly and with reassurance.")
    elif face_emotion == "angry":
        advice.append("Patient seems frustrated. Acknowledge their concerns and maintain a calm demeanor.")
    elif face_emotion == "disgust":
        advice.append("Patient may be uncomfortable. Check if they're experiencing any discomfort.")
    elif face_emotion == "surprise":
        advice.append("Patient seems surprised. Ensure they understand all information.")

    if voice_emotion in ["sad", "fear"]:
        advice.append("Patient's tone suggests anxiety. Provide clear explanations and emotional support.")
    elif voice_emotion == "angry":
        advice.append("Patient sounds upset. Practice active listening and validate their feelings.")
    elif voice_emotion == "happy":
        advice.append("Patient seems positive. This may be a good time to discuss treatment options.")

    return "\n".join(advice) if advice else "Patient appears neutral. Continue with consultation."


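# Combined entry point: analyze the current frame and audio clip, then update shared state.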
def process_input(video, audio):
    """Process video and audio inputs to detect emotions"""
    try:
        if video is not None:
            # Gradio delivers frames as RGB; convert to BGR for OpenCV processing.
            frame = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)
            face_emotion, face_details = analyze_face(frame)
        else:
            face_emotion, face_details = "neutral", {}

        if audio is not None:
            voice_emotion, voice_details = analyze_voice(audio)
        else:
            voice_emotion, voice_details = "neutral", {}

        update_emotion_history(face_emotion, voice_emotion)