Spaces:

yunusajib
/

Real-Time-Emotional-Detection

Sleeping

App Files Files Community

Real-Time-Emotional-Detection / app.py

yunusajib

update app

9d66904 verified 2 months ago

raw

history blame

4.54 kB

	import cv2
	import numpy as np
	import pyttsx3
	import onnxruntime as ort
	import librosa
	import sounddevice as sd
	import scipy.io.wavfile as wavfile
	from sklearn.preprocessing import StandardScaler
	import time
	import os
	from gtts import gTTS
	import gradio as gr
	import tempfile

	# ------------------- Speech Emotion Recognition Model -------------------
	class SpeechEmotionRecognizer:
	    """Classify the emotion of a speech clip with an ONNX model.

	    Each clip is summarised as the time-average of 40 MFCC coefficients,
	    standardized within the clip, and passed to the model as one
	    ``(1, 40)`` float32 row. The predicted label is the entry of
	    ``self.labels`` at the arg-max of the model's first output.
	    """

	    def __init__(self, model_path):
	        """Load the ONNX model at *model_path* and record its input name."""
	        self.model = ort.InferenceSession(model_path)
	        self.input_name = self.model.get_inputs()[0].name
	        self.labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

	        # Placeholder only: this scaler is never fitted, so it is not applied
	        # anywhere; extract_features standardizes each clip on its own.
	        # NOTE(review): a scaler fitted on the training data should be loaded
	        # here if the model was trained on scaler-normalized features.
	        self.scaler = StandardScaler()

	    @staticmethod
	    def _standardize(values):
	        """Return *values* shifted to zero mean and scaled to unit variance.

	        A constant vector has zero spread; it is returned centred (all
	        zeros) instead of being divided by zero, which would produce NaN
	        or inf and poison the model input.
	        """
	        centered = values - np.mean(values)
	        spread = np.std(values)
	        if spread > 0:
	            return centered / spread
	        return centered

	    def extract_features(self, y, sr):
	        """Return the standardized mean-MFCC vector for signal *y* at rate *sr*."""
	        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
	        # Average over time so every clip yields one value per coefficient.
	        mfcc_mean = np.mean(mfcc.T, axis=0)
	        return self._standardize(mfcc_mean)

	    def predict_emotion(self, audio_data, sr):
	        """Return the label predicted for *audio_data* sampled at *sr* Hz."""
	        features = self.extract_features(audio_data, sr)
	        # Single-row float32 batch for the ONNX session.
	        input_data = features.reshape(1, -1).astype(np.float32)
	        pred = self.model.run(None, {self.input_name: input_data})[0]
	        emotion_idx = int(np.argmax(pred))
	        return self.labels[emotion_idx]

	# ------------------- Facial Emotion Recognition Model -------------------
	class FacialEmotionRecognizer:
	    """Classify the emotion shown in a face crop with an ONNX model.

	    The label list follows the eight classes listed in ``self.labels``; the
	    prediction is the label at the arg-max of the model's first output.
	    """

	    def __init__(self, model_path):
	        """Open an inference session for the model stored at *model_path*."""
	        session = ort.InferenceSession(model_path)
	        self.model = session
	        self.input_name = session.get_inputs()[0].name
	        self.labels = [
	            'neutral',
	            'happiness',
	            'surprise',
	            'sadness',
	            'anger',
	            'disgust',
	            'fear',
	            'contempt',
	        ]

	    def predict_emotion(self, face_img):
	        """Return the predicted emotion label for *face_img*.

	        The image is resized to 64x64, cast to float32 and given two leading
	        singleton axes before inference.
	        NOTE(review): assumes *face_img* is a single-channel 2-D image so the
	        batch ends up as (1, 1, 64, 64) — confirm against callers.
	        """
	        resized = cv2.resize(face_img, (64, 64)).astype('float32')
	        # Prepend the batch and channel axes in one indexing step.
	        batch = resized[np.newaxis, np.newaxis, ...]
	        scores = self.model.run(None, {self.input_name: batch})[0]
	        return self.labels[np.argmax(scores)]

	# ------------------- Utility Functions -------------------

	def speak(text):
	    """Synthesize *text* to an MP3 file with gTTS and return the file's path.

	    Returns ``None`` when *text* is ``None``, empty, or only whitespace.
	    Otherwise the caller owns the returned temporary file and is
	    responsible for deleting it once it has been played.

	    Any exception raised by gTTS propagates to the caller; the temporary
	    file is removed first so a failed synthesis leaves nothing behind.
	    """
	    if not text or not text.strip():
	        return None

	    # Reserve a unique path, then close the handle right away so gTTS
	    # writes to a file that is not simultaneously held open here.
	    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
	    os.close(fd)
	    try:
	        gTTS(text).save(mp3_path)
	    except Exception:
	        try:
	            os.remove(mp3_path)
	        except OSError:
	            # Best-effort cleanup; the synthesis error is what matters.
	            pass
	        raise
	    return mp3_path

	def record_audio(duration=3, fs=22050):
	    """Record a single-channel float32 clip and return ``(samples, fs)``.

	    *duration* is in seconds and *fs* is the sample rate in Hz; together
	    they set the number of frames requested. No input device is specified
	    here. The returned samples are flattened to one dimension.
	    """
	    print("Recording audio...")
	    frame_count = int(duration * fs)
	    recording = sd.rec(frame_count, samplerate=fs, channels=1, dtype='float32')
	    # Wait for the capture started above to finish before using the buffer.
	    sd.wait()
	    samples = recording.flatten()
	    print("Recording complete.")
	    return samples, fs

	def analyze_face(face_roi, emotion_model):
	    """Return the label *emotion_model* predicts for the crop *face_roi*.

	    *emotion_model* may be any object exposing ``predict_emotion(image)``.
	    """
	    return emotion_model.predict_emotion(face_roi)

	# ------------------- Main Function -------------------

	def main():
	    """Run the interactive webcam emotion demo.

	    Loads both ONNX models, opens camera 0 and shows each frame with a box
	    and predicted emotion label on every detected face. Pressing ``s``
	    records a short clip, prints its predicted speech emotion and
	    synthesizes a spoken reply; pressing ``q`` quits.

	    The camera and any OpenCV windows are released even when an error is
	    raised inside the loop (model inference, recording or speech synthesis).
	    """
	    face_emotion_model = FacialEmotionRecognizer("emotion-ferplus-8.onnx")
	    speech_emotion_model = SpeechEmotionRecognizer("speech_emotion_model.onnx")

	    cap = cv2.VideoCapture(0)
	    try:
	        if not cap.isOpened():
	            print("Could not open camera 0.")
	            return

	        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

	        print("Press 's' to speak and 'q' to quit.")

	        while True:
	            ret, frame = cap.read()
	            if not ret:
	                print("Failed to grab frame.")
	                break

	            # Detection and classification both work on the grayscale frame.
	            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	            faces = face_cascade.detectMultiScale(gray, 1.3, 5)

	            for (x, y, w, h) in faces:
	                face_roi = gray[y:y+h, x:x+w]
	                emotion = analyze_face(face_roi, face_emotion_model)
	                label = f"Face: {emotion}"
	                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
	                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

	            cv2.imshow("Emotion Recognition", frame)
	            key = cv2.waitKey(1) & 0xFF

	            if key == ord('s'):
	                audio, sr = record_audio()
	                speech_emotion = speech_emotion_model.predict_emotion(audio, sr)
	                print(f"Speech Emotion: {speech_emotion}")
	                audio_file = speak(f"You sound {speech_emotion}")
	                if audio_file:
	                    # TODO: playback is not implemented yet; the generated
	                    # MP3 is left on disk at the path in audio_file.
	                    pass

	            elif key == ord('q'):
	                break
	    finally:
	        # Always free the capture device and close windows, including when
	        # an exception escapes the loop above.
	        cap.release()
	        cv2.destroyAllWindows()


	# Start the demo only when this file is executed as a script.
	if __name__ == "__main__":
	    main()