import time
from datetime import datetime

import cv2
import gradio as gr
import librosa
import numpy as np
import pandas as pd
from fer import FER
from transformers import pipeline

# Initialize models
emotion_detector = FER(mtcnn=True)  # Facial expression recognition
voice_classifier = pipeline("audio-classification", model="superb/hubert-base-superb-er")

# Global variables to store results
emotion_history = []
current_emotions = {"face": "Neutral", "voice": "Neutral"}
last_update_time = time.time()


def analyze_face(frame):
    """Analyze facial expressions in the frame."""
    try:
        # Convert frame to RGB (FER expects RGB)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Detect emotions
        results = emotion_detector.detect_emotions(rgb_frame)

        if results:
            emotions = results[0]["emotions"]
            dominant_emotion = max(emotions, key=emotions.get)
            return dominant_emotion, emotions

        return "Neutral", {"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
                           "sad": 0, "surprise": 0, "neutral": 1}
    except Exception as e:
        print(f"Face analysis error: {e}")
        return "Neutral", {"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
                           "sad": 0, "surprise": 0, "neutral": 1}


def analyze_voice(audio):
    """Analyze voice tone from audio."""
    try:
        sr, y = audio

        # Normalize integer PCM samples to float32 in [-1, 1]
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        else:
            y = y.astype(np.float32)

        # Convert to mono if stereo (Gradio returns audio as (samples, channels))
        if y.ndim > 1:
            y = np.mean(y, axis=1)

        # Resample to 16 kHz if needed
        if sr != 16000:
            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
            sr = 16000

        # Classify emotion
        result = voice_classifier({"sampling_rate": sr, "raw": y})

        # Map the model's abbreviated labels (e.g. "neu", "hap", "ang") to the full
        # names used elsewhere in this app; unknown labels pass through unchanged
        label_map = {"neu": "neutral", "hap": "happy", "ang": "angry", "sad": "sad"}
        dominant_emotion = label_map.get(result[0]["label"], result[0]["label"])

        return dominant_emotion, result
    except Exception as e:
        print(f"Voice analysis error: {e}")
        return "neutral", [{"label": "neutral", "score": 1.0}]


def update_emotion_history(face_emotion, voice_emotion):
    """Update the emotion history and current emotions."""
    global current_emotions, emotion_history, last_update_time

    current_time = datetime.now().strftime("%H:%M:%S")

    # Update current emotions
    current_emotions = {
        "face": face_emotion,
        "voice": voice_emotion,
        "timestamp": current_time,
    }

    # Add to history every 5 seconds, or immediately if the history is empty
    if (time.time() - last_update_time) > 5 or not emotion_history:
        emotion_history.append({
            "timestamp": current_time,
            "face": face_emotion,
            "voice": voice_emotion,
        })
        last_update_time = time.time()

    # Keep only the last 20 entries
    if len(emotion_history) > 20:
        emotion_history = emotion_history[-20:]


def get_emotion_timeline():
    """Create a timeline DataFrame for display."""
    if not emotion_history:
        return pd.DataFrame(columns=["Time", "Facial Emotion", "Voice Emotion"])

    df = pd.DataFrame(emotion_history)
    df = df.rename(columns={
        "timestamp": "Time",
        "face": "Facial Emotion",
        "voice": "Voice Emotion",
    })
    return df


def get_practitioner_advice(face_emotion, voice_emotion):
    """Generate suggestions based on detected emotions."""
    advice = []

    # Facial emotion advice.
    # Note: FER's default label set is angry/disgust/fear/happy/sad/surprise/neutral,
    # so the "confused" and "pain" branches only fire if a detector reporting those
    # labels is substituted in.
    if face_emotion in ["sad", "fear"]:
        advice.append("Patient appears distressed. Consider speaking more slowly and with reassurance.")
    elif face_emotion == "angry":
        advice.append("Patient seems frustrated. Acknowledge their concerns and maintain a calm demeanor.")
    elif face_emotion == "confused":
        advice.append("Patient may not understand. Consider rephrasing or providing more explanation.")
    elif face_emotion == "pain":
        advice.append("Patient appears to be in pain. Consider asking about discomfort.")

    # Voice emotion advice
    if voice_emotion in ["sad", "fear"]:
        advice.append("Patient's tone suggests anxiety. Provide clear explanations and emotional support.")
    elif voice_emotion == "angry":
        advice.append("Patient sounds upset. Practice active listening and validate their feelings.")
    elif voice_emotion == "happy":
        advice.append("Patient seems positive. This may be a good time to discuss treatment options.")

    return "\n".join(advice) if advice else "Patient appears neutral. Continue with consultation."


def process_input(video, audio):
    """Process video and audio inputs to detect emotions."""
    try:
        # Process video frame
        if video is not None:
            frame = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)
            face_emotion, face_details = analyze_face(frame)
        else:
            face_emotion, face_details = "Neutral", {}

        # Process audio
        if audio is not None:
            voice_emotion, voice_details = analyze_voice(audio)
        else:
            voice_emotion, voice_details = "neutral", {}

        # Update history and build outputs
        update_emotion_history(face_emotion, voice_emotion)
        timeline_df = get_emotion_timeline()
        advice = get_practitioner_advice(face_emotion, voice_emotion)

        # Return values in the same order as the Gradio output components
        return (
            face_emotion,
            voice_emotion,
            timeline_df,
            advice,
            str(face_details),
            str(voice_details),
        )
    except Exception as e:
        print(f"Processing error: {e}")
        return "Error", "Error", pd.DataFrame(), "System error occurred", "", ""


# Gradio interface
with gr.Blocks(title="Patient Emotion Recognition", theme="soft") as demo:
    gr.Markdown("# Real-Time Patient Emotion Recognition")
    gr.Markdown("Analyze facial expressions and voice tone during medical consultations")

    with gr.Row():
        with gr.Column():
            video_input = gr.Image(label="Live Camera Feed", source="webcam", streaming=True)
            audio_input = gr.Audio(label="Voice Input", source="microphone", type="numpy")
            submit_btn = gr.Button("Analyze Emotions")

        with gr.Column():
            current_face = gr.Textbox(label="Current Facial Emotion")
            current_voice = gr.Textbox(label="Current Voice Emotion")
            advice_output = gr.Textbox(label="Practitioner Suggestions", lines=3)
            timeline_output = gr.Dataframe(label="Emotion Timeline", interactive=False)
            face_details = gr.Textbox(label="Face Analysis Details", visible=False)
            voice_details = gr.Textbox(label="Voice Analysis Details", visible=False)

    # Live processing
    video_input.change(
        process_input,
        inputs=[video_input, audio_input],
        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
        show_progress="hidden",
    )

    audio_input.change(
        process_input,
        inputs=[video_input, audio_input],
        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
        show_progress="hidden",
    )

    submit_btn.click(
        process_input,
        inputs=[video_input, audio_input],
        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
    )

if __name__ == "__main__":
    demo.launch(debug=True)