import gradio as gr
import numpy as np
import cv2
import torch
import torchvision.transforms as transforms
from fer import FER
import librosa
from python_speech_features import mfcc
import pandas as pd
from datetime import datetime
import time
from transformers import pipeline

# Emotion models: FER (with MTCNN face detection) for facial expressions,
# and a HuBERT-based SUPERB classifier for speech emotion recognition
emotion_detector = FER(mtcnn=True)
voice_classifier = pipeline("audio-classification", model="superb/hubert-base-superb-er")
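
# If a GPU is available, the transformers pipeline accepts a `device` argument
# (assumption: CPU inference is intended by default here), e.g.:
# voice_classifier = pipeline("audio-classification", model="superb/hubert-base-superb-er", device=0)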

# Shared session state
emotion_history = []
current_emotions = {"face": "Neutral", "voice": "Neutral"}
last_update_time = time.time()

# Image preprocessing pipeline (currently unused: FER does its own preprocessing;
# kept in case a custom 48x48 grayscale model is plugged in)
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((48, 48)),
    transforms.Grayscale(),
    transforms.ToTensor(),
])


def analyze_face(frame):
    """Analyze facial expressions in a BGR frame and return the dominant emotion."""
    try:
        # FER works on RGB images, so convert back from OpenCV's BGR ordering
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = emotion_detector.detect_emotions(rgb_frame)

        if results:
            # Use the first detected face
            emotions = results[0]['emotions']
            dominant_emotion = max(emotions, key=emotions.get)
            return dominant_emotion, emotions
        return "Neutral", {"angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 1}
    except Exception as e:
        print(f"Face analysis error: {e}")
        return "Neutral", {"angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 1}


def analyze_voice(audio):
    """Analyze voice tone from a Gradio (sample_rate, samples) tuple."""
    try:
        sr, y = audio
        y = y.astype(np.float32)

        # Gradio typically delivers int16 PCM; scale to the [-1, 1] range the model expects
        if np.max(np.abs(y)) > 1.0:
            y = y / 32768.0

        # Mix stereo down to mono (Gradio arrays are shaped (samples, channels))
        if y.ndim > 1:
            y = np.mean(y, axis=1)

        # The HuBERT classifier expects 16 kHz audio
        if sr != 16000:
            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
            sr = 16000

        # The pipeline returns a list of {"label", "score"} dicts sorted by score
        result = voice_classifier({"sampling_rate": sr, "raw": y})

        # The SUPERB ER model emits abbreviated labels (neu/hap/ang/sad);
        # map them to full names so the advice logic below can match on them
        label_map = {"neu": "neutral", "hap": "happy", "ang": "angry", "sad": "sad"}
        dominant_emotion = label_map.get(result[0]['label'], result[0]['label'])
        return dominant_emotion, result
    except Exception as e:
        print(f"Voice analysis error: {e}")
        return "neutral", [{"label": "neutral", "score": 1.0}]


def update_emotion_history(face_emotion, voice_emotion):
    """Update the emotion history and current emotions."""
    global current_emotions, emotion_history, last_update_time

    current_time = datetime.now().strftime("%H:%M:%S")

    current_emotions = {
        "face": face_emotion,
        "voice": voice_emotion,
        "timestamp": current_time
    }

    # Log a new timeline entry at most every 5 seconds to keep the history readable
    if (time.time() - last_update_time) > 5 or not emotion_history:
        emotion_history.append({
            "timestamp": current_time,
            "face": face_emotion,
            "voice": voice_emotion
        })
        last_update_time = time.time()

    # Keep only the 20 most recent entries
    if len(emotion_history) > 20:
        emotion_history = emotion_history[-20:]


def get_emotion_timeline():
    """Create a timeline DataFrame for display."""
    if not emotion_history:
        return pd.DataFrame(columns=["Time", "Facial Emotion", "Voice Emotion"])

    df = pd.DataFrame(emotion_history)
    df = df.rename(columns={
        "timestamp": "Time",
        "face": "Facial Emotion",
        "voice": "Voice Emotion"
    })
    return df


def get_practitioner_advice(face_emotion, voice_emotion):
    """Generate suggestions based on the detected emotions."""
    advice = []

    # Facial cues. Note: "confused" and "pain" are not in FER's default label set
    # (angry, disgust, fear, happy, sad, surprise, neutral), so those branches only
    # fire if a custom detector supplies them.
    if face_emotion in ["sad", "fear"]:
        advice.append("Patient appears distressed. Consider speaking more slowly and with reassurance.")
    elif face_emotion == "angry":
        advice.append("Patient seems frustrated. Acknowledge their concerns and maintain a calm demeanor.")
    elif face_emotion == "confused":
        advice.append("Patient may not understand. Consider rephrasing or providing more explanation.")
    elif face_emotion == "pain":
        advice.append("Patient appears to be in pain. Consider asking about discomfort.")

    # Vocal cues
    if voice_emotion in ["sad", "fear"]:
        advice.append("Patient's tone suggests anxiety. Provide clear explanations and emotional support.")
    elif voice_emotion == "angry":
        advice.append("Patient sounds upset. Practice active listening and validate their feelings.")
    elif voice_emotion == "happy":
        advice.append("Patient seems positive. This may be a good time to discuss treatment options.")

    return "\n".join(advice) if advice else "Patient appears neutral. Continue with consultation."


def process_input(video, audio):
    """Process video and audio inputs and return values for the output components."""
    try:
        # Facial analysis (Gradio delivers RGB frames; analyze_face expects BGR)
        if video is not None:
            frame = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)
            face_emotion, face_details = analyze_face(frame)
        else:
            face_emotion, face_details = "Neutral", {}

        # Voice analysis
        if audio is not None:
            voice_emotion, voice_details = analyze_voice(audio)
        else:
            voice_emotion, voice_details = "neutral", {}

        update_emotion_history(face_emotion, voice_emotion)
        timeline_df = get_emotion_timeline()
        advice = get_practitioner_advice(face_emotion, voice_emotion)

        # Return a tuple in the same order as the output components wired up below
        # (a dict keyed by arbitrary names would not map onto the Gradio outputs)
        return face_emotion, voice_emotion, timeline_df, advice, str(face_details), str(voice_details)
    except Exception as e:
        print(f"Processing error: {e}")
        return "Error", "Error", pd.DataFrame(), "System error occurred", "", ""
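
# Quick offline sanity check that bypasses the Gradio UI (hypothetical local image path):
# frame_rgb = cv2.cvtColor(cv2.imread("sample_face.jpg"), cv2.COLOR_BGR2RGB)
# print(process_input(frame_rgb, None))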


# Gradio interface
with gr.Blocks(title="Patient Emotion Recognition", theme="soft") as demo:
    gr.Markdown("# Real-Time Patient Emotion Recognition")
    gr.Markdown("Analyze facial expressions and voice tone during medical consultations")

    with gr.Row():
        with gr.Column():
            # Note: this uses the Gradio 3.x API; Gradio 4.x replaces source="webcam"
            # with sources=["webcam"] (and likewise for the microphone input)
            video_input = gr.Image(label="Live Camera Feed", source="webcam", streaming=True)
            audio_input = gr.Audio(label="Voice Input", source="microphone", type="numpy")
            submit_btn = gr.Button("Analyze Emotions")

        with gr.Column():
            current_face = gr.Textbox(label="Current Facial Emotion")
            current_voice = gr.Textbox(label="Current Voice Emotion")
            advice_output = gr.Textbox(label="Practitioner Suggestions", lines=3)
            timeline_output = gr.Dataframe(label="Emotion Timeline", interactive=False)
            face_details = gr.Textbox(label="Face Analysis Details", visible=False)
            voice_details = gr.Textbox(label="Voice Analysis Details", visible=False)

    # Re-run the analysis whenever the webcam frame or the recorded audio changes,
    # and on an explicit button press
    video_input.change(
        process_input,
        inputs=[video_input, audio_input],
        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
        show_progress="hidden"
    )

    audio_input.change(
        process_input,
        inputs=[video_input, audio_input],
        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
        show_progress="hidden"
    )

    submit_btn.click(
        process_input,
        inputs=[video_input, audio_input],
        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details]
    )

if __name__ == "__main__":
    demo.launch(debug=True)
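
    # Optional launch settings (standard Gradio kwargs): bind to all network interfaces
    # or create a temporary public share link for remote demos, e.g.:
    # demo.launch(debug=True, server_name="0.0.0.0", share=True)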