import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import threading
import queue
import time
from collections import deque
import warnings
warnings.filterwarnings("ignore")
# Try to import OpenCV with fallback
try:
    import cv2
    CV2_AVAILABLE = True
except ImportError:
    CV2_AVAILABLE = False
    print("OpenCV not available - using PIL for image processing")

# Try to import librosa with fallback
try:
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("Librosa not available - using basic audio processing")

# Try to import transformers and torch, with fallbacks
try:
    from transformers import pipeline
    import torch
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False
    print("Transformers not available - using mock emotion detection")

# Additional imports for image processing if OpenCV fails
try:
    from PIL import Image, ImageDraw, ImageFont
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
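
# Dependency overview (inferred from the imports above, not a pinned requirements
# file): a minimal environment needs gradio, numpy, pandas and plotly. opencv-python,
# librosa, transformers, torch and pillow are optional and only enable the code paths
# guarded by CV2_AVAILABLE / LIBROSA_AVAILABLE / HF_AVAILABLE / PIL_AVAILABLE; without
# them the app falls back to mock emotion scores and basic processing.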

class EmotionRecognitionSystem:
    def __init__(self):
        self.emotion_history = deque(maxlen=100)  # Store last 100 emotion readings
        self.audio_queue = queue.Queue()
        self.video_queue = queue.Queue()
        # Initialize emotion detection models
        self.setup_models()
        # Emotion thresholds for alerts
        self.alert_thresholds = {
            'stress': 0.7,
            'anxiety': 0.6,
            'pain': 0.8,
            'confusion': 0.5
        }

    def setup_models(self):
        """Initialize emotion recognition models"""
        if HF_AVAILABLE:
            try:
                # Facial emotion recognition.
                # Note: this checkpoint is a text emotion model; if it cannot be
                # loaded as an image-classification pipeline, the except branch
                # below disables model-based detection and the mock path is used.
                self.face_emotion_pipeline = pipeline(
                    "image-classification",
                    model="j-hartmann/emotion-english-distilroberta-base",
                    device=0 if torch.cuda.is_available() else -1
                )
                # Audio emotion recognition
                self.audio_emotion_pipeline = pipeline(
                    "audio-classification",
                    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
                    device=0 if torch.cuda.is_available() else -1
                )
                self.models_loaded = True
            except Exception as e:
                print(f"Error loading models: {e}")
                self.models_loaded = False
        else:
            self.models_loaded = False

    def detect_face_emotion(self, frame):
        """Detect emotions from facial expressions"""
        if not self.models_loaded:
            # Mock emotion detection for demo
            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
            scores = np.random.dirichlet(np.ones(len(emotions)))
            return dict(zip(emotions, scores))
        try:
            # Handle different image formats
            if isinstance(frame, np.ndarray):
                if CV2_AVAILABLE:
                    # Convert frame to RGB if it's BGR
                    if len(frame.shape) == 3 and frame.shape[2] == 3:
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    else:
                        rgb_frame = frame
                else:
                    # Use numpy operations for color conversion
                    if len(frame.shape) == 3 and frame.shape[2] == 3:
                        rgb_frame = frame[:, :, ::-1]  # BGR to RGB
                    else:
                        rgb_frame = frame
                # The image-classification pipeline expects a PIL image or a
                # path, so wrap the numpy array when PIL is available.
                if PIL_AVAILABLE:
                    rgb_frame = Image.fromarray(rgb_frame.astype(np.uint8))
            else:
                rgb_frame = frame
            # Use face emotion model
            results = self.face_emotion_pipeline(rgb_frame)
            # Convert to standardized format
            emotion_scores = {}
            for result in results:
                emotion_scores[result['label'].lower()] = result['score']
            return emotion_scores
        except Exception as e:
            print(f"Face emotion detection error: {e}")
            return {'neutral': 1.0}

    def detect_voice_emotion(self, audio_data, sample_rate=16000):
        """Detect emotions from voice tone"""
        if not self.models_loaded or audio_data is None:
            # Mock emotion detection
            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
            scores = np.random.dirichlet(np.ones(len(emotions)))
            return dict(zip(emotions, scores))
        try:
            # Pass the raw waveform together with its sampling rate so the
            # pipeline does not have to assume one.
            results = self.audio_emotion_pipeline(
                {"raw": audio_data, "sampling_rate": sample_rate}
            )
            emotion_scores = {}
            for result in results:
                emotion_scores[result['label'].lower()] = result['score']
            return emotion_scores
        except Exception as e:
            print(f"Voice emotion detection error: {e}")
            return {'neutral': 1.0}

    def extract_audio_features(self, audio_data, sample_rate):
        """Extract audio features for emotion analysis"""
        if not LIBROSA_AVAILABLE:
            # Return mock features if librosa is not available
            return {
                'mfcc_mean': np.random.random(),
                'mfcc_std': np.random.random(),
                'spectral_centroid_mean': np.random.random(),
                'zcr_mean': np.random.random(),
                'spectral_rolloff_mean': np.random.random()
            }
        try:
            # Extract basic audio features
            mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
            spectral_centroids = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_data)
            spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data, sr=sample_rate)
            features = {
                'mfcc_mean': np.mean(mfccs),
                'mfcc_std': np.std(mfccs),
                'spectral_centroid_mean': np.mean(spectral_centroids),
                'zcr_mean': np.mean(zero_crossing_rate),
                'spectral_rolloff_mean': np.mean(spectral_rolloff)
            }
            return features
        except Exception as e:
            print(f"Audio feature extraction error: {e}")
            return {}
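
    # Note: extract_audio_features() is not wired into process_frame(); the
    # hand-crafted features it returns could be combined with the model-based
    # scores if the voice analysis is extended later.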

    def combine_emotions(self, face_emotions, voice_emotions, weights=(0.6, 0.4)):
        """Combine facial and voice emotion predictions"""
        combined = {}
        all_emotions = set(face_emotions.keys()) | set(voice_emotions.keys())
        for emotion in all_emotions:
            face_score = face_emotions.get(emotion, 0)
            voice_score = voice_emotions.get(emotion, 0)
            combined[emotion] = weights[0] * face_score + weights[1] * voice_score
        return combined
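
    # For example, with the default weights (0.6, 0.4), a face score of 0.5 and a
    # voice score of 0.25 for 'fear' combine to 0.6 * 0.5 + 0.4 * 0.25 = 0.40.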

    def map_to_clinical_emotions(self, emotions):
        """Map detected emotions to clinical categories"""
        clinical_mapping = {
            'stress': emotions.get('angry', 0) * 0.3 + emotions.get('fear', 0) * 0.4 + emotions.get('disgust', 0) * 0.3,
            'anxiety': emotions.get('fear', 0) * 0.6 + emotions.get('surprise', 0) * 0.2 + emotions.get('sad', 0) * 0.2,
            'pain': emotions.get('angry', 0) * 0.4 + emotions.get('disgust', 0) * 0.3 + emotions.get('sad', 0) * 0.3,
            'confusion': emotions.get('surprise', 0) * 0.5 + emotions.get('neutral', 0) * 0.3 + emotions.get('fear', 0) * 0.2,
            'comfort': emotions.get('happy', 0) * 0.7 + emotions.get('neutral', 0) * 0.3
        }
        return clinical_mapping
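
    # Worked example of the fixed linear mapping above: for detected scores
    # {'angry': 0.2, 'fear': 0.5, 'disgust': 0.1}, stress comes out as
    # 0.2 * 0.3 + 0.5 * 0.4 + 0.1 * 0.3 = 0.29, which stays below the 0.7
    # alert threshold configured in __init__.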

    def generate_alerts(self, clinical_emotions):
        """Generate alerts based on emotion thresholds"""
        alerts = []
        suggestions = []
        for emotion, score in clinical_emotions.items():
            if emotion in self.alert_thresholds and score > self.alert_thresholds[emotion]:
                alerts.append(f"⚠️ High {emotion} detected ({score:.2f})")
                # Add specific suggestions
                if emotion == 'stress':
                    suggestions.append("Consider: Take a moment to slow down, use calming voice tone")
                elif emotion == 'anxiety':
                    suggestions.append("Consider: Provide reassurance, explain procedures clearly")
                elif emotion == 'pain':
                    suggestions.append("Consider: Assess pain level, offer comfort measures")
                elif emotion == 'confusion':
                    suggestions.append("Consider: Simplify explanations, check understanding")
        return alerts, suggestions

    def process_frame(self, frame, audio_data=None, sample_rate=16000):
        """Process a single frame and audio data"""
        timestamp = datetime.now()
        # Detect emotions
        face_emotions = self.detect_face_emotion(frame)
        voice_emotions = self.detect_voice_emotion(audio_data, sample_rate) if audio_data is not None else {}
        # Combine emotions
        if voice_emotions:
            combined_emotions = self.combine_emotions(face_emotions, voice_emotions)
        else:
            combined_emotions = face_emotions
        # Map to clinical categories
        clinical_emotions = self.map_to_clinical_emotions(combined_emotions)
        # Generate alerts
        alerts, suggestions = self.generate_alerts(clinical_emotions)
        # Store in history
        emotion_record = {
            'timestamp': timestamp,
            'face_emotions': face_emotions,
            'voice_emotions': voice_emotions,
            'clinical_emotions': clinical_emotions,
            'alerts': alerts,
            'suggestions': suggestions
        }
        self.emotion_history.append(emotion_record)
        return emotion_record

# Initialize the emotion recognition system
emotion_system = EmotionRecognitionSystem()
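
# Offline usage sketch (not executed by the app): process_frame() also works
# without camera or microphone input, because detect_face_emotion() and
# detect_voice_emotion() fall back to mock scores when no models are loaded.
#
#   dummy_frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   record = emotion_system.process_frame(dummy_frame)
#   print(record['clinical_emotions'], record['alerts'])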

def process_video_audio(video_frame, audio_data):
    """Process video frame and audio data"""
    if video_frame is None:
        return None, "No video input", "", ""
    # Process the audio
    sample_rate = 16000
    if audio_data is not None:
        # Gradio's numpy audio components return (sample_rate, samples)
        sr, audio_array = audio_data
        # Convert integer PCM to float for downstream processing
        if np.issubdtype(audio_array.dtype, np.integer):
            audio_array = audio_array.astype(np.float32) / np.iinfo(audio_array.dtype).max
        if LIBROSA_AVAILABLE and sr != sample_rate:
            audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=sample_rate)
        elif not LIBROSA_AVAILABLE and sr != sample_rate:
            # Basic downsampling if librosa is not available
            step = sr // sample_rate
            audio_array = audio_array[::step] if step > 1 else audio_array
    else:
        audio_array = None
    # Get emotion analysis
    emotion_record = emotion_system.process_frame(video_frame, audio_array, sample_rate)
    # Create visualization
    annotated_frame = create_emotion_overlay(video_frame, emotion_record)
    # Format results
    clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
    alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
    suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
    return annotated_frame, clinical_text, alerts_text, suggestions_text

def create_emotion_overlay(frame, emotion_record):
    """Add emotion information overlay to video frame"""
    try:
        if CV2_AVAILABLE:
            annotated_frame = frame.copy()
            # Get top emotion
            clinical_emotions = emotion_record['clinical_emotions']
            top_emotion = max(clinical_emotions.items(), key=lambda x: x[1])
            # Add text overlay
            cv2.putText(annotated_frame, f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            # Add alert indicator
            if emotion_record['alerts']:
                cv2.putText(annotated_frame, "ALERT!", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            return annotated_frame
        elif PIL_AVAILABLE:
            # Use PIL for image annotation
            pil_image = Image.fromarray(frame)
            draw = ImageDraw.Draw(pil_image)
            # Get top emotion
            clinical_emotions = emotion_record['clinical_emotions']
            top_emotion = max(clinical_emotions.items(), key=lambda x: x[1])
            # Add text overlay
            try:
                font = ImageFont.load_default()
            except Exception:
                font = None
            text = f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})"
            draw.text((10, 10), text, fill=(0, 255, 0), font=font)
            # Add alert indicator
            if emotion_record['alerts']:
                draw.text((10, 40), "ALERT!", fill=(255, 0, 0), font=font)
            return np.array(pil_image)
        else:
            # Return original frame if no image processing available
            return frame
    except Exception as e:
        print(f"Error creating emotion overlay: {e}")
        return frame

def format_clinical_emotions(clinical_emotions):
    """Format clinical emotions for display"""
    formatted = []
    for emotion, score in clinical_emotions.items():
        bar = "█" * int(score * 10)
        formatted.append(f"{emotion.capitalize()}: {bar} {score:.3f}")
    return "\n".join(formatted)

def create_emotion_timeline():
    """Create emotion timeline chart"""
    if not emotion_system.emotion_history:
        return create_empty_chart()
    # Extract data for plotting
    timestamps = [record['timestamp'] for record in emotion_system.emotion_history]
    fig = go.Figure()
    # Add traces for each clinical emotion
    clinical_emotions = ['stress', 'anxiety', 'pain', 'confusion', 'comfort']
    colors = ['red', 'orange', 'purple', 'brown', 'green']
    for emotion, color in zip(clinical_emotions, colors):
        values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
        fig.add_trace(go.Scatter(
            x=timestamps,
            y=values,
            mode='lines+markers',
            name=emotion.capitalize(),
            line=dict(color=color, width=2),
            marker=dict(size=4)
        ))
    fig.update_layout(
        title="Patient Emotion Timeline",
        xaxis_title="Time",
        yaxis_title="Emotion Intensity",
        height=400,
        showlegend=True,
        template="plotly_white"
    )
    return fig

def create_empty_chart():
    """Create empty chart when no data available"""
    fig = go.Figure()
    fig.add_annotation(
        text="No emotion data available yet",
        xref="paper", yref="paper",
        x=0.5, y=0.5, xanchor='center', yanchor='middle',
        showarrow=False, font=dict(size=16)
    )
    fig.update_layout(
        title="Patient Emotion Timeline",
        height=400,
        template="plotly_white"
    )
    return fig

def get_session_summary():
    """Generate session summary"""
    if not emotion_system.emotion_history:
        return "No session data available"
    # Calculate averages
    avg_emotions = {}
    for emotion in ['stress', 'anxiety', 'pain', 'confusion', 'comfort']:
        values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
        avg_emotions[emotion] = np.mean(values) if values else 0
    total_alerts = sum(len(record['alerts']) for record in emotion_system.emotion_history)
    # Format summary
    summary = f"""
Session Summary:
- Duration: {len(emotion_system.emotion_history)} readings
- Average Stress Level: {avg_emotions['stress']:.3f}
- Average Anxiety Level: {avg_emotions['anxiety']:.3f}
- Average Pain Level: {avg_emotions['pain']:.3f}
- Average Confusion Level: {avg_emotions['confusion']:.3f}
- Average Comfort Level: {avg_emotions['comfort']:.3f}
- Total Alerts: {total_alerts}

Recommendations:
- Monitor stress levels during consultation
- Ensure patient understanding and comfort
- Address any recurring high emotion levels
"""
    return summary

def clear_session():
    """Clear session data"""
    emotion_system.emotion_history.clear()
    return "Session data cleared", create_empty_chart(), ""

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🏥 Real-Time Patient Emotion Recognition System

        This system analyzes patient facial expressions and voice tone during consultations to detect emotions such as stress, anxiety, confusion, or pain.
        """)
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### 📹 Live Analysis")
                # Video input: webcam frames as numpy arrays so they can be fed
                # directly to the emotion models (gr.Video would deliver a file
                # path rather than individual frames)
                video_input = gr.Image(
                    label="Video Feed",
                    sources=["webcam"],
                    type="numpy",
                    streaming=True
                )
                # Audio input
                audio_input = gr.Audio(
                    label="Audio Input",
                    sources=["microphone"],
                    type="numpy",
                    streaming=True
                )
                # Process button
                process_btn = gr.Button("🔍 Process Current Frame", variant="primary")
            with gr.Column(scale=2):
                gr.Markdown("### 📊 Real-Time Results")
                # Annotated video output
                video_output = gr.Image(
                    label="Emotion Analysis",
                    type="numpy"
                )
                # Clinical emotions display
                clinical_output = gr.Textbox(
                    label="Clinical Emotion Levels",
                    lines=6,
                    interactive=False
                )
        with gr.Row():
            with gr.Column():
                gr.Markdown("### ⚠️ Alerts")
                alerts_output = gr.Textbox(
                    label="Current Alerts",
                    lines=3,
                    interactive=False
                )
            with gr.Column():
                gr.Markdown("### 💡 Suggestions")
                suggestions_output = gr.Textbox(
                    label="Practitioner Suggestions",
                    lines=3,
                    interactive=False
                )
        with gr.Row():
            gr.Markdown("### 📈 Emotion Timeline")
            timeline_plot = gr.Plot(label="Emotion Timeline")
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📋 Session Summary")
                summary_output = gr.Textbox(
                    label="Session Summary",
                    lines=12,
                    interactive=False
                )
        with gr.Row():
            update_summary_btn = gr.Button("🔄 Update Summary")
            clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")
            update_timeline_btn = gr.Button("📈 Update Timeline")
        # Event handlers
        process_btn.click(
            fn=process_video_audio,
            inputs=[video_input, audio_input],
            outputs=[video_output, clinical_output, alerts_output, suggestions_output]
        )
        update_timeline_btn.click(
            fn=create_emotion_timeline,
            outputs=timeline_plot
        )
        update_summary_btn.click(
            fn=get_session_summary,
            outputs=summary_output
        )
        clear_btn.click(
            fn=clear_session,
            outputs=[summary_output, timeline_plot, clinical_output]
        )
        # Render the timeline once when the page loads; later refreshes are
        # driven by the "Update Timeline" button
        demo.load(fn=create_emotion_timeline, outputs=timeline_plot)
gr.Markdown("""
### π Usage Instructions:
1. **Enable camera and microphone** access when prompted
2. **Click "Process Current Frame"** to analyze emotions in real-time
3. **Monitor the timeline** to track emotion changes over time
4. **Review alerts and suggestions** for patient care recommendations
5. **Use session summary** for consultation documentation
### π§ Technical Notes:
- System uses pre-trained emotion recognition models
- Combines facial expression and voice tone analysis
- Provides clinical emotion mapping (stress, anxiety, pain, confusion)
- Generates real-time alerts and suggestions for practitioners
""")
return demo

# Launch the application
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )