update
app.py
CHANGED
@@ -9,50 +9,50 @@ import queue
 import time
 from collections import deque
 import warnings
+import traceback
 warnings.filterwarnings("ignore")

-# Try to import OpenCV with fallback
-try:
-    import cv2
-    CV2_AVAILABLE = True
-except ImportError:
-    CV2_AVAILABLE = False
-    print("OpenCV not available - using PIL for image processing")
-
-# Try to import librosa with fallback
+# Audio processing imports with fallbacks
+AUDIO_AVAILABLE = True
 try:
+    import soundfile as sf
     import librosa
     LIBROSA_AVAILABLE = True
 except ImportError:
     LIBROSA_AVAILABLE = False
     print("Librosa not available - using basic audio processing")

-# Try to import transformers with fallback
+# Image processing imports with fallbacks
+CV2_AVAILABLE = True
 try:
-    from transformers import pipeline
-    import torch
-    HF_AVAILABLE = True
+    import cv2
 except ImportError:
-    HF_AVAILABLE = False
-    print("Transformers not available - using mock emotion detection")
+    CV2_AVAILABLE = False
+    print("OpenCV not available - using PIL for image processing")

-# Additional imports for image processing if OpenCV fails
 try:
     from PIL import Image, ImageDraw, ImageFont
     PIL_AVAILABLE = True
 except ImportError:
     PIL_AVAILABLE = False
+    print("PIL not available - limited image processing")
+
+# AI model imports with fallbacks
+HF_AVAILABLE = True
+try:
+    from transformers import pipeline
+    import torch
+except ImportError:
+    HF_AVAILABLE = False
+    print("Transformers not available - using mock emotion detection")

 class EmotionRecognitionSystem:
     def __init__(self):
-        self.emotion_history = deque(maxlen=100)
+        self.emotion_history = deque(maxlen=100)
         self.audio_queue = queue.Queue()
         self.video_queue = queue.Queue()
-
-        # Initialize emotion detection models
         self.setup_models()

-        # Emotion thresholds for alerts
         self.alert_thresholds = {
             'stress': 0.7,
             'anxiety': 0.6,
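One detail worth flagging in the reorganized import block: AUDIO_AVAILABLE is set to True up front, but in the lines shown nothing resets it when soundfile or librosa fail to import, so the flag stays True even without an audio backend. A minimal sketch of tying each flag to its import (helper name and module list are illustrative, not part of the commit):

import importlib

def optional_import(module_name):
    """Return (module, True) if importable, else (None, False)."""
    try:
        return importlib.import_module(module_name), True
    except ImportError:
        return None, False

sf, SOUNDFILE_AVAILABLE = optional_import("soundfile")
librosa, LIBROSA_AVAILABLE = optional_import("librosa")
cv2, CV2_AVAILABLE = optional_import("cv2")

# Audio support only if at least one audio backend actually imported.
AUDIO_AVAILABLE = SOUNDFILE_AVAILABLE or LIBROSA_AVAILABLE
print(f"audio={AUDIO_AVAILABLE}, librosa={LIBROSA_AVAILABLE}, cv2={CV2_AVAILABLE}")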
@@ -61,31 +61,77 @@ class EmotionRecognitionSystem:
         }

     def setup_models(self):
-        """Initialize emotion recognition models"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        """Initialize emotion recognition models with better error handling"""
+        self.models_loaded = False
+
+        if not HF_AVAILABLE:
+            print("Skipping model loading - transformers not available")
+            return
+
+        try:
+            # Facial emotion recognition
+            self.face_emotion_pipeline = pipeline(
+                "image-classification",
+                model="j-hartmann/emotion-english-distilroberta-base",
+                device=0 if torch.cuda.is_available() else -1
+            )
+
+            # Audio emotion recognition
+            self.audio_emotion_pipeline = pipeline(
+                "audio-classification",
+                model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+                device=0 if torch.cuda.is_available() else -1
+            )
+            self.models_loaded = True
+        except Exception as e:
+            print(f"Error loading models: {e}")
+            print(traceback.format_exc())
             self.models_loaded = False

+    def validate_audio_input(self, audio_data):
+        """Validate and standardize audio input format"""
+        if audio_data is None:
+            return None
+
+        try:
+            # Handle different audio input formats
+            if isinstance(audio_data, tuple):
+                audio_array, sample_rate = audio_data
+            else:
+                # Try to read audio file if not in tuple format
+                if isinstance(audio_data, str):
+                    if LIBROSA_AVAILABLE:
+                        audio_array, sample_rate = librosa.load(audio_data, sr=None)
+                    else:
+                        # Fallback for when librosa is not available
+                        import wave
+                        with wave.open(audio_data, 'rb') as wf:
+                            sample_rate = wf.getframerate()
+                            audio_array = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
+                            audio_array = audio_array.astype(np.float32) / 32768.0
+                else:
+                    return None
+
+            # Resample if needed
+            target_rate = 16000
+            if sample_rate != target_rate:
+                if LIBROSA_AVAILABLE:
+                    audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_rate)
+                else:
+                    # Simple downsampling fallback
+                    step = int(sample_rate / target_rate)
+                    if step > 1:
+                        audio_array = audio_array[::step]
+                sample_rate = target_rate
+
+            return (audio_array, sample_rate)
+
+        except Exception as e:
+            print(f"Audio validation error: {e}")
+            return None
+
     def detect_face_emotion(self, frame):
-        """Detect emotions from facial expressions"""
+        """Detect emotions from facial expressions with better error handling"""
         if not self.models_loaded:
             # Mock emotion detection for demo
             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
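The new validate_audio_input standardizes whatever the UI hands over into a float array at 16 kHz. Two hedged notes: gr.Audio(type="numpy") is documented to return a (sample_rate, data) tuple, so the audio_array, sample_rate unpack order above may need checking, and "j-hartmann/emotion-english-distilroberta-base" is a text-emotion checkpoint, so loading it under the "image-classification" task may fail at startup. Below is a self-contained sketch of the same normalization steps on a synthetic tone, using only the naive-decimation fallback (function name is illustrative):

import numpy as np

def normalize_audio(audio, target_rate=16000):
    """Take an (array, rate) pair as unpacked above; return float mono at target_rate."""
    array, rate = audio
    array = np.asarray(array, dtype=np.float32)
    if array.ndim == 2:            # stereo -> mono
        array = array.mean(axis=1)
    if rate != target_rate:        # crude decimation, mirroring the fallback branch
        step = int(rate / target_rate)
        if step > 1:
            array = array[::step]
        rate = target_rate
    return array, rate

t = np.linspace(0, 1, 48000, endpoint=False)
tone = 0.1 * np.sin(2 * np.pi * 440 * t)   # one second of A440 sampled at 48 kHz
arr, sr = normalize_audio((tone, 48000))
print(arr.shape, sr)                       # -> (16000,) 16000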
@@ -93,20 +139,24 @@
             return dict(zip(emotions, scores))

         try:
-            #
+            # Convert frame to RGB format
             if isinstance(frame, np.ndarray):
-                if
-
-
-
+                if len(frame.shape) == 3:
+                    if frame.shape[2] == 4:  # RGBA
+                        rgb_frame = frame[:, :, :3]
+                    elif frame.shape[2] == 3:  # BGR or RGB?
+                        if CV2_AVAILABLE:
+                            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                        else:
+                            rgb_frame = frame[:, :, ::-1]  # Simple BGR to RGB
                     else:
                         rgb_frame = frame
                 else:
-                    #
-                    if
-                        rgb_frame = frame
+                    # Grayscale to RGB
+                    if CV2_AVAILABLE:
+                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
                     else:
-                        rgb_frame = frame
+                        rgb_frame = np.stack((frame,)*3, axis=-1)
             else:
                 rgb_frame = frame

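The rewritten branch ladder accepts RGBA, 3-channel, and grayscale frames. A small numpy-only sketch exercising the cv2-free paths shown above (helper name is illustrative):

import numpy as np

def to_rgb(frame):
    """Mirror the fallback branches: drop alpha, reverse BGR, or stack grayscale."""
    if frame.ndim == 3:
        if frame.shape[2] == 4:            # RGBA -> drop alpha channel
            return frame[:, :, :3]
        if frame.shape[2] == 3:            # assume BGR, reverse channel order
            return frame[:, :, ::-1]
        return frame
    return np.stack((frame,) * 3, axis=-1)  # grayscale -> three identical channels

rgba = np.zeros((4, 4, 4), dtype=np.uint8)
bgr = np.zeros((4, 4, 3), dtype=np.uint8)
gray = np.zeros((4, 4), dtype=np.uint8)
print(to_rgb(rgba).shape, to_rgb(bgr).shape, to_rgb(gray).shape)   # (4, 4, 3) for all three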
@@ -124,8 +174,8 @@
             print(f"Face emotion detection error: {e}")
             return {'neutral': 1.0}

-    def detect_voice_emotion(self, audio_data, sample_rate=16000):
-        """Detect emotions from voice tone"""
+    def detect_voice_emotion(self, audio_data):
+        """Detect emotions from voice tone with better audio handling"""
         if not self.models_loaded or audio_data is None:
             # Mock emotion detection
             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
@@ -133,8 +183,18 @@
             return dict(zip(emotions, scores))

         try:
+            # Validate and standardize audio input
+            validated_audio = self.validate_audio_input(audio_data)
+            if validated_audio is None:
+                return {'neutral': 1.0}
+
+            audio_array, sample_rate = validated_audio
+
             # Process audio with the model
-            results = self.audio_emotion_pipeline(
+            results = self.audio_emotion_pipeline({
+                "array": audio_array,
+                "sampling_rate": sample_rate
+            })

             emotion_scores = {}
             for result in results:
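An audio-classification pipeline in transformers typically returns a list of {"label", "score"} dicts, which is presumably what the for result in results: loop that follows flattens into emotion_scores. A sketch with mocked results so it runs without downloading the model:

# Mocked pipeline output; real labels depend on the checkpoint.
mock_results = [
    {"label": "neu", "score": 0.52},
    {"label": "hap", "score": 0.31},
    {"label": "ang", "score": 0.17},
]

emotion_scores = {}
for result in mock_results:
    emotion_scores[result["label"]] = result["score"]

print(emotion_scores)   # {'neu': 0.52, 'hap': 0.31, 'ang': 0.17}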
@@ -146,431 +206,59 @@
             print(f"Voice emotion detection error: {e}")
             return {'neutral': 1.0}

-    def extract_audio_features(self, audio_data, sample_rate=16000):
-        """Extract audio features for emotion analysis"""
-        if not LIBROSA_AVAILABLE:
-            # Return mock features if librosa is not available
-            return {
-                'mfcc_mean': np.random.random(),
-                'mfcc_std': np.random.random(),
-                'spectral_centroid_mean': np.random.random(),
-                'zcr_mean': np.random.random(),
-                'spectral_rolloff_mean': np.random.random()
-            }
-
-        try:
-            # Extract basic audio features
-            mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
-            spectral_centroids = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
-            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_data)
-            spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data, sr=sample_rate)
-
-            features = {
-                'mfcc_mean': np.mean(mfccs),
-                'mfcc_std': np.std(mfccs),
-                'spectral_centroid_mean': np.mean(spectral_centroids),
-                'zcr_mean': np.mean(zero_crossing_rate),
-                'spectral_rolloff_mean': np.mean(spectral_rolloff)
-            }
-
-            return features
-        except Exception as e:
-            print(f"Audio feature extraction error: {e}")
-            return {}
-
-    def combine_emotions(self, face_emotions, voice_emotions, weights=(0.6, 0.4)):
-        """Combine facial and voice emotion predictions"""
-        combined = {}
-        all_emotions = set(face_emotions.keys()) | set(voice_emotions.keys())
-
-        for emotion in all_emotions:
-            face_score = face_emotions.get(emotion, 0)
-            voice_score = voice_emotions.get(emotion, 0)
-            combined[emotion] = weights[0] * face_score + weights[1] * voice_score
-
-        return combined
-
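For reference, a worked example of the 0.6/0.4 face/voice fusion implemented by the removed combine_emotions (standalone copy; the input scores are made up):

def combine_emotions(face_emotions, voice_emotions, weights=(0.6, 0.4)):
    """Weighted average over the union of emotion keys; missing keys count as 0."""
    combined = {}
    for emotion in set(face_emotions) | set(voice_emotions):
        combined[emotion] = (weights[0] * face_emotions.get(emotion, 0)
                             + weights[1] * voice_emotions.get(emotion, 0))
    return combined

face = {"happy": 0.6, "neutral": 0.4}
voice = {"happy": 0.2, "sad": 0.8}
print({k: round(v, 2) for k, v in combine_emotions(face, voice).items()})
# happy = 0.6*0.6 + 0.4*0.2 = 0.44, neutral = 0.6*0.4 = 0.24, sad = 0.4*0.8 = 0.32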
-    def map_to_clinical_emotions(self, emotions):
-        """Map detected emotions to clinical categories"""
-        clinical_mapping = {
-            'stress': emotions.get('angry', 0) * 0.3 + emotions.get('fear', 0) * 0.4 + emotions.get('disgust', 0) * 0.3,
-            'anxiety': emotions.get('fear', 0) * 0.6 + emotions.get('surprise', 0) * 0.2 + emotions.get('sad', 0) * 0.2,
-            'pain': emotions.get('angry', 0) * 0.4 + emotions.get('disgust', 0) * 0.3 + emotions.get('sad', 0) * 0.3,
-            'confusion': emotions.get('surprise', 0) * 0.5 + emotions.get('neutral', 0) * 0.3 + emotions.get('fear', 0) * 0.2,
-            'comfort': emotions.get('happy', 0) * 0.7 + emotions.get('neutral', 0) * 0.3
-        }
-
-        return clinical_mapping
-
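A worked example of the clinical mapping above on one set of basic-emotion scores; the resulting stress and anxiety values stay well below the 0.7 and 0.6 thresholds set in __init__:

# Illustrative basic-emotion scores (sum to 1.0).
emotions = {"angry": 0.2, "fear": 0.5, "disgust": 0.0, "surprise": 0.1,
            "sad": 0.1, "neutral": 0.05, "happy": 0.05}

stress = 0.3 * emotions["angry"] + 0.4 * emotions["fear"] + 0.3 * emotions["disgust"]
anxiety = 0.6 * emotions["fear"] + 0.2 * emotions["surprise"] + 0.2 * emotions["sad"]
pain = 0.4 * emotions["angry"] + 0.3 * emotions["disgust"] + 0.3 * emotions["sad"]
confusion = 0.5 * emotions["surprise"] + 0.3 * emotions["neutral"] + 0.2 * emotions["fear"]
comfort = 0.7 * emotions["happy"] + 0.3 * emotions["neutral"]

print(round(stress, 3), round(anxiety, 3), round(pain, 3), round(confusion, 3), round(comfort, 3))
# 0.26 0.34 0.11 0.165 0.05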
-    def generate_alerts(self, clinical_emotions):
-        """Generate alerts based on emotion thresholds"""
-        alerts = []
-        suggestions = []
-
-        for emotion, score in clinical_emotions.items():
-            if emotion in self.alert_thresholds and score > self.alert_thresholds[emotion]:
-                alerts.append(f"⚠️ High {emotion} detected ({score:.2f})")
-
-                # Add specific suggestions
-                if emotion == 'stress':
-                    suggestions.append("Consider: Take a moment to slow down, use calming voice tone")
-                elif emotion == 'anxiety':
-                    suggestions.append("Consider: Provide reassurance, explain procedures clearly")
-                elif emotion == 'pain':
-                    suggestions.append("Consider: Assess pain level, offer comfort measures")
-                elif emotion == 'confusion':
-                    suggestions.append("Consider: Simplify explanations, check understanding")
-
-        return alerts, suggestions
-
-    def process_frame(self, frame, audio_data=None, sample_rate=16000):
-        """Process a single frame and audio data"""
-        timestamp = datetime.now()
-
-        # Detect emotions
-        face_emotions = self.detect_face_emotion(frame)
-        voice_emotions = self.detect_voice_emotion(audio_data, sample_rate) if audio_data is not None else {}
-
-        # Combine emotions
-        if voice_emotions:
-            combined_emotions = self.combine_emotions(face_emotions, voice_emotions)
-        else:
-            combined_emotions = face_emotions
-
-        # Map to clinical categories
-        clinical_emotions = self.map_to_clinical_emotions(combined_emotions)
-
-        # Generate alerts
-        alerts, suggestions = self.generate_alerts(clinical_emotions)
-
-        # Store in history
-        emotion_record = {
-            'timestamp': timestamp,
-            'face_emotions': face_emotions,
-            'voice_emotions': voice_emotions,
-            'clinical_emotions': clinical_emotions,
-            'alerts': alerts,
-            'suggestions': suggestions
-        }
-
-        self.emotion_history.append(emotion_record)
-
-        return emotion_record
-
-# Initialize the emotion recognition system
-emotion_system = EmotionRecognitionSystem()
+    # [Rest of your existing methods...]
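Note that the replacement keeps only the literal placeholder comment, while the new process_video_audio below still calls emotion_system.process_frame, create_emotion_overlay and format_clinical_emotions, so those definitions have to remain in the file for the app to keep working. For orientation, a minimal, stubbed sketch of a process_frame consistent with the removed version (class name and stub scores are illustrative only):

from collections import deque
from datetime import datetime

class MiniEmotionSystem:
    def __init__(self):
        self.emotion_history = deque(maxlen=100)
        self.alert_thresholds = {"stress": 0.7, "anxiety": 0.6}

    def detect_face_emotion(self, frame):            # stub detector for illustration
        return {"neutral": 0.7, "fear": 0.3}

    def map_to_clinical_emotions(self, emotions):    # reduced mapping for illustration
        return {"stress": 0.4 * emotions.get("fear", 0),
                "anxiety": 0.6 * emotions.get("fear", 0)}

    def process_frame(self, frame, audio_data=None, sample_rate=16000):
        face = self.detect_face_emotion(frame)
        clinical = self.map_to_clinical_emotions(face)
        alerts = [f"High {k} ({v:.2f})" for k, v in clinical.items()
                  if v > self.alert_thresholds.get(k, 1.0)]
        record = {"timestamp": datetime.now(), "clinical_emotions": clinical,
                  "alerts": alerts, "suggestions": []}
        self.emotion_history.append(record)
        return record

print(MiniEmotionSystem().process_frame(frame=None)["clinical_emotions"])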
 
 def process_video_audio(video_frame, audio_data):
-    """Process video frame and audio data"""
+    """Process video frame and audio data with better error handling"""
     if video_frame is None:
         return None, "No video input", "", ""

-    # Process the frame
-    sample_rate = 16000
-    if audio_data is not None:
-        audio_array, sr = audio_data
-        if LIBROSA_AVAILABLE and sr != sample_rate:
-            audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=sample_rate)
-        elif not LIBROSA_AVAILABLE:
-            # Simple resampling if librosa not available
-            if sr != sample_rate:
-                # Basic downsampling
-                step = sr // sample_rate
-                audio_array = audio_array[::step] if step > 1 else audio_array
-    else:
-        audio_array = None
-
-    # Get emotion analysis
-    emotion_record = emotion_system.process_frame(video_frame, audio_array, sample_rate)
-
-    # Create visualization
-    annotated_frame = create_emotion_overlay(video_frame, emotion_record)
-
-    # Format results
-    clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
-    alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
-    suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
-
-    return annotated_frame, clinical_text, alerts_text, suggestions_text
-
-def create_emotion_overlay(frame, emotion_record):
-    """Add emotion information overlay to video frame"""
     try:
-
-
-
-
-
-
-
-
-
-                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-
-        # Add alert indicator
-        if emotion_record['alerts']:
-            cv2.putText(annotated_frame, "ALERT!", (10, 60),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
-
-        return annotated_frame
+        # Process the frame
+        validated_audio = emotion_system.validate_audio_input(audio_data)
+
+        # Get emotion analysis
+        emotion_record = emotion_system.process_frame(
+            video_frame,
+            validated_audio[0] if validated_audio else None,
+            validated_audio[1] if validated_audio else 16000
+        )

-
-
-
-
-
-
-
-
-
-        # Add text overlay
-        try:
-            font = ImageFont.load_default()
-        except:
-            font = None
-
-        text = f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})"
-        draw.text((10, 10), text, fill=(0, 255, 0), font=font)
-
-        # Add alert indicator
-        if emotion_record['alerts']:
-            draw.text((10, 40), "ALERT!", fill=(255, 0, 0), font=font)
-
-        return np.array(pil_image)
+        # Create visualization
+        annotated_frame = create_emotion_overlay(video_frame, emotion_record)
+
+        # Format results
+        clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
+        alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
+        suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
+
+        return annotated_frame, clinical_text, alerts_text, suggestions_text

-        else:
-            # Return original frame if no image processing available
-            return frame
-
     except Exception as e:
-        print(f"
-
+        print(f"Processing error: {e}")
+        traceback.print_exc()
+        return video_frame, "Processing error", "System error", "Please try again"

-def format_clinical_emotions(clinical_emotions):
-    """Format clinical emotions for display"""
-    formatted = []
-    for emotion, score in clinical_emotions.items():
-        bar = "█" * int(score * 10)
-        formatted.append(f"{emotion.capitalize()}: {bar} {score:.3f}")
-    return "\n".join(formatted)
+# [Rest of your existing functions...]

-def create_emotion_timeline():
-    """Create emotion timeline chart"""
-    if not emotion_system.emotion_history:
-        return create_empty_chart()
-
-    # Extract data for plotting
-    timestamps = [record['timestamp'] for record in emotion_system.emotion_history]
-
-    fig = go.Figure()
-
-    # Add traces for each clinical emotion
-    clinical_emotions = ['stress', 'anxiety', 'pain', 'confusion', 'comfort']
-    colors = ['red', 'orange', 'purple', 'brown', 'green']
-
-    for emotion, color in zip(clinical_emotions, colors):
-        values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
-        fig.add_trace(go.Scatter(
-            x=timestamps,
-            y=values,
-            mode='lines+markers',
-            name=emotion.capitalize(),
-            line=dict(color=color, width=2),
-            marker=dict(size=4)
-        ))
-
-    fig.update_layout(
-        title="Patient Emotion Timeline",
-        xaxis_title="Time",
-        yaxis_title="Emotion Intensity",
-        height=400,
-        showlegend=True,
-        template="plotly_white"
-    )
-
-    return fig
-
-def create_empty_chart():
-    """Create empty chart when no data available"""
-    fig = go.Figure()
-    fig.add_annotation(
-        text="No emotion data available yet",
-        xref="paper", yref="paper",
-        x=0.5, y=0.5, xanchor='center', yanchor='middle',
-        showarrow=False, font=dict(size=16)
-    )
-    fig.update_layout(
-        title="Patient Emotion Timeline",
-        height=400,
-        template="plotly_white"
-    )
-    return fig
-
-def get_session_summary():
-    """Generate session summary"""
-    if not emotion_system.emotion_history:
-        return "No session data available"
-
-    # Calculate averages
-    avg_emotions = {}
-    total_alerts = 0
-
-    for emotion in ['stress', 'anxiety', 'pain', 'confusion', 'comfort']:
-        values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
-        avg_emotions[emotion] = np.mean(values) if values else 0
-
-    total_alerts = sum(len(record['alerts']) for record in emotion_system.emotion_history)
-
-    # Format summary
-    summary = f"""
-    Session Summary:
-    - Duration: {len(emotion_system.emotion_history)} readings
-    - Average Stress Level: {avg_emotions['stress']:.3f}
-    - Average Anxiety Level: {avg_emotions['anxiety']:.3f}
-    - Average Pain Level: {avg_emotions['pain']:.3f}
-    - Average Confusion Level: {avg_emotions['confusion']:.3f}
-    - Average Comfort Level: {avg_emotions['comfort']:.3f}
-    - Total Alerts: {total_alerts}
-
-    Recommendations:
-    - Monitor stress levels during consultation
-    - Ensure patient understanding and comfort
-    - Address any recurring high emotion levels
-    """
-
-    return summary
-
-def clear_session():
-    """Clear session data"""
-    emotion_system.emotion_history.clear()
-    return "Session data cleared", create_empty_chart(), ""
-
-# Create Gradio interface
 def create_interface():
     with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
-        gr.Markdown("""
-        # 🏥 Real-Time Patient Emotion Recognition System
-
-        This system analyzes patient facial expressions and voice tone during consultations to detect emotions such as stress, anxiety, confusion, or pain.
-        """)
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                gr.Markdown("### 📹 Live Analysis")
-
-                # Video input
-                video_input = gr.Video(
-                    label="Video Feed",
-                    sources=["webcam"],
-                    streaming=True
-                )
-
-                # Audio input
-                audio_input = gr.Audio(
-                    label="Audio Input",
-                    sources=["microphone"],
-                    type="numpy",
-                    streaming=True
-                )
-
-                # Process button
-                process_btn = gr.Button("🔄 Process Current Frame", variant="primary")
-
-            with gr.Column(scale=2):
-                gr.Markdown("### 📊 Real-Time Results")
-
-                # Annotated video output
-                video_output = gr.Image(
-                    label="Emotion Analysis",
-                    type="numpy"
-                )
-
-                # Clinical emotions display
-                clinical_output = gr.Textbox(
-                    label="Clinical Emotion Levels",
-                    lines=6,
-                    interactive=False
-                )
-
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### ⚠️ Alerts")
-                alerts_output = gr.Textbox(
-                    label="Current Alerts",
-                    lines=3,
-                    interactive=False
-                )
-
-            with gr.Column():
-                gr.Markdown("### 💡 Suggestions")
-                suggestions_output = gr.Textbox(
-                    label="Practitioner Suggestions",
-                    lines=3,
-                    interactive=False
-                )
-
-        with gr.Row():
-            gr.Markdown("### 📈 Emotion Timeline")
-            timeline_plot = gr.Plot(label="Emotion Timeline")
-
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### 📋 Session Summary")
-                summary_output = gr.Textbox(
-                    label="Session Summary",
-                    lines=12,
-                    interactive=False
-                )
-
-        with gr.Row():
-            update_summary_btn = gr.Button("📊 Update Summary")
-            clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")
-            update_timeline_btn = gr.Button("🔄 Update Timeline")
-
-        # Event handlers
-        process_btn.click(
-            fn=process_video_audio,
-            inputs=[video_input, audio_input],
-            outputs=[video_output, clinical_output, alerts_output, suggestions_output]
-        )
-
-        update_timeline_btn.click(
-            fn=create_emotion_timeline,
-            outputs=timeline_plot
-        )
-
-        update_summary_btn.click(
-            fn=get_session_summary,
-            outputs=summary_output
-        )
-
-        clear_btn.click(
-            fn=clear_session,
-            outputs=[summary_output, timeline_plot, clinical_output]
-        )
-
-        # Auto-update timeline every few seconds
-        demo.load(fn=create_emotion_timeline, outputs=timeline_plot)
+        # [Your existing interface code...]

+        # Add audio format info
         gr.Markdown("""
-        ###
-
-
-
-
-
-
-        ### 🔧 Technical Notes:
-        - System uses pre-trained emotion recognition models
-        - Combines facial expression and voice tone analysis
-        - Provides clinical emotion mapping (stress, anxiety, pain, confusion)
-        - Generates real-time alerts and suggestions for practitioners
+        ### 🔊 Audio Input Notes:
+        - System works best with clear microphone input
+        - If you get audio errors, try:
+          - Checking microphone permissions
+          - Reducing background noise
+          - Using a different microphone
         """)

     return demo

-# Launch the application
 if __name__ == "__main__":
+    emotion_system = EmotionRecognitionSystem()
     demo = create_interface()
     demo.launch(
         share=True,
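With this commit, emotion_system is created only under if __name__ == "__main__":, while module-level functions such as process_video_audio still reference it as a global; if app.py is ever imported rather than executed directly, that global is undefined. A sketch of a lazy accessor that sidesteps the issue (helper names are illustrative, not part of the commit; EmotionRecognitionSystem is the class defined above):

_emotion_system = None

def get_emotion_system():
    """Create the shared EmotionRecognitionSystem on first use."""
    global _emotion_system
    if _emotion_system is None:
        _emotion_system = EmotionRecognitionSystem()
    return _emotion_system

Call sites such as process_video_audio would then use get_emotion_system() instead of the bare emotion_system global.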