yunusajib commited on
Commit
38fef5b
·
verified ·
1 Parent(s): d17fe0c
Files changed (1) hide show
  1. app.py +423 -60
app.py CHANGED
@@ -1,83 +1,446 @@
1
  import gradio as gr
 
2
  import numpy as np
3
- import time
4
- from datetime import datetime
 
 
 
 
 
5
 
6
- class EmotionRecognizer:
 
7
  def __init__(self):
8
- self.sample_rate = 16000
9
- self.emotion_history = []
 
 
10
 
11
- def analyze_audio(self, audio_data):
12
- # Mock audio analysis - replace with your actual model
 
 
 
 
13
  emotions = {
14
- 'happy': np.random.random() * 0.5,
15
- 'sad': np.random.random() * 0.3,
16
- 'angry': np.random.random() * 0.2,
17
- 'neutral': np.random.random() * 0.5
 
 
 
18
  }
 
 
 
 
 
19
  return emotions
20
 
21
- def analyze_image(self, image):
22
- # Mock image analysis - replace with your actual model
23
- emotions = {
24
- 'happy': np.random.random() * 0.6,
25
- 'confused': np.random.random() * 0.4,
26
- 'pain': np.random.random() * 0.3,
27
- 'neutral': np.random.random() * 0.5
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  return emotions
30
 
31
- def process_inputs(self, video_frame, audio_data):
32
- # Get current timestamp
33
- timestamp = datetime.now().strftime("%H:%M:%S")
34
-
35
- # Process inputs (mock implementation)
36
- audio_emotions = self.analyze_audio(audio_data) if audio_data else {}
37
- visual_emotions = self.analyze_image(video_frame) if video_frame else {}
 
38
 
39
- # Combine results
40
- combined = {**audio_emotions, **visual_emotions}
41
- self.emotion_history.append((timestamp, combined))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # Generate outputs
44
- top_emotion = max(combined.items(), key=lambda x: x[1]) if combined else ('none', 0)
45
- stats = f"Top emotion: {top_emotion[0]} ({top_emotion[1]:.2f})"
46
- history = "\n".join([f"{t}: {e}" for t, e in self.emotion_history[-3:]])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- return stats, history
 
 
 
49
 
50
- def create_interface():
51
- recognizer = EmotionRecognizer()
 
 
52
 
53
- def process_frame(video_frame, audio_data):
54
- try:
55
- stats, history = recognizer.process_inputs(video_frame, audio_data)
56
- return stats, history
57
- except Exception as e:
58
- return f"Error: {str(e)}", "No history available"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- with gr.Blocks(title="Emotion Recognition", theme=gr.themes.Soft()) as app:
61
- gr.Markdown("# Patient Emotion Recognition System")
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- with gr.Row():
64
- with gr.Column():
65
- video_input = gr.Image(sources=["webcam"], label="Video Feed")
66
- audio_input = gr.Audio(sources=["microphone"], label="Audio Input")
67
- process_btn = gr.Button("Analyze", variant="primary")
68
-
69
- with gr.Column():
70
- stats_output = gr.Textbox(label="Current Analysis")
71
- history_output = gr.Textbox(label="Recent History", lines=4)
 
 
 
 
72
 
73
- process_btn.click(
74
- fn=process_frame,
75
- inputs=[video_input, audio_input],
76
- outputs=[stats_output, history_output]
77
  )
78
 
79
- return app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  if __name__ == "__main__":
82
- app = create_interface()
83
- app.launch()
 
1
  import gradio as gr
2
+ import cv2
3
  import numpy as np
4
+ import librosa
5
+ import pandas as pd
6
+ import plotly.graph_objects as go
7
+ import plotly.express as px
8
+ from datetime import datetime, timedelta
9
+ import warnings
10
+ warnings.filterwarnings('ignore')
11
 
12
+ # Mock emotion detection functions (replace with actual models in production)
13
+ class EmotionAnalyzer:
14
  def __init__(self):
15
+ # In production, load actual pretrained models here
16
+ self.face_emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust', 'surprise']
17
+ self.voice_emotions = ['calm', 'stressed', 'anxious', 'confused', 'pain', 'frustrated']
18
+ self.session_data = []
19
 
20
+ def analyze_facial_expression(self, frame):
21
+ """Simulate facial expression analysis"""
22
+ # In production: use actual face detection + emotion recognition model
23
+ # Example: face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
24
+
25
+ # Mock analysis - replace with actual model inference
26
  emotions = {
27
+ 'neutral': np.random.uniform(0.1, 0.7),
28
+ 'happy': np.random.uniform(0.0, 0.3),
29
+ 'sad': np.random.uniform(0.0, 0.4),
30
+ 'angry': np.random.uniform(0.0, 0.2),
31
+ 'fear': np.random.uniform(0.0, 0.3),
32
+ 'disgust': np.random.uniform(0.0, 0.1),
33
+ 'surprise': np.random.uniform(0.0, 0.2)
34
  }
35
+
36
+ # Normalize to sum to 1
37
+ total = sum(emotions.values())
38
+ emotions = {k: v/total for k, v in emotions.items()}
39
+
40
  return emotions
41
 
42
+ def analyze_voice_emotion(self, audio_data, sample_rate):
43
+ """Simulate voice emotion analysis"""
44
+ if audio_data is None or len(audio_data) == 0:
45
+ return {'calm': 1.0}
46
+
47
+ # Extract audio features (these would be used with actual models)
48
+ try:
49
+ # Basic audio feature extraction
50
+ mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
51
+ spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
52
+ zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_data)
53
+
54
+ # Mock emotion prediction based on audio characteristics
55
+ energy = np.mean(audio_data**2)
56
+ pitch_var = np.var(spectral_centroid)
57
+
58
+ # Simulate emotion detection based on audio features
59
+ emotions = {
60
+ 'calm': max(0.1, 0.8 - energy * 10),
61
+ 'stressed': min(0.8, energy * 5 + pitch_var * 100),
62
+ 'anxious': min(0.7, pitch_var * 150),
63
+ 'confused': np.random.uniform(0.0, 0.3),
64
+ 'pain': min(0.6, energy * 8 if energy > 0.1 else 0.0),
65
+ 'frustrated': min(0.5, energy * 3 + pitch_var * 80)
66
+ }
67
+
68
+ # Normalize
69
+ total = sum(emotions.values())
70
+ emotions = {k: v/total for k, v in emotions.items()}
71
+
72
+ except Exception as e:
73
+ # Fallback if audio processing fails
74
+ emotions = {'calm': 1.0}
75
+
76
  return emotions
77
 
78
+ def process_consultation_data(self, video_file, audio_file):
79
+ """Process video and audio files for emotion analysis"""
80
+ results = {
81
+ 'timestamp': [],
82
+ 'facial_emotions': [],
83
+ 'voice_emotions': [],
84
+ 'alerts': []
85
+ }
86
 
87
+ # Process video file
88
+ if video_file is not None:
89
+ cap = cv2.VideoCapture(video_file)
90
+ frame_count = 0
91
+
92
+ while cap.read()[0] and frame_count < 100: # Limit for demo
93
+ ret, frame = cap.read()
94
+ if not ret:
95
+ break
96
+
97
+ if frame_count % 30 == 0: # Analyze every 30th frame
98
+ facial_emotions = self.analyze_facial_expression(frame)
99
+ timestamp = frame_count / 30 # Assuming 30 FPS
100
+
101
+ results['timestamp'].append(timestamp)
102
+ results['facial_emotions'].append(facial_emotions)
103
+
104
+ # Check for alerts
105
+ if facial_emotions.get('sad', 0) > 0.4 or facial_emotions.get('fear', 0) > 0.3:
106
+ results['alerts'].append(f"High stress/sadness detected at {timestamp:.1f}s")
107
+
108
+ frame_count += 1
109
+
110
+ cap.release()
111
 
112
+ # Process audio file
113
+ if audio_file is not None:
114
+ try:
115
+ audio_data, sample_rate = librosa.load(audio_file, duration=60) # Limit for demo
116
+
117
+ # Analyze audio in chunks
118
+ chunk_duration = 3 # seconds
119
+ chunk_samples = chunk_duration * sample_rate
120
+
121
+ for i in range(0, len(audio_data), chunk_samples):
122
+ chunk = audio_data[i:i+chunk_samples]
123
+ if len(chunk) > sample_rate: # Minimum 1 second
124
+ voice_emotions = self.analyze_voice_emotion(chunk, sample_rate)
125
+ timestamp = i / sample_rate
126
+
127
+ if len(results['voice_emotions']) <= len(results['timestamp']):
128
+ results['voice_emotions'].append(voice_emotions)
129
+
130
+ # Check for voice-based alerts
131
+ if voice_emotions.get('pain', 0) > 0.4 or voice_emotions.get('stressed', 0) > 0.5:
132
+ results['alerts'].append(f"Voice stress/pain detected at {timestamp:.1f}s")
133
+
134
+ except Exception as e:
135
+ print(f"Audio processing error: {e}")
136
 
137
+ return results
138
+
139
+ # Initialize analyzer
140
+ analyzer = EmotionAnalyzer()
141
 
142
+ def create_emotion_timeline(data):
143
+ """Create timeline visualization of emotions"""
144
+ if not data['timestamp']:
145
+ return go.Figure()
146
 
147
+ fig = go.Figure()
148
+
149
+ # Plot facial emotions
150
+ if data['facial_emotions']:
151
+ for emotion in ['sad', 'fear', 'angry', 'neutral', 'happy']:
152
+ values = [emotions.get(emotion, 0) for emotions in data['facial_emotions']]
153
+ fig.add_trace(go.Scatter(
154
+ x=data['timestamp'],
155
+ y=values,
156
+ mode='lines+markers',
157
+ name=f'Face: {emotion.title()}',
158
+ line=dict(width=2)
159
+ ))
160
+
161
+ # Plot voice emotions
162
+ if data['voice_emotions']:
163
+ for emotion in ['stressed', 'anxious', 'pain', 'calm']:
164
+ values = [emotions.get(emotion, 0) for emotions in data['voice_emotions'][:len(data['timestamp'])]]
165
+ if len(values) == len(data['timestamp']):
166
+ fig.add_trace(go.Scatter(
167
+ x=data['timestamp'],
168
+ y=values,
169
+ mode='lines+markers',
170
+ name=f'Voice: {emotion.title()}',
171
+ line=dict(dash='dash', width=2)
172
+ ))
173
+
174
+ fig.update_layout(
175
+ title='Patient Emotion Timeline During Consultation',
176
+ xaxis_title='Time (seconds)',
177
+ yaxis_title='Emotion Intensity',
178
+ height=500,
179
+ hovermode='x unified'
180
+ )
181
 
182
+ return fig
183
+
184
+ def create_emotion_summary(data):
185
+ """Create summary charts of detected emotions"""
186
+ if not data['facial_emotions'] and not data['voice_emotions']:
187
+ return go.Figure(), go.Figure()
188
+
189
+ # Facial emotion summary
190
+ face_fig = go.Figure()
191
+ if data['facial_emotions']:
192
+ face_summary = {}
193
+ for emotions in data['facial_emotions']:
194
+ for emotion, value in emotions.items():
195
+ face_summary[emotion] = face_summary.get(emotion, 0) + value
196
 
197
+ face_fig = px.pie(
198
+ values=list(face_summary.values()),
199
+ names=list(face_summary.keys()),
200
+ title='Facial Expression Summary'
201
+ )
202
+
203
+ # Voice emotion summary
204
+ voice_fig = go.Figure()
205
+ if data['voice_emotions']:
206
+ voice_summary = {}
207
+ for emotions in data['voice_emotions']:
208
+ for emotion, value in emotions.items():
209
+ voice_summary[emotion] = voice_summary.get(emotion, 0) + value
210
 
211
+ voice_fig = px.pie(
212
+ values=list(voice_summary.values()),
213
+ names=list(voice_summary.keys()),
214
+ title='Voice Emotion Summary'
215
  )
216
 
217
+ return face_fig, voice_fig
218
+
219
+ def generate_recommendations(data):
220
+ """Generate recommendations based on detected emotions"""
221
+ recommendations = []
222
+ alerts = data.get('alerts', [])
223
+
224
+ if alerts:
225
+ recommendations.append("⚠️ **ALERTS DETECTED:**")
226
+ for alert in alerts[:5]: # Limit to 5 alerts
227
+ recommendations.append(f"• {alert}")
228
+ recommendations.append("")
229
+
230
+ # Analyze overall emotion patterns
231
+ high_stress_count = 0
232
+ pain_indicators = 0
233
+ confusion_signs = 0
234
+
235
+ for emotions in data.get('facial_emotions', []):
236
+ if emotions.get('sad', 0) > 0.3 or emotions.get('fear', 0) > 0.25:
237
+ high_stress_count += 1
238
+
239
+ for emotions in data.get('voice_emotions', []):
240
+ if emotions.get('pain', 0) > 0.3:
241
+ pain_indicators += 1
242
+ if emotions.get('confused', 0) > 0.3:
243
+ confusion_signs += 1
244
+
245
+ # Generate specific recommendations
246
+ if high_stress_count > len(data.get('facial_emotions', [])) * 0.3:
247
+ recommendations.append("🧘 **Stress Management:** Patient shows signs of elevated stress. Consider:")
248
+ recommendations.append(" • Offering reassurance and clear explanations")
249
+ recommendations.append(" • Allowing more time for questions")
250
+ recommendations.append(" • Suggesting relaxation techniques")
251
+ recommendations.append("")
252
+
253
+ if pain_indicators > 0:
254
+ recommendations.append("🩺 **Pain Assessment:** Voice analysis suggests possible discomfort:")
255
+ recommendations.append(" • Conduct thorough pain assessment")
256
+ recommendations.append(" • Consider pain management options")
257
+ recommendations.append(" • Monitor patient comfort throughout consultation")
258
+ recommendations.append("")
259
+
260
+ if confusion_signs > 0:
261
+ recommendations.append("💭 **Communication:** Signs of confusion detected:")
262
+ recommendations.append(" • Use simpler language and medical terms")
263
+ recommendations.append(" • Repeat important information")
264
+ recommendations.append(" • Provide written summaries")
265
+ recommendations.append("")
266
+
267
+ if not recommendations:
268
+ recommendations.append("✅ **Overall Assessment:** Patient appears comfortable and engaged.")
269
+ recommendations.append("Continue with current consultation approach.")
270
+
271
+ return "\n".join(recommendations)
272
+
273
+ def process_consultation(video_file, audio_file):
274
+ """Main processing function"""
275
+ if video_file is None and audio_file is None:
276
+ return None, None, None, "Please upload video and/or audio files to analyze."
277
+
278
+ # Process the consultation data
279
+ data = analyzer.process_consultation_data(video_file, audio_file)
280
+
281
+ # Create visualizations
282
+ timeline_fig = create_emotion_timeline(data)
283
+ face_summary, voice_summary = create_emotion_summary(data)
284
+
285
+ # Generate recommendations
286
+ recommendations = generate_recommendations(data)
287
+
288
+ return timeline_fig, face_summary, voice_summary, recommendations
289
+
290
+ def real_time_analysis(audio):
291
+ """Real-time audio emotion analysis"""
292
+ if audio is None:
293
+ return "No audio detected"
294
+
295
+ try:
296
+ # Process audio data
297
+ sample_rate, audio_data = audio
298
+
299
+ # Convert to float and normalize
300
+ if audio_data.dtype == np.int16:
301
+ audio_data = audio_data.astype(np.float32) / 32768.0
302
+ elif audio_data.dtype == np.int32:
303
+ audio_data = audio_data.astype(np.float32) / 2147483648.0
304
+
305
+ # Analyze emotions
306
+ emotions = analyzer.analyze_voice_emotion(audio_data, sample_rate)
307
+
308
+ # Format results
309
+ result = "**Real-time Voice Emotion Analysis:**\n\n"
310
+ for emotion, confidence in sorted(emotions.items(), key=lambda x: x[1], reverse=True):
311
+ percentage = confidence * 100
312
+ result += f"• **{emotion.title()}**: {percentage:.1f}%\n"
313
+
314
+ # Add alerts if needed
315
+ if emotions.get('pain', 0) > 0.4:
316
+ result += "\n⚠️ **ALERT**: High pain level detected"
317
+ elif emotions.get('stressed', 0) > 0.5:
318
+ result += "\n⚠️ **ALERT**: High stress level detected"
319
+
320
+ return result
321
+
322
+ except Exception as e:
323
+ return f"Error processing audio: {str(e)}"
324
+
325
+ # Create Gradio interface
326
+ with gr.Blocks(title="Patient Emotion Analysis System", theme=gr.themes.Soft()) as demo:
327
+ gr.Markdown("""
328
+ # 🏥 Patient Emotion Analysis System
329
+
330
+ This system analyzes patient facial expressions and voice tone during consultations to detect emotions
331
+ such as stress, anxiety, confusion, or pain, helping healthcare practitioners provide better care.
332
+
333
+ **Features:**
334
+ - Facial expression analysis from video recordings
335
+ - Voice emotion detection from audio
336
+ - Real-time emotion monitoring
337
+ - Clinical recommendations based on detected emotions
338
+ """)
339
+
340
+ with gr.Tabs():
341
+ # Consultation Analysis Tab
342
+ with gr.Tab("📹 Consultation Analysis"):
343
+ gr.Markdown("### Upload consultation video and/or audio for comprehensive emotion analysis")
344
+
345
+ with gr.Row():
346
+ with gr.Column():
347
+ video_input = gr.File(
348
+ label="Upload Video File",
349
+ file_types=[".mp4", ".avi", ".mov", ".mkv"],
350
+ type="filepath"
351
+ )
352
+ audio_input = gr.File(
353
+ label="Upload Audio File",
354
+ file_types=[".wav", ".mp3", ".m4a", ".flac"],
355
+ type="filepath"
356
+ )
357
+ analyze_btn = gr.Button("🔍 Analyze Consultation", variant="primary", size="lg")
358
+
359
+ with gr.Column():
360
+ recommendations_output = gr.Markdown(label="Clinical Recommendations")
361
+
362
+ with gr.Row():
363
+ timeline_plot = gr.Plot(label="Emotion Timeline")
364
+
365
+ with gr.Row():
366
+ with gr.Column():
367
+ face_summary_plot = gr.Plot(label="Facial Expression Summary")
368
+ with gr.Column():
369
+ voice_summary_plot = gr.Plot(label="Voice Emotion Summary")
370
+
371
+ analyze_btn.click(
372
+ fn=process_consultation,
373
+ inputs=[video_input, audio_input],
374
+ outputs=[timeline_plot, face_summary_plot, voice_summary_plot, recommendations_output]
375
+ )
376
+
377
+ # Real-time Monitoring Tab
378
+ with gr.Tab("🎤 Real-time Monitoring"):
379
+ gr.Markdown("### Real-time voice emotion analysis during consultation")
380
+
381
+ with gr.Row():
382
+ with gr.Column():
383
+ audio_realtime = gr.Audio(
384
+ sources=["microphone"],
385
+ type="numpy",
386
+ label="Real-time Audio Input"
387
+ )
388
+
389
+ with gr.Column():
390
+ realtime_output = gr.Markdown(label="Real-time Analysis Results")
391
+
392
+ audio_realtime.change(
393
+ fn=real_time_analysis,
394
+ inputs=[audio_realtime],
395
+ outputs=[realtime_output]
396
+ )
397
+
398
+ # Information Tab
399
+ with gr.Tab("ℹ️ System Information"):
400
+ gr.Markdown("""
401
+ ### System Overview
402
+
403
+ This Patient Emotion Analysis System uses advanced AI models to analyze:
404
+
405
+ **Facial Expression Analysis:**
406
+ - Detects 7 basic emotions: neutral, happy, sad, angry, fear, disgust, surprise
407
+ - Uses computer vision techniques for face detection and emotion recognition
408
+ - Analyzes video frame-by-frame for temporal emotion patterns
409
+
410
+ **Voice Emotion Analysis:**
411
+ - Extracts audio features: MFCC, spectral centroid, zero-crossing rate
412
+ - Detects emotions: calm, stressed, anxious, confused, pain, frustrated
413
+ - Real-time analysis capability for live consultations
414
+
415
+ **Clinical Applications:**
416
+ - Helps practitioners identify patient distress early
417
+ - Provides objective emotion metrics
418
+ - Suggests intervention strategies
419
+ - Improves patient-practitioner communication
420
+
421
+ **Privacy & Ethics:**
422
+ - All processing is done locally
423
+ - No data is stored permanently
424
+ - Designed to assist, not replace clinical judgment
425
+ - Compliant with healthcare data protection standards
426
+
427
+ ### Technical Implementation Notes:
428
+
429
+ **For Production Use:**
430
+ 1. Replace mock emotion detection with actual pretrained models:
431
+ - FER-2013, AffectNet for facial emotions
432
+ - Audio emotion models (RAVDESS, IEMOCAP datasets)
433
+ 2. Implement proper face detection (OpenCV, dlib, or MediaPipe)
434
+ 3. Add real-time video processing capabilities
435
+ 4. Integrate with hospital systems and EHR
436
+ 5. Add user authentication and data encryption
437
+ 6. Calibrate alert thresholds based on clinical validation
438
+
439
+ **Recommended Models:**
440
+ - **Facial**: FER+ model, OpenFace, or custom CNN trained on medical data
441
+ - **Voice**: Speech emotion recognition using LSTM/Transformer architectures
442
+ - **Integration**: Multi-modal fusion for improved accuracy
443
+ """)
444
 
445
  if __name__ == "__main__":
446
+ demo.launch(share=True)