yunusajib committed on
Commit d61cd9f · verified · 1 Parent(s): 9d66904

change the model

Files changed (1)
  1. app.py +476 -105
app.py CHANGED
@@ -1,124 +1,495 @@
 
  import cv2
  import numpy as np
- import pyttsx3
- import onnxruntime as ort
  import librosa
- import sounddevice as sd
- import scipy.io.wavfile as wavfile
- from sklearn.preprocessing import StandardScaler
  import time
- import os
- from gtts import gTTS
- import gradio as gr
- import tempfile
-
- # ------------------- Speech Emotion Recognition Model -------------------
- class SpeechEmotionRecognizer:
-     def __init__(self, model_path):
-         self.model = ort.InferenceSession(model_path)
-         self.input_name = self.model.get_inputs()[0].name
-         self.labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
-
-         # Load or create scaler here (fit on training data offline, then load)
-         self.scaler = StandardScaler()
-
-     def extract_features(self, y, sr):
-         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
-         mfcc_mean = np.mean(mfcc.T, axis=0)
-         # Normally, scaler should be pre-fitted, here we just scale manually to zero mean, unit var
-         mfcc_scaled = (mfcc_mean - np.mean(mfcc_mean)) / np.std(mfcc_mean)
-         return mfcc_scaled
-
-     def predict_emotion(self, audio_data, sr):
-         features = self.extract_features(audio_data, sr)
-         input_data = features.reshape(1, -1).astype(np.float32)
-         pred = self.model.run(None, {self.input_name: input_data})[0]
-         emotion_idx = np.argmax(pred)
-         return self.labels[emotion_idx]
-
- # ------------------- Facial Emotion Recognition Model -------------------
- class FacialEmotionRecognizer:
-     def __init__(self, model_path):
-         self.model = ort.InferenceSession(model_path)
-         self.input_name = self.model.get_inputs()[0].name
-         self.labels = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt']
-
-     def predict_emotion(self, face_img):
-         face_img = cv2.resize(face_img, (64, 64))
-         face_img = face_img.astype('float32')  # FER+ expects float32
-         # FER+ model expects input shape (1, 1, 64, 64)
-         face_img = np.expand_dims(face_img, axis=0)  # (1, 64, 64)
-         face_img = np.expand_dims(face_img, axis=0)  # (1, 1, 64, 64)
-         pred = self.model.run(None, {self.input_name: face_img})[0]
-         emotion_idx = np.argmax(pred)
-         return self.labels[emotion_idx]
-
- # ------------------- Utility Functions -------------------
-
- def speak(text):
-     if not text.strip():
-         return None
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
-         tts = gTTS(text)
-         tts.save(tmpfile.name)
-         return tmpfile.name
-
- def record_audio(duration=3, fs=22050):
-     print("Recording audio...")
-     audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
-     sd.wait()
-     audio = audio.flatten()
-     print("Recording complete.")
-     return audio, fs
-
- def analyze_face(face_roi, emotion_model):
-     emotion = emotion_model.predict_emotion(face_roi)
-     return emotion
-
- # ------------------- Main Function -------------------
-
- def main():
-     face_emotion_model = FacialEmotionRecognizer("emotion-ferplus-8.onnx")
-     speech_emotion_model = SpeechEmotionRecognizer("speech_emotion_model.onnx")
-
-     cap = cv2.VideoCapture(0)
-     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
-
-     print("Press 's' to speak and 'q' to quit.")
-
-     while True:
-         ret, frame = cap.read()
-         if not ret:
-             print("Failed to grab frame.")
-             break
-
-         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-         faces = face_cascade.detectMultiScale(gray, 1.3, 5)
-
-         for (x, y, w, h) in faces:
-             face_roi = gray[y:y+h, x:x+w]
-             emotion = analyze_face(face_roi, face_emotion_model)
-             label = f"Face: {emotion}"
-             cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
-             cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
-
-         cv2.imshow("Emotion Recognition", frame)
-         key = cv2.waitKey(1) & 0xFF
-
-         if key == ord('s'):
-             audio, sr = record_audio()
-             speech_emotion = speech_emotion_model.predict_emotion(audio, sr)
-             print(f"Speech Emotion: {speech_emotion}")
-             audio_file = speak(f"You sound {speech_emotion}")
-             if audio_file:
-                 # Play the TTS audio using cv2 or other player if needed
-                 pass
-
-         elif key == ord('q'):
-             break
-
-     cap.release()
-     cv2.destroyAllWindows()
-
  if __name__ == "__main__":
-     main()
+ import gradio as gr
  import cv2
  import numpy as np
  import librosa
+ import pandas as pd
+ import plotly.graph_objects as go
+ import plotly.express as px
+ from datetime import datetime, timedelta
+ import threading
+ import queue
  import time
+ from collections import deque
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # Try to import transformers and torch, with fallbacks
+ try:
+     from transformers import pipeline
+     import torch
+     HF_AVAILABLE = True
+ except ImportError:
+     HF_AVAILABLE = False
+     print("Transformers not available - using mock emotion detection")
+
+ class EmotionRecognitionSystem:
+     def __init__(self):
+         self.emotion_history = deque(maxlen=100)  # Store last 100 emotion readings
+         self.audio_queue = queue.Queue()
+         self.video_queue = queue.Queue()
+
+         # Initialize emotion detection models
+         self.setup_models()
+
+         # Emotion thresholds for alerts
+         self.alert_thresholds = {
+             'stress': 0.7,
+             'anxiety': 0.6,
+             'pain': 0.8,
+             'confusion': 0.5
+         }
+
+     def setup_models(self):
+         """Initialize emotion recognition models"""
+         if HF_AVAILABLE:
+             try:
+                 # Facial emotion recognition
+                 self.face_emotion_pipeline = pipeline(
+                     "image-classification",
+                     model="j-hartmann/emotion-english-distilroberta-base",
+                     device=0 if torch.cuda.is_available() else -1
+                 )
+
+                 # Audio emotion recognition
+                 self.audio_emotion_pipeline = pipeline(
+                     "audio-classification",
+                     model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+                     device=0 if torch.cuda.is_available() else -1
+                 )
+                 self.models_loaded = True
+             except Exception as e:
+                 print(f"Error loading models: {e}")
+                 self.models_loaded = False
+         else:
+             self.models_loaded = False
+
+     def detect_face_emotion(self, frame):
+         """Detect emotions from facial expressions"""
+         if not self.models_loaded:
+             # Mock emotion detection for demo
+             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
+             scores = np.random.dirichlet(np.ones(len(emotions)))
+             return dict(zip(emotions, scores))
+
+         try:
+             # Convert frame to RGB
+             rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+             # Use face emotion model
+             results = self.face_emotion_pipeline(rgb_frame)
+
+             # Convert to standardized format
+             emotion_scores = {}
+             for result in results:
+                 emotion_scores[result['label'].lower()] = result['score']
+
+             return emotion_scores
+
+         except Exception as e:
+             print(f"Face emotion detection error: {e}")
+             return {'neutral': 1.0}
+
+     def detect_voice_emotion(self, audio_data, sample_rate=16000):
+         """Detect emotions from voice tone"""
+         if not self.models_loaded or audio_data is None:
+             # Mock emotion detection
+             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
+             scores = np.random.dirichlet(np.ones(len(emotions)))
+             return dict(zip(emotions, scores))
+
+         try:
+             # Process audio with the model
+             results = self.audio_emotion_pipeline(audio_data)
+
+             emotion_scores = {}
+             for result in results:
+                 emotion_scores[result['label'].lower()] = result['score']
+
+             return emotion_scores
+
+         except Exception as e:
+             print(f"Voice emotion detection error: {e}")
+             return {'neutral': 1.0}
+
+     def extract_audio_features(self, audio_data, sample_rate):
+         """Extract audio features for emotion analysis"""
+         try:
+             # Extract basic audio features
+             mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
+             spectral_centroids = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
+             zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_data)
+             spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data, sr=sample_rate)
+
+             features = {
+                 'mfcc_mean': np.mean(mfccs),
+                 'mfcc_std': np.std(mfccs),
+                 'spectral_centroid_mean': np.mean(spectral_centroids),
+                 'zcr_mean': np.mean(zero_crossing_rate),
+                 'spectral_rolloff_mean': np.mean(spectral_rolloff)
+             }
+
+             return features
+         except Exception as e:
+             print(f"Audio feature extraction error: {e}")
+             return {}
+
+     def combine_emotions(self, face_emotions, voice_emotions, weights=(0.6, 0.4)):
+         """Combine facial and voice emotion predictions"""
+         combined = {}
+         all_emotions = set(face_emotions.keys()) | set(voice_emotions.keys())
+
+         for emotion in all_emotions:
+             face_score = face_emotions.get(emotion, 0)
+             voice_score = voice_emotions.get(emotion, 0)
+             combined[emotion] = weights[0] * face_score + weights[1] * voice_score
+
+         return combined
+
+     def map_to_clinical_emotions(self, emotions):
+         """Map detected emotions to clinical categories"""
+         clinical_mapping = {
+             'stress': emotions.get('angry', 0) * 0.3 + emotions.get('fear', 0) * 0.4 + emotions.get('disgust', 0) * 0.3,
+             'anxiety': emotions.get('fear', 0) * 0.6 + emotions.get('surprise', 0) * 0.2 + emotions.get('sad', 0) * 0.2,
+             'pain': emotions.get('angry', 0) * 0.4 + emotions.get('disgust', 0) * 0.3 + emotions.get('sad', 0) * 0.3,
+             'confusion': emotions.get('surprise', 0) * 0.5 + emotions.get('neutral', 0) * 0.3 + emotions.get('fear', 0) * 0.2,
+             'comfort': emotions.get('happy', 0) * 0.7 + emotions.get('neutral', 0) * 0.3
+         }
+
+         return clinical_mapping
+
+     def generate_alerts(self, clinical_emotions):
+         """Generate alerts based on emotion thresholds"""
+         alerts = []
+         suggestions = []
+
+         for emotion, score in clinical_emotions.items():
+             if emotion in self.alert_thresholds and score > self.alert_thresholds[emotion]:
+                 alerts.append(f"⚠️ High {emotion} detected ({score:.2f})")
+
+                 # Add specific suggestions
+                 if emotion == 'stress':
+                     suggestions.append("Consider: Take a moment to slow down, use calming voice tone")
+                 elif emotion == 'anxiety':
+                     suggestions.append("Consider: Provide reassurance, explain procedures clearly")
+                 elif emotion == 'pain':
+                     suggestions.append("Consider: Assess pain level, offer comfort measures")
+                 elif emotion == 'confusion':
+                     suggestions.append("Consider: Simplify explanations, check understanding")
+
+         return alerts, suggestions
+
+     def process_frame(self, frame, audio_data=None, sample_rate=16000):
+         """Process a single frame and audio data"""
+         timestamp = datetime.now()
+
+         # Detect emotions
+         face_emotions = self.detect_face_emotion(frame)
+         voice_emotions = self.detect_voice_emotion(audio_data, sample_rate) if audio_data is not None else {}
+
+         # Combine emotions
+         if voice_emotions:
+             combined_emotions = self.combine_emotions(face_emotions, voice_emotions)
+         else:
+             combined_emotions = face_emotions
+
+         # Map to clinical categories
+         clinical_emotions = self.map_to_clinical_emotions(combined_emotions)
+
+         # Generate alerts
+         alerts, suggestions = self.generate_alerts(clinical_emotions)
+
+         # Store in history
+         emotion_record = {
+             'timestamp': timestamp,
+             'face_emotions': face_emotions,
+             'voice_emotions': voice_emotions,
+             'clinical_emotions': clinical_emotions,
+             'alerts': alerts,
+             'suggestions': suggestions
+         }
+
+         self.emotion_history.append(emotion_record)
+
+         return emotion_record
+
+ # Initialize the emotion recognition system
+ emotion_system = EmotionRecognitionSystem()
+
+ def process_video_audio(video_frame, audio_data):
+     """Process video frame and audio data"""
+     if video_frame is None:
+         return None, "No video input", "", ""
+
+     # Process the frame
+     sample_rate = 16000
+     if audio_data is not None:
+         audio_array, sr = audio_data
+         if sr != sample_rate:
+             audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=sample_rate)
+     else:
+         audio_array = None
+
+     # Get emotion analysis
+     emotion_record = emotion_system.process_frame(video_frame, audio_array, sample_rate)
+
+     # Create visualization
+     annotated_frame = create_emotion_overlay(video_frame, emotion_record)
+
+     # Format results
+     clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
+     alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
+     suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
+
+     return annotated_frame, clinical_text, alerts_text, suggestions_text
+
+ def create_emotion_overlay(frame, emotion_record):
+     """Add emotion information overlay to video frame"""
+     annotated_frame = frame.copy()
+
+     # Get top emotion
+     clinical_emotions = emotion_record['clinical_emotions']
+     top_emotion = max(clinical_emotions.items(), key=lambda x: x[1])
+
+     # Add text overlay
+     cv2.putText(annotated_frame, f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})",
+                 (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+     # Add alert indicator
+     if emotion_record['alerts']:
+         cv2.putText(annotated_frame, "ALERT!", (10, 60),
+                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+
+     return annotated_frame
+
+ def format_clinical_emotions(clinical_emotions):
+     """Format clinical emotions for display"""
+     formatted = []
+     for emotion, score in clinical_emotions.items():
+         bar = "█" * int(score * 10)
+         formatted.append(f"{emotion.capitalize()}: {bar} {score:.3f}")
+     return "\n".join(formatted)
+
+ def create_emotion_timeline():
+     """Create emotion timeline chart"""
+     if not emotion_system.emotion_history:
+         return create_empty_chart()
+
+     # Extract data for plotting
+     timestamps = [record['timestamp'] for record in emotion_system.emotion_history]
+
+     fig = go.Figure()
+
+     # Add traces for each clinical emotion
+     clinical_emotions = ['stress', 'anxiety', 'pain', 'confusion', 'comfort']
+     colors = ['red', 'orange', 'purple', 'brown', 'green']
+
+     for emotion, color in zip(clinical_emotions, colors):
+         values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
+         fig.add_trace(go.Scatter(
+             x=timestamps,
+             y=values,
+             mode='lines+markers',
+             name=emotion.capitalize(),
+             line=dict(color=color, width=2),
+             marker=dict(size=4)
+         ))
+
+     fig.update_layout(
+         title="Patient Emotion Timeline",
+         xaxis_title="Time",
+         yaxis_title="Emotion Intensity",
+         height=400,
+         showlegend=True,
+         template="plotly_white"
+     )
+
+     return fig
+
+ def create_empty_chart():
+     """Create empty chart when no data available"""
+     fig = go.Figure()
+     fig.add_annotation(
+         text="No emotion data available yet",
+         xref="paper", yref="paper",
+         x=0.5, y=0.5, xanchor='center', yanchor='middle',
+         showarrow=False, font=dict(size=16)
+     )
+     fig.update_layout(
+         title="Patient Emotion Timeline",
+         height=400,
+         template="plotly_white"
+     )
+     return fig
+
+ def get_session_summary():
+     """Generate session summary"""
+     if not emotion_system.emotion_history:
+         return "No session data available"
+
+     # Calculate averages
+     avg_emotions = {}
+     total_alerts = 0
+
+     for emotion in ['stress', 'anxiety', 'pain', 'confusion', 'comfort']:
+         values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
+         avg_emotions[emotion] = np.mean(values) if values else 0
+
+     total_alerts = sum(len(record['alerts']) for record in emotion_system.emotion_history)
+
+     # Format summary
+     summary = f"""
+     Session Summary:
+     - Duration: {len(emotion_system.emotion_history)} readings
+     - Average Stress Level: {avg_emotions['stress']:.3f}
+     - Average Anxiety Level: {avg_emotions['anxiety']:.3f}
+     - Average Pain Level: {avg_emotions['pain']:.3f}
+     - Average Confusion Level: {avg_emotions['confusion']:.3f}
+     - Average Comfort Level: {avg_emotions['comfort']:.3f}
+     - Total Alerts: {total_alerts}
+
+     Recommendations:
+     - Monitor stress levels during consultation
+     - Ensure patient understanding and comfort
+     - Address any recurring high emotion levels
+     """
+
+     return summary
+
+ def clear_session():
+     """Clear session data"""
+     emotion_system.emotion_history.clear()
+     return "Session data cleared", create_empty_chart(), ""
+
+ # Create Gradio interface
+ def create_interface():
+     with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
+         gr.Markdown("""
+         # 🏥 Real-Time Patient Emotion Recognition System
+
+         This system analyzes patient facial expressions and voice tone during consultations to detect emotions such as stress, anxiety, confusion, or pain.
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 gr.Markdown("### 📹 Live Analysis")
+
+                 # Video input
+                 video_input = gr.Video(
+                     label="Video Feed",
+                     sources=["webcam"],
+                     streaming=True
+                 )
+
+                 # Audio input
+                 audio_input = gr.Audio(
+                     label="Audio Input",
+                     sources=["microphone"],
+                     type="numpy",
+                     streaming=True
+                 )
+
+                 # Process button
+                 process_btn = gr.Button("🔄 Process Current Frame", variant="primary")
+
+             with gr.Column(scale=2):
+                 gr.Markdown("### 📊 Real-Time Results")
+
+                 # Annotated video output
+                 video_output = gr.Image(
+                     label="Emotion Analysis",
+                     type="numpy"
+                 )
+
+                 # Clinical emotions display
+                 clinical_output = gr.Textbox(
+                     label="Clinical Emotion Levels",
+                     lines=6,
+                     interactive=False
+                 )
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### ⚠️ Alerts")
+                 alerts_output = gr.Textbox(
+                     label="Current Alerts",
+                     lines=3,
+                     interactive=False
+                 )
+
+             with gr.Column():
+                 gr.Markdown("### 💡 Suggestions")
+                 suggestions_output = gr.Textbox(
+                     label="Practitioner Suggestions",
+                     lines=3,
+                     interactive=False
+                 )
+
+         with gr.Row():
+             gr.Markdown("### 📈 Emotion Timeline")
+             timeline_plot = gr.Plot(label="Emotion Timeline")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### 📋 Session Summary")
+                 summary_output = gr.Textbox(
+                     label="Session Summary",
+                     lines=12,
+                     interactive=False
+                 )
+
+         with gr.Row():
+             update_summary_btn = gr.Button("📊 Update Summary")
+             clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")
+             update_timeline_btn = gr.Button("🔄 Update Timeline")
+
+         # Event handlers
+         process_btn.click(
+             fn=process_video_audio,
+             inputs=[video_input, audio_input],
+             outputs=[video_output, clinical_output, alerts_output, suggestions_output]
+         )
+
+         update_timeline_btn.click(
+             fn=create_emotion_timeline,
+             outputs=timeline_plot
+         )
+
+         update_summary_btn.click(
+             fn=get_session_summary,
+             outputs=summary_output
+         )
+
+         clear_btn.click(
+             fn=clear_session,
+             outputs=[summary_output, timeline_plot, clinical_output]
+         )
+
+         # Auto-update timeline every few seconds
+         demo.load(fn=create_emotion_timeline, outputs=timeline_plot)
+
+         gr.Markdown("""
+         ### 📝 Usage Instructions:
+         1. **Enable camera and microphone** access when prompted
+         2. **Click "Process Current Frame"** to analyze emotions in real-time
+         3. **Monitor the timeline** to track emotion changes over time
+         4. **Review alerts and suggestions** for patient care recommendations
+         5. **Use session summary** for consultation documentation
+
+         ### 🔧 Technical Notes:
+         - System uses pre-trained emotion recognition models
+         - Combines facial expression and voice tone analysis
+         - Provides clinical emotion mapping (stress, anxiety, pain, confusion)
+         - Generates real-time alerts and suggestions for practitioners
+         """)
+
+     return demo
+
+ # Launch the application
  if __name__ == "__main__":
+     demo = create_interface()
+     demo.launch(
+         share=True,
+         server_name="0.0.0.0",
+         server_port=7860,
+         show_error=True
+     )
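
A quick way to sanity-check the new module outside Gradio is a minimal sketch like the one below. It is not part of this commit; it assumes the new file is saved as app.py with its imports (gradio, opencv-python, numpy, librosa, pandas, plotly) installed, and that transformers is absent so the mock fallback path in EmotionRecognitionSystem is used.

# Hypothetical smoke test (not part of this commit)
import numpy as np
from app import emotion_system                     # module-level instance created in app.py

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # blank BGR frame stands in for a webcam capture
record = emotion_system.process_frame(frame)       # no audio -> face-only path (mock scores here)
print(record['clinical_emotions'])                 # stress/anxiety/pain/confusion/comfort levels
print(record['alerts'])                            # threshold-based alerts, if any

Because demo.launch() is guarded by `if __name__ == "__main__":`, importing app only builds the EmotionRecognitionSystem instance and does not start the Gradio server.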