File size: 9,660 Bytes
d61cd9f
ef2be41
d61cd9f
 
 
 
 
 
ef2be41
d61cd9f
 
d287980
d61cd9f
2754fd7
d287980
 
3253f05
d287980
3253f05
 
 
 
 
 
d287980
 
d61cd9f
d287980
d61cd9f
d287980
 
5eff629
3253f05
 
 
 
 
d287980
 
 
 
 
 
 
 
 
 
3253f05
d61cd9f
 
d287980
d61cd9f
 
 
 
 
 
 
 
 
 
 
 
d287980
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d61cd9f
 
d287980
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d61cd9f
d287980
d61cd9f
 
 
 
 
 
 
d287980
3253f05
d287980
 
 
 
 
 
 
 
3253f05
 
 
d287980
 
 
3253f05
d287980
3253f05
 
d61cd9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d287980
 
d61cd9f
 
 
 
 
 
 
d287980
 
 
 
 
 
 
d61cd9f
d287980
 
 
 
d61cd9f
 
 
 
 
 
 
 
 
 
 
d287980
2754fd7
d61cd9f
d287980
d61cd9f
 
 
3253f05
d287980
 
 
 
 
 
 
 
 
3253f05
d287980
 
 
 
 
 
 
 
 
3253f05
 
d287980
 
 
2754fd7
d287980
2754fd7
d61cd9f
 
d287980
d61cd9f
d287980
d61cd9f
d287980
 
 
 
 
 
d61cd9f
 
 
3baa918
 
d287980
d61cd9f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import threading
import queue
import time
from collections import deque
import warnings
import traceback
warnings.filterwarnings("ignore")

# Audio processing imports with fallbacks
AUDIO_AVAILABLE = True
try:
    import soundfile as sf
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("Librosa not available - using basic audio processing")

# Image processing imports with fallbacks
CV2_AVAILABLE = True
try:
    import cv2
except ImportError:
    CV2_AVAILABLE = False
    print("OpenCV not available - using PIL for image processing")

try:
    from PIL import Image, ImageDraw, ImageFont
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    print("PIL not available - limited image processing")

# AI model imports with fallbacks
HF_AVAILABLE = True
try:
    from transformers import pipeline
    import torch
except ImportError:
    HF_AVAILABLE = False
    print("Transformers not available - using mock emotion detection")

class EmotionRecognitionSystem:
    """Face and voice emotion recogniser with mock fallbacks.

    When the Hugging Face pipelines cannot be loaded (``models_loaded`` is
    False) the ``detect_*`` methods return random, normalised mock scores so
    the surrounding demo keeps working.
    """

    # Sampling rate every audio clip is converted to before inference.
    TARGET_SAMPLE_RATE = 16000

    def __init__(self):
        """Create the history/queue containers, load models, set thresholds."""
        self.emotion_history = deque(maxlen=100)
        self.audio_queue = queue.Queue()
        self.video_queue = queue.Queue()
        self.setup_models()

        # Per-state score thresholds; not consumed by any method in this block.
        self.alert_thresholds = {
            'stress': 0.7,
            'anxiety': 0.6,
            'pain': 0.8,
            'confusion': 0.5
        }

    def setup_models(self):
        """Initialize emotion recognition models with better error handling.

        Sets ``self.models_loaded`` to True only when both pipelines were
        created. The pipeline attributes always exist afterwards (``None``
        when loading was skipped or failed before they were assigned).
        """
        self.models_loaded = False
        self.face_emotion_pipeline = None
        self.audio_emotion_pipeline = None

        if not HF_AVAILABLE:
            print("Skipping model loading - transformers not available")
            return

        try:
            device = 0 if torch.cuda.is_available() else -1

            # Facial emotion recognition
            # NOTE(review): this checkpoint name refers to a DistilRoBERTa
            # *text* emotion model, which does not look compatible with the
            # "image-classification" task -- confirm and replace with an
            # image model, otherwise loading fails and the system stays in
            # mock mode.
            self.face_emotion_pipeline = pipeline(
                "image-classification",
                model="j-hartmann/emotion-english-distilroberta-base",
                device=device
            )

            # Audio emotion recognition
            self.audio_emotion_pipeline = pipeline(
                "audio-classification",
                model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
                device=device
            )
            self.models_loaded = True
        except Exception as e:
            print(f"Error loading models: {e}")
            print(traceback.format_exc())
            self.models_loaded = False

    @staticmethod
    def _mock_scores(emotions):
        """Return random scores summing to 1 for the given emotion labels."""
        return dict(zip(emotions, np.random.dirichlet(np.ones(len(emotions)))))

    @staticmethod
    def _scores_from_results(results):
        """Map pipeline output (list of label/score dicts) to {label: score}."""
        return {item['label'].lower(): item['score'] for item in results}

    @staticmethod
    def _load_audio_file(path):
        """Read an audio file and return ``(samples, sample_rate)``.

        Uses librosa when available; otherwise falls back to the stdlib
        ``wave`` reader, which only understands PCM WAV files.
        """
        if LIBROSA_AVAILABLE:
            return librosa.load(path, sr=None)

        import wave
        with wave.open(path, 'rb') as wf:
            sample_rate = wf.getframerate()
            channels = wf.getnchannels()
            width = wf.getsampwidth()
            frames = wf.readframes(wf.getnframes())

        # 8-bit WAV is unsigned, 16/32-bit are signed little-endian PCM.
        dtype = {1: np.uint8, 2: np.int16, 4: np.int32}.get(width)
        if dtype is None:
            raise ValueError(f"Unsupported WAV sample width: {width} bytes")
        samples = np.frombuffer(frames, dtype=dtype)
        if channels > 1:
            # WAV frames are interleaved: one row per frame, one column per channel.
            samples = samples.reshape(-1, channels)
        return samples, sample_rate

    @staticmethod
    def _to_mono_float32(audio_array):
        """Convert samples to a 1-D float32 array scaled to roughly [-1, 1]."""
        samples = np.asarray(audio_array)
        if np.issubdtype(samples.dtype, np.integer):
            info = np.iinfo(samples.dtype)
            samples = samples.astype(np.float32)
            if info.min == 0:
                # Unsigned PCM is offset-binary: the midpoint represents silence.
                half_range = (info.max + 1) / 2.0
                samples = (samples - half_range) / half_range
            else:
                samples = samples / (info.max + 1.0)
        else:
            samples = samples.astype(np.float32)

        if samples.ndim == 2:
            # Heuristic: the shorter axis is the channel axis, which covers
            # both (samples, channels) and (channels, samples) layouts.
            samples = samples.mean(axis=int(np.argmin(samples.shape)))
        elif samples.ndim != 1:
            raise ValueError(f"Unsupported audio shape: {samples.shape}")
        return samples.astype(np.float32, copy=False)

    @staticmethod
    def _resample(audio_array, sample_rate, target_rate):
        """Resample ``audio_array`` from ``sample_rate`` to ``target_rate``."""
        if LIBROSA_AVAILABLE:
            return librosa.resample(
                audio_array, orig_sr=sample_rate, target_sr=target_rate
            )

        # Fallback: linear interpolation onto the target time grid. Unlike
        # integer-step decimation it yields the requested rate for any ratio
        # (including upsampling); it applies no anti-aliasing filter.
        n_out = max(1, int(round(audio_array.size * target_rate / sample_rate)))
        src_times = np.arange(audio_array.size) / sample_rate
        dst_times = np.arange(n_out) / target_rate
        return np.interp(dst_times, src_times, audio_array).astype(np.float32)

    def validate_audio_input(self, audio_data):
        """Validate and standardize audio input format.

        Args:
            audio_data: ``None``, a path to an audio file, or a 2-tuple of
                samples and sampling rate. Both ``(sample_rate, samples)``
                (the order Gradio's numpy audio uses) and
                ``(samples, sample_rate)`` are accepted; the scalar element
                is taken to be the rate.

        Returns:
            ``(audio_array, sample_rate)`` with ``audio_array`` a mono
            float32 array and ``sample_rate`` equal to
            ``TARGET_SAMPLE_RATE``, or ``None`` when the input is missing,
            empty, of an unsupported type, or cannot be processed.
        """
        if audio_data is None:
            return None

        try:
            # Handle different audio input formats
            if isinstance(audio_data, tuple):
                if len(audio_data) != 2:
                    return None
                first, second = audio_data
                if np.isscalar(first) and not np.isscalar(second):
                    sample_rate, audio_array = first, second
                else:
                    audio_array, sample_rate = first, second
            elif isinstance(audio_data, str):
                audio_array, sample_rate = self._load_audio_file(audio_data)
            else:
                return None

            audio_array = self._to_mono_float32(audio_array)
            sample_rate = int(sample_rate)
            if audio_array.size == 0 or sample_rate <= 0:
                return None

            # Resample if needed; the returned rate always describes the
            # returned samples.
            target_rate = self.TARGET_SAMPLE_RATE
            if sample_rate != target_rate:
                audio_array = self._resample(audio_array, sample_rate, target_rate)
                sample_rate = target_rate

            return (audio_array, sample_rate)

        except Exception as e:
            print(f"Audio validation error: {e}")
            return None

    def _prepare_frame(self, frame, assume_bgr):
        """Convert a frame into the RGB input handed to the face pipeline."""
        if not isinstance(frame, np.ndarray):
            return frame

        if len(frame.shape) == 3:
            channels = frame.shape[2]
            if channels == 4:
                # Drop the alpha channel.
                rgb_frame = frame[:, :, :3]
            elif channels == 3 and assume_bgr:
                if CV2_AVAILABLE:
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                else:
                    rgb_frame = frame[:, :, ::-1]  # Simple BGR to RGB
            else:
                rgb_frame = frame
        else:
            # Grayscale to RGB
            if CV2_AVAILABLE:
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
            else:
                rgb_frame = np.stack((frame,) * 3, axis=-1)

        if not PIL_AVAILABLE:
            return rgb_frame

        # The Transformers image pipelines take PIL images (or paths/URLs),
        # not raw arrays, so wrap the array before inference.
        if rgb_frame.dtype != np.uint8:
            if (np.issubdtype(rgb_frame.dtype, np.floating)
                    and rgb_frame.size and rgb_frame.max() <= 1.0):
                rgb_frame = rgb_frame * 255.0
            rgb_frame = np.clip(rgb_frame, 0, 255).astype(np.uint8)
        return Image.fromarray(np.ascontiguousarray(rgb_frame))

    def detect_face_emotion(self, frame, assume_bgr=True):
        """Detect emotions from facial expressions with better error handling.

        Args:
            frame: Image as a numpy array (grayscale, 3-channel or RGBA) or
                any object the face pipeline accepts directly.
            assume_bgr: When True (the default, matching previous behaviour)
                3-channel arrays are treated as BGR and converted to RGB.
                Pass False for frames that are already RGB, such as those
                delivered by Gradio image components.

        Returns:
            Dict mapping lower-cased emotion label to score. Random mock
            scores when models are not loaded; ``{'neutral': 1.0}`` when
            inference fails.
        """
        if not self.models_loaded:
            # Mock emotion detection for demo
            return self._mock_scores(
                ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
            )

        try:
            model_input = self._prepare_frame(frame, assume_bgr)

            # Use face emotion model
            results = self.face_emotion_pipeline(model_input)

            # Convert to standardized format
            return self._scores_from_results(results)

        except Exception as e:
            print(f"Face emotion detection error: {e}")
            return {'neutral': 1.0}

    def detect_voice_emotion(self, audio_data):
        """Detect emotions from voice tone with better audio handling.

        Args:
            audio_data: Any input accepted by ``validate_audio_input``.

        Returns:
            Dict mapping lower-cased emotion label to score. Random mock
            scores when models are not loaded or ``audio_data`` is None;
            ``{'neutral': 1.0}`` when validation or inference fails.
        """
        if not self.models_loaded or audio_data is None:
            # Mock emotion detection
            return self._mock_scores(['neutral', 'happy', 'sad', 'angry', 'fear'])

        try:
            # Validate and standardize audio input
            validated_audio = self.validate_audio_input(audio_data)
            if validated_audio is None:
                return {'neutral': 1.0}

            audio_array, sample_rate = validated_audio

            # Process audio with the model
            results = self.audio_emotion_pipeline({
                "array": audio_array,
                "sampling_rate": sample_rate
            })

            return self._scores_from_results(results)

        except Exception as e:
            print(f"Voice emotion detection error: {e}")
            return {'neutral': 1.0}

    # [Rest of your existing methods...]

def process_video_audio(video_frame, audio_data):
    """Run emotion analysis on one frame plus optional audio.

    Returns a 4-tuple ``(frame, clinical_text, alerts_text,
    suggestions_text)``. With no frame the result is
    ``(None, "No video input", "", "")``; if anything raises during
    processing the untouched frame is returned with fixed error strings.
    """
    if video_frame is None:
        return None, "No video input", "", ""

    def _joined_or(items, fallback):
        # One line per entry, or the fallback text when there are none.
        return "\n".join(items) if items else fallback

    try:
        # Standardise the audio first; a failed validation yields None.
        audio = emotion_system.validate_audio_input(audio_data)
        if audio:
            samples, rate = audio[0], audio[1]
        else:
            samples, rate = None, 16000

        # Analyse the frame together with whatever audio is available.
        record = emotion_system.process_frame(video_frame, samples, rate)

        # Draw the results onto the frame.
        overlay = create_emotion_overlay(video_frame, record)

        # Build the three text panels.
        clinical_text = format_clinical_emotions(record['clinical_emotions'])
        alerts_text = _joined_or(record['alerts'], "No alerts")
        suggestions_text = _joined_or(record['suggestions'], "No suggestions")
    except Exception as exc:
        print(f"Processing error: {exc}")
        traceback.print_exc()
        return video_frame, "Processing error", "System error", "Please try again"

    return overlay, clinical_text, alerts_text, suggestions_text

# [Rest of your existing functions...]

def create_interface():
    """Build and return the Gradio Blocks app.

    The visible part of the layout is a Markdown panel with microphone
    troubleshooting tips; the rest of the interface is elided in this
    excerpt.
    """
    with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
        # [Your existing interface code...]

        # Add audio format info (heading emoji restored from mis-decoded bytes)
        gr.Markdown("""
        ### 🔊 Audio Input Notes:
        - System works best with clear microphone input
        - If you get audio errors, try:
          - Checking microphone permissions
          - Reducing background noise
          - Using a different microphone
        """)

    return demo

if __name__ == "__main__":
    # process_video_audio looks this instance up as a module-level global.
    emotion_system = EmotionRecognitionSystem()

    # Launch settings: listen on all interfaces on port 7860, request a
    # public share link, and surface errors in the UI.
    launch_options = dict(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )

    demo = create_interface()
    demo.launch(**launch_options)