import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import threading
import queue
import time
from collections import deque
import warnings
import traceback
warnings.filterwarnings("ignore")
# Audio processing imports with fallbacks
AUDIO_AVAILABLE = True
try:
import soundfile as sf
import librosa
LIBROSA_AVAILABLE = True
except ImportError:
LIBROSA_AVAILABLE = False
print("Librosa not available - using basic audio processing")
# Image processing imports with fallbacks
CV2_AVAILABLE = True
try:
import cv2
except ImportError:
CV2_AVAILABLE = False
print("OpenCV not available - using PIL for image processing")
try:
from PIL import Image, ImageDraw, ImageFont
PIL_AVAILABLE = True
except ImportError:
PIL_AVAILABLE = False
print("PIL not available - limited image processing")
# AI model imports with fallbacks
HF_AVAILABLE = True
try:
from transformers import pipeline
import torch
except ImportError:
HF_AVAILABLE = False
print("Transformers not available - using mock emotion detection")
class EmotionRecognitionSystem:
def __init__(self):
self.emotion_history = deque(maxlen=100)
self.audio_queue = queue.Queue()
self.video_queue = queue.Queue()
self.setup_models()
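        # Illustrative default thresholds; not clinically validated cut-offs.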
self.alert_thresholds = {
'stress': 0.7,
'anxiety': 0.6,
'pain': 0.8,
'confusion': 0.5
}
def setup_models(self):
"""Initialize emotion recognition models with better error handling"""
self.models_loaded = False
if not HF_AVAILABLE:
print("Skipping model loading - transformers not available")
return
try:
            # Facial emotion recognition.
            # Assumption: a ViT facial-expression checkpoint is used here,
            # since j-hartmann/emotion-english-distilroberta-base is a text
            # classifier and cannot back an image-classification pipeline.
            self.face_emotion_pipeline = pipeline(
                "image-classification",
                model="trpakov/vit-face-expression",
                device=0 if torch.cuda.is_available() else -1
            )
# Audio emotion recognition
self.audio_emotion_pipeline = pipeline(
"audio-classification",
model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
device=0 if torch.cuda.is_available() else -1
)
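            # Note: the first pipeline() call downloads the model weights from
            # the Hugging Face Hub, so a cold start can take a while.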
self.models_loaded = True
except Exception as e:
print(f"Error loading models: {e}")
print(traceback.format_exc())
self.models_loaded = False
def validate_audio_input(self, audio_data):
"""Validate and standardize audio input format"""
if audio_data is None:
return None
try:
            # Handle different audio input formats
            if isinstance(audio_data, tuple):
                # Gradio microphone input arrives as (sample_rate, np.ndarray)
                sample_rate, audio_array = audio_data
                if np.issubdtype(audio_array.dtype, np.integer):
                    # Normalize integer PCM (Gradio uses int16) to float32 in [-1, 1]
                    audio_array = audio_array.astype(np.float32) / 32768.0
else:
# Try to read audio file if not in tuple format
if isinstance(audio_data, str):
if LIBROSA_AVAILABLE:
audio_array, sample_rate = librosa.load(audio_data, sr=None)
                    else:
                        # Fallback when librosa is unavailable: read with the
                        # stdlib wave module (assumes a 16-bit PCM WAV file)
                        import wave
                        with wave.open(audio_data, 'rb') as wf:
                            sample_rate = wf.getframerate()
                            n_channels = wf.getnchannels()
                            audio_array = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
                            if n_channels > 1:
                                # De-interleave into (frames, channels) for the mono down-mix below
                                audio_array = audio_array.reshape(-1, n_channels)
                            audio_array = audio_array.astype(np.float32) / 32768.0
else:
return None
            # Down-mix multi-channel audio to mono before resampling
            if audio_array.ndim > 1:
                audio_array = audio_array.mean(axis=1)
            # Resample to the 16 kHz rate the speech emotion model expects
            target_rate = 16000
            if sample_rate != target_rate:
                if LIBROSA_AVAILABLE:
                    audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_rate)
                else:
                    # Crude linear-interpolation fallback (no anti-aliasing);
                    # unlike integer-step decimation, it hits the target rate exactly
                    target_len = int(len(audio_array) * target_rate / sample_rate)
                    audio_array = np.interp(
                        np.linspace(0, len(audio_array) - 1, target_len),
                        np.arange(len(audio_array)),
                        audio_array,
                    )
                sample_rate = target_rate
return (audio_array, sample_rate)
except Exception as e:
print(f"Audio validation error: {e}")
return None
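    # Example (illustrative) of the expected round trip for a Gradio microphone
    # capture, which arrives as a (sample_rate, samples) tuple:
    #
    #   system = EmotionRecognitionSystem()
    #   capture = (44100, np.zeros(44100, dtype=np.int16))  # hypothetical input
    #   system.validate_audio_input(capture)
    #   # -> (float32 mono array at 16 kHz, 16000), or None on failure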
def detect_face_emotion(self, frame):
"""Detect emotions from facial expressions with better error handling"""
if not self.models_loaded:
# Mock emotion detection for demo
emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
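            # Dirichlet samples sum to 1, giving a plausible probability distribution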
scores = np.random.dirichlet(np.ones(len(emotions)))
return dict(zip(emotions, scores))
        try:
            # Standardize the frame to 3-channel RGB. Gradio image/webcam
            # components already deliver frames in RGB channel order, so no
            # BGR->RGB swap is applied here.
            if isinstance(frame, np.ndarray):
                if frame.ndim == 3 and frame.shape[2] == 4:  # RGBA -> drop alpha
                    rgb_frame = frame[:, :, :3]
                elif frame.ndim == 2:
                    # Grayscale to RGB
                    if CV2_AVAILABLE:
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
                    else:
                        rgb_frame = np.stack((frame,) * 3, axis=-1)
                else:
                    rgb_frame = frame
                # transformers image pipelines accept PIL images (and paths/URLs)
                # rather than raw numpy arrays, so convert when PIL is available
                if PIL_AVAILABLE:
                    rgb_frame = Image.fromarray(rgb_frame.astype(np.uint8))
            else:
                rgb_frame = frame
# Use face emotion model
results = self.face_emotion_pipeline(rgb_frame)
# Convert to standardized format
emotion_scores = {}
for result in results:
emotion_scores[result['label'].lower()] = result['score']
return emotion_scores
except Exception as e:
print(f"Face emotion detection error: {e}")
return {'neutral': 1.0}
def detect_voice_emotion(self, audio_data):
"""Detect emotions from voice tone with better audio handling"""
if not self.models_loaded or audio_data is None:
# Mock emotion detection
emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
scores = np.random.dirichlet(np.ones(len(emotions)))
return dict(zip(emotions, scores))
try:
# Validate and standardize audio input
validated_audio = self.validate_audio_input(audio_data)
if validated_audio is None:
return {'neutral': 1.0}
audio_array, sample_rate = validated_audio
# Process audio with the model
results = self.audio_emotion_pipeline({
"array": audio_array,
"sampling_rate": sample_rate
})
emotion_scores = {}
for result in results:
emotion_scores[result['label'].lower()] = result['score']
return emotion_scores
except Exception as e:
print(f"Voice emotion detection error: {e}")
return {'neutral': 1.0}
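    # Both detectors return a plain {label: score} dict with lowercase keys,
    # e.g. {'neutral': 0.62, 'sad': 0.21, ...}, so downstream fusion and
    # alerting logic can treat face and voice results uniformly.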
# [Rest of your existing methods...]
def process_video_audio(video_frame, audio_data):
"""Process video frame and audio data with better error handling"""
if video_frame is None:
return None, "No video input", "", ""
try:
# Process the frame
validated_audio = emotion_system.validate_audio_input(audio_data)
# Get emotion analysis
emotion_record = emotion_system.process_frame(
video_frame,
validated_audio[0] if validated_audio else None,
validated_audio[1] if validated_audio else 16000
)
# Create visualization
annotated_frame = create_emotion_overlay(video_frame, emotion_record)
# Format results
clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
return annotated_frame, clinical_text, alerts_text, suggestions_text
except Exception as e:
print(f"Processing error: {e}")
traceback.print_exc()
return video_frame, "Processing error", "System error", "Please try again"
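# Assumed wiring (the actual event bindings live in the elided interface code):
# a streaming setup would look roughly like
#
#   webcam.stream(process_video_audio, inputs=[webcam, mic],
#                 outputs=[annotated, clinical, alerts, suggestions])
#
# where webcam is an image component with a webcam source and mic is gr.Audio.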
# [Rest of your existing functions...]
def create_interface():
with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
# [Your existing interface code...]
# Add audio format info
gr.Markdown("""
        ### Audio Input Notes:
- System works best with clear microphone input
- If you get audio errors, try:
- Checking microphone permissions
- Reducing background noise
- Using a different microphone
""")
return demo
if __name__ == "__main__":
emotion_system = EmotionRecognitionSystem()
demo = create_interface()
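    # Assumption: streaming handlers are used above, which require the
    # request queue to be enabled before launch.
    demo.queue()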
demo.launch(
share=True,
server_name="0.0.0.0",
server_port=7860,
show_error=True
) |