import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import threading
import queue
import time
from collections import deque
import warnings
import traceback
warnings.filterwarnings("ignore")
# Audio processing imports with fallbacks
AUDIO_AVAILABLE = True
try:
    import soundfile as sf
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("Librosa not available - using basic audio processing")
# Image processing imports with fallbacks
CV2_AVAILABLE = True
try:
    import cv2
except ImportError:
    CV2_AVAILABLE = False
    print("OpenCV not available - using PIL for image processing")
try:
    from PIL import Image, ImageDraw, ImageFont
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    print("PIL not available - limited image processing")
# AI model imports with fallbacks
HF_AVAILABLE = True
try:
    from transformers import pipeline
    import torch
except ImportError:
    HF_AVAILABLE = False
    print("Transformers not available - using mock emotion detection")

class EmotionRecognitionSystem:
    def __init__(self):
        self.emotion_history = deque(maxlen=100)
        self.audio_queue = queue.Queue()
        self.video_queue = queue.Queue()
        self.setup_models()
        # Clinical-emotion scores above these fractions raise alerts
        # (the comparison happens in the alerting logic elided below).
        self.alert_thresholds = {
            'stress': 0.7,
            'anxiety': 0.6,
            'pain': 0.8,
            'confusion': 0.5
        }

    def setup_models(self):
        """Initialize emotion recognition models with better error handling."""
        self.models_loaded = False
        if not HF_AVAILABLE:
            print("Skipping model loading - transformers not available")
            return
        try:
            # Facial emotion recognition.
            # NOTE: the model id referenced here originally
            # (j-hartmann/emotion-english-distilroberta-base) is a *text*
            # classifier and will not load in an image pipeline;
            # trpakov/vit-face-expression is one Hub image checkpoint that
            # fits - substitute your preferred facial-emotion model.
            self.face_emotion_pipeline = pipeline(
                "image-classification",
                model="trpakov/vit-face-expression",
                device=0 if torch.cuda.is_available() else -1
            )
            # Audio emotion recognition
            self.audio_emotion_pipeline = pipeline(
                "audio-classification",
                model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
                device=0 if torch.cuda.is_available() else -1
            )
            self.models_loaded = True
        except Exception as e:
            print(f"Error loading models: {e}")
            print(traceback.format_exc())
            self.models_loaded = False
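
    # Both pipelines return a list of {'label': str, 'score': float} dicts;
    # the detect_* methods below flatten that into a single {label: score} map.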

    def validate_audio_input(self, audio_data):
        """Validate and standardize audio input format."""
        if audio_data is None:
            return None
        try:
            # Handle different audio input formats
            if isinstance(audio_data, tuple):
                # Gradio's numpy audio format is (sample_rate, array); the
                # original unpack order was reversed. Handle either ordering.
                first, second = audio_data
                if isinstance(first, (int, np.integer)):
                    sample_rate, audio_array = first, second
                else:
                    audio_array, sample_rate = first, second
            elif isinstance(audio_data, str):
                # Read audio from a file path
                if LIBROSA_AVAILABLE:
                    audio_array, sample_rate = librosa.load(audio_data, sr=None)
                else:
                    # Fallback when librosa is unavailable: assumes 16-bit PCM;
                    # multi-channel audio stays interleaved here.
                    import wave
                    with wave.open(audio_data, 'rb') as wf:
                        sample_rate = wf.getframerate()
                        audio_array = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
                        audio_array = audio_array.astype(np.float32) / 32768.0
            else:
                return None
            # Resample to the 16 kHz the speech emotion model expects
            target_rate = 16000
            if sample_rate != target_rate:
                if LIBROSA_AVAILABLE:
                    audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_rate)
                else:
                    # Crude decimation fallback: only approximate when the
                    # ratio is not an integer (e.g. 44100 -> step 2 ~ 22050 Hz).
                    step = int(sample_rate / target_rate)
                    if step > 1:
                        audio_array = audio_array[::step]
                sample_rate = target_rate
            return (audio_array, sample_rate)
        except Exception as e:
            print(f"Audio validation error: {e}")
            return None
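
    # Illustrative round trip (hypothetical values): a one-second 32 kHz clip,
    # validate_audio_input((32000, np.zeros(32000, np.float32))), comes back
    # as ~16000 samples tagged 16000 Hz whichever resampling branch runs.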

    def detect_face_emotion(self, frame):
        """Detect emotions from facial expressions with better error handling."""
        if not self.models_loaded:
            # Mock detection for demo: a Dirichlet draw is a random
            # probability vector, so the scores sum to 1 like real outputs.
            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
            scores = np.random.dirichlet(np.ones(len(emotions)))
            return dict(zip(emotions, scores))
        try:
            # Normalize the frame to an RGB array
            if isinstance(frame, np.ndarray):
                if frame.ndim == 3:
                    if frame.shape[2] == 4:  # RGBA: drop alpha channel
                        rgb_frame = frame[:, :, :3]
                    elif frame.shape[2] == 3:
                        # Gradio delivers frames in RGB order already; only
                        # swap channels for BGR sources such as raw cv2 capture.
                        rgb_frame = frame
                    else:
                        rgb_frame = frame
                else:
                    # Grayscale to RGB
                    if CV2_AVAILABLE:
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
                    else:
                        rgb_frame = np.stack((frame,) * 3, axis=-1)
            else:
                rgb_frame = frame
            # HF image pipelines expect PIL images (or paths), not raw arrays
            if PIL_AVAILABLE and isinstance(rgb_frame, np.ndarray):
                rgb_frame = Image.fromarray(rgb_frame.astype(np.uint8))
            results = self.face_emotion_pipeline(rgb_frame)
            # Flatten the list of {'label', 'score'} dicts into one mapping
            emotion_scores = {result['label'].lower(): result['score'] for result in results}
            return emotion_scores
        except Exception as e:
            print(f"Face emotion detection error: {e}")
            return {'neutral': 1.0}

    def detect_voice_emotion(self, audio_data):
        """Detect emotions from voice tone with better audio handling."""
        if not self.models_loaded or audio_data is None:
            # Mock detection, same Dirichlet trick as above
            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
            scores = np.random.dirichlet(np.ones(len(emotions)))
            return dict(zip(emotions, scores))
        try:
            # Validate and standardize audio input
            validated_audio = self.validate_audio_input(audio_data)
            if validated_audio is None:
                return {'neutral': 1.0}
            audio_array, sample_rate = validated_audio
            # HF audio pipelines accept a dict carrying the raw samples and
            # their sampling rate ("array" is the datasets-style key; older
            # transformers versions expect "raw" instead)
            results = self.audio_emotion_pipeline({
                "array": audio_array,
                "sampling_rate": sample_rate
            })
            emotion_scores = {result['label'].lower(): result['score'] for result in results}
            return emotion_scores
        except Exception as e:
            print(f"Voice emotion detection error: {e}")
            return {'neutral': 1.0}
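
    # Note: the two checkpoints emit their own label sets (happy, angry, ...),
    # while alert_thresholds uses clinical categories (stress, pain, ...);
    # the fusion/mapping presumably happens in the elided methods below.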

    # [Rest of your existing methods...]


def process_video_audio(video_frame, audio_data):
    """Process a video frame and audio data with better error handling."""
    if video_frame is None:
        return None, "No video input", "", ""
    try:
        # Standardize the audio first (may be None if validation fails)
        validated_audio = emotion_system.validate_audio_input(audio_data)
        # Get emotion analysis
        emotion_record = emotion_system.process_frame(
            video_frame,
            validated_audio[0] if validated_audio else None,
            validated_audio[1] if validated_audio else 16000
        )
        # Create visualization
        annotated_frame = create_emotion_overlay(video_frame, emotion_record)
        # Format results
        clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
        alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
        suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
        return annotated_frame, clinical_text, alerts_text, suggestions_text
    except Exception as e:
        print(f"Processing error: {e}")
        traceback.print_exc()
        return video_frame, "Processing error", "System error", "Please try again"
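
# process_video_audio reads the module-level `emotion_system`, which is bound
# in the __main__ block below before demo.launch(), so the Gradio callbacks
# (wired up in the elided interface code) see a fully initialized system.
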
# [Rest of your existing functions...]

def create_interface():
    with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
        # [Your existing interface code...]
        # Add audio format info
        gr.Markdown("""
        ### 🔊 Audio Input Notes:
        - System works best with clear microphone input
        - If you get audio errors, try:
          - Checking microphone permissions
          - Reducing background noise
          - Using a different microphone
        """)
    return demo


if __name__ == "__main__":
    emotion_system = EmotionRecognitionSystem()
    demo = create_interface()
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )