# Hugging Face Space: yunusajib/Real-Time-Emotional-Detection (status: Sleeping; file size: 9,660 bytes)

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import threading
import queue
import time
from collections import deque
import warnings
import traceback
warnings.filterwarnings("ignore")

# Audio processing imports with fallbacks
AUDIO_AVAILABLE = True
try:
    import soundfile as sf
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("Librosa not available - using basic audio processing")

# Image processing imports with fallbacks
CV2_AVAILABLE = True
try:
    import cv2
except ImportError:
    CV2_AVAILABLE = False
    print("OpenCV not available - using PIL for image processing")

try:
    from PIL import Image, ImageDraw, ImageFont
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    print("PIL not available - limited image processing")

# AI model imports with fallbacks
HF_AVAILABLE = True
try:
    from transformers import pipeline
    import torch
except ImportError:
    HF_AVAILABLE = False
    print("Transformers not available - using mock emotion detection")

class EmotionRecognitionSystem:
    """Face and voice emotion recognition with graceful fallbacks.

    Wraps two Hugging Face pipelines (image and audio classification). When
    the ``transformers`` stack is unavailable or model loading fails,
    ``models_loaded`` stays ``False`` and the detectors return random mock
    scores so the demo keeps working.
    """

    # Image-classification checkpoint for facial expressions. The previous id,
    # "j-hartmann/emotion-english-distilroberta-base", is a *text*
    # classification (DistilRoBERTa) checkpoint and cannot be loaded by the
    # image-classification pipeline, which forced permanent mock mode.
    # NOTE(review): confirm this checkpoint is the one wanted for deployment.
    FACE_MODEL_ID = "trpakov/vit-face-expression"
    AUDIO_MODEL_ID = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"

    # Sample rate every clip is converted to before audio inference.
    TARGET_SAMPLE_RATE = 16000

    def __init__(self):
        self.emotion_history = deque(maxlen=100)
        self.audio_queue = queue.Queue()
        self.video_queue = queue.Queue()
        self.setup_models()

        self.alert_thresholds = {
            'stress': 0.7,
            'anxiety': 0.6,
            'pain': 0.8,
            'confusion': 0.5
        }

    def setup_models(self):
        """Initialize emotion recognition models with better error handling.

        Sets ``self.models_loaded`` to ``True`` only when both pipelines were
        created; any failure is printed and leaves the system in mock mode.
        """
        self.models_loaded = False
        # Defined up front so the attributes exist even when loading fails.
        self.face_emotion_pipeline = None
        self.audio_emotion_pipeline = None

        if not HF_AVAILABLE:
            print("Skipping model loading - transformers not available")
            return

        try:
            device = 0 if torch.cuda.is_available() else -1

            # Facial emotion recognition
            self.face_emotion_pipeline = pipeline(
                "image-classification",
                model=self.FACE_MODEL_ID,
                device=device
            )

            # Audio emotion recognition
            self.audio_emotion_pipeline = pipeline(
                "audio-classification",
                model=self.AUDIO_MODEL_ID,
                device=device
            )
            self.models_loaded = True
        except Exception as e:
            # Best effort by design: the app falls back to mock detection.
            print(f"Error loading models: {e}")
            print(traceback.format_exc())
            self.models_loaded = False

    @staticmethod
    def _split_audio_tuple(audio_data):
        """Return ``(samples, sample_rate)`` from a 2-tuple in either order.

        Gradio's numpy audio is ``(sample_rate, samples)`` while this class
        returns ``(samples, sample_rate)``; the scalar element is the rate.
        """
        first, second = audio_data
        if np.ndim(first) == 0 and np.ndim(second) > 0:
            return second, first
        return first, second

    @staticmethod
    def _pcm_to_float32(samples):
        """Convert samples to float32, scaling integer PCM into [-1, 1)."""
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            info = np.iinfo(samples.dtype)
            if info.min == 0:
                # Unsigned PCM (e.g. 8-bit WAV) is centred on the midpoint.
                midpoint = (info.max + 1) / 2.0
                return ((samples.astype(np.float32) - midpoint) / midpoint).astype(np.float32)
            return samples.astype(np.float32) / float(-info.min)
        return samples.astype(np.float32, copy=False)

    @staticmethod
    def _to_mono(samples):
        """Average channels of a 2-D clip; 1-D input is returned unchanged."""
        samples = np.atleast_1d(samples)
        if samples.ndim == 1:
            return samples
        if samples.ndim != 2:
            raise ValueError(f"Unsupported audio shape: {samples.shape}")
        # Heuristic: the shorter axis holds the channels. Ties are treated as
        # (samples, channels), the layout Gradio uses.
        channel_axis = 1 if samples.shape[1] <= samples.shape[0] else 0
        return samples.mean(axis=channel_axis).astype(np.float32, copy=False)

    def _load_audio_file(self, path):
        """Read an audio file and return ``(mono float32 samples, sample_rate)``."""
        if LIBROSA_AVAILABLE:
            return librosa.load(path, sr=None)

        # Fallback for when librosa is not available: uncompressed WAV only.
        import wave
        with wave.open(path, 'rb') as wf:
            sample_rate = wf.getframerate()
            n_channels = wf.getnchannels()
            sample_width = wf.getsampwidth()
            raw = wf.readframes(wf.getnframes())

        # WAV PCM is little-endian; 8-bit is unsigned, wider widths signed.
        pcm_dtype = {1: 'u1', 2: '<i2', 4: '<i4'}.get(sample_width)
        if pcm_dtype is None:
            raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")

        samples = self._pcm_to_float32(np.frombuffer(raw, dtype=pcm_dtype))
        if n_channels > 1:
            # Frames are interleaved, so the channel axis is known here.
            samples = samples.reshape(-1, n_channels).mean(axis=1)
        return samples.astype(np.float32, copy=False), sample_rate

    def _resample(self, samples, orig_rate, target_rate):
        """Resample a mono float clip from ``orig_rate`` to ``target_rate``."""
        if orig_rate == target_rate:
            return samples
        if LIBROSA_AVAILABLE:
            return librosa.resample(samples, orig_sr=orig_rate, target_sr=target_rate)

        # Linear-interpolation fallback. Unlike integer-step decimation it
        # lands on the requested rate for any ratio (including upsampling);
        # it applies no anti-aliasing filter.
        n_in = samples.shape[0]
        n_out = max(1, int(round(n_in * target_rate / orig_rate)))
        src_times = np.arange(n_in) / orig_rate
        dst_times = np.arange(n_out) / target_rate
        return np.interp(dst_times, src_times, samples).astype(np.float32)

    def validate_audio_input(self, audio_data):
        """Validate and standardize audio input format.

        Args:
            audio_data: ``None``, a file path, or a 2-tuple holding a sample
                array and a sample rate in either order.

        Returns:
            ``(samples, sample_rate)`` with mono float32 samples at
            ``TARGET_SAMPLE_RATE``, or ``None`` when the input is missing,
            empty, of an unsupported type, or cannot be processed.
        """
        if audio_data is None:
            return None

        try:
            # Handle different audio input formats
            if isinstance(audio_data, tuple):
                audio_array, sample_rate = self._split_audio_tuple(audio_data)
            elif isinstance(audio_data, str):
                audio_array, sample_rate = self._load_audio_file(audio_data)
            else:
                return None

            sample_rate = int(sample_rate)
            if sample_rate <= 0:
                return None

            audio_array = self._to_mono(self._pcm_to_float32(audio_array))
            if audio_array.size == 0:
                return None

            # Resample if needed; the returned rate always matches the data.
            audio_array = self._resample(audio_array, sample_rate, self.TARGET_SAMPLE_RATE)
            return (audio_array, self.TARGET_SAMPLE_RATE)

        except Exception as e:
            print(f"Audio validation error: {e}")
            return None

    def _prepare_face_input(self, frame):
        """Convert a video frame into the input handed to the face pipeline."""
        if not isinstance(frame, np.ndarray):
            return frame

        if frame.ndim == 3:
            if frame.shape[2] == 4:  # RGBA
                rgb_frame = frame[:, :, :3]
            elif frame.shape[2] == 3:
                # NOTE(review): 3-channel frames are assumed to be BGR and are
                # swapped to RGB. If callers already pass RGB this swap is
                # wrong - confirm against the frame source.
                if CV2_AVAILABLE:
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                else:
                    rgb_frame = frame[:, :, ::-1]  # Simple BGR to RGB
            else:
                rgb_frame = frame
        else:
            # Grayscale to RGB
            if CV2_AVAILABLE:
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
            else:
                rgb_frame = np.stack((frame,) * 3, axis=-1)

        if not PIL_AVAILABLE:
            return rgb_frame

        # The image-classification pipeline expects a PIL image (or a
        # path/URL), so wrap the pixel array instead of passing it raw.
        pixels = rgb_frame
        if pixels.dtype != np.uint8:
            if np.issubdtype(pixels.dtype, np.floating) and pixels.size and pixels.max() <= 1.0:
                pixels = pixels * 255.0  # floats in [0, 1] -> [0, 255]
            pixels = np.clip(pixels, 0, 255).astype(np.uint8)
        return Image.fromarray(np.ascontiguousarray(pixels))

    def detect_face_emotion(self, frame):
        """Detect emotions from facial expressions with better error handling.

        Returns:
            Dict mapping lower-cased emotion label to score. Random mock
            scores when models are not loaded; ``{'neutral': 1.0}`` when
            inference fails.
        """
        if not self.models_loaded:
            # Mock emotion detection for demo
            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
            scores = np.random.dirichlet(np.ones(len(emotions)))
            return dict(zip(emotions, scores))

        try:
            results = self.face_emotion_pipeline(self._prepare_face_input(frame))
            # Convert to standardized format
            return {result['label'].lower(): result['score'] for result in results}

        except Exception as e:
            print(f"Face emotion detection error: {e}")
            return {'neutral': 1.0}

    def detect_voice_emotion(self, audio_data):
        """Detect emotions from voice tone with better audio handling.

        Returns:
            Dict mapping lower-cased emotion label to score. Random mock
            scores only when models are not loaded; ``{'neutral': 1.0}`` when
            audio is missing or invalid, or when inference fails.
        """
        if not self.models_loaded:
            # Mock emotion detection
            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
            scores = np.random.dirichlet(np.ones(len(emotions)))
            return dict(zip(emotions, scores))

        if audio_data is None:
            # Real models are loaded: do not fabricate random scores for a
            # missing clip; match the invalid-audio result instead.
            return {'neutral': 1.0}

        try:
            # Validate and standardize audio input
            validated_audio = self.validate_audio_input(audio_data)
            if validated_audio is None:
                return {'neutral': 1.0}

            audio_array, sample_rate = validated_audio

            # Process audio with the model
            results = self.audio_emotion_pipeline({
                "array": audio_array,
                "sampling_rate": sample_rate
            })

            return {result['label'].lower(): result['score'] for result in results}

        except Exception as e:
            print(f"Voice emotion detection error: {e}")
            return {'neutral': 1.0}

    # [Rest of your existing methods...]

def process_video_audio(video_frame, audio_data):
    """Run one frame (plus optional audio) through the emotion system.

    Returns a 4-tuple ``(frame, clinical_text, alerts_text, suggestions_text)``.
    With no frame the first element is ``None``; if anything raises during
    processing, the untouched input frame is returned with fixed error texts.
    """
    # Guard clause: nothing to analyse without a frame.
    if video_frame is None:
        return None, "No video input", "", ""

    try:
        # Standardise the audio; fall back to "no audio @ 16 kHz" when invalid.
        validated_audio = emotion_system.validate_audio_input(audio_data)
        if validated_audio:
            samples, rate = validated_audio[0], validated_audio[1]
        else:
            samples, rate = None, 16000

        # Analyse, then draw the overlay on the incoming frame.
        emotion_record = emotion_system.process_frame(video_frame, samples, rate)
        annotated_frame = create_emotion_overlay(video_frame, emotion_record)

        # Render the three text panels.
        clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])

        alerts = emotion_record['alerts']
        alerts_text = "\n".join(alerts) if alerts else "No alerts"

        suggestions = emotion_record['suggestions']
        suggestions_text = "\n".join(suggestions) if suggestions else "No suggestions"

    except Exception as e:
        print(f"Processing error: {e}")
        traceback.print_exc()
        return video_frame, "Processing error", "System error", "Please try again"

    return annotated_frame, clinical_text, alerts_text, suggestions_text

# [Rest of your existing functions...]

def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching."""
    # Troubleshooting notes rendered as Markdown inside the layout.
    audio_notes = """
        ### 🔊 Audio Input Notes:
        - System works best with clear microphone input
        - If you get audio errors, try:
          - Checking microphone permissions
          - Reducing background noise
          - Using a different microphone
        """

    demo = gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft())
    with demo:
        # [Your existing interface code...]

        # Add audio format info
        gr.Markdown(audio_notes)

    return demo

if __name__ == "__main__":
    # process_video_audio reads this instance as a module-level global,
    # so it has to exist before the UI starts serving requests.
    emotion_system = EmotionRecognitionSystem()

    # Build the UI, then serve it on all interfaces at port 7860 with a
    # share link and errors shown in the UI.
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
    )