Spaces:

yunusajib
/

Real-Time-Emotional-Detection

Sleeping

App Files Files Community

yunusajib commited on Jun 11

Commit

38fef5b

verified ·

1 Parent(s): d17fe0c

app

Browse files

Files changed (1) hide show

app.py +423 -60

app.py CHANGED Viewed

@@ -1,83 +1,446 @@
 import gradio as gr
 import numpy as np
-import time
-from datetime import datetime
-class EmotionRecognizer:
     def __init__(self):
-        self.sample_rate = 16000
-        self.emotion_history = []
-    def analyze_audio(self, audio_data):
-        # Mock audio analysis - replace with your actual model
         emotions = {
-            'happy': np.random.random() * 0.5,
-            'sad': np.random.random() * 0.3,
-            'angry': np.random.random() * 0.2,
-            'neutral': np.random.random() * 0.5
         }
         return emotions
-    def analyze_image(self, image):
-        # Mock image analysis - replace with your actual model
-        emotions = {
-            'happy': np.random.random() * 0.6,
-            'confused': np.random.random() * 0.4,
-            'pain': np.random.random() * 0.3,
-            'neutral': np.random.random() * 0.5
-        }
         return emotions
-    def process_inputs(self, video_frame, audio_data):
-        # Get current timestamp
-        timestamp = datetime.now().strftime("%H:%M:%S")
-        # Process inputs (mock implementation)
-        audio_emotions = self.analyze_audio(audio_data) if audio_data else {}
-        visual_emotions = self.analyze_image(video_frame) if video_frame else {}
-        # Combine results
-        combined = {**audio_emotions, **visual_emotions}
-        self.emotion_history.append((timestamp, combined))
-        # Generate outputs
-        top_emotion = max(combined.items(), key=lambda x: x[1]) if combined else ('none', 0)
-        stats = f"Top emotion: {top_emotion[0]} ({top_emotion[1]:.2f})"
-        history = "\n".join([f"{t}: {e}" for t, e in self.emotion_history[-3:]])
-        return stats, history
-def create_interface():
-    recognizer = EmotionRecognizer()
-    def process_frame(video_frame, audio_data):
-        try:
-            stats, history = recognizer.process_inputs(video_frame, audio_data)
-            return stats, history
-        except Exception as e:
-            return f"Error: {str(e)}", "No history available"
-    with gr.Blocks(title="Emotion Recognition", theme=gr.themes.Soft()) as app:
-        gr.Markdown("# Patient Emotion Recognition System")
-        with gr.Row():
-            with gr.Column():
-                video_input = gr.Image(sources=["webcam"], label="Video Feed")
-                audio_input = gr.Audio(sources=["microphone"], label="Audio Input")
-                process_btn = gr.Button("Analyze", variant="primary")
-            with gr.Column():
-                stats_output = gr.Textbox(label="Current Analysis")
-                history_output = gr.Textbox(label="Recent History", lines=4)
-        process_btn.click(
-            fn=process_frame,
-            inputs=[video_input, audio_input],
-            outputs=[stats_output, history_output]
         )
-    return app
 if __name__ == "__main__":
-    app = create_interface()
-    app.launch()

 import gradio as gr
+import cv2
 import numpy as np
+import librosa
+import pandas as pd
+import plotly.graph_objects as go
+import plotly.express as px
+from datetime import datetime, timedelta
+import warnings
+warnings.filterwarnings('ignore')
+# Mock emotion detection functions (replace with actual models in production)
+class EmotionAnalyzer:
     def __init__(self):
+        # In production, load actual pretrained models here
+        self.face_emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust', 'surprise']
+        self.voice_emotions = ['calm', 'stressed', 'anxious', 'confused', 'pain', 'frustrated']
+        self.session_data = []
+    def analyze_facial_expression(self, frame):
+        """Simulate facial expression analysis"""
+        # In production: use actual face detection + emotion recognition model
+        # Example: face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
+        # Mock analysis - replace with actual model inference
         emotions = {
+            'neutral': np.random.uniform(0.1, 0.7),
+            'happy': np.random.uniform(0.0, 0.3),
+            'sad': np.random.uniform(0.0, 0.4),
+            'angry': np.random.uniform(0.0, 0.2),
+            'fear': np.random.uniform(0.0, 0.3),
+            'disgust': np.random.uniform(0.0, 0.1),
+            'surprise': np.random.uniform(0.0, 0.2)
         }
+        # Normalize to sum to 1
+        total = sum(emotions.values())
+        emotions = {k: v/total for k, v in emotions.items()}
         return emotions
+    def analyze_voice_emotion(self, audio_data, sample_rate):
+        """Simulate voice emotion analysis"""
+        if audio_data is None or len(audio_data) == 0:
+            return {'calm': 1.0}
+        # Extract audio features (these would be used with actual models)
+        try:
+            # Basic audio feature extraction
+            mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
+            spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
+            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_data)
+            # Mock emotion prediction based on audio characteristics
+            energy = np.mean(audio_data**2)
+            pitch_var = np.var(spectral_centroid)
+            # Simulate emotion detection based on audio features
+            emotions = {
+                'calm': max(0.1, 0.8 - energy * 10),
+                'stressed': min(0.8, energy * 5 + pitch_var * 100),
+                'anxious': min(0.7, pitch_var * 150),
+                'confused': np.random.uniform(0.0, 0.3),
+                'pain': min(0.6, energy * 8 if energy > 0.1 else 0.0),
+                'frustrated': min(0.5, energy * 3 + pitch_var * 80)
+            }
+            # Normalize
+            total = sum(emotions.values())
+            emotions = {k: v/total for k, v in emotions.items()}
+        except Exception as e:
+            # Fallback if audio processing fails
+            emotions = {'calm': 1.0}
         return emotions
+    def process_consultation_data(self, video_file, audio_file):
+        """Process video and audio files for emotion analysis"""
+        results = {
+            'timestamp': [],
+            'facial_emotions': [],
+            'voice_emotions': [],
+            'alerts': []
+        }
+        # Process video file
+        if video_file is not None:
+            cap = cv2.VideoCapture(video_file)
+            frame_count = 0
+            while cap.read()[0] and frame_count < 100:  # Limit for demo
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                if frame_count % 30 == 0:  # Analyze every 30th frame
+                    facial_emotions = self.analyze_facial_expression(frame)
+                    timestamp = frame_count / 30  # Assuming 30 FPS
+                    results['timestamp'].append(timestamp)
+                    results['facial_emotions'].append(facial_emotions)
+                    # Check for alerts
+                    if facial_emotions.get('sad', 0) > 0.4 or facial_emotions.get('fear', 0) > 0.3:
+                        results['alerts'].append(f"High stress/sadness detected at {timestamp:.1f}s")
+                frame_count += 1
+            cap.release()
+        # Process audio file
+        if audio_file is not None:
+            try:
+                audio_data, sample_rate = librosa.load(audio_file, duration=60)  # Limit for demo
+                # Analyze audio in chunks
+                chunk_duration = 3  # seconds
+                chunk_samples = chunk_duration * sample_rate
+                for i in range(0, len(audio_data), chunk_samples):
+                    chunk = audio_data[i:i+chunk_samples]
+                    if len(chunk) > sample_rate:  # Minimum 1 second
+                        voice_emotions = self.analyze_voice_emotion(chunk, sample_rate)
+                        timestamp = i / sample_rate
+                        if len(results['voice_emotions']) <= len(results['timestamp']):
+                            results['voice_emotions'].append(voice_emotions)
+                        # Check for voice-based alerts
+                        if voice_emotions.get('pain', 0) > 0.4 or voice_emotions.get('stressed', 0) > 0.5:
+                            results['alerts'].append(f"Voice stress/pain detected at {timestamp:.1f}s")
+            except Exception as e:
+                print(f"Audio processing error: {e}")
+        return results
+# Initialize analyzer
+analyzer = EmotionAnalyzer()
+def create_emotion_timeline(data):
+    """Create timeline visualization of emotions"""
+    if not data['timestamp']:
+        return go.Figure()
+    fig = go.Figure()
+    # Plot facial emotions
+    if data['facial_emotions']:
+        for emotion in ['sad', 'fear', 'angry', 'neutral', 'happy']:
+            values = [emotions.get(emotion, 0) for emotions in data['facial_emotions']]
+            fig.add_trace(go.Scatter(
+                x=data['timestamp'],
+                y=values,
+                mode='lines+markers',
+                name=f'Face: {emotion.title()}',
+                line=dict(width=2)
+            ))
+    # Plot voice emotions
+    if data['voice_emotions']:
+        for emotion in ['stressed', 'anxious', 'pain', 'calm']:
+            values = [emotions.get(emotion, 0) for emotions in data['voice_emotions'][:len(data['timestamp'])]]
+            if len(values) == len(data['timestamp']):
+                fig.add_trace(go.Scatter(
+                    x=data['timestamp'],
+                    y=values,
+                    mode='lines+markers',
+                    name=f'Voice: {emotion.title()}',
+                    line=dict(dash='dash', width=2)
+                ))
+    fig.update_layout(
+        title='Patient Emotion Timeline During Consultation',
+        xaxis_title='Time (seconds)',
+        yaxis_title='Emotion Intensity',
+        height=500,
+        hovermode='x unified'
+    )
+    return fig
+def create_emotion_summary(data):
+    """Create summary charts of detected emotions"""
+    if not data['facial_emotions'] and not data['voice_emotions']:
+        return go.Figure(), go.Figure()
+    # Facial emotion summary
+    face_fig = go.Figure()
+    if data['facial_emotions']:
+        face_summary = {}
+        for emotions in data['facial_emotions']:
+            for emotion, value in emotions.items():
+                face_summary[emotion] = face_summary.get(emotion, 0) + value
+        face_fig = px.pie(
+            values=list(face_summary.values()),
+            names=list(face_summary.keys()),
+            title='Facial Expression Summary'
+        )
+    # Voice emotion summary
+    voice_fig = go.Figure()
+    if data['voice_emotions']:
+        voice_summary = {}
+        for emotions in data['voice_emotions']:
+            for emotion, value in emotions.items():
+                voice_summary[emotion] = voice_summary.get(emotion, 0) + value
+        voice_fig = px.pie(
+            values=list(voice_summary.values()),
+            names=list(voice_summary.keys()),
+            title='Voice Emotion Summary'
         )
+    return face_fig, voice_fig
+def generate_recommendations(data):
+    """Generate recommendations based on detected emotions"""
+    recommendations = []
+    alerts = data.get('alerts', [])
+    if alerts:
+        recommendations.append("⚠️ **ALERTS DETECTED:**")
+        for alert in alerts[:5]:  # Limit to 5 alerts
+            recommendations.append(f"• {alert}")
+        recommendations.append("")
+    # Analyze overall emotion patterns
+    high_stress_count = 0
+    pain_indicators = 0
+    confusion_signs = 0
+    for emotions in data.get('facial_emotions', []):
+        if emotions.get('sad', 0) > 0.3 or emotions.get('fear', 0) > 0.25:
+            high_stress_count += 1
+    for emotions in data.get('voice_emotions', []):
+        if emotions.get('pain', 0) > 0.3:
+            pain_indicators += 1
+        if emotions.get('confused', 0) > 0.3:
+            confusion_signs += 1
+    # Generate specific recommendations
+    if high_stress_count > len(data.get('facial_emotions', [])) * 0.3:
+        recommendations.append("🧘 **Stress Management:** Patient shows signs of elevated stress. Consider:")
+        recommendations.append("  • Offering reassurance and clear explanations")
+        recommendations.append("  • Allowing more time for questions")
+        recommendations.append("  • Suggesting relaxation techniques")
+        recommendations.append("")
+    if pain_indicators > 0:
+        recommendations.append("🩺 **Pain Assessment:** Voice analysis suggests possible discomfort:")
+        recommendations.append("  • Conduct thorough pain assessment")
+        recommendations.append("  • Consider pain management options")
+        recommendations.append("  • Monitor patient comfort throughout consultation")
+        recommendations.append("")
+    if confusion_signs > 0:
+        recommendations.append("💭 **Communication:** Signs of confusion detected:")
+        recommendations.append("  • Use simpler language and medical terms")
+        recommendations.append("  • Repeat important information")
+        recommendations.append("  • Provide written summaries")
+        recommendations.append("")
+    if not recommendations:
+        recommendations.append("✅ **Overall Assessment:** Patient appears comfortable and engaged.")
+        recommendations.append("Continue with current consultation approach.")
+    return "\n".join(recommendations)
+def process_consultation(video_file, audio_file):
+    """Main processing function"""
+    if video_file is None and audio_file is None:
+        return None, None, None, "Please upload video and/or audio files to analyze."
+    # Process the consultation data
+    data = analyzer.process_consultation_data(video_file, audio_file)
+    # Create visualizations
+    timeline_fig = create_emotion_timeline(data)
+    face_summary, voice_summary = create_emotion_summary(data)
+    # Generate recommendations
+    recommendations = generate_recommendations(data)
+    return timeline_fig, face_summary, voice_summary, recommendations
+def real_time_analysis(audio):
+    """Real-time audio emotion analysis"""
+    if audio is None:
+        return "No audio detected"
+    try:
+        # Process audio data
+        sample_rate, audio_data = audio
+        # Convert to float and normalize
+        if audio_data.dtype == np.int16:
+            audio_data = audio_data.astype(np.float32) / 32768.0
+        elif audio_data.dtype == np.int32:
+            audio_data = audio_data.astype(np.float32) / 2147483648.0
+        # Analyze emotions
+        emotions = analyzer.analyze_voice_emotion(audio_data, sample_rate)
+        # Format results
+        result = "**Real-time Voice Emotion Analysis:**\n\n"
+        for emotion, confidence in sorted(emotions.items(), key=lambda x: x[1], reverse=True):
+            percentage = confidence * 100
+            result += f"• **{emotion.title()}**: {percentage:.1f}%\n"
+        # Add alerts if needed
+        if emotions.get('pain', 0) > 0.4:
+            result += "\n⚠️ **ALERT**: High pain level detected"
+        elif emotions.get('stressed', 0) > 0.5:
+            result += "\n⚠️ **ALERT**: High stress level detected"
+        return result
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
+# Create Gradio interface
+with gr.Blocks(title="Patient Emotion Analysis System", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🏥 Patient Emotion Analysis System
+    This system analyzes patient facial expressions and voice tone during consultations to detect emotions
+    such as stress, anxiety, confusion, or pain, helping healthcare practitioners provide better care.
+    **Features:**
+    - Facial expression analysis from video recordings
+    - Voice emotion detection from audio
+    - Real-time emotion monitoring
+    - Clinical recommendations based on detected emotions
+    """)
+    with gr.Tabs():
+        # Consultation Analysis Tab
+        with gr.Tab("📹 Consultation Analysis"):
+            gr.Markdown("### Upload consultation video and/or audio for comprehensive emotion analysis")
+            with gr.Row():
+                with gr.Column():
+                    video_input = gr.File(
+                        label="Upload Video File",
+                        file_types=[".mp4", ".avi", ".mov", ".mkv"],
+                        type="filepath"
+                    )
+                    audio_input = gr.File(
+                        label="Upload Audio File",
+                        file_types=[".wav", ".mp3", ".m4a", ".flac"],
+                        type="filepath"
+                    )
+                    analyze_btn = gr.Button("🔍 Analyze Consultation", variant="primary", size="lg")
+                with gr.Column():
+                    recommendations_output = gr.Markdown(label="Clinical Recommendations")
+            with gr.Row():
+                timeline_plot = gr.Plot(label="Emotion Timeline")
+            with gr.Row():
+                with gr.Column():
+                    face_summary_plot = gr.Plot(label="Facial Expression Summary")
+                with gr.Column():
+                    voice_summary_plot = gr.Plot(label="Voice Emotion Summary")
+            analyze_btn.click(
+                fn=process_consultation,
+                inputs=[video_input, audio_input],
+                outputs=[timeline_plot, face_summary_plot, voice_summary_plot, recommendations_output]
+            )
+        # Real-time Monitoring Tab
+        with gr.Tab("🎤 Real-time Monitoring"):
+            gr.Markdown("### Real-time voice emotion analysis during consultation")
+            with gr.Row():
+                with gr.Column():
+                    audio_realtime = gr.Audio(
+                        sources=["microphone"],
+                        type="numpy",
+                        label="Real-time Audio Input"
+                    )
+                with gr.Column():
+                    realtime_output = gr.Markdown(label="Real-time Analysis Results")
+            audio_realtime.change(
+                fn=real_time_analysis,
+                inputs=[audio_realtime],
+                outputs=[realtime_output]
+            )
+        # Information Tab
+        with gr.Tab("ℹ️ System Information"):
+            gr.Markdown("""
+            ### System Overview
+            This Patient Emotion Analysis System uses advanced AI models to analyze:
+            **Facial Expression Analysis:**
+            - Detects 7 basic emotions: neutral, happy, sad, angry, fear, disgust, surprise
+            - Uses computer vision techniques for face detection and emotion recognition
+            - Analyzes video frame-by-frame for temporal emotion patterns
+            **Voice Emotion Analysis:**
+            - Extracts audio features: MFCC, spectral centroid, zero-crossing rate
+            - Detects emotions: calm, stressed, anxious, confused, pain, frustrated
+            - Real-time analysis capability for live consultations
+            **Clinical Applications:**
+            - Helps practitioners identify patient distress early
+            - Provides objective emotion metrics
+            - Suggests intervention strategies
+            - Improves patient-practitioner communication
+            **Privacy & Ethics:**
+            - All processing is done locally
+            - No data is stored permanently
+            - Designed to assist, not replace clinical judgment
+            - Compliant with healthcare data protection standards
+            ### Technical Implementation Notes:
+            **For Production Use:**
+            1. Replace mock emotion detection with actual pretrained models:
+               - FER-2013, AffectNet for facial emotions
+               - Audio emotion models (RAVDESS, IEMOCAP datasets)
+            2. Implement proper face detection (OpenCV, dlib, or MediaPipe)
+            3. Add real-time video processing capabilities
+            4. Integrate with hospital systems and EHR
+            5. Add user authentication and data encryption
+            6. Calibrate alert thresholds based on clinical validation
+            **Recommended Models:**
+            - **Facial**: FER+ model, OpenFace, or custom CNN trained on medical data
+            - **Voice**: Speech emotion recognition using LSTM/Transformer architectures
+            - **Integration**: Multi-modal fusion for improved accuracy
+            """)
 if __name__ == "__main__":
+    demo.launch(share=True)