yunusajib committed on
Commit d61cd9f · verified · 1 Parent(s): 9d66904

change the model

Files changed (1)
  1. app.py +476 -105
app.py CHANGED
@@ -1,124 +1,495 @@
 
  import cv2
  import numpy as np
- import pyttsx3
- import onnxruntime as ort
  import librosa
- import sounddevice as sd
- import scipy.io.wavfile as wavfile
- from sklearn.preprocessing import StandardScaler
  import time
- import os
- from gtts import gTTS
- import gradio as gr
- import tempfile
-
- # ------------------- Speech Emotion Recognition Model -------------------
- class SpeechEmotionRecognizer:
-     def __init__(self, model_path):
-         self.model = ort.InferenceSession(model_path)
-         self.input_name = self.model.get_inputs()[0].name
-         self.labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
-
-         # Load or create scaler here (fit on training data offline, then load)
-         self.scaler = StandardScaler()
-
-     def extract_features(self, y, sr):
-         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
-         mfcc_mean = np.mean(mfcc.T, axis=0)
-         # Normally, scaler should be pre-fitted, here we just scale manually to zero mean, unit var
-         mfcc_scaled = (mfcc_mean - np.mean(mfcc_mean)) / np.std(mfcc_mean)
-         return mfcc_scaled
-
-     def predict_emotion(self, audio_data, sr):
-         features = self.extract_features(audio_data, sr)
-         input_data = features.reshape(1, -1).astype(np.float32)
-         pred = self.model.run(None, {self.input_name: input_data})[0]
-         emotion_idx = np.argmax(pred)
-         return self.labels[emotion_idx]
-
- # ------------------- Facial Emotion Recognition Model -------------------
- class FacialEmotionRecognizer:
-     def __init__(self, model_path):
-         self.model = ort.InferenceSession(model_path)
-         self.input_name = self.model.get_inputs()[0].name
-         self.labels = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt']
-
-     def predict_emotion(self, face_img):
-         face_img = cv2.resize(face_img, (64, 64))
-         face_img = face_img.astype('float32')  # FER+ expects float32
-         # FER+ model expects input shape (1, 1, 64, 64)
-         face_img = np.expand_dims(face_img, axis=0)  # (1, 64, 64)
-         face_img = np.expand_dims(face_img, axis=0)  # (1, 1, 64, 64)
-         pred = self.model.run(None, {self.input_name: face_img})[0]
-         emotion_idx = np.argmax(pred)
-         return self.labels[emotion_idx]
-
- # ------------------- Utility Functions -------------------
-
- def speak(text):
-     if not text.strip():
-         return None
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
-         tts = gTTS(text)
-         tts.save(tmpfile.name)
-         return tmpfile.name
-
- def record_audio(duration=3, fs=22050):
-     print("Recording audio...")
-     audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
-     sd.wait()
-     audio = audio.flatten()
-     print("Recording complete.")
-     return audio, fs
-
- def analyze_face(face_roi, emotion_model):
-     emotion = emotion_model.predict_emotion(face_roi)
-     return emotion
-
- # ------------------- Main Function -------------------
-
- def main():
-     face_emotion_model = FacialEmotionRecognizer("emotion-ferplus-8.onnx")
-     speech_emotion_model = SpeechEmotionRecognizer("speech_emotion_model.onnx")
-
-     cap = cv2.VideoCapture(0)
-     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
-
-     print("Press 's' to speak and 'q' to quit.")
-
-     while True:
-         ret, frame = cap.read()
-         if not ret:
-             print("Failed to grab frame.")
-             break
-
-         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-         faces = face_cascade.detectMultiScale(gray, 1.3, 5)
-
-         for (x, y, w, h) in faces:
-             face_roi = gray[y:y+h, x:x+w]
-             emotion = analyze_face(face_roi, face_emotion_model)
-             label = f"Face: {emotion}"
-             cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
-             cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
-
-         cv2.imshow("Emotion Recognition", frame)
-         key = cv2.waitKey(1) & 0xFF
-
-         if key == ord('s'):
-             audio, sr = record_audio()
-             speech_emotion = speech_emotion_model.predict_emotion(audio, sr)
-             print(f"Speech Emotion: {speech_emotion}")
-             audio_file = speak(f"You sound {speech_emotion}")
-             if audio_file:
-                 # Play the TTS audio using cv2 or other player if needed
-                 pass
-
-         elif key == ord('q'):
-             break
-
-     cap.release()
-     cv2.destroyAllWindows()
-
  if __name__ == "__main__":
-     main()
+ import gradio as gr
  import cv2
  import numpy as np
  import librosa
+ import pandas as pd
+ import plotly.graph_objects as go
+ import plotly.express as px
+ from datetime import datetime, timedelta
+ import threading
+ import queue
  import time
+ from collections import deque
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # Try to import transformers and torch, with fallbacks
+ try:
+     from transformers import pipeline
+     import torch
+     HF_AVAILABLE = True
+ except ImportError:
+     HF_AVAILABLE = False
+     print("Transformers not available - using mock emotion detection")
+
+ class EmotionRecognitionSystem:
+     def __init__(self):
+         self.emotion_history = deque(maxlen=100)  # Store last 100 emotion readings
+         self.audio_queue = queue.Queue()
+         self.video_queue = queue.Queue()
+
+         # Initialize emotion detection models
+         self.setup_models()
+
+         # Emotion thresholds for alerts
+         self.alert_thresholds = {
+             'stress': 0.7,
+             'anxiety': 0.6,
+             'pain': 0.8,
+             'confusion': 0.5
+         }
+
+     def setup_models(self):
+         """Initialize emotion recognition models"""
+         if HF_AVAILABLE:
+             try:
+                 # Facial emotion recognition
+                 self.face_emotion_pipeline = pipeline(
+                     "image-classification",
+                     model="j-hartmann/emotion-english-distilroberta-base",
+                     device=0 if torch.cuda.is_available() else -1
+                 )
+
+                 # Audio emotion recognition
+                 self.audio_emotion_pipeline = pipeline(
+                     "audio-classification",
+                     model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+                     device=0 if torch.cuda.is_available() else -1
+                 )
+                 self.models_loaded = True
+             except Exception as e:
+                 print(f"Error loading models: {e}")
+                 self.models_loaded = False
+         else:
+             self.models_loaded = False
+
+     def detect_face_emotion(self, frame):
+         """Detect emotions from facial expressions"""
+         if not self.models_loaded:
+             # Mock emotion detection for demo
+             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
+             scores = np.random.dirichlet(np.ones(len(emotions)))
+             return dict(zip(emotions, scores))
+
+         try:
+             # Convert frame to RGB
+             rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+             # Use face emotion model
+             results = self.face_emotion_pipeline(rgb_frame)
+
+             # Convert to standardized format
+             emotion_scores = {}
+             for result in results:
+                 emotion_scores[result['label'].lower()] = result['score']
+
+             return emotion_scores
+
+         except Exception as e:
+             print(f"Face emotion detection error: {e}")
+             return {'neutral': 1.0}
+
+     def detect_voice_emotion(self, audio_data, sample_rate=16000):
+         """Detect emotions from voice tone"""
+         if not self.models_loaded or audio_data is None:
+             # Mock emotion detection
+             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
+             scores = np.random.dirichlet(np.ones(len(emotions)))
+             return dict(zip(emotions, scores))
+
+         try:
+             # Process audio with the model
+             results = self.audio_emotion_pipeline(audio_data)
+
+             emotion_scores = {}
+             for result in results:
+                 emotion_scores[result['label'].lower()] = result['score']
+
+             return emotion_scores
+
+         except Exception as e:
+             print(f"Voice emotion detection error: {e}")
+             return {'neutral': 1.0}
+
+     def extract_audio_features(self, audio_data, sample_rate):
+         """Extract audio features for emotion analysis"""
+         try:
+             # Extract basic audio features
+             mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
+             spectral_centroids = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
+             zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_data)
+             spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data, sr=sample_rate)
+
+             features = {
+                 'mfcc_mean': np.mean(mfccs),
+                 'mfcc_std': np.std(mfccs),
+                 'spectral_centroid_mean': np.mean(spectral_centroids),
+                 'zcr_mean': np.mean(zero_crossing_rate),
+                 'spectral_rolloff_mean': np.mean(spectral_rolloff)
+             }
+
+             return features
+         except Exception as e:
+             print(f"Audio feature extraction error: {e}")
+             return {}
+
+     def combine_emotions(self, face_emotions, voice_emotions, weights=(0.6, 0.4)):
+         """Combine facial and voice emotion predictions"""
+         combined = {}
+         all_emotions = set(face_emotions.keys()) | set(voice_emotions.keys())
+
+         for emotion in all_emotions:
+             face_score = face_emotions.get(emotion, 0)
+             voice_score = voice_emotions.get(emotion, 0)
+             combined[emotion] = weights[0] * face_score + weights[1] * voice_score
+
+         return combined
+
+     def map_to_clinical_emotions(self, emotions):
+         """Map detected emotions to clinical categories"""
+         clinical_mapping = {
+             'stress': emotions.get('angry', 0) * 0.3 + emotions.get('fear', 0) * 0.4 + emotions.get('disgust', 0) * 0.3,
+             'anxiety': emotions.get('fear', 0) * 0.6 + emotions.get('surprise', 0) * 0.2 + emotions.get('sad', 0) * 0.2,
+             'pain': emotions.get('angry', 0) * 0.4 + emotions.get('disgust', 0) * 0.3 + emotions.get('sad', 0) * 0.3,
+             'confusion': emotions.get('surprise', 0) * 0.5 + emotions.get('neutral', 0) * 0.3 + emotions.get('fear', 0) * 0.2,
+             'comfort': emotions.get('happy', 0) * 0.7 + emotions.get('neutral', 0) * 0.3
+         }
+
+         return clinical_mapping
+
+     def generate_alerts(self, clinical_emotions):
+         """Generate alerts based on emotion thresholds"""
+         alerts = []
+         suggestions = []
+
+         for emotion, score in clinical_emotions.items():
+             if emotion in self.alert_thresholds and score > self.alert_thresholds[emotion]:
+                 alerts.append(f"⚠️ High {emotion} detected ({score:.2f})")
+
+                 # Add specific suggestions
+                 if emotion == 'stress':
+                     suggestions.append("Consider: Take a moment to slow down, use calming voice tone")
+                 elif emotion == 'anxiety':
+                     suggestions.append("Consider: Provide reassurance, explain procedures clearly")
+                 elif emotion == 'pain':
+                     suggestions.append("Consider: Assess pain level, offer comfort measures")
+                 elif emotion == 'confusion':
+                     suggestions.append("Consider: Simplify explanations, check understanding")
+
+         return alerts, suggestions
+
+     def process_frame(self, frame, audio_data=None, sample_rate=16000):
+         """Process a single frame and audio data"""
+         timestamp = datetime.now()
+
+         # Detect emotions
+         face_emotions = self.detect_face_emotion(frame)
+         voice_emotions = self.detect_voice_emotion(audio_data, sample_rate) if audio_data is not None else {}
+
+         # Combine emotions
+         if voice_emotions:
+             combined_emotions = self.combine_emotions(face_emotions, voice_emotions)
+         else:
+             combined_emotions = face_emotions
+
+         # Map to clinical categories
+         clinical_emotions = self.map_to_clinical_emotions(combined_emotions)
+
+         # Generate alerts
+         alerts, suggestions = self.generate_alerts(clinical_emotions)
+
+         # Store in history
+         emotion_record = {
+             'timestamp': timestamp,
+             'face_emotions': face_emotions,
+             'voice_emotions': voice_emotions,
+             'clinical_emotions': clinical_emotions,
+             'alerts': alerts,
+             'suggestions': suggestions
+         }
+
+         self.emotion_history.append(emotion_record)
+
+         return emotion_record
+
+ # Initialize the emotion recognition system
+ emotion_system = EmotionRecognitionSystem()
+
+ def process_video_audio(video_frame, audio_data):
+     """Process video frame and audio data"""
+     if video_frame is None:
+         return None, "No video input", "", ""
+
+     # Process the frame
+     sample_rate = 16000
+     if audio_data is not None:
+         audio_array, sr = audio_data
+         if sr != sample_rate:
+             audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=sample_rate)
+     else:
+         audio_array = None
+
+     # Get emotion analysis
+     emotion_record = emotion_system.process_frame(video_frame, audio_array, sample_rate)
+
+     # Create visualization
+     annotated_frame = create_emotion_overlay(video_frame, emotion_record)
+
+     # Format results
+     clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
+     alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
+     suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
+
+     return annotated_frame, clinical_text, alerts_text, suggestions_text
+
+ def create_emotion_overlay(frame, emotion_record):
+     """Add emotion information overlay to video frame"""
+     annotated_frame = frame.copy()
+
+     # Get top emotion
+     clinical_emotions = emotion_record['clinical_emotions']
+     top_emotion = max(clinical_emotions.items(), key=lambda x: x[1])
+
+     # Add text overlay
+     cv2.putText(annotated_frame, f"Primary: {top_emotion[0]} ({top_emotion[1]:.2f})",
+                 (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+     # Add alert indicator
+     if emotion_record['alerts']:
+         cv2.putText(annotated_frame, "ALERT!", (10, 60),
+                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+
+     return annotated_frame
+
+ def format_clinical_emotions(clinical_emotions):
+     """Format clinical emotions for display"""
+     formatted = []
+     for emotion, score in clinical_emotions.items():
+         bar = "█" * int(score * 10)
+         formatted.append(f"{emotion.capitalize()}: {bar} {score:.3f}")
+     return "\n".join(formatted)
+
+ def create_emotion_timeline():
+     """Create emotion timeline chart"""
+     if not emotion_system.emotion_history:
+         return create_empty_chart()
+
+     # Extract data for plotting
+     timestamps = [record['timestamp'] for record in emotion_system.emotion_history]
+
+     fig = go.Figure()
+
+     # Add traces for each clinical emotion
+     clinical_emotions = ['stress', 'anxiety', 'pain', 'confusion', 'comfort']
+     colors = ['red', 'orange', 'purple', 'brown', 'green']
+
+     for emotion, color in zip(clinical_emotions, colors):
+         values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
+         fig.add_trace(go.Scatter(
+             x=timestamps,
+             y=values,
+             mode='lines+markers',
+             name=emotion.capitalize(),
+             line=dict(color=color, width=2),
+             marker=dict(size=4)
+         ))
+
+     fig.update_layout(
+         title="Patient Emotion Timeline",
+         xaxis_title="Time",
+         yaxis_title="Emotion Intensity",
+         height=400,
+         showlegend=True,
+         template="plotly_white"
+     )
+
+     return fig
+
+ def create_empty_chart():
+     """Create empty chart when no data available"""
+     fig = go.Figure()
+     fig.add_annotation(
+         text="No emotion data available yet",
+         xref="paper", yref="paper",
+         x=0.5, y=0.5, xanchor='center', yanchor='middle',
+         showarrow=False, font=dict(size=16)
+     )
+     fig.update_layout(
+         title="Patient Emotion Timeline",
+         height=400,
+         template="plotly_white"
+     )
+     return fig
+
+ def get_session_summary():
+     """Generate session summary"""
+     if not emotion_system.emotion_history:
+         return "No session data available"
+
+     # Calculate averages
+     avg_emotions = {}
+     total_alerts = 0
+
+     for emotion in ['stress', 'anxiety', 'pain', 'confusion', 'comfort']:
+         values = [record['clinical_emotions'].get(emotion, 0) for record in emotion_system.emotion_history]
+         avg_emotions[emotion] = np.mean(values) if values else 0
+
+     total_alerts = sum(len(record['alerts']) for record in emotion_system.emotion_history)
+
+     # Format summary
+     summary = f"""
+     Session Summary:
+     - Duration: {len(emotion_system.emotion_history)} readings
+     - Average Stress Level: {avg_emotions['stress']:.3f}
+     - Average Anxiety Level: {avg_emotions['anxiety']:.3f}
+     - Average Pain Level: {avg_emotions['pain']:.3f}
+     - Average Confusion Level: {avg_emotions['confusion']:.3f}
+     - Average Comfort Level: {avg_emotions['comfort']:.3f}
+     - Total Alerts: {total_alerts}
+
+     Recommendations:
+     - Monitor stress levels during consultation
+     - Ensure patient understanding and comfort
+     - Address any recurring high emotion levels
+     """
+
+     return summary
+
+ def clear_session():
+     """Clear session data"""
+     emotion_system.emotion_history.clear()
+     return "Session data cleared", create_empty_chart(), ""
+
+ # Create Gradio interface
+ def create_interface():
+     with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
+         gr.Markdown("""
+         # 🏥 Real-Time Patient Emotion Recognition System
+
+         This system analyzes patient facial expressions and voice tone during consultations to detect emotions such as stress, anxiety, confusion, or pain.
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 gr.Markdown("### 📹 Live Analysis")
+
+                 # Video input
+                 video_input = gr.Video(
+                     label="Video Feed",
+                     sources=["webcam"],
+                     streaming=True
+                 )
+
+                 # Audio input
+                 audio_input = gr.Audio(
+                     label="Audio Input",
+                     sources=["microphone"],
+                     type="numpy",
+                     streaming=True
+                 )
+
+                 # Process button
+                 process_btn = gr.Button("🔄 Process Current Frame", variant="primary")
+
+             with gr.Column(scale=2):
+                 gr.Markdown("### 📊 Real-Time Results")
+
+                 # Annotated video output
+                 video_output = gr.Image(
+                     label="Emotion Analysis",
+                     type="numpy"
+                 )
+
+                 # Clinical emotions display
+                 clinical_output = gr.Textbox(
+                     label="Clinical Emotion Levels",
+                     lines=6,
+                     interactive=False
+                 )
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### ⚠️ Alerts")
+                 alerts_output = gr.Textbox(
+                     label="Current Alerts",
+                     lines=3,
+                     interactive=False
+                 )
+
+             with gr.Column():
+                 gr.Markdown("### 💡 Suggestions")
+                 suggestions_output = gr.Textbox(
+                     label="Practitioner Suggestions",
+                     lines=3,
+                     interactive=False
+                 )
+
+         with gr.Row():
+             gr.Markdown("### 📈 Emotion Timeline")
+             timeline_plot = gr.Plot(label="Emotion Timeline")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### 📋 Session Summary")
+                 summary_output = gr.Textbox(
+                     label="Session Summary",
+                     lines=12,
+                     interactive=False
+                 )
+
+         with gr.Row():
+             update_summary_btn = gr.Button("📊 Update Summary")
+             clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")
+             update_timeline_btn = gr.Button("🔄 Update Timeline")
+
+         # Event handlers
+         process_btn.click(
+             fn=process_video_audio,
+             inputs=[video_input, audio_input],
+             outputs=[video_output, clinical_output, alerts_output, suggestions_output]
+         )
+
+         update_timeline_btn.click(
+             fn=create_emotion_timeline,
+             outputs=timeline_plot
+         )
+
+         update_summary_btn.click(
+             fn=get_session_summary,
+             outputs=summary_output
+         )
+
+         clear_btn.click(
+             fn=clear_session,
+             outputs=[summary_output, timeline_plot, clinical_output]
+         )
+
+         # Auto-update timeline every few seconds
+         demo.load(fn=create_emotion_timeline, outputs=timeline_plot)
+
+         gr.Markdown("""
+         ### 📝 Usage Instructions:
+         1. **Enable camera and microphone** access when prompted
+         2. **Click "Process Current Frame"** to analyze emotions in real-time
+         3. **Monitor the timeline** to track emotion changes over time
+         4. **Review alerts and suggestions** for patient care recommendations
+         5. **Use session summary** for consultation documentation
+
+         ### 🔧 Technical Notes:
+         - System uses pre-trained emotion recognition models
+         - Combines facial expression and voice tone analysis
+         - Provides clinical emotion mapping (stress, anxiety, pain, confusion)
+         - Generates real-time alerts and suggestions for practitioners
+         """)
+
+     return demo
+
+ # Launch the application
  if __name__ == "__main__":
+     demo = create_interface()
+     demo.launch(
+         share=True,
+         server_name="0.0.0.0",
+         server_port=7860,
+         show_error=True
+     )
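
A quick way to sanity-check the new module outside Gradio is a minimal sketch like the one below. It is not part of this commit; it assumes the new file is saved as app.py with its imports (gradio, opencv-python, numpy, librosa, pandas, plotly) installed, and that transformers is absent so the mock fallback path in EmotionRecognitionSystem is used.

# Hypothetical smoke test (not part of this commit)
import numpy as np
from app import emotion_system                     # module-level instance created in app.py

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # blank BGR frame stands in for a webcam capture
record = emotion_system.process_frame(frame)       # no audio -> face-only path (mock scores here)
print(record['clinical_emotions'])                 # stress/anxiety/pain/confusion/comfort levels
print(record['alerts'])                            # threshold-based alerts, if any

Because demo.launch() is guarded by `if __name__ == "__main__":`, importing app only builds the EmotionRecognitionSystem instance and does not start the Gradio server.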