# app.py

import streamlit as st
import cv2
import numpy as np
import tensorflow as tf
import os
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase, WebRtcMode
import av # Part of streamlit-webrtc's dependencies for frame handling

# --- Streamlit Page Configuration (MUST BE THE FIRST STREAMLIT COMMAND) ---
st.set_page_config(page_title="Real-time Emotion Recognition", layout="wide")

# --- 1. Load Model and Face Detector (Cached for Performance) ---
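# st.cache_resource caches a single shared object per server process, so the
# model and cascade are loaded once rather than on every Streamlit rerun.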

@st.cache_resource
def load_emotion_model():
    # Path to your trained model.
    # In a Docker container, the app's working directory will be /app.
    # So if your models folder is at /app/models, then 'models/...' is correct.
    # Ensure your Dockerfile copies the 'models' folder correctly.
    model_path = 'models/emotion_model_best.h5' 
    
    if not os.path.exists(model_path):
        st.error(f"Error: Model file not found at {model_path}. Please ensure it's copied into the Docker image and path is correct.")
        st.stop()
    try:
        model = tf.keras.models.load_model(model_path)
        return model
    except Exception as e:
        st.error(f"Error loading model from {model_path}: {e}")
        st.stop()

@st.cache_resource
def load_face_detector():
    # Path to your Haar Cascade file.
    # Ensure 'haarcascade_frontalface_default.xml' is in the root of your project
    # directory (which is copied to /app in Docker) for this path to be correct.
    cascade_path = 'haarcascade_frontalface_default.xml'

    if not os.path.exists(cascade_path):
        st.error(f"Error: Haar Cascade file not found at {cascade_path}.")
        st.markdown("Please ensure `haarcascade_frontalface_default.xml` is in the root of your project directory alongside `src/` and `models/`.")
        st.markdown("Download from: [https://github.com/opencv/opencv/blob/4.x/data/haarcascades/haarcascade_frontalface_default.xml](https://github.com/opencv/opencv/blob/4.x/data/haarcascades/haarcascade_frontalface_default.xml)")
        st.stop()
    face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        st.error(f"Error: Could not load Haar Cascade classifier from {cascade_path}. Check file integrity.")
        st.stop()
    return face_cascade
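
# Alternative (if the XML isn't vendored with the project): the opencv-python
# wheel bundles the Haar cascades, so the packaged copy can be loaded instead
# of a project-local file:
#
#   cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
#   face_cascade = cv2.CascadeClassifier(cascade_path)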

# Load the model and face detector when the app starts
model = load_emotion_model()
face_detector = load_face_detector()

# --- 2. Define Constants and Labels ---
IMG_HEIGHT = 48
IMG_WIDTH = 48
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

label_colors = {
    'angry': (0, 0, 255),    # BGR Red
    'disgust': (0, 165, 255), # BGR Orange
    'fear': (0, 255, 255),   # BGR Yellow
    'happy': (0, 255, 0),    # BGR Green
    'neutral': (255, 255, 0), # BGR Cyan
    'sad': (255, 0, 0),      # BGR Blue
    'surprise': (255, 0, 255) # BGR Magenta
}

FACE_DETECTION_DOWNSCALE = 0.5 # Scale factor for face detection

# --- 3. Video Processing Class ---
# This class will receive frames from the client and process them on the server
class EmotionDetector(VideoTransformerBase):
    def __init__(self, model, face_detector):
        self.model = model
        self.face_detector = face_detector

    def transform(self, frame: av.VideoFrame) -> np.ndarray:
        # Convert av.VideoFrame to NumPy array.
        # Requesting "bgr24" format directly from `av` to align with OpenCV's default.
        img_bgr = frame.to_ndarray(format="bgr24")

        # Convert to grayscale for face detection and emotion prediction
        gray_frame = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

        # Scale down for faster face detection
        small_frame = cv2.resize(gray_frame, (0, 0), fx=FACE_DETECTION_DOWNSCALE, fy=FACE_DETECTION_DOWNSCALE)
        
        # Detect faces
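        # scaleFactor sets the image-pyramid step between detection scales,
        # minNeighbors trades recall against false positives, and minSize is in
        # *downscaled* pixels, so (30, 30) here is ~60x60 in the original frame.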
        faces = self.face_detector.detectMultiScale(small_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Scale face coordinates back to original frame size
        original_faces = []
        for (x, y, w, h) in faces:
            x_orig = int(x / FACE_DETECTION_DOWNSCALE)
            y_orig = int(y / FACE_DETECTION_DOWNSCALE)
            w_orig = int(w / FACE_DETECTION_DOWNSCALE)
            h_orig = int(h / FACE_DETECTION_DOWNSCALE) 
            original_faces.append((x_orig, y_orig, w_orig, h_orig))

        # Process each detected face
        for (x, y, w, h) in original_faces:
            # Draw rectangle on the BGR image (img_bgr)
            cv2.rectangle(img_bgr, (x, y), (x+w, y+h), (255, 0, 0), 2)

            # Extract face ROI for emotion prediction
            # Ensure ROI coordinates are within image bounds
            face_roi = gray_frame[max(0, y):min(gray_frame.shape[0], y+h), max(0, x):min(gray_frame.shape[1], x+w)]

            if face_roi.size == 0: # Skip if ROI is empty (e.g., face partially out of frame)
                continue

            face_roi = cv2.resize(face_roi, (IMG_WIDTH, IMG_HEIGHT))
            face_roi = np.expand_dims(face_roi, axis=0) # Add batch dimension
            face_roi = np.expand_dims(face_roi, axis=-1) # Add channel dimension (for grayscale)
            face_roi = face_roi / 255.0 # Normalize pixel values
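            # This preprocessing must mirror what the model saw during training:
            # 48x48 grayscale, scaled to [0, 1].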

            predictions = self.model.predict(face_roi, verbose=0)[0]
            emotion_index = np.argmax(predictions)
            predicted_emotion = emotion_labels[emotion_index]
            confidence = predictions[emotion_index] * 100
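            # Note: Keras' predict() carries per-call overhead; for single-frame
            # inference, calling the model directly (self.model(face_roi,
            # training=False)) is typically faster.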

            text_color = label_colors.get(predicted_emotion, (255, 255, 255))
            text = f"{predicted_emotion} ({confidence:.2f}%)"
            
            # Position text above face, or below if not enough space above
            text_y = y - 10 if y - 10 > 10 else y + h + 20
            
            # Draw text on the BGR image (img_bgr)
            cv2.putText(img_bgr, text, (x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, text_color, 2, cv2.LINE_AA)

        # Return the annotated BGR image as-is: streamlit-webrtc wraps the
        # ndarray returned by transform() as a bgr24 frame, so converting to
        # RGB here would swap the red and blue channels on display.
        return img_bgr
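
# Note: recent streamlit-webrtc releases favor a plain callback over the
# class-based VideoTransformerBase API (now deprecated). A minimal sketch of
# the equivalent wiring, with the same detection logic moved into a function:
#
#   def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
#       img_bgr = frame.to_ndarray(format="bgr24")
#       # ... face detection + annotation as in EmotionDetector.transform ...
#       return av.VideoFrame.from_ndarray(img_bgr, format="bgr24")
#
#   webrtc_streamer(key="...", video_frame_callback=video_frame_callback)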

# --- 4. Streamlit App Layout and WebRTC Stream ---
st.title("Live Facial Emotion Recognition")

st.markdown("""
This application uses a deep learning model to detect emotions from faces in real time.
Your browser captures the webcam via WebRTC and streams the frames to the server, where they are processed.
""")

# Place the webrtc_streamer widget.
# It renders a video player plus "Start" / "Stop" buttons automatically.
webrtc_ctx = webrtc_streamer(
    key="emotion_detection_stream",
    mode=WebRtcMode.SENDRECV, # Send video from client, receive processed video from server
    video_processor_factory=lambda: EmotionDetector(model, face_detector),
    media_stream_constraints={"video": True, "audio": False}, # Only video, no audio
    
    async_processing=False, # Keep this False for now to avoid asyncio errors
    
    # Optional: auto-start the stream instead of waiting for a manual "Start"
    # click (desired_playing_state takes a bool in streamlit-webrtc):
    # desired_playing_state=True,

    # --- ENHANCED RTC CONFIGURATION ---
    # A redundant list of public STUN servers improves the odds of successful
    # NAT traversal. Public STUN servers come and go, and behind strict NATs
    # or firewalls a TURN server is usually needed as well.
    rtc_configuration={
        "iceServers": [
            {"urls": ["stun:stun.l.google.com:19302"]},
            {"urls": ["stun:stun1.l.google.com:19302"]},
            {"urls": ["stun:stun2.l.google.com:19302"]},
            {"urls": ["stun:stun3.l.google.com:19302"]},
            {"urls": ["stun:stun4.l.google.com:19302"]},
            {"urls": ["stun:stun.services.mozilla.com"]},
            {"urls": ["stun:global.stun.twilio.com:3478"]},
            {"urls": ["stun:stun.nextcloud.com:3478"]},
            {"urls": ["stun:stun.schlund.de"]},
            {"urls": ["stun:stun.stunprotocol.org"]},
            {"urls": ["stun:stunserver.org"]},
        ]
    },
)
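
# webrtc_streamer has no log-level argument; verbose diagnostics are enabled
# through Python's logging module instead (logger names assumed from the
# libraries involved):
#
#   import logging
#   logging.getLogger("streamlit_webrtc").setLevel(logging.DEBUG)
#   logging.getLogger("aiortc").setLevel(logging.DEBUG)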

# Provide feedback based on the stream state
if webrtc_ctx.state.playing:
    st.success("Webcam stream active. Looking for faces...")
else:
    st.info("Webcam stream not active. Click the 'Start' button above to begin, and allow camera access.")
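
# To run locally: `streamlit run app.py`. Streamlit serves on port 8501 by
# default, so a Docker container must publish that port (e.g. -p 8501:8501).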