Spaces:
Runtime error
Runtime error
Testimony Adekoya
committed on
Commit
Β·
e65b3b4
1
Parent(s):
f7db860
WIP: Quick demo for project
Browse files- ai_alert_generator.py +214 -0
- app.py +191 -439
- facial_detection.py +415 -0
- gradio_interface.py +277 -0
ai_alert_generator.py
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import google.generativeai as genai
|
2 |
+
import numpy as np
|
3 |
+
from scipy.io import wavfile
|
4 |
+
import tempfile
|
5 |
+
import os
|
6 |
+
|
7 |
+
from facial_detection import OpenCVFaceDetector, MetricsCalculator, DrowsinessAnalyzer, AlertManager, VisualizationRenderer, StatusLogger
|
8 |
+
|
9 |
+
class AIAlertGenerator:
    """Generate AI-powered voice alerts using Gemini.

    Falls back to canned alert messages when no API key is configured or a
    Gemini call fails, so the detection pipeline never depends on network
    access.
    """

    def __init__(self, api_key=None):
        """Initialize the generator.

        Args:
            api_key: Optional Gemini API key. When omitted, or when
                initialization fails, only the built-in fallback messages
                are used.
        """
        self.model = None
        if api_key:
            try:
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel('gemini-1.5-flash')
                print("✅ Gemini AI initialized for voice alerts")
            except Exception as e:
                # Degrade gracefully: alerts still work via the defaults.
                print(f"⚠️ Failed to initialize Gemini: {e}")

    def generate_alert_text(self, alert_type, severity="medium"):
        """Generate contextual alert text using Gemini.

        Args:
            alert_type: Either a single indicator string ("EYES_CLOSED",
                "YAWNING", "HEAD_NOD") or a list of indicators; more than
                one indicator selects the "COMBINED" prompt.
            severity: Severity hint embedded in the prompt
                ("medium", "high", "critical").

        Returns:
            Alert text (truncated to 100 chars), or a fallback message if
            no model is configured or generation fails.
        """
        if not self.model:
            return self._get_default_alert_text(alert_type, severity)

        try:
            prompts = {
                "EYES_CLOSED": f"Generate a brief, urgent wake-up message (max 12 words) for a drowsy driver whose eyes are closing. Severity: {severity}. Sound caring but firm.",
                "YAWNING": f"Generate a brief, gentle alert (max 10 words) for a driver who is yawning frequently. Severity: {severity}. Sound encouraging.",
                "HEAD_NOD": f"Generate a brief, firm alert (max 10 words) for a driver whose head is nodding. Severity: {severity}. Sound urgent but supportive.",
                "COMBINED": f"Generate a brief, critical alert (max 15 words) for a driver showing multiple drowsiness signs. Severity: {severity}. Sound very urgent but caring."
            }

            # Multiple simultaneous indicators get the combined prompt.
            if isinstance(alert_type, list):
                prompt_key = "COMBINED" if len(alert_type) > 1 else (alert_type[0] if alert_type else "EYES_CLOSED")
            else:
                prompt_key = alert_type
            prompt = prompts.get(prompt_key, prompts["EYES_CLOSED"])

            response = self.model.generate_content(prompt)
            # Strip quotes so TTS does not read punctuation aloud.
            alert_text = response.text.strip().replace('"', '').replace("'", "")

            return alert_text[:100]

        except Exception as e:
            print(f"Error generating AI alert: {e}")
            return self._get_default_alert_text(alert_type, severity)

    def _get_default_alert_text(self, alert_type, severity):
        """Fallback alert messages used when Gemini is unavailable.

        Args:
            alert_type: Indicator string or list of indicator strings.
            severity: "medium", "high", or "critical".

        Returns:
            A canned alert string; a generic message for unknown
            indicator/severity combinations.
        """
        default_alerts = {
            "EYES_CLOSED": {
                "critical": "WAKE UP NOW! Pull over immediately!",
                "high": "Eyes closing! Stay alert and pull over soon!",
                "medium": "Please keep your eyes open while driving!"
            },
            "YAWNING": {
                "critical": "Excessive yawning detected! Take a break!",
                "high": "You seem tired. Consider resting soon.",
                "medium": "Frequent yawning noticed. Stay alert!"
            },
            "HEAD_NOD": {
                "critical": "Head nodding detected! Stop driving now!",
                "high": "Your head is nodding. Pull over safely!",
                "medium": "Head movement detected. Stay focused!"
            }
        }

        # BUGFIX: an empty indicator list used to raise IndexError here;
        # now it falls through to the generic message.
        if isinstance(alert_type, list):
            alert_key = alert_type[0] if alert_type else None
        else:
            alert_key = alert_type
        return default_alerts.get(alert_key, {}).get(severity, "Stay alert while driving!")

    def create_audio_alert(self, text, sample_rate=22050):
        """Create an audio alert WAV file (attention-grabbing beep pattern).

        Args:
            text: Alert text associated with the audio (returned unchanged).
            sample_rate: Output sample rate in Hz.

        Returns:
            Tuple ``(wav_path, text)`` on success, ``(None, text)`` on
            failure. The caller is responsible for deleting the temp file.
        """
        try:
            duration = 2.0
            freq = 800
            frames = int(duration * sample_rate)

            # Create attention-grabbing beep pattern: two decaying tones.
            t = np.linspace(0, duration, frames)
            beep1 = np.sin(2 * np.pi * freq * t) * np.exp(-t * 3)
            beep2 = np.sin(2 * np.pi * (freq * 1.5) * t) * np.exp(-t * 3)

            # Combine beeps with short pauses between them.
            silence = np.zeros(int(0.1 * sample_rate))
            audio = np.concatenate([beep1, silence, beep2, silence, beep1])

            # BUGFIX: actually normalize to the peak before int16 scaling.
            # The original comment claimed normalization but only scaled,
            # which would clip if the synthesis ever exceeded +/-1.0.
            peak = np.max(np.abs(audio))
            if peak > 0:
                audio = audio / peak
            audio = (audio * 32767).astype(np.int16)

            # BUGFIX: close the handle before writing by path so this also
            # works on Windows, where an open NamedTemporaryFile cannot be
            # reopened by name.
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
            temp_file.close()
            wavfile.write(temp_file.name, sample_rate, audio)

            return temp_file.name, text

        except Exception as e:
            print(f"Error creating audio alert: {e}")
            return None, text
class DrowsinessDetectionSystem:
    """Main coordinator wiring detection, analysis, alerting and rendering."""

    def __init__(self):
        """Construct all pipeline components."""
        self.face_detector = OpenCVFaceDetector()
        self.metrics_calculator = MetricsCalculator()
        self.drowsiness_analyzer = DrowsinessAnalyzer()
        self.alert_manager = AlertManager()
        self.visualization_renderer = VisualizationRenderer()
        self.logger = StatusLogger()

        print("✅ Drowsiness Detection System initialized with OpenCV")

    def process_frame(self, frame):
        """Run the full pipeline on one frame.

        Args:
            frame: BGR image array; annotated in place.

        Returns:
            Tuple ``(frame, status_lines, should_alert, metrics)``. On
            error or when no face is found, ``should_alert`` is False and
            ``metrics`` is empty.
        """
        try:
            rects, marks_per_face = self.face_detector.detect_landmarks(frame)

            # Guard: nothing to analyze without a detected face.
            if not rects or not marks_per_face:
                self.logger.log("No face detected")
                return frame, ["👤 No face detected"], False, {}

            # Only the first detected face is analyzed.
            primary_rect = rects[0]
            marks = marks_per_face[0]

            # Eye-aspect ratios; 0.25 is the neutral default when a corner
            # set is missing from the landmark dict.
            left = (self.metrics_calculator.calculate_ear_from_points(marks['left_eye_corners'])
                    if 'left_eye_corners' in marks else 0.25)
            right = (self.metrics_calculator.calculate_ear_from_points(marks['right_eye_corners'])
                     if 'right_eye_corners' in marks else 0.25)
            ear = (left + right) / 2.0

            # Mouth-aspect ratio, with a neutral default of 0.3.
            mar = (self.metrics_calculator.calculate_mar_from_points(marks['mouth_corners'])
                   if 'mouth_corners' in marks else 0.3)

            # Rough head-pose estimate from nose/chin relative to center.
            center = (frame.shape[1] // 2, frame.shape[0] // 2)
            head_angles = self.metrics_calculator.estimate_head_pose_simple(
                marks.get('nose_tip'), marks.get('chin'), center)

            # Classify drowsiness and decide whether to raise an alert.
            indicators = self.drowsiness_analyzer.analyze_drowsiness(ear, mar, head_angles)
            severity = self.drowsiness_analyzer.get_severity_level(indicators)
            should_alert = self.alert_manager.should_trigger_alert(indicators)

            # Annotate the frame in place.
            self.visualization_renderer.draw_landmarks_and_contours(frame, marks, primary_rect)
            self.visualization_renderer.draw_metrics_overlay(frame, ear, mar, head_angles[0], indicators)

            status_lines = self._generate_status_text(ear, mar, head_angles[0], indicators)

            if indicators:
                self.logger.log(f"Drowsiness detected: {', '.join(indicators)} (Severity: {severity})")

            metrics = {
                'ear': ear,
                'mar': mar,
                'head_angle': head_angles[0],
                'indicators': indicators,
                'severity': severity,
            }

            return frame, status_lines, should_alert, metrics

        except Exception as e:
            message = f"Error processing frame: {str(e)}"
            self.logger.log(message)
            return frame, [message], False, {}

    def _generate_status_text(self, ear, mar, head_angle, indicators):
        """Build the human-readable status lines for one frame."""
        analyzer = self.drowsiness_analyzer
        eyes_closed = ear < analyzer.EAR_THRESHOLD
        yawning = mar > analyzer.YAWN_THRESHOLD
        nodding = abs(head_angle) > analyzer.NOD_THRESHOLD

        lines = [
            f"👁️ Eyes closing! EAR: {ear:.3f}" if eyes_closed
            else f"👁️ Eyes open - EAR: {ear:.3f}",
            f"🥱 Yawning detected! MAR: {mar:.3f}" if yawning
            else f"👄 Normal mouth - MAR: {mar:.3f}",
            f"📐 Head nodding! Angle: {head_angle:.1f}°" if nodding
            else f"📐 Head pose normal - Pitch: {head_angle:.1f}°",
        ]

        if indicators:
            lines.append(f"⚠️ ALERT: {', '.join(indicators)}")
        else:
            lines.append("✅ Driver appears alert")

        return lines

    def get_logs(self):
        """Return recent system log entries joined as one string."""
        return "\n".join(self.logger.get_recent_logs())
app.py
CHANGED
@@ -1,457 +1,209 @@
|
|
1 |
-
|
2 |
-
import numpy as np
|
3 |
-
import dlib
|
4 |
-
import gradio as gr
|
5 |
-
import threading
|
6 |
-
import time
|
7 |
-
import queue
|
8 |
-
import pygame
|
9 |
-
import io
|
10 |
-
import google.generativeai as genai
|
11 |
-
from scipy.spatial import distance as dist
|
12 |
-
from collections import deque
|
13 |
-
import tempfile
|
14 |
import os
|
15 |
-
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
self.LEFT_EYE = list(range(36, 42))
|
57 |
-
self.RIGHT_EYE = list(range(42, 48))
|
58 |
-
self.MOUTH = list(range(48, 68))
|
59 |
-
|
60 |
-
# 3D model points for head pose estimation
|
61 |
-
self.model_points = np.array([
|
62 |
-
(0.0, 0.0, 0.0), # Nose tip
|
63 |
-
(0.0, -330.0, -65.0), # Chin
|
64 |
-
(-225.0, 170.0, -135.0), # Left eye left corner
|
65 |
-
(225.0, 170.0, -135.0), # Right eye right corner
|
66 |
-
(-150.0, -150.0, -125.0), # Left Mouth corner
|
67 |
-
(150.0, -150.0, -125.0) # Right mouth corner
|
68 |
-
])
|
69 |
-
|
70 |
-
self.status_log = deque(maxlen=100)
|
71 |
-
|
72 |
-
def calculate_ear(self, eye_landmarks):
|
73 |
-
"""Calculate Eye Aspect Ratio"""
|
74 |
-
# Vertical eye landmarks
|
75 |
-
A = dist.euclidean(eye_landmarks[1], eye_landmarks[5])
|
76 |
-
B = dist.euclidean(eye_landmarks[2], eye_landmarks[4])
|
77 |
-
# Horizontal eye landmark
|
78 |
-
C = dist.euclidean(eye_landmarks[0], eye_landmarks[3])
|
79 |
-
# EAR calculation
|
80 |
-
ear = (A + B) / (2.0 * C)
|
81 |
-
return ear
|
82 |
-
|
83 |
-
def calculate_mar(self, mouth_landmarks):
|
84 |
-
"""Calculate Mouth Aspect Ratio for yawn detection"""
|
85 |
-
# Vertical mouth landmarks
|
86 |
-
A = dist.euclidean(mouth_landmarks[2], mouth_landmarks[10]) # 50, 58
|
87 |
-
B = dist.euclidean(mouth_landmarks[4], mouth_landmarks[8]) # 52, 56
|
88 |
-
# Horizontal mouth landmark
|
89 |
-
C = dist.euclidean(mouth_landmarks[0], mouth_landmarks[6]) # 48, 54
|
90 |
-
# MAR calculation
|
91 |
-
mar = (A + B) / (2.0 * C)
|
92 |
-
return mar
|
93 |
-
|
94 |
-
def get_head_pose(self, landmarks, img_size):
|
95 |
-
"""Estimate head pose using facial landmarks"""
|
96 |
-
image_points = np.array([
|
97 |
-
(landmarks[30][0], landmarks[30][1]), # Nose tip
|
98 |
-
(landmarks[8][0], landmarks[8][1]), # Chin
|
99 |
-
(landmarks[36][0], landmarks[36][1]), # Left eye left corner
|
100 |
-
(landmarks[45][0], landmarks[45][1]), # Right eye right corner
|
101 |
-
(landmarks[48][0], landmarks[48][1]), # Left Mouth corner
|
102 |
-
(landmarks[54][0], landmarks[54][1]) # Right mouth corner
|
103 |
-
], dtype="double")
|
104 |
-
|
105 |
-
# Camera internals
|
106 |
-
focal_length = img_size[1]
|
107 |
-
center = (img_size[1]/2, img_size[0]/2)
|
108 |
-
camera_matrix = np.array([
|
109 |
-
[focal_length, 0, center[0]],
|
110 |
-
[0, focal_length, center[1]],
|
111 |
-
[0, 0, 1]], dtype="double")
|
112 |
-
|
113 |
-
dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion
|
114 |
-
|
115 |
-
# Solve PnP
|
116 |
-
(success, rotation_vector, translation_vector) = cv2.solvePnP(
|
117 |
-
self.model_points, image_points, camera_matrix, dist_coeffs,
|
118 |
-
flags=cv2.SOLVEPNP_ITERATIVE)
|
119 |
-
|
120 |
-
# Convert rotation vector to rotation matrix
|
121 |
-
(rotation_matrix, jacobian) = cv2.Rodrigues(rotation_vector)
|
122 |
-
|
123 |
-
# Calculate Euler angles
|
124 |
-
sy = np.sqrt(rotation_matrix[0,0] * rotation_matrix[0,0] + rotation_matrix[1,0] * rotation_matrix[1,0])
|
125 |
-
singular = sy < 1e-6
|
126 |
-
if not singular:
|
127 |
-
x = np.arctan2(rotation_matrix[2,1], rotation_matrix[2,2])
|
128 |
-
y = np.arctan2(-rotation_matrix[2,0], sy)
|
129 |
-
z = np.arctan2(rotation_matrix[1,0], rotation_matrix[0,0])
|
130 |
-
else:
|
131 |
-
x = np.arctan2(-rotation_matrix[1,2], rotation_matrix[1,1])
|
132 |
-
y = np.arctan2(-rotation_matrix[2,0], sy)
|
133 |
-
z = 0
|
134 |
-
|
135 |
-
# Convert to degrees
|
136 |
-
angles = np.array([x, y, z]) * 180.0 / np.pi
|
137 |
-
return angles
|
138 |
|
139 |
-
|
140 |
-
|
141 |
-
if not self.model:
|
142 |
-
return self.play_default_alert()
|
143 |
-
|
144 |
-
try:
|
145 |
-
prompts = {
|
146 |
-
"drowsy": f"Generate a brief, urgent but caring voice alert (max 15 words) to wake up a drowsy driver. Severity: {severity}. Make it sound natural and concerned.",
|
147 |
-
"yawn": f"Generate a brief, gentle voice alert (max 12 words) for a driver who is yawning frequently. Severity: {severity}. Sound caring but alert.",
|
148 |
-
"nod": f"Generate a brief, firm voice alert (max 12 words) for a driver whose head is nodding. Severity: {severity}. Sound urgent but supportive."
|
149 |
-
}
|
150 |
-
|
151 |
-
response = self.model.generate_content(prompts.get(alert_type, prompts["drowsy"]))
|
152 |
-
alert_text = response.text.strip().replace('"', '').replace("'", "")
|
153 |
-
|
154 |
-
# Use text-to-speech (you would need to install pyttsx3 or use cloud TTS)
|
155 |
-
# For this example, we'll use a placeholder
|
156 |
-
self.log_status(f"π ALERT: {alert_text}")
|
157 |
-
return alert_text
|
158 |
-
|
159 |
-
except Exception as e:
|
160 |
-
self.log_status(f"Error generating alert: {str(e)}")
|
161 |
-
return self.play_default_alert()
|
162 |
|
163 |
-
|
164 |
-
"""Play default beep alert"""
|
165 |
try:
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
frames = int(duration * sample_rate)
|
171 |
-
arr = np.zeros(frames)
|
172 |
-
|
173 |
-
for i in range(frames):
|
174 |
-
arr[i] = np.sin(2 * np.pi * freq * i / sample_rate)
|
175 |
-
|
176 |
-
arr = (arr * 32767).astype(np.int16)
|
177 |
-
sound = pygame.sndarray.make_sound(arr)
|
178 |
-
sound.play()
|
179 |
-
|
180 |
-
alert_text = "β οΈ WAKE UP! Please stay alert while driving!"
|
181 |
-
self.log_status(f"π {alert_text}")
|
182 |
-
return alert_text
|
183 |
-
|
184 |
-
except Exception as e:
|
185 |
-
self.log_status(f"Error playing alert: {str(e)}")
|
186 |
-
return "Alert system activated"
|
187 |
|
188 |
-
|
189 |
-
"
|
190 |
-
|
191 |
-
|
192 |
|
193 |
-
|
194 |
-
"
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
status_text = []
|
199 |
-
alert_message = ""
|
200 |
-
|
201 |
-
if len(faces) == 0:
|
202 |
-
status_text.append("π€ No face detected")
|
203 |
-
self.log_status("No face detected in frame")
|
204 |
-
return frame, status_text, alert_message
|
205 |
-
|
206 |
-
for face in faces:
|
207 |
-
landmarks = self.predictor(gray, face)
|
208 |
-
landmarks = np.array([[p.x, p.y] for p in landmarks.parts()])
|
209 |
-
|
210 |
-
# Draw face rectangle
|
211 |
-
x, y, w, h = face.left(), face.top(), face.width(), face.height()
|
212 |
-
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
|
213 |
-
|
214 |
-
# Eye Aspect Ratio calculation
|
215 |
-
left_eye = landmarks[self.LEFT_EYE]
|
216 |
-
right_eye = landmarks[self.RIGHT_EYE]
|
217 |
-
|
218 |
-
left_ear = self.calculate_ear(left_eye)
|
219 |
-
right_ear = self.calculate_ear(right_eye)
|
220 |
-
ear = (left_ear + right_ear) / 2.0
|
221 |
-
self.ear_history.append(ear)
|
222 |
-
|
223 |
-
# Draw eye contours
|
224 |
-
cv2.drawContours(frame, [cv2.convexHull(left_eye)], -1, (0, 255, 0), 1)
|
225 |
-
cv2.drawContours(frame, [cv2.convexHull(right_eye)], -1, (0, 255, 0), 1)
|
226 |
-
|
227 |
-
# Yawn detection
|
228 |
-
mouth = landmarks[self.MOUTH]
|
229 |
-
mar = self.calculate_mar(mouth)
|
230 |
-
self.yawn_history.append(mar)
|
231 |
-
|
232 |
-
# Draw mouth contour
|
233 |
-
cv2.drawContours(frame, [cv2.convexHull(mouth)], -1, (0, 255, 255), 1)
|
234 |
-
|
235 |
-
# Head pose estimation
|
236 |
-
head_angles = self.get_head_pose(landmarks, frame.shape)
|
237 |
-
self.head_pose_history.append(head_angles[0]) # Pitch angle
|
238 |
-
|
239 |
-
# Drowsiness detection logic
|
240 |
-
drowsy_indicators = []
|
241 |
-
|
242 |
-
# Check EAR
|
243 |
-
if ear < self.EAR_THRESHOLD:
|
244 |
-
self.ear_counter += 1
|
245 |
-
if self.ear_counter >= self.EAR_CONSECUTIVE_FRAMES:
|
246 |
-
drowsy_indicators.append("EYES_CLOSED")
|
247 |
-
status_text.append(f"ποΈ Eyes closed! EAR: {ear:.3f}")
|
248 |
-
else:
|
249 |
-
self.ear_counter = 0
|
250 |
-
status_text.append(f"ποΈ Eyes open - EAR: {ear:.3f}")
|
251 |
-
|
252 |
-
# Check for yawning
|
253 |
-
if mar > self.YAWN_THRESHOLD:
|
254 |
-
self.yawn_counter += 1
|
255 |
-
if self.yawn_counter >= self.YAWN_CONSECUTIVE_FRAMES:
|
256 |
-
drowsy_indicators.append("YAWNING")
|
257 |
-
status_text.append(f"π₯± Yawning detected! MAR: {mar:.3f}")
|
258 |
-
else:
|
259 |
-
self.yawn_counter = 0
|
260 |
-
status_text.append(f"π Normal mouth - MAR: {mar:.3f}")
|
261 |
-
|
262 |
-
# Check head nodding
|
263 |
-
if abs(head_angles[0]) > self.NOD_THRESHOLD:
|
264 |
-
self.nod_counter += 1
|
265 |
-
if self.nod_counter >= 10:
|
266 |
-
drowsy_indicators.append("HEAD_NOD")
|
267 |
-
status_text.append(f"π Head nodding! Angle: {head_angles[0]:.1f}Β°")
|
268 |
-
else:
|
269 |
-
self.nod_counter = 0
|
270 |
-
status_text.append(f"π Head pose - Pitch: {head_angles[0]:.1f}Β°")
|
271 |
-
|
272 |
-
# Generate alerts
|
273 |
-
current_time = time.time()
|
274 |
-
if drowsy_indicators and (current_time - self.last_alert_time) > self.alert_cooldown:
|
275 |
-
self.drowsy_alert = True
|
276 |
-
self.last_alert_time = current_time
|
277 |
-
|
278 |
-
# Determine alert type and severity
|
279 |
-
if "EYES_CLOSED" in drowsy_indicators:
|
280 |
-
severity = "high" if len(drowsy_indicators) > 1 else "medium"
|
281 |
-
alert_message = self.generate_voice_alert("drowsy", severity)
|
282 |
-
elif "YAWNING" in drowsy_indicators:
|
283 |
-
alert_message = self.generate_voice_alert("yawn", "medium")
|
284 |
-
elif "HEAD_NOD" in drowsy_indicators:
|
285 |
-
alert_message = self.generate_voice_alert("nod", "medium")
|
286 |
-
|
287 |
-
# Visual alert on frame
|
288 |
-
cv2.putText(frame, "β οΈ DROWSINESS ALERT! β οΈ", (50, 50),
|
289 |
-
cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
|
290 |
-
|
291 |
-
# Draw landmark points
|
292 |
-
for (x, y) in landmarks:
|
293 |
-
cv2.circle(frame, (x, y), 1, (255, 255, 255), -1)
|
294 |
-
|
295 |
-
# Add metrics overlay
|
296 |
-
cv2.putText(frame, f"EAR: {ear:.3f}", (10, frame.shape[0] - 80),
|
297 |
-
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
298 |
-
cv2.putText(frame, f"MAR: {mar:.3f}", (10, frame.shape[0] - 60),
|
299 |
-
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
300 |
-
cv2.putText(frame, f"Head: {head_angles[0]:.1f}Β°", (10, frame.shape[0] - 40),
|
301 |
-
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
302 |
-
|
303 |
-
return frame, status_text, alert_message
|
304 |
|
305 |
-
def
|
306 |
-
"""
|
|
|
|
|
307 |
|
308 |
-
|
309 |
-
|
310 |
|
311 |
-
|
312 |
-
"""Initialize the drowsiness detection system"""
|
313 |
-
global detector
|
314 |
-
try:
|
315 |
-
detector = DrowsinessDetector(gemini_key if gemini_key.strip() else None)
|
316 |
-
return "β
System initialized successfully!", "System ready for detection."
|
317 |
-
except Exception as e:
|
318 |
-
return f"β Error initializing system: {str(e)}", "System initialization failed."
|
319 |
|
320 |
-
|
321 |
-
"""Process a single video frame"""
|
322 |
-
global detector
|
323 |
-
|
324 |
-
if detector is None:
|
325 |
-
detector = DrowsinessDetector(gemini_key if gemini_key.strip() else None)
|
326 |
-
|
327 |
-
try:
|
328 |
-
processed_frame, status_list, alert_msg = detector.detect_drowsiness(frame)
|
329 |
-
|
330 |
-
# Format status text
|
331 |
-
status_text = "\n".join(status_list) if status_list else "Processing..."
|
332 |
-
|
333 |
-
# Get recent logs
|
334 |
-
log_text = "\n".join(list(detector.status_log)[-10:]) if detector.status_log else "No logs yet."
|
335 |
-
|
336 |
-
return processed_frame, status_text, alert_msg, log_text
|
337 |
-
|
338 |
-
except Exception as e:
|
339 |
-
error_msg = f"Error processing frame: {str(e)}"
|
340 |
-
return frame, error_msg, "", error_msg
|
341 |
|
342 |
-
# Create
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
This system uses computer vision and AI to detect driver drowsiness through:
|
348 |
-
- **Eye Aspect Ratio (EAR)** - Detects closed/droopy eyes
|
349 |
-
- **Mouth Aspect Ratio (MAR)** - Detects yawning
|
350 |
-
- **Head Pose Estimation** - Detects head nodding
|
351 |
-
- **AI Voice Alerts** - Uses Gemini AI for personalized wake-up messages
|
352 |
-
|
353 |
-
### π Setup Instructions:
|
354 |
-
1. Download `shape_predictor_68_face_landmarks.dat` from [dlib models](http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2)
|
355 |
-
2. Place it in the same directory as this script
|
356 |
-
3. (Optional) Enter your Gemini API key for AI-powered voice alerts
|
357 |
-
4. Start your webcam and begin monitoring!
|
358 |
-
""")
|
359 |
-
|
360 |
-
with gr.Row():
|
361 |
-
with gr.Column(scale=2):
|
362 |
-
# Gemini API key input
|
363 |
-
gemini_key_input = os.getenv('GEMINI_API_KEY')
|
364 |
-
|
365 |
-
|
366 |
-
# Initialize button
|
367 |
-
init_btn = gr.Button("π Initialize System", variant="primary")
|
368 |
-
init_status = gr.Textbox(label="Initialization Status", interactive=False)
|
369 |
-
|
370 |
-
with gr.Column(scale=1):
|
371 |
-
# System info
|
372 |
-
gr.Markdown("""
|
373 |
-
### π Detection Thresholds:
|
374 |
-
- **EAR Threshold**: 0.25
|
375 |
-
- **Yawn Threshold**: 0.6
|
376 |
-
- **Head Nod**: 15Β° deviation
|
377 |
-
- **Alert Cooldown**: 10 seconds
|
378 |
-
""")
|
379 |
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
video_input = gr.Video(
|
384 |
-
sources=["webcam"],
|
385 |
-
label="πΉ Camera Feed",
|
386 |
-
streaming=True
|
387 |
-
)
|
388 |
-
|
389 |
-
with gr.Column(scale=1):
|
390 |
-
# Status displays
|
391 |
-
current_status = gr.Textbox(
|
392 |
-
label="π Current Status",
|
393 |
-
lines=6,
|
394 |
-
interactive=False
|
395 |
-
)
|
396 |
-
|
397 |
-
alert_display = gr.Textbox(
|
398 |
-
label="π Latest Alert",
|
399 |
-
interactive=False,
|
400 |
-
placeholder="No alerts yet..."
|
401 |
-
)
|
402 |
-
|
403 |
-
system_logs = gr.Textbox(
|
404 |
-
label="π System Logs",
|
405 |
-
lines=8,
|
406 |
-
interactive=False,
|
407 |
-
placeholder="System logs will appear here..."
|
408 |
-
)
|
409 |
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
fn=process_video_frame,
|
419 |
-
inputs=[video_input, gemini_key_input],
|
420 |
-
outputs=[video_input, current_status, alert_display, system_logs],
|
421 |
-
stream_every=0.1, # Process every 100ms
|
422 |
-
show_progress=False
|
423 |
)
|
424 |
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
- **Poor detection**: Adjust camera angle and distance (arm's length recommended)
|
430 |
-
- **No alerts**: Check if Gemini API key is valid (optional feature)
|
431 |
-
- **High CPU usage**: Reduce video resolution or increase stream interval
|
432 |
-
|
433 |
-
### β οΈ Safety Notice:
|
434 |
-
This system is for demonstration purposes. Always prioritize real-world driving safety measures.
|
435 |
-
Pull over safely if you feel drowsy while driving.
|
436 |
-
""")
|
437 |
-
|
438 |
-
return demo
|
439 |
|
440 |
-
# Main execution
|
441 |
if __name__ == "__main__":
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# main.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
+
import sys
|
4 |
+
from gradio_interface import GradioWebRTCInterface
|
5 |
+
from dotenv import load_dotenv
|
6 |
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
def check_opencv_installation():
    """Check that OpenCV is installed and ships the required Haar cascades.

    Returns:
        True when cv2 imports and all required cascade files exist,
        False otherwise (diagnostics are printed either way).
    """
    try:
        import cv2
    except ImportError:
        print("❌ OpenCV not found. Please install: pip install opencv-python")
        return False

    required = (
        'haarcascade_frontalface_default.xml',
        'haarcascade_eye.xml',
        'haarcascade_smile.xml',
    )
    missing_cascades = [name for name in required
                        if not os.path.exists(cv2.data.haarcascades + name)]

    if missing_cascades:
        print(f"❌ Missing OpenCV cascade files: {missing_cascades}")
        print("💡 Please reinstall OpenCV: pip install opencv-python")
        return False

    print("✅ OpenCV and required cascade files found!")
    return True
38 |
+
|
39 |
+
def check_optional_dependencies():
    """Report which optional dependencies are importable.

    Prints one line per optional package, grouped into available and
    missing features.

    Returns:
        A tuple ``(available, missing)`` of human-readable description
        strings, so callers can also inspect the result programmatically.
        (Backward-compatible generalization: the function previously
        returned None and callers ignored the return value.)
    """
    optional_deps = {
        'mediapipe': 'Enhanced facial landmark detection',
        'google.generativeai': 'AI-powered voice alerts',
        'scipy': 'Advanced mathematical computations',
    }

    available = []
    missing = []

    for dep, description in optional_deps.items():
        try:
            # __import__ handles dotted names such as google.generativeai.
            __import__(dep)
            available.append(f"✅ {dep} - {description}")
        except ImportError:
            missing.append(f"⚪ {dep} - {description}")

    if available:
        print("📦 Available optional features:")
        for item in available:
            print(f"   {item}")

    if missing:
        print("📦 Optional features (install for enhanced functionality):")
        for item in missing:
            print(f"   {item}")

    return available, missing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
+
def main():
    """Main entry point: verify dependencies, then launch the Gradio UI.

    Exits with status 1 when OpenCV is unusable or the interface fails
    to launch.
    """
    print("🚗 Starting AI Driver Drowsiness Detection System...")
    print("🔧 Using OpenCV-based detection (no external model downloads required)")

    if not check_opencv_installation():
        sys.exit(1)

    check_optional_dependencies()

    print("\n🎉 All core requirements satisfied!")

    # Create and launch interface
    try:
        interface_manager = GradioWebRTCInterface()
        demo = interface_manager.create_interface()

        print("🚀 Launching Gradio interface...")
        print("📱 The interface will be available in your browser")
        print("🔗 A public link will be generated for sharing")

        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            show_error=True,
            # BUGFIX: dropped enable_queue=True. The parameter was removed
            # from launch() in Gradio 4.x (queueing is on by default), and
            # this project pins gradio>=4.0.0 — passing it raised TypeError.
            max_threads=10,
            favicon_path=None,
        )

    except Exception as e:
        print(f"❌ Error launching interface: {e}")
        print("💡 Try running: pip install --upgrade gradio")
        sys.exit(1)


if __name__ == "__main__":
    main()
106 |
+
# requirements.txt
|
107 |
+
"""
|
108 |
+
opencv-python>=4.5.0
|
109 |
+
gradio>=4.0.0
|
110 |
+
numpy>=1.21.0
|
111 |
+
scipy>=1.7.0
|
112 |
+
google-generativeai>=0.3.0
|
113 |
+
mediapipe>=0.10.0 # Optional for enhanced detection
|
114 |
+
"""
|
115 |
+
|
116 |
+
# README.md
|
117 |
+
"""
|
118 |
+
# π AI Driver Drowsiness Detection System
|
119 |
+
|
120 |
+
A real-time drowsiness detection system using computer vision and AI-powered alerts.
|
121 |
+
|
122 |
+
## β¨ Features
|
123 |
+
|
124 |
+
- **No External Downloads**: Uses OpenCV's built-in face detection models
|
125 |
+
- **Real-time Processing**: WebRTC streaming for low latency
|
126 |
+
- **Multi-modal Detection**: Eyes, mouth, and head pose analysis
|
127 |
+
- **AI Voice Alerts**: Contextual messages powered by Gemini AI
|
128 |
+
- **Adaptive System**: Graceful fallback without external dependencies
|
129 |
+
- **Easy Setup**: Simple pip install, no model downloads required
|
130 |
+
|
131 |
+
## π Quick Start
|
132 |
+
|
133 |
+
1. **Install dependencies:**
|
134 |
+
```bash
|
135 |
+
pip install opencv-python gradio numpy scipy google-generativeai
|
136 |
+
|
137 |
+
# Optional for enhanced detection:
|
138 |
+
pip install mediapipe
|
139 |
+
```
|
140 |
+
|
141 |
+
2. **Run the system:**
|
142 |
+
```bash
|
143 |
+
python main.py
|
144 |
+
```
|
145 |
+
|
146 |
+
3. **Open browser** and navigate to the provided URL
|
147 |
+
|
148 |
+
4. **Optional**: Enter Gemini API key for AI-powered voice alerts
|
149 |
+
|
150 |
+
## π§ How It Works
|
151 |
+
|
152 |
+
### Detection Methods
|
153 |
+
- **Primary**: MediaPipe Face Mesh (if available) for precise landmarks
|
154 |
+
- **Fallback**: OpenCV Haar Cascades for basic face/eye/mouth detection
|
155 |
+
- **Hybrid Approach**: Automatically selects best available method
|
156 |
+
|
157 |
+
### Drowsiness Indicators
|
158 |
+
- **Eye Aspect Ratio (EAR)**: Detects eye closure patterns
|
159 |
+
- **Mouth Aspect Ratio (MAR)**: Identifies yawning behavior
|
160 |
+
- **Head Pose**: Tracks head nodding and position
|
161 |
+
|
162 |
+
### Alert System
|
163 |
+
- **AI-Generated**: Contextual messages via Gemini
|
164 |
+
- **Audio Alerts**: Attention-grabbing beep patterns
|
165 |
+
- **Visual Feedback**: Real-time overlay on video stream
|
166 |
+
- **Smart Cooldown**: Prevents alert spam
|
167 |
+
|
168 |
+
## βοΈ Configuration
|
169 |
+
|
170 |
+
### Detection Thresholds
|
171 |
+
- **EAR Threshold**: 0.20 (adjustable for sensitivity)
|
172 |
+
- **MAR Threshold**: 0.8 (calibrated for yawn detection)
|
173 |
+
- **Head Nod**: 20Β° deviation threshold
|
174 |
+
- **Alert Cooldown**: 8 seconds between alerts
|
175 |
+
|
176 |
+
### Performance Optimization
|
177 |
+
- **Stream Rate**: 10 FPS processing (configurable)
|
178 |
+
- **Queue Management**: Prevents frame backlog
|
179 |
+
- **Multi-threading**: Separate processing pipeline
|
180 |
+
- **Graceful Degradation**: Maintains functionality with limited resources
|
181 |
+
|
182 |
+
## π‘οΈ Safety Notice
|
183 |
+
|
184 |
+
**This system is for demonstration and research purposes only.**
|
185 |
+
|
186 |
+
- Not a substitute for responsible driving practices
|
187 |
+
- Always pull over safely if feeling drowsy
|
188 |
+
- Use as supplementary tool alongside other safety measures
|
189 |
+
- Ensure proper camera setup and lighting
|
190 |
+
|
191 |
+
## π System Requirements
|
192 |
+
|
193 |
+
- **Python**: 3.7+
|
194 |
+
- **Camera**: Webcam or built-in camera
|
195 |
+
- **OS**: Windows, macOS, Linux
|
196 |
+
- **RAM**: 4GB+ recommended
|
197 |
+
- **CPU**: Multi-core recommended for real-time processing
|
198 |
+
|
199 |
+
## π Troubleshooting
|
200 |
+
|
201 |
+
- **No face detected**: Check lighting and camera position
|
202 |
+
- **Poor detection**: Ensure face is clearly visible and well-lit
|
203 |
+
- **High CPU usage**: Reduce stream rate or video resolution
|
204 |
+
- **Audio issues**: Check browser permissions and audio settings
|
205 |
+
|
206 |
+
## π License
|
207 |
+
|
208 |
+
MIT License - See LICENSE file for details
|
209 |
+
"""
|
facial_detection.py
ADDED
@@ -0,0 +1,415 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# facial_detection.py
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
from scipy.spatial import distance as dist
|
5 |
+
from collections import deque
|
6 |
+
import time
|
7 |
+
from datetime import datetime
|
8 |
+
|
9 |
+
class OpenCVFaceDetector:
    """Face detection and facial-landmark estimation.

    Prefers MediaPipe Face Mesh when the package is importable and falls
    back to OpenCV Haar cascades (bundled with opencv-python), so no
    external model downloads are required.
    """

    def __init__(self):
        # Load OpenCV's pre-trained Haar cascade models (shipped with cv2).
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
        # NOTE: the "smile" cascade is used as a rough mouth locator;
        # OpenCV ships no dedicated mouth cascade.
        self.mouth_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')

        # Try to load MediaPipe for better landmark detection
        # (fallback to cascades if not available).
        self.use_mediapipe = False
        try:
            import mediapipe as mp
            self.mp_face_mesh = mp.solutions.face_mesh
            self.mp_drawing = mp.solutions.drawing_utils
            self.face_mesh = self.mp_face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5
            )
            self.use_mediapipe = True
            print("✅ Using MediaPipe for enhanced landmark detection")
        except ImportError:
            print("⚠️ MediaPipe not available, using OpenCV cascade classifiers")

        # MediaPipe Face Mesh landmark indices for the regions we analyse
        # (rough 68-point-model equivalents).
        self.LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
        self.RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
        self.MOUTH_INDICES = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 62]

    def detect_faces_opencv(self, frame):
        """Detect faces with the Haar cascade.

        Returns (faces, gray) where *faces* is a sequence of (x, y, w, h)
        rectangles and *gray* is the grayscale conversion of *frame*.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
        return faces, gray

    def estimate_landmarks_opencv(self, frame, face_rect):
        """Estimate key facial landmarks inside *face_rect* using cascades.

        *frame* is expected to be grayscale (the caller passes the gray
        image returned by detect_faces_opencv). Returns a dict that may
        contain 'left_eye', 'right_eye', '*_eye_corners', 'mouth_center',
        'mouth_corners', 'nose_tip' and 'chin' entries, all in full-frame
        pixel coordinates.
        """
        x, y, w, h = face_rect
        roi_gray = frame[y:y+h, x:x+w]

        # Detect eyes and mouth inside the face region only.
        eyes = self.eye_cascade.detectMultiScale(roi_gray, 1.1, 3)
        mouths = self.mouth_cascade.detectMultiScale(roi_gray, 1.1, 3)

        landmarks = {}

        # Process eyes (need at least two detections to label left/right).
        if len(eyes) >= 2:
            # Sort eyes by x-coordinate (left to right)
            eyes = sorted(eyes, key=lambda e: e[0])
            landmarks['left_eye'] = (x + eyes[0][0] + eyes[0][2]//2, y + eyes[0][1] + eyes[0][3]//2)
            landmarks['right_eye'] = (x + eyes[1][0] + eyes[1][2]//2, y + eyes[1][1] + eyes[1][3]//2)

            # Approximate eye corner points [left, right, top, bottom]
            # from the detection rectangles.
            landmarks['left_eye_corners'] = [
                (x + eyes[0][0], y + eyes[0][1] + eyes[0][3]//2),               # left corner
                (x + eyes[0][0] + eyes[0][2], y + eyes[0][1] + eyes[0][3]//2),  # right corner
                (x + eyes[0][0] + eyes[0][2]//2, y + eyes[0][1]),               # top
                (x + eyes[0][0] + eyes[0][2]//2, y + eyes[0][1] + eyes[0][3])   # bottom
            ]
            landmarks['right_eye_corners'] = [
                (x + eyes[1][0], y + eyes[1][1] + eyes[1][3]//2),
                (x + eyes[1][0] + eyes[1][2], y + eyes[1][1] + eyes[1][3]//2),
                (x + eyes[1][0] + eyes[1][2]//2, y + eyes[1][1]),
                (x + eyes[1][0] + eyes[1][2]//2, y + eyes[1][1] + eyes[1][3])
            ]

        # Process mouth (take the first detection, if any).
        if len(mouths) > 0:
            mouth = mouths[0]
            landmarks['mouth_center'] = (x + mouth[0] + mouth[2]//2, y + mouth[1] + mouth[3]//2)
            landmarks['mouth_corners'] = [
                (x + mouth[0], y + mouth[1] + mouth[3]//2),               # left corner
                (x + mouth[0] + mouth[2], y + mouth[1] + mouth[3]//2),    # right corner
                (x + mouth[0] + mouth[2]//2, y + mouth[1]),               # top
                (x + mouth[0] + mouth[2]//2, y + mouth[1] + mouth[3])     # bottom
            ]

        # Heuristic fixed points: nose tip at 60% face height, chin at the
        # bottom centre of the face rectangle.
        landmarks['nose_tip'] = (x + w//2, y + int(h*0.6))
        landmarks['chin'] = (x + w//2, y + h)

        return landmarks

    def detect_landmarks_mediapipe(self, frame):
        """Detect landmarks using MediaPipe Face Mesh.

        Returns (face_rect, landmarks_dict); (None, {}) when no face is
        found. Coordinates are full-frame pixels.
        """
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(rgb_frame)

        landmarks_dict = {}

        if results.multi_face_landmarks:
            face_landmarks = results.multi_face_landmarks[0]
            h, w, _ = frame.shape

            left_eye_points = []
            right_eye_points = []
            mouth_points = []

            # Only a subset of the mesh points is needed for the EAR/MAR
            # formulas downstream.
            for i in self.LEFT_EYE_INDICES[:6]:  # first 6 points outline the eye
                point = face_landmarks.landmark[i]
                left_eye_points.append((int(point.x * w), int(point.y * h)))

            for i in self.RIGHT_EYE_INDICES[:6]:
                point = face_landmarks.landmark[i]
                right_eye_points.append((int(point.x * w), int(point.y * h)))

            for i in self.MOUTH_INDICES[:8]:  # key mouth points
                point = face_landmarks.landmark[i]
                mouth_points.append((int(point.x * w), int(point.y * h)))

            landmarks_dict['left_eye_corners'] = left_eye_points
            landmarks_dict['right_eye_corners'] = right_eye_points
            landmarks_dict['mouth_corners'] = mouth_points

            # Single key points from the mesh.
            nose_tip = face_landmarks.landmark[1]    # nose tip
            chin = face_landmarks.landmark[175]      # chin

            landmarks_dict['nose_tip'] = (int(nose_tip.x * w), int(nose_tip.y * h))
            landmarks_dict['chin'] = (int(chin.x * w), int(chin.y * h))

            # Bounding box of all mesh points.
            x_coords = [int(lm.x * w) for lm in face_landmarks.landmark]
            y_coords = [int(lm.y * h) for lm in face_landmarks.landmark]

            face_rect = (min(x_coords), min(y_coords),
                         max(x_coords) - min(x_coords),
                         max(y_coords) - min(y_coords))

            return face_rect, landmarks_dict

        return None, {}

    def detect_landmarks(self, frame):
        """Detect face(s) and landmarks.

        Returns (face_rects, landmarks_list) — parallel lists, one entry
        per detected face. Uses MediaPipe when available and falls back to
        the OpenCV cascade pipeline otherwise.
        """
        if self.use_mediapipe:
            face_rect, landmarks = self.detect_landmarks_mediapipe(frame)
            if face_rect is not None:
                return [face_rect], [landmarks]

        # Fallback to OpenCV cascades.
        faces, gray = self.detect_faces_opencv(frame)
        landmarks_list = []
        face_rects = []

        for face in faces:
            landmarks = self.estimate_landmarks_opencv(gray, face)
            if landmarks:
                landmarks_list.append(landmarks)
                face_rects.append(face)

        return face_rects, landmarks_list
|
170 |
+
|
171 |
+
class MetricsCalculator:
    """Compute drowsiness metrics (EAR, MAR, head pose) from landmark points."""

    @staticmethod
    def calculate_ear_from_points(eye_points):
        """Eye Aspect Ratio: eye-opening height relative to eye width.

        Supports the 4-point [left, right, top, bottom] box estimate and
        the classic 6-point landmark formula. Returns 0.3 (a neutral,
        "eyes open" value) whenever the points are insufficient or the
        eye width degenerates to zero.
        """
        count = len(eye_points)
        if count < 4:
            return 0.3  # not enough points detected

        if count == 4:
            left_pt, right_pt, top_pt, bottom_pt = eye_points
            width = dist.euclidean(left_pt, right_pt)
            if width == 0:
                return 0.3
            return dist.euclidean(top_pt, bottom_pt) / width

        if count >= 6:
            # Two vertical openings averaged over the horizontal span.
            opening_a = dist.euclidean(eye_points[1], eye_points[5])
            opening_b = dist.euclidean(eye_points[2], eye_points[4])
            width = dist.euclidean(eye_points[0], eye_points[3])
            if width == 0:
                return 0.3
            return (opening_a + opening_b) / (2.0 * width)

        return 0.3  # exactly 5 points: ambiguous layout, use neutral value

    @staticmethod
    def calculate_mar_from_points(mouth_points):
        """Mouth Aspect Ratio: mouth opening relative to mouth width.

        Accepts the 4-point [left, right, top, bottom] estimate or an
        8+-point outline; returns 0.3 as a neutral fallback otherwise.
        """
        count = len(mouth_points)
        if count < 4:
            return 0.3

        if count == 4:
            left_pt, right_pt, top_pt, bottom_pt = mouth_points
            width = dist.euclidean(left_pt, right_pt)
            if width == 0:
                return 0.3
            return dist.euclidean(top_pt, bottom_pt) / width

        if count >= 8:
            # Average three vertical openings across the mouth.
            total_opening = (
                dist.euclidean(mouth_points[1], mouth_points[7])
                + dist.euclidean(mouth_points[2], mouth_points[6])
                + dist.euclidean(mouth_points[3], mouth_points[5])
            )
            width = dist.euclidean(mouth_points[0], mouth_points[4])
            if width == 0:
                return 0.3
            return total_opening / (3.0 * width)

        return 0.3  # 5-7 points: unsupported layout

    @staticmethod
    def estimate_head_pose_simple(nose_tip, chin, frame_center):
        """Rough head-pose estimate from the nose-chin line.

        Returns np.array([pitch, yaw, roll]) in degrees; yaw is always 0
        (not estimated here). Returns zeros when points are missing or
        coincident.
        """
        if nose_tip is None or chin is None:
            return np.array([0, 0, 0])

        # Deviation of the nose->chin direction from straight down.
        direction = np.array([chin[0] - nose_tip[0], chin[1] - nose_tip[1]])
        down = np.array([0, 1])

        denom = np.linalg.norm(direction) * np.linalg.norm(down)
        if denom == 0:
            return np.array([0, 0, 0])

        cos_angle = np.dot(direction, down) / denom
        roll = np.degrees(np.arccos(np.clip(cos_angle, -1, 1)))
        # Sign of the tilt follows the horizontal component.
        if direction[0] < 0:
            roll = -roll

        # Pitch approximated from the nose's vertical offset relative to
        # the frame centre, scaled to roughly +/-30 degrees.
        pitch = (nose_tip[1] - frame_center[1]) / frame_center[1] * 30

        return np.array([pitch, 0, roll])
|
274 |
+
|
275 |
+
class DrowsinessAnalyzer:
    """Analyze drowsiness from per-frame facial metrics.

    Each indicator fires only after its metric stays past threshold for a
    number of consecutive frames, which filters out blinks and brief
    movements.
    """

    def __init__(self):
        # Thresholds (tuned for the OpenCV/MediaPipe detection pipeline)
        self.EAR_THRESHOLD = 0.20            # eyes considered closed below this
        self.EAR_CONSECUTIVE_FRAMES = 15
        self.YAWN_THRESHOLD = 0.8            # mouth considered yawning above this
        self.YAWN_CONSECUTIVE_FRAMES = 10
        self.NOD_THRESHOLD = 20              # degrees of head tilt
        # Named constant for consistency with the other *_CONSECUTIVE_FRAMES
        # thresholds (was a magic number inside analyze_drowsiness).
        self.NOD_CONSECUTIVE_FRAMES = 8

        # Consecutive-frame counters, one per indicator
        self.ear_counter = 0
        self.yawn_counter = 0
        self.nod_counter = 0

        # Rolling history of the last 30 frames' metrics
        self.ear_history = deque(maxlen=30)
        self.yawn_history = deque(maxlen=30)
        self.head_pose_history = deque(maxlen=30)

    def analyze_drowsiness(self, ear, mar, head_angles):
        """Update counters with one frame's metrics and return indicators.

        Args:
            ear: eye aspect ratio for this frame.
            mar: mouth aspect ratio for this frame.
            head_angles: indexable [pitch, yaw, roll]; only pitch is used.

        Returns:
            List drawn from {"EYES_CLOSED", "YAWNING", "HEAD_NOD"} —
            empty when no indicator is currently active.
        """
        drowsiness_indicators = []

        # Keep a rolling history of recent metric values.
        self.ear_history.append(ear)
        self.yawn_history.append(mar)
        self.head_pose_history.append(head_angles[0])

        # Eyes-closed detection: sustained low EAR.
        if ear < self.EAR_THRESHOLD:
            self.ear_counter += 1
            if self.ear_counter >= self.EAR_CONSECUTIVE_FRAMES:
                drowsiness_indicators.append("EYES_CLOSED")
        else:
            self.ear_counter = 0

        # Yawn detection: sustained high MAR.
        if mar > self.YAWN_THRESHOLD:
            self.yawn_counter += 1
            if self.yawn_counter >= self.YAWN_CONSECUTIVE_FRAMES:
                drowsiness_indicators.append("YAWNING")
        else:
            self.yawn_counter = 0

        # Head-nod detection: sustained large pitch deviation.
        if abs(head_angles[0]) > self.NOD_THRESHOLD:
            self.nod_counter += 1
            if self.nod_counter >= self.NOD_CONSECUTIVE_FRAMES:
                drowsiness_indicators.append("HEAD_NOD")
        else:
            self.nod_counter = 0

        return drowsiness_indicators

    def get_severity_level(self, indicators):
        """Map an indicator list to 'normal'/'medium'/'high'/'critical'."""
        if len(indicators) >= 2:
            return "critical"
        elif "EYES_CLOSED" in indicators:
            return "high"
        elif indicators:
            return "medium"
        else:
            return "normal"
|
341 |
+
|
342 |
+
class AlertManager:
    """Rate-limit alert firing with a fixed cooldown window."""

    def __init__(self, cooldown_seconds=8):
        self.last_alert_time = 0          # epoch seconds of the last fired alert
        self.cooldown_seconds = cooldown_seconds

    def should_trigger_alert(self, indicators):
        """Return True (and restart the cooldown) if an alert may fire now.

        Fires only when *indicators* is non-empty and the cooldown since
        the previous alert has fully elapsed.
        """
        if not indicators:
            return False
        now = time.time()
        if now - self.last_alert_time <= self.cooldown_seconds:
            return False  # still inside the cooldown window
        self.last_alert_time = now
        return True
|
356 |
+
|
357 |
+
class VisualizationRenderer:
    """Render detection results (landmarks, metrics, alerts) onto frames in place."""

    @staticmethod
    def draw_landmarks_and_contours(frame, landmarks, face_rect):
        """Draw the face box, eye/mouth outlines and key points on *frame*."""
        x, y, w, h = face_rect
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Eye outlines in green, mouth outline in yellow.
        outline_specs = (
            ('left_eye_corners', (0, 255, 0)),
            ('right_eye_corners', (0, 255, 0)),
            ('mouth_corners', (0, 255, 255)),
        )
        for key, color in outline_specs:
            if key in landmarks:
                pts = np.array(landmarks[key], np.int32)
                cv2.polylines(frame, [pts], True, color, 2)

        # Nose tip and chin as filled blue dots.
        for key in ('nose_tip', 'chin'):
            if key in landmarks:
                cv2.circle(frame, landmarks[key], 3, (255, 0, 0), -1)

    @staticmethod
    def draw_metrics_overlay(frame, ear, mar, head_angle, indicators):
        """Overlay numeric metrics and, when indicators are active, an alert banner."""
        bottom = frame.shape[0]
        metric_lines = (
            (80, f"EAR: {ear:.3f}"),
            (60, f"MAR: {mar:.3f}"),
            (40, f"Head: {head_angle:.1f}°"),
        )
        for offset, text in metric_lines:
            cv2.putText(frame, text, (10, bottom - offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        if indicators:
            cv2.putText(frame, "⚠️ DROWSINESS ALERT! ⚠️", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
|
401 |
+
|
402 |
+
class StatusLogger:
    """Bounded, timestamped in-memory activity log."""

    def __init__(self, max_logs=100):
        # deque silently discards the oldest entries once max_logs is reached
        self.status_log = deque(maxlen=max_logs)

    def log(self, message):
        """Append *message* prefixed with the current HH:MM:SS time."""
        stamp = datetime.now().strftime("%H:%M:%S")
        self.status_log.append(f"[{stamp}] {message}")

    def get_recent_logs(self, count=10):
        """Return up to *count* of the most recent entries, oldest first."""
        return list(self.status_log)[-count:]
|
gradio_interface.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import queue
|
4 |
+
import threading
|
5 |
+
from concurrent.futures import ThreadPoolExecutor
|
6 |
+
from ai_alert_generator import AIAlertGenerator, DrowsinessDetectionSystem
|
7 |
+
|
8 |
+
class GradioWebRTCInterface:
    """Gradio Blocks UI streaming webcam frames through the drowsiness pipeline.

    NOTE(review): ``DrowsinessDetectionSystem`` is imported from
    ``ai_alert_generator`` at the top of this file — confirm it is actually
    defined/exported there; a missing export fails at import time.
    """

    def __init__(self):
        self.detection_system = None      # DrowsinessDetectionSystem, lazily created
        self.ai_alert_generator = None    # AIAlertGenerator, lazily created
        self.processing = False           # reserved; not currently used

    @staticmethod
    def _clean_key(gemini_key):
        """Return a stripped, non-empty API key or None (None-safe)."""
        if not gemini_key:
            return None
        return gemini_key.strip() or None

    def initialize_system(self, gemini_key):
        """Initialize the detection system and AI alert generator.

        Returns (system status message, alert-box message).
        """
        try:
            self.detection_system = DrowsinessDetectionSystem()
            self.ai_alert_generator = AIAlertGenerator(self._clean_key(gemini_key))
            return "✅ System initialized successfully!", "🚀 Ready for detection"
        except Exception as e:
            return f"❌ Error: {str(e)}", "❌ Initialization failed"

    def process_video_stream(self, frame, gemini_key):
        """Process one streamed frame.

        Returns (processed frame, status text, alert text, alert audio
        file or None, activity log text). Lazily initializes the system
        so streaming works even if the user never pressed Initialize.
        """
        if self.detection_system is None:
            self.detection_system = DrowsinessDetectionSystem()
            self.ai_alert_generator = AIAlertGenerator(self._clean_key(gemini_key))

        try:
            processed_frame, status_list, should_alert, metrics = self.detection_system.process_frame(frame)

            alert_text = ""
            alert_audio = None

            if should_alert and metrics.get('indicators'):
                alert_text = self.ai_alert_generator.generate_alert_text(
                    metrics['indicators'],
                    metrics.get('severity', 'medium')
                )

                # Audio is best-effort: a TTS/beep failure must not stop the stream.
                try:
                    audio_file, _ = self.ai_alert_generator.create_audio_alert(alert_text)
                    alert_audio = audio_file
                except Exception as e:
                    print(f"Audio generation error: {e}")

            status_text = "\n".join(status_list)
            logs = self.detection_system.get_logs()

            return processed_frame, status_text, alert_text, alert_audio, logs

        except Exception as e:
            error_msg = f"Processing error: {str(e)}"
            return frame, error_msg, "", None, error_msg

    def create_interface(self):
        """Create the Gradio interface with WebRTC support."""
        with gr.Blocks(
            title="🚗 AI Driver Drowsiness Detection System",
            theme=gr.themes.Soft(),
            css="""
            .alert-box {
                background-color: #ffebee;
                border: 2px solid #f44336;
                border-radius: 8px;
                padding: 10px;
            }
            .status-box {
                background-color: #e8f5e8;
                border: 2px solid #4caf50;
                border-radius: 8px;
                padding: 10px;
            }
            .metric-display {
                font-family: 'Courier New', monospace;
                font-size: 14px;
            }
            .header-text {
                text-align: center;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                padding: 20px;
                border-radius: 10px;
                margin-bottom: 20px;
            }
            """
        ) as interface:

            gr.HTML("""
            <div class="header-text">
                <h1>🚗 AI-Powered Driver Drowsiness Detection System</h1>
                <p><strong>Real-time monitoring with OpenCV, Computer Vision & AI Alerts</strong></p>
                <p><em>No external model downloads required - Uses built-in OpenCV detection</em></p>
            </div>
            """)

            with gr.Tab("🎥 Live Detection"):
                with gr.Row():
                    with gr.Column(scale=2):
                        # WebRTC video input
                        video_input = gr.Video(
                            label="📹 Camera Feed (WebRTC Streaming)",
                            sources=["webcam"],
                            streaming=True,
                            mirror_webcam=False,
                            height=480
                        )

                        # System controls.
                        # BUGFIX: gemini_key must be a Gradio component — the
                        # previous code assigned os.getenv(...) (a plain str or
                        # None) and passed it as an event input. init_btn is
                        # restored because the click handler below references it.
                        with gr.Row():
                            gemini_key = gr.Textbox(
                                label="🔑 Gemini API Key (optional)",
                                value=os.getenv("GEMINI_API_KEY") or "",
                                type="password",
                                scale=3
                            )
                            init_btn = gr.Button("🚀 Initialize", variant="primary", scale=1)

                    with gr.Column(scale=1):
                        # System status
                        init_status = gr.Textbox(
                            label="🔧 System Status",
                            interactive=False,
                            lines=2,
                            elem_classes=["status-box"]
                        )

                        # Detection metrics
                        current_status = gr.Textbox(
                            label="📊 Detection Metrics",
                            interactive=False,
                            lines=8,
                            elem_classes=["metric-display"]
                        )

                        # Alert display
                        alert_text_display = gr.Textbox(
                            label="🚨 Active Alert",
                            interactive=False,
                            lines=3,
                            elem_classes=["alert-box"]
                        )

                        # Audio alert output
                        alert_audio = gr.Audio(
                            label="🔊 Alert Sound",
                            autoplay=True,
                            visible=True
                        )

                # System logs panel
                with gr.Row():
                    system_logs = gr.Textbox(
                        label="📋 System Activity Log",
                        lines=6,
                        interactive=False,
                        elem_classes=["metric-display"]
                    )

            with gr.Tab("⚙️ System Configuration"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("""
                        ### 🔧 Detection Parameters

                        **Current OpenCV-based thresholds:**
                        - **Eye Aspect Ratio (EAR)**: < 0.20 for 15+ frames
                        - **Mouth Aspect Ratio (MAR)**: > 0.8 for 10+ frames
                        - **Head Nod Angle**: > 20° deviation for 8+ frames
                        - **Alert Cooldown**: 8 seconds between alerts

                        ### 🎯 Detection Methods
                        - **Primary**: MediaPipe Face Mesh (if available)
                        - **Fallback**: OpenCV Haar Cascades
                        - **No external downloads**: Uses built-in OpenCV models
                        """)

                    with gr.Column():
                        gr.Markdown("""
                        ### 🚀 Easy Setup

                        **Install dependencies:**
                        ```bash
                        pip install opencv-python gradio numpy scipy google-generativeai

                        # Optional for better detection:
                        pip install mediapipe
                        ```

                        **No model downloads required!**
                        - Uses OpenCV's built-in face detection
                        - MediaPipe auto-detects if available
                        - Gemini API key is optional for AI alerts
                        """)

                gr.Markdown("""
                ### 🌟 Advanced Features
                - **Real-time WebRTC Processing**: Low latency video streaming
                - **Multi-modal Detection**: Eyes, mouth, and head pose analysis
                - **AI-Powered Alerts**: Contextual voice messages via Gemini
                - **Adaptive Fallback**: Graceful degradation without external models
                - **Visual Feedback**: Live metrics overlay on video
                - **Comprehensive Logging**: Detailed activity tracking
                """)

            with gr.Tab("📚 Detection Info"):
                gr.Markdown("""
                ### 👁️ Eye Aspect Ratio (EAR)

                **How it works:**
                - Calculates ratio of eye height to width
                - Lower values indicate closed/closing eyes
                - Triggers alert when consistently low

                **Detection method:**
                - **MediaPipe**: Uses precise eye landmarks
                - **OpenCV**: Estimates from eye rectangles

                ### 👄 Mouth Aspect Ratio (MAR)

                **Yawn detection:**
                - Measures mouth opening relative to width
                - Higher values indicate yawning
                - Accounts for talking vs. yawning patterns

                ### 📐 Head Pose Estimation

                **Nodding detection:**
                - Tracks head tilt and position
                - Detects forward head movement
                - Uses nose-chin alignment for pose estimation

                ### 🧠 AI Alert Generation

                **Smart alerts:**
                - Context-aware messages via Gemini
                - Severity-based escalation
                - Fallback to audio beeps
                - Cooldown prevents alert spam
                """)

            # Event handlers
            init_btn.click(
                fn=self.initialize_system,
                inputs=[gemini_key],
                outputs=[init_status, alert_text_display]
            )

            # WebRTC stream processing
            video_input.stream(
                fn=self.process_video_stream,
                inputs=[video_input, gemini_key],
                outputs=[video_input, current_status, alert_text_display, alert_audio, system_logs],
                stream_every=0.1,  # 10 FPS processing
                show_progress=False
            )

            # Safety notice
            gr.HTML("""
            <div style="margin-top: 20px; padding: 15px; background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%); border-radius: 8px; border-left: 5px solid #e17055;">
                <h3>⚠️ Important Safety Notice</h3>
                <p><strong>This system is for demonstration and research purposes only.</strong></p>
                <ul style="margin: 10px 0;">
                    <li><strong>Not a substitute</strong> for responsible driving practices</li>
                    <li><strong>Pull over safely</strong> if you feel drowsy while driving</li>
                    <li><strong>Ensure proper setup</strong>: good lighting, stable camera position</li>
                    <li><strong>Use as supplementary tool</strong> alongside other safety measures</li>
                </ul>
                <p style="margin-top: 15px;"><em>Always prioritize real-world driving safety over technology assistance.</em></p>
            </div>
            """)

        return interface
|