Testimony Adekoya committed
Commit
e65b3b4
·
1 Parent(s): f7db860

WIP: Quick demo for project

Files changed (4)
  1. ai_alert_generator.py +214 -0
  2. app.py +191 -439
  3. facial_detection.py +415 -0
  4. gradio_interface.py +277 -0
ai_alert_generator.py ADDED
@@ -0,0 +1,214 @@
+import google.generativeai as genai
+import numpy as np
+from scipy.io import wavfile
+import tempfile
+import os
+
+from facial_detection import OpenCVFaceDetector, MetricsCalculator, DrowsinessAnalyzer, AlertManager, VisualizationRenderer, StatusLogger
+
+class AIAlertGenerator:
+    """Generate AI-powered voice alerts using Gemini"""
+
+    def __init__(self, api_key=None):
+        self.model = None
+        if api_key:
+            try:
+                genai.configure(api_key=api_key)
+                self.model = genai.GenerativeModel('gemini-1.5-flash')
+                print("✅ Gemini AI initialized for voice alerts")
+            except Exception as e:
+                print(f"⚠️ Failed to initialize Gemini: {e}")
+
+    def generate_alert_text(self, alert_type, severity="medium"):
+        """Generate contextual alert text using Gemini"""
+        if not self.model:
+            return self._get_default_alert_text(alert_type, severity)
+
+        try:
+            prompts = {
+                "EYES_CLOSED": f"Generate a brief, urgent wake-up message (max 12 words) for a drowsy driver whose eyes are closing. Severity: {severity}. Sound caring but firm.",
+                "YAWNING": f"Generate a brief, gentle alert (max 10 words) for a driver who is yawning frequently. Severity: {severity}. Sound encouraging.",
+                "HEAD_NOD": f"Generate a brief, firm alert (max 10 words) for a driver whose head is nodding. Severity: {severity}. Sound urgent but supportive.",
+                "COMBINED": f"Generate a brief, critical alert (max 15 words) for a driver showing multiple drowsiness signs. Severity: {severity}. Sound very urgent but caring."
+            }
+
+            prompt_key = "COMBINED" if isinstance(alert_type, list) and len(alert_type) > 1 else alert_type[0] if isinstance(alert_type, list) else alert_type
+            prompt = prompts.get(prompt_key, prompts["EYES_CLOSED"])
+
+            response = self.model.generate_content(prompt)
+            alert_text = response.text.strip().replace('"', '').replace("'", "")
+
+            return alert_text[:100]
+
+        except Exception as e:
+            print(f"Error generating AI alert: {e}")
+            return self._get_default_alert_text(alert_type, severity)
+
+    def _get_default_alert_text(self, alert_type, severity):
+        """Fallback alert messages"""
+        default_alerts = {
+            "EYES_CLOSED": {
+                "critical": "WAKE UP NOW! Pull over immediately!",
+                "high": "Eyes closing! Stay alert and pull over soon!",
+                "medium": "Please keep your eyes open while driving!"
+            },
+            "YAWNING": {
+                "critical": "Excessive yawning detected! Take a break!",
+                "high": "You seem tired. Consider resting soon.",
+                "medium": "Frequent yawning noticed. Stay alert!"
+            },
+            "HEAD_NOD": {
+                "critical": "Head nodding detected! Stop driving now!",
+                "high": "Your head is nodding. Pull over safely!",
+                "medium": "Head movement detected. Stay focused!"
+            }
+        }
+
+        alert_key = alert_type[0] if isinstance(alert_type, list) else alert_type
+        return default_alerts.get(alert_key, {}).get(severity, "Stay alert while driving!")
+
+    def create_audio_alert(self, text, sample_rate=22050):
+        """Create audio alert (generates beep pattern)"""
+        try:
+            duration = 2.0
+            freq = 800
+            frames = int(duration * sample_rate)
+
+            # Create attention-grabbing beep pattern
+            t = np.linspace(0, duration, frames)
+            beep1 = np.sin(2 * np.pi * freq * t) * np.exp(-t * 3)
+            beep2 = np.sin(2 * np.pi * (freq * 1.5) * t) * np.exp(-t * 3)
+
+            # Combine beeps with pause
+            silence = np.zeros(int(0.1 * sample_rate))
+            audio = np.concatenate([beep1, silence, beep2, silence, beep1])
+
+            # Normalize and convert to int16
+            audio = (audio * 32767).astype(np.int16)
+
+            # Save to temporary file
+            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+            wavfile.write(temp_file.name, sample_rate, audio)
+
+            return temp_file.name, text
+
+        except Exception as e:
+            print(f"Error creating audio alert: {e}")
+            return None, text
+
+class DrowsinessDetectionSystem:
+    """Main system coordinator"""
+
+    def __init__(self):
+        self.face_detector = OpenCVFaceDetector()
+        self.metrics_calculator = MetricsCalculator()
+        self.drowsiness_analyzer = DrowsinessAnalyzer()
+        self.alert_manager = AlertManager()
+        self.visualization_renderer = VisualizationRenderer()
+        self.logger = StatusLogger()
+
+        print("✅ Drowsiness Detection System initialized with OpenCV")
+
+    def process_frame(self, frame):
+        """Process a single frame and return results"""
+        try:
+            # Detect face and landmarks
+            face_rects, landmarks_list = self.face_detector.detect_landmarks(frame)
+
+            if not face_rects or not landmarks_list:
+                self.logger.log("No face detected")
+                return frame, ["👀 No face detected"], False, {}
+
+            # Process first detected face
+            face_rect = face_rects[0]
+            landmarks = landmarks_list[0]
+
+            # Calculate metrics
+            ear_left = ear_right = 0.25  # Default values
+
+            if 'left_eye_corners' in landmarks:
+                ear_left = self.metrics_calculator.calculate_ear_from_points(landmarks['left_eye_corners'])
+            if 'right_eye_corners' in landmarks:
+                ear_right = self.metrics_calculator.calculate_ear_from_points(landmarks['right_eye_corners'])
+
+            ear = (ear_left + ear_right) / 2.0
+
+            mar = 0.3  # Default value
+            if 'mouth_corners' in landmarks:
+                mar = self.metrics_calculator.calculate_mar_from_points(landmarks['mouth_corners'])
+
+            # Head pose estimation
+            frame_center = (frame.shape[1] // 2, frame.shape[0] // 2)
+            head_angles = self.metrics_calculator.estimate_head_pose_simple(
+                landmarks.get('nose_tip'),
+                landmarks.get('chin'),
+                frame_center
+            )
+
+            # Analyze drowsiness
+            indicators = self.drowsiness_analyzer.analyze_drowsiness(ear, mar, head_angles)
+            severity = self.drowsiness_analyzer.get_severity_level(indicators)
+
+            # Check for alerts
+            should_alert = self.alert_manager.should_trigger_alert(indicators)
+
+            # Render visualization
+            self.visualization_renderer.draw_landmarks_and_contours(frame, landmarks, face_rect)
+            self.visualization_renderer.draw_metrics_overlay(frame, ear, mar, head_angles[0], indicators)
+
+            # Generate status text
+            status_text = self._generate_status_text(ear, mar, head_angles[0], indicators)
+
+            # Log events
+            if indicators:
+                self.logger.log(f"Drowsiness detected: {', '.join(indicators)} (Severity: {severity})")
+
+            # Prepare metrics
+            metrics = {
+                'ear': ear,
+                'mar': mar,
+                'head_angle': head_angles[0],
+                'indicators': indicators,
+                'severity': severity
+            }
+
+            return frame, status_text, should_alert, metrics
+
+        except Exception as e:
+            error_msg = f"Error processing frame: {str(e)}"
+            self.logger.log(error_msg)
+            return frame, [error_msg], False, {}
+
+    def _generate_status_text(self, ear, mar, head_angle, indicators):
+        """Generate human-readable status text"""
+        status = []
+
+        # EAR status
+        if ear < self.drowsiness_analyzer.EAR_THRESHOLD:
+            status.append(f"👁️ Eyes closing! EAR: {ear:.3f}")
+        else:
+            status.append(f"👁️ Eyes open - EAR: {ear:.3f}")
+
+        # MAR status
+        if mar > self.drowsiness_analyzer.YAWN_THRESHOLD:
+            status.append(f"🥱 Yawning detected! MAR: {mar:.3f}")
+        else:
+            status.append(f"👄 Normal mouth - MAR: {mar:.3f}")
+
+        # Head pose status
+        if abs(head_angle) > self.drowsiness_analyzer.NOD_THRESHOLD:
+            status.append(f"📉 Head nodding! Angle: {head_angle:.1f}°")
+        else:
+            status.append(f"📐 Head pose normal - Pitch: {head_angle:.1f}°")
+
+        # Overall status
+        if indicators:
+            status.append(f"⚠️ ALERT: {', '.join(indicators)}")
+        else:
+            status.append("✅ Driver appears alert")
+
+        return status
+
+    def get_logs(self):
+        """Get recent system logs"""
+        return "\n".join(self.logger.get_recent_logs())
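The two classes above expose a simple per-frame API: `process_frame` returns the annotated frame, status lines, an alert flag, and a metrics dict. A minimal sketch of how they could be driven outside Gradio, assuming a local webcam at index 0 and a desktop environment; `standalone_demo.py` is a hypothetical file name, not part of this commit:

```python
# standalone_demo.py - hypothetical driver loop, not part of this commit
import cv2
from ai_alert_generator import AIAlertGenerator, DrowsinessDetectionSystem

system = DrowsinessDetectionSystem()
alerts = AIAlertGenerator(api_key=None)  # None -> canned fallback messages

cap = cv2.VideoCapture(0)  # assumes a webcam at index 0
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break

    frame, status, should_alert, metrics = system.process_frame(frame)
    if should_alert and metrics.get('indicators'):
        text = alerts.generate_alert_text(metrics['indicators'], metrics.get('severity', 'medium'))
        wav_path, _ = alerts.create_audio_alert(text)  # beep pattern saved as .wav
        print(f"ALERT: {text} (audio: {wav_path})")

    cv2.imshow("Drowsiness Demo", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```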
app.py CHANGED
@@ -1,457 +1,209 @@
-import cv2
-import numpy as np
-import dlib
-import gradio as gr
-import threading
-import time
-import queue
-import pygame
-import io
-import google.generativeai as genai
-from scipy.spatial import distance as dist
-from collections import deque
-import tempfile
-import os
-from datetime import datetime
-
-class DrowsinessDetector:
-    def __init__(self, gemini_api_key=None):
-        # Initialize face detector and landmark predictor
-        self.detector = dlib.get_frontal_face_detector()
-        self.predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
-
-        # EAR (Eye Aspect Ratio) parameters
-        self.EAR_THRESHOLD = 0.25
-        self.EAR_CONSECUTIVE_FRAMES = 20
-        self.ear_counter = 0
-        self.ear_history = deque(maxlen=30)
-
-        # Yawn detection parameters
-        self.YAWN_THRESHOLD = 0.6
-        self.YAWN_CONSECUTIVE_FRAMES = 15
-        self.yawn_counter = 0
-        self.yawn_history = deque(maxlen=30)
-
-        # Head pose estimation parameters
-        self.NOD_THRESHOLD = 15
-        self.nod_counter = 0
-        self.head_pose_history = deque(maxlen=30)
-
-        # Alert system
-        self.drowsy_alert = False
-        self.last_alert_time = 0
-        self.alert_cooldown = 10  # seconds
-
-        # Initialize pygame for audio
-        pygame.mixer.init()
-
-        # Initialize Gemini AI
-        if gemini_api_key:
-            genai.configure(api_key=gemini_api_key)
-            self.model = genai.GenerativeModel('gemini-pro')
-        else:
-            self.model = None
-
-        # Eye and mouth landmark indices
-        self.LEFT_EYE = list(range(36, 42))
-        self.RIGHT_EYE = list(range(42, 48))
-        self.MOUTH = list(range(48, 68))
-
-        # 3D model points for head pose estimation
-        self.model_points = np.array([
-            (0.0, 0.0, 0.0),             # Nose tip
-            (0.0, -330.0, -65.0),        # Chin
-            (-225.0, 170.0, -135.0),     # Left eye left corner
-            (225.0, 170.0, -135.0),      # Right eye right corner
-            (-150.0, -150.0, -125.0),    # Left mouth corner
-            (150.0, -150.0, -125.0)      # Right mouth corner
-        ])
-
-        self.status_log = deque(maxlen=100)
-
-    def calculate_ear(self, eye_landmarks):
-        """Calculate Eye Aspect Ratio"""
-        # Vertical eye landmarks
-        A = dist.euclidean(eye_landmarks[1], eye_landmarks[5])
-        B = dist.euclidean(eye_landmarks[2], eye_landmarks[4])
-        # Horizontal eye landmark
-        C = dist.euclidean(eye_landmarks[0], eye_landmarks[3])
-        # EAR calculation
-        ear = (A + B) / (2.0 * C)
-        return ear
-
-    def calculate_mar(self, mouth_landmarks):
-        """Calculate Mouth Aspect Ratio for yawn detection"""
-        # Vertical mouth landmarks
-        A = dist.euclidean(mouth_landmarks[2], mouth_landmarks[10])  # 50, 58
-        B = dist.euclidean(mouth_landmarks[4], mouth_landmarks[8])   # 52, 56
-        # Horizontal mouth landmark
-        C = dist.euclidean(mouth_landmarks[0], mouth_landmarks[6])   # 48, 54
-        # MAR calculation
-        mar = (A + B) / (2.0 * C)
-        return mar
-
-    def get_head_pose(self, landmarks, img_size):
-        """Estimate head pose using facial landmarks"""
-        image_points = np.array([
-            (landmarks[30][0], landmarks[30][1]),  # Nose tip
-            (landmarks[8][0], landmarks[8][1]),    # Chin
-            (landmarks[36][0], landmarks[36][1]),  # Left eye left corner
-            (landmarks[45][0], landmarks[45][1]),  # Right eye right corner
-            (landmarks[48][0], landmarks[48][1]),  # Left mouth corner
-            (landmarks[54][0], landmarks[54][1])   # Right mouth corner
-        ], dtype="double")
-
-        # Camera internals
-        focal_length = img_size[1]
-        center = (img_size[1]/2, img_size[0]/2)
-        camera_matrix = np.array([
-            [focal_length, 0, center[0]],
-            [0, focal_length, center[1]],
-            [0, 0, 1]], dtype="double")
-
-        dist_coeffs = np.zeros((4, 1))  # Assuming no lens distortion
-
-        # Solve PnP
-        (success, rotation_vector, translation_vector) = cv2.solvePnP(
-            self.model_points, image_points, camera_matrix, dist_coeffs,
-            flags=cv2.SOLVEPNP_ITERATIVE)
-
-        # Convert rotation vector to rotation matrix
-        (rotation_matrix, jacobian) = cv2.Rodrigues(rotation_vector)
-
-        # Calculate Euler angles
-        sy = np.sqrt(rotation_matrix[0,0] * rotation_matrix[0,0] + rotation_matrix[1,0] * rotation_matrix[1,0])
-        singular = sy < 1e-6
-        if not singular:
-            x = np.arctan2(rotation_matrix[2,1], rotation_matrix[2,2])
-            y = np.arctan2(-rotation_matrix[2,0], sy)
-            z = np.arctan2(rotation_matrix[1,0], rotation_matrix[0,0])
-        else:
-            x = np.arctan2(-rotation_matrix[1,2], rotation_matrix[1,1])
-            y = np.arctan2(-rotation_matrix[2,0], sy)
-            z = 0
-
-        # Convert to degrees
-        angles = np.array([x, y, z]) * 180.0 / np.pi
-        return angles
-
-    def generate_voice_alert(self, alert_type, severity="medium"):
-        """Generate voice alert using Gemini AI"""
-        if not self.model:
-            return self.play_default_alert()
-
-        try:
-            prompts = {
-                "drowsy": f"Generate a brief, urgent but caring voice alert (max 15 words) to wake up a drowsy driver. Severity: {severity}. Make it sound natural and concerned.",
-                "yawn": f"Generate a brief, gentle voice alert (max 12 words) for a driver who is yawning frequently. Severity: {severity}. Sound caring but alert.",
-                "nod": f"Generate a brief, firm voice alert (max 12 words) for a driver whose head is nodding. Severity: {severity}. Sound urgent but supportive."
-            }
-
-            response = self.model.generate_content(prompts.get(alert_type, prompts["drowsy"]))
-            alert_text = response.text.strip().replace('"', '').replace("'", "")
-
-            # Use text-to-speech (you would need to install pyttsx3 or use cloud TTS)
-            # For this example, we'll use a placeholder
-            self.log_status(f"🔊 ALERT: {alert_text}")
-            return alert_text
-
-        except Exception as e:
-            self.log_status(f"Error generating alert: {str(e)}")
-            return self.play_default_alert()
-
-    def play_default_alert(self):
-        """Play default beep alert"""
-        try:
-            # Generate a simple beep sound
-            duration = 0.5  # seconds
-            freq = 800  # Hz
-            sample_rate = 22050
-            frames = int(duration * sample_rate)
-            arr = np.zeros(frames)
-
-            for i in range(frames):
-                arr[i] = np.sin(2 * np.pi * freq * i / sample_rate)
-
-            arr = (arr * 32767).astype(np.int16)
-            sound = pygame.sndarray.make_sound(arr)
-            sound.play()
-
-            alert_text = "⚠️ WAKE UP! Please stay alert while driving!"
-            self.log_status(f"🔊 {alert_text}")
-            return alert_text
-
-        except Exception as e:
-            self.log_status(f"Error playing alert: {str(e)}")
-            return "Alert system activated"
-
-    def log_status(self, message):
-        """Log status messages with timestamp"""
-        timestamp = datetime.now().strftime("%H:%M:%S")
-        self.status_log.append(f"[{timestamp}] {message}")
-
-    def detect_drowsiness(self, frame):
-        """Main drowsiness detection function"""
-        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        faces = self.detector(gray)
-
-        status_text = []
-        alert_message = ""
-
-        if len(faces) == 0:
-            status_text.append("👀 No face detected")
-            self.log_status("No face detected in frame")
-            return frame, status_text, alert_message
-
-        for face in faces:
-            landmarks = self.predictor(gray, face)
-            landmarks = np.array([[p.x, p.y] for p in landmarks.parts()])
-
-            # Draw face rectangle
-            x, y, w, h = face.left(), face.top(), face.width(), face.height()
-            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
-
-            # Eye Aspect Ratio calculation
-            left_eye = landmarks[self.LEFT_EYE]
-            right_eye = landmarks[self.RIGHT_EYE]
-
-            left_ear = self.calculate_ear(left_eye)
-            right_ear = self.calculate_ear(right_eye)
-            ear = (left_ear + right_ear) / 2.0
-            self.ear_history.append(ear)
-
-            # Draw eye contours
-            cv2.drawContours(frame, [cv2.convexHull(left_eye)], -1, (0, 255, 0), 1)
-            cv2.drawContours(frame, [cv2.convexHull(right_eye)], -1, (0, 255, 0), 1)
-
-            # Yawn detection
-            mouth = landmarks[self.MOUTH]
-            mar = self.calculate_mar(mouth)
-            self.yawn_history.append(mar)
-
-            # Draw mouth contour
-            cv2.drawContours(frame, [cv2.convexHull(mouth)], -1, (0, 255, 255), 1)
-
-            # Head pose estimation
-            head_angles = self.get_head_pose(landmarks, frame.shape)
-            self.head_pose_history.append(head_angles[0])  # Pitch angle
-
-            # Drowsiness detection logic
-            drowsy_indicators = []
-
-            # Check EAR
-            if ear < self.EAR_THRESHOLD:
-                self.ear_counter += 1
-                if self.ear_counter >= self.EAR_CONSECUTIVE_FRAMES:
-                    drowsy_indicators.append("EYES_CLOSED")
-                    status_text.append(f"👁️ Eyes closed! EAR: {ear:.3f}")
-            else:
-                self.ear_counter = 0
-                status_text.append(f"👁️ Eyes open - EAR: {ear:.3f}")
-
-            # Check for yawning
-            if mar > self.YAWN_THRESHOLD:
-                self.yawn_counter += 1
-                if self.yawn_counter >= self.YAWN_CONSECUTIVE_FRAMES:
-                    drowsy_indicators.append("YAWNING")
-                    status_text.append(f"🥱 Yawning detected! MAR: {mar:.3f}")
-            else:
-                self.yawn_counter = 0
-                status_text.append(f"👄 Normal mouth - MAR: {mar:.3f}")
-
-            # Check head nodding
-            if abs(head_angles[0]) > self.NOD_THRESHOLD:
-                self.nod_counter += 1
-                if self.nod_counter >= 10:
-                    drowsy_indicators.append("HEAD_NOD")
-                    status_text.append(f"📉 Head nodding! Angle: {head_angles[0]:.1f}°")
-            else:
-                self.nod_counter = 0
-                status_text.append(f"📐 Head pose - Pitch: {head_angles[0]:.1f}°")
-
-            # Generate alerts
-            current_time = time.time()
-            if drowsy_indicators and (current_time - self.last_alert_time) > self.alert_cooldown:
-                self.drowsy_alert = True
-                self.last_alert_time = current_time
-
-                # Determine alert type and severity
-                if "EYES_CLOSED" in drowsy_indicators:
-                    severity = "high" if len(drowsy_indicators) > 1 else "medium"
-                    alert_message = self.generate_voice_alert("drowsy", severity)
-                elif "YAWNING" in drowsy_indicators:
-                    alert_message = self.generate_voice_alert("yawn", "medium")
-                elif "HEAD_NOD" in drowsy_indicators:
-                    alert_message = self.generate_voice_alert("nod", "medium")
-
-                # Visual alert on frame
-                cv2.putText(frame, "⚠️ DROWSINESS ALERT! ⚠️", (50, 50),
-                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
-
-            # Draw landmark points
-            for (x, y) in landmarks:
-                cv2.circle(frame, (x, y), 1, (255, 255, 255), -1)
-
-            # Add metrics overlay
-            cv2.putText(frame, f"EAR: {ear:.3f}", (10, frame.shape[0] - 80),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-            cv2.putText(frame, f"MAR: {mar:.3f}", (10, frame.shape[0] - 60),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-            cv2.putText(frame, f"Head: {head_angles[0]:.1f}°", (10, frame.shape[0] - 40),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-        return frame, status_text, alert_message
-
-def create_gradio_interface():
-    """Create Gradio interface for the drowsiness detection system"""
-
-    # Global detector instance
-    detector = None
-
-    def initialize_system(gemini_key):
-        """Initialize the drowsiness detection system"""
-        global detector
-        try:
-            detector = DrowsinessDetector(gemini_key if gemini_key.strip() else None)
-            return "✅ System initialized successfully!", "System ready for detection."
-        except Exception as e:
-            return f"❌ Error initializing system: {str(e)}", "System initialization failed."
-
-    def process_video_frame(frame, gemini_key):
-        """Process a single video frame"""
-        global detector
-
-        if detector is None:
-            detector = DrowsinessDetector(gemini_key if gemini_key.strip() else None)
-
-        try:
-            processed_frame, status_list, alert_msg = detector.detect_drowsiness(frame)
-
-            # Format status text
-            status_text = "\n".join(status_list) if status_list else "Processing..."
-
-            # Get recent logs
-            log_text = "\n".join(list(detector.status_log)[-10:]) if detector.status_log else "No logs yet."
-
-            return processed_frame, status_text, alert_msg, log_text
-
-        except Exception as e:
-            error_msg = f"Error processing frame: {str(e)}"
-            return frame, error_msg, "", error_msg
-
-    # Create the Gradio interface
-    with gr.Blocks(title="Driver Drowsiness Detection System", theme=gr.themes.Soft()) as demo:
-        gr.Markdown("""
-        # 🚗 Real-time Driver Drowsiness Detection System
-
-        This system uses computer vision and AI to detect driver drowsiness through:
-        - **Eye Aspect Ratio (EAR)** - Detects closed/droopy eyes
-        - **Mouth Aspect Ratio (MAR)** - Detects yawning
-        - **Head Pose Estimation** - Detects head nodding
-        - **AI Voice Alerts** - Uses Gemini AI for personalized wake-up messages
-
-        ### 📋 Setup Instructions:
-        1. Download `shape_predictor_68_face_landmarks.dat` from [dlib models](http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2)
-        2. Place it in the same directory as this script
-        3. (Optional) Enter your Gemini API key for AI-powered voice alerts
-        4. Start your webcam and begin monitoring!
-        """)
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Gemini API key input
-                gemini_key_input = os.getenv('GEMINI_API_KEY')
-
-                # Initialize button
-                init_btn = gr.Button("🚀 Initialize System", variant="primary")
-                init_status = gr.Textbox(label="Initialization Status", interactive=False)
-
-            with gr.Column(scale=1):
-                # System info
-                gr.Markdown("""
-                ### 📊 Detection Thresholds:
-                - **EAR Threshold**: 0.25
-                - **Yawn Threshold**: 0.6
-                - **Head Nod**: 15° deviation
-                - **Alert Cooldown**: 10 seconds
-                """)
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Video input/output
-                video_input = gr.Video(
-                    sources=["webcam"],
-                    label="📹 Camera Feed",
-                    streaming=True
-                )
-
-            with gr.Column(scale=1):
-                # Status displays
-                current_status = gr.Textbox(
-                    label="📈 Current Status",
-                    lines=6,
-                    interactive=False
-                )
-
-                alert_display = gr.Textbox(
-                    label="🔊 Latest Alert",
-                    interactive=False,
-                    placeholder="No alerts yet..."
-                )
-
-                system_logs = gr.Textbox(
-                    label="📝 System Logs",
-                    lines=8,
-                    interactive=False,
-                    placeholder="System logs will appear here..."
-                )
-
-        # Event handlers
-        init_btn.click(
-            fn=initialize_system,
-            inputs=[gemini_key_input],
-            outputs=[init_status, alert_display]
-        )
-
-        video_input.stream(
-            fn=process_video_frame,
-            inputs=[video_input, gemini_key_input],
-            outputs=[video_input, current_status, alert_display, system_logs],
-            stream_every=0.1,  # Process every 100ms
-            show_progress=False
-        )
-
-        # Instructions
-        gr.Markdown("""
-        ### 🔍 Troubleshooting:
-        - **No face detected**: Ensure good lighting and face is visible to camera
-        - **Poor detection**: Adjust camera angle and distance (arm's length recommended)
-        - **No alerts**: Check if Gemini API key is valid (optional feature)
-        - **High CPU usage**: Reduce video resolution or increase stream interval
-
-        ### ⚠️ Safety Notice:
-        This system is for demonstration purposes. Always prioritize real-world driving safety measures.
-        Pull over safely if you feel drowsy while driving.
-        """)
-
-    return demo
-
-# Main execution
-if __name__ == "__main__":
-    # Check for required files
-    if not os.path.exists('shape_predictor_68_face_landmarks.dat'):
-        print("❌ Missing required file: shape_predictor_68_face_landmarks.dat")
-        print("📥 Please download from: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
-        print("📁 Extract and place in the same directory as this script")
-    else:
-        print("✅ All required files found!")
-
-    # Create and launch the interface
-    demo = create_gradio_interface()
-    demo.launch(
-        share=True,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )
+# main.py
+import os
+import sys
+from gradio_interface import GradioWebRTCInterface
+from dotenv import load_dotenv
+
+load_dotenv()
+
+def check_opencv_installation():
+    """Check if OpenCV is properly installed with required cascades"""
+    try:
+        import cv2
+
+        # Check for required cascade files
+        cascade_files = [
+            'haarcascade_frontalface_default.xml',
+            'haarcascade_eye.xml',
+            'haarcascade_smile.xml'
+        ]
+
+        missing_cascades = []
+        for cascade in cascade_files:
+            cascade_path = cv2.data.haarcascades + cascade
+            if not os.path.exists(cascade_path):
+                missing_cascades.append(cascade)
+
+        if missing_cascades:
+            print(f"❌ Missing OpenCV cascade files: {missing_cascades}")
+            print("💡 Please reinstall OpenCV: pip install opencv-python")
+            return False
+
+        print("✅ OpenCV and required cascade files found!")
+        return True
+
+    except ImportError:
+        print("❌ OpenCV not found. Please install: pip install opencv-python")
+        return False
+
+def check_optional_dependencies():
+    """Check for optional dependencies and provide info"""
+    optional_deps = {
+        'mediapipe': 'Enhanced facial landmark detection',
+        'google.generativeai': 'AI-powered voice alerts',
+        'scipy': 'Advanced mathematical computations'
+    }
+
+    available = []
+    missing = []
+
+    for dep, description in optional_deps.items():
+        try:
+            __import__(dep)
+            available.append(f"✅ {dep} - {description}")
+        except ImportError:
+            missing.append(f"⚪ {dep} - {description}")
+
+    if available:
+        print("📦 Available optional features:")
+        for item in available:
+            print(f"   {item}")
+
+    if missing:
+        print("📦 Optional features (install for enhanced functionality):")
+        for item in missing:
+            print(f"   {item}")
+
+def main():
+    """Main entry point"""
+    print("🚗 Starting AI Driver Drowsiness Detection System...")
+    print("🔧 Using OpenCV-based detection (no external model downloads required)")
+
+    if not check_opencv_installation():
+        sys.exit(1)
+
+    check_optional_dependencies()
+
+    print("\n🚀 All core requirements satisfied!")
+
+    # Create and launch interface
+    try:
+        interface_manager = GradioWebRTCInterface()
+        demo = interface_manager.create_interface()
+
+        print("🌐 Launching Gradio interface...")
+        print("📱 The interface will be available in your browser")
+        print("🔗 A public link will be generated for sharing")
+
+        # Note: queuing is enabled via demo.queue() in Gradio 4.x; the old
+        # enable_queue launch kwarg no longer exists there.
+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=True,
+            show_error=True,
+            max_threads=10,
+            favicon_path=None
+        )
+
+    except Exception as e:
+        print(f"❌ Error launching interface: {e}")
+        print("💡 Try running: pip install --upgrade gradio")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
+
+# requirements.txt
+"""
+opencv-python>=4.5.0
+gradio>=4.0.0
+numpy>=1.21.0
+scipy>=1.7.0
+google-generativeai>=0.3.0
+python-dotenv>=1.0.0
+mediapipe>=0.10.0  # Optional for enhanced detection
+"""
+
+# README.md
+"""
+# 🚗 AI Driver Drowsiness Detection System
+
+A real-time drowsiness detection system using computer vision and AI-powered alerts.
+
+## ✨ Features
+
+- **No External Downloads**: Uses OpenCV's built-in face detection models
+- **Real-time Processing**: WebRTC streaming for low latency
+- **Multi-modal Detection**: Eyes, mouth, and head pose analysis
+- **AI Voice Alerts**: Contextual messages powered by Gemini AI
+- **Adaptive System**: Graceful fallback without external dependencies
+- **Easy Setup**: Simple pip install, no model downloads required
+
+## 🚀 Quick Start
+
+1. **Install dependencies:**
+   ```bash
+   pip install opencv-python gradio numpy scipy google-generativeai
+
+   # Optional for enhanced detection:
+   pip install mediapipe
+   ```
+
+2. **Run the system:**
+   ```bash
+   python main.py
+   ```
+
+3. **Open browser** and navigate to the provided URL
+
+4. **Optional**: Enter Gemini API key for AI-powered voice alerts
+
+## 🔧 How It Works
+
+### Detection Methods
+- **Primary**: MediaPipe Face Mesh (if available) for precise landmarks
+- **Fallback**: OpenCV Haar Cascades for basic face/eye/mouth detection
+- **Hybrid Approach**: Automatically selects best available method
+
+### Drowsiness Indicators
+- **Eye Aspect Ratio (EAR)**: Detects eye closure patterns
+- **Mouth Aspect Ratio (MAR)**: Identifies yawning behavior
+- **Head Pose**: Tracks head nodding and position
+
+### Alert System
+- **AI-Generated**: Contextual messages via Gemini
+- **Audio Alerts**: Attention-grabbing beep patterns
+- **Visual Feedback**: Real-time overlay on video stream
+- **Smart Cooldown**: Prevents alert spam
+
+## ⚙️ Configuration
+
+### Detection Thresholds
+- **EAR Threshold**: 0.20 (adjustable for sensitivity)
+- **MAR Threshold**: 0.8 (calibrated for yawn detection)
+- **Head Nod**: 20° deviation threshold
+- **Alert Cooldown**: 8 seconds between alerts
+
+### Performance Optimization
+- **Stream Rate**: 10 FPS processing (configurable)
+- **Queue Management**: Prevents frame backlog
+- **Multi-threading**: Separate processing pipeline
+- **Graceful Degradation**: Maintains functionality with limited resources
+
+## 🛡️ Safety Notice
+
+**This system is for demonstration and research purposes only.**
+
+- Not a substitute for responsible driving practices
+- Always pull over safely if feeling drowsy
+- Use as supplementary tool alongside other safety measures
+- Ensure proper camera setup and lighting
+
+## 📋 System Requirements
+
+- **Python**: 3.7+
+- **Camera**: Webcam or built-in camera
+- **OS**: Windows, macOS, Linux
+- **RAM**: 4GB+ recommended
+- **CPU**: Multi-core recommended for real-time processing
+
+## 🔍 Troubleshooting
+
+- **No face detected**: Check lighting and camera position
+- **Poor detection**: Ensure face is clearly visible and well-lit
+- **High CPU usage**: Reduce stream rate or video resolution
+- **Audio issues**: Check browser permissions and audio settings
+
+## 📝 License
+
+MIT License - See LICENSE file for details
+"""
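The README's EAR description can be made concrete with a worked example. The six-point formula used by the removed dlib path (and the MediaPipe branch of `MetricsCalculator`) is ear = (A + B) / (2C), where A and B are the two vertical eye distances and C the horizontal one. A small sketch with invented coordinates, not real detections:

```python
# Worked EAR example (illustrative coordinates, not real detections)
from scipy.spatial import distance as dist

# Six eye landmarks: [left corner, top-left, top-right, right corner, bottom-right, bottom-left]
open_eye    = [(0, 3), (2, 5), (4, 5), (6, 3), (4, 1), (2, 1)]
closing_eye = [(0, 3), (2, 3.6), (4, 3.6), (6, 3), (4, 2.4), (2, 2.4)]

def ear(pts):
    A = dist.euclidean(pts[1], pts[5])  # first vertical distance
    B = dist.euclidean(pts[2], pts[4])  # second vertical distance
    C = dist.euclidean(pts[0], pts[3])  # horizontal distance
    return (A + B) / (2.0 * C)

print(f"open eye EAR:    {ear(open_eye):.3f}")     # (4 + 4) / 12 ~ 0.667, well above threshold
print(f"closing eye EAR: {ear(closing_eye):.3f}")  # (1.2 + 1.2) / 12 = 0.200, at the 0.20 threshold
```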
facial_detection.py ADDED
@@ -0,0 +1,415 @@
+# facial_detection.py
+import cv2
+import numpy as np
+from scipy.spatial import distance as dist
+from collections import deque
+import time
+from datetime import datetime
+
+class OpenCVFaceDetector:
+    """Face detection and landmark estimation using OpenCV"""
+
+    def __init__(self):
+        # Load OpenCV's pre-trained face detection models
+        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+        self.eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
+        self.mouth_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')
+
+        # Try to load MediaPipe for better landmark detection (fallback if not available)
+        self.use_mediapipe = False
+        try:
+            import mediapipe as mp
+            self.mp_face_mesh = mp.solutions.face_mesh
+            self.mp_drawing = mp.solutions.drawing_utils
+            self.face_mesh = self.mp_face_mesh.FaceMesh(
+                static_image_mode=False,
+                max_num_faces=1,
+                refine_landmarks=True,
+                min_detection_confidence=0.5,
+                min_tracking_confidence=0.5
+            )
+            self.use_mediapipe = True
+            print("✅ Using MediaPipe for enhanced landmark detection")
+        except ImportError:
+            print("⚠️ MediaPipe not available, using OpenCV cascade classifiers")
+
+        # Define landmark indices for MediaPipe (68-point equivalent)
+        self.LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
+        self.RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
+        self.MOUTH_INDICES = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 62]
+
+    def detect_faces_opencv(self, frame):
+        """Detect faces using OpenCV Haar cascades"""
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
+        return faces, gray
+
+    def estimate_landmarks_opencv(self, frame, face_rect):
+        """Estimate key facial landmarks using OpenCV cascades"""
+        x, y, w, h = face_rect
+        roi_gray = frame[y:y+h, x:x+w]
+        roi_color = frame[y:y+h, x:x+w]
+
+        # Detect eyes
+        eyes = self.eye_cascade.detectMultiScale(roi_gray, 1.1, 3)
+        # Detect mouth/smile
+        mouths = self.mouth_cascade.detectMultiScale(roi_gray, 1.1, 3)
+
+        landmarks = {}
+
+        # Process eyes
+        if len(eyes) >= 2:
+            # Sort eyes by x-coordinate (left to right)
+            eyes = sorted(eyes, key=lambda e: e[0])
+            landmarks['left_eye'] = (x + eyes[0][0] + eyes[0][2]//2, y + eyes[0][1] + eyes[0][3]//2)
+            landmarks['right_eye'] = (x + eyes[1][0] + eyes[1][2]//2, y + eyes[1][1] + eyes[1][3]//2)
+
+            # Estimate eye corners based on eye rectangles
+            landmarks['left_eye_corners'] = [
+                (x + eyes[0][0], y + eyes[0][1] + eyes[0][3]//2),               # left corner
+                (x + eyes[0][0] + eyes[0][2], y + eyes[0][1] + eyes[0][3]//2),  # right corner
+                (x + eyes[0][0] + eyes[0][2]//2, y + eyes[0][1]),               # top
+                (x + eyes[0][0] + eyes[0][2]//2, y + eyes[0][1] + eyes[0][3])   # bottom
+            ]
+            landmarks['right_eye_corners'] = [
+                (x + eyes[1][0], y + eyes[1][1] + eyes[1][3]//2),
+                (x + eyes[1][0] + eyes[1][2], y + eyes[1][1] + eyes[1][3]//2),
+                (x + eyes[1][0] + eyes[1][2]//2, y + eyes[1][1]),
+                (x + eyes[1][0] + eyes[1][2]//2, y + eyes[1][1] + eyes[1][3])
+            ]
+
+        # Process mouth
+        if len(mouths) > 0:
+            mouth = mouths[0]  # Take the first detected mouth
+            landmarks['mouth_center'] = (x + mouth[0] + mouth[2]//2, y + mouth[1] + mouth[3]//2)
+            landmarks['mouth_corners'] = [
+                (x + mouth[0], y + mouth[1] + mouth[3]//2),             # left corner
+                (x + mouth[0] + mouth[2], y + mouth[1] + mouth[3]//2),  # right corner
+                (x + mouth[0] + mouth[2]//2, y + mouth[1]),             # top
+                (x + mouth[0] + mouth[2]//2, y + mouth[1] + mouth[3])   # bottom
+            ]
+
+        # Estimate nose tip (center of face, slightly above mouth)
+        landmarks['nose_tip'] = (x + w//2, y + int(h*0.6))
+
+        # Estimate chin (bottom center of face)
+        landmarks['chin'] = (x + w//2, y + h)
+
+        return landmarks
+
+    def detect_landmarks_mediapipe(self, frame):
+        """Detect landmarks using MediaPipe"""
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        results = self.face_mesh.process(rgb_frame)
+
+        landmarks_dict = {}
+
+        if results.multi_face_landmarks:
+            face_landmarks = results.multi_face_landmarks[0]
+            h, w, _ = frame.shape
+
+            # Extract eye landmarks
+            left_eye_points = []
+            right_eye_points = []
+            mouth_points = []
+
+            for i in self.LEFT_EYE_INDICES[:6]:  # Take first 6 points for eye shape
+                point = face_landmarks.landmark[i]
+                left_eye_points.append((int(point.x * w), int(point.y * h)))
+
+            for i in self.RIGHT_EYE_INDICES[:6]:
+                point = face_landmarks.landmark[i]
+                right_eye_points.append((int(point.x * w), int(point.y * h)))
+
+            for i in self.MOUTH_INDICES[:8]:  # Take key mouth points
+                point = face_landmarks.landmark[i]
+                mouth_points.append((int(point.x * w), int(point.y * h)))
+
+            landmarks_dict['left_eye_corners'] = left_eye_points
+            landmarks_dict['right_eye_corners'] = right_eye_points
+            landmarks_dict['mouth_corners'] = mouth_points
+
+            # Key points
+            nose_tip = face_landmarks.landmark[1]  # Nose tip
+            chin = face_landmarks.landmark[175]    # Chin
+
+            landmarks_dict['nose_tip'] = (int(nose_tip.x * w), int(nose_tip.y * h))
+            landmarks_dict['chin'] = (int(chin.x * w), int(chin.y * h))
+
+            # Calculate face bounding box
+            x_coords = [int(lm.x * w) for lm in face_landmarks.landmark]
+            y_coords = [int(lm.y * h) for lm in face_landmarks.landmark]
+
+            face_rect = (min(x_coords), min(y_coords),
+                         max(x_coords) - min(x_coords),
+                         max(y_coords) - min(y_coords))
+
+            return face_rect, landmarks_dict
+
+        return None, {}
+
+    def detect_landmarks(self, frame):
+        """Main method to detect face and landmarks"""
+        if self.use_mediapipe:
+            face_rect, landmarks = self.detect_landmarks_mediapipe(frame)
+            if face_rect is not None:
+                return [face_rect], [landmarks]
+
+        # Fallback to OpenCV
+        faces, gray = self.detect_faces_opencv(frame)
+        landmarks_list = []
+        face_rects = []
+
+        for face in faces:
+            landmarks = self.estimate_landmarks_opencv(gray, face)
+            if landmarks:
+                landmarks_list.append(landmarks)
+                face_rects.append(face)
+
+        return face_rects, landmarks_list
+
+class MetricsCalculator:
+    """Calculate drowsiness metrics from facial landmarks"""
+
+    @staticmethod
+    def calculate_ear_from_points(eye_points):
+        """Calculate Eye Aspect Ratio from eye corner points"""
+        if len(eye_points) < 4:
+            return 0.3  # Default value
+
+        # For 4-point eye estimation: [left, right, top, bottom]
+        if len(eye_points) == 4:
+            left, right, top, bottom = eye_points
+            # Vertical distance
+            vertical_dist = dist.euclidean(top, bottom)
+            # Horizontal distance
+            horizontal_dist = dist.euclidean(left, right)
+
+            if horizontal_dist == 0:
+                return 0.3
+
+            ear = vertical_dist / horizontal_dist
+            return ear
+
+        # For 6-point eye estimation (MediaPipe style)
+        elif len(eye_points) >= 6:
+            # Calculate vertical distances
+            v1 = dist.euclidean(eye_points[1], eye_points[5])
+            v2 = dist.euclidean(eye_points[2], eye_points[4])
+            # Horizontal distance
+            h = dist.euclidean(eye_points[0], eye_points[3])
+
+            if h == 0:
+                return 0.3
+
+            ear = (v1 + v2) / (2.0 * h)
+            return ear
+
+        return 0.3
+
+    @staticmethod
+    def calculate_mar_from_points(mouth_points):
+        """Calculate Mouth Aspect Ratio from mouth points"""
+        if len(mouth_points) < 4:
+            return 0.3  # Default value
+
+        if len(mouth_points) == 4:
+            # [left, right, top, bottom]
+            left, right, top, bottom = mouth_points
+            vertical_dist = dist.euclidean(top, bottom)
+            horizontal_dist = dist.euclidean(left, right)
+
+            if horizontal_dist == 0:
+                return 0.3
+
+            mar = vertical_dist / horizontal_dist
+            return mar
+
+        elif len(mouth_points) >= 8:
+            # More sophisticated mouth analysis
+            # Calculate multiple vertical distances
+            v1 = dist.euclidean(mouth_points[1], mouth_points[7])
+            v2 = dist.euclidean(mouth_points[2], mouth_points[6])
+            v3 = dist.euclidean(mouth_points[3], mouth_points[5])
+
+            # Horizontal distance
+            h = dist.euclidean(mouth_points[0], mouth_points[4])
+
+            if h == 0:
+                return 0.3
+
+            mar = (v1 + v2 + v3) / (3.0 * h)
+            return mar
+
+        return 0.3
+
+    @staticmethod
+    def estimate_head_pose_simple(nose_tip, chin, frame_center):
+        """Simple head pose estimation using nose and chin"""
+        if nose_tip is None or chin is None:
+            return np.array([0, 0, 0])
+
+        # Calculate head tilt based on nose-chin line deviation from vertical
+        nose_chin_vector = np.array([chin[0] - nose_tip[0], chin[1] - nose_tip[1]])
+        vertical_vector = np.array([0, 1])
+
+        # Calculate angle from vertical
+        dot_product = np.dot(nose_chin_vector, vertical_vector)
+        norms = np.linalg.norm(nose_chin_vector) * np.linalg.norm(vertical_vector)
+
+        if norms == 0:
+            return np.array([0, 0, 0])
+
+        cos_angle = dot_product / norms
+        angle = np.arccos(np.clip(cos_angle, -1, 1)) * 180 / np.pi
+
+        # Determine direction of tilt
+        if nose_chin_vector[0] < 0:
+            angle = -angle
+
+        # Simple pitch estimation based on nose position relative to frame center
+        pitch = (nose_tip[1] - frame_center[1]) / frame_center[1] * 30  # Scale to degrees
+
+        return np.array([pitch, 0, angle])  # [pitch, yaw, roll]
+
+class DrowsinessAnalyzer:
+    """Analyze drowsiness based on facial metrics"""
+
+    def __init__(self):
+        # Thresholds
+        self.EAR_THRESHOLD = 0.20  # Adjusted for OpenCV detection
+        self.EAR_CONSECUTIVE_FRAMES = 15
+        self.YAWN_THRESHOLD = 0.8  # Adjusted for mouth detection
+        self.YAWN_CONSECUTIVE_FRAMES = 10
+        self.NOD_THRESHOLD = 20
+
+        # Counters
+        self.ear_counter = 0
+        self.yawn_counter = 0
+        self.nod_counter = 0
+
+        # History tracking
+        self.ear_history = deque(maxlen=30)
+        self.yawn_history = deque(maxlen=30)
+        self.head_pose_history = deque(maxlen=30)
+
+    def analyze_drowsiness(self, ear, mar, head_angles):
+        """Analyze current metrics and return drowsiness indicators"""
+        drowsiness_indicators = []
+
+        # Update history
+        self.ear_history.append(ear)
+        self.yawn_history.append(mar)
+        self.head_pose_history.append(head_angles[0])
+
+        # Check EAR (eyes closed detection)
+        if ear < self.EAR_THRESHOLD:
+            self.ear_counter += 1
+            if self.ear_counter >= self.EAR_CONSECUTIVE_FRAMES:
+                drowsiness_indicators.append("EYES_CLOSED")
+        else:
+            self.ear_counter = 0
+
+        # Check yawning
+        if mar > self.YAWN_THRESHOLD:
+            self.yawn_counter += 1
+            if self.yawn_counter >= self.YAWN_CONSECUTIVE_FRAMES:
+                drowsiness_indicators.append("YAWNING")
+        else:
+            self.yawn_counter = 0
+
+        # Check head nodding
+        if abs(head_angles[0]) > self.NOD_THRESHOLD:
+            self.nod_counter += 1
+            if self.nod_counter >= 8:
+                drowsiness_indicators.append("HEAD_NOD")
+        else:
+            self.nod_counter = 0
+
+        return drowsiness_indicators
+
+    def get_severity_level(self, indicators):
+        """Determine severity based on indicators"""
+        if len(indicators) >= 2:
+            return "critical"
+        elif "EYES_CLOSED" in indicators:
+            return "high"
+        elif indicators:
+            return "medium"
+        else:
+            return "normal"
+
+class AlertManager:
+    """Manage alert generation and timing"""
+
+    def __init__(self, cooldown_seconds=8):
+        self.last_alert_time = 0
+        self.cooldown_seconds = cooldown_seconds
+
+    def should_trigger_alert(self, indicators):
+        """Check if alert should be triggered"""
+        current_time = time.time()
+        if indicators and (current_time - self.last_alert_time) > self.cooldown_seconds:
+            self.last_alert_time = current_time
+            return True
+        return False
+
+class VisualizationRenderer:
+    """Handle visual rendering of detection results"""

+    @staticmethod
+    def draw_landmarks_and_contours(frame, landmarks, face_rect):
+        """Draw facial landmarks and detection areas"""
+        x, y, w, h = face_rect
+        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+
+        # Draw eye areas
+        if 'left_eye_corners' in landmarks:
+            points = np.array(landmarks['left_eye_corners'], np.int32)
+            cv2.polylines(frame, [points], True, (0, 255, 0), 2)
+
+        if 'right_eye_corners' in landmarks:
+            points = np.array(landmarks['right_eye_corners'], np.int32)
+            cv2.polylines(frame, [points], True, (0, 255, 0), 2)
+
+        # Draw mouth area
+        if 'mouth_corners' in landmarks:
+            points = np.array(landmarks['mouth_corners'], np.int32)
+            cv2.polylines(frame, [points], True, (0, 255, 255), 2)
+
+        # Draw key points
+        key_points = ['nose_tip', 'chin']
+        for point_name in key_points:
+            if point_name in landmarks:
+                cv2.circle(frame, landmarks[point_name], 3, (255, 0, 0), -1)
+
+    @staticmethod
+    def draw_metrics_overlay(frame, ear, mar, head_angle, indicators):
+        """Draw metrics and alerts on frame"""
+        # Metrics text
+        cv2.putText(frame, f"EAR: {ear:.3f}", (10, frame.shape[0] - 80),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+        cv2.putText(frame, f"MAR: {mar:.3f}", (10, frame.shape[0] - 60),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+        cv2.putText(frame, f"Head: {head_angle:.1f}°", (10, frame.shape[0] - 40),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+        # Alert overlay
+        if indicators:
+            cv2.putText(frame, "⚠️ DROWSINESS ALERT! ⚠️", (50, 50),
+                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
+
+class StatusLogger:
+    """Handle logging and status tracking"""
+
+    def __init__(self, max_logs=100):
+        self.status_log = deque(maxlen=max_logs)
+
+    def log(self, message):
+        """Add timestamped log entry"""
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        self.status_log.append(f"[{timestamp}] {message}")
+
+    def get_recent_logs(self, count=10):
+        """Get recent log entries"""
+        return list(self.status_log)[-count:]
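The counter logic in `DrowsinessAnalyzer` only flags a condition after it persists for the configured number of consecutive frames, and a single good frame resets the run. A small sketch against the class above, using synthetic metric values:

```python
# Sketch: the analyzer flags only after a sustained run of low-EAR frames
import numpy as np
from facial_detection import DrowsinessAnalyzer

analyzer = DrowsinessAnalyzer()
neutral_pose = np.array([0.0, 0.0, 0.0])  # [pitch, yaw, roll]

# 14 low-EAR frames: one short of EAR_CONSECUTIVE_FRAMES (15), so no flag yet
for _ in range(14):
    indicators = analyzer.analyze_drowsiness(ear=0.15, mar=0.3, head_angles=neutral_pose)
print(indicators)  # []

# The 15th consecutive low-EAR frame crosses the threshold
indicators = analyzer.analyze_drowsiness(ear=0.15, mar=0.3, head_angles=neutral_pose)
print(indicators)  # ['EYES_CLOSED']

# A single open-eye frame resets the counter
indicators = analyzer.analyze_drowsiness(ear=0.30, mar=0.3, head_angles=neutral_pose)
print(indicators)  # []
```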
gradio_interface.py ADDED
@@ -0,0 +1,277 @@
+import os
+import gradio as gr
+import queue
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from ai_alert_generator import AIAlertGenerator, DrowsinessDetectionSystem
+
+class GradioWebRTCInterface:
+    """Enhanced Gradio interface with WebRTC support"""
+
+    def __init__(self):
+        self.detection_system = None
+        self.ai_alert_generator = None
+        self.processing = False
+
+    def initialize_system(self, gemini_key):
+        """Initialize the detection system"""
+        try:
+            self.detection_system = DrowsinessDetectionSystem()
+            self.ai_alert_generator = AIAlertGenerator(gemini_key if gemini_key.strip() else None)
+
+            return "✅ System initialized successfully!", "🚀 Ready for detection"
+        except Exception as e:
+            return f"❌ Error: {str(e)}", "❌ Initialization failed"
+
+    def process_video_stream(self, frame, gemini_key):
+        """Process video stream"""
+        if self.detection_system is None:
+            self.detection_system = DrowsinessDetectionSystem()
+            self.ai_alert_generator = AIAlertGenerator(gemini_key if gemini_key.strip() else None)
+
+        try:
+            # Process frame
+            processed_frame, status_list, should_alert, metrics = self.detection_system.process_frame(frame)
+
+            # Generate alert if needed
+            alert_text = ""
+            alert_audio = None
+
+            if should_alert and metrics.get('indicators'):
+                alert_text = self.ai_alert_generator.generate_alert_text(
+                    metrics['indicators'],
+                    metrics.get('severity', 'medium')
+                )
+
+                # Create audio alert
+                try:
+                    audio_file, _ = self.ai_alert_generator.create_audio_alert(alert_text)
+                    alert_audio = audio_file
+                except Exception as e:
+                    print(f"Audio generation error: {e}")
+
+            # Format status
+            status_text = "\n".join(status_list)
+
+            # Get logs
+            logs = self.detection_system.get_logs()
+
+            return processed_frame, status_text, alert_text, alert_audio, logs
+
+        except Exception as e:
+            error_msg = f"Processing error: {str(e)}"
+            return frame, error_msg, "", None, error_msg
+
+    def create_interface(self):
+        """Create the Gradio interface with WebRTC support"""
+        with gr.Blocks(
+            title="🚗 AI Driver Drowsiness Detection System",
+            theme=gr.themes.Soft(),
+            css="""
+            .alert-box {
+                background-color: #ffebee;
+                border: 2px solid #f44336;
+                border-radius: 8px;
+                padding: 10px;
+            }
+            .status-box {
+                background-color: #e8f5e8;
+                border: 2px solid #4caf50;
+                border-radius: 8px;
+                padding: 10px;
+            }
+            .metric-display {
+                font-family: 'Courier New', monospace;
+                font-size: 14px;
+            }
+            .header-text {
+                text-align: center;
+                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                color: white;
+                padding: 20px;
+                border-radius: 10px;
+                margin-bottom: 20px;
+            }
+            """
+        ) as interface:
+
+            gr.HTML("""
+            <div class="header-text">
+                <h1>🚗 AI-Powered Driver Drowsiness Detection System</h1>
+                <p><strong>Real-time monitoring with OpenCV, Computer Vision & AI Alerts</strong></p>
+                <p><em>No external model downloads required - Uses built-in OpenCV detection</em></p>
+            </div>
+            """)
+
+            with gr.Tab("🎥 Live Detection"):
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        # WebRTC video input
+                        video_input = gr.Video(
+                            label="📹 Camera Feed (WebRTC Streaming)",
+                            sources=["webcam"],
+                            streaming=True,
+                            mirror_webcam=False,
+                            height=480
+                        )
+
+                        # System controls
+                        with gr.Row():
+                            # Wrap the key in a Textbox (pre-filled from the environment) so it
+                            # can be wired into the .click()/.stream() handlers below; a bare
+                            # os.getenv() string is not a Gradio component and would break them.
+                            gemini_key = gr.Textbox(
+                                label="🔑 Gemini API Key (optional)",
+                                value=os.getenv("GEMINI_API_KEY", ""),
+                                type="password",
+                                scale=2
+                            )
+                            init_btn = gr.Button("🚀 Initialize", variant="primary", scale=1)
+
+                    with gr.Column(scale=1):
+                        # System status
+                        init_status = gr.Textbox(
+                            label="🔧 System Status",
+                            interactive=False,
+                            lines=2,
+                            elem_classes=["status-box"]
+                        )
+
+                        # Detection metrics
+                        current_status = gr.Textbox(
+                            label="📊 Detection Metrics",
+                            interactive=False,
+                            lines=8,
+                            elem_classes=["metric-display"]
+                        )
+
+                        # Alert display
+                        alert_text_display = gr.Textbox(
+                            label="🚨 Active Alert",
+                            interactive=False,
+                            lines=3,
+                            elem_classes=["alert-box"]
+                        )
+
+                        # Audio alert output
+                        alert_audio = gr.Audio(
+                            label="🔊 Alert Sound",
+                            autoplay=True,
+                            visible=True
+                        )
+
+                # System logs panel
+                with gr.Row():
+                    system_logs = gr.Textbox(
+                        label="📝 System Activity Log",
+                        lines=6,
+                        interactive=False,
+                        elem_classes=["metric-display"]
+                    )
+
+            with gr.Tab("⚙️ System Configuration"):
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("""
+                        ### 🔧 Detection Parameters
+
+                        **Current OpenCV-based thresholds:**
+                        - **Eye Aspect Ratio (EAR)**: < 0.20 for 15+ frames
+                        - **Mouth Aspect Ratio (MAR)**: > 0.8 for 10+ frames
+                        - **Head Nod Angle**: > 20° deviation for 8+ frames
+                        - **Alert Cooldown**: 8 seconds between alerts
+
+                        ### 🎯 Detection Methods
+                        - **Primary**: MediaPipe Face Mesh (if available)
+                        - **Fallback**: OpenCV Haar Cascades
+                        - **No external downloads**: Uses built-in OpenCV models
+                        """)
+
+                    with gr.Column():
+                        gr.Markdown("""
+                        ### 📋 Easy Setup
+
+                        **Install dependencies:**
+                        ```bash
+                        pip install opencv-python gradio numpy scipy google-generativeai
+
+                        # Optional for better detection:
+                        pip install mediapipe
+                        ```
+
+                        **No model downloads required!**
+                        - Uses OpenCV's built-in face detection
+                        - MediaPipe auto-detects if available
+                        - Gemini API key is optional for AI alerts
+                        """)
+
+                gr.Markdown("""
+                ### 🚀 Advanced Features
+                - **Real-time WebRTC Processing**: Low latency video streaming
+                - **Multi-modal Detection**: Eyes, mouth, and head pose analysis
+                - **AI-Powered Alerts**: Contextual voice messages via Gemini
+                - **Adaptive Fallback**: Graceful degradation without external models
+                - **Visual Feedback**: Live metrics overlay on video
+                - **Comprehensive Logging**: Detailed activity tracking
+                """)
+
+            with gr.Tab("📊 Detection Info"):
+                gr.Markdown("""
+                ### 👁️ Eye Aspect Ratio (EAR)
+
+                **How it works:**
+                - Calculates ratio of eye height to width
+                - Lower values indicate closed/closing eyes
+                - Triggers alert when consistently low
+
+                **Detection method:**
+                - **MediaPipe**: Uses precise eye landmarks
+                - **OpenCV**: Estimates from eye rectangles
+
+                ### 👄 Mouth Aspect Ratio (MAR)
+
+                **Yawn detection:**
+                - Measures mouth opening relative to width
+                - Higher values indicate yawning
+                - Accounts for talking vs. yawning patterns
+
+                ### 📐 Head Pose Estimation
+
+                **Nodding detection:**
+                - Tracks head tilt and position
+                - Detects forward head movement
+                - Uses nose-chin alignment for pose estimation
+
+                ### 🧠 AI Alert Generation
+
+                **Smart alerts:**
+                - Context-aware messages via Gemini
+                - Severity-based escalation
+                - Fallback to audio beeps
+                - Cooldown prevents alert spam
+                """)
+
+            # Event handlers
+            init_btn.click(
+                fn=self.initialize_system,
+                inputs=[gemini_key],
+                outputs=[init_status, alert_text_display]
+            )
+
+            # WebRTC stream processing
+            video_input.stream(
+                fn=self.process_video_stream,
+                inputs=[video_input, gemini_key],
+                outputs=[video_input, current_status, alert_text_display, alert_audio, system_logs],
+                stream_every=0.1,  # 10 FPS processing
+                show_progress=False
+            )
+
+            # Safety notice
+            gr.HTML("""
+            <div style="margin-top: 20px; padding: 15px; background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%); border-radius: 8px; border-left: 5px solid #e17055;">
+                <h3>⚠️ Important Safety Notice</h3>
+                <p><strong>This system is for demonstration and research purposes only.</strong></p>
+                <ul style="margin: 10px 0;">
+                    <li><strong>Not a substitute</strong> for responsible driving practices</li>
+                    <li><strong>Pull over safely</strong> if you feel drowsy while driving</li>
+                    <li><strong>Ensure proper setup</strong>: good lighting, stable camera position</li>
+                    <li><strong>Use as supplementary tool</strong> alongside other safety measures</li>
+                </ul>
+                <p style="margin-top: 15px;"><em>Always prioritize real-world driving safety over technology assistance.</em></p>
+            </div>
+            """)
+
+        return interface
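Because `process_video_stream` lazily initializes the detection system, it can be smoke-tested without a browser or webcam. A minimal sketch, assuming the repo's dependencies are installed; on an all-black frame the detector should report no face:

```python
# Headless smoke test for the stream callback (synthetic frame, no webcam needed)
import numpy as np
from gradio_interface import GradioWebRTCInterface

iface = GradioWebRTCInterface()
blank = np.zeros((480, 640, 3), dtype=np.uint8)  # black BGR frame

frame, status, alert_text, alert_audio, logs = iface.process_video_stream(blank, gemini_key="")
print(status)  # expected: "👀 No face detected" since the frame is empty
```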