import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import cv2
import dlib
from scipy.spatial import distance as dist  # For EAR calculation
import time  # for progress bar simulation and potentially camera loop

# Constants for detection (from eye_eyebrow_detector.py)
EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR = 0.15
CALIBRATION_FRAMES = 30  # Reduced for faster demo calibration
EAR_THRESHOLD = 0.20
DLIB_SHAPE_PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"

# Display states (from eye_eyebrow_detector.py)
STATE_YES = "Yes"
STATE_NO = "No"
STATE_NORMAL = "Normal"
STATE_CALIBRATING = "Calibrating..."

# Landmark indices (from eye_eyebrow_detector.py)
(user_L_eye_indices_start, user_L_eye_indices_end) = (42, 48)
(user_R_eye_indices_start, user_R_eye_indices_end) = (36, 42)
user_L_eye_top_indices = [43, 44]
user_R_eye_top_indices = [37, 38]
user_L_eyebrow_y_calc_indices = range(23, 26)
user_R_eyebrow_y_calc_indices = range(18, 21)

# Initialize dlib's face detector and facial landmark predictor
# We'll initialize this inside the function or manage its state
# to avoid issues with Streamlit's rerun behavior.

# Stock photo URLs provided
FACIAL_RECOGNITION_IMAGES = [
    "https://pixabay.com/get/g12854d8ea8c029d2435717f123bb6b3afe5f218d14e94f3f1bd28aedaf46900b3c663fdca24e3e5ff97ed203a4ac97bdd34215b14df2f288e76f20602a81cb7d_1280.jpg",
    "https://pixabay.com/get/gf7f1fe0deb60c9c2217635915c6efdd85c3a35b943185d9d7c1b08ead1ec8f6d082af4bfe7a16759a66c38872d828da9c7d28f9ccd6ed4c243f50471537c072d_1280.jpg",
    "https://pixabay.com/get/g5226c742de43d538d1d4dd7e927224fb5be1b7f0f197f568dedc10336530b516cf9b2b3acc3128a4ea78a43ca348c8ce101234788ff131ed802e296e799ddc00_1280.jpg",
    "https://pixabay.com/get/g95d27127dde404c64753341780b8d8871f128bda7dfd5cc3ef287e4e838a1719fc91bc6c4bb24c52ef7cf27dad266a50d474142afe73e25f207ef9ef375c268e_1280.jpg"
]

AI_DATA_VIZ_IMAGES = [
    "https://pixabay.com/get/g155188879e1e171fb82c63d79b2963561b3a77f46ecb38053344fb6a1e236c2f406d66b1c3ae23260573869a9458daee7bfc00f37ef6840fce3a379da3d608e4_1280.jpg",
    "https://pixabay.com/get/g2620d81b6747dcda89657292ec3627897d7e61e906e76de11ecf6babedfcbe40aa0d0608950e1474795bc3a2abc67660ebc08977ba37da526834bec3cf342ba1_1280.jpg",
    "https://pixabay.com/get/ge8f809c48922d0dd956c8896157bd3ea8f606948d2ff72e507bad98b42b823e6409cc2923100bc91b15a499f72263fd8ca0f0949ac5ad2bbbb176f16e3dd0043_1280.jpg",
    "https://pixabay.com/get/g20331e7a18a7b2759056b7a9a73d20c34ff4f863ec4660535f9e5a1b15d3ad4b5b72bb07c385dd3ce154dc23b72fedd5c1eb9e2a4f2b335dfb17534d2b11d8e0_1280.jpg"
]

PRESENTATION_SLIDE_IMAGES = [
    "https://pixabay.com/get/gb57703b075295316bc1711f9701b18b84cfb89f469bb77f415392cc8986f922927cabc9afd50638f77ed51f53bcc62f423b96fbeb5f008abd1017db5b33e9e96_1280.jpg",
    "https://pixabay.com/get/gf4116a5ec8333a8a6bb33dcfe0baecc03580e6f7af95f2895880c9ec051479f3af002ecde96686e5fb6d3a860cf794fef532f27d373318317330932475a8b46c_1280.jpg"
]


def section_header(title):
    """Generate a section header with consistent styling"""
    # NOTE: the original HTML wrapper markup was lost; a simple styled div is
    # assumed here (the class name depends on the app's custom CSS).
    st.markdown(
        f'<div class="section-header"><h2>{title}</h2></div>',
        unsafe_allow_html=True,
    )
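
# Illustrative helper (not called by the live demo below): a condensed, pure-function
# restatement of the per-frame decision rule that render_live_demo_section() applies,
# included only to document how the constants above are used. It omits the demo's
# special handling of non-positive calibrated baselines.
def classify_gesture_sketch(avg_ear, dist_l, dist_r, normal_dist_l, normal_dist_r):
    """Return STATE_YES (eyes closed), STATE_NO (both eyebrows raised) or STATE_NORMAL."""
    if avg_ear < EAR_THRESHOLD:
        return STATE_YES
    raised_l = dist_l > normal_dist_l * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
    raised_r = dist_r > normal_dist_r * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
    if raised_l and raised_r:
        return STATE_NO
    return STATE_NORMAL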

def render_intro_section():
    """Render the introduction section of the presentation"""
    section_header("Introduction")
    col1, col2 = st.columns([3, 2])
    with col1:
        st.markdown("""
# Facial Gesture Recognition

Facial gesture recognition is an exciting field at the intersection of computer vision and artificial intelligence that focuses on identifying and interpreting human facial expressions and movements.

This presentation explores a system that can:

- Detect facial landmarks in real-time video
- Track specific facial movements (eyes, eyebrows)
- Classify gestures into meaningful actions
- Respond to gestures with appropriate system actions

Using a combination of **Convolutional Neural Networks (CNN)** and **Long Short-Term Memory (LSTM)** architecture, this system achieves high accuracy in real-time environments.
""")
    with col2:
        st.image(FACIAL_RECOGNITION_IMAGES[0], use_container_width=True)
        st.caption("Facial recognition technology")

    st.markdown("---")
    st.markdown("""
### Why Facial Gesture Recognition Matters

Facial gestures provide a natural, intuitive way for humans to communicate with computers:

- **Accessibility**: Enables computer control for people with mobility limitations
- **Hands-free Interaction**: Useful in environments where hands are occupied or contaminated
- **Enhanced User Experience**: Creates more natural human-computer interactions
- **Safety Applications**: Driver drowsiness detection, attention monitoring
""")


def render_objective_section():
    """Render the project objectives section"""
    section_header("Project Objective")
    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("""
## Primary Goal

Create an intelligent system that automatically recognizes facial gestures from a video stream in real-time.

### Key Objectives

1. **Real-time Processing**: Analyze video frames with minimal latency
2. **Accurate Detection**: Precisely identify facial landmarks
3. **Gesture Classification**: Correctly interpret facial movements
4. **Responsive Output**: Provide immediate feedback based on detected gestures
""")
        st.markdown("""
### Target Gestures

The system focuses on recognizing the following facial gestures:

- Eye movements (blinks, winks)
- Eyebrow movements (raising, furrowing)
- Normal/neutral state
""")
    with col2:
        # Add an interactive element - demo selector
        st.markdown("### Interactive Demo")
        gesture_type = st.selectbox(
            "Select a gesture type to learn more",
            ["Eye Movements", "Eyebrow Movements", "Neutral State"]
        )
        if gesture_type == "Eye Movements":
            st.info("Eye movements like blinks and winks can be used for selection or confirmation actions.")
        elif gesture_type == "Eyebrow Movements":
            st.info("Eyebrow raising can indicate interest or be used as a trigger for specific actions.")
        elif gesture_type == "Neutral State":
            st.info("The neutral state serves as the baseline for detecting deviations that signal intentional gestures.")


def render_architecture_section():
    """Render the architecture and methodology section"""
    section_header("Architecture & Methodology")
    st.markdown("""
## CNN-LSTM Architecture

The system employs a hybrid deep learning architecture combining:

- **Convolutional Neural Networks (CNN)**: Extract spatial features from facial images
- **Long Short-Term Memory (LSTM)**: Capture temporal patterns in sequential frames
""")

    # Display CNN-LSTM architecture diagram
    st.caption("Visual representation of CNN-LSTM architecture")

    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("""
### CNN Component

The CNN portion of the architecture:

- Processes individual video frames
- Extracts spatial features from facial regions
- Identifies key patterns in facial structure
- Uses multiple convolutional layers with pooling
""")
        # Create interactive CNN visualization
        st.markdown("#### CNN Layer Visualization")
        layer_slider = st.slider("Explore CNN layers", 1, 5, 1)

        # Generate a mock 3x3 grid of feature maps for the selected layer
        fig = plt.figure(figsize=(6, 4))
        fig.suptitle(f"CNN Layer {layer_slider} Feature Maps")
        grid_size = 4
        feature_maps = np.random.rand(grid_size, grid_size, 9)
        for i in range(9):
            plt.subplot(3, 3, i + 1)
            plt.imshow(feature_maps[:, :, i], cmap='viridis')
            plt.axis('off')
        plt.tight_layout()
        st.pyplot(fig)

    with col2:
        st.markdown("""
### LSTM Component

The LSTM network:

- Processes sequences of CNN-extracted features
- Captures temporal dependencies between frames
- Maintains memory of previous facial states
- Enables detection of dynamic gestures over time
""")
        # Add interactive LSTM cell visualization
        st.markdown("#### LSTM Cell Structure")
        st.image("https://upload.wikimedia.org/wikipedia/commons/9/93/LSTM_Cell.svg",
                 caption="LSTM Cell Structure", use_container_width=True)

    st.markdown("""
### Combined Model Benefits

This hybrid architecture provides several advantages:

1. **Spatial-Temporal Processing**: Captures both spatial features and temporal patterns
2. **Sequence Understanding**: Recognizes gestures that develop over multiple frames
3. **Contextual Awareness**: Considers the progression of facial movements
4. **Robust Classification**: Higher accuracy for dynamic gestures
""")
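
# Illustrative sketch of the CNN-LSTM architecture described (and drawn) in the
# sections above. This app does not train or load such a model; the layer sizes,
# sequence length, and input shape below are assumptions chosen for readability,
# not values from the original training code. TensorFlow is imported lazily so the
# presentation still runs when it is not installed.
def build_cnn_lstm_sketch(seq_len=30, frame_shape=(64, 64, 1), num_classes=3):
    """Build a small TimeDistributed-CNN + LSTM classifier (illustrative only)."""
    from tensorflow.keras import layers, models  # optional dependency

    model = models.Sequential([
        layers.Input(shape=(seq_len, *frame_shape)),            # sequence of frames
        layers.TimeDistributed(layers.Conv2D(32, (3, 3), activation="relu")),
        layers.TimeDistributed(layers.MaxPooling2D((2, 2))),
        layers.TimeDistributed(layers.Conv2D(64, (3, 3), activation="relu")),
        layers.TimeDistributed(layers.MaxPooling2D((2, 2))),
        layers.TimeDistributed(layers.Flatten()),                # spatial features per frame
        layers.LSTM(64),                                         # temporal patterns across frames
        layers.Dense(32, activation="relu"),
        layers.Dense(num_classes, activation="softmax"),         # e.g. Yes / No / Normal
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model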

def render_process_section():
    """Render the process flow section"""
    section_header("Process Flow")
    st.markdown("""
## System Workflow

The facial gesture recognition process follows these key steps:
""")

    # Create tabs for different stages of the process
    tab1, tab2, tab3 = st.tabs(["Data Collection", "Image Processing", "Model Training"])

    with tab1:
        col1, col2 = st.columns([3, 2])
        with col1:
            st.markdown("""
### Data Collection

The system requires a comprehensive dataset of facial gestures:

- **Video Capture**: Short video clips recorded using webcam
- **Gesture Performance**: Subjects perform predefined facial gestures
- **Labeling**: Each video is labeled with the corresponding gesture
- **Dataset Diversity**: Multiple subjects, lighting conditions, and angles

A balanced dataset with various examples of each gesture is crucial for model generalization.
""")
        with col2:
            st.caption("")  # placeholder column (no image content)

    with tab2:
        col1, col2 = st.columns([2, 3])
        with col1:
            st.image(AI_DATA_VIZ_IMAGES[0], use_container_width=True)
            st.caption("Image processing visualization")
        with col2:
            st.markdown("""
### Image Processing

Raw video frames undergo several preprocessing steps:

1. **Facial Detection**: Locating the face in each frame
2. **Landmark Extraction**: Identifying 68 key facial points
3. **Region Isolation**: Extracting regions of interest (eyes, eyebrows)
4. **Normalization**: Converting to grayscale, normalizing pixel values
5. **Augmentation**: Generating additional training samples through transformations

These steps ensure the input data is optimized for the neural network.
""")

        # Interactive element - landmark detection demo
        show_landmarks = st.checkbox("Show facial landmarks example (eyes and eyebrows)")
        if show_landmarks:
            landmark_cols = st.columns(2)
            with landmark_cols[0]:
                # Mock landmark visualization using matplotlib - focusing on eyes and eyebrows
                fig, ax = plt.subplots(figsize=(4, 4))
                # Create a simple face outline
                circle = plt.Circle((0.5, 0.5), 0.4, fill=False, color='blue')
                ax.add_patch(circle)

                # Add eye landmarks with extra detail (6 points per eye)
                # Left eye
                left_eye_x = [0.30, 0.33, 0.37, 0.41, 0.38, 0.34]
                left_eye_y = [0.60, 0.58, 0.58, 0.60, 0.62, 0.62]
                ax.plot(left_eye_x, left_eye_y, 'g-', linewidth=2)
                for x, y in zip(left_eye_x, left_eye_y):
                    ax.plot(x, y, 'go', markersize=4)

                # Right eye
                right_eye_x = [0.59, 0.62, 0.66, 0.70, 0.67, 0.63]
                right_eye_y = [0.60, 0.58, 0.58, 0.60, 0.62, 0.62]
                ax.plot(right_eye_x, right_eye_y, 'g-', linewidth=2)
                for x, y in zip(right_eye_x, right_eye_y):
                    ax.plot(x, y, 'go', markersize=4)

                # Add detailed eyebrow landmarks (5 points per eyebrow)
                # Left eyebrow
                left_brow_x = [0.25, 0.30, 0.35, 0.40, 0.45]
                left_brow_y = [0.70, 0.72, 0.73, 0.72, 0.70]
                ax.plot(left_brow_x, left_brow_y, 'r-', linewidth=2)
                for x, y in zip(left_brow_x, left_brow_y):
                    ax.plot(x, y, 'ro', markersize=4)

                # Right eyebrow
                right_brow_x = [0.55, 0.60, 0.65, 0.70, 0.75]
                right_brow_y = [0.70, 0.72, 0.73, 0.72, 0.70]
                ax.plot(right_brow_x, right_brow_y, 'r-', linewidth=2)
                for x, y in zip(right_brow_x, right_brow_y):
                    ax.plot(x, y, 'ro', markersize=4)

                # Add labels
                ax.text(0.36, 0.67, "Left Eye", fontsize=9, ha='center')
                ax.text(0.64, 0.67, "Right Eye", fontsize=9, ha='center')
                ax.text(0.35, 0.76, "Left Eyebrow", fontsize=9, ha='center')
                ax.text(0.65, 0.76, "Right Eyebrow", fontsize=9, ha='center')

                ax.set_xlim(0, 1)
                ax.set_ylim(0, 1)
                ax.set_title("Eye and Eyebrow Landmarks")
                ax.axis('off')
                st.pyplot(fig)
            with landmark_cols[1]:
                st.markdown("""
**Focused Facial Landmarks Analysis:**

This system specifically analyzes:

- **Eyes (6 points each)**: Tracks eye openness, blinks, and winking
- **Eyebrows (5 points each)**: Detects eyebrow raising, furrowing, and expressions

While the shape_predictor_68_face_landmarks model can identify 68 facial landmarks including:

- 9 points for the nose
- 20 points for the mouth
- 17 points for the face contour

This implementation focuses exclusively on eye and eyebrow movements for gesture recognition.
""")

    with tab3:
        st.markdown("""
### Model Training

The CNN-LSTM model is trained using the processed dataset:

1. **Data Splitting**: Division into training, validation, and test sets
2. **CNN Training**: Learning spatial feature extraction
3. **LSTM Training**: Learning temporal patterns
4. **Hyperparameter Tuning**: Optimizing model architecture and parameters
5. **Validation**: Evaluating performance on validation set
6. **Testing**: Final evaluation on test set
""")

        # Interactive training visualization
        st.markdown("#### Training Visualization")

        # Mock training metrics
        epochs = 50
        train_loss = 1.5 * np.exp(-0.05 * np.arange(epochs)) + 0.1 * np.random.rand(epochs)
        val_loss = 1.7 * np.exp(-0.04 * np.arange(epochs)) + 0.15 * np.random.rand(epochs)
        train_acc = 1 - train_loss * 0.5
        val_acc = 1 - val_loss * 0.5

        # Create interactive plot. The figure is built explicitly with
        # graph_objects so only the two named traces appear (the earlier
        # px.line call duplicated them as unnamed traces).
        metric = st.radio("Select metric to visualize", ["Loss", "Accuracy"])
        epoch_axis = list(range(1, epochs + 1))
        fig = go.Figure()
        if metric == "Loss":
            fig.add_scatter(x=epoch_axis, y=train_loss, name="Training Loss",
                            line=dict(color="blue", shape="spline"))
            fig.add_scatter(x=epoch_axis, y=val_loss, name="Validation Loss",
                            line=dict(color="red", shape="spline"))
            fig.update_layout(title="Training and Validation Loss",
                              xaxis_title="Epoch", yaxis_title="Loss",
                              legend_title_text="Legend")
        else:
            fig.add_scatter(x=epoch_axis, y=train_acc, name="Training Accuracy",
                            line=dict(color="green", shape="spline"))
            fig.add_scatter(x=epoch_axis, y=val_acc, name="Validation Accuracy",
                            line=dict(color="orange", shape="spline"))
            fig.update_layout(title="Training and Validation Accuracy",
                              xaxis_title="Epoch", yaxis_title="Accuracy",
                              legend_title_text="Legend")
        st.plotly_chart(fig)
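
# Illustrative sketch of the preprocessing steps listed in the "Image Processing" tab
# above (landmark extraction, region isolation, grayscale conversion, normalization).
# It is not used by the live demo, which works directly on landmark geometry; the crop
# margin and output size are assumptions rather than values from the training pipeline.
def preprocess_eye_region_sketch(frame_bgr, landmarks, output_size=(64, 64), margin=10):
    """Crop the eye/eyebrow region from a BGR frame and return a normalized grayscale patch."""
    # Eyebrow (17-26) and eye (36-47) landmarks from dlib's 68-point model.
    roi_indices = list(range(17, 27)) + list(range(36, 48))
    xs = [landmarks.part(i).x for i in roi_indices]
    ys = [landmarks.part(i).y for i in roi_indices]
    x0, y0 = max(min(xs) - margin, 0), max(min(ys) - margin, 0)
    x1, y1 = max(xs) + margin, max(ys) + margin
    roi = frame_bgr[y0:y1, x0:x1]
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)   # grayscale conversion
    resized = cv2.resize(gray, output_size)        # fixed input size for the network
    return resized.astype(np.float32) / 255.0      # normalize pixel values to [0, 1]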

def render_technology_section():
    """Render the technologies section"""
    section_header("Technologies")
    st.markdown("""
## Core Technologies

The facial gesture recognition system relies on several key technologies:
""")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
### Python Ecosystem

- **Python**: Core programming language
- **NumPy**: Numerical operations
- **Pandas**: Data management
- **Matplotlib/Plotly**: Visualization
""")
        st.image(AI_DATA_VIZ_IMAGES[2], use_container_width=True)
        st.caption("Python data analysis visualization")

    with col2:
        st.markdown("""
### Deep Learning

- **TensorFlow/Keras**: Neural network framework
- **CNN**: Spatial feature extraction
- **LSTM**: Temporal sequence processing
- **Transfer Learning**: Leveraging pre-trained models
""")
        # Create an interactive visualization of model architecture
        st.markdown("#### Model Architecture")
        fig = go.Figure()
        # Draw rectangles representing layers
        layers = [
            {"name": "Input", "width": 0.8, "height": 0.15, "x": 0.5, "y": 0.9, "color": "lightblue"},
            {"name": "Conv2D", "width": 0.8, "height": 0.1, "x": 0.5, "y": 0.75, "color": "lightgreen"},
            {"name": "MaxPooling", "width": 0.7, "height": 0.1, "x": 0.5, "y": 0.63, "color": "lightgreen"},
            {"name": "Conv2D", "width": 0.6, "height": 0.1, "x": 0.5, "y": 0.51, "color": "lightgreen"},
            {"name": "MaxPooling", "width": 0.5, "height": 0.1, "x": 0.5, "y": 0.39, "color": "lightgreen"},
            {"name": "LSTM", "width": 0.8, "height": 0.1, "x": 0.5, "y": 0.27, "color": "lightpink"},
            {"name": "Dense", "width": 0.6, "height": 0.1, "x": 0.5, "y": 0.15, "color": "lightyellow"},
            {"name": "Output", "width": 0.4, "height": 0.1, "x": 0.5, "y": 0.05, "color": "lightblue"}
        ]
        for layer in layers:
            # Add layer rectangle
            fig.add_shape(
                type="rect",
                x0=layer["x"] - layer["width"] / 2,
                y0=layer["y"] - layer["height"] / 2,
                x1=layer["x"] + layer["width"] / 2,
                y1=layer["y"] + layer["height"] / 2,
                line=dict(color="black"),
                fillcolor=layer["color"]
            )
            # Add layer name
            fig.add_annotation(
                x=layer["x"],
                y=layer["y"],
                text=layer["name"],
                showarrow=False
            )
            # Add connection lines between layers (except for the last layer)
            if layer != layers[-1]:
                next_layer = layers[layers.index(layer) + 1]
                fig.add_shape(
                    type="line",
                    x0=layer["x"],
                    y0=layer["y"] - layer["height"] / 2,
                    x1=next_layer["x"],
                    y1=next_layer["y"] + next_layer["height"] / 2,
                    line=dict(color="gray", width=1)
                )
        fig.update_layout(
            showlegend=False,
            width=300,
            height=500,
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1])
        )
        st.plotly_chart(fig)

    with col3:
        st.markdown("""
### Computer Vision

- **OpenCV**: Image and video processing
- **Dlib**: Facial landmark detection
- **MediaPipe**: Real-time face mesh tracking
- **Image augmentation**: Diverse training samples
""")
        st.image(AI_DATA_VIZ_IMAGES[3], use_container_width=True)
        st.caption("Computer vision analysis")

    st.markdown("---")

    # Technology performance comparison
    st.markdown("### Performance Comparison")

    # Create a mock performance comparison chart
    performance_data = {
        'Method': ['Traditional CV', 'CNN Only', 'LSTM Only', 'CNN-LSTM'],
        'Accuracy': [65, 82, 78, 93],
        'Speed (FPS)': [45, 28, 32, 25],
        'Memory (MB)': [120, 350, 280, 420]
    }
    metric_to_show = st.selectbox("Select performance metric", ["Accuracy", "Speed (FPS)", "Memory (MB)"])
    fig = px.bar(
        performance_data,
        x='Method',
        y=metric_to_show,
        color='Method',
        text=performance_data[metric_to_show],
        title=f"Performance Comparison - {metric_to_show}"
    )
    # Customize the chart appearance
    fig.update_traces(texttemplate='%{text}', textposition='outside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    st.plotly_chart(fig)


def render_applications_section():
    """Render the applications section"""
    section_header("Applications")
    st.markdown("""
## Potential Applications

Facial gesture recognition technology has numerous practical applications across various domains:
""")

    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("""
### Human-Computer Interaction

- **Hands-free Computing**: Control computers without physical input devices
- **Accessible Technology**: Enable computer usage for people with mobility limitations
- **Interactive Presentations**: Control slides and demonstrations with facial gestures
- **Gaming**: Enhanced immersion through facial expression controls
""")
        st.markdown("""
### Healthcare

- **Patient Monitoring**: Track patient attentiveness or consciousness
- **Rehabilitation**: Provide feedback for facial exercises
- **Pain Assessment**: Detect discomfort through facial expressions
- **Mental Health**: Analyze emotional responses during therapy
""")
    with col2:
        st.markdown("""
### Automotive Applications

- **Driver Monitoring**: Detect drowsiness or distraction
- **In-car Controls**: Adjust settings with facial gestures
- **Personalized Experience**: Recognize driver identity and preferences
- **Security**: Additional authentication layer
""")
        st.markdown("""
### Accessibility

- **Assistive Technology**: Enable computer control for users with mobility impairments
- **Communication Aids**: Help non-verbal individuals express themselves
- **Smart Home Control**: Manage home automation with facial gestures
- **Public Kiosks**: Enable gesture-based interaction with public information systems
""")

    # Interactive application explorer
    st.markdown("### Application Explorer")
    application_area = st.selectbox(
        "Select an application area to explore",
        ["Human-Computer Interaction", "Healthcare", "Automotive", "Accessibility", "Education"]
    )
    if application_area == "Human-Computer Interaction":
        st.info("""
**Featured Application: Gesture-Controlled Presentation System**

A system that allows presenters to control slideshows using facial gestures:

- Eye blinks to advance slides
- Eyebrow raises to go back
- Head nods/shakes to confirm/cancel actions

This enables hands-free presentations, allowing speakers to maintain natural gestures while speaking.
""")
    elif application_area == "Healthcare":
        st.info("""
**Featured Application: Pain Assessment Tool**

A system that monitors patient facial expressions to detect signs of pain:

- Real-time monitoring without requiring verbal communication
- Particularly useful for non-verbal patients or those with cognitive impairments
- Alerts medical staff when pain indicators are detected
- Maintains a log of pain expression events for medical review
""")
    elif application_area == "Automotive":
        st.info("""
**Featured Application: Driver Alertness Monitoring**

A system that detects signs of driver fatigue or distraction:

- Monitors eye closure duration and blink rate
- Detects head nodding indicative of drowsiness
- Provides audio alerts when fatigue signs are detected
- Suggests breaks when sustained fatigue patterns are observed
""")
    elif application_area == "Accessibility":
        st.info("""
**Featured Application: Facial Gesture Computer Control**

A complete computer control system for people with limited mobility:

- Cursor movement through slight head movements
- Selection through eye blinks or eyebrow raises
- Scrolling through specific eye movements
- Text input through an on-screen keyboard navigated by facial gestures
""")
    elif application_area == "Education":
        st.info("""
**Featured Application: Student Engagement Analytics**

A system that monitors student facial expressions during online learning:

- Tracks attentiveness and engagement through eye movements
- Identifies confusion through facial expressions
- Provides analytics to instructors about student engagement
- Helps identify content that may need additional explanation
""")

    # Conclusion
    st.markdown("---")
    st.markdown("""
## Conclusion

Facial gesture recognition using AI represents a significant advancement in human-computer interaction. By combining CNN and LSTM architectures, we've created a system that can:

- Accurately recognize facial gestures in real-time
- Process video streams with minimal latency
- Adapt to different users and environments
- Enable new possibilities for accessibility and interaction

This technology continues to evolve, with ongoing improvements in accuracy, speed, and adaptability.
""")
    st.success("Thank you for exploring this presentation on Facial Gesture Recognition using AI!")
    # Using the SVG file from assets instead of embedding directly
    st.image("assets/workflow_diagram.svg")


def get_landmark_point_from_detector(landmarks, index):
    """Helper function from eye_eyebrow_detector.py"""
    return (landmarks.part(index).x, landmarks.part(index).y)


def eye_aspect_ratio_from_detector(eye_pts):
    """Helper function from eye_eyebrow_detector.py"""
    # Eye aspect ratio (EAR): the two vertical eye distances over twice the
    # horizontal distance. The value drops sharply when the eye closes.
    A = dist.euclidean(eye_pts[1], eye_pts[5])  # vertical distance 1
    B = dist.euclidean(eye_pts[2], eye_pts[4])  # vertical distance 2
    C = dist.euclidean(eye_pts[0], eye_pts[3])  # horizontal distance
    ear_val = (A + B) / (2.0 * C)
    return ear_val


def initialize_dlib_components():
    """Initializes dlib detector and predictor."""
    try:
        detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor(DLIB_SHAPE_PREDICTOR_PATH)
        return detector, predictor
    except RuntimeError as e:
        st.error(f"Failed to load dlib model: {e}. Please ensure '{DLIB_SHAPE_PREDICTOR_PATH}' is in the correct path.")
        return None, None

def render_live_demo_section():
    """Render the live facial gesture recognition demo section"""
    section_header("Live Facial Gesture Demo")
    st.write("This demo uses your webcam to perform real-time eye and eyebrow gesture detection.")
    st.warning("Ensure you have a webcam connected and have granted permission if prompted by your browser. Also, make sure `shape_predictor_68_face_landmarks.dat` is in the application's root directory.")

    if 'detector' not in st.session_state or 'predictor' not in st.session_state:
        st.session_state.detector, st.session_state.predictor = initialize_dlib_components()

    if st.session_state.detector is None or st.session_state.predictor is None:
        st.error("Dlib components could not be initialized. The demo cannot run.")
        return

    # Initialize session state variables for the demo
    if 'run_demo' not in st.session_state:
        st.session_state.run_demo = False
    if 'calibration_counter' not in st.session_state:
        st.session_state.calibration_counter = 0
    if 'calibration_data_user_L_eyebrow_y' not in st.session_state:
        st.session_state.calibration_data_user_L_eyebrow_y = []
    if 'calibration_data_user_R_eyebrow_y' not in st.session_state:
        st.session_state.calibration_data_user_R_eyebrow_y = []
    if 'calibration_data_user_L_eye_top_y' not in st.session_state:
        st.session_state.calibration_data_user_L_eye_top_y = []
    if 'calibration_data_user_R_eye_top_y' not in st.session_state:
        st.session_state.calibration_data_user_R_eye_top_y = []
    if 'normal_user_L_eyebrow_y_avg' not in st.session_state:
        st.session_state.normal_user_L_eyebrow_y_avg = 0
    if 'normal_user_R_eyebrow_y_avg' not in st.session_state:
        st.session_state.normal_user_R_eyebrow_y_avg = 0
    if 'normal_user_L_eye_top_y_avg' not in st.session_state:
        st.session_state.normal_user_L_eye_top_y_avg = 0
    if 'normal_user_R_eye_top_y_avg' not in st.session_state:
        st.session_state.normal_user_R_eye_top_y_avg = 0
    if 'normal_dist_L_eyebrow_to_eye' not in st.session_state:
        st.session_state.normal_dist_L_eyebrow_to_eye = 0
    if 'normal_dist_R_eyebrow_to_eye' not in st.session_state:
        st.session_state.normal_dist_R_eyebrow_to_eye = 0
    if 'current_state_demo' not in st.session_state:
        st.session_state.current_state_demo = STATE_CALIBRATING
    if 'camera_active' not in st.session_state:
        st.session_state.camera_active = False

    col1, col2 = st.columns(2)
    with col1:
        if st.button("Start/Restart Demo"):
            st.session_state.run_demo = True
            st.session_state.camera_active = True
            # Reset calibration
            st.session_state.calibration_counter = 0
            st.session_state.calibration_data_user_L_eyebrow_y = []
            st.session_state.calibration_data_user_R_eyebrow_y = []
            st.session_state.calibration_data_user_L_eye_top_y = []
            st.session_state.calibration_data_user_R_eye_top_y = []
            st.session_state.current_state_demo = STATE_CALIBRATING
            st.info("Calibration started. Look at the camera with a normal expression.")
    with col2:
        if st.button("Stop Demo"):
            st.session_state.run_demo = False
            st.session_state.camera_active = False

    if st.session_state.run_demo and st.session_state.camera_active:
        # Placeholder for video feed
        frame_placeholder = st.empty()

        # Attempt to open the webcam.
        # We manage cap in session_state to persist it across reruns if needed,
        # but for a continuous loop, it's tricky.
        # A common pattern is to release it if we stop.
        if 'cap' not in st.session_state or not st.session_state.cap.isOpened():
            st.session_state.cap = cv2.VideoCapture(0)
        if not st.session_state.cap.isOpened():
            st.error("Cannot open webcam.")
            st.session_state.run_demo = False  # Stop demo if camera fails
            return

        detector = st.session_state.detector
        predictor = st.session_state.predictor

        while st.session_state.run_demo and st.session_state.cap.isOpened():
            ret, frame = st.session_state.cap.read()
            if not ret:
                st.error("Failed to grab frame from webcam.")
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = detector(gray)
            display_text = st.session_state.current_state_demo

            if st.session_state.calibration_counter < CALIBRATION_FRAMES:
                st.session_state.current_state_demo = STATE_CALIBRATING
                display_text = f"{STATE_CALIBRATING} ({st.session_state.calibration_counter}/{CALIBRATION_FRAMES})"

            for face in faces:
                landmarks = predictor(gray, face)

                user_L_eyebrow_current_y_pts = [landmarks.part(i).y for i in user_L_eyebrow_y_calc_indices]
                current_user_L_eyebrow_y_avg = np.mean(user_L_eyebrow_current_y_pts) if user_L_eyebrow_current_y_pts else 0
                user_R_eyebrow_current_y_pts = [landmarks.part(i).y for i in user_R_eyebrow_y_calc_indices]
                current_user_R_eyebrow_y_avg = np.mean(user_R_eyebrow_current_y_pts) if user_R_eyebrow_current_y_pts else 0
                user_L_eye_top_current_y_pts = [landmarks.part(i).y for i in user_L_eye_top_indices]
                current_user_L_eye_top_y_avg = np.mean(user_L_eye_top_current_y_pts) if user_L_eye_top_current_y_pts else 0
                user_R_eye_top_current_y_pts = [landmarks.part(i).y for i in user_R_eye_top_indices]
                current_user_R_eye_top_y_avg = np.mean(user_R_eye_top_current_y_pts) if user_R_eye_top_current_y_pts else 0

                user_L_eye_all_pts = np.array(
                    [get_landmark_point_from_detector(landmarks, i)
                     for i in range(user_L_eye_indices_start, user_L_eye_indices_end)], dtype="int")
                user_R_eye_all_pts = np.array(
                    [get_landmark_point_from_detector(landmarks, i)
                     for i in range(user_R_eye_indices_start, user_R_eye_indices_end)], dtype="int")
                left_ear = eye_aspect_ratio_from_detector(user_L_eye_all_pts)
                right_ear = eye_aspect_ratio_from_detector(user_R_eye_all_pts)
                avg_ear = (left_ear + right_ear) / 2.0

                if st.session_state.calibration_counter < CALIBRATION_FRAMES:
                    st.session_state.calibration_data_user_L_eyebrow_y.append(current_user_L_eyebrow_y_avg)
                    st.session_state.calibration_data_user_R_eyebrow_y.append(current_user_R_eyebrow_y_avg)
                    st.session_state.calibration_data_user_L_eye_top_y.append(current_user_L_eye_top_y_avg)
                    st.session_state.calibration_data_user_R_eye_top_y.append(current_user_R_eye_top_y_avg)
                    st.session_state.calibration_counter += 1
                    display_text = f"{STATE_CALIBRATING} ({st.session_state.calibration_counter}/{CALIBRATION_FRAMES})"

                    if st.session_state.calibration_counter == CALIBRATION_FRAMES:
                        st.session_state.normal_user_L_eyebrow_y_avg = np.mean(st.session_state.calibration_data_user_L_eyebrow_y) if st.session_state.calibration_data_user_L_eyebrow_y else 0
                        st.session_state.normal_user_R_eyebrow_y_avg = np.mean(st.session_state.calibration_data_user_R_eyebrow_y) if st.session_state.calibration_data_user_R_eyebrow_y else 0
                        st.session_state.normal_user_L_eye_top_y_avg = np.mean(st.session_state.calibration_data_user_L_eye_top_y) if st.session_state.calibration_data_user_L_eye_top_y else 0
                        st.session_state.normal_user_R_eye_top_y_avg = np.mean(st.session_state.calibration_data_user_R_eye_top_y) if st.session_state.calibration_data_user_R_eye_top_y else 0
                        st.session_state.normal_dist_L_eyebrow_to_eye = st.session_state.normal_user_L_eye_top_y_avg - st.session_state.normal_user_L_eyebrow_y_avg
                        st.session_state.normal_dist_R_eyebrow_to_eye = st.session_state.normal_user_R_eye_top_y_avg - st.session_state.normal_user_R_eyebrow_y_avg
                        st.session_state.current_state_demo = STATE_NORMAL
                        display_text = STATE_NORMAL
                        st.success("Calibration finished.")
                else:
                    # Detection Phase
                    st.session_state.current_state_demo = STATE_NORMAL  # Default to normal after calibration
                    display_text = STATE_NORMAL
                    if st.session_state.normal_dist_L_eyebrow_to_eye != 0 and st.session_state.normal_dist_R_eyebrow_to_eye != 0:
                        if avg_ear < EAR_THRESHOLD:
                            st.session_state.current_state_demo = STATE_YES
                            display_text = STATE_YES
                        else:
                            current_dist_L = current_user_L_eye_top_y_avg - current_user_L_eyebrow_y_avg
                            current_dist_R = current_user_R_eye_top_y_avg - current_user_R_eyebrow_y_avg
                            threshold_dist_L = st.session_state.normal_dist_L_eyebrow_to_eye * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
                            threshold_dist_R = st.session_state.normal_dist_R_eyebrow_to_eye * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
                            if st.session_state.normal_dist_L_eyebrow_to_eye <= 0:
                                threshold_dist_L = st.session_state.normal_dist_L_eyebrow_to_eye + abs(st.session_state.normal_dist_L_eyebrow_to_eye * EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR) + 5
                            if st.session_state.normal_dist_R_eyebrow_to_eye <= 0:
                                threshold_dist_R = st.session_state.normal_dist_R_eyebrow_to_eye + abs(st.session_state.normal_dist_R_eyebrow_to_eye * EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR) + 5
                            if current_dist_L > threshold_dist_L and current_dist_R > threshold_dist_R:
                                st.session_state.current_state_demo = STATE_NO
                                display_text = STATE_NO

            # Display the detected state on the frame
            color = (255, 255, 0)  # Default for Normal/Calibrating
            if st.session_state.current_state_demo == STATE_YES:
                color = (0, 255, 0)
            elif st.session_state.current_state_demo == STATE_NO:
                color = (0, 0, 255)

            # Make text larger and position it higher
            cv2.putText(frame, display_text, (frame.shape[1] // 2 - 100, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3, cv2.LINE_AA)

            # Convert frame to RGB for Streamlit
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_placeholder.image(frame_rgb, channels="RGB")

            # Add a small delay to make the video smoother and allow Streamlit to process
            # time.sleep(0.01)  # Removed for faster processing, relying on inherent delays

        # Release camera when demo stops or an error occurs
        if 'cap' in st.session_state and st.session_state.cap.isOpened():
            st.session_state.cap.release()
        if st.session_state.camera_active is False and 'cap' in st.session_state:  # if explicitly stopped
            del st.session_state.cap

    elif not st.session_state.run_demo and st.session_state.camera_active:
        # This case handles when Stop Demo is clicked, ensuring camera is released.
        if 'cap' in st.session_state and st.session_state.cap.isOpened():
            st.session_state.cap.release()
            del st.session_state.cap  # Ensure it's re-initialized if started again
        st.session_state.camera_active = False
        st.info("Live demo stopped.")


# Example of how to call this new section in a main app structure:
# if __name__ == "__main__":
#     st.set_page_config(layout="wide")
#     # Apply custom CSS (optional)
#     # st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
#
#     render_intro_section()
#     render_objective_section()
#     render_architecture_section()
#     render_process_section()
#     render_technology_section()
#     render_applications_section()
#     render_live_demo_section()  # New section added here
#
#     st.sidebar.title("Navigation")
#     page = st.sidebar.radio("Go to", ["Introduction", "Objective", "Architecture", "Process Flow", "Technologies", "Applications", "Live Demo"])
#
#     if page == "Introduction": render_intro_section()
#     elif page == "Objective": render_objective_section()
#     # ... etc. for other sections
#     elif page == "Live Demo": render_live_demo_section()  # Call if selected from sidebar too.
#
# This part is just an example of how one might structure the main app.
# The key is that `render_live_demo_section()` can now be called.
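
# Minimal runnable entry point following the example comments above. This is a sketch:
# the real application may import these render functions from a separate app.py with
# its own page config and custom CSS.
if __name__ == "__main__":
    st.set_page_config(page_title="Facial Gesture Recognition", layout="wide")
    _PAGES = {
        "Introduction": render_intro_section,
        "Objective": render_objective_section,
        "Architecture": render_architecture_section,
        "Process Flow": render_process_section,
        "Technologies": render_technology_section,
        "Applications": render_applications_section,
        "Live Demo": render_live_demo_section,
    }
    st.sidebar.title("Navigation")
    _page = st.sidebar.radio("Go to", list(_PAGES.keys()))
    _PAGES[_page]()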