import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import cv2
import dlib
from scipy.spatial import distance as dist  # For EAR calculation
import time  # for progress bar simulation and potentially the camera loop
# Constants for detection (from eye_eyebrow_detector.py)
EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR = 0.15  # a brow raise = more than a 15% increase over the calibrated brow-to-eye distance
CALIBRATION_FRAMES = 30  # Reduced for faster demo calibration
EAR_THRESHOLD = 0.20  # Eyes are treated as closed when the average eye aspect ratio falls below this
DLIB_SHAPE_PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"
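# Note: shape_predictor_68_face_landmarks.dat is not bundled with dlib's pip package; it is
# typically downloaded from dlib's model archive (e.g. the compressed file under dlib.net/files/)
# and placed next to this script so that the path above resolves.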
# Display states (from eye_eyebrow_detector.py)
STATE_YES = "Yes"
STATE_NO = "No"
STATE_NORMAL = "Normal"
STATE_CALIBRATING = "Calibrating..."
# Landmark indices (from eye_eyebrow_detector.py)
(user_L_eye_indices_start, user_L_eye_indices_end) = (42, 48)
(user_R_eye_indices_start, user_R_eye_indices_end) = (36, 42)
user_L_eye_top_indices = [43, 44]
user_R_eye_top_indices = [37, 38]
user_L_eyebrow_y_calc_indices = range(23, 26)
user_R_eyebrow_y_calc_indices = range(18, 21)
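# For reference, the dlib 68-point layout groups landmarks as: jaw 0-16, right eyebrow 17-21,
# left eyebrow 22-26, nose 27-35, right eye 36-41, left eye 42-47, mouth 48-67 ("left"/"right"
# from the subject's perspective). The ranges above therefore select the full six-point contour
# of each eye, the two upper-eyelid points (37/38 and 43/44), and the middle of each eyebrow.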
# Initialize dlib's face detector and facial landmark predictor.
# Initialization happens inside a function and the objects are kept in st.session_state
# so they are not reloaded on every one of Streamlit's reruns.
# Stock photo URLs provided | |
FACIAL_RECOGNITION_IMAGES = [ | |
"https://pixabay.com/get/g12854d8ea8c029d2435717f123bb6b3afe5f218d14e94f3f1bd28aedaf46900b3c663fdca24e3e5ff97ed203a4ac97bdd34215b14df2f288e76f20602a81cb7d_1280.jpg", | |
"https://pixabay.com/get/gf7f1fe0deb60c9c2217635915c6efdd85c3a35b943185d9d7c1b08ead1ec8f6d082af4bfe7a16759a66c38872d828da9c7d28f9ccd6ed4c243f50471537c072d_1280.jpg", | |
"https://pixabay.com/get/g5226c742de43d538d1d4dd7e927224fb5be1b7f0f197f568dedc10336530b516cf9b2b3acc3128a4ea78a43ca348c8ce101234788ff131ed802e296e799ddc00_1280.jpg", | |
"https://pixabay.com/get/g95d27127dde404c64753341780b8d8871f128bda7dfd5cc3ef287e4e838a1719fc91bc6c4bb24c52ef7cf27dad266a50d474142afe73e25f207ef9ef375c268e_1280.jpg" | |
] | |
AI_DATA_VIZ_IMAGES = [ | |
"https://pixabay.com/get/g155188879e1e171fb82c63d79b2963561b3a77f46ecb38053344fb6a1e236c2f406d66b1c3ae23260573869a9458daee7bfc00f37ef6840fce3a379da3d608e4_1280.jpg", | |
"https://pixabay.com/get/g2620d81b6747dcda89657292ec3627897d7e61e906e76de11ecf6babedfcbe40aa0d0608950e1474795bc3a2abc67660ebc08977ba37da526834bec3cf342ba1_1280.jpg", | |
"https://pixabay.com/get/ge8f809c48922d0dd956c8896157bd3ea8f606948d2ff72e507bad98b42b823e6409cc2923100bc91b15a499f72263fd8ca0f0949ac5ad2bbbb176f16e3dd0043_1280.jpg", | |
"https://pixabay.com/get/g20331e7a18a7b2759056b7a9a73d20c34ff4f863ec4660535f9e5a1b15d3ad4b5b72bb07c385dd3ce154dc23b72fedd5c1eb9e2a4f2b335dfb17534d2b11d8e0_1280.jpg" | |
] | |
PRESENTATION_SLIDE_IMAGES = [ | |
"https://pixabay.com/get/gb57703b075295316bc1711f9701b18b84cfb89f469bb77f415392cc8986f922927cabc9afd50638f77ed51f53bcc62f423b96fbeb5f008abd1017db5b33e9e96_1280.jpg", | |
"https://pixabay.com/get/gf4116a5ec8333a8a6bb33dcfe0baecc03580e6f7af95f2895880c9ec051479f3af002ecde96686e5fb6d3a860cf794fef532f27d373318317330932475a8b46c_1280.jpg" | |
] | |
def section_header(title): | |
"""Generate a section header with consistent styling""" | |
st.markdown(f'<p class="section-header">{title}</p>', unsafe_allow_html=True) | |
def render_intro_section(): | |
"""Render the introduction section of the presentation""" | |
section_header("Introduction") | |
col1, col2 = st.columns([3, 2]) | |
with col1: | |
st.markdown(""" | |
# Facial Gesture Recognition | |
Facial gesture recognition is an exciting field at the intersection of computer vision and artificial intelligence that focuses on identifying and interpreting human facial expressions and movements. | |
This presentation explores a system that can: | |
- Detect facial landmarks in real-time video | |
- Track specific facial movements (eyes, eyebrows) | |
- Classify gestures into meaningful actions | |
- Respond to gestures with appropriate system actions | |
Using a combination of **Convolutional Neural Networks (CNN)** and **Long Short-Term Memory (LSTM)** architecture, this system achieves high accuracy in real-time environments. | |
""") | |
with col2: | |
st.image(FACIAL_RECOGNITION_IMAGES[0], use_container_width=True) | |
st.caption("Facial recognition technology") | |
st.markdown("---") | |
st.markdown(""" | |
### Why Facial Gesture Recognition Matters | |
Facial gestures provide a natural, intuitive way for humans to communicate with computers: | |
- **Accessibility**: Enables computer control for people with mobility limitations | |
- **Hands-free Interaction**: Useful in environments where hands are occupied or contaminated | |
- **Enhanced User Experience**: Creates more natural human-computer interactions | |
- **Safety Applications**: Driver drowsiness detection, attention monitoring | |
""") | |
def render_objective_section(): | |
"""Render the project objectives section""" | |
section_header("Project Objective") | |
col1, col2 = st.columns([1, 1]) | |
with col1: | |
st.markdown(""" | |
## Primary Goal | |
Create an intelligent system that automatically recognizes facial gestures from a video stream in real-time. | |
### Key Objectives | |
1. **Real-time Processing**: Analyze video frames with minimal latency | |
2. **Accurate Detection**: Precisely identify facial landmarks | |
3. **Gesture Classification**: Correctly interpret facial movements | |
4. **Responsive Output**: Provide immediate feedback based on detected gestures | |
""") | |
st.markdown(""" | |
### Target Gestures | |
The system focuses on recognizing the following facial gestures: | |
- Eye movements (blinks, winks) | |
- Eyebrow movements (raising, furrowing) | |
- Normal/neutral state | |
""") | |
with col2: | |
# Add an interactive element - demo selector | |
st.markdown("### Interactive Demo") | |
gesture_type = st.selectbox( | |
"Select a gesture type to learn more", | |
["Eye Movements", "Eyebrow Movements", "Neutral State"] | |
) | |
if gesture_type == "Eye Movements": | |
st.info("Eye movements like blinks and winks can be used for selection or confirmation actions.") | |
elif gesture_type == "Eyebrow Movements": | |
st.info("Eyebrow raising can indicate interest or be used as a trigger for specific actions.") | |
elif gesture_type == "Neutral State": | |
st.info("The neutral state serves as the baseline for detecting deviations that signal intentional gestures.") | |
def render_architecture_section(): | |
"""Render the architecture and methodology section""" | |
section_header("Architecture & Methodology") | |
st.markdown(""" | |
## CNN-LSTM Architecture | |
The system employs a hybrid deep learning architecture combining: | |
- **Convolutional Neural Networks (CNN)**: Extract spatial features from facial images | |
- **Long Short-Term Memory (LSTM)**: Capture temporal patterns in sequential frames | |
""") | |
# Display CNN-LSTM architecture diagram | |
st.caption("Visual representation of CNN-LSTM architecture") | |
col1, col2 = st.columns([1, 1]) | |
with col1: | |
st.markdown(""" | |
### CNN Component | |
The CNN portion of the architecture: | |
- Processes individual video frames | |
- Extracts spatial features from facial regions | |
- Identifies key patterns in facial structure | |
- Uses multiple convolutional layers with pooling | |
""") | |
# Create interactive CNN visualization | |
st.markdown("#### CNN Layer Visualization") | |
layer_slider = st.slider("Explore CNN layers", 1, 5, 1) | |
fig, axes = plt.subplots(3, 3, figsize=(6, 4))
fig.suptitle(f"CNN Layer {layer_slider} Feature Maps")
# Generate mock feature maps and show them in a 3x3 grid
grid_size = 4
feature_maps = np.random.rand(grid_size, grid_size, 9)
for i, ax in enumerate(axes.flat):
    ax.imshow(feature_maps[:, :, i], cmap='viridis')
    ax.axis('off')
fig.tight_layout()
st.pyplot(fig)
with col2: | |
st.markdown(""" | |
### LSTM Component | |
The LSTM network: | |
- Processes sequences of CNN-extracted features | |
- Captures temporal dependencies between frames | |
- Maintains memory of previous facial states | |
- Enables detection of dynamic gestures over time | |
""") | |
# Add interactive LSTM cell visualization | |
st.markdown("#### LSTM Cell Structure") | |
st.image("https://upload.wikimedia.org/wikipedia/commons/9/93/LSTM_Cell.svg", caption="LSTM Cell Structure", use_container_width=True) | |
st.markdown(""" | |
### Combined Model Benefits | |
This hybrid architecture provides several advantages: | |
1. **Spatial-Temporal Processing**: Captures both spatial features and temporal patterns | |
2. **Sequence Understanding**: Recognizes gestures that develop over multiple frames | |
3. **Contextual Awareness**: Considers the progression of facial movements | |
4. **Robust Classification**: Higher accuracy for dynamic gestures | |
""") | |
def render_process_section(): | |
"""Render the process flow section""" | |
section_header("Process Flow") | |
st.markdown(""" | |
## System Workflow | |
The facial gesture recognition process follows these key steps: | |
""") | |
# Create tabs for different stages of the process | |
tab1, tab2, tab3 = st.tabs(["Data Collection", "Image Processing", "Model Training"]) | |
with tab1: | |
col1, col2 = st.columns([3, 2]) | |
with col1: | |
st.markdown(""" | |
### Data Collection | |
The system requires a comprehensive dataset of facial gestures: | |
- **Video Capture**: Short video clips recorded using webcam | |
- **Gesture Performance**: Subjects perform predefined facial gestures | |
- **Labeling**: Each video is labeled with the corresponding gesture | |
- **Dataset Diversity**: Multiple subjects, lighting conditions, and angles | |
A balanced dataset with various examples of each gesture is crucial for model generalization. | |
""") | |
with col2:
    st.image(FACIAL_RECOGNITION_IMAGES[1], use_container_width=True)
    st.caption("Webcam-based capture of facial gesture samples")
with tab2: | |
col1, col2 = st.columns([2, 3]) | |
with col1: | |
st.image(AI_DATA_VIZ_IMAGES[0], use_container_width=True) | |
st.caption("Image processing visualization") | |
with col2: | |
st.markdown(""" | |
### Image Processing | |
Raw video frames undergo several preprocessing steps: | |
1. **Facial Detection**: Locating the face in each frame | |
2. **Landmark Extraction**: Identifying 68 key facial points | |
3. **Region Isolation**: Extracting regions of interest (eyes, eyebrows) | |
4. **Normalization**: Converting to grayscale, normalizing pixel values | |
5. **Augmentation**: Generating additional training samples through transformations | |
These steps ensure the input data is optimized for the neural network. | |
""") | |
# Interactive element - landmark detection demo | |
show_landmarks = st.checkbox("Show facial landmarks example (eyes and eyebrows)") | |
if show_landmarks: | |
landmark_cols = st.columns(2) | |
with landmark_cols[0]: | |
# Mock landmark visualization using matplotlib - focusing on eyes and eyebrows | |
fig, ax = plt.subplots(figsize=(4, 4)) | |
# Create a simple face outline | |
circle = plt.Circle((0.5, 0.5), 0.4, fill=False, color='blue') | |
ax.add_patch(circle) | |
# Add eye landmarks with extra detail (6 points per eye) | |
# Left eye | |
left_eye_x = [0.30, 0.33, 0.37, 0.41, 0.38, 0.34] | |
left_eye_y = [0.60, 0.58, 0.58, 0.60, 0.62, 0.62] | |
ax.plot(left_eye_x, left_eye_y, 'g-', linewidth=2) | |
for x, y in zip(left_eye_x, left_eye_y): | |
ax.plot(x, y, 'go', markersize=4) | |
# Right eye | |
right_eye_x = [0.59, 0.62, 0.66, 0.70, 0.67, 0.63] | |
right_eye_y = [0.60, 0.58, 0.58, 0.60, 0.62, 0.62] | |
ax.plot(right_eye_x, right_eye_y, 'g-', linewidth=2) | |
for x, y in zip(right_eye_x, right_eye_y): | |
ax.plot(x, y, 'go', markersize=4) | |
# Add detailed eyebrow landmarks (5 points per eyebrow) | |
# Left eyebrow | |
left_brow_x = [0.25, 0.30, 0.35, 0.40, 0.45] | |
left_brow_y = [0.70, 0.72, 0.73, 0.72, 0.70] | |
ax.plot(left_brow_x, left_brow_y, 'r-', linewidth=2) | |
for x, y in zip(left_brow_x, left_brow_y): | |
ax.plot(x, y, 'ro', markersize=4) | |
# Right eyebrow | |
right_brow_x = [0.55, 0.60, 0.65, 0.70, 0.75] | |
right_brow_y = [0.70, 0.72, 0.73, 0.72, 0.70] | |
ax.plot(right_brow_x, right_brow_y, 'r-', linewidth=2) | |
for x, y in zip(right_brow_x, right_brow_y): | |
ax.plot(x, y, 'ro', markersize=4) | |
# Add labels | |
ax.text(0.36, 0.67, "Left Eye", fontsize=9, ha='center') | |
ax.text(0.64, 0.67, "Right Eye", fontsize=9, ha='center') | |
ax.text(0.35, 0.76, "Left Eyebrow", fontsize=9, ha='center') | |
ax.text(0.65, 0.76, "Right Eyebrow", fontsize=9, ha='center') | |
ax.set_xlim(0, 1) | |
ax.set_ylim(0, 1) | |
ax.set_title("Eye and Eyebrow Landmarks") | |
ax.axis('off') | |
st.pyplot(fig) | |
with landmark_cols[1]: | |
st.markdown(""" | |
**Focused Facial Landmarks Analysis:** | |
This system specifically analyzes: | |
- **Eyes (6 points each)**: Tracks eye openness, blinks, and winking | |
- **Eyebrows (5 points each)**: Detects eyebrow raising, furrowing, and expressions | |
While the shape_predictor_68_face_landmarks model can identify 68 facial landmarks including: | |
- 9 points for the nose | |
- 20 points for the mouth | |
- 17 points for the face contour | |
This implementation focuses exclusively on eye and eyebrow movements for gesture recognition. | |
""") | |
with tab3: | |
st.markdown(""" | |
### Model Training | |
The CNN-LSTM model is trained using the processed dataset: | |
1. **Data Splitting**: Division into training, validation, and test sets | |
2. **CNN Training**: Learning spatial feature extraction | |
3. **LSTM Training**: Learning temporal patterns | |
4. **Hyperparameter Tuning**: Optimizing model architecture and parameters | |
5. **Validation**: Evaluating performance on validation set | |
6. **Testing**: Final evaluation on test set | |
""") | |
# Interactive training visualization | |
st.markdown("#### Training Visualization") | |
# Mock training metrics | |
epochs = 50 | |
train_loss = 1.5 * np.exp(-0.05 * np.arange(epochs)) + 0.1 * np.random.rand(epochs) | |
val_loss = 1.7 * np.exp(-0.04 * np.arange(epochs)) + 0.15 * np.random.rand(epochs) | |
train_acc = 1 - train_loss * 0.5 | |
val_acc = 1 - val_loss * 0.5 | |
# Create interactive plot | |
metric = st.radio("Select metric to visualize", ["Loss", "Accuracy"]) | |
if metric == "Loss": | |
fig = px.line( | |
x=list(range(1, epochs+1)), | |
y=[train_loss, val_loss], | |
labels={"x": "Epoch", "y": "Loss"}, | |
title="Training and Validation Loss", | |
line_shape="spline" | |
) | |
fig.update_layout(legend_title_text="Legend") | |
fig.add_scatter(x=list(range(1, epochs+1)), y=train_loss, name="Training Loss", line=dict(color="blue")) | |
fig.add_scatter(x=list(range(1, epochs+1)), y=val_loss, name="Validation Loss", line=dict(color="red")) | |
else: | |
fig = px.line( | |
x=list(range(1, epochs+1)), | |
y=[train_acc, val_acc], | |
labels={"x": "Epoch", "y": "Accuracy"}, | |
title="Training and Validation Accuracy", | |
line_shape="spline" | |
) | |
fig.update_layout(legend_title_text="Legend") | |
fig.add_scatter(x=list(range(1, epochs+1)), y=train_acc, name="Training Accuracy", line=dict(color="green")) | |
fig.add_scatter(x=list(range(1, epochs+1)), y=val_acc, name="Validation Accuracy", line=dict(color="orange")) | |
st.plotly_chart(fig) | |
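# The "Model Training" tab above lists the steps (splitting, training, validation, testing),
# while the app itself only plots mock curves. The sketch below shows, hypothetically, how
# those steps could be wired together; X_sequences / y_onehot are placeholder arrays, not data
# produced by this app, scikit-learn is assumed to be available, and it reuses the
# build_cnn_lstm_model_sketch defined earlier. It is not called anywhere in the app.
def train_cnn_lstm_sketch(X_sequences, y_onehot, epochs=50, batch_size=16):
    """Illustrative only: split the data, fit the CNN-LSTM sketch, and report test accuracy."""
    from sklearn.model_selection import train_test_split

    # 60/20/20 train/validation/test split
    X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_onehot, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

    model = build_cnn_lstm_model_sketch(seq_len=X_train.shape[1],
                                        frame_shape=X_train.shape[2:],
                                        num_classes=y_train.shape[1])
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                        epochs=epochs, batch_size=batch_size)
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    return model, history, test_acc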
def render_technology_section(): | |
"""Render the technologies section""" | |
section_header("Technologies") | |
st.markdown(""" | |
## Core Technologies | |
The facial gesture recognition system relies on several key technologies: | |
""") | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.markdown(""" | |
### Python Ecosystem | |
- **Python**: Core programming language | |
- **NumPy**: Numerical operations | |
- **Pandas**: Data management | |
- **Matplotlib/Plotly**: Visualization | |
""") | |
st.image(AI_DATA_VIZ_IMAGES[2], use_container_width=True) | |
st.caption("Python data analysis visualization") | |
with col2: | |
st.markdown(""" | |
### Deep Learning | |
- **TensorFlow/Keras**: Neural network framework | |
- **CNN**: Spatial feature extraction | |
- **LSTM**: Temporal sequence processing | |
- **Transfer Learning**: Leveraging pre-trained models | |
""") | |
# Create an interactive visualization of model architecture | |
st.markdown("#### Model Architecture") | |
fig = go.Figure() | |
# Draw rectangles representing layers | |
layers = [ | |
{"name": "Input", "width": 0.8, "height": 0.15, "x": 0.5, "y": 0.9, "color": "lightblue"}, | |
{"name": "Conv2D", "width": 0.8, "height": 0.1, "x": 0.5, "y": 0.75, "color": "lightgreen"}, | |
{"name": "MaxPooling", "width": 0.7, "height": 0.1, "x": 0.5, "y": 0.63, "color": "lightgreen"}, | |
{"name": "Conv2D", "width": 0.6, "height": 0.1, "x": 0.5, "y": 0.51, "color": "lightgreen"}, | |
{"name": "MaxPooling", "width": 0.5, "height": 0.1, "x": 0.5, "y": 0.39, "color": "lightgreen"}, | |
{"name": "LSTM", "width": 0.8, "height": 0.1, "x": 0.5, "y": 0.27, "color": "lightpink"}, | |
{"name": "Dense", "width": 0.6, "height": 0.1, "x": 0.5, "y": 0.15, "color": "lightyellow"}, | |
{"name": "Output", "width": 0.4, "height": 0.1, "x": 0.5, "y": 0.05, "color": "lightblue"} | |
] | |
for idx, layer in enumerate(layers):
    # Add layer rectangle
    fig.add_shape(
        type="rect",
        x0=layer["x"] - layer["width"] / 2,
        y0=layer["y"] - layer["height"] / 2,
        x1=layer["x"] + layer["width"] / 2,
        y1=layer["y"] + layer["height"] / 2,
        line=dict(color="black"),
        fillcolor=layer["color"]
    )
    # Add layer name
    fig.add_annotation(
        x=layer["x"],
        y=layer["y"],
        text=layer["name"],
        showarrow=False
    )
    # Add a connection line to the next layer (except for the last layer)
    if idx < len(layers) - 1:
        next_layer = layers[idx + 1]
        fig.add_shape(
            type="line",
            x0=layer["x"],
            y0=layer["y"] - layer["height"] / 2,
            x1=next_layer["x"],
            y1=next_layer["y"] + next_layer["height"] / 2,
            line=dict(color="gray", width=1)
        )
fig.update_layout( | |
showlegend=False, | |
width=300, | |
height=500, | |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]), | |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]) | |
) | |
st.plotly_chart(fig) | |
with col3: | |
st.markdown(""" | |
### Computer Vision | |
- **OpenCV**: Image and video processing | |
- **Dlib**: Facial landmark detection | |
- **MediaPipe**: Real-time face mesh tracking | |
- **Image augmentation**: Diverse training samples | |
""") | |
st.image(AI_DATA_VIZ_IMAGES[3], use_container_width=True) | |
st.caption("Computer vision analysis") | |
st.markdown("---") | |
# Technology performance comparison | |
st.markdown("### Performance Comparison") | |
# Create a mock performance comparison chart | |
performance_data = { | |
'Method': ['Traditional CV', 'CNN Only', 'LSTM Only', 'CNN-LSTM'], | |
'Accuracy': [65, 82, 78, 93], | |
'Speed (FPS)': [45, 28, 32, 25], | |
'Memory (MB)': [120, 350, 280, 420] | |
} | |
metric_to_show = st.selectbox("Select performance metric", ["Accuracy", "Speed (FPS)", "Memory (MB)"]) | |
fig = px.bar( | |
performance_data, | |
x='Method', | |
y=metric_to_show, | |
color='Method', | |
text=performance_data[metric_to_show], | |
title=f"Performance Comparison - {metric_to_show}" | |
) | |
# Customize the chart appearance | |
fig.update_traces(texttemplate='%{text}', textposition='outside') | |
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide') | |
st.plotly_chart(fig) | |
def render_applications_section(): | |
"""Render the applications section""" | |
section_header("Applications") | |
st.markdown(""" | |
## Potential Applications | |
Facial gesture recognition technology has numerous practical applications across various domains: | |
""") | |
col1, col2 = st.columns([1, 1]) | |
with col1: | |
st.markdown(""" | |
### Human-Computer Interaction | |
- **Hands-free Computing**: Control computers without physical input devices | |
- **Accessible Technology**: Enable computer usage for people with mobility limitations | |
- **Interactive Presentations**: Control slides and demonstrations with facial gestures | |
- **Gaming**: Enhanced immersion through facial expression controls | |
""") | |
st.markdown(""" | |
### Healthcare | |
- **Patient Monitoring**: Track patient attentiveness or consciousness | |
- **Rehabilitation**: Provide feedback for facial exercises | |
- **Pain Assessment**: Detect discomfort through facial expressions | |
- **Mental Health**: Analyze emotional responses during therapy | |
""") | |
with col2: | |
st.markdown(""" | |
### Automotive Applications | |
- **Driver Monitoring**: Detect drowsiness or distraction | |
- **In-car Controls**: Adjust settings with facial gestures | |
- **Personalized Experience**: Recognize driver identity and preferences | |
- **Security**: Additional authentication layer | |
""") | |
st.markdown(""" | |
### Accessibility | |
- **Assistive Technology**: Enable computer control for users with mobility impairments | |
- **Communication Aids**: Help non-verbal individuals express themselves | |
- **Smart Home Control**: Manage home automation with facial gestures | |
- **Public Kiosks**: Enable gesture-based interaction with public information systems | |
""") | |
# Interactive application explorer | |
st.markdown("### Application Explorer") | |
application_area = st.selectbox( | |
"Select an application area to explore", | |
["Human-Computer Interaction", "Healthcare", "Automotive", "Accessibility", "Education"] | |
) | |
if application_area == "Human-Computer Interaction": | |
st.info(""" | |
**Featured Application: Gesture-Controlled Presentation System** | |
A system that allows presenters to control slideshows using facial gestures: | |
- Eye blinks to advance slides | |
- Eyebrow raises to go back | |
- Head nods/shakes to confirm/cancel actions | |
This enables hands-free presentations, allowing speakers to maintain natural gestures while speaking. | |
""") | |
elif application_area == "Healthcare": | |
st.info(""" | |
**Featured Application: Pain Assessment Tool** | |
A system that monitors patient facial expressions to detect signs of pain: | |
- Real-time monitoring without requiring verbal communication | |
- Particularly useful for non-verbal patients or those with cognitive impairments | |
- Alerts medical staff when pain indicators are detected | |
- Maintains a log of pain expression events for medical review | |
""") | |
elif application_area == "Automotive": | |
st.info(""" | |
**Featured Application: Driver Alertness Monitoring** | |
A system that detects signs of driver fatigue or distraction: | |
- Monitors eye closure duration and blink rate | |
- Detects head nodding indicative of drowsiness | |
- Provides audio alerts when fatigue signs are detected | |
- Suggests breaks when sustained fatigue patterns are observed | |
""") | |
elif application_area == "Accessibility": | |
st.info(""" | |
**Featured Application: Facial Gesture Computer Control** | |
A complete computer control system for people with limited mobility: | |
- Cursor movement through slight head movements | |
- Selection through eye blinks or eyebrow raises | |
- Scrolling through specific eye movements | |
- Text input through an on-screen keyboard navigated by facial gestures | |
""") | |
elif application_area == "Education": | |
st.info(""" | |
**Featured Application: Student Engagement Analytics** | |
A system that monitors student facial expressions during online learning: | |
- Tracks attentiveness and engagement through eye movements | |
- Identifies confusion through facial expressions | |
- Provides analytics to instructors about student engagement | |
- Helps identify content that may need additional explanation | |
""") | |
# Conclusion | |
st.markdown("---") | |
st.markdown(""" | |
## Conclusion | |
Facial gesture recognition using AI represents a significant advancement in human-computer interaction. By combining CNN and LSTM architectures, we've created a system that can: | |
- Accurately recognize facial gestures in real-time | |
- Process video streams with minimal latency | |
- Adapt to different users and environments | |
- Enable new possibilities for accessibility and interaction | |
This technology continues to evolve, with ongoing improvements in accuracy, speed, and adaptability. | |
""") | |
st.success("Thank you for exploring this presentation on Facial Gesture Recognition using AI!") | |
# Using the SVG file from assets instead of embedding directly | |
st.image("assets/workflow_diagram.svg") | |
def get_landmark_point_from_detector(landmarks, index): | |
"""Helper function from eye_eyebrow_detector.py""" | |
return (landmarks.part(index).x, landmarks.part(index).y) | |
def eye_aspect_ratio_from_detector(eye_pts): | |
"""Helper function from eye_eyebrow_detector.py""" | |
A = dist.euclidean(eye_pts[1], eye_pts[5]) | |
B = dist.euclidean(eye_pts[2], eye_pts[4]) | |
C = dist.euclidean(eye_pts[0], eye_pts[3]) | |
ear_val = (A + B) / (2.0 * C) | |
return ear_val | |
def initialize_dlib_components(): | |
"""Initializes dlib detector and predictor.""" | |
try: | |
detector = dlib.get_frontal_face_detector() | |
predictor = dlib.shape_predictor(DLIB_SHAPE_PREDICTOR_PATH) | |
return detector, predictor | |
except RuntimeError as e: | |
st.error(f"Failed to load dlib model: {e}. Please ensure '{DLIB_SHAPE_PREDICTOR_PATH}' is in the correct path.") | |
return None, None | |
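# Note: an alternative to keeping the detector/predictor in st.session_state is Streamlit's
# st.cache_resource decorator, which would build the dlib objects once per server process, e.g.:
#
# @st.cache_resource
# def load_dlib_components():
#     return dlib.get_frontal_face_detector(), dlib.shape_predictor(DLIB_SHAPE_PREDICTOR_PATH)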
def render_live_demo_section(): | |
"""Render the live facial gesture recognition demo section""" | |
section_header("Live Facial Gesture Demo") | |
st.write("This demo uses your webcam to perform real-time eye and eyebrow gesture detection.") | |
st.warning("Ensure you have a webcam connected and have granted permission if prompted by your browser. Also, make sure `shape_predictor_68_face_landmarks.dat` is in the application's root directory.") | |
if 'detector' not in st.session_state or 'predictor' not in st.session_state: | |
st.session_state.detector, st.session_state.predictor = initialize_dlib_components() | |
if st.session_state.detector is None or st.session_state.predictor is None: | |
st.error("Dlib components could not be initialized. The demo cannot run.") | |
return | |
# Initialize session state variables for the demo
demo_state_defaults = {
    'run_demo': False,
    'calibration_counter': 0,
    'calibration_data_user_L_eyebrow_y': [],
    'calibration_data_user_R_eyebrow_y': [],
    'calibration_data_user_L_eye_top_y': [],
    'calibration_data_user_R_eye_top_y': [],
    'normal_user_L_eyebrow_y_avg': 0,
    'normal_user_R_eyebrow_y_avg': 0,
    'normal_user_L_eye_top_y_avg': 0,
    'normal_user_R_eye_top_y_avg': 0,
    'normal_dist_L_eyebrow_to_eye': 0,
    'normal_dist_R_eyebrow_to_eye': 0,
    'current_state_demo': STATE_CALIBRATING,
    'camera_active': False,
}
for key, default in demo_state_defaults.items():
    if key not in st.session_state:
        st.session_state[key] = default
col1, col2 = st.columns(2) | |
with col1: | |
if st.button("Start/Restart Demo"): | |
st.session_state.run_demo = True | |
st.session_state.camera_active = True | |
# Reset calibration | |
st.session_state.calibration_counter = 0 | |
st.session_state.calibration_data_user_L_eyebrow_y = [] | |
st.session_state.calibration_data_user_R_eyebrow_y = [] | |
st.session_state.calibration_data_user_L_eye_top_y = [] | |
st.session_state.calibration_data_user_R_eye_top_y = [] | |
st.session_state.current_state_demo = STATE_CALIBRATING | |
st.info("Calibration started. Look at the camera with a normal expression.") | |
with col2: | |
if st.button("Stop Demo"): | |
st.session_state.run_demo = False | |
st.session_state.camera_active = False | |
if st.session_state.run_demo and st.session_state.camera_active: | |
# Placeholder for video feed | |
frame_placeholder = st.empty() | |
# Attempt to open the webcam | |
# We manage cap in session_state to persist it across reruns if needed, | |
# but for a continuous loop, it's tricky. | |
# A common pattern is to release it if we stop. | |
if 'cap' not in st.session_state or not st.session_state.cap.isOpened(): | |
st.session_state.cap = cv2.VideoCapture(0) | |
if not st.session_state.cap.isOpened(): | |
st.error("Cannot open webcam.") | |
st.session_state.run_demo = False # Stop demo if camera fails | |
return | |
detector = st.session_state.detector | |
predictor = st.session_state.predictor | |
while st.session_state.run_demo and st.session_state.cap.isOpened(): | |
ret, frame = st.session_state.cap.read() | |
if not ret: | |
st.error("Failed to grab frame from webcam.") | |
break | |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) | |
faces = detector(gray) | |
display_text = st.session_state.current_state_demo | |
if st.session_state.calibration_counter < CALIBRATION_FRAMES: | |
st.session_state.current_state_demo = STATE_CALIBRATING | |
display_text = f"{STATE_CALIBRATING} ({st.session_state.calibration_counter}/{CALIBRATION_FRAMES})" | |
for face in faces: | |
landmarks = predictor(gray, face) | |
user_L_eyebrow_current_y_pts = [landmarks.part(i).y for i in user_L_eyebrow_y_calc_indices] | |
current_user_L_eyebrow_y_avg = np.mean(user_L_eyebrow_current_y_pts) if user_L_eyebrow_current_y_pts else 0 | |
user_R_eyebrow_current_y_pts = [landmarks.part(i).y for i in user_R_eyebrow_y_calc_indices] | |
current_user_R_eyebrow_y_avg = np.mean(user_R_eyebrow_current_y_pts) if user_R_eyebrow_current_y_pts else 0 | |
user_L_eye_top_current_y_pts = [landmarks.part(i).y for i in user_L_eye_top_indices] | |
current_user_L_eye_top_y_avg = np.mean(user_L_eye_top_current_y_pts) if user_L_eye_top_current_y_pts else 0 | |
user_R_eye_top_current_y_pts = [landmarks.part(i).y for i in user_R_eye_top_indices] | |
current_user_R_eye_top_y_avg = np.mean(user_R_eye_top_current_y_pts) if user_R_eye_top_current_y_pts else 0 | |
user_L_eye_all_pts = np.array([get_landmark_point_from_detector(landmarks, i) for i in range(user_L_eye_indices_start, user_L_eye_indices_end)], dtype="int") | |
user_R_eye_all_pts = np.array([get_landmark_point_from_detector(landmarks, i) for i in range(user_R_eye_indices_start, user_R_eye_indices_end)], dtype="int") | |
left_ear = eye_aspect_ratio_from_detector(user_L_eye_all_pts) | |
right_ear = eye_aspect_ratio_from_detector(user_R_eye_all_pts) | |
avg_ear = (left_ear + right_ear) / 2.0 | |
if st.session_state.calibration_counter < CALIBRATION_FRAMES: | |
st.session_state.calibration_data_user_L_eyebrow_y.append(current_user_L_eyebrow_y_avg) | |
st.session_state.calibration_data_user_R_eyebrow_y.append(current_user_R_eyebrow_y_avg) | |
st.session_state.calibration_data_user_L_eye_top_y.append(current_user_L_eye_top_y_avg) | |
st.session_state.calibration_data_user_R_eye_top_y.append(current_user_R_eye_top_y_avg) | |
st.session_state.calibration_counter += 1 | |
display_text = f"{STATE_CALIBRATING} ({st.session_state.calibration_counter}/{CALIBRATION_FRAMES})" | |
if st.session_state.calibration_counter == CALIBRATION_FRAMES: | |
st.session_state.normal_user_L_eyebrow_y_avg = np.mean(st.session_state.calibration_data_user_L_eyebrow_y) if st.session_state.calibration_data_user_L_eyebrow_y else 0 | |
st.session_state.normal_user_R_eyebrow_y_avg = np.mean(st.session_state.calibration_data_user_R_eyebrow_y) if st.session_state.calibration_data_user_R_eyebrow_y else 0 | |
st.session_state.normal_user_L_eye_top_y_avg = np.mean(st.session_state.calibration_data_user_L_eye_top_y) if st.session_state.calibration_data_user_L_eye_top_y else 0 | |
st.session_state.normal_user_R_eye_top_y_avg = np.mean(st.session_state.calibration_data_user_R_eye_top_y) if st.session_state.calibration_data_user_R_eye_top_y else 0 | |
st.session_state.normal_dist_L_eyebrow_to_eye = st.session_state.normal_user_L_eye_top_y_avg - st.session_state.normal_user_L_eyebrow_y_avg | |
st.session_state.normal_dist_R_eyebrow_to_eye = st.session_state.normal_user_R_eye_top_y_avg - st.session_state.normal_user_R_eyebrow_y_avg | |
st.session_state.current_state_demo = STATE_NORMAL | |
display_text = STATE_NORMAL | |
st.success("Calibration finished.") | |
else: # Detection Phase | |
st.session_state.current_state_demo = STATE_NORMAL # Default to normal after calibration | |
display_text = STATE_NORMAL | |
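# Gesture rules (mirroring eye_eyebrow_detector.py): a frame where the average EAR drops below
# EAR_THRESHOLD counts as an eye closure and is shown as "Yes"; otherwise, if both
# brow-to-upper-eyelid distances exceed their calibrated baselines by
# EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR (an eyebrow raise), the frame is shown as "No".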
if st.session_state.normal_dist_L_eyebrow_to_eye != 0 and st.session_state.normal_dist_R_eyebrow_to_eye != 0: | |
if avg_ear < EAR_THRESHOLD: | |
st.session_state.current_state_demo = STATE_YES | |
display_text = STATE_YES | |
else: | |
current_dist_L = current_user_L_eye_top_y_avg - current_user_L_eyebrow_y_avg
current_dist_R = current_user_R_eye_top_y_avg - current_user_R_eyebrow_y_avg
threshold_dist_L = st.session_state.normal_dist_L_eyebrow_to_eye * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
threshold_dist_R = st.session_state.normal_dist_R_eyebrow_to_eye * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
# Guard against a non-positive calibrated distance (degenerate calibration)
if st.session_state.normal_dist_L_eyebrow_to_eye <= 0:
    threshold_dist_L = st.session_state.normal_dist_L_eyebrow_to_eye + abs(st.session_state.normal_dist_L_eyebrow_to_eye * EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR) + 5
if st.session_state.normal_dist_R_eyebrow_to_eye <= 0:
    threshold_dist_R = st.session_state.normal_dist_R_eyebrow_to_eye + abs(st.session_state.normal_dist_R_eyebrow_to_eye * EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR) + 5
if current_dist_L > threshold_dist_L and current_dist_R > threshold_dist_R: | |
st.session_state.current_state_demo = STATE_NO | |
display_text = STATE_NO | |
# Display the detected state on the frame | |
color = (255, 255, 0) # Default for Normal/Calibrating | |
if st.session_state.current_state_demo == STATE_YES: | |
color = (0, 255, 0) | |
elif st.session_state.current_state_demo == STATE_NO: | |
color = (0, 0, 255) | |
# Make text larger and position it higher | |
cv2.putText(frame, display_text, (frame.shape[1] // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3, cv2.LINE_AA) | |
# Convert frame to RGB for Streamlit | |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
frame_placeholder.image(frame_rgb, channels="RGB") | |
# Add a small delay to make the video smoother and allow Streamlit to process | |
# time.sleep(0.01) # Removed for faster processing, relying on inherent delays | |
# Release camera when demo stops or an error occurs | |
if 'cap' in st.session_state and st.session_state.cap.isOpened(): | |
st.session_state.cap.release() | |
if st.session_state.camera_active is False and 'cap' in st.session_state: # if explicitly stopped | |
del st.session_state.cap | |
elif not st.session_state.run_demo and st.session_state.camera_active: | |
# This case handles when Stop Demo is clicked, ensuring camera is released. | |
if 'cap' in st.session_state and st.session_state.cap.isOpened(): | |
st.session_state.cap.release() | |
del st.session_state.cap # Ensure it's re-initialized if started again | |
st.session_state.camera_active = False | |
st.info("Live demo stopped.") | |
# Example of how this module might be wired into a main app. Two options:
#
# (a) Render every section sequentially as one long presentation page:
#
# if __name__ == "__main__":
#     st.set_page_config(layout="wide")
#     # Apply custom CSS (optional)
#     # st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
#     render_intro_section()
#     render_objective_section()
#     render_architecture_section()
#     render_process_section()
#     render_technology_section()
#     render_applications_section()
#     render_live_demo_section()
#
# (b) Use sidebar navigation and render only the selected section:
#
# if __name__ == "__main__":
#     st.set_page_config(layout="wide")
#     st.sidebar.title("Navigation")
#     page = st.sidebar.radio("Go to", ["Introduction", "Objective", "Architecture",
#                                       "Process Flow", "Technologies", "Applications", "Live Demo"])
#     if page == "Introduction": render_intro_section()
#     elif page == "Objective": render_objective_section()
#     elif page == "Architecture": render_architecture_section()
#     elif page == "Process Flow": render_process_section()
#     elif page == "Technologies": render_technology_section()
#     elif page == "Applications": render_applications_section()
#     elif page == "Live Demo": render_live_demo_section()