import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import cv2
import dlib
from scipy.spatial import distance as dist # For EAR calculation
import time # for progress bar simulation and potentially camera loop
# Constants for detection (from eye_eyebrow_detector.py)
EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR = 0.15
CALIBRATION_FRAMES = 30 # Reduced for faster demo calibration
EAR_THRESHOLD = 0.20
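# An average EAR below this value is treated as a closed eye / blink; ~0.2 is a
# commonly used cut-off for the 68-point landmark eye aspect ratio.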
DLIB_SHAPE_PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"
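# Note: the 68-point predictor is not bundled with the dlib pip package; it is
# typically downloaded from dlib.net as shape_predictor_68_face_landmarks.dat.bz2,
# decompressed, and placed alongside this app.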
# Display states (from eye_eyebrow_detector.py)
STATE_YES = "Yes"
STATE_NO = "No"
STATE_NORMAL = "Normal"
STATE_CALIBRATING = "Calibrating..."
# Landmark indices (from eye_eyebrow_detector.py)
(user_L_eye_indices_start, user_L_eye_indices_end) = (42, 48)
(user_R_eye_indices_start, user_R_eye_indices_end) = (36, 42)
user_L_eye_top_indices = [43, 44]
user_R_eye_top_indices = [37, 38]
user_L_eyebrow_y_calc_indices = range(23, 26)
user_R_eyebrow_y_calc_indices = range(18, 21)
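# dlib's 68-point scheme (0-indexed): points 36-41 are the subject's right eye,
# 42-47 the left eye, 17-21 the right eyebrow and 22-26 the left eyebrow. The
# ranges above therefore select the full eye contours, the two upper-eyelid
# points of each eye, and the three central points of each eyebrow used for the
# vertical-distance baseline.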
# Initialize dlib's face detector and facial landmark predictor
# We'll initialize this inside the function or manage its state
# to avoid issues with Streamlit's rerun behavior.
# Stock photo URLs provided
FACIAL_RECOGNITION_IMAGES = [
"https://pixabay.com/get/g12854d8ea8c029d2435717f123bb6b3afe5f218d14e94f3f1bd28aedaf46900b3c663fdca24e3e5ff97ed203a4ac97bdd34215b14df2f288e76f20602a81cb7d_1280.jpg",
"https://pixabay.com/get/gf7f1fe0deb60c9c2217635915c6efdd85c3a35b943185d9d7c1b08ead1ec8f6d082af4bfe7a16759a66c38872d828da9c7d28f9ccd6ed4c243f50471537c072d_1280.jpg",
"https://pixabay.com/get/g5226c742de43d538d1d4dd7e927224fb5be1b7f0f197f568dedc10336530b516cf9b2b3acc3128a4ea78a43ca348c8ce101234788ff131ed802e296e799ddc00_1280.jpg",
"https://pixabay.com/get/g95d27127dde404c64753341780b8d8871f128bda7dfd5cc3ef287e4e838a1719fc91bc6c4bb24c52ef7cf27dad266a50d474142afe73e25f207ef9ef375c268e_1280.jpg"
]
AI_DATA_VIZ_IMAGES = [
"https://pixabay.com/get/g155188879e1e171fb82c63d79b2963561b3a77f46ecb38053344fb6a1e236c2f406d66b1c3ae23260573869a9458daee7bfc00f37ef6840fce3a379da3d608e4_1280.jpg",
"https://pixabay.com/get/g2620d81b6747dcda89657292ec3627897d7e61e906e76de11ecf6babedfcbe40aa0d0608950e1474795bc3a2abc67660ebc08977ba37da526834bec3cf342ba1_1280.jpg",
"https://pixabay.com/get/ge8f809c48922d0dd956c8896157bd3ea8f606948d2ff72e507bad98b42b823e6409cc2923100bc91b15a499f72263fd8ca0f0949ac5ad2bbbb176f16e3dd0043_1280.jpg",
"https://pixabay.com/get/g20331e7a18a7b2759056b7a9a73d20c34ff4f863ec4660535f9e5a1b15d3ad4b5b72bb07c385dd3ce154dc23b72fedd5c1eb9e2a4f2b335dfb17534d2b11d8e0_1280.jpg"
]
PRESENTATION_SLIDE_IMAGES = [
"https://pixabay.com/get/gb57703b075295316bc1711f9701b18b84cfb89f469bb77f415392cc8986f922927cabc9afd50638f77ed51f53bcc62f423b96fbeb5f008abd1017db5b33e9e96_1280.jpg",
"https://pixabay.com/get/gf4116a5ec8333a8a6bb33dcfe0baecc03580e6f7af95f2895880c9ec051479f3af002ecde96686e5fb6d3a860cf794fef532f27d373318317330932475a8b46c_1280.jpg"
]
def section_header(title):
"""Generate a section header with consistent styling"""
st.markdown(f'<p class="section-header">{title}</p>', unsafe_allow_html=True)
def render_intro_section():
"""Render the introduction section of the presentation"""
section_header("Introduction")
col1, col2 = st.columns([3, 2])
with col1:
st.markdown("""
# Facial Gesture Recognition
Facial gesture recognition is an exciting field at the intersection of computer vision and artificial intelligence that focuses on identifying and interpreting human facial expressions and movements.
This presentation explores a system that can:
- Detect facial landmarks in real-time video
- Track specific facial movements (eyes, eyebrows)
- Classify gestures into meaningful actions
- Respond to gestures with appropriate system actions
Using a combination of **Convolutional Neural Networks (CNN)** and **Long Short-Term Memory (LSTM)** architecture, this system achieves high accuracy in real-time environments.
""")
with col2:
st.image(FACIAL_RECOGNITION_IMAGES[0], use_container_width=True)
st.caption("Facial recognition technology")
st.markdown("---")
st.markdown("""
### Why Facial Gesture Recognition Matters
Facial gestures provide a natural, intuitive way for humans to communicate with computers:
- **Accessibility**: Enables computer control for people with mobility limitations
- **Hands-free Interaction**: Useful in environments where hands are occupied or contaminated
- **Enhanced User Experience**: Creates more natural human-computer interactions
- **Safety Applications**: Driver drowsiness detection, attention monitoring
""")
def render_objective_section():
"""Render the project objectives section"""
section_header("Project Objective")
col1, col2 = st.columns([1, 1])
with col1:
st.markdown("""
## Primary Goal
Create an intelligent system that automatically recognizes facial gestures from a video stream in real-time.
### Key Objectives
1. **Real-time Processing**: Analyze video frames with minimal latency
2. **Accurate Detection**: Precisely identify facial landmarks
3. **Gesture Classification**: Correctly interpret facial movements
4. **Responsive Output**: Provide immediate feedback based on detected gestures
""")
st.markdown("""
### Target Gestures
The system focuses on recognizing the following facial gestures:
- Eye movements (blinks, winks)
- Eyebrow movements (raising, furrowing)
- Normal/neutral state
""")
with col2:
# Add an interactive element - demo selector
st.markdown("### Interactive Demo")
gesture_type = st.selectbox(
"Select a gesture type to learn more",
["Eye Movements", "Eyebrow Movements", "Neutral State"]
)
if gesture_type == "Eye Movements":
st.info("Eye movements like blinks and winks can be used for selection or confirmation actions.")
elif gesture_type == "Eyebrow Movements":
st.info("Eyebrow raising can indicate interest or be used as a trigger for specific actions.")
elif gesture_type == "Neutral State":
st.info("The neutral state serves as the baseline for detecting deviations that signal intentional gestures.")
def render_architecture_section():
"""Render the architecture and methodology section"""
section_header("Architecture & Methodology")
st.markdown("""
## CNN-LSTM Architecture
The system employs a hybrid deep learning architecture combining:
- **Convolutional Neural Networks (CNN)**: Extract spatial features from facial images
- **Long Short-Term Memory (LSTM)**: Capture temporal patterns in sequential frames
""")
# Caption describing the CNN-LSTM architecture (interactive visualizations of each component follow below)
st.caption("Visual representation of CNN-LSTM architecture")
col1, col2 = st.columns([1, 1])
with col1:
st.markdown("""
### CNN Component
The CNN portion of the architecture:
- Processes individual video frames
- Extracts spatial features from facial regions
- Identifies key patterns in facial structure
- Uses multiple convolutional layers with pooling
""")
# Create interactive CNN visualization
st.markdown("#### CNN Layer Visualization")
layer_slider = st.slider("Explore CNN layers", 1, 5, 1)
fig = plt.figure(figsize=(6, 4))
fig.suptitle(f"CNN Layer {layer_slider} Feature Maps")
# Generate mock feature map visualization (random data, for illustration only)
grid_size = 4
feature_maps = np.random.rand(grid_size, grid_size, 9)
for i in range(9):
plt.subplot(3, 3, i+1)
plt.imshow(feature_maps[:, :, i], cmap='viridis')
plt.axis('off')
plt.tight_layout()
st.pyplot(fig)
with col2:
st.markdown("""
### LSTM Component
The LSTM network:
- Processes sequences of CNN-extracted features
- Captures temporal dependencies between frames
- Maintains memory of previous facial states
- Enables detection of dynamic gestures over time
""")
# Add interactive LSTM cell visualization
st.markdown("#### LSTM Cell Structure")
st.image("https://upload.wikimedia.org/wikipedia/commons/9/93/LSTM_Cell.svg", caption="LSTM Cell Structure", use_container_width=True)
st.markdown("""
### Combined Model Benefits
This hybrid architecture provides several advantages:
1. **Spatial-Temporal Processing**: Captures both spatial features and temporal patterns
2. **Sequence Understanding**: Recognizes gestures that develop over multiple frames
3. **Contextual Awareness**: Considers the progression of facial movements
4. **Robust Classification**: Higher accuracy for dynamic gestures
""")
def render_process_section():
"""Render the process flow section"""
section_header("Process Flow")
st.markdown("""
## System Workflow
The facial gesture recognition process follows these key steps:
""")
# Create tabs for different stages of the process
tab1, tab2, tab3 = st.tabs(["Data Collection", "Image Processing", "Model Training"])
with tab1:
col1, col2 = st.columns([3, 2])
with col1:
st.markdown("""
### Data Collection
The system requires a comprehensive dataset of facial gestures:
- **Video Capture**: Short video clips recorded using webcam
- **Gesture Performance**: Subjects perform predefined facial gestures
- **Labeling**: Each video is labeled with the corresponding gesture
- **Dataset Diversity**: Multiple subjects, lighting conditions, and angles
A balanced dataset with various examples of each gesture is crucial for model generalization.
""")
with col2:
st.caption("")
with tab2:
col1, col2 = st.columns([2, 3])
with col1:
st.image(AI_DATA_VIZ_IMAGES[0], use_container_width=True)
st.caption("Image processing visualization")
with col2:
st.markdown("""
### Image Processing
Raw video frames undergo several preprocessing steps:
1. **Facial Detection**: Locating the face in each frame
2. **Landmark Extraction**: Identifying 68 key facial points
3. **Region Isolation**: Extracting regions of interest (eyes, eyebrows)
4. **Normalization**: Converting to grayscale, normalizing pixel values
5. **Augmentation**: Generating additional training samples through transformations
These steps ensure the input data is optimized for the neural network.
""")
# Interactive element - landmark detection demo
show_landmarks = st.checkbox("Show facial landmarks example (eyes and eyebrows)")
if show_landmarks:
landmark_cols = st.columns(2)
with landmark_cols[0]:
# Mock landmark visualization using matplotlib - focusing on eyes and eyebrows
fig, ax = plt.subplots(figsize=(4, 4))
# Create a simple face outline
circle = plt.Circle((0.5, 0.5), 0.4, fill=False, color='blue')
ax.add_patch(circle)
# Add eye landmarks with extra detail (6 points per eye)
# Left eye
left_eye_x = [0.30, 0.33, 0.37, 0.41, 0.38, 0.34]
left_eye_y = [0.60, 0.58, 0.58, 0.60, 0.62, 0.62]
ax.plot(left_eye_x, left_eye_y, 'g-', linewidth=2)
for x, y in zip(left_eye_x, left_eye_y):
ax.plot(x, y, 'go', markersize=4)
# Right eye
right_eye_x = [0.59, 0.62, 0.66, 0.70, 0.67, 0.63]
right_eye_y = [0.60, 0.58, 0.58, 0.60, 0.62, 0.62]
ax.plot(right_eye_x, right_eye_y, 'g-', linewidth=2)
for x, y in zip(right_eye_x, right_eye_y):
ax.plot(x, y, 'go', markersize=4)
# Add detailed eyebrow landmarks (5 points per eyebrow)
# Left eyebrow
left_brow_x = [0.25, 0.30, 0.35, 0.40, 0.45]
left_brow_y = [0.70, 0.72, 0.73, 0.72, 0.70]
ax.plot(left_brow_x, left_brow_y, 'r-', linewidth=2)
for x, y in zip(left_brow_x, left_brow_y):
ax.plot(x, y, 'ro', markersize=4)
# Right eyebrow
right_brow_x = [0.55, 0.60, 0.65, 0.70, 0.75]
right_brow_y = [0.70, 0.72, 0.73, 0.72, 0.70]
ax.plot(right_brow_x, right_brow_y, 'r-', linewidth=2)
for x, y in zip(right_brow_x, right_brow_y):
ax.plot(x, y, 'ro', markersize=4)
# Add labels
ax.text(0.36, 0.67, "Left Eye", fontsize=9, ha='center')
ax.text(0.64, 0.67, "Right Eye", fontsize=9, ha='center')
ax.text(0.35, 0.76, "Left Eyebrow", fontsize=9, ha='center')
ax.text(0.65, 0.76, "Right Eyebrow", fontsize=9, ha='center')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_title("Eye and Eyebrow Landmarks")
ax.axis('off')
st.pyplot(fig)
with landmark_cols[1]:
st.markdown("""
**Focused Facial Landmarks Analysis:**
This system specifically analyzes:
- **Eyes (6 points each)**: Tracks eye openness, blinks, and winking
- **Eyebrows (5 points each)**: Detects eyebrow raising, furrowing, and expressions
The shape_predictor_68_face_landmarks model identifies 68 facial landmarks in total; beyond the eyes and eyebrows these include:
- 9 points for the nose
- 20 points for the mouth
- 17 points for the face contour
This implementation, however, focuses exclusively on eye and eyebrow movements for gesture recognition.
""")
with tab3:
st.markdown("""
### Model Training
The CNN-LSTM model is trained using the processed dataset:
1. **Data Splitting**: Division into training, validation, and test sets
2. **CNN Training**: Learning spatial feature extraction
3. **LSTM Training**: Learning temporal patterns
4. **Hyperparameter Tuning**: Optimizing model architecture and parameters
5. **Validation**: Evaluating performance on validation set
6. **Testing**: Final evaluation on test set
""")
# Interactive training visualization
st.markdown("#### Training Visualization")
# Mock training metrics
epochs = 50
train_loss = 1.5 * np.exp(-0.05 * np.arange(epochs)) + 0.1 * np.random.rand(epochs)
val_loss = 1.7 * np.exp(-0.04 * np.arange(epochs)) + 0.15 * np.random.rand(epochs)
train_acc = 1 - train_loss * 0.5
val_acc = 1 - val_loss * 0.5
# Create interactive plot
metric = st.radio("Select metric to visualize", ["Loss", "Accuracy"])
epoch_axis = list(range(1, epochs + 1))
fig = go.Figure()
if metric == "Loss":
fig.add_scatter(x=epoch_axis, y=train_loss, name="Training Loss", line=dict(color="blue", shape="spline"))
fig.add_scatter(x=epoch_axis, y=val_loss, name="Validation Loss", line=dict(color="red", shape="spline"))
fig.update_layout(title="Training and Validation Loss", xaxis_title="Epoch", yaxis_title="Loss", legend_title_text="Legend")
else:
fig.add_scatter(x=epoch_axis, y=train_acc, name="Training Accuracy", line=dict(color="green", shape="spline"))
fig.add_scatter(x=epoch_axis, y=val_acc, name="Validation Accuracy", line=dict(color="orange", shape="spline"))
fig.update_layout(title="Training and Validation Accuracy", xaxis_title="Epoch", yaxis_title="Accuracy", legend_title_text="Legend")
st.plotly_chart(fig)
def render_technology_section():
"""Render the technologies section"""
section_header("Technologies")
st.markdown("""
## Core Technologies
The facial gesture recognition system relies on several key technologies:
""")
col1, col2, col3 = st.columns(3)
with col1:
st.markdown("""
### Python Ecosystem
- **Python**: Core programming language
- **NumPy**: Numerical operations
- **Pandas**: Data management
- **Matplotlib/Plotly**: Visualization
""")
st.image(AI_DATA_VIZ_IMAGES[2], use_container_width=True)
st.caption("Python data analysis visualization")
with col2:
st.markdown("""
### Deep Learning
- **TensorFlow/Keras**: Neural network framework
- **CNN**: Spatial feature extraction
- **LSTM**: Temporal sequence processing
- **Transfer Learning**: Leveraging pre-trained models
""")
# Create an interactive visualization of model architecture
st.markdown("#### Model Architecture")
fig = go.Figure()
# Draw rectangles representing layers
layers = [
{"name": "Input", "width": 0.8, "height": 0.15, "x": 0.5, "y": 0.9, "color": "lightblue"},
{"name": "Conv2D", "width": 0.8, "height": 0.1, "x": 0.5, "y": 0.75, "color": "lightgreen"},
{"name": "MaxPooling", "width": 0.7, "height": 0.1, "x": 0.5, "y": 0.63, "color": "lightgreen"},
{"name": "Conv2D", "width": 0.6, "height": 0.1, "x": 0.5, "y": 0.51, "color": "lightgreen"},
{"name": "MaxPooling", "width": 0.5, "height": 0.1, "x": 0.5, "y": 0.39, "color": "lightgreen"},
{"name": "LSTM", "width": 0.8, "height": 0.1, "x": 0.5, "y": 0.27, "color": "lightpink"},
{"name": "Dense", "width": 0.6, "height": 0.1, "x": 0.5, "y": 0.15, "color": "lightyellow"},
{"name": "Output", "width": 0.4, "height": 0.1, "x": 0.5, "y": 0.05, "color": "lightblue"}
]
for i, layer in enumerate(layers):
# Add layer rectangle
fig.add_shape(
type="rect",
x0=layer["x"] - layer["width"]/2,
y0=layer["y"] - layer["height"]/2,
x1=layer["x"] + layer["width"]/2,
y1=layer["y"] + layer["height"]/2,
line=dict(color="black"),
fillcolor=layer["color"]
)
# Add layer name
fig.add_annotation(
x=layer["x"],
y=layer["y"],
text=layer["name"],
showarrow=False
)
# Add connection lines between layers (except for the last layer)
if i < len(layers) - 1:
next_layer = layers[i + 1]
fig.add_shape(
type="line",
x0=layer["x"],
y0=layer["y"] - layer["height"]/2,
x1=next_layer["x"],
y1=next_layer["y"] + next_layer["height"]/2,
line=dict(color="gray", width=1)
)
fig.update_layout(
showlegend=False,
width=300,
height=500,
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1])
)
st.plotly_chart(fig)
with col3:
st.markdown("""
### Computer Vision
- **OpenCV**: Image and video processing
- **Dlib**: Facial landmark detection
- **MediaPipe**: Real-time face mesh tracking
- **Image augmentation**: Diverse training samples
""")
st.image(AI_DATA_VIZ_IMAGES[3], use_container_width=True)
st.caption("Computer vision analysis")
st.markdown("---")
# Technology performance comparison
st.markdown("### Performance Comparison")
# Create a mock performance comparison chart
performance_data = {
'Method': ['Traditional CV', 'CNN Only', 'LSTM Only', 'CNN-LSTM'],
'Accuracy': [65, 82, 78, 93],
'Speed (FPS)': [45, 28, 32, 25],
'Memory (MB)': [120, 350, 280, 420]
}
metric_to_show = st.selectbox("Select performance metric", ["Accuracy", "Speed (FPS)", "Memory (MB)"])
fig = px.bar(
performance_data,
x='Method',
y=metric_to_show,
color='Method',
text=performance_data[metric_to_show],
title=f"Performance Comparison - {metric_to_show}"
)
# Customize the chart appearance
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
st.plotly_chart(fig)
def render_applications_section():
"""Render the applications section"""
section_header("Applications")
st.markdown("""
## Potential Applications
Facial gesture recognition technology has numerous practical applications across various domains:
""")
col1, col2 = st.columns([1, 1])
with col1:
st.markdown("""
### Human-Computer Interaction
- **Hands-free Computing**: Control computers without physical input devices
- **Accessible Technology**: Enable computer usage for people with mobility limitations
- **Interactive Presentations**: Control slides and demonstrations with facial gestures
- **Gaming**: Enhanced immersion through facial expression controls
""")
st.markdown("""
### Healthcare
- **Patient Monitoring**: Track patient attentiveness or consciousness
- **Rehabilitation**: Provide feedback for facial exercises
- **Pain Assessment**: Detect discomfort through facial expressions
- **Mental Health**: Analyze emotional responses during therapy
""")
with col2:
st.markdown("""
### Automotive Applications
- **Driver Monitoring**: Detect drowsiness or distraction
- **In-car Controls**: Adjust settings with facial gestures
- **Personalized Experience**: Recognize driver identity and preferences
- **Security**: Additional authentication layer
""")
st.markdown("""
### Accessibility
- **Assistive Technology**: Enable computer control for users with mobility impairments
- **Communication Aids**: Help non-verbal individuals express themselves
- **Smart Home Control**: Manage home automation with facial gestures
- **Public Kiosks**: Enable gesture-based interaction with public information systems
""")
# Interactive application explorer
st.markdown("### Application Explorer")
application_area = st.selectbox(
"Select an application area to explore",
["Human-Computer Interaction", "Healthcare", "Automotive", "Accessibility", "Education"]
)
if application_area == "Human-Computer Interaction":
st.info("""
**Featured Application: Gesture-Controlled Presentation System**
A system that allows presenters to control slideshows using facial gestures:
- Eye blinks to advance slides
- Eyebrow raises to go back
- Head nods/shakes to confirm/cancel actions
This enables hands-free presentations, allowing speakers to maintain natural gestures while speaking.
""")
elif application_area == "Healthcare":
st.info("""
**Featured Application: Pain Assessment Tool**
A system that monitors patient facial expressions to detect signs of pain:
- Real-time monitoring without requiring verbal communication
- Particularly useful for non-verbal patients or those with cognitive impairments
- Alerts medical staff when pain indicators are detected
- Maintains a log of pain expression events for medical review
""")
elif application_area == "Automotive":
st.info("""
**Featured Application: Driver Alertness Monitoring**
A system that detects signs of driver fatigue or distraction:
- Monitors eye closure duration and blink rate
- Detects head nodding indicative of drowsiness
- Provides audio alerts when fatigue signs are detected
- Suggests breaks when sustained fatigue patterns are observed
""")
elif application_area == "Accessibility":
st.info("""
**Featured Application: Facial Gesture Computer Control**
A complete computer control system for people with limited mobility:
- Cursor movement through slight head movements
- Selection through eye blinks or eyebrow raises
- Scrolling through specific eye movements
- Text input through an on-screen keyboard navigated by facial gestures
""")
elif application_area == "Education":
st.info("""
**Featured Application: Student Engagement Analytics**
A system that monitors student facial expressions during online learning:
- Tracks attentiveness and engagement through eye movements
- Identifies confusion through facial expressions
- Provides analytics to instructors about student engagement
- Helps identify content that may need additional explanation
""")
# Conclusion
st.markdown("---")
st.markdown("""
## Conclusion
Facial gesture recognition using AI represents a significant advancement in human-computer interaction. By combining CNN and LSTM architectures, we've created a system that can:
- Accurately recognize facial gestures in real-time
- Process video streams with minimal latency
- Adapt to different users and environments
- Enable new possibilities for accessibility and interaction
This technology continues to evolve, with ongoing improvements in accuracy, speed, and adaptability.
""")
st.success("Thank you for exploring this presentation on Facial Gesture Recognition using AI!")
# Using the SVG file from assets instead of embedding directly
st.image("assets/workflow_diagram.svg")
def get_landmark_point_from_detector(landmarks, index):
"""Helper function from eye_eyebrow_detector.py"""
return (landmarks.part(index).x, landmarks.part(index).y)
def eye_aspect_ratio_from_detector(eye_pts):
"""Helper function from eye_eyebrow_detector.py"""
A = dist.euclidean(eye_pts[1], eye_pts[5])
B = dist.euclidean(eye_pts[2], eye_pts[4])
C = dist.euclidean(eye_pts[0], eye_pts[3])
ear_val = (A + B) / (2.0 * C)
return ear_val
def initialize_dlib_components():
"""Initializes dlib detector and predictor."""
try:
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(DLIB_SHAPE_PREDICTOR_PATH)
return detector, predictor
except RuntimeError as e:
st.error(f"Failed to load dlib model: {e}. Please ensure '{DLIB_SHAPE_PREDICTOR_PATH}' is in the correct path.")
return None, None
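# Note: decorating initialize_dlib_components with @st.cache_resource would avoid
# reloading the detector/predictor on every Streamlit rerun; the session_state
# check in render_live_demo_section achieves a similar effect.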
def render_live_demo_section():
"""Render the live facial gesture recognition demo section"""
section_header("Live Facial Gesture Demo")
st.write("This demo uses your webcam to perform real-time eye and eyebrow gesture detection.")
st.warning("Ensure you have a webcam connected and have granted permission if prompted by your browser. Also, make sure `shape_predictor_68_face_landmarks.dat` is in the application's root directory.")
if 'detector' not in st.session_state or 'predictor' not in st.session_state:
st.session_state.detector, st.session_state.predictor = initialize_dlib_components()
if st.session_state.detector is None or st.session_state.predictor is None:
st.error("Dlib components could not be initialized. The demo cannot run.")
return
# Initialize session state variables for the demo
if 'run_demo' not in st.session_state:
st.session_state.run_demo = False
if 'calibration_counter' not in st.session_state:
st.session_state.calibration_counter = 0
if 'calibration_data_user_L_eyebrow_y' not in st.session_state:
st.session_state.calibration_data_user_L_eyebrow_y = []
if 'calibration_data_user_R_eyebrow_y' not in st.session_state:
st.session_state.calibration_data_user_R_eyebrow_y = []
if 'calibration_data_user_L_eye_top_y' not in st.session_state:
st.session_state.calibration_data_user_L_eye_top_y = []
if 'calibration_data_user_R_eye_top_y' not in st.session_state:
st.session_state.calibration_data_user_R_eye_top_y = []
if 'normal_user_L_eyebrow_y_avg' not in st.session_state:
st.session_state.normal_user_L_eyebrow_y_avg = 0
if 'normal_user_R_eyebrow_y_avg' not in st.session_state:
st.session_state.normal_user_R_eyebrow_y_avg = 0
if 'normal_user_L_eye_top_y_avg' not in st.session_state:
st.session_state.normal_user_L_eye_top_y_avg = 0
if 'normal_user_R_eye_top_y_avg' not in st.session_state:
st.session_state.normal_user_R_eye_top_y_avg = 0
if 'normal_dist_L_eyebrow_to_eye' not in st.session_state:
st.session_state.normal_dist_L_eyebrow_to_eye = 0
if 'normal_dist_R_eyebrow_to_eye' not in st.session_state:
st.session_state.normal_dist_R_eyebrow_to_eye = 0
if 'current_state_demo' not in st.session_state:
st.session_state.current_state_demo = STATE_CALIBRATING
if 'camera_active' not in st.session_state:
st.session_state.camera_active = False
col1, col2 = st.columns(2)
with col1:
if st.button("Start/Restart Demo"):
st.session_state.run_demo = True
st.session_state.camera_active = True
# Reset calibration
st.session_state.calibration_counter = 0
st.session_state.calibration_data_user_L_eyebrow_y = []
st.session_state.calibration_data_user_R_eyebrow_y = []
st.session_state.calibration_data_user_L_eye_top_y = []
st.session_state.calibration_data_user_R_eye_top_y = []
st.session_state.current_state_demo = STATE_CALIBRATING
st.info("Calibration started. Look at the camera with a normal expression.")
with col2:
if st.button("Stop Demo"):
st.session_state.run_demo = False
st.session_state.camera_active = False
if st.session_state.run_demo and st.session_state.camera_active:
# Placeholder for video feed
frame_placeholder = st.empty()
# Attempt to open the webcam.
# The capture object is kept in session_state so it persists across Streamlit reruns;
# it is released below whenever the demo stops or a frame grab fails.
if 'cap' not in st.session_state or not st.session_state.cap.isOpened():
st.session_state.cap = cv2.VideoCapture(0)
if not st.session_state.cap.isOpened():
st.error("Cannot open webcam.")
st.session_state.run_demo = False # Stop demo if camera fails
return
detector = st.session_state.detector
predictor = st.session_state.predictor
while st.session_state.run_demo and st.session_state.cap.isOpened():
ret, frame = st.session_state.cap.read()
if not ret:
st.error("Failed to grab frame from webcam.")
break
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = detector(gray)
display_text = st.session_state.current_state_demo
if st.session_state.calibration_counter < CALIBRATION_FRAMES:
st.session_state.current_state_demo = STATE_CALIBRATING
display_text = f"{STATE_CALIBRATING} ({st.session_state.calibration_counter}/{CALIBRATION_FRAMES})"
for face in faces:
landmarks = predictor(gray, face)
user_L_eyebrow_current_y_pts = [landmarks.part(i).y for i in user_L_eyebrow_y_calc_indices]
current_user_L_eyebrow_y_avg = np.mean(user_L_eyebrow_current_y_pts) if user_L_eyebrow_current_y_pts else 0
user_R_eyebrow_current_y_pts = [landmarks.part(i).y for i in user_R_eyebrow_y_calc_indices]
current_user_R_eyebrow_y_avg = np.mean(user_R_eyebrow_current_y_pts) if user_R_eyebrow_current_y_pts else 0
user_L_eye_top_current_y_pts = [landmarks.part(i).y for i in user_L_eye_top_indices]
current_user_L_eye_top_y_avg = np.mean(user_L_eye_top_current_y_pts) if user_L_eye_top_current_y_pts else 0
user_R_eye_top_current_y_pts = [landmarks.part(i).y for i in user_R_eye_top_indices]
current_user_R_eye_top_y_avg = np.mean(user_R_eye_top_current_y_pts) if user_R_eye_top_current_y_pts else 0
user_L_eye_all_pts = np.array([get_landmark_point_from_detector(landmarks, i) for i in range(user_L_eye_indices_start, user_L_eye_indices_end)], dtype="int")
user_R_eye_all_pts = np.array([get_landmark_point_from_detector(landmarks, i) for i in range(user_R_eye_indices_start, user_R_eye_indices_end)], dtype="int")
left_ear = eye_aspect_ratio_from_detector(user_L_eye_all_pts)
right_ear = eye_aspect_ratio_from_detector(user_R_eye_all_pts)
avg_ear = (left_ear + right_ear) / 2.0
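# Calibration phase: accumulate per-frame eyebrow and upper-eyelid y-positions
# for CALIBRATION_FRAMES frames of neutral expression; their means become the
# baseline used for gesture detection afterwards.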
if st.session_state.calibration_counter < CALIBRATION_FRAMES:
st.session_state.calibration_data_user_L_eyebrow_y.append(current_user_L_eyebrow_y_avg)
st.session_state.calibration_data_user_R_eyebrow_y.append(current_user_R_eyebrow_y_avg)
st.session_state.calibration_data_user_L_eye_top_y.append(current_user_L_eye_top_y_avg)
st.session_state.calibration_data_user_R_eye_top_y.append(current_user_R_eye_top_y_avg)
st.session_state.calibration_counter += 1
display_text = f"{STATE_CALIBRATING} ({st.session_state.calibration_counter}/{CALIBRATION_FRAMES})"
if st.session_state.calibration_counter == CALIBRATION_FRAMES:
st.session_state.normal_user_L_eyebrow_y_avg = np.mean(st.session_state.calibration_data_user_L_eyebrow_y) if st.session_state.calibration_data_user_L_eyebrow_y else 0
st.session_state.normal_user_R_eyebrow_y_avg = np.mean(st.session_state.calibration_data_user_R_eyebrow_y) if st.session_state.calibration_data_user_R_eyebrow_y else 0
st.session_state.normal_user_L_eye_top_y_avg = np.mean(st.session_state.calibration_data_user_L_eye_top_y) if st.session_state.calibration_data_user_L_eye_top_y else 0
st.session_state.normal_user_R_eye_top_y_avg = np.mean(st.session_state.calibration_data_user_R_eye_top_y) if st.session_state.calibration_data_user_R_eye_top_y else 0
st.session_state.normal_dist_L_eyebrow_to_eye = st.session_state.normal_user_L_eye_top_y_avg - st.session_state.normal_user_L_eyebrow_y_avg
st.session_state.normal_dist_R_eyebrow_to_eye = st.session_state.normal_user_R_eye_top_y_avg - st.session_state.normal_user_R_eyebrow_y_avg
st.session_state.current_state_demo = STATE_NORMAL
display_text = STATE_NORMAL
st.success("Calibration finished.")
else: # Detection Phase
st.session_state.current_state_demo = STATE_NORMAL # Default to normal after calibration
display_text = STATE_NORMAL
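# Detection: eye closure ("Yes") is flagged when the average EAR falls below
# EAR_THRESHOLD; an eyebrow raise ("No") when the eyebrow-to-upper-eyelid
# distance exceeds the calibrated baseline by the 15% factor on both sides.
# The <= 0 branches below provide a fallback threshold when the calibrated
# baseline is degenerate (non-positive).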
if st.session_state.normal_dist_L_eyebrow_to_eye != 0 and st.session_state.normal_dist_R_eyebrow_to_eye != 0:
if avg_ear < EAR_THRESHOLD:
st.session_state.current_state_demo = STATE_YES
display_text = STATE_YES
else:
current_dist_L = current_user_L_eye_top_y_avg - current_user_L_eyebrow_y_avg
current_dist_R = current_user_R_eye_top_y_avg - current_user_R_eyebrow_y_avg
threshold_dist_L = st.session_state.normal_dist_L_eyebrow_to_eye * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
threshold_dist_R = st.session_state.normal_dist_R_eyebrow_to_eye * (1 + EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR)
if st.session_state.normal_dist_L_eyebrow_to_eye <= 0: threshold_dist_L = st.session_state.normal_dist_L_eyebrow_to_eye + abs(st.session_state.normal_dist_L_eyebrow_to_eye * EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR) + 5
if st.session_state.normal_dist_R_eyebrow_to_eye <= 0: threshold_dist_R = st.session_state.normal_dist_R_eyebrow_to_eye + abs(st.session_state.normal_dist_R_eyebrow_to_eye * EYEBROW_TO_EYE_VERTICAL_DISTANCE_INCREASE_FACTOR) + 5
if current_dist_L > threshold_dist_L and current_dist_R > threshold_dist_R:
st.session_state.current_state_demo = STATE_NO
display_text = STATE_NO
# Display the detected state on the frame
color = (255, 255, 0) # Default for Normal/Calibrating
if st.session_state.current_state_demo == STATE_YES:
color = (0, 255, 0)
elif st.session_state.current_state_demo == STATE_NO:
color = (0, 0, 255)
# Make text larger and position it higher
cv2.putText(frame, display_text, (frame.shape[1] // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3, cv2.LINE_AA)
# Convert frame to RGB for Streamlit
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_placeholder.image(frame_rgb, channels="RGB")
# Optionally add a small delay to smooth the video and give Streamlit time to process;
# currently disabled, relying on the loop's inherent latency.
# time.sleep(0.01)
# Release camera when demo stops or an error occurs
if 'cap' in st.session_state and st.session_state.cap.isOpened():
st.session_state.cap.release()
if st.session_state.camera_active is False and 'cap' in st.session_state: # if explicitly stopped
del st.session_state.cap
elif not st.session_state.run_demo and st.session_state.camera_active:
# This case handles when Stop Demo is clicked, ensuring camera is released.
if 'cap' in st.session_state and st.session_state.cap.isOpened():
st.session_state.cap.release()
del st.session_state.cap # Ensure it's re-initialized if started again
st.session_state.camera_active = False
st.info("Live demo stopped.")
# Example of how to call this new section in a main app structure (either render
# every section in sequence, or drive a single section from the sidebar -- not both):
# if __name__ == "__main__":
#     st.set_page_config(layout="wide")
#     # Apply custom CSS (optional)
#     # st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
#
#     st.sidebar.title("Navigation")
#     page = st.sidebar.radio("Go to", ["Introduction", "Objective", "Architecture", "Process Flow", "Technologies", "Applications", "Live Demo"])
#
#     if page == "Introduction": render_intro_section()
#     elif page == "Objective": render_objective_section()
#     elif page == "Architecture": render_architecture_section()
#     elif page == "Process Flow": render_process_section()
#     elif page == "Technologies": render_technology_section()
#     elif page == "Applications": render_applications_section()
#     elif page == "Live Demo": render_live_demo_section()