Spaces:

ajaykarthick
/

ASL_Recognition

Sleeping

App Files Files Community

Ajay Karthick Senthil Kumar committed on Oct 16, 2024

Commit

9ddd8d9

1 Parent(s): cd2b531

add files

Browse files

Files changed (5) hide show

.gitignore +1 -0
app.py +115 -0
config.py +31 -0
model/asl_model.h5 +3 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import gradio as gr
+import cv2
+import numpy as np
+from tensorflow.keras.models import load_model
+import mediapipe as mp
+# Load your label to alphabet mapping
+from config import label_to_alphabet  # Ensure this file has the correct mapping
+# Load the saved ASL model once at import time; predict_asl_alphabet reuses this global
+model = load_model("model/asl_model.h5")
+# Initialize MediaPipe for hand detection.
+# max_num_hands=1: at most one hand is reported; min_detection_confidence=0.5 is the detection threshold.
+# NOTE(review): static_image_mode=False presumably treats successive frames as a video stream - confirm against MediaPipe docs
+mp_hands = mp.solutions.hands
+hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
+mp_drawing = mp.solutions.drawing_utils  # For hand landmark drawing
+def detect_and_crop_hand(image):
+    """
+    Detect the hand in the image, crop the region, and return the cropped hand image.
+
+    Args:
+        image: 3-channel image array of shape (height, width, 3), or None.
+
+    Returns:
+        An independent copy of the bounding-box region around the first
+        detected hand, or None when the image is None, no hand is detected,
+        or the bounding box is empty after clamping it to the frame.
+
+    Side effects:
+        Draws the detected landmarks onto ``image`` in place (debug aid).
+    """
+    # A missing frame is treated the same as "no hand detected"
+    if image is None:
+        return None
+    # Convert the image to RGB format (required by MediaPipe)
+    # NOTE(review): this assumes the caller passes BGR data; Gradio image inputs
+    # are presumably RGB already, in which case this swaps channels - confirm.
+    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # Detect hand landmarks
+    results = hands.process(rgb_image)
+    if not results.multi_hand_landmarks:
+        # If no hand is detected, return None
+        return None
+    # Only the first detected hand is used
+    hand_landmarks = results.multi_hand_landmarks[0]
+    h, w = image.shape[:2]
+    # Landmark coordinates are normalized; scale them to pixel positions
+    xs = [int(landmark.x * w) for landmark in hand_landmarks.landmark]
+    ys = [int(landmark.y * h) for landmark in hand_landmarks.landmark]
+    # Clamp the bounding box to the frame: a landmark estimated outside the
+    # image would otherwise produce a negative index that wraps around.
+    x_min, x_max = max(min(xs), 0), min(max(xs), w)
+    y_min, y_max = max(min(ys), 0), min(max(ys), h)
+    # A zero-width or zero-height box cannot be resized downstream
+    if x_max <= x_min or y_max <= y_min:
+        return None
+    # Copy the crop: a plain slice is a view of `image`, so the landmark
+    # overlay drawn below would otherwise leak into the classifier input.
+    cropped_hand = image[y_min:y_max, x_min:x_max].copy()
+    # Optional: Draw the landmarks on the original image for visualization (for debugging)
+    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+    return cropped_hand
+def preprocess_hand_image(hand_image, target_size=(150, 150)):
+    """
+    Preprocess the cropped hand image for the ASL recognition model.
+    This involves resizing, normalizing, and adding a batch dimension.
+
+    Args:
+        hand_image: Non-empty image array holding the cropped hand.
+        target_size: (width, height) passed to cv2.resize; defaults to the
+            150x150 size this function previously hard-coded.
+
+    Returns:
+        Array with a leading batch axis of size 1 and pixel values scaled
+        by 1/255.
+
+    Raises:
+        ValueError: If hand_image is None or has no pixels.
+    """
+    # Fail with a clear message instead of an opaque OpenCV assertion error
+    if hand_image is None or hand_image.size == 0:
+        raise ValueError("hand_image must be a non-empty image array")
+    # Resize the image to the model's input size
+    hand_image_resized = cv2.resize(hand_image, target_size)
+    # Normalize the image (scale pixel values by 1/255)
+    hand_image_normalized = hand_image_resized / 255.0
+    # Add the batch axis expected by the model: (1, height, width, channels)
+    return np.expand_dims(hand_image_normalized, axis=0)
+def predict_asl_alphabet(cropped_hand):
+    """
+    Feed the cropped hand image into the ASL recognition model and return the predicted alphabet.
+
+    Args:
+        cropped_hand: Image array containing only the hand region.
+
+    Returns:
+        The entry of label_to_alphabet for the highest-scoring class.
+
+    Raises:
+        KeyError: If the winning class index has no entry in label_to_alphabet.
+    """
+    # Preprocess the hand image
+    processed_hand = preprocess_hand_image(cropped_hand)
+    # verbose=0: this runs once per streamed frame, so the default progress
+    # bar would flood the logs.
+    predictions = model.predict(processed_hand, verbose=0)
+    # Index of the highest predicted probability, as a plain int for the dict lookup
+    predicted_label = int(np.argmax(predictions[0]))
+    # Map the label to the corresponding alphabet
+    return label_to_alphabet[predicted_label]
+# Gradio interface function
+def process_video_frame(image):
+    """
+    Process the webcam feed to detect, crop the hand, and predict the ASL alphabet.
+
+    Args:
+        image: Current webcam frame as an image array, or None when no
+            frame is available.
+
+    Returns:
+        The predicted alphabet string, or "No hand detected" when there is
+        no frame, no hand, or the detected hand region is empty.
+    """
+    # The streaming input can call this before any frame has been captured
+    if image is None:
+        return "No hand detected"
+    # Detect and crop the hand from the image
+    cropped_hand = detect_and_crop_hand(image)
+    # An empty crop cannot be resized for the model, so treat it as no hand
+    if cropped_hand is None or cropped_hand.size == 0:
+        return "No hand detected"
+    # Predict the ASL alphabet using the cropped hand image
+    return predict_asl_alphabet(cropped_hand)
+# Gradio interface setup
+iface = gr.Interface(
+    description="Real-Time ASL Hand Gesture Recognition",
+    fn=process_video_frame,
+    # Frames come from the webcam as a stream
+    inputs=gr.Image(streaming=True, sources=["webcam"]),
+    # The predicted alphabet is displayed as text
+    outputs="text",
+    # Enable live updates
+    live=True,
+)
+# Start serving the Gradio app
+iface.launch()

config.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Class index -> label: 0-25 are the letters A-Z, followed by three
+# non-letter classes ('del', 'nothing', 'space') at indices 26-28.
+label_to_alphabet = dict(enumerate(
+    list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') + ['del', 'nothing', 'space']
+))

model/asl_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f623d3b7f9aaf48f50efc73eb279ca6127b0a7518bb1def5971ea01238bf59
+size 510065040

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+opencv-python==4.7.0.72
+numpy==1.23.5
+tensorflow
+mediapipe