Spaces:
Running
Running
| import math | |
| import os | |
| import urllib.request | |
| import cv2 | |
| import numpy as np | |
| import mediapipe as mp | |
| from mediapipe.tasks import python | |
| from mediapipe.tasks.python import vision | |
| from mediapipe.framework.formats import landmark_pb2 | |
| class HandTracker: | |
| def __init__( | |
| self, | |
| model: str = None, | |
| num_hands: int = 2, | |
| min_hand_detection_confidence: float = 0.5, | |
| min_hand_presence_confidence: float = 0.5, | |
| min_tracking_confidence: float = 0.5, | |
| ): | |
| """ | |
| Initialize a HandTracker instance. | |
| Args: | |
| model (str): The path to the model for hand tracking. | |
| num_hands (int): Maximum number of hands to detect. | |
| min_hand_detection_confidence (float): Minimum confidence value ([0.0, 1.0]) for successful hand detection. | |
| min_hand_presence_confidence (float): Minimum confidence value ([0.0, 1.0]) for presence of a hand to be tracked. | |
| min_tracking_confidence (float): Minimum confidence value ([0.0, 1.0]) for successful hand landmark tracking. | |
| """ | |
| self.model = model | |
| if self.model is None: | |
| self.model = self.download_model() | |
| self.detector = self.initialize_detector( | |
| num_hands, | |
| min_hand_detection_confidence, | |
| min_hand_presence_confidence, | |
| min_tracking_confidence, | |
| ) | |
| self.mp_hands = mp.solutions.hands | |
| self.mp_drawing = mp.solutions.drawing_utils | |
| self.mp_drawing_styles = mp.solutions.drawing_styles | |
| self.DETECTION_RESULT = None | |
| self.tipIds = [4, 8, 12, 16, 20] | |
| self.MARGIN = 10 # pixels | |
| self.FONT_SIZE = 1 | |
| self.FONT_THICKNESS = 1 | |
| self.HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green | |
| # x is the raw distance, y is the value in cm | |
| # This values are used to calculate the approximate depth of the hand | |
| x = ( | |
| np.array( | |
| [ | |
| 300, | |
| 245, | |
| 200, | |
| 170, | |
| 145, | |
| 130, | |
| 112, | |
| 103, | |
| 93, | |
| 87, | |
| 80, | |
| 75, | |
| 70, | |
| 67, | |
| 62, | |
| 59, | |
| 57, | |
| ] | |
| ) | |
| / 1.5 | |
| ) | |
| y = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100] | |
| self.coff = np.polyfit(x, y, 2) # y = Ax^2 + Bx + C | |
| def save_result( | |
| self, | |
| result: landmark_pb2.NormalizedLandmarkList, | |
| unused_output_image, | |
| timestamp_ms: int, | |
| ): | |
| """ | |
| Saves the result of the detection. | |
| Args: | |
| result (mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList): Result of the detection. | |
| unused_output_image (mediapipe.framework.formats.image_frame.ImageFrame): Unused. | |
| timestamp_ms (int): Timestamp of the detection. | |
| Returns: | |
| None | |
| """ | |
| self.DETECTION_RESULT = result | |
| def initialize_detector( | |
| self, | |
| num_hands: int, | |
| min_hand_detection_confidence: float, | |
| min_hand_presence_confidence: float, | |
| min_tracking_confidence: float, | |
| ): | |
| """ | |
| Initializes the HandLandmarker instance. | |
| Args: | |
| num_hands (int): Maximum number of hands to detect. | |
| min_hand_detection_confidence (float): Minimum confidence value ([0.0, 1.0]) for hand detection to be considered successful. | |
| min_hand_presence_confidence (float): Minimum confidence value ([0.0, 1.0]) for the presence of a hand for the hand landmarks to be considered tracked successfully. | |
| min_tracking_confidence (float): Minimum confidence value ([0.0, 1.0]) for the hand landmarks to be considered tracked successfully. | |
| Returns: | |
| mediapipe.HandLandmarker: HandLandmarker instance. | |
| """ | |
| base_options = python.BaseOptions(model_asset_path=self.model) | |
| options = vision.HandLandmarkerOptions( | |
| base_options=base_options, | |
| # running_mode=vision.RunningMode.LIVE_STREAM, | |
| num_hands=num_hands, | |
| min_hand_detection_confidence=min_hand_detection_confidence, | |
| min_hand_presence_confidence=min_hand_presence_confidence, | |
| min_tracking_confidence=min_tracking_confidence, | |
| # result_callback=self.save_result, | |
| ) | |
| return vision.HandLandmarker.create_from_options(options) | |
| def draw_landmarks( | |
| self, | |
| image: np.ndarray, | |
| text_color: tuple = (0, 0, 0), | |
| font_size: int = 1, | |
| font_thickness: int = 1, | |
| ) -> np.ndarray: | |
| """ | |
| Draws the landmarks and handedness on the image. | |
| Args: | |
| image (numpy.ndarray): Image on which to draw the landmarks. | |
| text_color (tuple, optional): Color of the text. Defaults to (0, 0, 0). | |
| font_size (int, optional): Size of the font. Defaults to 1. | |
| font_thickness (int, optional): Thickness of the font. Defaults to 1. | |
| Returns: | |
| numpy.ndarray: Image with the landmarks drawn. | |
| """ | |
| if self.DETECTION_RESULT: | |
| # Landmark visualization parameters. | |
| # Draw landmarks and indicate handedness. | |
| for idx in range(len(self.DETECTION_RESULT.hand_landmarks)): | |
| hand_landmarks = self.DETECTION_RESULT.hand_landmarks[idx] | |
| handedness = self.DETECTION_RESULT.handedness[idx] | |
| # Draw the hand landmarks. | |
| hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList() | |
| hand_landmarks_proto.landmark.extend( | |
| [ | |
| landmark_pb2.NormalizedLandmark( | |
| x=landmark.x, y=landmark.y, z=landmark.z | |
| ) | |
| for landmark in hand_landmarks | |
| ] | |
| ) | |
| self.mp_drawing.draw_landmarks( | |
| image, | |
| hand_landmarks_proto, | |
| self.mp_hands.HAND_CONNECTIONS, | |
| self.mp_drawing_styles.get_default_hand_landmarks_style(), | |
| self.mp_drawing_styles.get_default_hand_connections_style(), | |
| ) | |
| # Get the top left corner of the detected hand's bounding box. | |
| height, width, _ = image.shape | |
| x_coordinates = [landmark.x for landmark in hand_landmarks] | |
| y_coordinates = [landmark.y for landmark in hand_landmarks] | |
| text_x = int(min(x_coordinates) * width) | |
| text_y = int(min(y_coordinates) * height) - self.MARGIN | |
| # Draw handedness (left or right hand) on the image. | |
| cv2.putText( | |
| image, | |
| f"{handedness[0].category_name}", | |
| (text_x, text_y), | |
| cv2.FONT_HERSHEY_DUPLEX, | |
| self.FONT_SIZE, | |
| self.HANDEDNESS_TEXT_COLOR, | |
| self.FONT_THICKNESS, | |
| cv2.LINE_AA, | |
| ) | |
| return image | |
| def detect(self, frame: np.ndarray, draw: bool = True) -> np.ndarray: | |
| """ | |
| Detects hands in the image. | |
| Args: | |
| frame (numpy.ndarray): Image in which to detect the hands. | |
| draw (bool, optional): Whether to draw the landmarks on the image. Defaults to False. | |
| Returns: | |
| numpy.ndarray: Image with the landmarks drawn if draw is True, else the original image. | |
| """ | |
| rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
| mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image) | |
| self.DETECTION_RESULT = self.detector.detect(mp_image) | |
| return self.draw_landmarks(frame) if draw else frame | |
| def raised_fingers(self): | |
| """ | |
| Counts the number of raised fingers. | |
| Returns: | |
| list: List of 1s and 0s, where 1 indicates a raised finger and 0 indicates a lowered finger. | |
| """ | |
| fingers = [] | |
| if self.DETECTION_RESULT: | |
| for idx, hand_landmarks in enumerate( | |
| self.DETECTION_RESULT.hand_world_landmarks | |
| ): | |
| if self.DETECTION_RESULT.handedness[idx][0].category_name == "Right": | |
| if ( | |
| hand_landmarks[self.tipIds[0]].x | |
| > hand_landmarks[self.tipIds[0] - 1].x | |
| ): | |
| fingers.append(1) | |
| else: | |
| fingers.append(0) | |
| else: | |
| if ( | |
| hand_landmarks[self.tipIds[0]].x | |
| < hand_landmarks[self.tipIds[0] - 1].x | |
| ): | |
| fingers.append(1) | |
| else: | |
| fingers.append(0) | |
| for id in range(1, 5): | |
| if ( | |
| hand_landmarks[self.tipIds[id]].y | |
| < hand_landmarks[self.tipIds[id] - 2].y | |
| ): | |
| fingers.append(1) | |
| else: | |
| fingers.append(0) | |
| return fingers | |
| def get_approximate_depth( | |
| self, hand_idx: int = 0, width: int = 640, height: int = 480 | |
| ) -> float: | |
| """ | |
| Calculates the depth of each finger landmark. | |
| Returns: | |
| numpy.ndarray: Mean of the depth of each finger landmark. | |
| """ | |
| if self.DETECTION_RESULT is not None: | |
| x1, y1 = ( | |
| self.DETECTION_RESULT.hand_landmarks[hand_idx][5].x * width, | |
| self.DETECTION_RESULT.hand_landmarks[hand_idx][5].y * height, | |
| ) | |
| x2, y2 = ( | |
| self.DETECTION_RESULT.hand_landmarks[hand_idx][17].x * width, | |
| self.DETECTION_RESULT.hand_landmarks[hand_idx][17].y * height, | |
| ) | |
| distance = math.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2) | |
| A, B, C = self.coff | |
| return A * distance**2 + B * distance + C | |
| else: | |
| 0 | |
| def get_hand_world_landmarks(self, hand_idx: int = 0): | |
| """ | |
| Returns the hand world landmarks. | |
| Args: | |
| hand_idx (int, optional): Index of the hand for which to return the landmarks. Defaults to 0. | |
| 0 = Right hand | |
| 1 = Left hand | |
| Returns: | |
| list: List of hand world landmarks. | |
| """ | |
| return ( | |
| self.DETECTION_RESULT.hand_world_landmarks[hand_idx] | |
| if self.DETECTION_RESULT is not None | |
| else [] | |
| ) | |
| def get_hand_landmarks(self, hand_idx: int = 0, idxs: list = None) -> list: | |
| """ | |
| Returns the hand landmarks. | |
| Args: | |
| hand_idx (int, optional): Index of the hand for which to return the landmarks. Defaults to 0. | |
| 0 = Right hand | |
| 1 = Left hand | |
| idxs (list, optional): List of indices of the landmarks to return. Defaults to None. | |
| Returns: | |
| list: List of hand world landmarks. | |
| """ | |
| if self.DETECTION_RESULT is not None: | |
| if idxs is None: | |
| return self.DETECTION_RESULT.hand_landmarks[hand_idx] | |
| else: | |
| return [ | |
| self.DETECTION_RESULT.hand_landmarks[hand_idx][idx] for idx in idxs | |
| ] | |
| else: | |
| return [] | |
| def find_distance(self, l1, l2, img, draw=True): | |
| """ | |
| Finds the distance between two landmarks. | |
| Args: | |
| l1 (int): Index of the first landmark. | |
| l2 (int): Index of the second landmark. | |
| img (numpy.ndarray): Image on which to draw the landmarks. | |
| draw (bool, optional): Whether to draw the landmarks on the image. Defaults to True. | |
| Returns: | |
| float: Distance between the two landmarks. | |
| numpy.ndarray: Image with the landmarks drawn if draw is True, else the original image. | |
| list: List of the coordinates of the two landmarks and the center of the line joining them. | |
| """ | |
| ladnmarks = self.get_hand_landmarks(idxs=[l1, l2]) | |
| x1, y1 = ladnmarks[0].x * img.shape[1], ladnmarks[0].y * img.shape[0] | |
| x2, y2 = ladnmarks[1].x * img.shape[1], ladnmarks[1].y * img.shape[0] | |
| cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 | |
| length = math.hypot(x2 - x1, y2 - y1) | |
| # Cast points to int | |
| x1, y1, x2, y2, cx, cy = map(int, [x1, y1, x2, y2, cx, cy]) | |
| if draw: | |
| cv2.circle(img, (x1, y1), 10, (255, 0, 255), cv2.FILLED) | |
| cv2.circle(img, (x2, y2), 10, (255, 0, 255), cv2.FILLED) | |
| cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3) | |
| cv2.circle(img, (cx, cy), 10, (255, 0, 255), cv2.FILLED) | |
| return length, img, [x1, y1, x2, y2, cx, cy] | |
| def download_model() -> str: | |
| """ | |
| Downloads the hand landmark model in float16 format from the mediapipe website. | |
| https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/latest/hand_landmarker.task | |
| Returns: | |
| str: Path to the downloaded model. | |
| """ | |
| root = os.path.dirname(os.path.realpath(__file__)) | |
| # Unino to res folder | |
| root = os.path.join(root, "..", "res") | |
| filename = os.path.join(root, "hand_landmarker.task") | |
| if os.path.exists(filename): | |
| print(f"O arquivo {filename} já existe, pulando o download.") | |
| else: | |
| print(f"Baixando o arquivo {filename}...") | |
| base = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/latest/hand_landmarker.task" | |
| urllib.request.urlretrieve(base, filename) | |
| return filename | |