|
import numpy as np |
|
import cv2 as cv |
|
|
|
|
|
class MPHandPose: |
|
def __init__(self, modelPath, confThreshold=0.8, backendId=0, targetId=0): |
|
self.model_path = modelPath |
|
self.conf_threshold = confThreshold |
|
self.backend_id = backendId |
|
self.target_id = targetId |
|
|
|
self.input_size = np.array([256, 256]) |
|
self.PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2] |
|
self.PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0 |
|
self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2 |
|
self.PALM_BOX_SHIFT_VECTOR = [0, -0.4] |
|
self.PALM_BOX_ENLARGE_FACTOR = 3 |
|
self.HAND_BOX_SHIFT_VECTOR = [0, -0.1] |
|
self.HAND_BOX_ENLARGE_FACTOR = 1.65 |
|
|
|
self.model = cv.dnn.readNet(self.model_path) |
|
self.model.setPreferableBackend(self.backend_id) |
|
self.model.setPreferableTarget(self.target_id) |
|
|
|
@property |
|
def name(self): |
|
return self.__class__.__name__ |
|
|
|
def setBackend(self, backendId): |
|
self.backend_id = backendId |
|
self.model.setPreferableBackend(self.backend_id) |
|
|
|
def setTarget(self, targetId): |
|
self.target_id = targetId |
|
self.model.setPreferableTarget(self.target_id) |
|
|
|
def _preprocess(self, image, palm): |
|
''' |
|
Rotate input for inference. |
|
Parameters: |
|
image - input image of BGR channel order |
|
palm_bbox - palm bounding box found in image of format [[x1, y1], [x2, y2]] (top-left and bottom-right points) |
|
palm_landmarks - 7 landmarks (5 finger base points, 2 palm base points) of shape [7, 2] |
|
Returns: |
|
rotated_hand - rotated hand image for inference |
|
rotation_matrix - matrix for rotation and de-rotation |
|
''' |
|
|
|
|
|
palm_bbox = palm[0:4].reshape(2, 2) |
|
palm_landmarks = palm[4:18].reshape(7, 2) |
|
image = cv.cvtColor(image, cv.COLOR_BGR2RGB) |
|
|
|
p1 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_PALM_BASE] |
|
p2 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE] |
|
radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0]) |
|
radians = radians - 2 * np.pi * np.floor((radians + np.pi) / (2 * np.pi)) |
|
angle = np.rad2deg(radians) |
|
|
|
center_palm_bbox = np.sum(palm_bbox, axis=0) / 2 |
|
|
|
rotation_matrix = cv.getRotationMatrix2D(center_palm_bbox, angle, 1.0) |
|
|
|
rotated_image = cv.warpAffine(image, rotation_matrix, (image.shape[1], image.shape[0])) |
|
|
|
homogeneous_coord = np.c_[palm_landmarks, np.ones(palm_landmarks.shape[0])] |
|
rotated_palm_landmarks = np.array([ |
|
np.dot(homogeneous_coord, rotation_matrix[0]), |
|
np.dot(homogeneous_coord, rotation_matrix[1])]) |
|
|
|
rotated_palm_bbox = np.array([ |
|
np.amin(rotated_palm_landmarks, axis=1), |
|
np.amax(rotated_palm_landmarks, axis=1)]) |
|
|
|
|
|
wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0] |
|
shift_vector = self.PALM_BOX_SHIFT_VECTOR * wh_rotated_palm_bbox |
|
rotated_palm_bbox = rotated_palm_bbox + shift_vector |
|
|
|
center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2 |
|
wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0] |
|
new_half_size = np.amax(wh_rotated_palm_bbox) / 2 |
|
rotated_palm_bbox = np.array([ |
|
center_rotated_plam_bbox - new_half_size, |
|
center_rotated_plam_bbox + new_half_size]) |
|
|
|
|
|
center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2 |
|
wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0] |
|
new_half_size = wh_rotated_palm_bbox * self.PALM_BOX_ENLARGE_FACTOR / 2 |
|
rotated_palm_bbox = np.array([ |
|
center_rotated_plam_bbox - new_half_size, |
|
center_rotated_plam_bbox + new_half_size]) |
|
|
|
|
|
[[x1, y1], [x2, y2]] = rotated_palm_bbox.astype(np.int32) |
|
diff = np.maximum([-x1, -y1, x2 - rotated_image.shape[1], y2 - rotated_image.shape[0]], 0) |
|
[x1, y1, x2, y2] = [x1, y1, x2, y2] + diff |
|
crop = rotated_image[y1:y2, x1:x2, :] |
|
crop = cv.copyMakeBorder(crop, diff[1], diff[3], diff[0], diff[2], cv.BORDER_CONSTANT, value=(0, 0, 0)) |
|
blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32) / 255.0 |
|
|
|
return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix |
|
|
|
def infer(self, image, palm): |
|
|
|
input_blob, rotated_palm_bbox, angle, rotation_matrix = self._preprocess(image, palm) |
|
|
|
|
|
self.model.setInput(input_blob) |
|
output_blob = self.model.forward(self.model.getUnconnectedOutLayersNames()) |
|
|
|
|
|
results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix) |
|
return results |
|
|
|
def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix): |
|
landmarks, conf = blob |
|
|
|
if conf < self.conf_threshold: |
|
return None |
|
|
|
landmarks = landmarks.reshape(-1, 3) |
|
|
|
|
|
wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0] |
|
scale_factor = wh_rotated_palm_bbox / self.input_size |
|
landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * scale_factor |
|
coords_rotation_matrix = cv.getRotationMatrix2D((0, 0), angle, 1.0) |
|
rotated_landmarks = np.dot(landmarks[:, :2], coords_rotation_matrix[:, :2]) |
|
rotated_landmarks = np.c_[rotated_landmarks, landmarks[:, 2]] |
|
|
|
rotation_component = np.array([ |
|
[rotation_matrix[0][0], rotation_matrix[1][0]], |
|
[rotation_matrix[0][1], rotation_matrix[1][1]]]) |
|
translation_component = np.array([ |
|
rotation_matrix[0][2], rotation_matrix[1][2]]) |
|
inverted_translation = np.array([ |
|
-np.dot(rotation_component[0], translation_component), |
|
-np.dot(rotation_component[1], translation_component)]) |
|
inverse_rotation_matrix = np.c_[rotation_component, inverted_translation] |
|
|
|
center = np.append(np.sum(rotated_palm_bbox, axis=0) / 2, 1) |
|
original_center = np.array([ |
|
np.dot(center, inverse_rotation_matrix[0]), |
|
np.dot(center, inverse_rotation_matrix[1])]) |
|
landmarks = rotated_landmarks[:, :2] + original_center |
|
|
|
|
|
bbox = np.array([ |
|
np.amin(landmarks, axis=0), |
|
np.amax(landmarks, axis=0)]) |
|
|
|
wh_bbox = bbox[1] - bbox[0] |
|
shift_vector = self.HAND_BOX_SHIFT_VECTOR * wh_bbox |
|
bbox = bbox + shift_vector |
|
|
|
center_bbox = np.sum(bbox, axis=0) / 2 |
|
wh_bbox = bbox[1] - bbox[0] |
|
new_half_size = wh_bbox * self.HAND_BOX_ENLARGE_FACTOR / 2 |
|
bbox = np.array([ |
|
center_bbox - new_half_size, |
|
center_bbox + new_half_size]) |
|
|
|
return np.r_[bbox.reshape(-1), landmarks.reshape(-1), conf[0]] |
|
|