Wanli committed
Commit 576ce22 · 1 Parent(s): 549df9a

Update handpose preprocess (#141)

* update preprocess of handpose estimation to handle some corner cases

* revert PALM_BOX_ENLARGE_FACTOR to default

Files changed (1):
  1. mp_handpose.py +64 -36
mp_handpose.py CHANGED
@@ -13,6 +13,8 @@ class MPHandPose:
         self.PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2]
         self.PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0
         self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2
+        self.PALM_BOX_PRE_SHIFT_VECTOR = [0, 0]
+        self.PALM_BOX_PRE_ENLARGE_FACTOR = 4
         self.PALM_BOX_SHIFT_VECTOR = [0, -0.4]
         self.PALM_BOX_ENLARGE_FACTOR = 3
         self.HAND_BOX_SHIFT_VECTOR = [0, -0.1]
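The two new PRE constants drive a first crop taken before rotation. As a toy illustration of what an enlarge factor of 4 does to a box (a minimal sketch; the numbers are made up, not from the patch):

import numpy as np

# Toy palm box in [top-left, bottom-right] form, as used throughout this file.
palm_bbox = np.array([[100., 100.], [140., 160.]])

center = np.sum(palm_bbox, axis=0) / 2           # box center stays fixed
half = (palm_bbox[1] - palm_bbox[0]) * 4 / 2     # PALM_BOX_PRE_ENLARGE_FACTOR = 4
enlarged = np.array([center - half, center + half])
print(enlarged)   # [[ 40.  10.] [200. 250.]] -- 4x the size, same center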
@@ -34,6 +36,48 @@ class MPHandPose:
         self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)

+    def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
+        # shift bounding box
+        wh_palm_bbox = palm_bbox[1] - palm_bbox[0]
+        if for_rotation:
+            shift_vector = self.PALM_BOX_PRE_SHIFT_VECTOR
+        else:
+            shift_vector = self.PALM_BOX_SHIFT_VECTOR
+        shift_vector = shift_vector * wh_palm_bbox
+        palm_bbox = palm_bbox + shift_vector
+        # enlarge bounding box
+        center_palm_bbox = np.sum(palm_bbox, axis=0) / 2
+        wh_palm_bbox = palm_bbox[1] - palm_bbox[0]
+        if for_rotation:
+            enlarge_scale = self.PALM_BOX_PRE_ENLARGE_FACTOR
+        else:
+            enlarge_scale = self.PALM_BOX_ENLARGE_FACTOR
+        new_half_size = wh_palm_bbox * enlarge_scale / 2
+        palm_bbox = np.array([
+            center_palm_bbox - new_half_size,
+            center_palm_bbox + new_half_size])
+        palm_bbox = palm_bbox.astype(np.int32)
+        palm_bbox[:][0] = np.clip(palm_bbox[:][0], 0, image.shape[0])
+        palm_bbox[:][1] = np.clip(palm_bbox[:][1], 0, image.shape[1])
+        # crop to the size of interest
+        image = image[palm_bbox[0][1]:palm_bbox[1][1], palm_bbox[0][0]:palm_bbox[1][0], :]
+        # pad to ensure corner pixels won't be cropped
+        if for_rotation:
+            side_len = np.linalg.norm(image.shape[:2])
+        else:
+            side_len = max(image.shape[:2])
+
+        side_len = int(side_len)
+        pad_h = side_len - image.shape[0]
+        pad_w = side_len - image.shape[1]
+        left = pad_w // 2
+        top = pad_h // 2
+        right = pad_w - left
+        bottom = pad_h - top
+        image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
+        bias = palm_bbox[0] - [left, top]
+        return image, palm_bbox, bias
+
     def _preprocess(self, image, palm):
         '''
         Rotate input for inference.
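Before the reworked _preprocess below, here is a rough, self-contained NumPy re-creation of the crop-and-pad idea the new helper implements. The function name, the toy image and box, and the exact clipping are my own simplification; the committed method also applies the configurable shift vector and picks the enlarge factor by mode:

import numpy as np

def crop_and_pad(image, bbox, enlarge=4):
    # enlarge the box around its center
    center = np.sum(bbox, axis=0) / 2
    half = (bbox[1] - bbox[0]) * enlarge / 2
    bbox = np.array([center - half, center + half]).astype(np.int32)
    # clip x to the image width and y to the image height
    bbox[:, 0] = np.clip(bbox[:, 0], 0, image.shape[1])
    bbox[:, 1] = np.clip(bbox[:, 1], 0, image.shape[0])
    crop = image[bbox[0][1]:bbox[1][1], bbox[0][0]:bbox[1][0]]
    # pad to a square whose side covers the crop's diagonal, so that
    # no corner pixel is lost when the square is rotated afterwards
    side = int(np.linalg.norm(crop.shape[:2]))
    pad_h, pad_w = side - crop.shape[0], side - crop.shape[1]
    top, left = pad_h // 2, pad_w // 2
    padded = np.zeros((side, side) + crop.shape[2:], dtype=crop.dtype)
    padded[top:top + crop.shape[0], left:left + crop.shape[1]] = crop
    bias = bbox[0] - [left, top]   # maps padded-crop coords back to image coords
    return padded, bias

img = np.zeros((480, 640, 3), dtype=np.uint8)
patch, bias = crop_and_pad(img, np.array([[300., 200.], [340., 260.]]))
print(patch.shape, bias)   # (288, 288, 3) [176  86]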
@@ -43,14 +87,22 @@
             palm_landmarks - 7 landmarks (5 finger base points, 2 palm base points) of shape [7, 2]
         Returns:
             rotated_hand - rotated hand image for inference
+            rotated_palm_bbox - palm bounding box of the region of interest
+            angle - rotation angle of the hand
             rotation_matrix - matrix for rotation and de-rotation
+            pad_bias - (left, top) padding offset of the region of interest
         '''
-        # Rotate input to have vertically oriented hand image
-        # compute rotation
+        # crop and pad image to the region of interest
+        pad_bias = np.array([0, 0], dtype=np.int32) # left, top
         palm_bbox = palm[0:4].reshape(2, 2)
-        palm_landmarks = palm[4:18].reshape(7, 2)
+        image, palm_bbox, bias = self._cropAndPadFromPalm(image, palm_bbox, True)
         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
+        pad_bias += bias

+        # Rotate input to have vertically oriented hand image
+        # compute rotation
+        palm_bbox -= pad_bias
+        palm_landmarks = palm[4:18].reshape(7, 2) - pad_bias
         p1 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_PALM_BASE]
         p2 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]
         radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0])
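A quick worked example of the rotation formula in this hunk (the points are toy values, not from the source): the hand is rotated so the palm-to-middle-finger direction points straight up, and the sign flip on the y term accounts for image y growing downward.

import numpy as np

p1 = np.array([100., 200.])   # palm base (toy value)
p2 = np.array([130., 120.])   # middle finger base (toy value)
radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0])
print(np.rad2deg(radians))    # ~20.6 degrees of correction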
@@ -72,49 +124,25 @@
             np.amin(rotated_palm_landmarks, axis=1),
             np.amax(rotated_palm_landmarks, axis=1)]) # [top-left, bottom-right]

-        # shift bounding box
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        shift_vector = self.PALM_BOX_SHIFT_VECTOR * wh_rotated_palm_bbox
-        rotated_palm_bbox = rotated_palm_bbox + shift_vector
-        # squarify bounding box
-        center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        new_half_size = np.amax(wh_rotated_palm_bbox) / 2
-        rotated_palm_bbox = np.array([
-            center_rotated_plam_bbox - new_half_size,
-            center_rotated_plam_bbox + new_half_size])
-
-        # enlarge bounding box
-        center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        new_half_size = wh_rotated_palm_bbox * self.PALM_BOX_ENLARGE_FACTOR / 2
-        rotated_palm_bbox = np.array([
-            center_rotated_plam_bbox - new_half_size,
-            center_rotated_plam_bbox + new_half_size])
-
-        # Crop and resize the rotated image by the bounding box
-        [[x1, y1], [x2, y2]] = rotated_palm_bbox.astype(np.int32)
-        diff = np.maximum([-x1, -y1, x2 - rotated_image.shape[1], y2 - rotated_image.shape[0]], 0)
-        [x1, y1, x2, y2] = [x1, y1, x2, y2] + diff
-        crop = rotated_image[y1:y2, x1:x2, :]
-        crop = cv.copyMakeBorder(crop, diff[1], diff[3], diff[0], diff[2], cv.BORDER_CONSTANT, value=(0, 0, 0))
-        blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32) / 255.0
+        crop, rotated_palm_bbox, _ = self._cropAndPadFromPalm(rotated_image, rotated_palm_bbox)
+        blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32)
+        blob = blob / 255.

-        return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix
+        return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix, pad_bias

     def infer(self, image, palm):
         # Preprocess
-        input_blob, rotated_palm_bbox, angle, rotation_matrix = self._preprocess(image, palm)
+        input_blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias = self._preprocess(image, palm)

         # Forward
         self.model.setInput(input_blob)
         output_blob = self.model.forward(self.model.getUnconnectedOutLayersNames())

         # Postprocess
-        results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix)
+        results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias)
         return results # [bbox_coords, landmarks_coords, conf]

-    def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix):
+    def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias):
         landmarks, conf, handedness, landmarks_word = blob

         conf = conf[0][0]
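For reference, the de-rotation used further down in _postprocess relies on the 2x2 part of OpenCV's affine rotation matrix. A minimal sketch with an arbitrary angle and toy points:

import cv2 as cv
import numpy as np

angle = 30.0
M = cv.getRotationMatrix2D((0, 0), angle, 1.0)   # 2x3 affine, no translation
pts = np.array([[10., 0.], [0., 10.]])
rotated = np.dot(pts, M[:, :2])                  # row vectors rotated about the origin
print(rotated)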
@@ -127,7 +155,7 @@ class MPHandPose:
         # transform coords back to the input coords
         wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
         scale_factor = wh_rotated_palm_bbox / self.input_size
-        landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * scale_factor
+        landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * max(scale_factor)
         landmarks[:, 2] = landmarks[:, 2] * max(scale_factor) # depth scaling
         coords_rotation_matrix = cv.getRotationMatrix2D((0, 0), angle, 1.0)
         rotated_landmarks = np.dot(landmarks[:, :2], coords_rotation_matrix[:, :2])
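The switch from the per-axis scale_factor to the scalar max(scale_factor) keeps x and y uniformly scaled, which matches the square padded crop: both axes of the blob correspond to max(wh) pixels even when clipping left the bbox non-square. A toy check (the sizes are mine, not from the source):

import numpy as np

input_size = np.array([224, 224])
wh_rotated_palm_bbox = np.array([288., 224.])   # clipped bbox; the padded crop is 288x288
scale = max(wh_rotated_palm_bbox / input_size)  # one uniform factor for x, y and depth
lm_xy = np.array([112., 56.])                   # toy landmark in blob coordinates
print((lm_xy - input_size / 2) * scale)         # [  0. -72.]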
@@ -149,7 +177,7 @@ class MPHandPose:
         original_center = np.array([
             np.dot(center, inverse_rotation_matrix[0]),
             np.dot(center, inverse_rotation_matrix[1])])
-        landmarks[:, :2] = rotated_landmarks[:, :2] + original_center
+        landmarks[:, :2] = rotated_landmarks[:, :2] + original_center + pad_bias

         # get bounding box from rotated_landmarks
         bbox = np.array([
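The final pad_bias term undoes the pre-rotation crop: landmarks computed in the cropped-and-padded frame are shifted by that frame's (left, top) offset to land back in the original image. A trivial sketch with made-up values:

import numpy as np

pad_bias = np.array([176, 86])                  # offset returned by the crop step (toy value)
landmarks_xy = np.array([[10., 20.], [30., 40.]])
print(landmarks_xy + pad_bias)                  # [[186. 106.] [206. 126.]]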
 