Wanli committed on
Commit
91e8de5
·
1 Parent(s): d414071

Add gesture classification for handpose estimation (#168)

Browse files
Files changed (2) hide show
  1. README.md +3 -0
  2. demo.py +120 -0
README.md CHANGED
@@ -4,6 +4,9 @@ This model estimates 21 hand keypoints per detected hand from [palm detector](..
4
 
5
  ![MediaPipe Hands Keypoints](./example_outputs/hand_keypoints.png)
6
 
 
 
 
7
  This model is converted from TFlite to ONNX using following tools:
8
  - TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
9
  - simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
 
4
 
5
  ![MediaPipe Hands Keypoints](./example_outputs/hand_keypoints.png)
6
 
7
+ Hand gesture classification demo (0-9)
8
+ ![hand gestures](./example_outputs/gesture_classification.png)
9
+
10
  This model is converted from TFlite to ONNX using following tools:
11
  - TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
12
  - simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
demo.py CHANGED
@@ -85,6 +85,9 @@ def visualize(image, hands, print_result=False):
85
  for p in landmarks:
86
  cv.circle(image, p, thickness, (0, 0, 255), -1)
87
 
 
 
 
88
  for idx, handpose in enumerate(hands):
89
  conf = handpose[-1]
90
  bbox = handpose[0:4].astype(np.int32)
@@ -96,11 +99,14 @@ def visualize(image, hands, print_result=False):
96
  landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
97
  landmarks_word = handpose[67:130].reshape(21, 3)
98
 
 
 
99
  # Print results
100
  if print_result:
101
  print('-----------hand {}-----------'.format(idx + 1))
102
  print('conf: {:.2f}'.format(conf))
103
  print('handedness: {}'.format(handedness_text))
 
104
  print('hand box: {}'.format(bbox))
105
  print('hand landmarks: ')
106
  for l in landmarks_screen:
@@ -113,6 +119,8 @@ def visualize(image, hands, print_result=False):
113
  cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
114
  # draw handedness
115
  cv.putText(display_screen, '{}'.format(handedness_text), (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
 
 
116
  # Draw line between each key points
117
  landmarks_xy = landmarks_screen[:, 0:2]
118
  draw_lines(display_screen, landmarks_xy, is_draw_point=False)
@@ -149,6 +157,118 @@ def visualize(image, hands, print_result=False):
149
 
150
  return display_screen, display_3d
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  if __name__ == '__main__':
154
  backend_id = backend_target_pairs[args.backend_target][0]
 
85
  for p in landmarks:
86
  cv.circle(image, p, thickness, (0, 0, 255), -1)
87
 
88
+ # used for gesture classification
89
+ gc = GestureClassification()
90
+
91
  for idx, handpose in enumerate(hands):
92
  conf = handpose[-1]
93
  bbox = handpose[0:4].astype(np.int32)
 
99
  landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
100
  landmarks_word = handpose[67:130].reshape(21, 3)
101
 
102
+ gesture = gc.classify(landmarks_screen)
103
+
104
  # Print results
105
  if print_result:
106
  print('-----------hand {}-----------'.format(idx + 1))
107
  print('conf: {:.2f}'.format(conf))
108
  print('handedness: {}'.format(handedness_text))
109
+ print('gesture: {}'.format(gesture))
110
  print('hand box: {}'.format(bbox))
111
  print('hand landmarks: ')
112
  for l in landmarks_screen:
 
119
  cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
120
  # draw handedness
121
  cv.putText(display_screen, '{}'.format(handedness_text), (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
122
+ # draw gesture
123
+ cv.putText(display_screen, '{}'.format(gesture), (bbox[0], bbox[1] + 30), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
124
  # Draw line between each key points
125
  landmarks_xy = landmarks_screen[:, 0:2]
126
  draw_lines(display_screen, landmarks_xy, is_draw_point=False)
 
157
 
158
  return display_screen, display_3d
159
 
160
class GestureClassification:
    """Rule-based classifier that maps 21 hand keypoints to a digit gesture.

    The classifier looks only at the 2-D (x, y) position of each keypoint.
    For every finger it derives two features:

    * a bend angle (degrees) between the wrist->finger direction and the
      direction of the last finger segment (see ``_hand_angle``), and
    * an "open" flag telling whether the fingertip lies farther from the
      wrist than a reference joint (see ``_finger_status``).

    The features are matched against a fixed rule table and the first
    matching rule yields one of "Zero" ... "Nine"; otherwise "Undefined".

    NOTE(review): the keypoint indices used below (0 = wrist, 4/8/12/16/20 =
    fingertips, ...) presumably follow the MediaPipe Hands numbering that the
    upstream hand-pose model produces - confirm against the model docs.
    """

    # Angle thresholds, in degrees, used by the rule table in ``_classify``.
    _THR_ANGLE = 65.        # above this a finger counts as clearly bent
    _THR_ANGLE_THUMB = 30.  # above this the thumb counts as folded away
    _THR_ANGLE_S = 49.      # below this a finger counts as straight

    # Per finger (thumb, index, middle, ring, pinky): keypoint indices
    # (a, b, c).  The bend angle is measured between the vectors
    # hand[0] - hand[a] and hand[b] - hand[c].
    _ANGLE_JOINTS = (
        (2, 3, 4),      # thumb
        (6, 7, 8),      # index
        (10, 11, 12),   # middle
        (14, 15, 16),   # ring
        (18, 19, 20),   # pinky
    )

    # Per finger: (reference keypoint, fingertip keypoint).  A finger is
    # "open" when the fingertip is farther from keypoint 0 than the reference.
    _STATUS_PAIRS = ((5, 4), (6, 8), (10, 12), (14, 16), (18, 20))

    def _vector_2_angle(self, v1, v2):
        """Return the angle between two vectors in degrees, in [0, 180].

        Args:
            v1, v2: array-likes of equal length.

        Returns:
            The unsigned angle in degrees, or NaN when either vector has zero
            length (the angle is undefined in that case).  NaN makes every
            threshold comparison in ``_classify`` false, so a degenerate
            finger results in "Undefined".
        """
        v1 = np.asarray(v1, dtype=np.float64)
        v2 = np.asarray(v2, dtype=np.float64)
        n1 = np.linalg.norm(v1)
        n2 = np.linalg.norm(v2)
        if n1 == 0 or n2 == 0:
            # Two keypoints coincide; avoid the 0/0 division and its warnings.
            return float('nan')
        # Rounding can push the cosine of (anti-)parallel vectors marginally
        # outside [-1, 1], where arccos returns NaN; clamp it back in range.
        cos_angle = np.clip(np.dot(v1 / n1, v2 / n2), -1.0, 1.0)
        return np.degrees(np.arccos(cos_angle))

    def _hand_angle(self, hand):
        """Compute the bend angle of each finger.

        Args:
            hand: array-like of at least 21 keypoints; only the first two
                components (x, y) of each keypoint are used.

        Returns:
            A list of five angles in degrees, ordered thumb, index, middle,
            ring, pinky.
        """
        pts = np.asarray(hand, dtype=np.float64)[:, :2]
        wrist = pts[0]
        return [
            self._vector_2_angle(wrist - pts[a], pts[b] - pts[c])
            for a, b, c in self._ANGLE_JOINTS
        ]

    def _finger_status(self, lmList):
        """Tell for each finger whether it is extended ("open").

        A finger is open when its fingertip is strictly farther from
        keypoint 0 than the finger's reference keypoint.

        Args:
            lmList: array-like of at least 21 keypoints; only (x, y) is used.

        Returns:
            A list of five booleans, ordered thumb, index, middle, ring, pinky.
        """
        pts = np.asarray(lmList, dtype=np.float64)[:, :2]
        origin = pts[0]
        status = []
        for ref_idx, tip_idx in self._STATUS_PAIRS:
            ref_dist = np.hypot(*(pts[ref_idx] - origin))
            tip_dist = np.hypot(*(pts[tip_idx] - origin))
            status.append(bool(tip_dist > ref_dist))
        return status

    def _classify(self, hand):
        """Match the finger features of ``hand`` against the gesture rules.

        Args:
            hand: array-like of at least 21 (x, y) keypoints.

        Returns:
            "Zero" ... "Nine" for the first rule that matches, else
            "Undefined".
        """
        angles = self._hand_angle(hand)
        opened = self._finger_status(hand)

        # Angle predicates.  Two thresholds leave a dead band between
        # "straight" and "bent", hence the two looser variants.
        def bent(a):
            return a > self._THR_ANGLE

        def not_straight(a):
            return a > self._THR_ANGLE_S

        def straight(a):
            return a < self._THR_ANGLE_S

        def loosely_straight(a):
            return a < self._THR_ANGLE

        def thumb_folded(a):
            return a > self._THR_ANGLE_THUMB

        any_ = None  # "don't care" marker for the open/closed requirement

        # (label, angle predicate per finger, required open state per finger);
        # fingers are ordered thumb, index, middle, ring, pinky.
        # Rules are tried top to bottom and the first match wins.
        rules = (
            ("Zero",
             (thumb_folded, bent, bent, bent, bent),
             (any_, False, False, False, False)),
            ("One",
             (thumb_folded, straight, bent, bent, bent),
             (any_, True, False, False, False)),
            ("Two",
             (thumb_folded, straight, straight, bent, bent),
             (False, True, True, False, False)),
            ("Three",
             (thumb_folded, straight, straight, straight, bent),
             (False, True, True, True, False)),
            ("Four",
             (thumb_folded, straight, straight, straight, loosely_straight),
             (any_, True, True, True, True)),
            ("Five",
             (straight, straight, straight, straight, straight),
             (True, True, True, True, True)),
            ("Six",
             (straight, bent, bent, bent, straight),
             (True, False, False, False, True)),
            ("Seven",
             (straight, loosely_straight, bent, bent, not_straight),
             (True, True, False, False, False)),
            ("Eight",
             (straight, loosely_straight, loosely_straight, bent, not_straight),
             (True, True, True, False, False)),
            ("Nine",
             (straight, loosely_straight, loosely_straight, loosely_straight,
              not_straight),
             (True, True, True, True, False)),
        )

        for label, angle_tests, open_states in rules:
            angles_ok = all(
                test(angle) for test, angle in zip(angle_tests, angles)
            )
            status_ok = all(
                want is None or want == is_open
                for want, is_open in zip(open_states, opened)
            )
            if angles_ok and status_ok:
                return label

        return "Undefined"

    def classify(self, landmarks):
        """Classify the gesture shown by one hand.

        Args:
            landmarks: array-like with at least 21 rows and at least two
                columns; the first 21 rows and first two columns (x, y) are
                used.

        Returns:
            The gesture name: "Zero" ... "Nine", or "Undefined" when no rule
            matches.
        """
        hand = np.asarray(landmarks)[:21, :2]
        return self._classify(hand)
272
 
273
  if __name__ == '__main__':
274
  backend_id = backend_target_pairs[args.backend_target][0]