Wanli
committed on
Commit
·
91e8de5
1
Parent(s):
d414071
Add gesture classification for handpose estimation (#168)
Browse files
README.md
CHANGED
@@ -4,6 +4,9 @@ This model estimates 21 hand keypoints per detected hand from [palm detector](..
|
|
4 |
|
5 |

|
6 |
|
|
|
|
|
|
|
7 |
This model is converted from TFlite to ONNX using following tools:
|
8 |
- TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
|
9 |
- simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
|
|
|
4 |
|
5 |

|
6 |
|
7 |
+
Hand gesture classification demo (0-9)
|
8 |
+

|
9 |
+
|
10 |
This model is converted from TFlite to ONNX using following tools:
|
11 |
- TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
|
12 |
- simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
|
demo.py
CHANGED
@@ -85,6 +85,9 @@ def visualize(image, hands, print_result=False):
|
|
85 |
for p in landmarks:
|
86 |
cv.circle(image, p, thickness, (0, 0, 255), -1)
|
87 |
|
|
|
|
|
|
|
88 |
for idx, handpose in enumerate(hands):
|
89 |
conf = handpose[-1]
|
90 |
bbox = handpose[0:4].astype(np.int32)
|
@@ -96,11 +99,14 @@ def visualize(image, hands, print_result=False):
|
|
96 |
landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
|
97 |
landmarks_word = handpose[67:130].reshape(21, 3)
|
98 |
|
|
|
|
|
99 |
# Print results
|
100 |
if print_result:
|
101 |
print('-----------hand {}-----------'.format(idx + 1))
|
102 |
print('conf: {:.2f}'.format(conf))
|
103 |
print('handedness: {}'.format(handedness_text))
|
|
|
104 |
print('hand box: {}'.format(bbox))
|
105 |
print('hand landmarks: ')
|
106 |
for l in landmarks_screen:
|
@@ -113,6 +119,8 @@ def visualize(image, hands, print_result=False):
|
|
113 |
cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
|
114 |
# draw handedness
|
115 |
cv.putText(display_screen, '{}'.format(handedness_text), (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
|
|
|
|
|
116 |
# Draw line between each key points
|
117 |
landmarks_xy = landmarks_screen[:, 0:2]
|
118 |
draw_lines(display_screen, landmarks_xy, is_draw_point=False)
|
@@ -149,6 +157,118 @@ def visualize(image, hands, print_result=False):
|
|
149 |
|
150 |
return display_screen, display_3d
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
if __name__ == '__main__':
|
154 |
backend_id = backend_target_pairs[args.backend_target][0]
|
|
|
85 |
for p in landmarks:
|
86 |
cv.circle(image, p, thickness, (0, 0, 255), -1)
|
87 |
|
88 |
+
# used for gesture classification
|
89 |
+
gc = GestureClassification()
|
90 |
+
|
91 |
for idx, handpose in enumerate(hands):
|
92 |
conf = handpose[-1]
|
93 |
bbox = handpose[0:4].astype(np.int32)
|
|
|
99 |
landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
|
100 |
landmarks_word = handpose[67:130].reshape(21, 3)
|
101 |
|
102 |
+
gesture = gc.classify(landmarks_screen)
|
103 |
+
|
104 |
# Print results
|
105 |
if print_result:
|
106 |
print('-----------hand {}-----------'.format(idx + 1))
|
107 |
print('conf: {:.2f}'.format(conf))
|
108 |
print('handedness: {}'.format(handedness_text))
|
109 |
+
print('gesture: {}'.format(gesture))
|
110 |
print('hand box: {}'.format(bbox))
|
111 |
print('hand landmarks: ')
|
112 |
for l in landmarks_screen:
|
|
|
119 |
cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
|
120 |
# draw handedness
|
121 |
cv.putText(display_screen, '{}'.format(handedness_text), (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
|
122 |
+
# draw gesture
|
123 |
+
cv.putText(display_screen, '{}'.format(gesture), (bbox[0], bbox[1] + 30), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
|
124 |
# Draw line between each key points
|
125 |
landmarks_xy = landmarks_screen[:, 0:2]
|
126 |
draw_lines(display_screen, landmarks_xy, is_draw_point=False)
|
|
|
157 |
|
158 |
return display_screen, display_3d
|
159 |
|
160 |
+
class GestureClassification:
    """Rule-based classifier mapping 21 2-D hand landmarks to a digit gesture.

    A gesture is decided from two per-finger cues computed on the landmarks:

    * a bend angle in degrees (see ``_hand_angle``), and
    * an "open" flag telling whether the fingertip lies farther from
      landmark 0 than a reference joint does (see ``_finger_status``).

    The result is one of ``"Zero"`` .. ``"Nine"`` or ``"Undefined"``.
    """

    # (base, middle, tip) landmark indices per finger, in the order
    # thumb, index, middle, ring, pinky.
    _FINGER_JOINTS = (
        (2, 3, 4),
        (6, 7, 8),
        (10, 11, 12),
        (14, 15, 16),
        (18, 19, 20),
    )

    # (reference joint, fingertip) landmark indices per finger, same order.
    _FINGER_KEYPOINTS = ((5, 4), (6, 8), (10, 12), (14, 16), (18, 20))

    def _vector_2_angle(self, v1, v2):
        """Return the angle between vectors ``v1`` and ``v2`` in degrees.

        Returns NaN when either vector has zero length (the angle is
        undefined); every threshold comparison against NaN is False, so the
        caller ends up with "Undefined" for such degenerate input.
        """
        norm1 = np.linalg.norm(v1)
        norm2 = np.linalg.norm(v2)
        if norm1 == 0 or norm2 == 0:
            # Coinciding landmarks: avoid a divide-by-zero warning and make
            # the "no angle" outcome explicit.
            return np.nan
        cos_angle = np.dot(v1 / norm1, v2 / norm2)
        # Rounding can push the cosine of (anti-)parallel unit vectors just
        # outside [-1, 1], where arccos yields NaN; clamp it first.
        cos_angle = np.clip(cos_angle, -1.0, 1.0)
        return np.degrees(np.arccos(cos_angle))

    def _hand_angle(self, hand):
        """Return the five finger bend angles (degrees) for ``hand``.

        For each finger the angle is measured between the vector from the
        finger's base joint to landmark 0 and the vector from the fingertip
        to the joint before it. Order: thumb, index, middle, ring, pinky.
        """
        origin = hand[0]
        angle_list = []
        for base, mid, tip in self._FINGER_JOINTS:
            to_origin = np.array([origin[0] - hand[base][0],
                                  origin[1] - hand[base][1]])
            tip_segment = np.array([hand[mid][0] - hand[tip][0],
                                    hand[mid][1] - hand[tip][1]])
            angle_list.append(self._vector_2_angle(to_origin, tip_segment))
        return angle_list

    def _finger_status(self, lmList):
        """Return five booleans telling whether each finger is "open".

        A finger counts as open when its tip is strictly farther from
        landmark 0 than its reference joint. Order: thumb, index, middle,
        ring, pinky.
        """
        originx, originy = lmList[0]
        fingerList = []
        for ref_idx, tip_idx in self._FINGER_KEYPOINTS:
            x1, y1 = lmList[ref_idx]
            x2, y2 = lmList[tip_idx]
            tip_dist = np.hypot(x2 - originx, y2 - originy)
            ref_dist = np.hypot(x1 - originx, y1 - originy)
            fingerList.append(bool(tip_dist > ref_dist))
        return fingerList

    def _classify(self, hand):
        """Apply the threshold rules to ``hand`` and return the gesture name.

        ``hand`` is indexed as ``hand[i][0]`` / ``hand[i][1]`` for landmarks
        0..20. Rules are evaluated in order Zero..Nine; the first match wins
        and "Undefined" is returned when none matches.
        """
        # Angle thresholds, in degrees.
        thr_angle = 65.        # above this a finger counts as bent
        thr_angle_thumb = 30.  # above this the thumb counts as bent
        thr_angle_s = 49.      # below this a finger counts as straight

        thumb, index, middle, ring, pinky = self._hand_angle(hand)
        thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = self._finger_status(hand)

        # Number gestures
        if (thumb > thr_angle_thumb and index > thr_angle and middle > thr_angle
                and ring > thr_angle and pinky > thr_angle
                and not firstOpen and not secondOpen and not thirdOpen and not fourthOpen):
            return "Zero"
        if (thumb > thr_angle_thumb and index < thr_angle_s and middle > thr_angle
                and ring > thr_angle and pinky > thr_angle
                and firstOpen and not secondOpen and not thirdOpen and not fourthOpen):
            return "One"
        if (thumb > thr_angle_thumb and index < thr_angle_s and middle < thr_angle_s
                and ring > thr_angle and pinky > thr_angle
                and not thumbOpen and firstOpen and secondOpen
                and not thirdOpen and not fourthOpen):
            return "Two"
        if (thumb > thr_angle_thumb and index < thr_angle_s and middle < thr_angle_s
                and ring < thr_angle_s and pinky > thr_angle
                and not thumbOpen and firstOpen and secondOpen
                and thirdOpen and not fourthOpen):
            return "Three"
        if (thumb > thr_angle_thumb and index < thr_angle_s and middle < thr_angle_s
                and ring < thr_angle_s and pinky < thr_angle
                and firstOpen and secondOpen and thirdOpen and fourthOpen):
            return "Four"
        if (thumb < thr_angle_s and index < thr_angle_s and middle < thr_angle_s
                and ring < thr_angle_s and pinky < thr_angle_s
                and thumbOpen and firstOpen and secondOpen and thirdOpen and fourthOpen):
            return "Five"
        if (thumb < thr_angle_s and index > thr_angle and middle > thr_angle
                and ring > thr_angle and pinky < thr_angle_s
                and thumbOpen and not firstOpen and not secondOpen
                and not thirdOpen and fourthOpen):
            return "Six"
        if (thumb < thr_angle_s and index < thr_angle and middle > thr_angle
                and ring > thr_angle and pinky > thr_angle_s
                and thumbOpen and firstOpen and not secondOpen
                and not thirdOpen and not fourthOpen):
            return "Seven"
        if (thumb < thr_angle_s and index < thr_angle and middle < thr_angle
                and ring > thr_angle and pinky > thr_angle_s
                and thumbOpen and firstOpen and secondOpen
                and not thirdOpen and not fourthOpen):
            return "Eight"
        if (thumb < thr_angle_s and index < thr_angle and middle < thr_angle
                and ring < thr_angle and pinky > thr_angle_s
                and thumbOpen and firstOpen and secondOpen
                and thirdOpen and not fourthOpen):
            return "Nine"

        return "Undefined"

    def classify(self, landmarks):
        """Classify the gesture described by ``landmarks``.

        Only the first 21 rows and the first two columns of ``landmarks``
        are used (``landmarks[:21, :2]``), so it must support 2-D slicing.
        Returns the gesture name as a string.
        """
        hand = landmarks[:21, :2]
        return self._classify(hand)
|
272 |
|
273 |
if __name__ == '__main__':
|
274 |
backend_id = backend_target_pairs[args.backend_target][0]
|