import gradio as gr
import cv2 as cv
import tempfile
from mp_handpose import MPHandPose
from mp_palmdet import MPPalmDet
import numpy as np
from huggingface_hub import hf_hub_download
handpose_detector_path = hf_hub_download(repo_id="opencv/handpose_estimation_mediapipe", filename="handpose_estimation_mediapipe_2023feb.onnx")
palm_detector_path = hf_hub_download(repo_id="opencv/palm_detection_mediapipe", filename="palm_detection_mediapipe_2023feb.onnx")
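
# Run both models on the OpenCV DNN backend with the CPU target.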
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU
palm_detector = MPPalmDet(modelPath=palm_detector_path, nmsThreshold=0.3, scoreThreshold=0.6, backendId=backend_id, targetId=target_id)
handpose_detector = MPHandPose(modelPath=handpose_detector_path, confThreshold=0.9, backendId=backend_id, targetId=target_id)
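
# Draw detection results on the input frame and render the hand's world
# landmarks in a 400x400 canvas split into main/top/left/right views.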
def visualize(image, hands):
    display_screen = image.copy()
    display_3d = np.zeros((400, 400, 3), np.uint8)
    cv.line(display_3d, (200, 0), (200, 400), (255, 255, 255), 2)
    cv.line(display_3d, (0, 200), (400, 200), (255, 255, 255), 2)
    cv.putText(display_3d, 'Main View', (0, 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Top View', (200, 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Left View', (0, 212), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Right View', (200, 212), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))

    def draw_lines(image, landmarks, is_draw_point=True, thickness=2):
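        # Skeleton edges among the 21 hand landmarks: wrist (0) to each finger
        # chain (thumb, index, middle, ring, little).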
        connections = [
            (0, 1), (1, 2), (2, 3), (3, 4),
            (0, 5), (5, 6), (6, 7), (7, 8),
            (0, 9), (9, 10), (10, 11), (11, 12),
            (0, 13), (13, 14), (14, 15), (15, 16),
            (0, 17), (17, 18), (18, 19), (19, 20)
        ]
        for (i, j) in connections:
            cv.line(image, landmarks[i], landmarks[j], (255, 255, 255), thickness)
        if is_draw_point:
            for p in landmarks:
                cv.circle(image, p, thickness, (0, 0, 255), -1)
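    # Each row of `hands` packs bbox (4 values), screen landmarks (21x3), world
    # landmarks (21x3), a handedness score and a confidence score.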
    gc = GestureClassification()
    for handpose in hands:
        bbox = handpose[0:4].astype(np.int32)
        handedness = handpose[-2]
        handedness_text = 'Left' if handedness <= 0.5 else 'Right'
        landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
        landmarks_word = handpose[67:130].reshape(21, 3)
        gesture = gc.classify(landmarks_screen)

        cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
        cv.putText(display_screen, handedness_text, (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
        cv.putText(display_screen, gesture, (bbox[0], bbox[1] + 30), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))

        landmarks_xy = landmarks_screen[:, 0:2]
        draw_lines(display_screen, landmarks_xy, is_draw_point=False)
        for p in landmarks_screen:
            r = max(5 - p[2] // 5, 0)
            r = min(r, 14)
            cv.circle(display_screen, (p[0], p[1]), r, (0, 0, 255), -1)
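        # Project the world landmarks onto the four panels of the 400x400
        # canvas (scaled and offset into pixel coordinates).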
        # Main view
        landmarks_xy = (landmarks_word[:, [0, 1]] * 1000 + 100).astype(np.int32)
        draw_lines(display_3d, landmarks_xy, thickness=5)
        # Top view
        landmarks_xz = landmarks_word[:, [0, 2]]
        landmarks_xz[:, 1] *= -1
        landmarks_xz = (landmarks_xz * 1000 + [300, 100]).astype(np.int32)
        draw_lines(display_3d, landmarks_xz, thickness=5)
        # Left view
        landmarks_yz = landmarks_word[:, [2, 1]]
        landmarks_yz[:, 0] *= -1
        landmarks_yz = (landmarks_yz * 1000 + [100, 300]).astype(np.int32)
        draw_lines(display_3d, landmarks_yz, thickness=5)
        # Right view
        landmarks_zy = (landmarks_word[:, [2, 1]] * 1000 + [300, 300]).astype(np.int32)
        draw_lines(display_3d, landmarks_zy, thickness=5)

    return display_screen, display_3d
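
# Rule-based gesture classifier: combines per-finger bend angles with
# open/closed finger status to map a hand to a digit from "Zero" to "Nine".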
class GestureClassification:
    def _vector_2_angle(self, v1, v2):
        uv1 = v1 / np.linalg.norm(v1)
        uv2 = v2 / np.linalg.norm(v2)
        # Clamp the dot product to avoid NaN from floating-point round-off.
        return np.degrees(np.arccos(np.clip(np.dot(uv1, uv2), -1.0, 1.0)))
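    # Per-finger bend angle (thumb, index, middle, ring, little): small when
    # the finger is extended, large when it is curled.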
    def _hand_angle(self, hand):
        return [
            self._vector_2_angle(np.array([hand[0][0] - hand[i][0], hand[0][1] - hand[i][1]]),
                                 np.array([hand[i + 1][0] - hand[i + 2][0], hand[i + 1][1] - hand[i + 2][1]]))
            for i in [2, 6, 10, 14, 18]
        ]
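    # Open/closed flag per finger: a finger counts as open when its tip lies
    # farther from the wrist than its reference joint.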
    def _finger_status(self, lmList):
        originx, originy = lmList[0]
        keypoint_list = [[5, 4], [6, 8], [10, 12], [14, 16], [18, 20]]
        return [np.hypot(x2 - originx, y2 - originy) > np.hypot(x1 - originx, y1 - originy)
                for (x1, y1), (x2, y2) in [(lmList[i], lmList[j]) for i, j in keypoint_list]]
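    # Map bend angles and finger status to a digit gesture. Roughly: an angle
    # above ~65 deg marks a curled finger, below ~49 deg an extended one, and
    # ~30 deg is the threshold used for the thumb.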
    def _classify(self, hand):
        angle_list = self._hand_angle(hand)
        thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = self._finger_status(hand)
        thr_angle = 65.
        thr_angle_thumb = 30.
        thr_angle_s = 49.
        g = "Undefined"
        if angle_list[0] > thr_angle_thumb and all(a > thr_angle for a in angle_list[1:]) and not any([firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Zero"
        elif angle_list[0] > thr_angle_thumb and angle_list[1] < thr_angle_s and all(a > thr_angle for a in angle_list[2:]) and firstOpen and not any([secondOpen, thirdOpen, fourthOpen]):
            g = "One"
        elif angle_list[0] > thr_angle_thumb and angle_list[1] < thr_angle_s and angle_list[2] < thr_angle_s and all(a > thr_angle for a in angle_list[3:]) and not thumbOpen and firstOpen and secondOpen and not any([thirdOpen, fourthOpen]):
            g = "Two"
        elif angle_list[0] > thr_angle_thumb and all(a < thr_angle_s for a in angle_list[1:4]) and angle_list[4] > thr_angle and not thumbOpen and all([firstOpen, secondOpen, thirdOpen]) and not fourthOpen:
            g = "Three"
        elif angle_list[0] > thr_angle_thumb and all(a < thr_angle_s for a in angle_list[1:]) and all([firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Four"
        elif all(a < thr_angle_s for a in angle_list) and all([thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Five"
        elif angle_list[0] < thr_angle_s and all(a > thr_angle for a in angle_list[1:4]) and angle_list[4] < thr_angle_s and thumbOpen and not any([firstOpen, secondOpen, thirdOpen]) and fourthOpen:
            g = "Six"
        elif angle_list[0] < thr_angle_s and angle_list[1] < thr_angle and all(a > thr_angle for a in angle_list[2:4]) and angle_list[4] > thr_angle_s and thumbOpen and firstOpen and not any([secondOpen, thirdOpen, fourthOpen]):
            g = "Seven"
        elif angle_list[0] < thr_angle_s and all(a < thr_angle for a in angle_list[1:3]) and angle_list[3] > thr_angle and angle_list[4] > thr_angle_s and thumbOpen and all([firstOpen, secondOpen]) and not any([thirdOpen, fourthOpen]):
            g = "Eight"
        elif angle_list[0] < thr_angle_s and all(a < thr_angle for a in angle_list[1:4]) and angle_list[4] > thr_angle_s and thumbOpen and all([firstOpen, secondOpen, thirdOpen]) and not fourthOpen:
            g = "Nine"
        return g

    def classify(self, landmarks):
        return self._classify(landmarks[:21, :2])
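
# Per-frame pipeline: detect palms, estimate the hand pose for each palm, draw
# the results and write the annotated frame to a temporary output video.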
def process_video(video_path):
    cap = cv.VideoCapture(video_path)
    fps = cap.get(cv.CAP_PROP_FPS)
    width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
    out_path = tempfile.mktemp(suffix=".mp4")
    out_writer = cv.VideoWriter(out_path, cv.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
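        # Two-stage inference: the palm detector proposes hand regions, then
        # the hand pose model predicts 21 keypoints for each palm.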
        palms = palm_detector.infer(frame)
        hands = np.empty((0, 132))
        for palm in palms:
            handpose = handpose_detector.infer(frame, palm)
            if handpose is not None:
                hands = np.vstack((hands, handpose))
        frame, _ = visualize(frame, hands)
        out_writer.write(frame)

    cap.release()
    out_writer.release()
    return out_path
# Gradio Interface
with gr.Blocks(css='''.example * {
    font-style: italic;
    font-size: 18px !important;
    color: #0ea5e9 !important;
    }''') as demo:
gr.Markdown("### Handpose Estimation with Mediapipe using OpenCV")
gr.Markdown("Upload a video to palm detection followed by hand pose estimation using MediaPipe and OpenCV")
with gr.Row():
inputs=gr.Video(label="Upload Video")
outputs=gr.Video(label="Handpose Estimation Output")
# Clear output when new video is uploaded
inputs.change(fn=lambda: (None), outputs=outputs)
with gr.Row():
submit_btn = gr.Button("Submit", variant="primary")
clear_btn = gr.Button("Clear")
submit_btn.click(fn=process_video, inputs=inputs, outputs=outputs)
clear_btn.click(fn=lambda:(None, None), outputs=[inputs, outputs])
gr.Markdown("Click on any example to try it.", elem_classes=["example"])
gr.Examples(
examples=[
["examples/handwave.mp4"],
["examples/handwave2.mp4"]
],
inputs=inputs
)
gr.Markdown("Example videos credit: https://pixabay.com/")
if __name__ == "__main__":
    demo.launch()