import tempfile

import cv2 as cv
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download

from mp_handpose import MPHandPose
from mp_palmdet import MPPalmDet
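
# Fetch the MediaPipe palm detection and hand pose ONNX models from the
# OpenCV repos on the Hugging Face Hub.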
handpose_detector_path = hf_hub_download(repo_id="opencv/handpose_estimation_mediapipe",
                                         filename="handpose_estimation_mediapipe_2023feb.onnx")
palm_detector_path = hf_hub_download(repo_id="opencv/palm_detection_mediapipe",
                                     filename="palm_detection_mediapipe_2023feb.onnx")
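
# Run both models with the OpenCV DNN backend on the CPU.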
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU
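
# Two-stage pipeline: palm detection proposes hand regions, then hand pose
# estimation predicts 21 landmarks, handedness, and a confidence per region.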
palm_detector = MPPalmDet(modelPath=palm_detector_path, nmsThreshold=0.3, scoreThreshold=0.6,
                          backendId=backend_id, targetId=target_id)
handpose_detector = MPHandPose(modelPath=handpose_detector_path, confThreshold=0.9,
                               backendId=backend_id, targetId=target_id)


def visualize(image, hands):
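    """Draw detections on the camera image and render the world landmarks
    as four 2D projections (main, top, left, right) on a 400x400 canvas."""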
    display_screen = image.copy()
    display_3d = np.zeros((400, 400, 3), np.uint8)
    # Split the canvas into four 200x200 quadrants, one per projection.
    cv.line(display_3d, (200, 0), (200, 400), (255, 255, 255), 2)
    cv.line(display_3d, (0, 200), (400, 200), (255, 255, 255), 2)
    cv.putText(display_3d, 'Main View', (0, 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Top View', (200, 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Left View', (0, 212), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Right View', (200, 212), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))

    def draw_lines(image, landmarks, is_draw_point=True, thickness=2):
        # Bone connections of the 21-point MediaPipe hand topology:
        # wrist (0) to each finger base, then along each finger.
        connections = [
            (0, 1), (1, 2), (2, 3), (3, 4),         # thumb
            (0, 5), (5, 6), (6, 7), (7, 8),         # index
            (0, 9), (9, 10), (10, 11), (11, 12),    # middle
            (0, 13), (13, 14), (14, 15), (15, 16),  # ring
            (0, 17), (17, 18), (18, 19), (19, 20)   # little
        ]
        for (i, j) in connections:
            cv.line(image, landmarks[i], landmarks[j], (255, 255, 255), thickness)
        if is_draw_point:
            for p in landmarks:
                cv.circle(image, p, thickness, (0, 0, 255), -1)

    # GestureClassification is defined later in this file; the name is only
    # resolved when visualize() is called, so this forward use is safe.
    gc = GestureClassification()

    for handpose in hands:
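        # Each row of `hands` is a 132-element vector:
        #   [0:4]    bounding box (x1, y1, x2, y2)
        #   [4:67]   21 screen landmarks, (x, y, relative depth) each
        #   [67:130] 21 world landmarks, (x, y, z) each
        #   [130]    handedness score, [131] confidence (unused here)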
        bbox = handpose[0:4].astype(np.int32)
        handedness = handpose[-2]
        handedness_text = 'Left' if handedness <= 0.5 else 'Right'
        landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
        landmarks_world = handpose[67:130].reshape(21, 3)
        gesture = gc.classify(landmarks_screen)

        # Bounding box, handedness, and gesture label on the camera image.
        cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
        cv.putText(display_screen, handedness_text, (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
        cv.putText(display_screen, gesture, (bbox[0], bbox[1] + 30), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))

        # Skeleton lines only; the points are drawn separately below with
        # their radius scaled by relative depth (closer points look larger).
        landmarks_xy = landmarks_screen[:, 0:2]
        draw_lines(display_screen, landmarks_xy, is_draw_point=False)

        for p in landmarks_screen:
            r = max(5 - p[2] // 5, 0)
            r = min(r, 14)
            cv.circle(display_screen, (p[0], p[1]), r, (0, 0, 255), -1)
        # Main view (top-left quadrant): x-y plane, scaled up and centered.
        landmarks_xy = (landmarks_world[:, [0, 1]] * 1000 + 100).astype(np.int32)
        draw_lines(display_3d, landmarks_xy, thickness=5)

        # Top view (top-right quadrant): x-z plane, z flipped.
        landmarks_xz = landmarks_world[:, [0, 2]]
        landmarks_xz[:, 1] *= -1
        landmarks_xz = (landmarks_xz * 1000 + [300, 100]).astype(np.int32)
        draw_lines(display_3d, landmarks_xz, thickness=5)

        # Left view (bottom-left quadrant): z-y plane, z mirrored.
        landmarks_yz = landmarks_world[:, [2, 1]]
        landmarks_yz[:, 0] *= -1
        landmarks_yz = (landmarks_yz * 1000 + [100, 300]).astype(np.int32)
        draw_lines(display_3d, landmarks_yz, thickness=5)

        # Right view (bottom-right quadrant): z-y plane.
        landmarks_zy = (landmarks_world[:, [2, 1]] * 1000 + [300, 300]).astype(np.int32)
        draw_lines(display_3d, landmarks_zy, thickness=5)

    return display_screen, display_3d


class GestureClassification:
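    """Rule-based classifier mapping 21 hand landmarks to the digit
    gestures "Zero" through "Nine" via finger bend angles and
    open/closed checks."""
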
    def _vector_2_angle(self, v1, v2):
        # Angle in degrees between two 2D vectors; clip the dot product so
        # floating-point rounding cannot push arccos out of its domain.
        uv1 = v1 / np.linalg.norm(v1)
        uv2 = v2 / np.linalg.norm(v2)
        return np.degrees(np.arccos(np.clip(np.dot(uv1, uv2), -1.0, 1.0)))

    def _hand_angle(self, hand):
        # Bend angle of each finger: the vector from the finger base back to
        # the wrist versus the vector along the fingertip segment.
        return [
            self._vector_2_angle(
                np.array([hand[0][0] - hand[i][0], hand[0][1] - hand[i][1]]),
                np.array([hand[i + 1][0] - hand[i + 2][0], hand[i + 1][1] - hand[i + 2][1]]))
            for i in [2, 6, 10, 14, 18]
        ]

    def _finger_status(self, lmList):
        # A finger counts as open when its tip lies farther from the wrist
        # than its reference joint.
        originx, originy = lmList[0]
        keypoint_list = [[5, 4], [6, 8], [10, 12], [14, 16], [18, 20]]
        return [np.hypot(x2 - originx, y2 - originy) > np.hypot(x1 - originx, y1 - originy)
                for (x1, y1), (x2, y2) in [(lmList[i], lmList[j]) for i, j in keypoint_list]]

    def _classify(self, hand):
        # Match finger bend angles and open/closed flags against the ten
        # digit gestures; return "Undefined" when nothing matches.
        angle_list = self._hand_angle(hand)
        thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = self._finger_status(hand)
        thr_angle = 65.        # above this angle a finger counts as bent
        thr_angle_thumb = 30.  # thumb-specific bend threshold
        thr_angle_s = 49.      # below this angle a finger counts as straight
        g = "Undefined"
        if angle_list[0] > thr_angle_thumb and all(a > thr_angle for a in angle_list[1:]) and not any([firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Zero"
        elif angle_list[0] > thr_angle_thumb and angle_list[1] < thr_angle_s and all(a > thr_angle for a in angle_list[2:]) and firstOpen and not any([secondOpen, thirdOpen, fourthOpen]):
            g = "One"
        elif angle_list[0] > thr_angle_thumb and angle_list[1] < thr_angle_s and angle_list[2] < thr_angle_s and all(a > thr_angle for a in angle_list[3:]) and not thumbOpen and firstOpen and secondOpen and not any([thirdOpen, fourthOpen]):
            g = "Two"
        elif angle_list[0] > thr_angle_thumb and all(a < thr_angle_s for a in angle_list[1:4]) and angle_list[4] > thr_angle and not thumbOpen and all([firstOpen, secondOpen, thirdOpen]) and not fourthOpen:
            g = "Three"
        elif angle_list[0] > thr_angle_thumb and all(a < thr_angle_s for a in angle_list[1:]) and all([firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Four"
        elif all(a < thr_angle_s for a in angle_list) and all([thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Five"
        elif angle_list[0] < thr_angle_s and all(a > thr_angle for a in angle_list[1:4]) and angle_list[4] < thr_angle_s and thumbOpen and not any([firstOpen, secondOpen, thirdOpen]) and fourthOpen:
            g = "Six"
        elif angle_list[0] < thr_angle_s and angle_list[1] < thr_angle and all(a > thr_angle for a in angle_list[2:4]) and angle_list[4] > thr_angle_s and thumbOpen and firstOpen and not any([secondOpen, thirdOpen, fourthOpen]):
            g = "Seven"
        elif angle_list[0] < thr_angle_s and all(a < thr_angle for a in angle_list[1:3]) and angle_list[3] > thr_angle and angle_list[4] > thr_angle_s and thumbOpen and all([firstOpen, secondOpen]) and not any([thirdOpen, fourthOpen]):
            g = "Eight"
        elif angle_list[0] < thr_angle_s and all(a < thr_angle for a in angle_list[1:4]) and angle_list[4] > thr_angle_s and thumbOpen and all([firstOpen, secondOpen, thirdOpen]) and not fourthOpen:
            g = "Nine"
        return g

    def classify(self, landmarks):
        # Use only the (x, y) coordinates of the 21 screen landmarks.
        return self._classify(landmarks[:21, :2])


def process_video(video_path):
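    """Run palm detection and hand pose estimation on every frame of the
    input video and write the annotated frames to a temporary MP4 file."""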
    cap = cv.VideoCapture(video_path)
    fps = cap.get(cv.CAP_PROP_FPS) or 30  # fall back if FPS metadata is missing
    width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

    # NamedTemporaryFile replaces the deprecated, race-prone tempfile.mktemp;
    # close the handle first so cv.VideoWriter can reopen the path anywhere.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        out_path = tmp.name
    out_writer = cv.VideoWriter(out_path, cv.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detect palms first, then estimate a full hand pose for each palm.
        palms = palm_detector.infer(frame)
        hands = np.empty((0, 132))
        for palm in palms:
            handpose = handpose_detector.infer(frame, palm)
            if handpose is not None:
                hands = np.vstack((hands, handpose))
        frame, _ = visualize(frame, hands)
        out_writer.write(frame)

    cap.release()
    out_writer.release()
    return out_path


gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Upload Video", file_types=[".mp4", ".avi"]),
    outputs=gr.Video(label="Processed Video"),
    title="Hand Pose Estimation and Gesture Classification",
    allow_flagging="never"
).launch()