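"""Gradio demo: hand pose estimation with OpenCV DNN.

Runs MediaPipe palm detection and hand landmark models (ONNX, from the
OpenCV model zoo) on an uploaded video, drawing the hand skeleton,
handedness, and a classified digit gesture on every frame.
"""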
import gradio as gr
import cv2 as cv
import tempfile
from mp_handpose import MPHandPose
from mp_palmdet import MPPalmDet
import numpy as np
from huggingface_hub import hf_hub_download
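
# Fetch the ONNX models from the OpenCV model zoo on the Hugging Face Hub.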
handpose_detector_path = hf_hub_download(repo_id="opencv/handpose_estimation_mediapipe", filename="handpose_estimation_mediapipe_2023feb.onnx")
palm_detector_path = hf_hub_download(repo_id="opencv/palm_detection_mediapipe", filename="palm_detection_mediapipe_2023feb.onnx")
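
# Two-stage pipeline on the OpenCV DNN CPU backend: the palm detector
# proposes hand regions, then the handpose model regresses 21 landmarks
# (plus handedness and confidence) for each palm.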
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU

palm_detector = MPPalmDet(modelPath=palm_detector_path, nmsThreshold=0.3, scoreThreshold=0.6, backendId=backend_id, targetId=target_id)
handpose_detector = MPHandPose(modelPath=handpose_detector_path, confThreshold=0.9, backendId=backend_id, targetId=target_id)

def visualize(image, hands):
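    """Draw detections on a copy of `image` and render the world landmarks
    into a separate 400x400 canvas with main/top/left/right views."""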
    display_screen = image.copy()
    # 400x400 canvas split into quadrants for the world-landmark views.
    display_3d = np.zeros((400, 400, 3), np.uint8)
    cv.line(display_3d, (200, 0), (200, 400), (255, 255, 255), 2)
    cv.line(display_3d, (0, 200), (400, 200), (255, 255, 255), 2)
    cv.putText(display_3d, 'Main View', (0, 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Top View', (200, 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Left View', (0, 212), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    cv.putText(display_3d, 'Right View', (200, 212), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
    def draw_lines(image, landmarks, is_draw_point=True, thickness=2):
        # Bones of the hand skeleton: thumb, index, middle, ring, little finger.
        connections = [
            (0, 1), (1, 2), (2, 3), (3, 4),
            (0, 5), (5, 6), (6, 7), (7, 8),
            (0, 9), (9, 10), (10, 11), (11, 12),
            (0, 13), (13, 14), (14, 15), (15, 16),
            (0, 17), (17, 18), (18, 19), (19, 20)
        ]
        for (i, j) in connections:
            cv.line(image, landmarks[i], landmarks[j], (255, 255, 255), thickness)
        if is_draw_point:
            for p in landmarks:
                cv.circle(image, p, thickness, (0, 0, 255), -1)
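
    # Annotate every detected hand: bounding box, handedness, gesture label,
    # 2D skeleton, and depth-scaled keypoints.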
    gc = GestureClassification()
    for handpose in hands:
        # Each row: [bbox(4), screen landmarks(63), world landmarks(63), handedness, conf].
        bbox = handpose[0:4].astype(np.int32)
        handedness = handpose[-2]
        handedness_text = 'Left' if handedness <= 0.5 else 'Right'
        landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
        landmarks_word = handpose[67:130].reshape(21, 3)
        gesture = gc.classify(landmarks_screen)

        cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
        cv.putText(display_screen, handedness_text, (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
        cv.putText(display_screen, gesture, (bbox[0], bbox[1] + 30), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
        landmarks_xy = landmarks_screen[:, 0:2]
        draw_lines(display_screen, landmarks_xy, is_draw_point=False)

        # Depth-scaled keypoints: closer points (smaller z) are drawn larger.
        for p in landmarks_screen:
            r = max(5 - p[2] // 5, 0)
            r = min(r, 14)
            cv.circle(display_screen, (p[0], p[1]), r, (0, 0, 255), -1)
        # Main view
        landmarks_xy = (landmarks_word[:, [0, 1]] * 1000 + 100).astype(np.int32)
        draw_lines(display_3d, landmarks_xy, thickness=5)

        # Top view
        landmarks_xz = landmarks_word[:, [0, 2]]
        landmarks_xz[:, 1] *= -1
        landmarks_xz = (landmarks_xz * 1000 + [300, 100]).astype(np.int32)
        draw_lines(display_3d, landmarks_xz, thickness=5)

        # Left view
        landmarks_yz = landmarks_word[:, [2, 1]]
        landmarks_yz[:, 0] *= -1
        landmarks_yz = (landmarks_yz * 1000 + [100, 300]).astype(np.int32)
        draw_lines(display_3d, landmarks_yz, thickness=5)

        # Right view
        landmarks_zy = (landmarks_word[:, [2, 1]] * 1000 + [300, 300]).astype(np.int32)
        draw_lines(display_3d, landmarks_zy, thickness=5)

    return display_screen, display_3d

class GestureClassification:
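    """Rule-based classifier mapping 21 hand landmarks to the digit
    gestures "Zero" through "Nine" via joint angles and finger states."""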
    def _vector_2_angle(self, v1, v2):
        # Angle in degrees between two 2D vectors.
        uv1 = v1 / np.linalg.norm(v1)
        uv2 = v2 / np.linalg.norm(v2)
        return np.degrees(np.arccos(np.dot(uv1, uv2)))
    def _hand_angle(self, hand):
        # Bend angle of each finger: the wrist-to-joint vector against the
        # fingertip segment, for landmark indices 2/6/10/14/18 (thumb to little).
        return [
            self._vector_2_angle(
                np.array([hand[0][0] - hand[i][0], hand[0][1] - hand[i][1]]),
                np.array([hand[i + 1][0] - hand[i + 2][0], hand[i + 1][1] - hand[i + 2][1]]))
            for i in [2, 6, 10, 14, 18]
        ]
    def _finger_status(self, lmList):
        # A finger counts as open when its tip lies farther from the wrist
        # than its reference joint.
        originx, originy = lmList[0]
        keypoint_list = [[5, 4], [6, 8], [10, 12], [14, 16], [18, 20]]
        return [np.hypot(x2 - originx, y2 - originy) > np.hypot(x1 - originx, y1 - originy)
                for (x1, y1), (x2, y2) in [(lmList[i], lmList[j]) for i, j in keypoint_list]]
    def _classify(self, hand):
        angle_list = self._hand_angle(hand)
        thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = self._finger_status(hand)
        thr_angle = 65.
        thr_angle_thumb = 30.
        thr_angle_s = 49.
        g = "Undefined"
        if angle_list[0] > thr_angle_thumb and all(a > thr_angle for a in angle_list[1:]) and not any([firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Zero"
        elif angle_list[0] > thr_angle_thumb and angle_list[1] < thr_angle_s and all(a > thr_angle for a in angle_list[2:]) and firstOpen and not any([secondOpen, thirdOpen, fourthOpen]):
            g = "One"
        elif angle_list[0] > thr_angle_thumb and angle_list[1] < thr_angle_s and angle_list[2] < thr_angle_s and all(a > thr_angle for a in angle_list[3:]) and not thumbOpen and firstOpen and secondOpen and not any([thirdOpen, fourthOpen]):
            g = "Two"
        elif angle_list[0] > thr_angle_thumb and all(a < thr_angle_s for a in angle_list[1:4]) and angle_list[4] > thr_angle and not thumbOpen and all([firstOpen, secondOpen, thirdOpen]) and not fourthOpen:
            g = "Three"
        elif angle_list[0] > thr_angle_thumb and all(a < thr_angle_s for a in angle_list[1:]) and all([firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Four"
        elif all(a < thr_angle_s for a in angle_list) and all([thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen]):
            g = "Five"
        elif angle_list[0] < thr_angle_s and all(a > thr_angle for a in angle_list[1:4]) and angle_list[4] < thr_angle_s and thumbOpen and not any([firstOpen, secondOpen, thirdOpen]) and fourthOpen:
            g = "Six"
        elif angle_list[0] < thr_angle_s and angle_list[1] < thr_angle and all(a > thr_angle for a in angle_list[2:4]) and angle_list[4] > thr_angle_s and thumbOpen and firstOpen and not any([secondOpen, thirdOpen, fourthOpen]):
            g = "Seven"
        elif angle_list[0] < thr_angle_s and all(a < thr_angle for a in angle_list[1:3]) and angle_list[3] > thr_angle and angle_list[4] > thr_angle_s and thumbOpen and all([firstOpen, secondOpen]) and not any([thirdOpen, fourthOpen]):
            g = "Eight"
        elif angle_list[0] < thr_angle_s and all(a < thr_angle for a in angle_list[1:4]) and angle_list[4] > thr_angle_s and thumbOpen and all([firstOpen, secondOpen, thirdOpen]) and not fourthOpen:
            g = "Nine"
        return g
    def classify(self, landmarks):
        # Use only the (x, y) screen coordinates of the 21 landmarks.
        return self._classify(landmarks[:21, :2])

def process_video(video_path):
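    """Run the palm + handpose pipeline on every frame of the input video
    and return the path of the annotated output video."""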
    cap = cv.VideoCapture(video_path)
    fps = cap.get(cv.CAP_PROP_FPS)
    width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    out_writer = cv.VideoWriter(out_path, cv.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Stage 1: find palms; Stage 2: estimate landmarks for each palm.
        palms = palm_detector.infer(frame)
        hands = np.empty((0, 132))
        for palm in palms:
            handpose = handpose_detector.infer(frame, palm)
            if handpose is not None:
                hands = np.vstack((hands, handpose))
        frame, _ = visualize(frame, hands)
        out_writer.write(frame)

    cap.release()
    out_writer.release()
    return out_path
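
# Minimal Gradio UI: upload a video, download the annotated result.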
gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Upload Video", file_types=[".mp4", ".avi"]),
    outputs=gr.Video(label="Processed Video"),
    title="Handpose Estimation",
    allow_flagging="never"
).launch()