import gradio as gr
import mediapipe as mp
import numpy as np
import log_utils
from functools import lru_cache
import cv2
from google.protobuf.json_format import MessageToDict
logger = log_utils.get_logger()
mp_hands = mp.solutions.hands
mp_hands_connections = mp.solutions.hands_connections
mp_draw = mp.solutions.drawing_utils
connections = {
    'HAND_CONNECTIONS': mp_hands_connections.HAND_CONNECTIONS,
    'HAND_PALM_CONNECTIONS': mp_hands_connections.HAND_PALM_CONNECTIONS,
    'HAND_THUMB_CONNECTIONS': mp_hands_connections.HAND_THUMB_CONNECTIONS,
    'HAND_INDEX_FINGER_CONNECTIONS': mp_hands_connections.HAND_INDEX_FINGER_CONNECTIONS,
    'HAND_MIDDLE_FINGER_CONNECTIONS': mp_hands_connections.HAND_MIDDLE_FINGER_CONNECTIONS,
    'HAND_RING_FINGER_CONNECTIONS': mp_hands_connections.HAND_RING_FINGER_CONNECTIONS,
    'HAND_PINKY_FINGER_CONNECTIONS': mp_hands_connections.HAND_PINKY_FINGER_CONNECTIONS,
}
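# Maps the dropdown labels shown in the UI to MediaPipe's predefined landmark connection sets,
# so the user can draw the full hand skeleton or just a single finger's connections.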
@lru_cache(maxsize=10)
def get_model(static_image_mode, max_num_hands, model_complexity, min_detection_conf, min_tracking_conf):
    return mp_hands.Hands(
        static_image_mode=static_image_mode,
        max_num_hands=max_num_hands,
        model_complexity=model_complexity,
        min_detection_confidence=min_detection_conf,
        min_tracking_confidence=min_tracking_conf,
    )
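# lru_cache memoizes up to 10 Hands instances keyed by the exact argument tuple, so repeated
# requests with unchanged settings reuse the same model instead of re-initializing it.
# A rough hand-rolled equivalent of that caching (sketch only, not used by this app):
#
#   _model_cache = {}
#   def get_model_manual(*settings):
#       if settings not in _model_cache:
#           _model_cache[settings] = mp_hands.Hands(*settings)
#       return _model_cache[settings]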
def draw_landmarks(model, img, selected_connections, draw_background, flip_image):
    img_to_process = cv2.flip(img, 1) if flip_image else img
    results = model.process(img_to_process)
    output_img = img_to_process if draw_background else np.zeros_like(img_to_process)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_draw.draw_landmarks(output_img, hand_landmarks, connections[selected_connections])
    if flip_image:
        output_img = cv2.flip(output_img, 1)
    return output_img, [MessageToDict(h) for h in results.multi_handedness or []]
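# MessageToDict turns each handedness protobuf into a plain dict, typically shaped like
# {'classification': [{'index': 0, 'score': 0.97, 'label': 'Left'}]} (values illustrative);
# process_image below reads the 'classification'[0]['label'] field from it.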
def process_image(
    img,
    static_image_mode,
    max_num_hands,
    model_complexity,
    min_detection_conf,
    min_tracking_conf,
    selected_connections,
    draw_background,
    flip_image,
):
    logger.info(f"Processing image with connections: {selected_connections}, draw background: {draw_background}")
    model = get_model(static_image_mode, max_num_hands, model_complexity, min_detection_conf, min_tracking_conf)
    img, multi_handedness = draw_landmarks(model, img, selected_connections, draw_background, flip_image)
    left_hand_count = len([h for h in multi_handedness if h['classification'][0]['label'] == 'Left'])
    right_hand_count = len(multi_handedness) - left_hand_count
    return img, multi_handedness, left_hand_count, right_hand_count
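# Note: MediaPipe assigns handedness assuming a mirrored (selfie-style) input, so the
# Left/Right counts are only meaningful when the "Flip image?" option below matches how the
# picture was taken. Standalone usage sketch (illustrative; the file path is hypothetical):
#
#   frame = cv2.cvtColor(cv2.imread("hand.jpg"), cv2.COLOR_BGR2RGB)
#   out_img, raw, left, right = process_image(
#       frame, False, 2, 1, 0.5, 0.5, 'HAND_CONNECTIONS', True, True)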
demo = gr.Blocks()
with demo:
    gr.Markdown(
        """
        # MediaPipe's Hand & Finger Tracking
        A demo of hand and finger tracking using [Google's MediaPipe](https://google.github.io/mediapipe/solutions/hands.html).
        """)
    with gr.Column():
        gr.Markdown("""
        ## Step 1: Configure the model
        """)
        with gr.Column():
            static_image_mode = gr.Checkbox(label="Static image mode", value=False)
            gr.Textbox(show_label=False, value="If unchecked, the solution treats the input images as a video stream. It will try to detect hands in the first input images, and upon a successful detection further localizes the hand landmarks. In subsequent images, once all max_num_hands hands are detected and the corresponding hand landmarks are localized, it simply tracks those landmarks without invoking another detection until it loses track of any of the hands. This reduces latency and is ideal for processing video frames. If checked, hand detection runs on every input image, ideal for processing a batch of static, possibly unrelated, images.")
            max_num_hands = gr.Slider(label="Max number of hands to detect", value=1, minimum=1, maximum=10, step=1)
        with gr.Column():
            model_complexity = gr.Radio(label="Model complexity", choices=[0, 1], value=1)
            gr.Textbox(show_label=False, value="Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as inference latency generally go up with the model complexity.")
        with gr.Column():
            min_detection_conf = gr.Slider(label="Min detection confidence", value=0.5, minimum=0.0, maximum=1.0, step=0.1)
            gr.Textbox(show_label=False, value="Minimum confidence value ([0.0, 1.0]) from the hand detection model for the detection to be considered successful.")
        with gr.Column():
            min_tracking_conf = gr.Slider(label="Min tracking confidence", value=0.5, minimum=0.0, maximum=1.0, step=0.1)
            gr.Textbox(show_label=False, value="Minimum confidence value ([0.0, 1.0]) from the landmark-tracking model for the hand landmarks to be considered tracked successfully, or otherwise hand detection will be invoked automatically on the next input image. Setting it to a higher value can increase robustness of the solution, at the expense of a higher latency. Ignored if static_image_mode is true, where hand detection simply runs on every image.")
    gr.Markdown("""
    ## Step 2: Set processing parameters
    """)
    draw_background = gr.Checkbox(value=True, label="Draw background?")
    flip_image = gr.Checkbox(value=True, label="Flip image? (Note that handedness is determined assuming the input image is mirrored, i.e., taken with a front-facing/selfie camera with images flipped horizontally. If it is not the case, please swap the handedness output in the application.)")
    connection_keys = list(connections.keys())
    selected_connections = gr.Dropdown(
        label="Select connections to draw",
        choices=connection_keys,
        value=connection_keys[0],
    )
    gr.Markdown("""
    ## Step 3: Select an image
    """)
    with gr.Tabs():
        with gr.TabItem(label="Upload an image"):
            uploaded_image = gr.Image(type="numpy", label="Input image")
            example_image = gr.Examples(examples=[['examples/example-01.jpg', 1, 0.4], ['examples/example-02.jpg', 2, 0.5], ['examples/example-03.jpg', 1, 0.5]], inputs=[uploaded_image, max_num_hands, min_detection_conf])
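            # Each example row pre-fills the uploaded image together with the max_num_hands and
            # min_detection_conf controls, matching the order of the `inputs` list above.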
            submit_uploaded_image = gr.Button(value="Process Image")
        with gr.TabItem(label="Take a picture"):
            camera_picture = gr.Image(source="webcam", type="numpy", label="Input image")
            submit_camera_picture = gr.Button(value="Process Image")
    gr.Markdown("""
    ## Step 4: View results
    """)
    with gr.Column():
        with gr.Row():
            with gr.Column():
                left_hands = gr.Number(label="Left hands detected")
            with gr.Column():
                right_hands = gr.Number(label="Right hands detected")
        multi_handedness = gr.JSON(label="Raw handedness results")
        processed_image = gr.Image(label="Processed image")
    gr.Markdown('<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=kristyc.mediapipe-hands" />')
    setting_inputs = [
        static_image_mode,
        max_num_hands,
        model_complexity,
        min_detection_conf,
        min_tracking_conf,
        selected_connections,
        draw_background,
        flip_image,
    ]
    outputs = [processed_image, multi_handedness, left_hands, right_hands]
    submit_uploaded_image.click(fn=process_image, inputs=[uploaded_image, *setting_inputs], outputs=outputs)
    submit_camera_picture.click(fn=process_image, inputs=[camera_picture, *setting_inputs], outputs=outputs)
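    # Both buttons share the same handler; the outputs order (processed image, raw handedness
    # JSON, left count, right count) must match the tuple returned by process_image.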
demo.launch()