import tempfile
from functools import lru_cache

import gradio as gr
import mediapipe as mp
import mediapy as media
import numpy as np

import log_utils

logger = log_utils.get_logger()

# Shortcuts into the MediaPipe Hands solution, its predefined landmark
# connection sets, and the drawing helpers used to render detections.
mp_hands = mp.solutions.hands
mp_hands_connections = mp.solutions.hands_connections
mp_draw = mp.solutions.drawing_utils

# Maps each dropdown choice to the corresponding MediaPipe connection set.
connections = {
    'HAND_CONNECTIONS': mp_hands_connections.HAND_CONNECTIONS,
    'HAND_PALM_CONNECTIONS': mp_hands_connections.HAND_PALM_CONNECTIONS,
    'HAND_THUMB_CONNECTIONS': mp_hands_connections.HAND_THUMB_CONNECTIONS,
    'HAND_INDEX_FINGER_CONNECTIONS': mp_hands_connections.HAND_INDEX_FINGER_CONNECTIONS,
    'HAND_MIDDLE_FINGER_CONNECTIONS': mp_hands_connections.HAND_MIDDLE_FINGER_CONNECTIONS,
    'HAND_RING_FINGER_CONNECTIONS': mp_hands_connections.HAND_RING_FINGER_CONNECTIONS,
    'HAND_PINKY_FINGER_CONNECTIONS': mp_hands_connections.HAND_PINKY_FINGER_CONNECTIONS,
}

@lru_cache(maxsize=10)
def get_model(static_image_mode, max_num_hands, model_complexity, min_detection_conf, min_tracking_conf):
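  """Build and cache a MediaPipe Hands model for the given settings.

  lru_cache avoids re-creating the model when the demo is re-run with
  unchanged settings.
  """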
  return mp_hands.Hands(
    static_image_mode=static_image_mode,
    max_num_hands=max_num_hands,
    model_complexity=model_complexity,
    min_detection_confidence=min_detection_conf,
    min_tracking_confidence=min_tracking_conf,
  )

def draw_landmarks(model, img, selected_connections, draw_background):
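  """Detect hands in a single RGB frame and draw the selected connections.

  If draw_background is False, landmarks are drawn on a black canvas with the
  same shape as the input instead of on the original image.
  """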
  results = model.process(img)
  output_img = img if draw_background else np.zeros_like(img)  
  if results.multi_hand_landmarks:
    for hand_landmarks in results.multi_hand_landmarks:
      mp_draw.draw_landmarks(output_img, hand_landmarks, connections[selected_connections])    
  return output_img

def process_image(
    img, 
    static_image_mode, 
    max_num_hands, 
    model_complexity, 
    min_detection_conf, 
    min_tracking_conf, 
    selected_connections, 
    draw_background,
  ):
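  """Gradio handler for a single image: build the configured model and annotate the image."""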
  logger.info(f"Processing image with connections: {selected_connections}, draw background: {draw_background}")
  model = get_model(static_image_mode, max_num_hands, model_complexity, min_detection_conf, min_tracking_conf)
  return draw_landmarks(model, img, selected_connections, draw_background)

def process_video(
    video_path,
    static_image_mode, 
    max_num_hands, 
    model_complexity, 
    min_detection_conf, 
    min_tracking_conf, 
    selected_connections, 
    draw_background,
  ):
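  """Gradio handler for a video: annotate every frame and return the path to the result."""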
  logger.info(f"Processing video with connections: {selected_connections}, draw background: {draw_background}")
  model = get_model(static_image_mode, max_num_hands, model_complexity, min_detection_conf, min_tracking_conf)
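  # NamedTemporaryFile is used only to generate a unique base name; the
  # annotated video is written alongside it with the input's file extension.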
  with tempfile.NamedTemporaryFile() as f:
    out_path = f"{f.name}.{video_path.split('.')[-1]}"
    with media.VideoReader(video_path) as r:
      with media.VideoWriter(
          out_path, shape=r.shape, fps=r.fps, bps=r.bps) as w:
        for image in r:
          w.add_image(draw_landmarks(model, image, selected_connections, draw_background))
    return out_path


demo = gr.Blocks()

with demo:
  gr.Markdown(
    """
    # MediaPipe's Hand & Finger Tracking
    A demo of hand and finger tracking using [Google's MediaPipe](https://google.github.io/mediapipe/solutions/hands.html).
    """)

  with gr.Column():
    gr.Markdown("""
      ## Step 1: Configure the model
      """)
    with gr.Column():  
      static_image_mode = gr.Checkbox(label="Static image mode", value=False)
      gr.Textbox(show_label=False, value="If unchecked, the solution treats the input images as a video stream. It tries to detect hands in the first input images and, upon a successful detection, further localizes the hand landmarks. In subsequent images, once all max_num_hands hands are detected and the corresponding hand landmarks are localized, it simply tracks those landmarks without invoking another detection until it loses track of any of the hands. This reduces latency and is ideal for processing video frames. If checked, hand detection runs on every input image, ideal for processing a batch of static, possibly unrelated, images.")

    max_num_hands = gr.Slider(label="Max number of hands to detect", value=1, minimum=1, maximum=10, step=1)

    with gr.Column():
      model_complexity = gr.Radio(label="Model complexity", choices=[0,1], value=1)
      gr.Textbox(show_label=False, value="Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as inference latency generally go up with the model complexity.")

    with gr.Column():
      min_detection_conf = gr.Slider(label="Min detection confidence", value=0.5, minimum=0.0, maximum=1.0, step=0.1)
      gr.Textbox(show_label=False, value="Minimum confidence value ([0.0, 1.0]) from the hand detection model for the detection to be considered successful.")

    with gr.Column():
      min_tracking_conf = gr.Slider(label="Min tracking confidence", value=0.5, minimum=0.0, maximum=1.0, step=0.1)
      gr.Textbox(show_label=False, value="Minimum confidence value ([0.0, 1.0]) from the landmark-tracking model for the hand landmarks to be considered tracked successfully, or otherwise hand detection will be invoked automatically on the next input image. Setting it to a higher value can increase robustness of the solution, at the expense of a higher latency. Ignored if static_image_mode is true, where hand detection simply runs on every image.")

    gr.Markdown("""
      ## Step 2: Set processing parameters
      """)  
    draw_background = gr.Checkbox(value=True, label="Draw background?")
    connection_keys = list(connections.keys())
    selected_connections = gr.Dropdown(
      label="Select connections to draw", 
      choices=connection_keys,
      value=connection_keys[0],
      )

    gr.Markdown("""
      ## Step 3: Select an image or video
      """)  
    with gr.Tabs():
      with gr.TabItem(label="Upload an image"):
        uploaded_image = gr.Image(type="numpy")
        submit_uploaded_image = gr.Button(value="Process Image")
      with gr.TabItem(label="Take a picture"):
        camera_picture = gr.Image(source="webcam", type="numpy")
        submit_camera_picture = gr.Button(value="Process Image")  
      with gr.TabItem(label="Record a video"):
        recorded_video = gr.Video(source="webcam", format="mp4")
        submit_recorded_video = gr.Button(value="Process Video")
      with gr.TabItem(label="Upload a video"):
        uploaded_video = gr.Video(format="mp4")
        submit_uploaded_video = gr.Button(value="Process Video")  

  gr.Markdown(""" 
      ## Step 4: View results
      """)  
  with gr.Column():
    processed_video = gr.Video()
    processed_image = gr.Image()

  gr.Markdown('<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=kristyc.mediapipe-hands" />')
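  # Shared model and drawing settings, in the order the processing functions expect them.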
  setting_inputs = [
    static_image_mode, 
    max_num_hands, 
    model_complexity, 
    min_detection_conf, 
    min_tracking_conf, 
    selected_connections, 
    draw_background,
  ]
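  # Wire each submit button to the matching processing function and output component.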
  submit_uploaded_image.click(fn=process_image, inputs=[uploaded_image, *setting_inputs], outputs=[processed_image])
  submit_camera_picture.click(fn=process_image, inputs=[camera_picture, *setting_inputs], outputs=[processed_image])
  submit_recorded_video.click(fn=process_video, inputs=[recorded_video, *setting_inputs], outputs=[processed_video])
  submit_uploaded_video.click(fn=process_video, inputs=[uploaded_video, *setting_inputs], outputs=[processed_video])

demo.launch()