blocks

Files changed:
- Dockerfile +1 -1
- main.py +109 -94
- main_blocks.py → main_webcamtest.py +94 -109
Dockerfile
CHANGED

@@ -42,4 +42,4 @@ WORKDIR $HOME/app
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

-CMD ["python", "
+CMD ["python", "main.py"]
main.py
CHANGED

@@ -1,3 +1,5 @@
+
+
# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

@@ -20,14 +22,12 @@ import cv2

print("[INFO]: Imported modules!")
human = MMPoseInferencer("human")
-hand = MMPoseInferencer("hand") #kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
+hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO('yolov8n.pt') # Load an official Detect model

# ultraltics

-# [INFO] VIDEO INPUT: /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
-
# Defining inferencer models to lookup in function
inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}

@@ -44,11 +44,11 @@ def tracking(video, model, boxes=True):

return annotated_frame

-def show_tracking(video_content, vis_out_dir, model):
+def show_tracking(video_content):
video = cv2.VideoCapture(video_content)

# Track
-video_track = tracking(video_content, model.track)
+video_track = tracking(video_content, track_model.track)

# Prepare to save video
#out_file = os.path.join(vis_out_dir, "track.mp4")

@@ -65,11 +65,9 @@ def show_tracking(video_content, vis_out_dir, model):
# Go through frames and write them
for frame_track in video_track:
result_track = frame_track[0].plot() # plot a BGR numpy array of predictions
-out_track.write(result_track)
-
print("[INFO] Done with frames")
#print(type(result_pose)) numpy ndarray
-
+out_track.write(result_track)

out_track.release()

@@ -79,112 +77,129 @@ def show_tracking(video_content, vis_out_dir, model):
return out_file


-def poses(inferencer, video, vis_out_dir, kpt_thr):
-print("[INFO] VIDEO INPUT: ", video)
-result_generator = inferencer(video,
+def pose3d(video):
+add_dir = str(uuid.uuid4())
+#vidname = video.split("/")[-1]
+vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+#full name = os.path.join(vis_out_dir, vidname)
+
+result_generator = human3d(video,
vis_out_dir = vis_out_dir,
-return_vis=True,
thickness=2,
rebase_keypoint_height=True,
-#kpt_thr=kpt_thr,
-device="cuda"
-)
+device="cuda")

result = [result for result in result_generator] #next(result_generator)
-
-out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
+out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+print("[INFO]: CURRENT OUT FILE NAME: ", out_file)

return out_file

-def infer(video, check, kpt_thr, webcam=True):
-print("[INFO] VIDEO INPUT: ", video)
-
-# Selecting the specific inferencer
-out_files=[]
-
-for i in check:
-# Create out directory
-vis_out_dir = str(uuid.uuid4())
-inferencer = inferencers[i] # 'hand', 'human , device='cuda'
-
-if i == "Detect and track":
-#continue
-trackfile = show_tracking(video, vis_out_dir, inferencer)
-
-else:
-if webcam==True:
-print("WEBCAM")
-add_dir = str(uuid.uuid4())
-vidname = video.split("/")[-1]
-vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-out_file = poses(inferencer, video, vis_out_dir_web, kpt_thr)
-fullname = os.path.join(vis_out_dir_web, vidname)
-#if i == "Estimate human 3d poses":
-# fullname = fullname[:-4]+"mp4" #Change to .mp4
-# out_files.append(fullname)
-#else:
-out_files.append(fullname)
-
-else:
-out_files.extend(out_file)
-
-print(out_files)

-return "track.mp4", out_files[0], out_files[1], out_files[2] # out_files[3]
+def pose2d(video):
+add_dir = str(uuid.uuid4())
+vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+
+result_generator = human(video,
+vis_out_dir = vis_out_dir,
+thickness=2,
+rebase_keypoint_height=True,
+device="cuda")
+
+result = [result for result in result_generator] #next(result_generator)
+
+out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+
+return out_file
+

-def run():
-#https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
-check_web = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
-check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+def pose2dhand(video):
+add_dir = str(uuid.uuid4())
+vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])

-description = """
-\n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
-"""
+print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+vis_out_dir = str(uuid.uuid4())

-# Insert slider with kpt_thr
-web_kpthr = gr.Slider(0, 1, value=0.3)
-file_kpthr = gr.Slider(0, 1, value=0.3)
+result_generator = hand(video,
+vis_out_dir = vis_out_dir,
+thickness=2,
+rebase_keypoint_height=True,
+device="cuda")

-webcam = gr.Interface(
-fn=infer,
-inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
-outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
-title = 'Tracking and pose estimation',
-description = description,
-allow_flagging=False
-)
+result = [result for result in result_generator] #next(result_generator)

-file = gr.Interface(
-infer,
-inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr],
-outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
-title = 'Tracking and pose estimation',
-description = description,
-allow_flagging=False
-)
+out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+print("[INFO]: CURRENT OUT FILE NAME: ", out_file)

-demo = gr.TabbedInterface(
-interface_list=[file, webcam],
-tab_names=["From a File", "From your Webcam"]
-)
+return out_file

-demo.launch(server_name="0.0.0.0", server_port=7860)


-if __name__ == "__main__":
-run()
+with gr.Blocks() as demo:
+with gr.Column():
+with gr.Tab("Upload video"):
+with gr.Row():
+with gr.Column():
+video_input = gr.Video(source="upload", type="filepath", height=512)

-# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
-# motionbert_ft_h36m-d80af323_20230531.pth
-# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
-# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
-# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
-# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
-# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
-# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py
+submit_pose_file = gr.Button("Make 2d pose estimation")
+submit_pose3d_file = gr.Button("Make 3d pose estimation")
+submit_hand_file = gr.Button("Make 2d hand estimation")
+submit_detect_file = gr.Button("Detect and track objects")

+video_output = gr.Video(height=512)

-# 00000.mp4
-# 000000.mp4
+with gr.Tab("Record video with webcam"):
+with gr.Row():
+with gr.Column():
+webcam_input = gr.Video(source="webcam", height=512)
+
+submit_pose_web = gr.Button("Make 2d pose estimation")
+submit_pose3d_web = gr.Button("Make 3d pose estimation")
+submit_hand_web = gr.Button("Make 2d hand estimation")
+submit_detect_web = gr.Button("Detect and track objects")

+webcam_output = gr.Video(height=512)
+
+
+# From file
+submit_pose_file.click(fn=pose2d,
+inputs= video_input,
+outputs = video_output)
+
+submit_pose3d_file.click(fn=pose3d,
+inputs= video_input,
+outputs = video_output)
+
+submit_hand_file.click(fn=pose2dhand,
+inputs= video_input,
+outputs = video_output)
+
+submit_detect_file.click(fn=show_tracking,
+inputs= video_input,
+outputs = video_output)
+
+# Web
+submit_pose_web.click(fn=pose2d,
+inputs= video_input,
+outputs = video_output)
+
+submit_pose3d_web.click(fn=pose3d,
+inputs= video_input,
+outputs = video_output)
+
+submit_hand_web.click(fn=pose2dhand,
+inputs= video_input,
+outputs = video_output)
+
+submit_detect_web.click(fn=show_tracking,
+inputs= video_input,
+outputs = video_output)

+demo.launch()
main_blocks.py → main_webcamtest.py
RENAMED

@@ -1,5 +1,3 @@
-
-
# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

@@ -22,12 +20,14 @@ import cv2

print("[INFO]: Imported modules!")
human = MMPoseInferencer("human")
-hand = MMPoseInferencer("hand")
+hand = MMPoseInferencer("hand") #kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO('yolov8n.pt') # Load an official Detect model

# ultraltics

+# [INFO] VIDEO INPUT: /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
+
# Defining inferencer models to lookup in function
inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}

@@ -44,11 +44,11 @@ def tracking(video, model, boxes=True):

return annotated_frame

-def show_tracking(video_content):
+def show_tracking(video_content, vis_out_dir, model):
video = cv2.VideoCapture(video_content)

# Track
-video_track = tracking(video_content, track_model.track)
+video_track = tracking(video_content, model.track)

# Prepare to save video
#out_file = os.path.join(vis_out_dir, "track.mp4")

@@ -65,9 +65,11 @@ def show_tracking(video_content):
# Go through frames and write them
for frame_track in video_track:
result_track = frame_track[0].plot() # plot a BGR numpy array of predictions
+out_track.write(result_track)
+
print("[INFO] Done with frames")
#print(type(result_pose)) numpy ndarray
-out_track.write(result_track)
+

out_track.release()

@@ -77,129 +79,112 @@ def show_tracking(video_content):
return out_file


-def pose3d(video):
-add_dir = str(uuid.uuid4())
-#vidname = video.split("/")[-1]
-vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-#full name = os.path.join(vis_out_dir, vidname)
-
-result_generator = human3d(video,
-vis_out_dir = vis_out_dir,
-thickness=2,
-rebase_keypoint_height=True,
-device="cuda")
-
-result = [result for result in result_generator] #next(result_generator)
-out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
-
-return out_file
-
-
-def pose2d(video):
-add_dir = str(uuid.uuid4())
-vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-
-result_generator = human(video,
+def poses(inferencer, video, vis_out_dir, kpt_thr):
+print("[INFO] VIDEO INPUT: ", video)
+result_generator = inferencer(video,
vis_out_dir = vis_out_dir,
+return_vis=True,
thickness=2,
rebase_keypoint_height=True,
-device="cuda")
+#kpt_thr=kpt_thr,
+device="cuda"
+)

result = [result for result in result_generator] #next(result_generator)

-out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))

return out_file

+def infer(video, check, kpt_thr, webcam=True):
+print("[INFO] VIDEO INPUT: ", video)
+
+# Selecting the specific inferencer
+out_files=[]
+
+for i in check:
+# Create out directory
+vis_out_dir = str(uuid.uuid4())
+inferencer = inferencers[i] # 'hand', 'human , device='cuda'
+
+if i == "Detect and track":
+#continue
+trackfile = show_tracking(video, vis_out_dir, inferencer)
+
+else:
+if webcam==True:
+print("WEBCAM")
+add_dir = str(uuid.uuid4())
+vidname = video.split("/")[-1]
+vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+out_file = poses(inferencer, video, vis_out_dir_web, kpt_thr)
+fullname = os.path.join(vis_out_dir_web, vidname)
+#if i == "Estimate human 3d poses":
+# fullname = fullname[:-4]+"mp4" #Change to .mp4
+# out_files.append(fullname)
+#else:
+out_files.append(fullname)
+
+else:
+out_files.extend(out_file)
+
+print(out_files)

-def pose2dhand(video):
-add_dir = str(uuid.uuid4())
-vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-
-print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+return "track.mp4", out_files[0], out_files[1], out_files[2] # out_files[3]

-vis_out_dir = str(uuid.uuid4())
+def run():
+#https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
+check_web = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+
+description = """
+\n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
+"""

-result_generator = hand(video,
-vis_out_dir = vis_out_dir,
-thickness=2,
-rebase_keypoint_height=True,
-device="cuda")
+# Insert slider with kpt_thr
+web_kpthr = gr.Slider(0, 1, value=0.3)
+file_kpthr = gr.Slider(0, 1, value=0.3)

-result = [result for result in result_generator] #next(result_generator)
+webcam = gr.Interface(
+fn=infer,
+inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
+outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
+title = 'Tracking and pose estimation',
+description = description,
+allow_flagging=False
+)

-out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+file = gr.Interface(
+infer,
+inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr],
+outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
+title = 'Tracking and pose estimation',
+description = description,
+allow_flagging=False
+)

-return out_file
+demo = gr.TabbedInterface(
+interface_list=[file, webcam],
+tab_names=["From a File", "From your Webcam"]
+)

+demo.launch(server_name="0.0.0.0", server_port=7860)


-with gr.Blocks() as demo:
-with gr.Column():
-with gr.Tab("Upload video"):
-with gr.Row():
-with gr.Column():
-video_input = gr.Video(source="upload", type="filepath", height=512)
+if __name__ == "__main__":
+run()

-submit_pose_file = gr.Button("Make 2d pose estimation")
-submit_pose3d_file = gr.Button("Make 3d pose estimation")
-submit_hand_file = gr.Button("Make 2d hand estimation")
-submit_detect_file = gr.Button("Detect and track objects")
+# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
+# motionbert_ft_h36m-d80af323_20230531.pth
+# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
+# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
+# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
+# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
+# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
+# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py

-video_output = gr.Video(height=512)

-with gr.Tab("Record video with webcam"):
-with gr.Row():
-with gr.Column():
-webcam_input = gr.Video(source="webcam", height=512)
-
-submit_pose_web = gr.Button("Make 2d pose estimation")
-submit_pose3d_web = gr.Button("Make 3d pose estimation")
-submit_hand_web = gr.Button("Make 2d hand estimation")
-submit_detect_web = gr.Button("Detect and track objects")
+# 00000.mp4
+# 000000.mp4

-webcam_output = gr.Video(height=512)
-
-
-# From file
-submit_pose_file.click(fn=pose2d,
-inputs= video_input,
-outputs = video_output)
-
-submit_pose3d_file.click(fn=pose3d,
-inputs= video_input,
-outputs = video_output)
-
-submit_hand_file.click(fn=pose2dhand,
-inputs= video_input,
-outputs = video_output)
-
-submit_detect_file.click(fn=show_tracking,
-inputs= video_input,
-outputs = video_output)
-
-# Web
-submit_pose_web.click(fn=pose2d,
-inputs= video_input,
-outputs = video_output)
-
-submit_pose3d_web.click(fn=pose3d,
-inputs= video_input,
-outputs = video_output)
-
-submit_hand_web.click(fn=pose2dhand,
-inputs= video_input,
-outputs = video_output)
-
-submit_detect_web.click(fn=show_tracking,
-inputs= video_input,
-outputs = video_output)

-demo.launch()
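Note on the pattern introduced above: main.py now builds its UI with gr.Blocks, declaring components inside nested layout contexts and wiring each button to a handler via Button.click(fn=..., inputs=..., outputs=...). The code below is a minimal, hypothetical sketch of that wiring pattern only (the echo_video handler and component names are illustrative and not part of this commit); it assumes the same Gradio 3.x API already used in the Space (gr.Video(source=...)).

import gradio as gr

def echo_video(video_path):
    # Illustrative stand-in for pose2d / pose3d / pose2dhand / show_tracking:
    # a real handler runs inference and returns a path to a rendered video.
    return video_path

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            video_input = gr.Video(source="upload")  # or source="webcam"
            run_button = gr.Button("Run")
        video_output = gr.Video()

    # One click binding per button: inputs/outputs are Gradio components.
    run_button.click(fn=echo_video, inputs=video_input, outputs=video_output)

if __name__ == "__main__":
    demo.launch()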