# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np

from .dw_onnx.cv_ox_det import inference_detector as inference_onnx_yolox
from .dw_onnx.cv_ox_yolo_nas import inference_detector as inference_onnx_yolo_nas
from .dw_onnx.cv_ox_pose import inference_pose as inference_onnx_pose

from .dw_torchscript.jit_det import inference_detector as inference_jit_yolox
from .dw_torchscript.jit_pose import inference_pose as inference_jit_pose

from typing import List, Optional
from .types import PoseResult, BodyResult, Keypoint
from timeit import default_timer
import os
from controlnet_aux.dwpose.util import guess_onnx_input_shape_dtype, get_model_type, get_ort_providers, is_model_torchscript
import torch
import torch.utils.benchmark.utils.timer as torch_timer

class Wholebody:
    def __init__(self, det_model_path: Optional[str] = None, pose_model_path: Optional[str] = None, torchscript_device="cuda"):
        self.det_filename = det_model_path and os.path.basename(det_model_path)
        self.pose_filename = pose_model_path and os.path.basename(pose_model_path)
        self.det, self.pose = None, None
        # one of: None, "ort", "cv2", "torchscript"
        self.det_model_type = get_model_type("DWPose", self.det_filename)
        self.pose_model_type = get_model_type("DWPose", self.pose_filename)
        # Always run the cv2.dnn backend on CPU: CUDA support requires
        # manually building OpenCV through cmake.
        cv2_device = 'cpu'
        cv2_backend = cv2.dnn.DNN_BACKEND_OPENCV if cv2_device == 'cpu' else cv2.dnn.DNN_BACKEND_CUDA
        cv2_target = cv2.dnn.DNN_TARGET_CPU if cv2_device == 'cpu' else cv2.dnn.DNN_TARGET_CUDA
        ort_providers = get_ort_providers()

        if self.det_model_type is None:
            pass
        elif self.det_model_type == "ort":
            try:
                import onnxruntime as ort
                self.det = ort.InferenceSession(det_model_path, providers=ort_providers)
            except Exception:
                # self.det is still None here, so report the providers we attempted
                print(f"Failed to load onnxruntime with {ort_providers}.\nPlease change EP_list in config.yaml and restart ComfyUI")
                self.det = ort.InferenceSession(det_model_path, providers=["CPUExecutionProvider"])
        elif self.det_model_type == "cv2":
            try:
                self.det = cv2.dnn.readNetFromONNX(det_model_path)
                self.det.setPreferableBackend(cv2_backend)
                self.det.setPreferableTarget(cv2_target)
            except Exception:
                print("TopK operators may not work in your OpenCV build; falling back to onnxruntime with CPUExecutionProvider")
                try:
                    import onnxruntime as ort
                    self.det = ort.InferenceSession(det_model_path, providers=["CPUExecutionProvider"])
                except Exception:
                    print(f"Failed to load {det_model_path}; please use a different model instead")
        else:
            self.det = torch.jit.load(det_model_path)
            self.det.to(torchscript_device)

        if self.pose_model_type is None:
            pass
        elif self.pose_model_type == "ort":
            try:
                import onnxruntime as ort
                self.pose = ort.InferenceSession(pose_model_path, providers=ort_providers)
            except Exception:
                # self.pose is still None here, so report the providers we attempted
                print(f"Failed to load onnxruntime with {ort_providers}.\nPlease change EP_list in config.yaml and restart ComfyUI")
                self.pose = ort.InferenceSession(pose_model_path, providers=["CPUExecutionProvider"])
        elif self.pose_model_type == "cv2":
            self.pose = cv2.dnn.readNetFromONNX(pose_model_path)
            self.pose.setPreferableBackend(cv2_backend)
            self.pose.setPreferableTarget(cv2_target)
        else:
            self.pose = torch.jit.load(pose_model_path)
            self.pose.to(torchscript_device)
        
        if self.pose_filename is not None:
            self.pose_input_size, _ = guess_onnx_input_shape_dtype(self.pose_filename)

    def __call__(self, oriImg) -> Optional[np.ndarray]:
        """Run detection then pose estimation; returns (num_people, 134, 3)
        keypoints as (x, y, score) in OpenPose order, or None when no person
        is detected."""
        if is_model_torchscript(self.det):
            det_start = torch_timer.timer()
            det_result = inference_jit_yolox(self.det, oriImg, detect_classes=[0])
            print(f"DWPose: Bbox {((torch_timer.timer() - det_start) * 1000):.2f}ms")
        else:
            det_start = default_timer()
            if "yolox" in self.det_filename:
                det_result = inference_onnx_yolox(self.det, oriImg, detect_classes=[0], dtype=np.float32)
            else:
                # FP16 and INT8 YOLO-NAS models take uint8 input
                det_result = inference_onnx_yolo_nas(self.det, oriImg, detect_classes=[0], dtype=np.uint8)
            print(f"DWPose: Bbox {((default_timer() - det_start) * 1000):.2f}ms")
        if (det_result is None) or (det_result.shape[0] == 0):
            return None

        if is_model_torchscript(self.pose):
            pose_start = torch_timer.timer()
            keypoints, scores = inference_jit_pose(self.pose, det_result, oriImg, self.pose_input_size)
            print(f"DWPose: Pose {((torch_timer.timer() - pose_start) * 1000):.2f}ms on {det_result.shape[0]} people\n")
        else:
            pose_start = default_timer()
            _, pose_onnx_dtype = guess_onnx_input_shape_dtype(self.pose_filename)
            keypoints, scores = inference_onnx_pose(self.pose, det_result, oriImg, self.pose_input_size, dtype=pose_onnx_dtype)
            print(f"DWPose: Pose {((default_timer() - pose_start) * 1000):.2f}ms on {det_result.shape[0]} people\n")

        keypoints_info = np.concatenate(
            (keypoints, scores[..., None]), axis=-1)
        # synthesize a neck keypoint as the midpoint of the shoulders
        # (indices 5 and 6 in the MMPose/COCO layout)
        neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
        # mark the neck visible only when both shoulder scores exceed 0.3
        neck[:, 2:4] = np.logical_and(
            keypoints_info[:, 5, 2:4] > 0.3,
            keypoints_info[:, 6, 2:4] > 0.3).astype(int)
        new_keypoints_info = np.insert(
            keypoints_info, 17, neck, axis=1)
        # remap from MMPose (COCO) ordering to OpenPose ordering
        mmpose_idx = [
            17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
        ]
        openpose_idx = [
            1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
        ]
        new_keypoints_info[:, openpose_idx] = \
            new_keypoints_info[:, mmpose_idx]
        keypoints_info = new_keypoints_info

        return keypoints_info

    @staticmethod
    def format_result(keypoints_info: Optional[np.ndarray]) -> List[PoseResult]:
        """Split the flat keypoint array into body, face, and hand parts,
        dropping any keypoint with a score below 0.3."""
        def format_keypoint_part(
            part: np.ndarray,
        ) -> Optional[List[Optional[Keypoint]]]:
            keypoints = [
                Keypoint(x, y, score, i) if score >= 0.3 else None
                for i, (x, y, score) in enumerate(part)
            ]
            return (
                None if all(keypoint is None for keypoint in keypoints) else keypoints
            )

        def total_score(keypoints: Optional[List[Optional[Keypoint]]]) -> float:
            return (
                sum(keypoint.score for keypoint in keypoints if keypoint is not None)
                if keypoints is not None
                else 0.0
            )

        pose_results = []
        if keypoints_info is None:
            return pose_results

        for instance in keypoints_info:
            body_keypoints = format_keypoint_part(instance[:18]) or ([None] * 18)
            left_hand = format_keypoint_part(instance[92:113])
            right_hand = format_keypoint_part(instance[113:134])
            face = format_keypoint_part(instance[24:92])

            # The OpenPose face format has 70 points, while DWPose provides
            # only 68, so pad the last two with the body's eye keypoints
            # (indices 14 and 15).
            if face is not None:
                face.append(body_keypoints[14])
                face.append(body_keypoints[15])

            body = BodyResult(
                body_keypoints, total_score(body_keypoints), len(body_keypoints)
            )
            pose_results.append(PoseResult(body, left_hand, right_hand, face))

        return pose_results
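
# A minimal usage sketch, assuming hypothetical model and image paths; swap in
# whichever DWPose detector/pose files you actually have. `__call__` returns
# None when nobody is detected, and `format_result` handles that case.
if __name__ == "__main__":
    estimator = Wholebody(
        det_model_path="yolox_l.onnx",            # hypothetical path
        pose_model_path="dw-ll_ucoco_384.onnx",   # hypothetical path
    )
    oriImg = cv2.imread("person.jpg")             # hypothetical image, HxWx3
    keypoints_info = estimator(oriImg)
    poses = Wholebody.format_result(keypoints_info)
    print(f"detected {len(poses)} pose(s)")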