update half-body model

app.py CHANGED
@@ -70,6 +70,132 @@ def get_bbox(mask):
     scale_box = box.scale(1.1, width=width, height=height)
     return scale_box

+# def infer_preprocess_image(
+#     rgb_path,
+#     mask,
+#     intr,
+#     pad_ratio,
+#     bg_color,
+#     max_tgt_size,
+#     aspect_standard,
+#     enlarge_ratio,
+#     render_tgt_size,
+#     multiply,
+#     need_mask=True,
+# ):
+#     """inferece
+#     image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
+#                                    max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
+#                                    render_tgt_size=source_size, multiply=14, need_mask=True)
+
+#     """
+
+#     rgb = np.array(Image.open(rgb_path))
+#     rgb_raw = rgb.copy()
+
+#     bbox = get_bbox(mask)
+#     bbox_list = bbox.get_box()
+
+#     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+
+#     h, w, _ = rgb.shape
+#     assert w < h
+#     cur_ratio = h / w
+#     scale_ratio = cur_ratio / aspect_standard
+
+#     target_w = int(min(w * scale_ratio, h))
+#     offset_w = (target_w - w) // 2
+#     # resize to target ratio.
+#     if offset_w > 0:
+#         rgb = np.pad(
+#             rgb,
+#             ((0, 0), (offset_w, offset_w), (0, 0)),
+#             mode="constant",
+#             constant_values=255,
+#         )
+#         mask = np.pad(
+#             mask,
+#             ((0, 0), (offset_w, offset_w)),
+#             mode="constant",
+#             constant_values=0,
+#         )
+#     else:
+#         offset_w = -offset_w
+#         rgb = rgb[:,offset_w:-offset_w,:]
+#         mask = mask[:,offset_w:-offset_w]
+
+#     # resize to target ratio.
+
+#     rgb = np.pad(
+#         rgb,
+#         ((0, 0), (offset_w, offset_w), (0, 0)),
+#         mode="constant",
+#         constant_values=255,
+#     )
+
+#     mask = np.pad(
+#         mask,
+#         ((0, 0), (offset_w, offset_w)),
+#         mode="constant",
+#         constant_values=0,
+#     )
+
+#     rgb = rgb / 255.0  # normalize to [0, 1]
+#     mask = mask / 255.0
+
+#     mask = (mask > 0.5).astype(np.float32)
+#     rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
+
+#     # resize to specific size require by preprocessor of smplx-estimator.
+#     rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
+#     mask = resize_image_keepaspect_np(mask, max_tgt_size)
+
+#     # crop image to enlarge human area.
+#     rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
+#         rgb, mask, aspect_standard, enlarge_ratio
+#     )
+#     if intr is not None:
+#         intr[0, 2] -= offset_x
+#         intr[1, 2] -= offset_y
+
+#     # resize to render_tgt_size for training
+
+#     tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
+#         cur_hw=rgb.shape[:2],
+#         aspect_standard=aspect_standard,
+#         tgt_size=render_tgt_size,
+#         multiply=multiply,
+#     )
+
+#     rgb = cv2.resize(
+#         rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#     mask = cv2.resize(
+#         mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+
+#     if intr is not None:
+
+#         # ******************** Merge *********************** #
+#         intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
+#         assert (
+#             abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
+#         ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
+#         assert (
+#             abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
+#         ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
+
+#         # ******************** Merge *********************** #
+#         intr[0, 2] = rgb.shape[1] // 2
+#         intr[1, 2] = rgb.shape[0] // 2
+
+#     rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+#     mask = (
+#         torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
+#     )  # [1, 1, H, W]
+#     return rgb, mask, intr
+
 def infer_preprocess_image(
     rgb_path,
     mask,
@@ -99,21 +225,24 @@ def infer_preprocess_image(
     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]

+
     h, w, _ = rgb.shape
     assert w < h
     cur_ratio = h / w
     scale_ratio = cur_ratio / aspect_standard

+
     target_w = int(min(w * scale_ratio, h))
-    offset_w = (target_w - w) // 2
-    # resize to target ratio.
-    if offset_w > 0:
+    if target_w - w > 0:
+        offset_w = (target_w - w) // 2
+
         rgb = np.pad(
             rgb,
             ((0, 0), (offset_w, offset_w), (0, 0)),
             mode="constant",
             constant_values=255,
         )
+
         mask = np.pad(
             mask,
             ((0, 0), (offset_w, offset_w)),
@@ -121,25 +250,22 @@ def infer_preprocess_image(
             constant_values=0,
         )
     else:
-        offset_w = -offset_w
-        rgb = rgb[:,offset_w:-offset_w,:]
-        mask = mask[:,offset_w:-offset_w]
+        target_h = w * aspect_standard
+        offset_h = int(target_h - h)

-    # resize to target ratio.
-
-    rgb = np.pad(
-        rgb,
-        ((0, 0), (offset_w, offset_w), (0, 0)),
-        mode="constant",
-        constant_values=255,
-    )
+        rgb = np.pad(
+            rgb,
+            ((offset_h, 0), (0, 0), (0, 0)),
+            mode="constant",
+            constant_values=255,
+        )

-    mask = np.pad(
-        mask,
-        ((0, 0), (offset_w, offset_w)),
-        mode="constant",
-        constant_values=0,
-    )
+        mask = np.pad(
+            mask,
+            ((offset_h, 0), (0, 0)),
+            mode="constant",
+            constant_values=0,
+        )

     rgb = rgb / 255.0  # normalize to [0, 1]
     mask = mask / 255.0
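The hunk above is the core of the half-body change: when the bbox crop is too narrow for the target aspect ratio, the image is still padded symmetrically in width, but when it is too short (a half-body crop), the new code pads the top with background instead of center-cropping the sides as the old else-branch did. Below is a minimal standalone sketch of that rule, assuming (as in the diff) that aspect_standard is the target height/width ratio, the background is white (255), rgb is a uint8 HxWx3 array, and mask is HxW; pad_to_aspect is a hypothetical helper name, not a function in app.py.

import numpy as np

def pad_to_aspect(rgb, mask, aspect_standard):
    # Hypothetical helper: pad a bbox-cropped image to the target h/w ratio.
    h, w, _ = rgb.shape
    target_w = int(min(h / aspect_standard, h))  # == w * (h / w) / aspect_standard
    if target_w - w > 0:
        # Crop is too narrow: pad both sides with white; pad the mask with zeros.
        offset_w = (target_w - w) // 2
        rgb = np.pad(rgb, ((0, 0), (offset_w, offset_w), (0, 0)),
                     mode="constant", constant_values=255)
        mask = np.pad(mask, ((0, 0), (offset_w, offset_w)),
                      mode="constant", constant_values=0)
    else:
        # Half-body crop is too short: pad the top up to w * aspect_standard
        # (the old code center-cropped the width here instead).
        offset_h = int(w * aspect_standard - h)
        rgb = np.pad(rgb, ((offset_h, 0), (0, 0), (0, 0)),
                     mode="constant", constant_values=255)
        mask = np.pad(mask, ((offset_h, 0), (0, 0)),
                      mode="constant", constant_values=0)
    return rgb, mask

Padding the top rather than cropping the sides keeps the visible torso intact, which is what lets the same pipeline accept half-body photos.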
@@ -265,20 +391,19 @@ def launch_pretrained():
     from huggingface_hub import snapshot_download, hf_hub_download
     hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='assets.tar', local_dir="./")
    os.system("tar -xf assets.tar && rm assets.tar")
-    hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
-    os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
+    # hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
+    # os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
     hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
     os.system("tar -xf LHM_prior_model.tar && rm LHM_prior_model.tar")
+    # replace the weight of full body
+    hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='config.json', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
+    hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='model.safetensors', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")

 def launch_env_not_compile_with_cuda():
     os.system("pip install chumpy")
     os.system("pip uninstall -y basicsr")
     os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
     os.system("pip install numpy==1.23.0")
-    # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
-    # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
-    # os.system("pip install git+https://github.com/camenduru/simple-knn/")
-    # os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html")


 def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
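launch_pretrained() now fetches the half-body-capable checkpoint as individual files (config.json plus model.safetensors from 3DAIGC/LHM-500M-HF) into the step_060000 directory, instead of unpacking the LHM-0.5B tarball. A quick way to sanity-check the downloaded weights is sketched below; loading them via the safetensors package is an assumption for illustration, not something app.py itself does.

from safetensors.torch import load_file

# Path taken from the diff above.
ckpt_dir = "./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/"
state_dict = load_file(ckpt_dir + "model.safetensors")  # dict: tensor name -> torch.Tensor
print(f"loaded {len(state_dict)} tensors")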
@@ -662,7 +787,7 @@ def demo_lhm(pose_estimator, face_detector, lhm, cfg):
             )

             gr.HTML(
-                """<p><h4 style="color: red;"> Notes 1:
+                """<p><h4 style="color: red;"> Notes 1: Glad to tell you that we have supported both full-body or half-body input! Try to test the robustness with half-body images!.</h4></p>"""
             )
             gr.HTML(
                 """<p><h4 style="color: green;"> Notes 2: We drop ComfyUI Nodes of LHM on https://github.com/aigc3d/LHM/tree/feat/comfyui which support any character and any driven videos as input. Try it!</h4></p>"""