# Preview-rendering utilities: Metric3D metric-depth estimation + Open3D offscreen rendering.
import sys | |
import os | |
import numpy as np | |
import open3d as o3d | |
import torch | |
from mmengine import Config | |
from pyvirtualdisplay import Display | |
from tqdm import tqdm | |
sys.path.append("Metric3D") | |
def display_wrapper(func):
    """Decorator that runs *func* inside a headless virtual X display.

    Open3D's offscreen renderer needs a display server even though nothing
    is shown on screen; the virtual display is torn down when *func* returns.
    """
    import functools

    @functools.wraps(func)  # preserve the wrapped function's name/docstring
    def inner(*args, **kwargs):
        with Display(visible=False, size=(1920, 1080)):
            return func(*args, **kwargs)

    return inner
def relative_pose(rt: np.ndarray, mode: str, ref_index: int = 0) -> np.ndarray:
    """Re-express a batch of 4x4 pose matrices relative to one reference pose.

    :param rt: `np.ndarray` of shape (B, 4, 4).
    :param mode: "left"  -> inv(rt[ref_index]) @ rt,
                 "right" -> rt @ inv(rt[ref_index]).
    :param ref_index: index of the reference pose within the batch.
    :return: `np.ndarray` of shape (B, 4, 4); the reference pose maps to identity.
    :raises ValueError: if *mode* is neither "left" nor "right"
        (previously an unknown mode silently returned the input unchanged).
    """
    ref_inv = np.linalg.inv(rt[ref_index])  # matmul broadcasts over the batch
    if mode == "left":
        return ref_inv @ rt
    if mode == "right":
        return rt @ ref_inv
    raise ValueError(f"mode must be 'left' or 'right', got {mode!r}")
def project_point_cloud(
    frame: np.ndarray,
    depth: np.ndarray,
    intrinsics: list[float],
    remove_outliers: bool = True,
    voxel_size: float = None,
) -> o3d.geometry.PointCloud:
    """Back-project a depth map into a colored Open3D point cloud.

    :param frame: `np.ndarray` of shape (H, W, 3) and range (0, 255), used for colors.
    :param depth: `np.ndarray` of shape (H, W), metric depth.
    :param intrinsics: list of [fx, fy, cx, cy].
    :param remove_outliers: drop statistical outliers from the cloud.
    :param voxel_size: if given, voxel-downsample with this voxel edge length.
    :return: the reconstructed `o3d.geometry.PointCloud`.
    """
    from mono.utils.unproj_pcd import reconstruct_pcd

    points = reconstruct_pcd(depth, *intrinsics).reshape(-1, 3)
    colors = frame.reshape(-1, 3) / 255
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points.astype(np.double))
    pcd.colors = o3d.utility.Vector3dVector(colors.astype(np.double))
    if remove_outliers:
        _, ind = pcd.remove_statistical_outlier(nb_neighbors=12, std_ratio=3.0)
        pcd = pcd.select_by_index(ind)
    if voxel_size is not None:
        # Bug fix: the voxel size was previously hard-coded to 0.5,
        # silently ignoring the caller-supplied parameter.
        pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
    return pcd
def create_camera_frustum(
    frame: np.ndarray,
    intrinsic: o3d.camera.PinholeCameraIntrinsic,
    c2w: np.ndarray,
    frustum_scale: float = 0.5,
):
    """Build Open3D geometry visualizing a camera frustum in world space.

    Returns a wireframe pyramid (LineSet), a textured image plane
    (TriangleMesh), and a material carrying *frame* as its texture;
    both geometries are transformed into world coordinates via *c2w*.
    """
    width, height = intrinsic.width, intrinsic.height
    fx, fy = intrinsic.get_focal_length()
    cx, cy = intrinsic.get_principal_point()

    # Image-plane extents at depth `frustum_scale` along the optical axis.
    plane_z = frustum_scale
    ext_x = (width - cx) * plane_z / fx
    ext_y = (height - cy) * plane_z / fy

    # Apex at the camera origin followed by the four image-plane corners.
    corners = [
        [0, 0, 0],
        [-ext_x, -ext_y, plane_z],
        [ext_x, -ext_y, plane_z],
        [ext_x, ext_y, plane_z],
        [-ext_x, ext_y, plane_z],
    ]
    # Four rays from the apex plus the rectangle around the image plane.
    edges = [[0, k] for k in (1, 2, 3, 4)] + [[1, 2], [2, 3], [3, 4], [4, 1]]

    wire = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(corners),
        lines=o3d.utility.Vector2iVector(edges),
    )
    wire.paint_uniform_color([0.8, 0.2, 0.2])
    wire.transform(c2w)

    plane = o3d.geometry.TriangleMesh(
        vertices=o3d.utility.Vector3dVector(corners[1:]),
        triangles=o3d.utility.Vector3iVector([[0, 1, 2], [0, 2, 3]]),
    )
    # Per-triangle-vertex UVs mapping the frame onto the two triangles.
    plane.triangle_uvs = o3d.utility.Vector2dVector(
        np.array([[0, 1], [1, 1], [1, 0], [0, 1], [1, 0], [0, 0]])
    )
    plane.transform(c2w)

    texture = o3d.visualization.rendering.MaterialRecord()
    texture.shader = "defaultUnlit"
    texture.albedo_img = o3d.geometry.Image(frame)
    return wire, plane, texture
class Previewer:
    """Depth-based scene previewer.

    Predicts metric depth for frames with Metric3D and re-renders the
    resulting colored point clouds from novel camera poses with Open3D's
    offscreen renderer. The checkpoint is loaded lazily on first use.
    """

    def __init__(self, model_path: str = "pretrained/metric_depth_vit_large_800k.pth"):
        self.model_path = model_path
        # Populated by init_depth_predictor() on first estimate_depths() call.
        self.depth_predictor = None

    def init_depth_predictor(self):
        """Load the Metric3D ViT-large model onto the GPU."""
        from mono.model.monodepth_model import get_configured_monodepth_model
        from mono.utils.running import load_ckpt

        self.config = Config.fromfile(
            "Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py"
        )
        model = get_configured_monodepth_model(self.config)
        model = torch.nn.DataParallel(model).cuda().eval().requires_grad_(False)
        model, _, _, _ = load_ckpt(self.model_path, model, strict_match=False)
        self.depth_predictor = model

    def estimate_depths(
        self, frames: np.ndarray, intrinsics: list[float]
    ) -> np.ndarray:
        """
        :param frames: `np.ndarray` of shape (B, H, W, C) and range (0, 255)
        :param intrinsics: list of [fx, fy, cx, cy]
        :return depths: `np.ndarray` of shape (B, H, W) and range (0, 300)
        """
        from mono.utils.do_test import transform_test_data_scalecano

        if self.depth_predictor is None:
            self.init_depth_predictor()
        B, H, W, C = frames.shape
        rgb_inputs, pads = [], []
        for frame in frames:
            rgb_input, _, pad, label_scale_factor = transform_test_data_scalecano(
                frame, intrinsics, self.config.data_basic
            )
            rgb_inputs.append(rgb_input)
            pads.append(pad)
        with torch.inference_mode(), torch.autocast("cuda"):  # b c h w
            depths, _, _ = self.depth_predictor.module.inference(
                {"input": torch.stack(rgb_inputs).cuda(), "pad_info": pads}
            )
        # NOTE: all frames share H, W and intrinsics, so the `pad` and
        # `label_scale_factor` left over from the last loop iteration are
        # assumed identical across the batch and applied batch-wide.
        _, _, h, w = depths.shape
        depths = depths[..., pad[0] : h - pad[1], pad[2] : w - pad[3]]
        depths = depths * self.config.data_basic.depth_range[-1] / label_scale_factor
        depths = torch.nn.functional.interpolate(depths, (H, W), mode="bilinear")
        return depths.clamp(0, 300).squeeze(1).cpu().numpy()

    def render_previews(
        self,
        frame: np.ndarray,
        depth: np.ndarray,
        intrinsics: list[float],
        w2cs: np.ndarray,
    ):
        """
        :param frame: `np.ndarray` of shape (H, W, C) and range (0, 255)
        :param depth: `np.ndarray` of shape (H, W) and range (0, 300)
        :param intrinsics: list of [fx, fy, cx, cy]
        :param w2cs: `np.ndarray` of shape (B, 4, 4), one world-to-camera
            matrix per preview (docstring previously claimed (4, 4))
        :return: previews: `np.ndarray of shape (B, H, W, C) and range (0, 255)`
        """
        H, W, _ = frame.shape
        K = o3d.camera.PinholeCameraIntrinsic(W, H, *intrinsics)
        pcd = project_point_cloud(frame, depth, intrinsics)
        mat = o3d.visualization.rendering.MaterialRecord()
        mat.shader = "defaultUnlit"
        mat.point_size = 2
        renderer = o3d.visualization.rendering.OffscreenRenderer(W, H)
        renderer.scene.set_background(np.array([1.0, 1.0, 1.0, 1.0]))
        renderer.scene.view.set_post_processing(False)
        renderer.scene.clear_geometry()
        renderer.scene.add_geometry("point cloud", pcd, mat)
        previews = []
        # Re-express all poses relative to the first so frame 0 renders
        # from the original viewpoint.
        for w2c in tqdm(relative_pose(w2cs, mode="left")):
            renderer.setup_camera(K, w2c)
            # Convert the Open3D Image explicitly so np.stack is guaranteed
            # to produce a (B, H, W, C) uint8 array.
            previews.append(np.asarray(renderer.render_to_image()))
        return np.stack(previews)

    def render_4d_scene(
        self,
        frames: np.ndarray,
        depths: np.ndarray,
        intrinsics: list[float],
        w2cs: np.ndarray,
    ):
        """
        :param frames: `np.ndarray` of shape (B, H, W, C) and range (0, 255)
        :param depths: `np.ndarray` of shape (B, H, W) and range (0, 300)
        :param intrinsics: list of [fx, fy, cx, cy]
        :param w2cs: `np.ndarray` of shape (B, 4, 4), one world-to-camera
            matrix per frame (docstring previously claimed (4, 4))
        :return: renderings: `np.ndarray of shape (B, H, W, C) and range (0, 255)`
        """
        F, H, W, _ = frames.shape
        K = o3d.camera.PinholeCameraIntrinsic(W, H, *intrinsics)
        renderer = o3d.visualization.rendering.OffscreenRenderer(W, H)
        renderer.scene.set_background(np.array([1.0, 1.0, 1.0, 1.0]))
        renderer.scene.view.set_post_processing(False)
        # Fixed third-person viewpoint: offset from the first camera,
        # looking at a point ahead of it along its optical axis.
        c2w_0 = np.linalg.inv(w2cs[0])
        eye_pos_world = (c2w_0 @ np.array([0.3, -0.5, -0.5, 1]))[:3]
        center_pos_world = (c2w_0 @ np.array([0, 0, 2, 1]))[:3]
        up_vector_world = np.array([0, -1, 0])
        renderer.scene.camera.look_at(center_pos_world, eye_pos_world, up_vector_world)
        point_material = o3d.visualization.rendering.MaterialRecord()
        point_material.shader = "defaultUnlit"
        point_material.point_size = 2
        line_material = o3d.visualization.rendering.MaterialRecord()
        line_material.shader = "unlitLine"
        line_material.line_width = 3
        renderings = []
        for frame, depth, w2c in tqdm(zip(frames, depths, w2cs), total=F):
            c2w = np.linalg.inv(w2c)
            pcd = project_point_cloud(frame, depth, intrinsics)
            pcd.transform(c2w)
            wire_frame, frustum, frustum_material = create_camera_frustum(frame, K, c2w)
            # Rebuild the scene each frame: point cloud + frustum of the
            # current camera.
            renderer.scene.clear_geometry()
            renderer.scene.add_geometry("point cloud", pcd, point_material)
            renderer.scene.add_geometry("wire frame", wire_frame, line_material)
            renderer.scene.add_geometry("frustum", frustum, frustum_material)
            renderings.append(np.asarray(renderer.render_to_image()))
        return np.stack(renderings)
if __name__ == "__main__":
    # Smoke test: confirm an offscreen renderer can be constructed inside
    # a headless virtual display.
    with Display(visible=False, size=(512, 320)):
        _ = o3d.visualization.rendering.OffscreenRenderer(512, 320)