#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact [email protected]
#
import math | |
import torch | |
from diff_LangSurf_rasterization import \ | |
GaussianRasterizationSettings as PlaneGaussianRasterizationSettings | |
from diff_LangSurf_rasterization import \ | |
GaussianRasterizer as PlaneGaussianRasterizer | |
from field_construction.scene.app_model import AppModel | |
from field_construction.scene.gaussian_model import GaussianModel | |
from field_construction.utils.graphics_utils import normal_from_depth_image | |
from field_construction.utils.pose_utils import (get_camera_from_tensor, | |
quadmultiply) | |
from field_construction.utils.sh_utils import eval_sh | |
def render_normal(viewpoint_cam, depth, offset=None, normal=None, scale=1):
    """Derive a normal map from a depth map using the camera calibration.

    The depth map (and ``offset``, when given) is subsampled with stride
    ``scale`` along its first two axes before being handed to
    ``normal_from_depth_image`` together with the camera matrices returned by
    ``viewpoint_cam.get_calib_matrix_nerf(scale=scale)``.

    Args:
        viewpoint_cam: Camera object exposing ``get_calib_matrix_nerf``.
        depth: Depth tensor; the original comment gives its shape as (H, W).
        offset: Optional tensor subsampled the same way as ``depth`` and
            forwarded to ``normal_from_depth_image``.
        normal: Unused by this function; kept for interface compatibility.
        scale: Subsampling stride, also forwarded to the calibration lookup.

    Returns:
        The helper's output with its last axis moved to the front; the
        original comment gives the result shape as (3, H, W).
    """
    intrinsics, extrinsics = viewpoint_cam.get_calib_matrix_nerf(scale=scale)

    # First sampled row/column for the strided subsampling (0 for scale <= 3).
    start = max(int(scale / 2) - 1, 0)
    grid = (slice(start, None, scale), slice(start, None, scale))

    sampled_offset = None if offset is None else offset[grid]

    target_device = depth.device
    normals_hwc = normal_from_depth_image(
        depth[grid],
        intrinsics.to(target_device),
        extrinsics.to(target_device),
        sampled_offset,
    )
    # Move the trailing component axis to the front.
    return normals_hwc.permute(2, 0, 1)
def _apply_appearance(app_model, viewpoint_camera, rendered_image):
    """Return ``exp(a) * rendered_image + b`` for this camera.

    ``(a, b)`` is the entry of ``app_model.appear_ab`` indexed by
    ``viewpoint_camera.uid``.
    """
    appear_ab = app_model.appear_ab[torch.tensor(viewpoint_camera.uid).cuda()]
    return torch.exp(appear_ab[0]) * rendered_image + appear_ab[1]


def render(
    viewpoint_camera,
    pc: GaussianModel,
    pipe,
    bg_color: torch.Tensor,
    scaling_modifier=1.0,
    override_color=None,
    app_model: AppModel = None,
    return_plane=True,
    return_depth_normal=True,
    include_feature=True,
    camera_pose=None,
):
    """
    Render the scene.

    Background tensor (bg_color) must be on GPU!

    Args:
        viewpoint_camera: Camera providing ``FoVx``/``FoVy``, image size,
            ``world_view_transform``, ``full_proj_transform``,
            ``projection_matrix``, ``camera_center`` and ``uid``.
        pc: Gaussian model supplying positions, opacities, scales, rotations,
            SH features, normals and language/instance features.
        pipe: Pipeline options; ``compute_cov3D_python``,
            ``convert_SHs_python`` and ``debug`` are read here.
        bg_color: Background colour passed to the rasterizer.
        scaling_modifier: Scale modifier forwarded to the rasterizer and to
            ``pc.get_covariance``.
        override_color: If given, used as precomputed per-Gaussian colours
            instead of the SH features.
        app_model: Optional appearance model; when used, an ``"app_image"``
            entry is added to the result.
        return_plane: When False, only colour/features are rasterized and the
            function returns early without the geometry outputs.
        return_depth_normal: When True (and ``return_plane`` is True), add
            ``"depth_normal"`` computed from the rendered plane depth.
        include_feature: When True, the language and instance features of
            ``pc`` are rasterized; otherwise 1-element zero placeholders are
            passed instead.
        camera_pose: Optional pose tensor. When given, Gaussian positions and
            rotations are transformed by it and the rasterizer is run with an
            identity view matrix.

    Returns:
        dict with keys ``"render"``, ``"viewspace_points"``,
        ``"viewspace_points_abs"``, ``"visibility_filter"``, ``"radii"``,
        ``"out_observe"``, ``"language_feature"``, ``"instance_feature"``;
        plus ``"rendered_normal"``, ``"plane_depth"``, ``"rendered_distance"``
        when ``return_plane`` is True, and optionally ``"app_image"`` and
        ``"depth_normal"``.
    """
    # Zero tensors used to make PyTorch return gradients of the 2D
    # (screen-space) means. The "+ 0" produces non-leaf tensors, which is why
    # retain_grad() is needed to keep their .grad populated.
    screenspace_points = torch.zeros_like(
        pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
    ) + 0
    screenspace_points_abs = torch.zeros_like(
        pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
    ) + 0
    try:
        screenspace_points.retain_grad()
        screenspace_points_abs.retain_grad()
    except RuntimeError:
        # retain_grad() raises RuntimeError when the tensor does not require
        # grad (e.g. when rendering under torch.no_grad()). Retaining grads is
        # best-effort, so that case is deliberately ignored.
        pass

    # Set up rasterization configuration.
    tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
    tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)

    if camera_pose is not None:
        # The pose is applied to the Gaussians themselves, so the rasterizer
        # is given an identity world-to-camera matrix.
        w2c = torch.eye(4).cuda()
        viewmatrix = w2c
        projmatrix = (
            w2c.unsqueeze(0).bmm(viewpoint_camera.projection_matrix.unsqueeze(0))
        ).squeeze(0)
        campos = w2c.inverse()[3, :3]

        rel_w2c = get_camera_from_tensor(camera_pose)
        gaussians_xyz = pc._xyz.clone()
        gaussians_rot = pc._rotation.clone()

        # Homogeneous coordinates so the 4x4 transform can be applied.
        xyz_ones = torch.ones(
            (gaussians_xyz.shape[0], 1), dtype=torch.float32, device="cuda"
        )
        xyz_homo = torch.cat((gaussians_xyz, xyz_ones), dim=1)
        means3D = (rel_w2c @ xyz_homo.T).T[:, :3]
        # The first four entries of camera_pose are used as the rotation
        # quaternion composed with each Gaussian's rotation.
        gaussians_rot_trans = quadmultiply(camera_pose[:4], gaussians_rot)
    else:
        viewmatrix = viewpoint_camera.world_view_transform
        projmatrix = viewpoint_camera.full_proj_transform
        campos = viewpoint_camera.camera_center
        means3D = pc.get_xyz
        gaussians_rot_trans = None

    means2D = screenspace_points
    means2D_abs = screenspace_points_abs
    opacity = pc.get_opacity

    # If precomputed 3d covariance is provided, use it. If not, then it will
    # be computed from scaling / rotation by the rasterizer.
    scales = None
    rotations = None
    cov3D_precomp = None
    if pipe.compute_cov3D_python:
        # NOTE(review): this path does not use gaussians_rot_trans, so the
        # covariance appears to ignore camera_pose — confirm this is intended.
        cov3D_precomp = pc.get_covariance(scaling_modifier)
    else:
        scales = pc.get_scaling
        rotations = gaussians_rot_trans if camera_pose is not None else pc.get_rotation

    # If precomputed colors are provided, use them. Otherwise, if it is
    # desired to precompute colors from SHs in Python, do it. If not, then
    # SH -> RGB conversion will be done by rasterizer.
    shs = None
    colors_precomp = None
    if override_color is not None:
        colors_precomp = override_color
    elif pipe.convert_SHs_python:
        shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree + 1) ** 2)
        dir_pp = pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1)
        dir_pp_normalized = dir_pp / dir_pp.norm(dim=1, keepdim=True)
        sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
        colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
    else:
        shs = pc.get_features

    if include_feature:
        language_feature_precomp = pc.get_language_feature
        instance_feature_precomp = pc.get_instance_feature
    else:
        # Placeholder inputs when features are not rasterized.
        language_feature_precomp = torch.zeros((1,), dtype=opacity.dtype, device=opacity.device)
        instance_feature_precomp = torch.zeros((1,), dtype=opacity.dtype, device=opacity.device)

    raster_settings = PlaneGaussianRasterizationSettings(
        image_height=int(viewpoint_camera.image_height),
        image_width=int(viewpoint_camera.image_width),
        tanfovx=tanfovx,
        tanfovy=tanfovy,
        bg=bg_color,
        scale_modifier=scaling_modifier,
        viewmatrix=viewmatrix,
        projmatrix=projmatrix,
        sh_degree=pc.active_sh_degree,
        campos=campos,
        prefiltered=False,
        render_geo=return_plane,
        debug=pipe.debug,
        include_feature=include_feature,
    )
    rasterizer = PlaneGaussianRasterizer(raster_settings=raster_settings)

    # Keyword arguments shared by both rasterizer invocations below.
    raster_inputs = dict(
        means3D=means3D,
        means2D=means2D,
        means2D_abs=means2D_abs,
        shs=shs,
        colors_precomp=colors_precomp,
        language_feature_precomp=language_feature_precomp,
        language_feature_instance_precomp=instance_feature_precomp,
        opacities=opacity,
        scales=scales,
        rotations=rotations,
        cov3D_precomp=cov3D_precomp,
    )

    if not return_plane:
        rendered_image, language_feature, instance_feature, radii, out_observe, _, _ = rasterizer(
            **raster_inputs
        )
        # Those Gaussians that were frustum culled or had a radius of 0 were
        # not visible. They will be excluded from value updates used in the
        # splitting criteria.
        return_dict = {
            "render": rendered_image,
            "viewspace_points": screenspace_points,
            "viewspace_points_abs": screenspace_points_abs,
            "visibility_filter": radii > 0,
            "radii": radii,
            "out_observe": out_observe,
            "language_feature": language_feature,
            "instance_feature": instance_feature,
        }
        if app_model is not None and pc.use_app:
            return_dict.update(
                {"app_image": _apply_appearance(app_model, viewpoint_camera, rendered_image)}
            )
        return return_dict

    # Per-Gaussian plane parameters in the camera frame: normal, a constant 1
    # channel (read back as alpha below) and the absolute dot product of the
    # normal with the camera-frame position (read back as distance).
    # NOTE(review): when camera_pose is given, means3D has already been
    # transformed by the pose, yet world_view_transform is applied again here —
    # confirm the camera's view transform is identity in that mode.
    global_normal = pc.get_normal(viewpoint_camera)
    local_normal = global_normal @ viewpoint_camera.world_view_transform[:3, :3]
    pts_in_cam = (
        means3D @ viewpoint_camera.world_view_transform[:3, :3]
        + viewpoint_camera.world_view_transform[3, :3]
    )
    local_distance = (local_normal * pts_in_cam).sum(-1).abs()

    input_all_map = torch.zeros((means3D.shape[0], 5), dtype=torch.float32, device="cuda")
    input_all_map[:, :3] = local_normal
    input_all_map[:, 3] = 1.0
    input_all_map[:, 4] = local_distance

    (
        rendered_image,
        language_feature,
        instance_feature,
        radii,
        out_observe,
        out_all_map,
        plane_depth,
    ) = rasterizer(all_map=input_all_map, **raster_inputs)

    # Channel layout mirrors input_all_map: 0-2 normal, 3 alpha, 4 distance.
    rendered_normal = out_all_map[0:3]
    rendered_alpha = out_all_map[3:4]
    rendered_distance = out_all_map[4:5]

    # Those Gaussians that were frustum culled or had a radius of 0 were not
    # visible. They will be excluded from value updates used in the splitting
    # criteria.
    return_dict = {
        "render": rendered_image,
        "viewspace_points": screenspace_points,
        "viewspace_points_abs": screenspace_points_abs,
        "visibility_filter": radii > 0,
        "radii": radii,
        "out_observe": out_observe,
        "rendered_normal": rendered_normal,
        "plane_depth": plane_depth,
        "rendered_distance": rendered_distance,
        "language_feature": language_feature,
        "instance_feature": instance_feature,
    }

    # NOTE(review): unlike the early-return path above, this branch does not
    # also require pc.use_app — kept as-is; confirm whether that is intended.
    if app_model is not None:
        return_dict.update(
            {"app_image": _apply_appearance(app_model, viewpoint_camera, rendered_image)}
        )

    if return_depth_normal:
        # Normals derived from the rendered depth, weighted by the (detached)
        # rendered alpha so no gradient flows through the weighting.
        depth_normal = render_normal(viewpoint_camera, plane_depth.squeeze()) * (rendered_alpha).detach()
        return_dict.update({"depth_normal": depth_normal})

    return return_dict