omnipart's picture
init
491eded
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact [email protected]
#
import torch
import math
from easydict import EasyDict as edict
import numpy as np
from ..representations.gaussian import Gaussian
from .sh_utils import eval_sh
import torch.nn.functional as F
from easydict import EasyDict as edict
def intrinsics_to_projection(
intrinsics: torch.Tensor,
near: float,
far: float,
) -> torch.Tensor:
"""
Convert OpenCV-style camera intrinsics matrix to OpenGL perspective projection matrix.
This function transforms a standard 3x3 camera intrinsics matrix into a 4x4 perspective
projection matrix compatible with OpenGL rendering pipeline. The resulting matrix
properly handles the coordinate system differences between computer vision and
computer graphics conventions.
Args:
intrinsics (torch.Tensor): [3, 3] OpenCV intrinsics matrix containing focal lengths
and principal point coordinates
near (float): Distance to the near clipping plane (must be positive)
far (float): Distance to the far clipping plane (must be greater than near)
Returns:
torch.Tensor: [4, 4] OpenGL perspective projection matrix for rendering
"""
# Extract focal lengths and principal point from intrinsics matrix
fx, fy = intrinsics[0, 0], intrinsics[1, 1] # Focal lengths in x and y directions
cx, cy = intrinsics[0, 2], intrinsics[1, 2] # Principal point coordinates
# Initialize empty 4x4 projection matrix
ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
# Fill in the projection matrix components
ret[0, 0] = 2 * fx # Scale for x axis based on horizontal focal length
ret[1, 1] = 2 * fy # Scale for y axis based on vertical focal length
ret[0, 2] = 2 * cx - 1 # X offset based on principal point (OpenCV to OpenGL conversion)
ret[1, 2] = - 2 * cy + 1 # Y offset based on principal point (with flipped Y axis)
ret[2, 2] = far / (far - near) # Handle depth mapping to clip space
ret[2, 3] = near * far / (near - far) # Term for perspective division in clip space
ret[3, 2] = 1. # Enable perspective division
return ret
def render(viewpoint_camera, pc : Gaussian, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, override_color = None):
"""
Render the scene using 3D Gaussians.
This function performs the rasterization of 3D Gaussian points into a 2D image from a given viewpoint.
Args:
viewpoint_camera: Camera parameters including position, view transform, and projection
pc (Gaussian): Point cloud represented as 3D Gaussians
pipe: Pipeline configuration parameters
bg_color (torch.Tensor): Background color tensor (must be on GPU)
scaling_modifier (float): Scale modifier for the Gaussian splats
override_color (torch.Tensor, optional): Custom colors to override computed SH-based colors
Returns:
edict: Dictionary containing rendered image, viewspace points, visibility filter, and radii information
"""
# Lazy import of the rasterization module to avoid circular dependencies
# or to improve startup performance when not needed immediately
if 'GaussianRasterizer' not in globals():
from diff_gaussian_rasterization import GaussianRasterizer, GaussianRasterizationSettings
# Create zero tensor for screen space points
# This tensor will hold gradients of the 2D (screen-space) means for optimization
screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
try:
screenspace_points.retain_grad()
except:
pass
# Calculate camera frustum parameters from the field of view
tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
# Get kernel size from the pipeline configuration
kernel_size = pipe.kernel_size
# Initialize subpixel offset for all pixels (used for anti-aliasing)
subpixel_offset = torch.zeros((int(viewpoint_camera.image_height), int(viewpoint_camera.image_width), 2),
dtype=torch.float32, device="cuda")
# Configure the Gaussian rasterization settings with all necessary parameters
raster_settings = GaussianRasterizationSettings(
image_height=int(viewpoint_camera.image_height),
image_width=int(viewpoint_camera.image_width),
tanfovx=tanfovx,
tanfovy=tanfovy,
kernel_size=kernel_size,
subpixel_offset=subpixel_offset,
bg=bg_color,
scale_modifier=scaling_modifier,
viewmatrix=viewpoint_camera.world_view_transform,
projmatrix=viewpoint_camera.full_proj_transform,
sh_degree=pc.active_sh_degree,
campos=viewpoint_camera.camera_center,
prefiltered=False,
debug=pipe.debug
)
# Create the rasterizer with the configured settings
rasterizer = GaussianRasterizer(raster_settings=raster_settings)
# Get the Gaussian 3D positions and opacities
means3D = pc.get_xyz
means2D = screenspace_points
opacity = pc.get_opacity
# Handle covariance computation options
# Either use precomputed 3D covariance or let the rasterizer compute it from scales and rotations
scales = None
rotations = None
cov3D_precomp = None
if pipe.compute_cov3D_python:
# Compute 3D covariances in Python before rasterization
cov3D_precomp = pc.get_covariance(scaling_modifier)
else:
# Let the rasterizer compute covariances from scale and rotation
scales = pc.get_scaling
rotations = pc.get_rotation
# Handle color computation options
# Either use override colors, precomputed colors from SHs, or let the rasterizer compute colors from SHs
shs = None
colors_precomp = None
if override_color is None:
if pipe.convert_SHs_python:
# Convert spherical harmonics to RGB colors in Python
shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2)
# Calculate the view direction from Gaussian center to camera
dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1))
dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True)
# Evaluate spherical harmonics to get RGB colors
sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
# Apply offset and clamp to ensure valid color values
colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
else:
# Let the rasterizer convert SHs to colors
shs = pc.get_features
else:
# Use provided override colors
colors_precomp = override_color
# Perform the rasterization to generate the final rendered image
# This projects the 3D Gaussians to 2D and blends them according to their opacities
rendered_image, radii = rasterizer(
means3D = means3D,
means2D = means2D,
shs = shs,
colors_precomp = colors_precomp,
opacities = opacity,
scales = scales,
rotations = rotations,
cov3D_precomp = cov3D_precomp
)
# Return the rendering results in a dictionary
# radii > 0 creates a filter for visible Gaussians (those not frustum-culled)
return edict({"render": rendered_image,
"viewspace_points": screenspace_points,
"visibility_filter" : radii > 0,
"radii": radii})
class GaussianRenderer:
"""
A renderer for Gaussian Splatting that converts 3D Gaussian primitives into 2D images.
This renderer projects 3D Gaussian splats onto a 2D image plane using the provided
camera parameters, handling the rasterization process through an optimized backend.
Args:
rendering_options (dict): Configuration options for rendering including resolution,
depth range, background color, and supersampling level.
"""
def __init__(self, rendering_options={}) -> None:
# Initialize default pipeline parameters
self.pipe = edict({
"kernel_size": 0.1, # Size of the Gaussian kernel for rasterization
"convert_SHs_python": False, # Whether to convert Spherical Harmonics to colors in Python
"compute_cov3D_python": False, # Whether to compute 3D covariance matrices in Python
"scale_modifier": 1.0, # Global scaling factor for all Gaussians
"debug": False # Enable/disable debug mode
})
# Initialize default rendering options
self.rendering_options = edict({
"resolution": None, # Output image resolution (width and height)
"near": None, # Near clipping plane distance
"far": None, # Far clipping plane distance
"ssaa": 1, # Super-sampling anti-aliasing factor (1 = disabled)
"bg_color": 'random', # Background color ('random' or specific color)
})
# Update with user-provided options
self.rendering_options.update(rendering_options)
# Initialize background color (will be set during rendering)
self.bg_color = None
def render(
self,
gausssian: Gaussian,
extrinsics: torch.Tensor,
intrinsics: torch.Tensor,
colors_overwrite: torch.Tensor = None
) -> edict:
"""
Render the 3D Gaussian representation from a given camera viewpoint.
This method projects the 3D Gaussians onto a 2D image plane using the provided camera parameters,
handling the full rendering pipeline including projection, rasterization, and optional supersampling.
Args:
gaussian: The Gaussian representation containing positions, features, and other attributes
extrinsics (torch.Tensor): (4, 4) camera extrinsics matrix defining camera position and orientation
intrinsics (torch.Tensor): (3, 3) camera intrinsics matrix with focal lengths and principal point
colors_overwrite (torch.Tensor): Optional (N, 3) tensor to override Gaussian colors
Returns:
edict containing:
color (torch.Tensor): (3, H, W) rendered color image
"""
# Extract rendering parameters from options
resolution = self.rendering_options["resolution"]
near = self.rendering_options["near"]
far = self.rendering_options["far"]
ssaa = self.rendering_options["ssaa"] # Super-sampling anti-aliasing factor
# Set background color based on rendering options
if self.rendering_options["bg_color"] == 'random':
# Randomly choose either black or white background
self.bg_color = torch.zeros(3, dtype=torch.float32, device="cuda")
if np.random.rand() < 0.5:
self.bg_color += 1
else:
# Use specified background color
self.bg_color = torch.tensor(self.rendering_options["bg_color"], dtype=torch.float32, device="cuda")
# Prepare camera parameters for the renderer
view = extrinsics # World-to-camera transform
# Convert OpenCV intrinsics to OpenGL projection matrix
perspective = intrinsics_to_projection(intrinsics, near, far)
# Extract camera center from extrinsics (inverse of view matrix)
camera = torch.inverse(view)[:3, 3]
# Calculate field of view from focal lengths
focalx = intrinsics[0, 0]
focaly = intrinsics[1, 1]
fovx = 2 * torch.atan(0.5 / focalx) # Horizontal FoV in radians
fovy = 2 * torch.atan(0.5 / focaly) # Vertical FoV in radians
# Build complete camera parameter dictionary
camera_dict = edict({
"image_height": resolution * ssaa, # Apply supersampling if enabled
"image_width": resolution * ssaa,
"FoVx": fovx,
"FoVy": fovy,
"znear": near,
"zfar": far,
"world_view_transform": view.T.contiguous(), # Transpose for OpenGL convention
"projection_matrix": perspective.T.contiguous(),
"full_proj_transform": (perspective @ view).T.contiguous(), # Combined projection and view
"camera_center": camera
})
# Perform the actual rendering using the 3D Gaussian rasterizer
render_ret = render(camera_dict, gausssian, self.pipe, self.bg_color,
override_color=colors_overwrite, scaling_modifier=self.pipe.scale_modifier)
# Handle supersampling by downsampling the high-resolution render to the target resolution
if ssaa > 1:
# Use bilinear interpolation with antialiasing to downsample the image
render_ret.render = F.interpolate(render_ret.render[None],
size=(resolution, resolution),
mode='bilinear',
align_corners=False,
antialias=True).squeeze()
# Return the final rendered color image
ret = edict({
'color': render_ret['render']
})
return ret