#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use 
# under the terms of the LICENSE.md file.
#
# For inquiries contact  [email protected]
#

import torch
import math
import numpy as np
import torch.nn.functional as F
from easydict import EasyDict as edict

from ..representations.gaussian import Gaussian
from .sh_utils import eval_sh


def intrinsics_to_projection(
    intrinsics: torch.Tensor,
    near: float,
    far: float,
    ) -> torch.Tensor:
    """
    Convert OpenCV-style camera intrinsics matrix to OpenGL perspective projection matrix.
    
    This function transforms a standard 3x3 camera intrinsics matrix into a 4x4 perspective
    projection matrix compatible with OpenGL rendering pipeline. The resulting matrix
    properly handles the coordinate system differences between computer vision and
    computer graphics conventions.
    
    Args:
        intrinsics (torch.Tensor): [3, 3] OpenCV-style intrinsics matrix, normalized by
                    image size (focal lengths and principal point as fractions of width/height)
        near (float): Distance to the near clipping plane (must be positive)
        far (float): Distance to the far clipping plane (must be greater than near)
    
    Returns:
        torch.Tensor: [4, 4] OpenGL perspective projection matrix for rendering
    """
    
    # Extract focal lengths and principal point from intrinsics matrix
    fx, fy = intrinsics[0, 0], intrinsics[1, 1]  # Focal lengths in x and y directions
    cx, cy = intrinsics[0, 2], intrinsics[1, 2]  # Principal point coordinates
    
    # Initialize empty 4x4 projection matrix
    ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
    
    # Fill in the projection matrix components
    ret[0, 0] = 2 * fx  # Scale for x axis based on horizontal focal length
    ret[1, 1] = 2 * fy  # Scale for y axis based on vertical focal length
    ret[0, 2] = 2 * cx - 1  # X offset based on principal point (OpenCV to OpenGL conversion)
    ret[1, 2] = - 2 * cy + 1  # Y offset based on principal point (with flipped Y axis)
    ret[2, 2] = far / (far - near)  # Handle depth mapping to clip space
    ret[2, 3] = near * far / (near - far)  # Term for perspective division in clip space
    ret[3, 2] = 1.  # Enable perspective division
    
    return ret
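
# Example (a minimal sketch, assuming intrinsics normalized by image size, so
# fx, fy, cx, cy are fractions of the image width/height):
#
#   K = torch.tensor([[1.0, 0.0, 0.5],
#                     [0.0, 1.0, 0.5],
#                     [0.0, 0.0, 1.0]])
#   proj = intrinsics_to_projection(K, near=0.1, far=100.0)
#   # proj maps homogeneous camera-space points to OpenGL clip space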

def render(viewpoint_camera, pc: Gaussian, pipe, bg_color: torch.Tensor, scaling_modifier=1.0, override_color=None):
    """
    Render the scene using 3D Gaussians.
    
    This function performs the rasterization of 3D Gaussian points into a 2D image from a given viewpoint.
    
    Args:
        viewpoint_camera: Camera parameters including position, view transform, and projection
        pc (Gaussian): Point cloud represented as 3D Gaussians
        pipe: Pipeline configuration parameters
        bg_color (torch.Tensor): Background color tensor (must be on GPU)
        scaling_modifier (float): Scale modifier for the Gaussian splats
        override_color (torch.Tensor, optional): Custom colors to override computed SH-based colors
    
    Returns:
        edict: Dictionary containing rendered image, viewspace points, visibility filter, and radii information
    """
    # Lazy import of the rasterization backend so the CUDA extension is only
    # loaded when rendering is actually invoked. The import binds local names,
    # so it runs on every call; Python's module cache keeps this cheap.
    from diff_gaussian_rasterization import GaussianRasterizer, GaussianRasterizationSettings
    
    # Create zero tensor for screen space points
    # This tensor will hold gradients of the 2D (screen-space) means for optimization
    screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
    try:
        screenspace_points.retain_grad()
    except Exception:
        # retain_grad() can fail when gradients are not being tracked
        # (e.g. under torch.no_grad()); rendering proceeds either way
        pass
        
    # Calculate camera frustum parameters from the field of view
    tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
    tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
    
    # Get kernel size from the pipeline configuration
    kernel_size = pipe.kernel_size
    
    # Initialize subpixel offset for all pixels (used for anti-aliasing)
    subpixel_offset = torch.zeros((int(viewpoint_camera.image_height), int(viewpoint_camera.image_width), 2), 
                                dtype=torch.float32, device="cuda")

    # Configure the Gaussian rasterization settings with all necessary parameters
    raster_settings = GaussianRasterizationSettings(
        image_height=int(viewpoint_camera.image_height),
        image_width=int(viewpoint_camera.image_width),
        tanfovx=tanfovx,
        tanfovy=tanfovy,
        kernel_size=kernel_size,
        subpixel_offset=subpixel_offset,
        bg=bg_color,
        scale_modifier=scaling_modifier,
        viewmatrix=viewpoint_camera.world_view_transform,
        projmatrix=viewpoint_camera.full_proj_transform,
        sh_degree=pc.active_sh_degree,
        campos=viewpoint_camera.camera_center,
        prefiltered=False,
        debug=pipe.debug
    )
    
    # Create the rasterizer with the configured settings
    rasterizer = GaussianRasterizer(raster_settings=raster_settings)

    # Get the Gaussian 3D positions and opacities
    means3D = pc.get_xyz
    means2D = screenspace_points
    opacity = pc.get_opacity

    # Handle covariance computation options
    # Either use precomputed 3D covariance or let the rasterizer compute it from scales and rotations
    scales = None
    rotations = None
    cov3D_precomp = None
    if pipe.compute_cov3D_python:
        # Compute 3D covariances in Python before rasterization
        cov3D_precomp = pc.get_covariance(scaling_modifier)
    else:
        # Let the rasterizer compute covariances from scale and rotation
        scales = pc.get_scaling
        rotations = pc.get_rotation

    # Handle color computation options
    # Either use override colors, precomputed colors from SHs, or let the rasterizer compute colors from SHs
    shs = None
    colors_precomp = None
    if override_color is None:
        if pipe.convert_SHs_python:
            # Convert spherical harmonics to RGB colors in Python
            shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2)
            # Calculate the view direction from Gaussian center to camera
            dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1))
            dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True)
            # Evaluate spherical harmonics to get RGB colors
            sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
            # Apply offset and clamp to ensure valid color values
            colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
        else:
            # Let the rasterizer convert SHs to colors
            shs = pc.get_features
    else:
        # Use provided override colors
        colors_precomp = override_color

    # Perform the rasterization to generate the final rendered image
    # This projects the 3D Gaussians to 2D and blends them according to their opacities
    rendered_image, radii = rasterizer(
        means3D = means3D,
        means2D = means2D,
        shs = shs,
        colors_precomp = colors_precomp,
        opacities = opacity,
        scales = scales,
        rotations = rotations,
        cov3D_precomp = cov3D_precomp
    )

    # Return the rendering results in a dictionary
    # radii > 0 creates a filter for visible Gaussians (those not frustum-culled)
    return edict({"render": rendered_image,
            "viewspace_points": screenspace_points,
            "visibility_filter" : radii > 0,
            "radii": radii})

class GaussianRenderer:
    """
    A renderer for Gaussian Splatting that converts 3D Gaussian primitives into 2D images.
    
    This renderer projects 3D Gaussian splats onto a 2D image plane using the provided
    camera parameters, handling the rasterization process through an optimized backend.
    
    Args:
        rendering_options (dict): Configuration options for rendering including resolution,
                                    depth range, background color, and supersampling level.
    """

    def __init__(self, rendering_options=None) -> None:
        # Initialize default pipeline parameters
        self.pipe = edict({
            "kernel_size": 0.1,       # Size of the Gaussian kernel for rasterization
            "convert_SHs_python": False,  # Whether to convert Spherical Harmonics to colors in Python
            "compute_cov3D_python": False,  # Whether to compute 3D covariance matrices in Python
            "scale_modifier": 1.0,    # Global scaling factor for all Gaussians
            "debug": False            # Enable/disable debug mode
        })
        
        # Initialize default rendering options
        self.rendering_options = edict({
            "resolution": None,       # Output image resolution (width and height)
            "near": None,             # Near clipping plane distance
            "far": None,              # Far clipping plane distance
            "ssaa": 1,                # Super-sampling anti-aliasing factor (1 = disabled)
            "bg_color": 'random',     # Background color ('random' or specific color)
        })
        
        # Update with user-provided options
        self.rendering_options.update(rendering_options or {})
        
        # Initialize background color (will be set during rendering)
        self.bg_color = None
    
    def render(
            self,
            gaussian: Gaussian,
            extrinsics: torch.Tensor,
            intrinsics: torch.Tensor,
            colors_overwrite: torch.Tensor = None
        ) -> edict:
        """
        Render the 3D Gaussian representation from a given camera viewpoint.

        This method projects the 3D Gaussians onto a 2D image plane using the provided camera parameters,
        handling the full rendering pipeline including projection, rasterization, and optional supersampling.

        Args:
            gaussian (Gaussian): The Gaussian representation containing positions, features, and other attributes
            extrinsics (torch.Tensor): (4, 4) camera extrinsics matrix defining camera position and orientation
            intrinsics (torch.Tensor): (3, 3) camera intrinsics matrix, normalized by image size
            colors_overwrite (torch.Tensor, optional): (N, 3) tensor to override Gaussian colors

        Returns:
            edict containing:
                color (torch.Tensor): (3, H, W) rendered color image
        """
        # Extract rendering parameters from options
        resolution = self.rendering_options["resolution"]
        near = self.rendering_options["near"]
        far = self.rendering_options["far"]
        ssaa = self.rendering_options["ssaa"]  # Super-sampling anti-aliasing factor
        
        # Set background color based on rendering options
        if self.rendering_options["bg_color"] == 'random':
            # Randomly choose either black or white background
            self.bg_color = torch.zeros(3, dtype=torch.float32, device="cuda")
            if np.random.rand() < 0.5:
                self.bg_color += 1
        else:
            # Use specified background color
            self.bg_color = torch.tensor(self.rendering_options["bg_color"], dtype=torch.float32, device="cuda")

        # Prepare camera parameters for the renderer
        view = extrinsics  # World-to-camera transform
        
        # Convert OpenCV intrinsics to OpenGL projection matrix
        perspective = intrinsics_to_projection(intrinsics, near, far)
        
        # Extract camera center from extrinsics (inverse of view matrix)
        camera = torch.inverse(view)[:3, 3]
        
        # Calculate field of view from focal lengths
        # (intrinsics are normalized by image size, so the half-extent of the image plane is 0.5)
        focalx = intrinsics[0, 0]
        focaly = intrinsics[1, 1]
        fovx = 2 * torch.atan(0.5 / focalx)  # Horizontal FoV in radians
        fovy = 2 * torch.atan(0.5 / focaly)  # Vertical FoV in radians
            
        # Build complete camera parameter dictionary
        camera_dict = edict({
            "image_height": resolution * ssaa,  # Apply supersampling if enabled
            "image_width": resolution * ssaa,
            "FoVx": fovx,
            "FoVy": fovy,
            "znear": near,
            "zfar": far,
            "world_view_transform": view.T.contiguous(),  # Transpose for OpenGL convention
            "projection_matrix": perspective.T.contiguous(),
            "full_proj_transform": (perspective @ view).T.contiguous(),  # Combined projection and view
            "camera_center": camera
        })

        # Perform the actual rendering using the 3D Gaussian rasterizer
        render_ret = render(camera_dict, gaussian, self.pipe, self.bg_color,
                            override_color=colors_overwrite, scaling_modifier=self.pipe.scale_modifier)

        # Handle supersampling by downsampling the high-resolution render to the target resolution
        if ssaa > 1:
            # Use bilinear interpolation with antialiasing to downsample the image
            render_ret.render = F.interpolate(
                render_ret.render[None],
                size=(resolution, resolution),
                mode='bilinear',
                align_corners=False,
                antialias=True,
            ).squeeze(0)  # Drop only the batch dimension
            
        # Return the final rendered color image
        ret = edict({
            'color': render_ret['render']
        })
        return ret
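

# Example (a minimal sketch; `gaussian` is a Gaussian instance, `extrinsics` is a
# (4, 4) world-to-camera matrix, `intrinsics` is normalized by image size, and
# all tensors are assumed to live on the GPU):
#
#   renderer = GaussianRenderer({
#       "resolution": 512,
#       "near": 0.8,
#       "far": 1.6,
#       "ssaa": 2,
#       "bg_color": (0.0, 0.0, 0.0),
#   })
#   out = renderer.render(gaussian, extrinsics, intrinsics)
#   image = out.color  # (3, 512, 512) tensor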