import numpy as np def similarity_from_cameras(c2w, strict_scaling=False, center_method="focus"): """ reference: nerf-factory Get a similarity transform to normalize dataset from c2w (OpenCV convention) cameras :param c2w: (N, 4) :return T (4,4) , scale (float) """ t = c2w[:, :3, 3] R = c2w[:, :3, :3] # (1) Rotate the world so that z+ is the up axis # we estimate the up axis by averaging the camera up axes ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1) world_up = np.mean(ups, axis=0) world_up /= np.linalg.norm(world_up) up_camspace = np.array([0.0, -1.0, 0.0]) c = (up_camspace * world_up).sum() cross = np.cross(world_up, up_camspace) skew = np.array( [ [0.0, -cross[2], cross[1]], [cross[2], 0.0, -cross[0]], [-cross[1], cross[0], 0.0], ] ) if c > -1: R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c) else: # In the unlikely case the original data has y+ up axis, # rotate 180-deg about x axis R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) # R_align = np.eye(3) # DEBUG R = R_align @ R fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1) t = (R_align @ t[..., None])[..., 0] # (2) Recenter the scene. if center_method == "focus": # find the closest point to the origin for each camera's center ray nearest = t + (fwds * -t).sum(-1)[:, None] * fwds translate = -np.median(nearest, axis=0) elif center_method == "poses": # use center of the camera positions translate = -np.median(t, axis=0) else: raise ValueError(f"Unknown center_method {center_method}") transform = np.eye(4) transform[:3, 3] = translate transform[:3, :3] = R_align # (3) Rescale the scene using camera distances scale_fn = np.max if strict_scaling else np.median scale = 1.0 / scale_fn(np.linalg.norm(t + translate, axis=-1)) transform[:3, :] *= scale return transform def align_principal_axes(point_cloud): # Compute centroid centroid = np.median(point_cloud, axis=0) # Translate point cloud to centroid translated_point_cloud = point_cloud - centroid # Compute covariance matrix covariance_matrix = np.cov(translated_point_cloud, rowvar=False) # Compute eigenvectors and eigenvalues eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix) # Sort eigenvectors by eigenvalues (descending order) so that the z-axis # is the principal axis with the smallest eigenvalue. sort_indices = eigenvalues.argsort()[::-1] eigenvectors = eigenvectors[:, sort_indices] # Check orientation of eigenvectors. If the determinant of the eigenvectors is # negative, then we need to flip the sign of one of the eigenvectors. if np.linalg.det(eigenvectors) < 0: eigenvectors[:, 0] *= -1 # Create rotation matrix rotation_matrix = eigenvectors.T # Create SE(3) matrix (4x4 transformation matrix) transform = np.eye(4) transform[:3, :3] = rotation_matrix transform[:3, 3] = -rotation_matrix @ centroid return transform def transform_points(matrix, points): """Transform points using an SE(3) matrix. Args: matrix: 4x4 SE(3) matrix points: Nx3 array of points Returns: Nx3 array of transformed points """ assert matrix.shape == (4, 4) assert len(points.shape) == 2 and points.shape[1] == 3 return points @ matrix[:3, :3].T + matrix[:3, 3] def transform_cameras(matrix, camtoworlds): """Transform cameras using an SE(3) matrix. Args: matrix: 4x4 SE(3) matrix camtoworlds: Nx4x4 array of camera-to-world matrices Returns: Nx4x4 array of transformed camera-to-world matrices """ assert matrix.shape == (4, 4) assert len(camtoworlds.shape) == 3 and camtoworlds.shape[1:] == (4, 4) camtoworlds = np.einsum("nij, ki -> nkj", camtoworlds, matrix) scaling = np.linalg.norm(camtoworlds[:, 0, :3], axis=1) camtoworlds[:, :3, :3] = camtoworlds[:, :3, :3] / scaling[:, None, None] return camtoworlds def normalize(camtoworlds, points=None): T1 = similarity_from_cameras(camtoworlds) camtoworlds = transform_cameras(T1, camtoworlds) if points is not None: points = transform_points(T1, points) T2 = align_principal_axes(points) camtoworlds = transform_cameras(T2, camtoworlds) points = transform_points(T2, points) return camtoworlds, points, T2 @ T1 else: return camtoworlds, T1