File size: 4,650 Bytes
2568013 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import numpy as np
def similarity_from_cameras(c2w, strict_scaling=False, center_method="focus"):
    """
    reference: nerf-factory
    Get a similarity transform to normalize dataset
    from c2w (OpenCV convention) cameras

    :param c2w: (N, 4, 4) camera-to-world matrices (OpenCV convention)
    :param strict_scaling: if True, scale by the max camera distance so all
        cameras end up inside the unit sphere; otherwise use the median
    :param center_method: "focus" to center on the median of the points
        closest to the origin along each camera's forward ray, or "poses"
        to center on the median camera position
    :return: T (4, 4) similarity transform (rotation, translation and
        uniform scale folded into the top three rows)
    """
    t = c2w[:, :3, 3]
    R = c2w[:, :3, :3]

    # (1) Rotate the world so that z+ is the up axis
    # we estimate the up axis by averaging the camera up axes
    # (OpenCV convention: camera up is -y in camera space, so each camera's
    # world-space up direction is R @ [0, -1, 0])
    ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1)
    world_up = np.mean(ups, axis=0)
    world_up /= np.linalg.norm(world_up)

    up_camspace = np.array([0.0, -1.0, 0.0])
    c = (up_camspace * world_up).sum()
    cross = np.cross(world_up, up_camspace)
    skew = np.array(
        [
            [0.0, -cross[2], cross[1]],
            [cross[2], 0.0, -cross[0]],
            [-cross[1], cross[0], 0.0],
        ]
    )
    if c > -1:
        # Rodrigues formula for the rotation taking world_up to up_camspace:
        # R = I + [v]_x + [v]_x^2 / (1 + c), with v = world_up x up_camspace
        R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c)
    else:
        # Degenerate case: the estimated up axis points exactly opposite the
        # target, so rotate 180 degrees about the x axis.
        # BUGFIX: the previous fallback diag(-1, 1, 1) was a reflection
        # (det = -1) and did not map world_up onto up_camspace;
        # diag(1, -1, -1) is the proper 180-degree rotation about x.
        R_align = np.array([[1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]])

    R = R_align @ R
    # Forward (z+) direction of each aligned camera in world space.
    fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1)
    t = (R_align @ t[..., None])[..., 0]

    # (2) Recenter the scene.
    if center_method == "focus":
        # find the closest point to the origin for each camera's center ray
        nearest = t + (fwds * -t).sum(-1)[:, None] * fwds
        translate = -np.median(nearest, axis=0)
    elif center_method == "poses":
        # use center of the camera positions
        translate = -np.median(t, axis=0)
    else:
        raise ValueError(f"Unknown center_method {center_method}")

    transform = np.eye(4)
    transform[:3, 3] = translate
    transform[:3, :3] = R_align

    # (3) Rescale the scene using camera distances
    scale_fn = np.max if strict_scaling else np.median
    scale = 1.0 / scale_fn(np.linalg.norm(t + translate, axis=-1))
    transform[:3, :] *= scale

    return transform
def align_principal_axes(point_cloud):
    """Build an SE(3) transform aligning a point cloud to its principal axes.

    The cloud is recentered at its per-axis median and rotated so that the
    direction of largest variance maps to x and the smallest to z, with a
    sign flip applied when needed to keep the rotation proper (det = +1).

    Args:
        point_cloud: Nx3 array of points.

    Returns:
        4x4 SE(3) transform matrix.
    """
    # Robust center: per-coordinate median of the cloud.
    center = np.median(point_cloud, axis=0)
    # Principal directions come from the covariance of the centered points.
    cov = np.cov(point_cloud - center, rowvar=False)
    eigvals, eigvecs = np.linalg.eigh(cov)
    # eigh yields ascending eigenvalues; reverse the order so the z-axis
    # ends up on the direction of smallest variance.
    order = np.argsort(eigvals)[::-1]
    basis = eigvecs[:, order]
    # Ensure a proper rotation: flip one axis if the basis is left-handed.
    if np.linalg.det(basis) < 0:
        basis[:, 0] = -basis[:, 0]
    rotation = basis.T
    transform = np.eye(4)
    transform[:3, :3] = rotation
    transform[:3, 3] = -rotation @ center
    return transform
def transform_points(matrix, points):
    """Apply an SE(3) transform to a set of 3D points.

    Args:
        matrix: 4x4 SE(3) matrix
        points: Nx3 array of points

    Returns:
        Nx3 array of transformed points
    """
    assert matrix.shape == (4, 4)
    assert points.ndim == 2 and points.shape[1] == 3
    rotation = matrix[:3, :3]
    translation = matrix[:3, 3]
    return points @ rotation.T + translation
def transform_cameras(matrix, camtoworlds):
    """Apply an SE(3) transform to camera-to-world matrices.

    The rotation block of each result is renormalized to unit scale, so
    the outputs remain valid rigid poses even when ``matrix`` carries a
    uniform scale factor in its top three rows.

    Args:
        matrix: 4x4 SE(3) matrix
        camtoworlds: Nx4x4 array of camera-to-world matrices

    Returns:
        Nx4x4 array of transformed camera-to-world matrices
    """
    assert matrix.shape == (4, 4)
    assert camtoworlds.ndim == 3 and camtoworlds.shape[1:] == (4, 4)
    # Left-multiply every pose: out[n] = matrix @ camtoworlds[n]
    # (broadcasting matmul allocates a fresh array; the input is untouched).
    transformed = matrix @ camtoworlds
    # Divide out any uniform scale in the rotation block; since the scale is
    # uniform, the norm of the first rotation row measures it.
    row_norms = np.linalg.norm(transformed[:, 0, :3], axis=1)
    transformed[:, :3, :3] = transformed[:, :3, :3] / row_norms[:, None, None]
    return transformed
def normalize(camtoworlds, points=None):
    """Normalize camera poses (and optionally points) into a canonical frame.

    First applies the similarity transform derived from the cameras; when a
    point cloud is supplied, additionally aligns the scene to the cloud's
    principal axes.

    Args:
        camtoworlds: Nx4x4 camera-to-world matrices.
        points: optional Nx3 point cloud.

    Returns:
        (camtoworlds, T1) when ``points`` is None, otherwise
        (camtoworlds, points, T2 @ T1) with the composed transform.
    """
    T1 = similarity_from_cameras(camtoworlds)
    camtoworlds = transform_cameras(T1, camtoworlds)
    if points is None:
        return camtoworlds, T1
    points = transform_points(T1, points)
    T2 = align_principal_axes(points)
    return (
        transform_cameras(T2, camtoworlds),
        transform_points(T2, points),
        T2 @ T1,
    )
|