|
import numpy as np |
|
|
|
|
|
def similarity_from_cameras(c2w, strict_scaling=False, center_method="focus"): |
|
""" |
|
reference: nerf-factory |
|
Get a similarity transform to normalize dataset |
|
from c2w (OpenCV convention) cameras |
|
:param c2w: (N, 4) |
|
:return T (4,4) , scale (float) |
|
""" |
|
t = c2w[:, :3, 3] |
|
R = c2w[:, :3, :3] |
|
|
|
|
|
|
|
ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1) |
|
world_up = np.mean(ups, axis=0) |
|
world_up /= np.linalg.norm(world_up) |
|
|
|
up_camspace = np.array([0.0, -1.0, 0.0]) |
|
c = (up_camspace * world_up).sum() |
|
cross = np.cross(world_up, up_camspace) |
|
skew = np.array( |
|
[ |
|
[0.0, -cross[2], cross[1]], |
|
[cross[2], 0.0, -cross[0]], |
|
[-cross[1], cross[0], 0.0], |
|
] |
|
) |
|
if c > -1: |
|
R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c) |
|
else: |
|
|
|
|
|
R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) |
|
|
|
|
|
R = R_align @ R |
|
fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1) |
|
t = (R_align @ t[..., None])[..., 0] |
|
|
|
|
|
if center_method == "focus": |
|
|
|
nearest = t + (fwds * -t).sum(-1)[:, None] * fwds |
|
translate = -np.median(nearest, axis=0) |
|
elif center_method == "poses": |
|
|
|
translate = -np.median(t, axis=0) |
|
else: |
|
raise ValueError(f"Unknown center_method {center_method}") |
|
|
|
transform = np.eye(4) |
|
transform[:3, 3] = translate |
|
transform[:3, :3] = R_align |
|
|
|
|
|
scale_fn = np.max if strict_scaling else np.median |
|
scale = 1.0 / scale_fn(np.linalg.norm(t + translate, axis=-1)) |
|
transform[:3, :] *= scale |
|
|
|
return transform |
|
|
|
|
|
def align_principal_axes(point_cloud): |
|
|
|
centroid = np.median(point_cloud, axis=0) |
|
|
|
|
|
translated_point_cloud = point_cloud - centroid |
|
|
|
|
|
covariance_matrix = np.cov(translated_point_cloud, rowvar=False) |
|
|
|
|
|
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix) |
|
|
|
|
|
|
|
sort_indices = eigenvalues.argsort()[::-1] |
|
eigenvectors = eigenvectors[:, sort_indices] |
|
|
|
|
|
|
|
if np.linalg.det(eigenvectors) < 0: |
|
eigenvectors[:, 0] *= -1 |
|
|
|
|
|
rotation_matrix = eigenvectors.T |
|
|
|
|
|
transform = np.eye(4) |
|
transform[:3, :3] = rotation_matrix |
|
transform[:3, 3] = -rotation_matrix @ centroid |
|
|
|
return transform |
|
|
|
|
|
def transform_points(matrix, points): |
|
"""Transform points using an SE(3) matrix. |
|
|
|
Args: |
|
matrix: 4x4 SE(3) matrix |
|
points: Nx3 array of points |
|
|
|
Returns: |
|
Nx3 array of transformed points |
|
""" |
|
assert matrix.shape == (4, 4) |
|
assert len(points.shape) == 2 and points.shape[1] == 3 |
|
return points @ matrix[:3, :3].T + matrix[:3, 3] |
|
|
|
|
|
def transform_cameras(matrix, camtoworlds): |
|
"""Transform cameras using an SE(3) matrix. |
|
|
|
Args: |
|
matrix: 4x4 SE(3) matrix |
|
camtoworlds: Nx4x4 array of camera-to-world matrices |
|
|
|
Returns: |
|
Nx4x4 array of transformed camera-to-world matrices |
|
""" |
|
assert matrix.shape == (4, 4) |
|
assert len(camtoworlds.shape) == 3 and camtoworlds.shape[1:] == (4, 4) |
|
camtoworlds = np.einsum("nij, ki -> nkj", camtoworlds, matrix) |
|
scaling = np.linalg.norm(camtoworlds[:, 0, :3], axis=1) |
|
camtoworlds[:, :3, :3] = camtoworlds[:, :3, :3] / scaling[:, None, None] |
|
return camtoworlds |
|
|
|
|
|
def normalize(camtoworlds, points=None): |
|
T1 = similarity_from_cameras(camtoworlds) |
|
camtoworlds = transform_cameras(T1, camtoworlds) |
|
if points is not None: |
|
points = transform_points(T1, points) |
|
T2 = align_principal_axes(points) |
|
camtoworlds = transform_cameras(T2, camtoworlds) |
|
points = transform_points(T2, points) |
|
return camtoworlds, points, T2 @ T1 |
|
else: |
|
return camtoworlds, T1 |
|
|