Spaces:

alexnasa
/

AnySplat

Running on Zero

App Files Files Community

AnySplat / src /post_opt /datasets /normalize.py

alexnasa

Upload 243 files

2568013 verified 8 days ago

raw

history blame contribute delete

4.65 kB

	import numpy as np


	def similarity_from_cameras(c2w, strict_scaling=False, center_method="focus"):
	"""
	reference: nerf-factory
	Get a similarity transform to normalize dataset
	from c2w (OpenCV convention) cameras
	:param c2w: (N, 4)
	:return T (4,4) , scale (float)
	"""
	t = c2w[:, :3, 3]
	R = c2w[:, :3, :3]

	# (1) Rotate the world so that z+ is the up axis
	# we estimate the up axis by averaging the camera up axes
	ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1)
	world_up = np.mean(ups, axis=0)
	world_up /= np.linalg.norm(world_up)

	up_camspace = np.array([0.0, -1.0, 0.0])
	c = (up_camspace * world_up).sum()
	cross = np.cross(world_up, up_camspace)
	skew = np.array(
	[
	[0.0, -cross[2], cross[1]],
	[cross[2], 0.0, -cross[0]],
	[-cross[1], cross[0], 0.0],
	]
	)
	if c > -1:
	R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c)
	else:
	# In the unlikely case the original data has y+ up axis,
	# rotate 180-deg about x axis
	R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])

	# R_align = np.eye(3) # DEBUG
	R = R_align @ R
	fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1)
	t = (R_align @ t[..., None])[..., 0]

	# (2) Recenter the scene.
	if center_method == "focus":
	# find the closest point to the origin for each camera's center ray
	nearest = t + (fwds * -t).sum(-1)[:, None] * fwds
	translate = -np.median(nearest, axis=0)
	elif center_method == "poses":
	# use center of the camera positions
	translate = -np.median(t, axis=0)
	else:
	raise ValueError(f"Unknown center_method {center_method}")

	transform = np.eye(4)
	transform[:3, 3] = translate
	transform[:3, :3] = R_align

	# (3) Rescale the scene using camera distances
	scale_fn = np.max if strict_scaling else np.median
	scale = 1.0 / scale_fn(np.linalg.norm(t + translate, axis=-1))
	transform[:3, :] *= scale

	return transform


	def align_principal_axes(point_cloud):
	# Compute centroid
	centroid = np.median(point_cloud, axis=0)

	# Translate point cloud to centroid
	translated_point_cloud = point_cloud - centroid

	# Compute covariance matrix
	covariance_matrix = np.cov(translated_point_cloud, rowvar=False)

	# Compute eigenvectors and eigenvalues
	eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

	# Sort eigenvectors by eigenvalues (descending order) so that the z-axis
	# is the principal axis with the smallest eigenvalue.
	sort_indices = eigenvalues.argsort()[::-1]
	eigenvectors = eigenvectors[:, sort_indices]

	# Check orientation of eigenvectors. If the determinant of the eigenvectors is
	# negative, then we need to flip the sign of one of the eigenvectors.
	if np.linalg.det(eigenvectors) < 0:
	eigenvectors[:, 0] *= -1

	# Create rotation matrix
	rotation_matrix = eigenvectors.T

	# Create SE(3) matrix (4x4 transformation matrix)
	transform = np.eye(4)
	transform[:3, :3] = rotation_matrix
	transform[:3, 3] = -rotation_matrix @ centroid

	return transform


	def transform_points(matrix, points):
	"""Transform points using an SE(3) matrix.

	Args:
	matrix: 4x4 SE(3) matrix
	points: Nx3 array of points

	Returns:
	Nx3 array of transformed points
	"""
	assert matrix.shape == (4, 4)
	assert len(points.shape) == 2 and points.shape[1] == 3
	return points @ matrix[:3, :3].T + matrix[:3, 3]


	def transform_cameras(matrix, camtoworlds):
	"""Transform cameras using an SE(3) matrix.

	Args:
	matrix: 4x4 SE(3) matrix
	camtoworlds: Nx4x4 array of camera-to-world matrices

	Returns:
	Nx4x4 array of transformed camera-to-world matrices
	"""
	assert matrix.shape == (4, 4)
	assert len(camtoworlds.shape) == 3 and camtoworlds.shape[1:] == (4, 4)
	camtoworlds = np.einsum("nij, ki -> nkj", camtoworlds, matrix)
	scaling = np.linalg.norm(camtoworlds[:, 0, :3], axis=1)
	camtoworlds[:, :3, :3] = camtoworlds[:, :3, :3] / scaling[:, None, None]
	return camtoworlds


	def normalize(camtoworlds, points=None):
	T1 = similarity_from_cameras(camtoworlds)
	camtoworlds = transform_cameras(T1, camtoworlds)
	if points is not None:
	points = transform_points(T1, points)
	T2 = align_principal_axes(points)
	camtoworlds = transform_cameras(T2, camtoworlds)
	points = transform_points(T2, points)
	return camtoworlds, points, T2 @ T1
	else:
	return camtoworlds, T1