# AnySplat/src/utils/render.py
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import os
from typing import Tuple

import mediapy as media
import numpy as np
import torch
from matplotlib import cm
from PIL import Image
from tqdm import tqdm


def normalize(x: np.ndarray) -> np.ndarray:
"""Normalization helper function."""
return x / np.linalg.norm(x)
def pad_poses(p: np.ndarray) -> np.ndarray:
"""Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1]."""
bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape)
return np.concatenate([p[..., :3, :4], bottom], axis=-2)
def unpad_poses(p: np.ndarray) -> np.ndarray:
"""Remove the homogeneous bottom row from [..., 4, 4] pose matrices."""
return p[..., :3, :4]
def recenter_poses(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Recenter poses around the origin."""
cam2world = average_pose(poses)
transform = np.linalg.inv(pad_poses(cam2world))
poses = transform @ pad_poses(poses)
return unpad_poses(poses), transform
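

# Illustrative usage sketch (added for exposition; the `_demo_*` helpers are
# not part of the original module): recenter a batch of poses and confirm the
# average pose of the result is approximately the identity rotation at the
# origin. Uses viewmatrix(), defined below, to build valid rotations.
def _demo_recenter_poses():
  rng = np.random.default_rng(0)
  positions = rng.normal(size=(4, 3))
  lookdirs = rng.normal(size=(4, 3))
  poses = np.stack([
      viewmatrix(d, np.array([0., 1., 0.]), p)
      for d, p in zip(lookdirs, positions)
  ])
  recentered, _ = recenter_poses(poses)
  # The recentered capture's average pose collapses to ~[I | 0].
  print(np.round(average_pose(recentered), 3))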
def average_pose(poses: np.ndarray) -> np.ndarray:
"""New pose using average position, z-axis, and up vector of input poses."""
position = poses[:, :3, 3].mean(0)
z_axis = poses[:, :3, 2].mean(0)
up = poses[:, :3, 1].mean(0)
cam2world = viewmatrix(z_axis, up, position)
return cam2world
def viewmatrix(lookdir: np.ndarray, up: np.ndarray,
position: np.ndarray) -> np.ndarray:
"""Construct lookat view matrix."""
vec2 = normalize(lookdir)
vec0 = normalize(np.cross(up, vec2))
vec1 = normalize(np.cross(vec2, vec0))
m = np.stack([vec0, vec1, vec2, position], axis=1)
return m
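

# Illustrative sketch (added for exposition): a camera at (0, 0, 4) whose +z
# column points straight away from the origin, matching the
# lookdir = position - center convention used by generate_ellipse_path below.
def _demo_viewmatrix():
  m = viewmatrix(
      lookdir=np.array([0., 0., 1.]),
      up=np.array([0., 1., 0.]),
      position=np.array([0., 0., 4.]))
  # Columns are [x-axis, y-axis, z-axis, position]:
  # [[1. 0. 0. 0.]
  #  [0. 1. 0. 0.]
  #  [0. 0. 1. 4.]]
  print(m)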
def focus_point_fn(poses: np.ndarray) -> np.ndarray:
"""Calculate nearest point to all focal axes in poses."""
directions, origins = poses[:, :3, 2:3], poses[:, :3, 3:4]
m = np.eye(3) - directions * np.transpose(directions, [0, 2, 1])
mt_m = np.transpose(m, [0, 2, 1]) @ m
focus_pt = np.linalg.inv(mt_m.mean(0)) @ (mt_m @ origins).mean(0)[:, 0]
return focus_pt
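

# Illustrative sketch (added for exposition): three cameras whose optical +z
# axes all pass through a known target; focus_point_fn recovers that target as
# the least-squares point nearest every axis.
def _demo_focus_point_fn():
  target = np.array([1., 2., 3.])
  positions = np.array([[5., 0., 0.], [0., 6., 1.], [2., -4., 7.]])
  poses = np.stack(
      [viewmatrix(target - p, np.array([0., 0., 1.]), p) for p in positions])
  print(focus_point_fn(poses))  # ~ [1. 2. 3.]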
def transform_poses_pca(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Transforms poses so principal components lie on XYZ axes.
  Args:
    poses: an (N, 3, 4) array containing the cameras' camera-to-world
      transforms.

  Returns:
    A tuple (poses, transform), with the transformed poses and the 4x4
    camera-to-world transform that was applied to them.
  """
t = poses[:, :3, 3]
t_mean = t.mean(axis=0)
t = t - t_mean
eigval, eigvec = np.linalg.eig(t.T @ t)
# Sort eigenvectors in order of largest to smallest eigenvalue.
inds = np.argsort(eigval)[::-1]
eigvec = eigvec[:, inds]
rot = eigvec.T
if np.linalg.det(rot) < 0:
rot = np.diag(np.array([1, 1, -1])) @ rot
transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1)
poses_recentered = unpad_poses(transform @ pad_poses(poses))
transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)
# Flip coordinate system if z component of y-axis is negative
if poses_recentered.mean(axis=0)[2, 1] < 0:
poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered
transform = np.diag(np.array([1, -1, -1, 1])) @ transform
return poses_recentered, transform
  # Optionally rescale so all camera positions lie in the [-1, 1]^3 cube:
  # scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3]))
  # poses_recentered[:, :3, 3] *= scale_factor
  # transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform
  # return poses_recentered, transform
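

# Illustrative sketch (added for exposition): after the PCA alignment, camera
# centers are zero-mean and their covariance is (numerically) diagonal, i.e.
# the principal axes of the capture now lie on the XYZ axes.
def _demo_transform_poses_pca():
  rng = np.random.default_rng(0)
  positions = rng.normal(size=(16, 3)) * np.array([5., 2., 0.5])
  poses = np.stack(
      [viewmatrix(-p, np.array([0., 0., 1.]), p) for p in positions])
  poses_rec, _ = transform_poses_pca(poses)
  t = poses_rec[:, :3, 3]
  print(np.round(t.mean(axis=0), 6))  # ~ [0. 0. 0.]
  print(np.round(t.T @ t, 3))         # ~ diagonal, largest variance first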
def generate_ellipse_path(poses: np.ndarray,
n_frames: int = 120,
const_speed: bool = True,
z_variation: float = 0.,
z_phase: float = 0.) -> np.ndarray:
"""Generate an elliptical render path based on the given poses."""
# Calculate the focal point for the path (cameras point toward this).
center = focus_point_fn(poses)
# Path height sits at z=0 (in middle of zero-mean capture pattern).
offset = np.array([center[0], center[1], 0])
# Calculate scaling for ellipse axes based on input camera positions.
sc = np.percentile(np.abs(poses[:, :3, 3] - offset), 90, axis=0)
# Use ellipse that is symmetric about the focal point in xy.
low = -sc + offset
high = sc + offset
# Optional height variation need not be symmetric
z_low = np.percentile((poses[:, :3, 3]), 10, axis=0)
z_high = np.percentile((poses[:, :3, 3]), 90, axis=0)
def get_positions(theta):
# Interpolate between bounds with trig functions to get ellipse in x-y.
# Optionally also interpolate in z to change camera height along path.
return np.stack([
low[0] + (high - low)[0] * (np.cos(theta) * .5 + .5),
low[1] + (high - low)[1] * (np.sin(theta) * .5 + .5),
z_variation * (z_low[2] + (z_high - z_low)[2] *
(np.cos(theta + 2 * np.pi * z_phase) * .5 + .5)),
], -1)
theta = np.linspace(0, 2. * np.pi, n_frames + 1, endpoint=True)
positions = get_positions(theta)
#if const_speed:
# # Resample theta angles so that the velocity is closer to constant.
# lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
# theta = stepfun.sample(None, theta, np.log(lengths), n_frames + 1)
# positions = get_positions(theta)
# Throw away duplicated last position.
positions = positions[:-1]
# Set path's up vector to axis closest to average of input pose up vectors.
avg_up = poses[:, :3, 1].mean(0)
avg_up = avg_up / np.linalg.norm(avg_up)
ind_up = np.argmax(np.abs(avg_up))
up = np.eye(3)[ind_up] * np.sign(avg_up[ind_up])
return np.stack([viewmatrix(p - center, up, p) for p in positions])
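

# Illustrative sketch (added for exposition): generate_ellipse_path assumes
# recentered, PCA-aligned poses, so align a ring of cameras first. All values
# here are made up for the demo.
def _demo_generate_ellipse_path():
  angles = np.linspace(0., 2. * np.pi, 12, endpoint=False)
  positions = np.stack(
      [2. * np.cos(angles), np.sin(angles), 0.2 * np.cos(angles)], -1)
  poses = np.stack(
      [viewmatrix(p, np.array([0., 0., 1.]), p) for p in positions])
  poses_rec, _ = transform_poses_pca(poses)
  path = generate_ellipse_path(poses_rec, n_frames=30, z_variation=0.1)
  print(path.shape)  # (30, 3, 4)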
def generate_path(viewpoint_cameras, n_frames=480):
  """Generate an elliptical render path from (N, 4, 4) camera-to-world poses."""
  # c2ws = np.array([np.linalg.inv(np.asarray((cam.world_view_transform.T).cpu().numpy())) for cam in viewpoint_cameras])
  c2ws = viewpoint_cameras.cpu().numpy()
  # Flip the y and z axes to switch camera conventions before PCA alignment.
  pose = c2ws[:, :3, :] @ np.diag([1, -1, -1, 1])
  pose_recenter, colmap_to_world_transform = transform_poses_pca(pose)
  # Generate new poses along an ellipse around the recentered capture.
  new_poses = generate_ellipse_path(poses=pose_recenter, n_frames=n_frames)
  # Warp back to the original coordinate frame and scale.
  new_poses = np.linalg.inv(colmap_to_world_transform) @ pad_poses(new_poses)
return new_poses
# traj = []
# for c2w in new_poses:
# c2w = c2w @ np.diag([1, -1, -1, 1])
# cam = copy.deepcopy(viewpoint_cameras[0])
# cam.image_height = int(cam.image_height / 2) * 2
# cam.image_width = int(cam.image_width / 2) * 2
# cam.world_view_transform = torch.from_numpy(np.linalg.inv(c2w).T).float().cuda()
# cam.full_proj_transform = (cam.world_view_transform.unsqueeze(0).bmm(cam.projection_matrix.unsqueeze(0))).squeeze(0)
# cam.camera_center = cam.world_view_transform.inverse()[3, :3]
# traj.append(cam)
# return traj
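

# Illustrative sketch (added for exposition): generate_path expects an
# (N, 4, 4) tensor of camera-to-world matrices; the y/z flip inside it
# suggests an OpenGL-style input convention, which this demo follows.
def _demo_generate_path():
  angles = np.linspace(0., 2. * np.pi, 8, endpoint=False)
  positions = np.stack(
      [np.cos(angles), np.sin(angles), 0.3 * np.ones_like(angles)], -1)
  poses = np.stack(
      [viewmatrix(p, np.array([0., 0., 1.]), p) for p in positions])
  c2ws = torch.from_numpy(pad_poses(poses)).float()
  path = generate_path(c2ws, n_frames=60)
  print(path.shape)  # (60, 4, 4)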
def load_img(pth: str) -> np.ndarray:
"""Load an image and cast to float32."""
with open(pth, 'rb') as f:
image = np.array(Image.open(f), dtype=np.float32)
return image
def create_videos(base_dir, input_dir, out_name, num_frames=480):
"""Creates videos out of the images saved to disk."""
  video_prefix = out_name
zpad = max(5, len(str(num_frames - 1)))
idx_to_str = lambda idx: str(idx).zfill(zpad)
os.makedirs(base_dir, exist_ok=True)
render_dist_curve_fn = np.log
# Load one example frame to get image shape and depth range.
depth_file = os.path.join(input_dir, 'vis', f'depth_{idx_to_str(0)}.tiff')
depth_frame = load_img(depth_file)
shape = depth_frame.shape
p = 3
distance_limits = np.percentile(depth_frame.flatten(), [p, 100 - p])
lo, hi = [render_dist_curve_fn(x) for x in distance_limits]
print(f'Video shape is {shape[:2]}')
video_kwargs = {
'shape': shape[:2],
'codec': 'h264',
'fps': 60,
'crf': 18,
}
for k in ['depth', 'normal', 'color']:
video_file = os.path.join(base_dir, f'{video_prefix}_{k}.mp4')
input_format = 'gray' if k == 'alpha' else 'rgb'
file_ext = 'png' if k in ['color', 'normal'] else 'tiff'
if k == 'color':
file0 = os.path.join(input_dir, 'renders', f'{idx_to_str(0)}.{file_ext}')
else:
file0 = os.path.join(input_dir, 'vis', f'{k}_{idx_to_str(0)}.{file_ext}')
if not os.path.exists(file0):
print(f'Images missing for tag {k}')
continue
print(f'Making video {video_file}...')
with media.VideoWriter(
video_file, **video_kwargs, input_format=input_format) as writer:
for idx in tqdm(range(num_frames)):
# img_file = os.path.join(input_dir, f'{k}_{idx_to_str(idx)}.{file_ext}')
if k == 'color':
img_file = os.path.join(input_dir, 'renders', f'{idx_to_str(idx)}.{file_ext}')
else:
img_file = os.path.join(input_dir, 'vis', f'{k}_{idx_to_str(idx)}.{file_ext}')
        if not os.path.exists(img_file):
          raise ValueError(f'Image file {img_file} does not exist.')
img = load_img(img_file)
if k in ['color', 'normal']:
img = img / 255.
elif k.startswith('depth'):
img = render_dist_curve_fn(img)
img = np.clip((img - np.minimum(lo, hi)) / np.abs(hi - lo), 0, 1)
img = cm.get_cmap('turbo')(img)[..., :3]
frame = (np.clip(np.nan_to_num(img), 0., 1.) * 255.).astype(np.uint8)
writer.add_image(frame)
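

# Usage sketch (added for exposition; the path arguments are placeholders).
# create_videos expects the directory layout hard-coded above:
#   <input_dir>/renders/00000.png ...       color frames
#   <input_dir>/vis/depth_00000.tiff ...    float32 depth frames
#   <input_dir>/vis/normal_00000.png ...    normal frames
def _demo_create_videos():
  create_videos(base_dir='out', input_dir='render_out', out_name='scene',
                num_frames=480)
  # Writes out/scene_depth.mp4, out/scene_normal.mp4, and out/scene_color.mp4.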
def save_img_u8(img, pth):
"""Save an image (probably RGB) in [0, 1] to disk as a uint8 PNG."""
with open(pth, 'wb') as f:
Image.fromarray(
(np.clip(np.nan_to_num(img), 0., 1.) * 255.).astype(np.uint8)).save(
f, 'PNG')
def save_img_f32(depthmap, pth):
"""Save an image (probably a depthmap) to disk as a float32 TIFF."""
with open(pth, 'wb') as f:
Image.fromarray(np.nan_to_num(depthmap).astype(np.float32)).save(f, 'TIFF')
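

# Illustrative sketch (added for exposition): round-trip the two savers with
# synthetic data; file names are arbitrary.
def _demo_save_images():
  img = np.linspace(0., 1., 64 * 64 * 3).reshape(64, 64, 3)
  save_img_u8(img, 'demo_color.png')                   # uint8 PNG in [0, 255]
  save_img_f32(10. * img[..., 0], 'demo_depth.tiff')   # float32 TIFF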