Spaces:

roll-ai
/

RealCam-I2V

Runtime error

App Files Files Community

RealCam-I2V / finetune /models /camera_controller /utils.py

roll-ai

Upload 333 files

e8bdafd verified 5 days ago

raw

history blame contribute delete

7.89 kB

	import copy
	import numpy as np
	import torch
	import torch.nn.functional as F
	import open3d as o3d
	from torch import Tensor

	def relative_pose(rt: Tensor, mode, ref_index) -> Tensor:
	    """Re-express a stack of pose matrices relative to one reference pose.

	    :param rt: stack of pose matrices, documented by the file as F,4,4
	    :param mode: "left" computes ``inv(rt[ref_index]) @ rt``; "right" computes
	        ``rt @ inv(rt[ref_index])``; any other value returns ``rt`` untouched
	    :param ref_index: index (along the first axis) of the reference pose
	    :return: the re-expressed poses, same shape as ``rt``
	    """
	    # Unknown modes are a deliberate pass-through, so skip the inverse entirely.
	    if mode not in ("left", "right"):
	        return rt

	    reference_inverse = rt[ref_index].inverse()
	    return reference_inverse @ rt if mode == "left" else rt @ reference_inverse


	def create_line_point_cloud(start_point, end_point, num_points=50, color=np.array([0, 0, 1.0])):
	    """Sample a straight segment and give every sample the same colour.

	    :param start_point: first endpoint of the segment
	    :param end_point: last endpoint of the segment
	    :param num_points: number of evenly spaced samples, endpoints included
	    :param color: colour repeated for each sample; the default is normalized
	        RGB blue (0, 0, 255 -> 0, 0, 1)
	    :return: ``(points, colors)`` with one colour row per sampled point
	    """
	    # Evenly spaced samples from the start point to the end point.
	    line = np.linspace(start_point, end_point, num_points)
	    # One copy of the colour per sample.
	    per_point_color = np.tile(color, (len(line), 1))
	    return line, per_point_color


	def remove_outliers(pcd, nb_neighbors=16, std_ratio=3.0):
	    """Remove outlier points from a point cloud with a statistical filter.

	    :param pcd: point cloud object exposing ``remove_statistical_outlier`` and
	        ``select_by_index`` (an Open3D point cloud in this file)
	    :param nb_neighbors: number of neighbouring points considered per point
	    :param std_ratio: standard-deviation threshold used to flag outliers
	    :return: the cleaned point cloud, built from the indices of the kept points
	    """
	    # Only the inlier indices are needed; the filtered cloud returned alongside
	    # them is discarded in favour of an explicit selection on the input cloud.
	    _, inlier_indices = pcd.remove_statistical_outlier(
	        nb_neighbors=nb_neighbors,
	        std_ratio=std_ratio,
	    )
	    return pcd.select_by_index(inlier_indices)

	def construct_point_cloud(points: np.ndarray, colors: np.ndarray) -> o3d.geometry.PointCloud:
	    """Build an Open3D point cloud from point and colour arrays.

	    The cloud is transformed before being returned by a fixed 4x4 matrix that
	    negates the x and y coordinates and leaves z unchanged.

	    :param points: point coordinates, converted with ``Vector3dVector``
	    :param colors: per-point colours, converted with ``Vector3dVector``
	    :return: the transformed point cloud
	    """
	    # Homogeneous transform that negates x and y and keeps z.
	    flip_xy = [
	        [-1, 0, 0, 0],
	        [0, -1, 0, 0],
	        [0, 0, 1, 0],
	        [0, 0, 0, 1],
	    ]

	    cloud = o3d.geometry.PointCloud()
	    cloud.points = o3d.utility.Vector3dVector(points)
	    cloud.colors = o3d.utility.Vector3dVector(colors)
	    cloud.transform(flip_xy)
	    return cloud


	def camera_pose_lerp(c2w: Tensor, target_frames: int):
	    """Resample a pose sequence to ``target_frames`` poses by linear blending.

	    Sample positions are spread evenly over ``[0, c2w.size(0) - 1]``; each output
	    is an element-wise linear blend of the two neighbouring input poses.

	    :param c2w: pose matrices stacked along the first axis
	        (presumably (F, 4, 4) camera-to-world matrices; confirm against callers)
	    :param target_frames: number of poses to produce
	    :return: tensor with ``target_frames`` entries along the first axis
	    """
	    # Create the sample positions on the same device as the poses; otherwise
	    # torch.lerp receives a CPU weight with accelerator-resident inputs.
	    positions = torch.linspace(
	        0,
	        c2w.size(0) - 1,
	        target_frames,
	        dtype=c2w.dtype,
	        device=c2w.device,
	    )
	    lower = positions.floor().long()
	    upper = positions.ceil().long()
	    # The fractional part is the blend weight, broadcast over each matrix.
	    blend = positions.frac().unsqueeze(-1).unsqueeze(-1)

	    return torch.lerp(c2w[lower], c2w[upper], blend)


	def apply_thresholded_conv(mask, kernel_size=5, threshold=0.9):
	    """Binarize a mask by thresholding its local window sums.

	    Each ``kernel_size x kernel_size`` neighbourhood is summed with an all-ones
	    convolution kernel, using reflect padding at the borders. An output pixel
	    is 1.0 when the sum is at least ``kernel_size * kernel_size * threshold``
	    and 0.0 otherwise.

	    Args:
	        mask (torch.Tensor): 4-D tensor of shape (b, f, h, w); documented by
	            the file as containing only 0.0 and 1.0.
	        kernel_size (int): side length of the summing window. Must be odd so
	            the padded convolution restores the original (h, w).
	        threshold (float): fraction of the window sum required to output 1.

	    Returns:
	        torch.Tensor: float32 tensor of shape (b, f, h, w) with values 0.0 / 1.0.

	    Raises:
	        ValueError: if ``kernel_size`` is even.
	    """
	    if kernel_size % 2 == 0:
	        raise ValueError(f"kernel_size must be odd, got {kernel_size}")

	    b, f, h, w = mask.shape
	    pad = kernel_size // 2

	    # All-ones kernel: the convolution then sums each window. It matches the
	    # mask's dtype and device so conv2d accepts non-float32 masks too.
	    kernel = torch.ones((1, 1, kernel_size, kernel_size), dtype=mask.dtype, device=mask.device)

	    # Reflect-pad the spatial borders so the output keeps the input's h and w.
	    mask_padded = F.pad(mask, pad=(pad, pad, pad, pad), mode='reflect')

	    # Treat the mask as (b * f, 1, H, W) so a single-channel 2-D convolution
	    # runs over every frame independently.
	    mask_reshaped = mask_padded.reshape(-1, 1, h + 2 * pad, w + 2 * pad)
	    summed_mask = F.conv2d(mask_reshaped, kernel)

	    # Keep only pixels whose window sum reaches the required fraction.
	    thresholded_mask = (summed_mask >= (kernel_size * kernel_size * threshold)).float()

	    # Restore the original (b, f, h, w) layout.
	    return thresholded_mask.view(b, f, h, w)


	def constrain_to_multiple_of(x, min_val=0, max_val=None, multiple_of=14):
	    """Snap a scalar to a multiple of ``multiple_of`` while respecting bounds.

	    The value is first rounded to the nearest multiple. If that exceeds
	    ``max_val`` it is rounded down instead; if the result is then below
	    ``min_val`` it is rounded up.

	    :param x: scalar value to snap
	    :param min_val: lower bound that triggers rounding up
	    :param max_val: optional upper bound that triggers rounding down
	    :param multiple_of: step the result must be a multiple of
	    :return: the snapped value as a numpy integer
	    """
	    ratio = x / multiple_of
	    snapped = (np.round(ratio) * multiple_of).astype(int)

	    too_large = max_val is not None and snapped > max_val
	    if too_large:
	        snapped = (np.floor(ratio) * multiple_of).astype(int)

	    # Checked last, so the lower bound wins over the upper bound.
	    if snapped < min_val:
	        snapped = (np.ceil(ratio) * multiple_of).astype(int)

	    return snapped


	def add_camera_trace(points, colors, points_x, points_y):
	    """Append eight line segments outlining a camera to a point set.

	    Four segments run from the origin to the corner entries of ``points_x`` /
	    ``points_y`` (at z = 1.0) and four more join neighbouring corners. Every
	    segment is sampled with 50 points, scaled by 0.25, and appended together
	    with the helper's default line colour.

	    :param points: existing point array to extend
	    :param colors: existing colour array to extend
	    :param points_x: 2-D indexable of x values; only the four corners are read
	    :param points_y: 2-D indexable of y values; only the four corners are read
	    :return: ``(points, colors)`` with the segment samples appended
	    """
	    def corner(row, col):
	        # Corner of the grid, placed on the z = 1.0 plane.
	        return np.array([points_x[row][col], points_y[row][col], 1.0])

	    corner_ids = ((0, 0), (0, -1), (-1, 0), (-1, -1))
	    edge_ids = (
	        ((0, 0), (0, -1)),
	        ((0, 0), (-1, 0)),
	        ((-1, -1), (0, -1)),
	        ((-1, -1), (-1, 0)),
	    )

	    # Rays from the origin to each corner first, then the four border edges.
	    segments = [(np.array([0, 0, 0]), corner(r, c)) for r, c in corner_ids]
	    segments += [(corner(*begin), corner(*finish)) for begin, finish in edge_ids]

	    for seg_start, seg_end in segments:
	        trace, trace_colors = create_line_point_cloud(
	            start_point=seg_start,
	            end_point=seg_end,
	            num_points=50,
	        )
	        points = np.concatenate((points, trace * 0.25), axis=0)
	        colors = np.concatenate((colors, trace_colors), axis=0)

	    return points, colors


	def create_relative(RT_list, K_1=4.7, dataset="syn"):
	    """Express a list of [R|T] matrices relative to the first one.

	    Each entry is reshaped to (3, 4). With ``R0`` / ``T0`` taken from the first
	    entry, every entry becomes ``R' = R @ R0.T`` and ``T' = T - R' @ T0``, so
	    the first entry maps to identity rotation and zero translation.
	    ``R0.T`` stands in for the inverse of ``R0``, which assumes ``R0`` is
	    orthonormal.

	    The inputs are copied before being modified, so the caller's arrays are
	    left untouched.

	    Args:
	        RT_list: sequence of arrays, each reshapeable to (3, 4).
	        K_1: unused; kept for interface compatibility.
	        dataset: when equal to "realestate", each result is flattened to 1-D.

	    Returns:
	        list of ndarray: the relative matrices, (3, 4) each, or flattened for
	        "realestate". An empty input yields an empty list.
	    """
	    # np.array copies, so the in-place updates below never reach the caller's
	    # data (a bare reshape would return a view of it).
	    frames = [np.array(RT).reshape(3, 4) for RT in RT_list]
	    if not frames:
	        return []

	    # Snapshot the reference values: frames[0] itself is rewritten in the loop.
	    reference = frames[0].copy()
	    first_frame_R_inv = reference[:, :3].T
	    first_frame_T = reference[:, -1]

	    for RT in frames:
	        RT[:, :3] = np.dot(RT[:, :3], first_frame_R_inv)
	        # Uses the rotation updated on the previous line.
	        RT[:, -1] = RT[:, -1] - np.dot(RT[:, :3], first_frame_T)

	    if dataset == "realestate":
	        frames = [RT.reshape(-1) for RT in frames]
	    return frames


	def sigma_matrix2(sig_x, sig_y, theta):
	    """Calculate the rotated sigma matrix (two dimensional matrix).

	    Builds ``U @ diag(sig_x**2, sig_y**2) @ U.T`` where ``U`` is the 2-D
	    rotation matrix for ``theta``.

	    Args:
	        sig_x (float): standard deviation along the first axis.
	        sig_y (float): standard deviation along the second axis.
	        theta (float): Radian measurement.

	    Returns:
	        ndarray: Rotated sigma matrix with the shape (2, 2).
	    """
	    # Variances sit on the diagonal, hence the squares.
	    d_matrix = np.array([[sig_x**2, 0], [0, sig_y**2]])
	    u_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
	    return np.dot(u_matrix, np.dot(d_matrix, u_matrix.T))


	def mesh_grid(kernel_size):
	    """Build coordinate grids for a square kernel.

	    Coordinates run over ``arange(-kernel_size // 2 + 1, kernel_size // 2 + 1)``,
	    which is centred on zero for odd kernel sizes.

	    Args:
	        kernel_size (int): side length of the kernel.

	    Returns:
	        xy (ndarray): with the shape (kernel_size, kernel_size, 2); the last
	            axis holds (x, y).
	        xx (ndarray): with the shape (kernel_size, kernel_size)
	        yy (ndarray): with the shape (kernel_size, kernel_size)
	    """
	    axis_coords = np.arange(-kernel_size // 2 + 1., kernel_size // 2 + 1.)
	    xx, yy = np.meshgrid(axis_coords, axis_coords)
	    # Pair each x with its y along a new trailing axis.
	    xy = np.stack((xx, yy), axis=-1)
	    return xy, xx, yy


	def pdf2(sigma_matrix, grid):
	    """Evaluate an un-normalized bivariate Gaussian on a coordinate grid.

	    For every grid point ``p`` the value is ``exp(-0.5 * p @ inv(sigma) @ p)``.

	    Args:
	        sigma_matrix (ndarray): with the shape (2, 2)
	        grid (ndarray): generated by :func:`mesh_grid`,
	            with the shape (K, K, 2), K is the kernel size.

	    Returns:
	        kernel (ndarray): un-normalized kernel with the shape (K, K).
	    """
	    precision = np.linalg.inv(sigma_matrix)
	    # Quadratic form p^T * precision * p, evaluated per grid point.
	    projected = np.dot(grid, precision)
	    quadratic = np.sum(projected * grid, 2)
	    return np.exp(-0.5 * quadratic)


	def bivariate_Gaussian(kernel_size, sig_x, sig_y, theta, grid=None, isotropic=True):
	    """Generate a bivariate isotropic or anisotropic Gaussian kernel.

	    In the isotropic mode, only `sig_x` is used. `sig_y` and `theta` are ignored.

	    Args:
	        kernel_size (int): side length of the kernel; only used to build the
	            grid when `grid` is None.
	        sig_x (float): standard deviation along the first axis.
	        sig_y (float): standard deviation along the second axis.
	        theta (float): Radian measurement.
	        grid (ndarray, optional): generated by :func:`mesh_grid`,
	            with the shape (K, K, 2), K is the kernel size. Default: None
	        isotropic (bool): use the same variance on both axes, no rotation.

	    Returns:
	        kernel (ndarray): normalized kernel (entries sum to 1).
	    """
	    if grid is None:
	        grid, _, _ = mesh_grid(kernel_size)
	    if isotropic:
	        # Same variance (sig_x squared) on both axes.
	        sigma_matrix = np.array([[sig_x**2, 0], [0, sig_x**2]])
	    else:
	        sigma_matrix = sigma_matrix2(sig_x, sig_y, theta)
	    kernel = pdf2(sigma_matrix, grid)
	    # Normalize so the kernel sums to one.
	    kernel = kernel / np.sum(kernel)
	    return kernel