roll-ai's picture
Upload 333 files
e8bdafd verified
import copy
import numpy as np
import torch
import torch.nn.functional as F
import open3d as o3d
from torch import Tensor
def relative_pose(rt: Tensor, mode, ref_index) -> Tensor:
'''
:param rt: F,4,4
:param mode: left or right
:return:
'''
if mode == "left":
rt = rt[ref_index].inverse() @ rt
elif mode == "right":
rt = rt @ rt[ref_index].inverse()
return rt
def create_line_point_cloud(start_point, end_point, num_points=50, color=np.array([0, 0, 1.0])):
# 创建从起点到终点的线性空间
points = np.linspace(start_point, end_point, num_points)
# color 归一化的 RGB 值 (0, 0, 255 -> 0, 0, 1)
colors = np.tile(color, (points.shape[0], 1))
return points, colors
def remove_outliers(pcd):
"""
基于统计方法移除点云中的离群点
:param pcd: Open3D 的点云对象
:param nb_neighbors: 每个点考虑的邻域点数量
:param std_ratio: 离群点标准差阈值
:return: 清理后的点云
"""
cl, ind = pcd.remove_statistical_outlier(nb_neighbors=16, std_ratio=3.0)
# cl, ind = pcd.remove_radius_outlier(nb_points=3, radius=0.1)
return pcd.select_by_index(ind)
def construct_point_cloud(points: np.ndarray, colors: np.ndarray) -> o3d.geometry.PointCloud:
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
pcd.colors = o3d.utility.Vector3dVector(colors)
pcd.transform([
[-1, 0, 0, 0],
[0, -1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
])
return pcd
def camera_pose_lerp(c2w: Tensor, target_frames: int):
weights = torch.linspace(0, c2w.size(0) - 1, target_frames, dtype=c2w.dtype)
left_indices = weights.floor().long()
right_indices = weights.ceil().long()
return torch.lerp(c2w[left_indices], c2w[right_indices], weights.unsqueeze(-1).unsqueeze(-1).frac())
def apply_thresholded_conv(mask, kernel_size=5, threshold=0.9):
"""
对输入的mask进行5x5卷积操作,卷积核用于对5x5区域求和。
如果求和后值 < 25,则置为0;否则置为1。
使用reflect padding进行边缘填充。
参数:
mask (torch.Tensor): 输入的四维Tensor,形状为 (b, f, h, w),仅包含 0.0 和 1.0。
返回:
torch.Tensor: 应用阈值后的四维Tensor,形状为 (b, f, h, w)。
"""
# 获取输入的形状
b, f, h, w = mask.shape
# 创建一个5x5的卷积核,全为1
kernel = torch.ones((1, 1, kernel_size, kernel_size), dtype=torch.float32, device=mask.device)
# 使用 reflect padding 进行边缘填充
mask_padded = F.pad(mask, pad=(kernel_size//2, kernel_size//2, kernel_size//2, kernel_size//2), mode='reflect')
# 将 mask 视作 (b * f, 1, h, w),这样可以在 h 和 w 上做 2D 卷积
mask_reshaped = mask_padded.view(-1, 1, h + 2*(kernel_size//2), w + 2*(kernel_size//2))
summed_mask = F.conv2d(mask_reshaped, kernel)
# 对求和结果进行阈值判断,将 < 25 的置为 0,其余置为 1
thresholded_mask = (summed_mask >= (kernel_size * kernel_size * threshold)).float()
# 将结果 reshape 回原来的形状 (b, f, h, w)
thresholded_mask = thresholded_mask.view(b, f, h, w)
return thresholded_mask
def constrain_to_multiple_of(x, min_val=0, max_val=None, multiple_of=14):
y = (np.round(x / multiple_of) * multiple_of).astype(int)
if max_val is not None and y > max_val:
y = (np.floor(x / multiple_of) * multiple_of).astype(int)
if y < min_val:
y = (np.ceil(x / multiple_of) * multiple_of).astype(int)
return y
def add_camera_trace(points, colors, points_x, points_y):
x, y = points_x, points_y
for idx in [[0, 0], [0, -1], [-1, 0], [-1, -1]]:
camera, camera_colors = create_line_point_cloud(
start_point=np.array([0, 0, 0]),
end_point=np.array([x[idx[0]][idx[1]], y[idx[0]][idx[1]], 1.0]),
num_points=50,
)
points = np.concatenate((points, camera * 0.25), axis=0)
colors = np.concatenate((colors, camera_colors), axis=0)
for start_idx, end_idx in [
[[0, 0], [0, -1]],
[[0, 0], [-1, 0]],
[[-1, -1], [0, -1]],
[[-1, -1], [-1, 0]],
]:
camera, camera_colors = create_line_point_cloud(
start_point=np.array([x[start_idx[0]][start_idx[1]], y[start_idx[0]][start_idx[1]], 1.0]),
end_point=np.array([x[end_idx[0]][end_idx[1]], y[end_idx[0]][end_idx[1]], 1.0]),
num_points=50,
)
points = np.concatenate((points, camera * 0.25), axis=0)
colors = np.concatenate((colors, camera_colors), axis=0)
return points, colors
def create_relative(RT_list, K_1=4.7, dataset="syn"):
scale_T = 1
RT_list = [RT.reshape(3, 4) for RT in RT_list]
temp = []
first_frame_RT = copy.deepcopy(RT_list[0])
# first_frame_R_inv = np.linalg.inv(first_frame_RT[:,:3])
first_frame_R_inv = first_frame_RT[:, :3].T
first_frame_T = first_frame_RT[:, -1]
for RT in RT_list:
RT[:, :3] = np.dot(RT[:, :3], first_frame_R_inv)
RT[:, -1] = RT[:, -1] - np.dot(RT[:, :3], first_frame_T)
RT[:, -1] = RT[:, -1] * scale_T
temp.append(RT)
if dataset == "realestate":
temp = [RT.reshape(-1) for RT in temp]
return temp
def sigma_matrix2(sig_x, sig_y, theta):
"""Calculate the rotated sigma matrix (two dimensional matrix).
Args:
sig_x (float):
sig_y (float):
theta (float): Radian measurement.
Returns:
ndarray: Rotated sigma matrix.
"""
d_matrix = np.array([[sig_x ** 2, 0], [0, sig_y ** 2]])
u_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
return np.dot(u_matrix, np.dot(d_matrix, u_matrix.T))
def mesh_grid(kernel_size):
"""Generate the mesh grid, centering at zero.
Args:
kernel_size (int):
Returns:
xy (ndarray): with the shape (kernel_size, kernel_size, 2)
xx (ndarray): with the shape (kernel_size, kernel_size)
yy (ndarray): with the shape (kernel_size, kernel_size)
"""
ax = np.arange(-kernel_size // 2 + 1., kernel_size // 2 + 1.)
xx, yy = np.meshgrid(ax, ax)
xy = np.hstack((xx.reshape((kernel_size * kernel_size, 1)), yy.reshape(kernel_size * kernel_size,
1))).reshape(kernel_size, kernel_size, 2)
return xy, xx, yy
def pdf2(sigma_matrix, grid):
"""Calculate PDF of the bivariate Gaussian distribution.
Args:
sigma_matrix (ndarray): with the shape (2, 2)
grid (ndarray): generated by :func:`mesh_grid`,
with the shape (K, K, 2), K is the kernel size.
Returns:
kernel (ndarrray): un-normalized kernel.
"""
inverse_sigma = np.linalg.inv(sigma_matrix)
kernel = np.exp(-0.5 * np.sum(np.dot(grid, inverse_sigma) * grid, 2))
return kernel
def bivariate_Gaussian(kernel_size, sig_x, sig_y, theta, grid=None, isotropic=True):
"""Generate a bivariate isotropic or anisotropic Gaussian kernel.
In the isotropic mode, only `sig_x` is used. `sig_y` and `theta` is ignored.
Args:
kernel_size (int):
sig_x (float):
sig_y (float):
theta (float): Radian measurement.
grid (ndarray, optional): generated by :func:`mesh_grid`,
with the shape (K, K, 2), K is the kernel size. Default: None
isotropic (bool):
Returns:
kernel (ndarray): normalized kernel.
"""
if grid is None:
grid, _, _ = mesh_grid(kernel_size)
if isotropic:
sigma_matrix = np.array([[sig_x ** 2, 0], [0, sig_x ** 2]])
else:
sigma_matrix = sigma_matrix2(sig_x, sig_y, theta)
kernel = pdf2(sigma_matrix, grid)
kernel = kernel / np.sum(kernel)
return kernel