File size: 7,886 Bytes
e8bdafd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import copy
import numpy as np
import torch
import torch.nn.functional as F
import open3d as o3d
from torch import Tensor

def relative_pose(rt: Tensor, mode, ref_index) -> Tensor:
    """Re-express a stack of transforms relative to one reference entry.

    :param rt: stack of square transforms indexed along dim 0
        (documented by the original author as F,4,4).
    :param mode: ``"left"`` returns ``inverse(rt[ref_index]) @ rt``;
        ``"right"`` returns ``rt @ inverse(rt[ref_index])``.
        Any other value leaves ``rt`` untouched.
    :param ref_index: index of the reference transform along dim 0.
    :return: the re-based stack, or ``rt`` itself for an unrecognised mode.
    """
    if mode not in ("left", "right"):
        return rt

    reference_inverse = rt[ref_index].inverse()
    return reference_inverse @ rt if mode == "left" else rt @ reference_inverse


def create_line_point_cloud(start_point, end_point, num_points=50, color=np.array([0, 0, 1.0])):
    """Sample evenly spaced points on a segment and give them all one colour.

    :param start_point: first endpoint of the segment.
    :param end_point: last endpoint of the segment (included in the samples).
    :param num_points: number of samples produced by ``np.linspace``.
    :param color: colour repeated for every sample; the default is the
        normalised RGB value for blue (0, 0, 255 -> 0, 0, 1).
    :return: ``(points, colors)`` with one colour row per sampled point.
    """
    # Linear interpolation from the start point to the end point.
    line = np.linspace(start_point, end_point, num_points)
    # np.tile copies the colour, so the shared default array is never modified.
    return line, np.tile(color, (len(line), 1))


def remove_outliers(pcd, nb_neighbors=16, std_ratio=3.0):
    """
    Remove outlier points from a point cloud using a statistical filter.

    :param pcd: Open3D point cloud object; only its
        ``remove_statistical_outlier`` method is used here.
    :param nb_neighbors: number of neighbouring points considered for each point
        (default 16, the value that was previously hard-coded).
    :param std_ratio: standard-deviation threshold for classifying outliers
        (default 3.0, the value that was previously hard-coded).
    :return: the cleaned point cloud.
    """
    # remove_statistical_outlier returns (filtered_cloud, kept_indices); the
    # filtered cloud is already the selection of those indices, so it is
    # returned directly instead of re-selecting from `pcd`.
    filtered, _kept_indices = pcd.remove_statistical_outlier(
        nb_neighbors=nb_neighbors, std_ratio=std_ratio
    )
    return filtered

def construct_point_cloud(points: np.ndarray, colors: np.ndarray) -> o3d.geometry.PointCloud:
    """Build an Open3D point cloud from coordinates and colours, then flip x and y.

    :param points: point coordinates handed to ``Vector3dVector``
        (presumably shape (N, 3) -- confirm against callers).
    :param colors: per-point colours handed to ``Vector3dVector``
        (presumably shape (N, 3) -- confirm against callers).
    :return: the point cloud after the homogeneous transform
        ``diag(-1, -1, 1, 1)`` has been applied to it.
    """
    # Negates the x and y coordinates and leaves z untouched.
    flip_xy = np.diag([-1.0, -1.0, 1.0, 1.0])

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)
    cloud.colors = o3d.utility.Vector3dVector(colors)
    cloud.transform(flip_xy)
    return cloud


def camera_pose_lerp(c2w: Tensor, target_frames: int):
    """Resample a sequence of pose matrices to ``target_frames`` entries.

    Sample positions are spread uniformly over ``[0, c2w.size(0) - 1]``; each
    output is an element-wise linear blend of the two source entries that
    bracket its position.

    :param c2w: source sequence, indexed along dim 0. The blend weight is
        broadcast over two trailing dims, so a rank-3 tensor is expected
        (presumably (F, 4, 4) camera-to-world matrices -- confirm with callers).
    :param target_frames: number of output entries.
    :return: tensor with ``target_frames`` entries along dim 0, on the same
        device and with the same dtype as ``c2w``.
    """
    last_index = c2w.size(0) - 1
    # Create the positions on c2w's device: torch.lerp needs the weight tensor
    # to live on the same device as its inputs.
    positions = torch.linspace(
        0, last_index, target_frames, dtype=c2w.dtype, device=c2w.device
    )
    left_indices = positions.floor().long()
    # Clamp so that rounding noise in low-precision dtypes can never push the
    # upper neighbour past the final entry.
    right_indices = positions.ceil().long().clamp(max=last_index)
    blend = positions.frac().unsqueeze(-1).unsqueeze(-1)

    return torch.lerp(c2w[left_indices], c2w[right_indices], blend)


def apply_thresholded_conv(mask, kernel_size=5, threshold=0.9):
    """
    Binarise a mask by thresholding the sum over a sliding square window.

    Each output pixel is 1.0 when the sum of the ``kernel_size x kernel_size``
    window around it is at least ``kernel_size * kernel_size * threshold``
    (22.5 with the defaults), and 0.0 otherwise. Borders are filled with
    reflect padding, so every input pixel gets an output value.

    Args:
        mask (torch.Tensor): 4-D tensor of shape (b, f, h, w). The original
            author documents it as containing only 0.0 and 1.0.
        kernel_size (int): side length of the summing window. Even sizes are
            supported by padding one pixel less before than after.
        threshold (float): fraction of the window area the sum must reach.

    Returns:
        torch.Tensor: float32 tensor of shape (b, f, h, w) holding 0.0 / 1.0.
    """
    b, f, h, w = mask.shape

    # Reflect padding and conv2d need a floating dtype; the kernel follows the
    # (possibly converted) mask so the two operands always agree.
    values = mask if mask.is_floating_point() else mask.float()
    kernel = torch.ones(
        (1, 1, kernel_size, kernel_size), dtype=values.dtype, device=values.device
    )

    # Total padding of kernel_size - 1 keeps the convolution output at (h, w).
    # For odd kernels both sides equal kernel_size // 2.
    pad_before = (kernel_size - 1) // 2
    pad_after = kernel_size // 2
    padded = F.pad(
        values, pad=(pad_before, pad_after, pad_before, pad_after), mode='reflect'
    )

    # Fold (b, f) into the batch dim so each frame is a single-channel image.
    frames = padded.reshape(b * f, 1, h + kernel_size - 1, w + kernel_size - 1)
    window_sum = F.conv2d(frames, kernel)

    # Threshold the window sums, then restore the (b, f, h, w) layout.
    keep = window_sum >= (kernel_size * kernel_size * threshold)
    return keep.float().view(b, f, h, w)


def constrain_to_multiple_of(x, min_val=0, max_val=None, multiple_of=14):
    """Snap ``x`` to a multiple of ``multiple_of`` while respecting bounds.

    The nearest multiple is tried first. If it exceeds ``max_val`` the multiple
    at or below ``x`` is used instead; if the result is then below ``min_val``
    the multiple at or above ``x`` is used.

    :param x: value to snap; the bound comparisons imply a scalar.
    :param min_val: lower bound that triggers rounding up.
    :param max_val: optional upper bound that triggers rounding down.
    :param multiple_of: step the result must be a multiple of.
    :return: the snapped value as a NumPy integer.
    """
    ratio = x / multiple_of

    snapped = (np.round(ratio) * multiple_of).astype(int)

    too_large = max_val is not None and snapped > max_val
    if too_large:
        snapped = (np.floor(ratio) * multiple_of).astype(int)

    # Checked after the max_val adjustment, so the lower bound wins a conflict.
    if snapped < min_val:
        snapped = (np.ceil(ratio) * multiple_of).astype(int)

    return snapped


def add_camera_trace(points, colors, points_x, points_y):
    """Append a wireframe camera pyramid to a point/colour array pair.

    Eight line segments are sampled with ``create_line_point_cloud`` (50 points
    each, using that helper's default colour): four from the origin to the
    corners of the ``(points_x, points_y)`` grid placed at depth 1.0, and four
    joining neighbouring corners. All sampled coordinates are scaled by 0.25
    before being appended.

    :param points: existing point coordinates; new rows are appended on axis 0.
    :param colors: existing point colours; new rows are appended on axis 0.
    :param points_x: 2-D indexable grid of x coordinates; only its four
        corner entries are read.
    :param points_y: 2-D indexable grid of y coordinates; only its four
        corner entries are read.
    :return: ``(points, colors)`` with the camera trace appended.
    """
    def corner(row, col):
        # Grid corner lifted to depth z = 1.0.
        return np.array([points_x[row][col], points_y[row][col], 1.0])

    corner_ids = ((0, 0), (0, -1), (-1, 0), (-1, -1))
    edge_ids = (
        ((0, 0), (0, -1)),
        ((0, 0), (-1, 0)),
        ((-1, -1), (0, -1)),
        ((-1, -1), (-1, 0)),
    )

    # Rays from the origin first, then the rectangle edges -- same order as
    # the rows are appended to the output.
    segments = [(np.array([0, 0, 0]), corner(r, c)) for r, c in corner_ids]
    segments += [(corner(*a), corner(*b)) for a, b in edge_ids]

    new_points = []
    new_colors = []
    for start, end in segments:
        line, line_colors = create_line_point_cloud(
            start_point=start,
            end_point=end,
            num_points=50,
        )
        new_points.append(line * 0.25)
        new_colors.append(line_colors)

    points = np.concatenate((points, *new_points), axis=0)
    colors = np.concatenate((colors, *new_colors), axis=0)
    return points, colors


def create_relative(RT_list, K_1=4.7, dataset="syn"):
    """Express every pose relative to the first pose in the list.

    With ``R0, T0`` the rotation and translation of the first entry, each
    pose ``[R | T]`` becomes ``[R @ R0.T | T - (R @ R0.T) @ T0]``, so the first
    pose maps to ``[I | 0]``. ``R0.T`` stands in for the inverse of ``R0``,
    which assumes ``R0`` is orthonormal.

    The inputs are copied before being updated, so the caller's arrays are
    left unmodified.

    :param RT_list: sequence of poses, each reshapeable to (3, 4).
    :param K_1: unused; kept so existing call sites keep working.
    :param dataset: when ``"realestate"`` each returned pose is flattened to
        12 values; otherwise poses are returned with shape (3, 4).
    :return: list of relative poses (empty when ``RT_list`` is empty).
    """
    # len() rather than truthiness so an ndarray of poses is accepted too.
    if len(RT_list) == 0:
        return []

    scale_T = 1
    # np.array copies, so the in-place updates below touch private buffers
    # rather than views of the caller's data.
    poses = [np.array(RT).reshape(3, 4) for RT in RT_list]

    # Snapshot the reference pose: poses[0] itself is overwritten in the loop.
    first_frame_R_inv = poses[0][:, :3].T.copy()
    first_frame_T = poses[0][:, -1].copy()

    for RT in poses:
        RT[:, :3] = np.dot(RT[:, :3], first_frame_R_inv)
        # Uses the rotation updated on the previous line.
        RT[:, -1] = RT[:, -1] - np.dot(RT[:, :3], first_frame_T)
        RT[:, -1] = RT[:, -1] * scale_T

    if dataset == "realestate":
        return [RT.reshape(-1) for RT in poses]
    return poses


def sigma_matrix2(sig_x, sig_y, theta):
    """Build the 2x2 covariance matrix of a rotated bivariate Gaussian.

    The result is ``R @ diag(sig_x**2, sig_y**2) @ R.T`` where ``R`` is the
    counter-clockwise rotation by ``theta``.

    Args:
        sig_x (float): standard deviation along the first principal axis.
        sig_y (float): standard deviation along the second principal axis.
        theta (float): rotation angle in radians.
    Returns:
        ndarray: rotated sigma matrix with the shape (2, 2).
    """
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    variances = np.array([[sig_x ** 2, 0], [0, sig_y ** 2]])
    return rotation @ (variances @ rotation.T)


def mesh_grid(kernel_size):
    """Create integer-spaced coordinate grids for a square kernel.

    The axis runs from ``-kernel_size // 2 + 1`` up to ``kernel_size // 2``,
    which is centred on zero for odd sizes.

    Args:
        kernel_size (int): side length of the kernel.
    Returns:
        xy (ndarray): stacked (x, y) pairs with the shape
            (kernel_size, kernel_size, 2)
        xx (ndarray): x coordinates with the shape (kernel_size, kernel_size)
        yy (ndarray): y coordinates with the shape (kernel_size, kernel_size)
    """
    first = -kernel_size // 2 + 1.
    stop = kernel_size // 2 + 1.
    axis = np.arange(first, stop)
    xx, yy = np.meshgrid(axis, axis)
    # Pair up the coordinates so that xy[r, c] == (xx[r, c], yy[r, c]).
    xy = np.stack((xx, yy), axis=-1)
    return xy, xx, yy


def pdf2(sigma_matrix, grid):
    """Evaluate an un-normalized zero-mean bivariate Gaussian on a grid.

    For every grid position ``p`` the value is
    ``exp(-0.5 * p @ inv(sigma_matrix) @ p)``.

    Args:
        sigma_matrix (ndarray): with the shape (2, 2)
        grid (ndarray): generated by :func:`mesh_grid`,
            with the shape (K, K, 2), K is the kernel size.
    Returns:
        kernel (ndarray): un-normalized kernel with the shape (K, K).
    """
    precision = np.linalg.inv(sigma_matrix)
    # Quadratic form p^T * precision * p, reduced over the coordinate axis.
    quadratic = (np.dot(grid, precision) * grid).sum(axis=2)
    return np.exp(-0.5 * quadratic)


def bivariate_Gaussian(kernel_size, sig_x, sig_y, theta, grid=None, isotropic=True):
    """Build a normalized bivariate Gaussian kernel, isotropic or anisotropic.

    In the isotropic mode only `sig_x` is used; `sig_y` and `theta` are ignored.

    Args:
        kernel_size (int): side length of the kernel; used only when `grid`
            is not supplied.
        sig_x (float): standard deviation along the first axis.
        sig_y (float): standard deviation along the second axis.
        theta (float): rotation angle in radians.
        grid (ndarray, optional): generated by :func:`mesh_grid`,
            with the shape (K, K, 2), K is the kernel size. Default: None
        isotropic (bool): use the same variance on both axes.
    Returns:
        kernel (ndarray): kernel divided by its sum, so its entries add up to 1.
    """
    if grid is None:
        grid = mesh_grid(kernel_size)[0]

    sigma_matrix = (
        np.array([[sig_x ** 2, 0], [0, sig_x ** 2]])
        if isotropic
        else sigma_matrix2(sig_x, sig_y, theta)
    )

    unnormalized = pdf2(sigma_matrix, grid)
    return unnormalized / np.sum(unnormalized)