Spaces:

roll-ai
/

RealCam-I2V

Runtime error

File size: 7,886 Bytes

e8bdafd

import copy
import numpy as np
import torch
import torch.nn.functional as F
import open3d as o3d
from torch import Tensor

def relative_pose(rt: Tensor, mode, ref_index) -> Tensor:
    '''
    :param rt: F,4,4
    :param mode: left or right
    :return:
    '''
    if mode == "left":
        rt = rt[ref_index].inverse() @ rt
    elif mode == "right":
        rt = rt @ rt[ref_index].inverse()
    return rt


def create_line_point_cloud(start_point, end_point, num_points=50, color=np.array([0, 0, 1.0])):
    # 创建从起点到终点的线性空间
    points = np.linspace(start_point, end_point, num_points)
    # color 归一化的 RGB 值 (0, 0, 255 -> 0, 0, 1)
    colors = np.tile(color, (points.shape[0], 1))
    return points, colors


def remove_outliers(pcd):
    """
    基于统计方法移除点云中的离群点
    :param pcd: Open3D 的点云对象
    :param nb_neighbors: 每个点考虑的邻域点数量
    :param std_ratio: 离群点标准差阈值
    :return: 清理后的点云
    """
    cl, ind = pcd.remove_statistical_outlier(nb_neighbors=16, std_ratio=3.0)
    # cl, ind = pcd.remove_radius_outlier(nb_points=3, radius=0.1)
    return pcd.select_by_index(ind)

def construct_point_cloud(points: np.ndarray, colors: np.ndarray) -> o3d.geometry.PointCloud:
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    pcd.transform([
        [-1, 0, 0, 0],
        [0, -1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1]
    ])

    return pcd


def camera_pose_lerp(c2w: Tensor, target_frames: int):
    weights = torch.linspace(0, c2w.size(0) - 1, target_frames, dtype=c2w.dtype)
    left_indices = weights.floor().long()
    right_indices = weights.ceil().long()

    return torch.lerp(c2w[left_indices], c2w[right_indices], weights.unsqueeze(-1).unsqueeze(-1).frac())


def apply_thresholded_conv(mask, kernel_size=5, threshold=0.9):
    """
    对输入的mask进行5x5卷积操作，卷积核用于对5x5区域求和。
    如果求和后值 < 25，则置为0；否则置为1。
    使用reflect padding进行边缘填充。

    参数:
    mask (torch.Tensor): 输入的四维Tensor，形状为 (b, f, h, w)，仅包含 0.0 和 1.0。

    返回:
    torch.Tensor: 应用阈值后的四维Tensor，形状为 (b, f, h, w)。
    """
    # 获取输入的形状
    b, f, h, w = mask.shape

    # 创建一个5x5的卷积核，全为1
    kernel = torch.ones((1, 1, kernel_size, kernel_size), dtype=torch.float32, device=mask.device)

    # 使用 reflect padding 进行边缘填充
    mask_padded = F.pad(mask, pad=(kernel_size//2, kernel_size//2, kernel_size//2, kernel_size//2), mode='reflect')

    # 将 mask 视作 (b * f, 1, h, w)，这样可以在 h 和 w 上做 2D 卷积
    mask_reshaped = mask_padded.view(-1, 1, h + 2*(kernel_size//2), w + 2*(kernel_size//2))
    summed_mask = F.conv2d(mask_reshaped, kernel)

    # 对求和结果进行阈值判断，将 < 25 的置为 0，其余置为 1
    thresholded_mask = (summed_mask >= (kernel_size * kernel_size * threshold)).float()

    # 将结果 reshape 回原来的形状 (b, f, h, w)
    thresholded_mask = thresholded_mask.view(b, f, h, w)

    return thresholded_mask


def constrain_to_multiple_of(x, min_val=0, max_val=None, multiple_of=14):
    y = (np.round(x / multiple_of) * multiple_of).astype(int)

    if max_val is not None and y > max_val:
        y = (np.floor(x / multiple_of) * multiple_of).astype(int)

    if y < min_val:
        y = (np.ceil(x / multiple_of) * multiple_of).astype(int)

    return y


def add_camera_trace(points, colors, points_x, points_y):
    x, y = points_x, points_y
    for idx in [[0, 0], [0, -1], [-1, 0], [-1, -1]]:
        camera, camera_colors = create_line_point_cloud(
            start_point=np.array([0, 0, 0]),
            end_point=np.array([x[idx[0]][idx[1]], y[idx[0]][idx[1]], 1.0]),
            num_points=50,
        )
        points = np.concatenate((points, camera * 0.25), axis=0)
        colors = np.concatenate((colors, camera_colors), axis=0)

    for start_idx, end_idx in [
        [[0, 0], [0, -1]],
        [[0, 0], [-1, 0]],
        [[-1, -1], [0, -1]],
        [[-1, -1], [-1, 0]],
    ]:
        camera, camera_colors = create_line_point_cloud(
            start_point=np.array([x[start_idx[0]][start_idx[1]], y[start_idx[0]][start_idx[1]], 1.0]),
            end_point=np.array([x[end_idx[0]][end_idx[1]], y[end_idx[0]][end_idx[1]], 1.0]),
            num_points=50,
        )
        points = np.concatenate((points, camera * 0.25), axis=0)
        colors = np.concatenate((colors, camera_colors), axis=0)

    return points, colors


def create_relative(RT_list, K_1=4.7, dataset="syn"):
    scale_T = 1
    RT_list = [RT.reshape(3, 4) for RT in RT_list]
    temp = []
    first_frame_RT = copy.deepcopy(RT_list[0])
    # first_frame_R_inv = np.linalg.inv(first_frame_RT[:,:3])
    first_frame_R_inv = first_frame_RT[:, :3].T
    first_frame_T = first_frame_RT[:, -1]
    for RT in RT_list:
        RT[:, :3] = np.dot(RT[:, :3], first_frame_R_inv)
        RT[:, -1] = RT[:, -1] - np.dot(RT[:, :3], first_frame_T)
        RT[:, -1] = RT[:, -1] * scale_T
        temp.append(RT)
    if dataset == "realestate":
        temp = [RT.reshape(-1) for RT in temp]
    return temp


def sigma_matrix2(sig_x, sig_y, theta):
    """Calculate the rotated sigma matrix (two dimensional matrix).
    Args:
        sig_x (float):
        sig_y (float):
        theta (float): Radian measurement.
    Returns:
        ndarray: Rotated sigma matrix.
    """
    d_matrix = np.array([[sig_x ** 2, 0], [0, sig_y ** 2]])
    u_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
    return np.dot(u_matrix, np.dot(d_matrix, u_matrix.T))


def mesh_grid(kernel_size):
    """Generate the mesh grid, centering at zero.
    Args:
        kernel_size (int):
    Returns:
        xy (ndarray): with the shape (kernel_size, kernel_size, 2)
        xx (ndarray): with the shape (kernel_size, kernel_size)
        yy (ndarray): with the shape (kernel_size, kernel_size)
    """
    ax = np.arange(-kernel_size // 2 + 1., kernel_size // 2 + 1.)
    xx, yy = np.meshgrid(ax, ax)
    xy = np.hstack((xx.reshape((kernel_size * kernel_size, 1)), yy.reshape(kernel_size * kernel_size,
                                                                           1))).reshape(kernel_size, kernel_size, 2)
    return xy, xx, yy


def pdf2(sigma_matrix, grid):
    """Calculate PDF of the bivariate Gaussian distribution.
    Args:
        sigma_matrix (ndarray): with the shape (2, 2)
        grid (ndarray): generated by :func:`mesh_grid`,
            with the shape (K, K, 2), K is the kernel size.
    Returns:
        kernel (ndarrray): un-normalized kernel.
    """
    inverse_sigma = np.linalg.inv(sigma_matrix)
    kernel = np.exp(-0.5 * np.sum(np.dot(grid, inverse_sigma) * grid, 2))
    return kernel


def bivariate_Gaussian(kernel_size, sig_x, sig_y, theta, grid=None, isotropic=True):
    """Generate a bivariate isotropic or anisotropic Gaussian kernel.
    In the isotropic mode, only `sig_x` is used. `sig_y` and `theta` is ignored.
    Args:
        kernel_size (int):
        sig_x (float):
        sig_y (float):
        theta (float): Radian measurement.
        grid (ndarray, optional): generated by :func:`mesh_grid`,
            with the shape (K, K, 2), K is the kernel size. Default: None
        isotropic (bool):
    Returns:
        kernel (ndarray): normalized kernel.
    """
    if grid is None:
        grid, _, _ = mesh_grid(kernel_size)
    if isotropic:
        sigma_matrix = np.array([[sig_x ** 2, 0], [0, sig_x ** 2]])
    else:
        sigma_matrix = sigma_matrix2(sig_x, sig_y, theta)
    kernel = pdf2(sigma_matrix, grid)
    kernel = kernel / np.sum(kernel)
    return kernel