Spaces:
Running
on
Zero
Running
on
Zero
import numpy as np | |
from fusion import TSDFVolume, rigid_transform | |
def get_fov_mask(): | |
calib = read_calib() | |
T_velo_2_cam = calib["Tr"] | |
_, fov_mask = generate_point_grid(vox_origin=np.array([0, -25.6, -2]), | |
scene_size=(51.2, 51.2, 6.4), | |
voxel_size=0.2, | |
cam_E=T_velo_2_cam, | |
cam_k=get_cam_k()) | |
return fov_mask.reshape(256, 256, 32) | |
def generate_point_grid(cam_E, vox_origin, voxel_size, scene_size, cam_k, img_W=1408, img_H=376): | |
""" | |
compute the 2D projection of voxels centroids | |
Parameters: | |
---------- | |
cam_E: 4x4 | |
=camera pose in case of NYUv2 dataset | |
=Transformation from camera to lidar coordinate in case of SemKITTI | |
cam_k: 3x3 | |
camera intrinsics | |
vox_origin: (3,) | |
world(NYU)/lidar(SemKITTI) cooridnates of the voxel at index (0, 0, 0) | |
img_W: int | |
image width | |
img_H: int | |
image height | |
scene_size: (3,) | |
scene size in meter: (51.2, 51.2, 6.4) for SemKITTI and (4.8, 4.8, 2.88) for NYUv2 | |
Returns | |
------- | |
projected_pix: (N, 2) | |
Projected 2D positions of voxels | |
fov_mask: (N,) | |
Voxels mask indice voxels inside image's FOV | |
pix_z: (N,) | |
Voxels'distance to the sensor in meter | |
""" | |
# Compute the x, y, z bounding of the scene in meter | |
vol_bnds = np.zeros((3, 2)) | |
vol_bnds[:, 0] = vox_origin | |
vol_bnds[:, 1] = vox_origin + np.array(scene_size) | |
# Compute the voxels centroids in lidar cooridnates | |
vol_dim = np.ceil((vol_bnds[:, 1] - vol_bnds[:, 0]) / voxel_size).copy(order='C').astype(int) | |
xv, yv, zv = np.meshgrid( | |
range(vol_dim[0]), | |
range(vol_dim[1]), | |
range(vol_dim[2]), | |
indexing='ij' | |
) | |
print(xv.shape, yv.shape, zv.shape) | |
vox_coords = np.concatenate([ | |
xv.reshape(1, -1), | |
yv.reshape(1, -1), | |
zv.reshape(1, -1) | |
], axis=0).astype(int).T | |
# Project voxels'centroid from lidar coordinates to camera coordinates | |
cam_pts = TSDFVolume.vox2world(vox_origin, vox_coords, voxel_size) | |
cam_pts = rigid_transform(cam_pts, cam_E) | |
# Project camera coordinates to pixel positions | |
projected_pix = TSDFVolume.cam2pix(cam_pts, cam_k) | |
pix_x, pix_y = projected_pix[:, 0], projected_pix[:, 1] | |
# Eliminate pixels outside view frustum | |
pix_z = cam_pts[:, 2] | |
fov_mask = np.logical_and(pix_x >= 0, | |
np.logical_and(pix_x < img_W, | |
np.logical_and(pix_y >= 0, | |
np.logical_and(pix_y < img_H, | |
pix_z > 0)))) | |
return cam_pts, fov_mask | |
def read_calib(): | |
""" | |
:param calib_path: Path to a calibration text file. | |
:return: dict with calibration matrices. | |
""" | |
P = np.array( | |
[ | |
552.554261, | |
0.000000, | |
682.049453, | |
0.000000, | |
0.000000, | |
552.554261, | |
238.769549, | |
0.000000, | |
0.000000, | |
0.000000, | |
1.000000, | |
0.000000, | |
] | |
).reshape(3, 4) | |
cam2velo = np.array( | |
[ | |
0.04307104361, | |
-0.08829286498, | |
0.995162929, | |
0.8043914418, | |
-0.999004371, | |
0.007784614041, | |
0.04392796942, | |
0.2993489574, | |
-0.01162548558, | |
-0.9960641394, | |
-0.08786966659, | |
-0.1770225824, | |
] | |
).reshape(3, 4) | |
C2V = np.concatenate( | |
[cam2velo, np.array([0, 0, 0, 1]).reshape(1, 4)], axis=0 | |
) | |
# print("C2V: ", C2V) | |
V2C = np.linalg.inv(C2V) | |
# print("V2C: ", V2C) | |
V2C = V2C[:3, :] | |
# print("V2C: ", V2C) | |
# reshape matrices | |
calib_out = {} | |
# 3x4 projection matrix for left camera | |
calib_out["P2"] = P | |
calib_out["Tr"] = np.identity(4) # 4x4 matrix | |
calib_out["Tr"][:3, :4] = V2C | |
return calib_out | |
def get_cam_k(): | |
cam_k = np.array( | |
[ | |
552.554261, | |
0.000000, | |
682.049453, | |
0.000000, | |
0.000000, | |
552.554261, | |
238.769549, | |
0.000000, | |
0.000000, | |
0.000000, | |
1.000000, | |
0.000000, | |
] | |
).reshape(3, 4) | |
return cam_k[:3, :3] |