import glob, math import numpy as np # from scipy import misc # from scipy import linalg from PIL import Image import io import matplotlib.pyplot as plt EPS = 1e-6 XMIN = -64.0 # right (neg is left) XMAX = 64.0 # right YMIN = -64.0 # down (neg is up) YMAX = 64.0 # down ZMIN = -64.0 # forward ZMAX = 64.0 # forward def print_stats(name, tensor): tensor = tensor.astype(np.float32) print('%s min = %.2f, mean = %.2f, max = %.2f' % (name, np.min(tensor), np.mean(tensor), np.max(tensor)), tensor.shape) def reduce_masked_mean(x, mask, axis=None, keepdims=False): # x and mask are the same shape # returns shape-1 # axis can be a list of axes prod = x*mask numer = np.sum(prod, axis=axis, keepdims=keepdims) denom = EPS+np.sum(mask, axis=axis, keepdims=keepdims) mean = numer/denom return mean def reduce_masked_sum(x, mask, axis=None, keepdims=False): # x and mask are the same shape # returns shape-1 # axis can be a list of axes prod = x*mask numer = np.sum(prod, axis=axis, keepdims=keepdims) return numer def reduce_masked_median(x, mask, keep_batch=False): # x and mask are the same shape # returns shape-1 # axis can be a list of axes if not (x.shape == mask.shape): print('reduce_masked_median: these shapes should match:', x.shape, mask.shape) assert(False) # assert(x.shape == mask.shape) B = list(x.shape)[0] if keep_batch: x = np.reshape(x, [B, -1]) mask = np.reshape(mask, [B, -1]) meds = np.zeros([B], np.float32) for b in list(range(B)): xb = x[b] mb = mask[b] if np.sum(mb) > 0: xb = xb[mb > 0] meds[b] = np.median(xb) else: meds[b] = np.nan return meds else: x = np.reshape(x, [-1]) mask = np.reshape(mask, [-1]) if np.sum(mask) > 0: x = x[mask > 0] med = np.median(x) else: med = np.nan med = np.array([med], np.float32) return med def get_nFiles(path): return len(glob.glob(path)) def get_file_list(path): return glob.glob(path) def rotm2eul(R): # R is 3x3 sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) if sy > 1e-6: # singular x = math.atan2(R[2,1] , R[2,2]) y = math.atan2(-R[2,0], sy) z = math.atan2(R[1,0], R[0,0]) else: x = math.atan2(-R[1,2], R[1,1]) y = math.atan2(-R[2,0], sy) z = 0 return x, y, z def rad2deg(rad): return rad*180.0/np.pi def deg2rad(deg): return deg/180.0*np.pi def eul2rotm(rx, ry, rz): # copy of matlab, but order of inputs is different # R = [ cy*cz sy*sx*cz-sz*cx sy*cx*cz+sz*sx # cy*sz sy*sx*sz+cz*cx sy*cx*sz-cz*sx # -sy cy*sx cy*cx] sinz = np.sin(rz) siny = np.sin(ry) sinx = np.sin(rx) cosz = np.cos(rz) cosy = np.cos(ry) cosx = np.cos(rx) r11 = cosy*cosz r12 = sinx*siny*cosz - cosx*sinz r13 = cosx*siny*cosz + sinx*sinz r21 = cosy*sinz r22 = sinx*siny*sinz + cosx*cosz r23 = cosx*siny*sinz - sinx*cosz r31 = -siny r32 = sinx*cosy r33 = cosx*cosy r1 = np.stack([r11,r12,r13],axis=-1) r2 = np.stack([r21,r22,r23],axis=-1) r3 = np.stack([r31,r32,r33],axis=-1) r = np.stack([r1,r2,r3],axis=0) return r def wrap2pi(rad_angle): # puts the angle into the range [-pi, pi] return np.arctan2(np.sin(rad_angle), np.cos(rad_angle)) def rot2view(rx,ry,rz,x,y,z): # takes rot angles and 3d position as input # returns viewpoint angles as output # (all in radians) # it will perform strangely if z <= 0 az = wrap2pi(ry - (-np.arctan2(z, x) - 1.5*np.pi)) el = -wrap2pi(rx - (-np.arctan2(z, y) - 1.5*np.pi)) th = -rz return az, el, th def invAxB(a,b): """ Compute the relative 3d transformation between a and b. Input: a -- first pose (homogeneous 4x4 matrix) b -- second pose (homogeneous 4x4 matrix) Output: Relative 3d transformation from a to b. """ return np.dot(np.linalg.inv(a),b) def merge_rt(r, t): # r is 3 x 3 # t is 3 or maybe 3 x 1 t = np.reshape(t, [3, 1]) rt = np.concatenate((r,t), axis=1) # rt is 3 x 4 br = np.reshape(np.array([0,0,0,1], np.float32), [1, 4]) # br is 1 x 4 rt = np.concatenate((rt, br), axis=0) # rt is 4 x 4 return rt def split_rt(rt): r = rt[:3,:3] t = rt[:3,3] r = np.reshape(r, [3, 3]) t = np.reshape(t, [3, 1]) return r, t def split_intrinsics(K): # K is 3 x 4 or 4 x 4 fx = K[0,0] fy = K[1,1] x0 = K[0,2] y0 = K[1,2] return fx, fy, x0, y0 def merge_intrinsics(fx, fy, x0, y0): # inputs are shaped [] K = np.eye(4) K[0,0] = fx K[1,1] = fy K[0,2] = x0 K[1,2] = y0 # K is shaped 4 x 4 return K def scale_intrinsics(K, sx, sy): fx, fy, x0, y0 = split_intrinsics(K) fx *= sx fy *= sy x0 *= sx y0 *= sy return merge_intrinsics(fx, fy, x0, y0) # def meshgrid(H, W): # x = np.linspace(0, W-1, W) # y = np.linspace(0, H-1, H) # xv, yv = np.meshgrid(x, y) # return xv, yv def compute_distance(transform): """ Compute the distance of the translational component of a 4x4 homogeneous matrix. """ return numpy.linalg.norm(transform[0:3,3]) def radian_l1_dist(e, g): # if our angles are in [0, 360] we can follow this stack overflow answer: # https://gamedev.stackexchange.com/questions/4467/comparing-angles-and-working-out-the-difference # wrap2pi brings the angles to [-180, 180]; adding pi puts them in [0, 360] e = wrap2pi(e)+np.pi g = wrap2pi(g)+np.pi l = np.abs(np.pi - np.abs(np.abs(e-g) - np.pi)) return l def apply_pix_T_cam(pix_T_cam, xyz): fx, fy, x0, y0 = split_intrinsics(pix_T_cam) # xyz is shaped B x H*W x 3 # returns xy, shaped B x H*W x 2 N, C = xyz.shape x, y, z = np.split(xyz, 3, axis=-1) EPS = 1e-4 z = np.clip(z, EPS, None) x = (x*fx)/(z)+x0 y = (y*fy)/(z)+y0 xy = np.concatenate([x, y], axis=-1) return xy def apply_4x4(RT, XYZ): # RT is 4 x 4 # XYZ is N x 3 # put into homogeneous coords X, Y, Z = np.split(XYZ, 3, axis=1) ones = np.ones_like(X) XYZ1 = np.concatenate([X, Y, Z, ones], axis=1) # XYZ1 is N x 4 XYZ1_t = np.transpose(XYZ1) # this is 4 x N XYZ2_t = np.dot(RT, XYZ1_t) # this is 4 x N XYZ2 = np.transpose(XYZ2_t) # this is N x 4 XYZ2 = XYZ2[:,:3] # this is N x 3 return XYZ2 def Ref2Mem(xyz, Z, Y, X): # xyz is N x 3, in ref coordinates # transforms ref coordinates into mem coordinates N, C = xyz.shape assert(C==3) mem_T_ref = get_mem_T_ref(Z, Y, X) xyz = apply_4x4(mem_T_ref, xyz) return xyz # def Mem2Ref(xyz_mem, MH, MW, MD): # # xyz is B x N x 3, in mem coordinates # # transforms mem coordinates into ref coordinates # B, N, C = xyz_mem.get_shape().as_list() # ref_T_mem = get_ref_T_mem(B, MH, MW, MD) # xyz_ref = utils_geom.apply_4x4(ref_T_mem, xyz_mem) # return xyz_ref def get_mem_T_ref(Z, Y, X): # sometimes we want the mat itself # note this is not a rigid transform # for interpretability, let's construct this in two steps... # translation center_T_ref = np.eye(4, dtype=np.float32) center_T_ref[0,3] = -XMIN center_T_ref[1,3] = -YMIN center_T_ref[2,3] = -ZMIN VOX_SIZE_X = (XMAX-XMIN)/float(X) VOX_SIZE_Y = (YMAX-YMIN)/float(Y) VOX_SIZE_Z = (ZMAX-ZMIN)/float(Z) # scaling mem_T_center = np.eye(4, dtype=np.float32) mem_T_center[0,0] = 1./VOX_SIZE_X mem_T_center[1,1] = 1./VOX_SIZE_Y mem_T_center[2,2] = 1./VOX_SIZE_Z mem_T_ref = np.dot(mem_T_center, center_T_ref) return mem_T_ref def safe_inverse(a): r, t = split_rt(a) t = np.reshape(t, [3, 1]) r_transpose = r.T inv = np.concatenate([r_transpose, -np.matmul(r_transpose, t)], 1) bottom_row = a[3:4, :] # this is [0, 0, 0, 1] inv = np.concatenate([inv, bottom_row], 0) return inv def get_ref_T_mem(Z, Y, X): mem_T_ref = get_mem_T_ref(X, Y, X) # note safe_inverse is inapplicable here, # since the transform is nonrigid ref_T_mem = np.linalg.inv(mem_T_ref) return ref_T_mem def voxelize_xyz(xyz_ref, Z, Y, X): # xyz_ref is N x 3 xyz_mem = Ref2Mem(xyz_ref, Z, Y, X) # this is N x 3 voxels = get_occupancy(xyz_mem, Z, Y, X) voxels = np.reshape(voxels, [Z, Y, X, 1]) return voxels def get_inbounds(xyz, Z, Y, X, already_mem=False): # xyz is H*W x 3 if not already_mem: xyz = Ref2Mem(xyz, Z, Y, X) x_valid = np.logical_and( np.greater_equal(xyz[:,0], -0.5), np.less(xyz[:,0], float(X)-0.5)) y_valid = np.logical_and( np.greater_equal(xyz[:,1], -0.5), np.less(xyz[:,1], float(Y)-0.5)) z_valid = np.logical_and( np.greater_equal(xyz[:,2], -0.5), np.less(xyz[:,2], float(Z)-0.5)) inbounds = np.logical_and(np.logical_and(x_valid, y_valid), z_valid) return inbounds def sub2ind3d_zyx(depth, height, width, d, h, w): # same as sub2ind3d, but inputs in zyx order # when gathering/scattering with these inds, the tensor should be Z x Y x X return d*height*width + h*width + w def sub2ind3d_yxz(height, width, depth, h, w, d): return h*width*depth + w*depth + d # def ind2sub(height, width, ind): # # int input # y = int(ind / height) # x = ind % height # return y, x def get_occupancy(xyz_mem, Z, Y, X): # xyz_mem is N x 3 # we want to fill a voxel tensor with 1's at these inds inbounds = get_inbounds(xyz_mem, Z, Y, X, already_mem=True) inds = np.where(inbounds) xyz_mem = np.reshape(xyz_mem[inds], [-1, 3]) # xyz_mem is N x 3 # this is more accurate than a cast/floor, but runs into issues when Y==0 xyz_mem = np.round(xyz_mem).astype(np.int32) x = xyz_mem[:,0] y = xyz_mem[:,1] z = xyz_mem[:,2] voxels = np.zeros([Z, Y, X], np.float32) voxels[z, y, x] = 1.0 return voxels def pixels2camera(x,y,z,fx,fy,x0,y0): # x and y are locations in pixel coordinates, z is a depth image in meters # their shapes are H x W # fx, fy, x0, y0 are scalar camera intrinsics # returns xyz, sized [B,H*W,3] H, W = z.shape fx = np.reshape(fx, [1,1]) fy = np.reshape(fy, [1,1]) x0 = np.reshape(x0, [1,1]) y0 = np.reshape(y0, [1,1]) # unproject x = ((z+EPS)/fx)*(x-x0) y = ((z+EPS)/fy)*(y-y0) x = np.reshape(x, [-1]) y = np.reshape(y, [-1]) z = np.reshape(z, [-1]) xyz = np.stack([x,y,z], axis=1) return xyz def depth2pointcloud(z, pix_T_cam): H = z.shape[0] W = z.shape[1] y, x = meshgrid2d(H, W) z = np.reshape(z, [H, W]) fx, fy, x0, y0 = split_intrinsics(pix_T_cam) xyz = pixels2camera(x, y, z, fx, fy, x0, y0) return xyz def meshgrid2d(Y, X): grid_y = np.linspace(0.0, Y-1, Y) grid_y = np.reshape(grid_y, [Y, 1]) grid_y = np.tile(grid_y, [1, X]) grid_x = np.linspace(0.0, X-1, X) grid_x = np.reshape(grid_x, [1, X]) grid_x = np.tile(grid_x, [Y, 1]) # outputs are Y x X return grid_y, grid_x def gridcloud3d(Y, X, Z): x_ = np.linspace(0, X-1, X) y_ = np.linspace(0, Y-1, Y) z_ = np.linspace(0, Z-1, Z) y, x, z = np.meshgrid(y_, x_, z_, indexing='ij') x = np.reshape(x, [-1]) y = np.reshape(y, [-1]) z = np.reshape(z, [-1]) xyz = np.stack([x,y,z], axis=1).astype(np.float32) return xyz def gridcloud2d(Y, X): x_ = np.linspace(0, X-1, X) y_ = np.linspace(0, Y-1, Y) y, x = np.meshgrid(y_, x_, indexing='ij') x = np.reshape(x, [-1]) y = np.reshape(y, [-1]) xy = np.stack([x,y], axis=1).astype(np.float32) return xy def normalize(im): im = im - np.min(im) im = im / np.max(im) return im def wrap2pi(rad_angle): # rad_angle can be any shape # puts the angle into the range [-pi, pi] return np.arctan2(np.sin(rad_angle), np.cos(rad_angle)) def convert_occ_to_height(occ): Z, Y, X, C = occ.shape assert(C==1) height = np.linspace(float(Y), 1.0, Y) height = np.reshape(height, [1, Y, 1, 1]) height = np.max(occ*height, axis=1)/float(Y) height = np.reshape(height, [Z, X, C]) return height def create_depth_image(xy, Z, H, W): # turn the xy coordinates into image inds xy = np.round(xy) # lidar reports a sphere of measurements # only use the inds that are within the image bounds # also, only use forward-pointing depths (Z > 0) valid = (xy[:,0] < W-1) & (xy[:,1] < H-1) & (xy[:,0] >= 0) & (xy[:,1] >= 0) & (Z[:] > 0) # gather these up xy = xy[valid] Z = Z[valid] inds = sub2ind(H,W,xy[:,1],xy[:,0]) depth = np.zeros((H*W), np.float32) for (index, replacement) in zip(inds, Z): depth[index] = replacement depth[np.where(depth == 0.0)] = 70.0 depth = np.reshape(depth, [H, W]) return depth def vis_depth(depth, maxdepth=80.0, log_vis=True): depth[depth<=0.0] = maxdepth if log_vis: depth = np.log(depth) depth = np.clip(depth, 0, np.log(maxdepth)) else: depth = np.clip(depth, 0, maxdepth) depth = (depth*255.0).astype(np.uint8) return depth def preprocess_color(x): return x.astype(np.float32) * 1./255 - 0.5 def convert_box_to_ref_T_obj(boxes): shape = boxes.shape boxes = boxes.reshape(-1,9) rots = [eul2rotm(rx,ry,rz) for rx,ry,rz in boxes[:,6:]] rots = np.stack(rots,axis=0) trans = boxes[:,:3] ref_T_objs = [merge_rt(rot,tran) for rot,tran in zip(rots,trans)] ref_T_objs = np.stack(ref_T_objs,axis=0) ref_T_objs = ref_T_objs.reshape(shape[:-1]+(4,4)) ref_T_objs = ref_T_objs.astype(np.float32) return ref_T_objs def get_rot_from_delta(delta, yaw_only=False): dx = delta[:,0] dy = delta[:,1] dz = delta[:,2] bot_hyp = np.sqrt(dz**2 + dx**2) # top_hyp = np.sqrt(bot_hyp**2 + dy**2) pitch = -np.arctan2(dy, bot_hyp) yaw = np.arctan2(dz, dx) if yaw_only: rot = [eul2rotm(0,y,0) for y in yaw] else: rot = [eul2rotm(0,y,p) for (p,y) in zip(pitch,yaw)] rot = np.stack(rot) # rot is B x 3 x 3 return rot def im2col(im, psize): n_channels = 1 if len(im.shape) == 2 else im.shape[0] (n_channels, rows, cols) = (1,) * (3 - len(im.shape)) + im.shape im_pad = np.zeros((n_channels, int(math.ceil(1.0 * rows / psize) * psize), int(math.ceil(1.0 * cols / psize) * psize))) im_pad[:, 0:rows, 0:cols] = im final = np.zeros((im_pad.shape[1], im_pad.shape[2], n_channels, psize, psize)) for c in np.arange(n_channels): for x in np.arange(psize): for y in np.arange(psize): im_shift = np.vstack( (im_pad[c, x:], im_pad[c, :x])) im_shift = np.column_stack( (im_shift[:, y:], im_shift[:, :y])) final[x::psize, y::psize, c] = np.swapaxes( im_shift.reshape(int(im_pad.shape[1] / psize), psize, int(im_pad.shape[2] / psize), psize), 1, 2) return np.squeeze(final[0:rows - psize + 1, 0:cols - psize + 1]) def filter_discontinuities(depth, filter_size=9, thresh=10): H, W = list(depth.shape) # Ensure that filter sizes are okay assert filter_size % 2 == 1, "Can only use odd filter sizes." # Compute discontinuities offset = int((filter_size - 1) / 2) patches = 1.0 * im2col(depth, filter_size) mids = patches[:, :, offset, offset] mins = np.min(patches, axis=(2, 3)) maxes = np.max(patches, axis=(2, 3)) discont = np.maximum(np.abs(mins - mids), np.abs(maxes - mids)) mark = discont > thresh # Account for offsets final_mark = np.zeros((H, W), dtype=np.uint16) final_mark[offset:offset + mark.shape[0], offset:offset + mark.shape[1]] = mark return depth * (1 - final_mark) def argmax2d(tensor): Y, X = list(tensor.shape) # flatten the Tensor along the height and width axes flat_tensor = tensor.reshape(-1) # argmax of the flat tensor argmax = np.argmax(flat_tensor) # convert the indices into 2d coordinates argmax_y = argmax // X # row argmax_x = argmax % X # col return argmax_y, argmax_x def plot_traj_3d(traj): # traj is S x 3 # print('traj', traj.shape) S, C = list(traj.shape) assert(C==3) fig = plt.figure() ax = fig.add_subplot(111, projection='3d') colors = [plt.cm.RdYlBu(i) for i in np.linspace(0,1,S)] # print('colors', colors) xs = traj[:,0] ys = -traj[:,1] zs = traj[:,2] ax.scatter(xs, zs, ys, s=30, c=colors, marker='o', alpha=1.0, edgecolors=(0,0,0))#, color=color_map[n]) ax.set_xlabel('X') ax.set_ylabel('Z') ax.set_zlabel('Y') ax.set_xlim(0,1) ax.set_ylim(0,1) # this is really Z ax.set_zlim(-1,0) # this is really Y buf = io.BytesIO() plt.savefig(buf, format='png') buf.seek(0) image = np.array(Image.open(buf)) # H x W x 4 image = image[:,:,:3] plt.close() return image def camera2pixels(xyz, pix_T_cam): # xyz is shaped N x 3 # returns xy, shaped N x 2 fx, fy, x0, y0 = split_intrinsics(pix_T_cam) x, y, z = xyz[:,0], xyz[:,1], xyz[:,2] EPS = 1e-4 z = np.clip(z, EPS, None) x = (x*fx)/z + x0 y = (y*fy)/z + y0 xy = np.stack([x, y], axis=-1) return xy def make_colorwheel(): """ Generates a color wheel for optical flow visualization as presented in: Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007) URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf Code follows the original C++ source code of Daniel Scharstein. Code follows the the Matlab source code of Deqing Sun. Returns: np.ndarray: Color wheel """ RY = 15 YG = 6 GC = 4 CB = 11 BM = 13 MR = 6 ncols = RY + YG + GC + CB + BM + MR colorwheel = np.zeros((ncols, 3)) col = 0 # RY colorwheel[0:RY, 0] = 255 colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY) col = col+RY # YG colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG) colorwheel[col:col+YG, 1] = 255 col = col+YG # GC colorwheel[col:col+GC, 1] = 255 colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC) col = col+GC # CB colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB) colorwheel[col:col+CB, 2] = 255 col = col+CB # BM colorwheel[col:col+BM, 2] = 255 colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM) col = col+BM # MR colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR) colorwheel[col:col+MR, 0] = 255 return colorwheel def flow_uv_to_colors(u, v, convert_to_bgr=False): """ Applies the flow color wheel to (possibly clipped) flow components u and v. According to the C++ source code of Daniel Scharstein According to the Matlab source code of Deqing Sun Args: u (np.ndarray): Input horizontal flow of shape [H,W] v (np.ndarray): Input vertical flow of shape [H,W] convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False. Returns: np.ndarray: Flow visualization image of shape [H,W,3] """ flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8) colorwheel = make_colorwheel() # shape [55x3] ncols = colorwheel.shape[0] rad = np.sqrt(np.square(u) + np.square(v)) a = np.arctan2(-v, -u)/np.pi fk = (a+1) / 2*(ncols-1) k0 = np.floor(fk).astype(np.int32) k1 = k0 + 1 k1[k1 == ncols] = 0 f = fk - k0 for i in range(colorwheel.shape[1]): tmp = colorwheel[:,i] col0 = tmp[k0] / 255.0 col1 = tmp[k1] / 255.0 col = (1-f)*col0 + f*col1 idx = (rad <= 1) col[idx] = 1 - rad[idx] * (1-col[idx]) col[~idx] = col[~idx] * 0.75 # out of range # Note the 2-i => BGR instead of RGB ch_idx = 2-i if convert_to_bgr else i flow_image[:,:,ch_idx] = np.floor(255 * col) return flow_image def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False): """ Expects a two dimensional flow image of shape. Args: flow_uv (np.ndarray): Flow UV image of shape [H,W,2] clip_flow (float, optional): Clip maximum of flow values. Defaults to None. convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False. Returns: np.ndarray: Flow visualization image of shape [H,W,3] """ assert flow_uv.ndim == 3, 'input flow must have three dimensions' assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]' if clip_flow is not None: flow_uv = np.clip(flow_uv, -clip_flow, clip_flow) / clip_flow # flow_uv = np.clamp(flow, -clip, clip)/clip u = flow_uv[:,:,0] v = flow_uv[:,:,1] rad = np.sqrt(np.square(u) + np.square(v)) rad_max = np.max(rad) epsilon = 1e-5 u = u / (rad_max + epsilon) v = v / (rad_max + epsilon) return flow_uv_to_colors(u, v, convert_to_bgr)