Spaces:
Running
on
Zero
Running
on
Zero
import glob, math | |
import numpy as np | |
# from scipy import misc | |
# from scipy import linalg | |
from PIL import Image | |
import io | |
import matplotlib.pyplot as plt | |
# Small constant added to denominators (reduce_masked_mean) and to depths
# (pixels2camera) so that those expressions never hit an exact zero.
EPS = 1e-6
# Extent of the reference-frame volume along each axis. get_mem_T_ref reads
# these to translate and scale ref coordinates onto the voxel grid.
XMIN = -64.0 # right (neg is left)
XMAX = 64.0 # right
YMIN = -64.0 # down (neg is up)
YMAX = 64.0 # down
ZMIN = -64.0 # forward
ZMAX = 64.0 # forward
def print_stats(name, tensor):
    """Print the min, mean and max of `tensor`, followed by its shape.

    The statistics are computed on a float32 copy of `tensor`.
    """
    as_float = tensor.astype(np.float32)
    lowest = np.min(as_float)
    average = np.mean(as_float)
    highest = np.max(as_float)
    summary = '%s min = %.2f, mean = %.2f, max = %.2f' % (name, lowest, average, highest)
    print(summary, as_float.shape)
def reduce_masked_mean(x, mask, axis=None, keepdims=False):
    """Return the mean of `x` weighted by `mask`.

    `x` and `mask` are expected to have the same shape. `axis` and
    `keepdims` are forwarded to np.sum. EPS is added to the mask total so
    an all-zero mask yields 0 rather than a division by zero.
    """
    weighted_total = np.sum(x*mask, axis=axis, keepdims=keepdims)
    mask_total = EPS+np.sum(mask, axis=axis, keepdims=keepdims)
    return weighted_total/mask_total
def reduce_masked_sum(x, mask, axis=None, keepdims=False):
    """Return the sum of `x*mask`.

    `x` and `mask` are expected to have the same shape. `axis` and
    `keepdims` are forwarded to np.sum.
    """
    return np.sum(x*mask, axis=axis, keepdims=keepdims)
def reduce_masked_median(x, mask, keep_batch=False):
    """Return the median of the entries of `x` where `mask > 0`.

    `x` and `mask` must have the same shape; a mismatch is printed and then
    trips an assertion.

    With keep_batch=True the median is taken separately for each index of
    the leading axis and a float32 array of shape [B] is returned. Otherwise
    a single median over everything is returned as a float32 array of
    shape [1]. Wherever the mask selects nothing, the result is NaN.
    """
    if x.shape != mask.shape:
        print('reduce_masked_median: these shapes should match:', x.shape, mask.shape)
        assert(False)

    B = x.shape[0]

    if not keep_batch:
        flat_x = np.reshape(x, [-1])
        flat_mask = np.reshape(mask, [-1])
        if np.sum(flat_mask) > 0:
            med = np.median(flat_x[flat_mask > 0])
        else:
            med = np.nan
        return np.array([med], np.float32)

    rows = np.reshape(x, [B, -1])
    row_masks = np.reshape(mask, [B, -1])
    meds = np.zeros([B], np.float32)
    for b, (xb, mb) in enumerate(zip(rows, row_masks)):
        if np.sum(mb) > 0:
            meds[b] = np.median(xb[mb > 0])
        else:
            meds[b] = np.nan
    return meds
def get_nFiles(path):
    """Return how many filesystem entries match the glob pattern `path`."""
    matches = glob.glob(path)
    return len(matches)
def get_file_list(path):
    """Return the list of paths matching the glob pattern `path`."""
    matches = glob.glob(path)
    return matches
def rotm2eul(R):
    """Convert a 3x3 rotation matrix into Euler angles (x, y, z) in radians.

    `sy` is the length of the first two entries of the first column. When
    it is (near) zero the decomposition is degenerate; in that case z is
    fixed to 0 and x is recovered from the middle row instead.
    """
    sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
    # y uses the same formula in both branches
    y = math.atan2(-R[2,0], sy)
    if not (sy > 1e-6):
        # degenerate (singular) case
        return math.atan2(-R[1,2], R[1,1]), y, 0
    # regular (non-singular) case
    return math.atan2(R[2,1], R[2,2]), y, math.atan2(R[1,0], R[0,0])
def rad2deg(rad):
    """Convert an angle from radians to degrees."""
    scaled = rad*180.0
    return scaled/np.pi
def deg2rad(deg):
    """Convert an angle from degrees to radians."""
    half_turns = deg/180.0
    return half_turns*np.pi
def eul2rotm(rx, ry, rz):
    """Build a rotation matrix from Euler angles (radians).

    Follows the matlab layout (with a different input order):
        R = [ cy*cz   sy*sx*cz-sz*cx    sy*cx*cz+sz*sx
              cy*sz   sy*sx*sz+cz*cx    sy*cx*sz-cz*sx
              -sy            cy*sx             cy*cx ]

    For scalar angles the result is 3 x 3. The three rows are stacked
    along axis 0.
    """
    sx, cx = np.sin(rx), np.cos(rx)
    sy, cy = np.sin(ry), np.cos(ry)
    sz, cz = np.sin(rz), np.cos(rz)
    rows = [
        [cy*cz, sx*sy*cz - cx*sz, cx*sy*cz + sx*sz],
        [cy*sz, sx*sy*sz + cx*cz, cx*sy*sz - sx*cz],
        [-sy, sx*cy, cx*cy],
    ]
    return np.stack([np.stack(row, axis=-1) for row in rows], axis=0)
def wrap2pi(rad_angle):
    """Wrap an angle in radians into the range [-pi, pi]."""
    sine = np.sin(rad_angle)
    cosine = np.cos(rad_angle)
    return np.arctan2(sine, cosine)
def rot2view(rx,ry,rz,x,y,z):
    """Turn rotation angles plus a 3d position into viewpoint angles.

    Takes the rotation angles (rx, ry, rz) and the position (x, y, z), and
    returns (az, el, th). Everything is in radians. It will perform
    strangely if z <= 0.
    """
    az_reference = -np.arctan2(z, x) - 1.5*np.pi
    el_reference = -np.arctan2(z, y) - 1.5*np.pi
    az = wrap2pi(ry - az_reference)
    el = -wrap2pi(rx - el_reference)
    th = -rz
    return az, el, th
def invAxB(a,b):
    """
    Compute inv(a) . b, the relative 3d transformation between two poses.

    Input:
    a -- first pose (homogeneous 4x4 matrix)
    b -- second pose (homogeneous 4x4 matrix)

    Output:
    Relative 3d transformation from a to b.
    """
    a_inv = np.linalg.inv(a)
    return np.dot(a_inv, b)
def merge_rt(r, t):
    """Assemble a 4 x 4 homogeneous matrix from a rotation and a translation.

    r is 3 x 3; t holds 3 values (shape 3 or 3 x 1). The bottom row is
    [0, 0, 0, 1].
    """
    t_col = np.reshape(t, [3, 1])
    # 3 x 4: rotation with the translation as the last column
    top = np.concatenate([r, t_col], axis=1)
    # 1 x 4
    bottom = np.array([[0, 0, 0, 1]], np.float32)
    return np.concatenate([top, bottom], axis=0)
def split_rt(rt):
    """Split a homogeneous transform into rotation (3 x 3) and translation (3 x 1)."""
    rotation = np.reshape(rt[:3,:3], [3, 3])
    translation = np.reshape(rt[:3,3], [3, 1])
    return rotation, translation
def split_intrinsics(K):
    """Read (fx, fy, x0, y0) out of an intrinsics matrix.

    K is 3 x 4 or 4 x 4. fx and fy come from the diagonal, x0 and y0 from
    the third column.
    """
    focal_x, focal_y = K[0,0], K[1,1]
    center_x, center_y = K[0,2], K[1,2]
    return focal_x, focal_y, center_x, center_y
def merge_intrinsics(fx, fy, x0, y0):
    """Build a 4 x 4 intrinsics matrix from scalar fx, fy, x0, y0.

    Starts from the identity and writes fx, fy on the diagonal and x0, y0
    into the third column.
    """
    K = np.eye(4)
    entries = (((0, 0), fx), ((1, 1), fy), ((0, 2), x0), ((1, 2), y0))
    for (row, col), value in entries:
        K[row, col] = value
    return K
def scale_intrinsics(K, sx, sy):
    """Return a new 4 x 4 intrinsics matrix for an image resized by (sx, sy).

    fx and x0 are multiplied by sx; fy and y0 are multiplied by sy.
    """
    fx, fy, x0, y0 = split_intrinsics(K)
    return merge_intrinsics(fx*sx, fy*sy, x0*sx, y0*sy)
# def meshgrid(H, W): | |
# x = np.linspace(0, W-1, W) | |
# y = np.linspace(0, H-1, H) | |
# xv, yv = np.meshgrid(x, y) | |
# return xv, yv | |
def compute_distance(transform):
    """
    Compute the distance of the translational component of a 4x4 homogeneous matrix.

    The translation is read from the first three rows of the last column,
    and its Euclidean norm is returned.
    """
    # this module imports numpy as `np`; the bare name `numpy` is undefined
    return np.linalg.norm(transform[0:3,3])
def radian_l1_dist(e, g):
    """Return the absolute angular difference between e and g (radians).

    The difference is measured around the circle, so the result lies in
    [0, pi]. Approach follows:
    https://gamedev.stackexchange.com/questions/4467/comparing-angles-and-working-out-the-difference
    """
    # wrap2pi gives [-pi, pi]; adding pi shifts both angles into [0, 2*pi]
    e_shifted = wrap2pi(e)+np.pi
    g_shifted = wrap2pi(g)+np.pi
    raw_gap = np.abs(e_shifted-g_shifted)
    return np.abs(np.pi - np.abs(raw_gap - np.pi))
def apply_pix_T_cam(pix_T_cam, xyz):
    """Project camera-frame points to pixel coordinates.

    xyz is N x 3 (the shape unpacking below rejects anything that is not
    2-D). Returns xy, shaped N x 2. Depths are clipped to at least 1e-4
    before dividing.
    """
    fx, fy, x0, y0 = split_intrinsics(pix_T_cam)
    num_points, num_channels = xyz.shape
    min_depth = 1e-4
    x, y, z = np.split(xyz, 3, axis=-1)
    z = np.clip(z, min_depth, None)
    u = (x*fx)/(z)+x0
    v = (y*fy)/(z)+y0
    return np.concatenate([u, v], axis=-1)
def apply_4x4(RT, XYZ):
    """Apply a 4 x 4 transform to a set of 3d points.

    RT is 4 x 4; XYZ is N x 3. Returns the transformed points, N x 3.
    """
    X, Y, Z = np.split(XYZ, 3, axis=1)
    # homogeneous coords, N x 4
    homog = np.concatenate([X, Y, Z, np.ones_like(X)], axis=1)
    # transform as 4 x N, then flip back to N x 4
    moved = np.transpose(np.dot(RT, np.transpose(homog)))
    # drop the homogeneous column
    return moved[:,:3]
def Ref2Mem(xyz, Z, Y, X):
    """Transform points from ref coordinates into mem (voxel grid) coordinates.

    xyz is N x 3, in ref coordinates. Z, Y, X are the grid resolutions
    passed to get_mem_T_ref.
    """
    num_points, num_channels = xyz.shape
    assert(num_channels==3)
    return apply_4x4(get_mem_T_ref(Z, Y, X), xyz)
# def Mem2Ref(xyz_mem, MH, MW, MD): | |
# # xyz is B x N x 3, in mem coordinates | |
# # transforms mem coordinates into ref coordinates | |
# B, N, C = xyz_mem.get_shape().as_list() | |
# ref_T_mem = get_ref_T_mem(B, MH, MW, MD) | |
# xyz_ref = utils_geom.apply_4x4(ref_T_mem, xyz_mem) | |
# return xyz_ref | |
def get_mem_T_ref(Z, Y, X):
    """Return the 4 x 4 matrix mapping ref coordinates to mem coordinates.

    Note this is not a rigid transform. It is composed of two steps:
    a translation by (-XMIN, -YMIN, -ZMIN), then a per-axis scaling by
    1/voxel_size, where voxel_size is the axis extent divided by the
    number of voxels along that axis.
    """
    lows = (XMIN, YMIN, ZMIN)
    highs = (XMAX, YMAX, ZMAX)
    counts = (X, Y, Z)

    center_T_ref = np.eye(4, dtype=np.float32)
    mem_T_center = np.eye(4, dtype=np.float32)
    for i, (lo, hi, n) in enumerate(zip(lows, highs, counts)):
        # translation
        center_T_ref[i, 3] = -lo
        # scaling
        vox_size = (hi-lo)/float(n)
        mem_T_center[i, i] = 1./vox_size
    return np.dot(mem_T_center, center_T_ref)
def safe_inverse(a):
    """Invert a 4 x 4 transform as [R^T | -R^T t], reusing its bottom row.

    This form is only a true inverse when the rotation part is orthonormal
    (i.e. the transform is rigid).
    """
    r, t = split_rt(a)
    t = np.reshape(t, [3, 1])
    r_t = r.T
    top = np.concatenate([r_t, -np.matmul(r_t, t)], 1)
    # a[3:4, :] is the bottom row, [0, 0, 0, 1]
    return np.concatenate([top, a[3:4, :]], 0)
def get_ref_T_mem(Z, Y, X):
    """Return the 4 x 4 matrix mapping mem coordinates back to ref coordinates.

    This is the inverse of get_mem_T_ref(Z, Y, X).
    """
    # pass Z for the depth resolution (the previous code passed X twice,
    # which gave the wrong z scale whenever Z != X)
    mem_T_ref = get_mem_T_ref(Z, Y, X)
    # note safe_inverse is inapplicable here,
    # since the transform is nonrigid
    ref_T_mem = np.linalg.inv(mem_T_ref)
    return ref_T_mem
def voxelize_xyz(xyz_ref, Z, Y, X):
    """Turn a ref-frame pointcloud into an occupancy grid.

    xyz_ref is N x 3. Returns voxels shaped Z x Y x X x 1.
    """
    # N x 3, in mem coordinates
    xyz_mem = Ref2Mem(xyz_ref, Z, Y, X)
    occupancy = get_occupancy(xyz_mem, Z, Y, X)
    return np.reshape(occupancy, [Z, Y, X, 1])
def get_inbounds(xyz, Z, Y, X, already_mem=False):
    """Return a boolean mask of the points that fall inside the voxel grid.

    xyz is N x 3. Unless already_mem is set, it is first converted from ref
    to mem coordinates. A coordinate is in bounds along an axis of size S
    when it lies in [-0.5, S-0.5).
    """
    if not already_mem:
        xyz = Ref2Mem(xyz, Z, Y, X)

    def _axis_ok(coords, size):
        return np.logical_and(coords >= -0.5, coords < float(size)-0.5)

    x_ok = _axis_ok(xyz[:,0], X)
    y_ok = _axis_ok(xyz[:,1], Y)
    z_ok = _axis_ok(xyz[:,2], Z)
    return np.logical_and(np.logical_and(x_ok, y_ok), z_ok)
def sub2ind3d_zyx(depth, height, width, d, h, w):
    """Flatten (d, h, w) subscripts into a linear index, zyx order.

    Same as sub2ind3d, but inputs in zyx order. When gathering/scattering
    with these inds, the tensor should be Z x Y x X.
    """
    slab_offset = d*height*width
    row_offset = h*width
    return slab_offset + row_offset + w
def sub2ind3d_yxz(height, width, depth, h, w, d):
    """Flatten (h, w, d) subscripts into a linear index, yxz order."""
    row_offset = h*width*depth
    col_offset = w*depth
    return row_offset + col_offset + d
# def ind2sub(height, width, ind): | |
# # int input | |
# y = int(ind / height) | |
# x = ind % height | |
# return y, x | |
def get_occupancy(xyz_mem, Z, Y, X):
    """Fill a Z x Y x X float32 grid with 1's at the voxels hit by the points.

    xyz_mem is N x 3, in mem coordinates. Points outside the grid are
    dropped first.
    """
    inbounds = get_inbounds(xyz_mem, Z, Y, X, already_mem=True)
    kept = np.reshape(xyz_mem[np.where(inbounds)], [-1, 3])
    # rounding is more accurate than a cast/floor, but runs into issues when Y==0
    cells = np.round(kept).astype(np.int32)
    voxels = np.zeros([Z, Y, X], np.float32)
    voxels[cells[:,2], cells[:,1], cells[:,0]] = 1.0
    return voxels
def pixels2camera(x,y,z,fx,fy,x0,y0):
    """Unproject pixel locations plus depth into camera-frame points.

    x and y are locations in pixel coordinates and z is a depth image;
    their shapes are H x W. fx, fy, x0, y0 are scalar camera intrinsics.
    Returns xyz, sized H*W x 3. EPS is added to the depth in the x and y
    computation only; the returned z column is the input depth.
    """
    H, W = z.shape
    fx, fy, x0, y0 = (np.reshape(v, [1,1]) for v in (fx, fy, x0, y0))

    # unproject
    cam_x = ((z+EPS)/fx)*(x-x0)
    cam_y = ((z+EPS)/fy)*(y-y0)

    columns = [np.reshape(c, [-1]) for c in (cam_x, cam_y, z)]
    return np.stack(columns, axis=1)
def depth2pointcloud(z, pix_T_cam):
    """Convert a depth image into a camera-frame pointcloud.

    The first two dimensions of z are taken as H and W. Returns the output
    of pixels2camera for a full pixel grid.
    """
    H, W = z.shape[0], z.shape[1]
    y, x = meshgrid2d(H, W)
    z = np.reshape(z, [H, W])
    return pixels2camera(x, y, z, *split_intrinsics(pix_T_cam))
def meshgrid2d(Y, X):
    """Return (grid_y, grid_x), each Y x X, holding row and column indices."""
    rows = np.reshape(np.linspace(0.0, Y-1, Y), [Y, 1])
    cols = np.reshape(np.linspace(0.0, X-1, X), [1, X])
    grid_y = np.tile(rows, [1, X])
    grid_x = np.tile(cols, [Y, 1])
    return grid_y, grid_x
def gridcloud3d(Y, X, Z):
    """Return the coordinates of every cell of a Y x X x Z grid.

    The result is float32, shaped (Y*X*Z) x 3, with columns in x, y, z
    order.
    """
    axes = (np.linspace(0, Y-1, Y), np.linspace(0, X-1, X), np.linspace(0, Z-1, Z))
    y, x, z = np.meshgrid(*axes, indexing='ij')
    columns = [np.reshape(g, [-1]) for g in (x, y, z)]
    return np.stack(columns, axis=1).astype(np.float32)
def gridcloud2d(Y, X):
    """Return the coordinates of every cell of a Y x X grid.

    The result is float32, shaped (Y*X) x 2, with columns in x, y order.
    """
    y, x = np.meshgrid(np.linspace(0, Y-1, Y), np.linspace(0, X-1, X), indexing='ij')
    columns = [np.reshape(g, [-1]) for g in (x, y)]
    return np.stack(columns, axis=1).astype(np.float32)
def normalize(im):
    """Rescale `im` linearly so its minimum maps to 0 and its maximum to 1.

    A constant image has zero range; it is returned as all zeros rather
    than dividing 0 by 0 (which would produce NaNs).
    """
    shifted = im - np.min(im)
    peak = np.max(shifted)
    if peak == 0:
        # constant input: avoid 0/0
        peak = 1
    return shifted / peak
def wrap2pi(rad_angle):
    """Wrap an angle (radians, any shape) into the range [-pi, pi]."""
    y_component, x_component = np.sin(rad_angle), np.cos(rad_angle)
    wrapped = np.arctan2(y_component, x_component)
    return wrapped
def convert_occ_to_height(occ):
    """Collapse an occupancy grid (Z x Y x X x 1) into a height map (Z x X x 1).

    Each Y index is weighted from Y (at index 0) down to 1 (at the last
    index). The maximum of occ times that weight over the Y axis, divided
    by Y, is the height of each column.
    """
    Z, Y, X, C = occ.shape
    assert(C==1)
    weights = np.reshape(np.linspace(float(Y), 1.0, Y), [1, Y, 1, 1])
    tallest = np.max(occ*weights, axis=1)/float(Y)
    return np.reshape(tallest, [Z, X, C])
def create_depth_image(xy, Z, H, W):
    """Scatter per-point depths into an H x W float32 depth image.

    xy is N x 2 (pixel x, y per point) and Z holds the N depths. Points
    are rounded to the nearest pixel. Pixels that receive no depth are set
    to 70.0. If several points land on the same pixel, the last one in
    input order wins.
    """
    # turn the xy coordinates into image inds
    xy = np.round(xy)

    # lidar reports a sphere of measurements
    # only use the inds that are within the image bounds
    # also, only use forward-pointing depths (Z > 0)
    # NOTE(review): the `< W-1` / `< H-1` bounds exclude the last column and
    # row; kept as in the original -- confirm whether that is intended.
    valid = (xy[:,0] < W-1) & (xy[:,1] < H-1) & (xy[:,0] >= 0) & (xy[:,1] >= 0) & (Z[:] > 0)

    # gather these up; indices must be integers to address the flat buffer
    cols = xy[valid, 0].astype(np.int64)
    rows = xy[valid, 1].astype(np.int64)
    Z = Z[valid]

    # row-major linear index into the flattened H*W image
    inds = rows * W + cols
    depth = np.zeros((H*W), np.float32)
    # explicit loop: NumPy gives no ordering guarantee for fancy-index
    # assignment with repeated indices, and we want last-write-wins
    for index, replacement in zip(inds, Z):
        depth[index] = replacement
    depth[depth == 0.0] = 70.0
    return np.reshape(depth, [H, W])
def vis_depth(depth, maxdepth=80.0, log_vis=True):
    """Convert a depth map into a uint8 image for visualization.

    Non-positive depths are treated as `maxdepth`. With log_vis the depths
    are log-transformed first. Values are clipped to [0, ceiling], where
    ceiling is maxdepth (or log(maxdepth) with log_vis), then scaled so
    that the ceiling maps to 255.

    The input array is not modified.
    """
    # work on a float copy so the caller's array is left untouched
    depth = np.array(depth, dtype=np.float64)
    depth[depth <= 0.0] = maxdepth
    if log_vis:
        depth = np.log(depth)
        ceiling = np.log(maxdepth)
    else:
        ceiling = maxdepth
    depth = np.clip(depth, 0, ceiling)
    # normalize to [0, 1] before scaling; multiplying the raw clipped values
    # by 255 overflows the uint8 range
    if ceiling > 0:
        depth = depth / ceiling
    return (depth*255.0).astype(np.uint8)
def preprocess_color(x):
    """Map color values from [0, 255] to float32 values in [-0.5, 0.5]."""
    as_float = x.astype(np.float32)
    return as_float * 1./255 - 0.5
def convert_box_to_ref_T_obj(boxes):
    """Convert 9-parameter boxes into 4 x 4 float32 transforms.

    The last dimension of `boxes` must be 9: entries 0-2 are used as the
    translation and entries 6-8 as the (rx, ry, rz) rotation angles.
    Leading dimensions are preserved, so the output is [..., 4, 4].
    """
    lead_shape = boxes.shape[:-1]
    flat_boxes = boxes.reshape(-1,9)

    rotations = np.stack(
        [eul2rotm(rx,ry,rz) for rx,ry,rz in flat_boxes[:,6:]], axis=0)
    translations = flat_boxes[:,:3]

    transforms = np.stack(
        [merge_rt(rot,tran) for rot,tran in zip(rotations,translations)], axis=0)
    transforms = transforms.reshape(lead_shape+(4,4))
    return transforms.astype(np.float32)
def get_rot_from_delta(delta, yaw_only=False):
    """Build one rotation matrix per displacement vector.

    delta is B x 3 (dx, dy, dz per row). Yaw is atan2(dz, dx); pitch is
    -atan2(dy, sqrt(dz^2 + dx^2)). Yaw is passed to eul2rotm as ry and
    pitch as rz (pitch is left at 0 when yaw_only is set).
    Returns rot, shaped B x 3 x 3.
    """
    dx, dy, dz = delta[:,0], delta[:,1], delta[:,2]

    bot_hyp = np.sqrt(dz**2 + dx**2)
    pitch = -np.arctan2(dy, bot_hyp)
    yaw = np.arctan2(dz, dx)

    matrices = []
    if yaw_only:
        for y in yaw:
            matrices.append(eul2rotm(0,y,0))
    else:
        for p, y in zip(pitch,yaw):
            matrices.append(eul2rotm(0,y,p))
    return np.stack(matrices)
def im2col(im, psize):
    """Extract every psize x psize patch of `im`.

    `im` is 2-D (rows x cols) or 3-D (channels x rows x cols). The result
    holds, at [r, c, channel], the psize x psize patch whose top-left corner
    is pixel (r, c); only patches lying fully inside the image are kept, and
    singleton dimensions are squeezed away at the end.
    """
    # NOTE: this first value of n_channels is overwritten by the next line.
    n_channels = 1 if len(im.shape) == 2 else im.shape[0]
    # prepend 1s to the shape until it has three entries
    (n_channels, rows, cols) = (1,) * (3 - len(im.shape)) + im.shape
    # zero-pad rows and cols up to the next multiple of psize
    im_pad = np.zeros((n_channels,
                       int(math.ceil(1.0 * rows / psize) * psize),
                       int(math.ceil(1.0 * cols / psize) * psize)))
    im_pad[:, 0:rows, 0:cols] = im
    # final[r, c, ch] will hold the patch starting at padded pixel (r, c)
    final = np.zeros((im_pad.shape[1], im_pad.shape[2], n_channels,
                      psize, psize))
    for c in np.arange(n_channels):
        for x in np.arange(psize):
            for y in np.arange(psize):
                # circularly shift the padded image up by x rows ...
                im_shift = np.vstack(
                    (im_pad[c, x:], im_pad[c, :x]))
                # ... and left by y columns
                im_shift = np.column_stack(
                    (im_shift[:, y:], im_shift[:, :y]))
                # cut the shifted image into non-overlapping psize blocks;
                # these are the patches starting at (x + i*psize, y + j*psize)
                final[x::psize, y::psize, c] = np.swapaxes(
                    im_shift.reshape(int(im_pad.shape[1] / psize), psize,
                                     int(im_pad.shape[2] / psize), psize), 1, 2)
    # keep only start positions whose patch stays inside the original image
    # (this also discards the patches that wrapped around in the shifts)
    return np.squeeze(final[0:rows - psize + 1, 0:cols - psize + 1])
def filter_discontinuities(depth, filter_size=9, thresh=10):
    """Zero out depth pixels that sit next to a large depth jump.

    For each filter_size x filter_size window, the center value is compared
    with the window's min and max; if either differs from the center by
    more than `thresh`, the center pixel is set to 0. `depth` is H x W and
    filter_size must be odd.
    """
    H, W = list(depth.shape)

    # Ensure that filter sizes are okay
    assert filter_size % 2 == 1, "Can only use odd filter sizes."

    # Compute discontinuities
    half = int((filter_size - 1) / 2)
    patches = 1.0 * im2col(depth, filter_size)
    centers = patches[:, :, half, half]
    lowest = np.min(patches, axis=(2, 3))
    highest = np.max(patches, axis=(2, 3))
    jump = np.maximum(np.abs(lowest - centers),
                      np.abs(highest - centers))
    is_edge = jump > thresh

    # Account for offsets: patch (r, c) is centered on pixel (r+half, c+half)
    edge_mask = np.zeros((H, W), dtype=np.uint16)
    edge_mask[half:half + is_edge.shape[0],
              half:half + is_edge.shape[1]] = is_edge

    return depth * (1 - edge_mask)
def argmax2d(tensor):
    """Return the (row, col) position of the largest entry of a 2-D array."""
    Y, X = list(tensor.shape)
    # argmax over the flattened array, then back to 2d coordinates
    flat_index = np.argmax(tensor.reshape(-1))
    argmax_y, argmax_x = divmod(flat_index, X)
    return argmax_y, argmax_x
def plot_traj_3d(traj):
    """Render a trajectory as a 3d scatter plot and return it as an RGB image.

    traj is S x 3. Points are colored along the RdYlBu colormap in order.
    The plot's vertical axis shows -Y and its second axis shows Z. The
    figure is saved as a PNG into memory and returned as an H x W x 3 array.
    """
    S, C = list(traj.shape)
    assert(C==3)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    colors = [plt.cm.RdYlBu(i) for i in np.linspace(0,1,S)]

    xs, ys, zs = traj[:,0], -traj[:,1], traj[:,2]
    ax.scatter(xs, zs, ys, s=30, c=colors, marker='o', alpha=1.0, edgecolors=(0,0,0))

    ax.set_xlabel('X')
    ax.set_ylabel('Z')
    ax.set_zlabel('Y')

    ax.set_xlim(0,1)
    ax.set_ylim(0,1) # this is really Z
    ax.set_zlim(-1,0) # this is really Y

    # render to an in-memory PNG and read it back
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    rgba = np.array(Image.open(buf)) # H x W x 4
    plt.close()
    return rgba[:,:,:3]
def camera2pixels(xyz, pix_T_cam):
    """Project camera-frame points to pixel coordinates.

    xyz is shaped N x 3. Returns xy, shaped N x 2. Depths are clipped to
    at least 1e-4 before dividing.
    """
    fx, fy, x0, y0 = split_intrinsics(pix_T_cam)
    min_depth = 1e-4
    z = np.clip(xyz[:,2], min_depth, None)
    u = (xyz[:,0]*fx)/z + x0
    v = (xyz[:,1]*fy)/z + y0
    return np.stack([u, v], axis=-1)
def make_colorwheel():
    """
    Generates a color wheel for optical flow visualization as presented in:
        Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
        URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf

    Code follows the original C++ source code of Daniel Scharstein.
    Code follows the Matlab source code of Deqing Sun.

    The wheel is made of six segments (RY, YG, GC, CB, BM, MR). In each
    segment one channel is held at 255 while another ramps up from 0 or
    down from 255.

    Returns:
        np.ndarray: Color wheel, shaped [55, 3]
    """
    RY, YG, GC, CB, BM, MR = 15, 6, 4, 11, 13, 6

    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros((ncols, 3))

    # (segment length, channel held at 255, ramping channel, ramp rises?)
    segments = [
        (RY, 0, 1, True),
        (YG, 1, 0, False),
        (GC, 1, 2, True),
        (CB, 2, 1, False),
        (BM, 2, 0, True),
        (MR, 0, 2, False),
    ]

    start = 0
    for length, held_channel, ramp_channel, rising in segments:
        stop = start + length
        ramp = np.floor(255*np.arange(0,length)/length)
        colorwheel[start:stop, held_channel] = 255
        colorwheel[start:stop, ramp_channel] = ramp if rising else 255 - ramp
        start = stop
    return colorwheel
def flow_uv_to_colors(u, v, convert_to_bgr=False):
    """
    Applies the flow color wheel to (possibly clipped) flow components u and v.

    According to the C++ source code of Daniel Scharstein
    According to the Matlab source code of Deqing Sun

    The flow direction selects a (linearly interpolated) color from the
    wheel; the flow magnitude controls how far that color is pulled away
    from white.

    Args:
        u (np.ndarray): Input horizontal flow of shape [H,W]
        v (np.ndarray): Input vertical flow of shape [H,W]
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.

    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]
    """
    flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
    colorwheel = make_colorwheel() # shape [55x3]
    ncols = colorwheel.shape[0]

    rad = np.sqrt(np.square(u) + np.square(v))
    a = np.arctan2(-v, -u)/np.pi

    # fractional position on the wheel, and its two neighbouring entries
    fk = (a+1) / 2*(ncols-1)
    k0 = np.floor(fk).astype(np.int32)
    k1 = k0 + 1
    k1[k1 == ncols] = 0
    f = fk - k0

    in_range = (rad <= 1)
    out_of_range = ~in_range

    for i, channel in enumerate(colorwheel.T):
        col0 = channel[k0] / 255.0
        col1 = channel[k1] / 255.0
        col = (1-f)*col0 + f*col1
        col[in_range] = 1 - rad[in_range] * (1-col[in_range])
        col[out_of_range] = col[out_of_range] * 0.75 # out of range
        # Note the 2-i => BGR instead of RGB
        ch_idx = 2-i if convert_to_bgr else i
        flow_image[:,:,ch_idx] = np.floor(255 * col)
    return flow_image
def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
    """
    Convert a two-channel flow field into a color image.

    When clip_flow is given, the flow is clipped to [-clip_flow, clip_flow]
    and divided by clip_flow. Both components are then divided by the
    largest flow magnitude (plus a small epsilon) before coloring.

    Args:
        flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
        clip_flow (float, optional): Clip maximum of flow values. Defaults to None.
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.

    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]
    """
    assert flow_uv.ndim == 3, 'input flow must have three dimensions'
    assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'

    if clip_flow is not None:
        flow_uv = np.clip(flow_uv, -clip_flow, clip_flow) / clip_flow

    u, v = flow_uv[:,:,0], flow_uv[:,:,1]

    magnitude = np.sqrt(np.square(u) + np.square(v))
    epsilon = 1e-5
    scale = np.max(magnitude) + epsilon
    return flow_uv_to_colors(u / scale, v / scale, convert_to_bgr)