Spaces:
Configuration error
Configuration error
import torch | |
import torch.nn.functional as F | |
import numpy as np | |
class InputPadder: | |
""" Pads images such that dimensions are divisible by 8 """ | |
def __init__(self, dims, mode='sintel', padding_factor=8): | |
self.ht, self.wd = dims[-2:] | |
pad_ht = (((self.ht // padding_factor) + 1) * padding_factor - self.ht) % padding_factor | |
pad_wd = (((self.wd // padding_factor) + 1) * padding_factor - self.wd) % padding_factor | |
if mode == 'sintel': | |
self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, pad_ht // 2, pad_ht - pad_ht // 2] | |
else: | |
self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, 0, pad_ht] | |
def pad(self, *inputs): | |
return [F.pad(x, self._pad, mode='replicate') for x in inputs] | |
def unpad(self, x): | |
ht, wd = x.shape[-2:] | |
c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]] | |
return x[..., c[0]:c[1], c[2]:c[3]] | |
def bilinear_sampler(img, coords, mode='bilinear', mask=False, padding_mode='zeros'): | |
""" Wrapper for grid_sample, uses pixel coordinates """ | |
if coords.size(-1) != 2: # [B, 2, H, W] -> [B, H, W, 2] | |
coords = coords.permute(0, 2, 3, 1) | |
H, W = img.shape[-2:] | |
# H = height if height is not None else img.shape[-2] | |
# W = width if width is not None else img.shape[-1] | |
xgrid, ygrid = coords.split([1, 1], dim=-1) | |
# To handle H or W equals to 1 by explicitly defining height and width | |
if H == 1: | |
assert ygrid.abs().max() < 1e-8 | |
H = 10 | |
if W == 1: | |
assert xgrid.abs().max() < 1e-8 | |
W = 10 | |
xgrid = 2 * xgrid / (W - 1) - 1 | |
ygrid = 2 * ygrid / (H - 1) - 1 | |
grid = torch.cat([xgrid, ygrid], dim=-1) | |
img = F.grid_sample(img, grid, mode=mode, | |
padding_mode=padding_mode, | |
align_corners=True) | |
if mask: | |
mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1) | |
return img, mask.squeeze(-1).float() | |
return img | |
def coords_grid(batch, ht, wd, normalize=False): | |
if normalize: # [-1, 1] | |
coords = torch.meshgrid(2 * torch.arange(ht) / (ht - 1) - 1, | |
2 * torch.arange(wd) / (wd - 1) - 1) | |
else: | |
coords = torch.meshgrid(torch.arange(ht), torch.arange(wd)) | |
coords = torch.stack(coords[::-1], dim=0).float() | |
return coords[None].repeat(batch, 1, 1, 1) # [B, 2, H, W] | |
def coords_grid_np(h, w): # used for accumulating high speed sintel flow testdata | |
coords = np.meshgrid(np.arange(h, dtype=np.float32), | |
np.arange(w, dtype=np.float32), indexing='ij') | |
coords = np.stack(coords[::-1], axis=-1) # [H, W, 2] | |
return coords | |
def compute_out_of_boundary_mask(flow, downsample_factor=None): | |
# flow: [B, 2, H, W] | |
assert flow.dim() == 4 and flow.size(1) == 2 | |
b, _, h, w = flow.shape | |
init_coords = coords_grid(b, h, w).to(flow.device) | |
corres = init_coords + flow # [B, 2, H, W] | |
if downsample_factor is not None: | |
assert w % downsample_factor == 0 and h % downsample_factor == 0 | |
# the actual max disp can predict is in the downsampled feature resolution, then upsample | |
max_w = (w // downsample_factor - 1) * downsample_factor | |
max_h = (h // downsample_factor - 1) * downsample_factor | |
# print('max_w: %d, max_h: %d' % (max_w, max_h)) | |
else: | |
max_w = w - 1 | |
max_h = h - 1 | |
valid_mask = (corres[:, 0] >= 0) & (corres[:, 0] <= max_w) & (corres[:, 1] >= 0) & (corres[:, 1] <= max_h) | |
# in case very large flow | |
flow_mask = (flow[:, 0].abs() <= max_w) & (flow[:, 1].abs() <= max_h) | |
valid_mask = valid_mask & flow_mask | |
return valid_mask # [B, H, W] | |
def normalize_coords(grid): | |
"""Normalize coordinates of image scale to [-1, 1] | |
Args: | |
grid: [B, 2, H, W] | |
""" | |
assert grid.size(1) == 2 | |
h, w = grid.size()[2:] | |
grid[:, 0, :, :] = 2 * (grid[:, 0, :, :].clone() / (w - 1)) - 1 # x: [-1, 1] | |
grid[:, 1, :, :] = 2 * (grid[:, 1, :, :].clone() / (h - 1)) - 1 # y: [-1, 1] | |
# grid = grid.permute((0, 2, 3, 1)) # [B, H, W, 2] | |
return grid | |
def flow_warp(feature, flow, mask=False, padding_mode='zeros'): | |
b, c, h, w = feature.size() | |
assert flow.size(1) == 2 | |
grid = coords_grid(b, h, w).to(flow.device) + flow # [B, 2, H, W] | |
return bilinear_sampler(feature, grid, mask=mask, padding_mode=padding_mode) | |
def upflow8(flow, mode='bilinear'): | |
new_size = (8 * flow.shape[2], 8 * flow.shape[3]) | |
return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True) | |
def bilinear_upflow(flow, scale_factor=8): | |
assert flow.size(1) == 2 | |
flow = F.interpolate(flow, scale_factor=scale_factor, | |
mode='bilinear', align_corners=True) * scale_factor | |
return flow | |
def upsample_flow(flow, img): | |
if flow.size(-1) != img.size(-1): | |
scale_factor = img.size(-1) / flow.size(-1) | |
flow = F.interpolate(flow, size=img.size()[-2:], | |
mode='bilinear', align_corners=True) * scale_factor | |
return flow | |
def count_parameters(model): | |
num = sum(p.numel() for p in model.parameters() if p.requires_grad) | |
return num | |
def set_bn_eval(m): | |
classname = m.__class__.__name__ | |
if classname.find('BatchNorm') != -1: | |
m.eval() | |