Spaces:
Configuration error
Configuration error
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import numpy as np | |
import re | |
from PIL import Image | |
import sys | |
import cv2 | |
import json | |
import os | |
def read_img(filename): | |
# convert to RGB for scene flow finalpass data | |
img = np.array(Image.open(filename).convert('RGB')).astype(np.float32) | |
return img | |
def read_disp(filename, subset=False, vkitti2=False, sintel=False, | |
tartanair=False, instereo2k=False, crestereo=False, | |
fallingthings=False, | |
argoverse=False, | |
raw_disp_png=False, | |
): | |
# Scene Flow dataset | |
if filename.endswith('pfm'): | |
# For finalpass and cleanpass, gt disparity is positive, subset is negative | |
disp = np.ascontiguousarray(_read_pfm(filename)[0]) | |
if subset: | |
disp = -disp | |
# VKITTI2 dataset | |
elif vkitti2: | |
disp = _read_vkitti2_disp(filename) | |
# Sintel | |
elif sintel: | |
disp = _read_sintel_disparity(filename) | |
elif tartanair: | |
disp = _read_tartanair_disp(filename) | |
elif instereo2k: | |
disp = _read_instereo2k_disp(filename) | |
elif crestereo: | |
disp = _read_crestereo_disp(filename) | |
elif fallingthings: | |
disp = _read_fallingthings_disp(filename) | |
elif argoverse: | |
disp = _read_argoverse_disp(filename) | |
elif raw_disp_png: | |
disp = np.array(Image.open(filename)).astype(np.float32) | |
# KITTI | |
elif filename.endswith('png'): | |
disp = _read_kitti_disp(filename) | |
elif filename.endswith('npy'): | |
disp = np.load(filename) | |
else: | |
raise Exception('Invalid disparity file format!') | |
return disp # [H, W] | |
def _read_pfm(file): | |
file = open(file, 'rb') | |
color = None | |
width = None | |
height = None | |
scale = None | |
endian = None | |
header = file.readline().rstrip() | |
if header.decode("ascii") == 'PF': | |
color = True | |
elif header.decode("ascii") == 'Pf': | |
color = False | |
else: | |
raise Exception('Not a PFM file.') | |
dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode("ascii")) | |
if dim_match: | |
width, height = list(map(int, dim_match.groups())) | |
else: | |
raise Exception('Malformed PFM header.') | |
scale = float(file.readline().decode("ascii").rstrip()) | |
if scale < 0: # little-endian | |
endian = '<' | |
scale = -scale | |
else: | |
endian = '>' # big-endian | |
data = np.fromfile(file, endian + 'f') | |
shape = (height, width, 3) if color else (height, width) | |
data = np.reshape(data, shape) | |
data = np.flipud(data) | |
return data, scale | |
def write_pfm(file, image, scale=1): | |
file = open(file, 'wb') | |
color = None | |
if image.dtype.name != 'float32': | |
raise Exception('Image dtype must be float32.') | |
image = np.flipud(image) | |
if len(image.shape) == 3 and image.shape[2] == 3: # color image | |
color = True | |
elif len(image.shape) == 2 or len( | |
image.shape) == 3 and image.shape[2] == 1: # greyscale | |
color = False | |
else: | |
raise Exception( | |
'Image must have H x W x 3, H x W x 1 or H x W dimensions.') | |
file.write(b'PF\n' if color else b'Pf\n') | |
file.write(b'%d %d\n' % (image.shape[1], image.shape[0])) | |
endian = image.dtype.byteorder | |
if endian == '<' or endian == '=' and sys.byteorder == 'little': | |
scale = -scale | |
file.write(b'%f\n' % scale) | |
image.tofile(file) | |
def _read_kitti_disp(filename): | |
depth = np.array(Image.open(filename)) | |
depth = depth.astype(np.float32) / 256. | |
return depth | |
def _read_vkitti2_disp(filename): | |
# read depth | |
depth = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) # in cm | |
depth = (depth / 100).astype(np.float32) # depth clipped to 655.35m for sky | |
valid = (depth > 0) & (depth < 655) # depth clipped to 655.35m for sky | |
# convert to disparity | |
focal_length = 725.0087 # in pixels | |
baseline = 0.532725 # meter | |
disp = baseline * focal_length / depth | |
disp[~valid] = 0.000001 # invalid as very small value | |
return disp | |
def _read_sintel_disparity(filename): | |
""" Return disparity read from filename. """ | |
f_in = np.array(Image.open(filename)) | |
d_r = f_in[:, :, 0].astype('float32') | |
d_g = f_in[:, :, 1].astype('float32') | |
d_b = f_in[:, :, 2].astype('float32') | |
depth = d_r * 4 + d_g / (2 ** 6) + d_b / (2 ** 14) | |
return depth | |
def _read_tartanair_disp(filename): | |
# the infinite distant object such as the sky has a large depth value (e.g. 10000) | |
depth = np.load(filename) | |
# change to disparity image | |
disparity = 80.0 / depth | |
return disparity | |
def _read_instereo2k_disp(filename): | |
disp = np.array(Image.open(filename)) | |
disp = disp.astype(np.float32) / 100. | |
return disp | |
def _read_crestereo_disp(filename): | |
disp = np.array(Image.open(filename)) | |
return disp.astype(np.float32) / 32. | |
def _read_fallingthings_disp(filename): | |
depth = np.array(Image.open(filename)) | |
camera_file = os.path.join(os.path.dirname(filename), '_camera_settings.json') | |
with open(camera_file, 'r') as f: | |
intrinsics = json.load(f) | |
fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] | |
disp = (fx * 6.0 * 100) / depth.astype(np.float32) | |
return disp | |
def _read_argoverse_disp(filename): | |
disparity_map = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) | |
return np.float32(disparity_map) / 256. | |
def extract_video(video_name): | |
cap = cv2.VideoCapture(video_name) | |
assert cap.isOpened(), f'Failed to load video file {video_name}' | |
# get video info | |
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), | |
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) | |
fps = cap.get(cv2.CAP_PROP_FPS) | |
print('video size (hxw): %dx%d' % (size[1], size[0])) | |
print('fps: %d' % fps) | |
imgs = [] | |
while cap.isOpened(): | |
# get frames | |
flag, img = cap.read() | |
if not flag: | |
break | |
# to rgb format | |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) | |
imgs.append(img) | |
return imgs, fps | |