# Uploaded by JasonSmithSO ("Upload 777 files", commit 0034848, verified).
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import re
from PIL import Image
import sys
import cv2
import json
import os
def read_img(filename):
    """Load an image file as a float32 RGB array.

    The image is always converted to RGB first (needed for Scene Flow
    finalpass data, per the original note).

    Args:
        filename: path of the image to open with PIL.

    Returns:
        numpy float32 array built from the RGB-converted image.
    """
    rgb_image = Image.open(filename).convert('RGB')
    pixels = np.array(rgb_image)
    return pixels.astype(np.float32)
def read_disp(filename, subset=False, vkitti2=False, sintel=False,
              tartanair=False, instereo2k=False, crestereo=False,
              fallingthings=False,
              argoverse=False,
              raw_disp_png=False,
              ):
    """Read a disparity map, choosing the decoder from the file name and flags.

    Precedence (first match wins):
      1. file name ends with 'pfm' (Scene Flow) -- ``subset`` negates the map
      2. the dataset flags, in the order vkitti2, sintel, tartanair,
         instereo2k, crestereo, fallingthings, argoverse, raw_disp_png
      3. file name ends with 'png' (KITTI)
      4. file name ends with 'npy'

    Args:
        filename: path of the disparity/depth file.
        subset: for PFM files only, flip the sign of the loaded disparity.
        vkitti2, sintel, tartanair, instereo2k, crestereo, fallingthings,
        argoverse: select the matching dataset-specific reader.
        raw_disp_png: load the image as-is and cast it to float32.

    Returns:
        The disparity array, [H, W].

    Raises:
        Exception: if no branch matches the file name / flags.
    """
    # Scene Flow dataset
    if filename.endswith('pfm'):
        # For finalpass and cleanpass the gt disparity is positive,
        # for the subset it is negative.
        pfm_disp = np.ascontiguousarray(_read_pfm(filename)[0])
        return -pfm_disp if subset else pfm_disp

    # Dataset-specific readers, checked in a fixed order.
    if vkitti2:
        return _read_vkitti2_disp(filename)
    if sintel:
        return _read_sintel_disparity(filename)
    if tartanair:
        return _read_tartanair_disp(filename)
    if instereo2k:
        return _read_instereo2k_disp(filename)
    if crestereo:
        return _read_crestereo_disp(filename)
    if fallingthings:
        return _read_fallingthings_disp(filename)
    if argoverse:
        return _read_argoverse_disp(filename)
    if raw_disp_png:
        return np.array(Image.open(filename)).astype(np.float32)

    # Fall back to the file extension.
    if filename.endswith('png'):
        # KITTI
        return _read_kitti_disp(filename)
    if filename.endswith('npy'):
        return np.load(filename)

    raise Exception('Invalid disparity file format!')
def _read_pfm(file):
file = open(file, 'rb')
color = None
width = None
height = None
scale = None
endian = None
header = file.readline().rstrip()
if header.decode("ascii") == 'PF':
color = True
elif header.decode("ascii") == 'Pf':
color = False
else:
raise Exception('Not a PFM file.')
dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode("ascii"))
if dim_match:
width, height = list(map(int, dim_match.groups()))
else:
raise Exception('Malformed PFM header.')
scale = float(file.readline().decode("ascii").rstrip())
if scale < 0: # little-endian
endian = '<'
scale = -scale
else:
endian = '>' # big-endian
data = np.fromfile(file, endian + 'f')
shape = (height, width, 3) if color else (height, width)
data = np.reshape(data, shape)
data = np.flipud(data)
return data, scale
def write_pfm(file, image, scale=1):
    """Write a float32 image to disk in PFM format.

    Args:
        file: destination path.
        image: float32 array of shape H x W x 3 (written with a 'PF'
            header), or H x W x 1 / H x W (written with a 'Pf' header).
        scale: value for the header's scale field; it is negated when the
            array data is little-endian.

    Raises:
        Exception: if ``image`` is not float32 or has an unsupported shape.
            Validation happens before the destination is opened, so an
            invalid image never creates or truncates the file.
    """
    if image.dtype.name != 'float32':
        raise Exception('Image dtype must be float32.')

    ndim = len(image.shape)
    if ndim == 3 and image.shape[2] == 3:  # color image
        color = True
    elif ndim == 2 or (ndim == 3 and image.shape[2] == 1):  # greyscale
        color = False
    else:
        raise Exception(
            'Image must have H x W x 3, H x W x 1 or H x W dimensions.')

    # Rows are written in reverse order.
    image = np.flipud(image)

    # A negative scale in the header marks little-endian data.
    endian = image.dtype.byteorder
    if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
        scale = -scale

    # The context manager flushes and closes the file even if a write fails.
    with open(file, 'wb') as pfm:
        pfm.write(b'PF\n' if color else b'Pf\n')
        pfm.write(b'%d %d\n' % (image.shape[1], image.shape[0]))
        pfm.write(b'%f\n' % scale)
        image.tofile(pfm)
def _read_kitti_disp(filename):
    """Load a KITTI-style PNG and return its values divided by 256 as float32.

    Args:
        filename: path of the PNG file.
    """
    raw_values = np.array(Image.open(filename))
    return raw_values.astype(np.float32) / 256.
def _read_vkitti2_disp(filename):
    """Read a VKITTI2 depth image and convert it to disparity.

    Args:
        filename: path of the depth image (stored in cm).

    Returns:
        float32 disparity array; pixels whose depth is not strictly between
        0 and 655 m are set to 0.000001.

    Raises:
        IOError: if OpenCV cannot read the file (``cv2.imread`` returns
            ``None`` instead of raising).
    """
    # read depth
    depth = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)  # in cm
    if depth is None:
        # Without this check the failure would surface later as an
        # unrelated TypeError on the division below.
        raise IOError(f'Failed to read VKITTI2 depth image: {filename}')

    depth = (depth / 100).astype(np.float32)  # depth clipped to 655.35m for sky
    valid = (depth > 0) & (depth < 655)  # depth clipped to 655.35m for sky

    # convert to disparity
    focal_length = 725.0087  # in pixels
    baseline = 0.532725  # meter
    # Zero-depth pixels divide by zero here; they are overwritten just below,
    # so the warning would only be noise.
    with np.errstate(divide='ignore'):
        disp = baseline * focal_length / depth

    disp[~valid] = 0.000001  # invalid as very small value
    return disp
def _read_sintel_disparity(filename):
    """Return the disparity decoded from a Sintel image file.

    The first three channels of the image are combined as
    ``R * 4 + G / 2**6 + B / 2**14`` in float32.

    Args:
        filename: path of the encoded disparity image.
    """
    encoded = np.array(Image.open(filename))
    red, green, blue = (
        encoded[:, :, channel].astype('float32') for channel in range(3)
    )
    return red * 4 + green / (2 ** 6) + blue / (2 ** 14)
def _read_tartanair_disp(filename):
# the infinite distant object such as the sky has a large depth value (e.g. 10000)
depth = np.load(filename)
# change to disparity image
disparity = 80.0 / depth
return disparity
def _read_instereo2k_disp(filename):
    """Load an InStereo2K image and return its values divided by 100 as float32.

    Args:
        filename: path of the image file.
    """
    raw_values = np.array(Image.open(filename))
    return raw_values.astype(np.float32) / 100.
def _read_crestereo_disp(filename):
    """Load a CREStereo image and return its values divided by 32 as float32.

    Args:
        filename: path of the image file.
    """
    raw_values = np.array(Image.open(filename))
    scaled = raw_values.astype(np.float32) / 32.
    return scaled
def _read_fallingthings_disp(filename):
    """Read a Falling Things depth image and convert it to disparity.

    The focal length ``fx`` comes from ``_camera_settings.json`` located in
    the same directory as ``filename`` (first entry of ``camera_settings``).
    The result is ``fx * 6.0 * 100 / depth``.
    NOTE(review): 6.0 and 100 look like a baseline and a unit conversion --
    confirm against the dataset documentation.

    Args:
        filename: path of the depth image.
    """
    depth_values = np.array(Image.open(filename))

    settings_path = os.path.join(os.path.dirname(filename),
                                 '_camera_settings.json')
    with open(settings_path, 'r') as settings_file:
        settings = json.load(settings_file)

    first_camera = settings['camera_settings'][0]
    fx = first_camera['intrinsic_settings']['fx']

    numerator = fx * 6.0 * 100
    return numerator / depth_values.astype(np.float32)
def _read_argoverse_disp(filename):
    """Read an Argoverse disparity image and return its values / 256 as float32.

    Args:
        filename: path of the disparity image.

    Raises:
        IOError: if OpenCV cannot read the file (``cv2.imread`` returns
            ``None`` instead of raising).
    """
    disparity_map = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
    if disparity_map is None:
        # Fail loudly instead of letting None flow into the float conversion.
        raise IOError(f'Failed to read Argoverse disparity image: {filename}')
    return np.float32(disparity_map) / 256.
def extract_video(video_name):
    """Decode every frame of a video into a list of RGB images.

    Prints the frame size and fps of the video as a side effect.

    Args:
        video_name: path (or other source accepted by ``cv2.VideoCapture``).

    Returns:
        (imgs, fps): ``imgs`` is the list of frames converted from BGR to
        RGB, ``fps`` is the value reported by ``cv2.CAP_PROP_FPS``.

    Raises:
        AssertionError: if the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_name)
    # try/finally releases the capture on every path, including a failed
    # open or a decoding error.
    try:
        assert cap.isOpened(), f'Failed to load video file {video_name}'

        # get video info
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print('video size (hxw): %dx%d' % (size[1], size[0]))
        print('fps: %d' % fps)

        imgs = []
        while cap.isOpened():
            # get frames
            flag, img = cap.read()
            if not flag:
                break
            # to rgb format
            imgs.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    finally:
        cap.release()

    return imgs, fps