Spaces:

JasonSmithSO
/

FooocusEnhanced

Configuration error

File size: 6,168 Bytes
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import re
from PIL import Image
import sys
import cv2
import json
import os


def read_img(filename):
    """Load an image file as a float32 NumPy array in RGB channel order.

    The image is opened with PIL and explicitly converted to 'RGB' before
    being turned into an array (the original note says this is needed for the
    Scene Flow finalpass data).
    """
    rgb_image = Image.open(filename).convert('RGB')
    pixels = np.array(rgb_image)
    return pixels.astype(np.float32)


def read_disp(filename, subset=False, vkitti2=False, sintel=False,
              tartanair=False, instereo2k=False, crestereo=False,
              fallingthings=False,
              argoverse=False,
              raw_disp_png=False,
              ):
    """Read a disparity map, choosing the decoder from the extension and flags.

    Precedence (first match wins):
      1. a filename ending in 'pfm' (Scene Flow), regardless of any flag;
      2. the dataset flags, in the order vkitti2, sintel, tartanair,
         instereo2k, crestereo, fallingthings, argoverse, raw_disp_png;
      3. a filename ending in 'png' (treated as KITTI);
      4. a filename ending in 'npy' (loaded with ``np.load``).

    ``subset`` only affects the PFM branch, where it negates the result.

    Returns the disparity array ([H, W] per the original annotation).
    Raises ``Exception`` when nothing above matches.
    """
    # Scene Flow dataset: the PFM extension overrides every dataset flag
    if filename.endswith('pfm'):
        pfm_disp = np.ascontiguousarray(_read_pfm(filename)[0])
        # For finalpass and cleanpass, gt disparity is positive, subset is negative
        return -pfm_disp if subset else pfm_disp

    # Dataset-specific decoders, tested in a fixed order
    if vkitti2:
        return _read_vkitti2_disp(filename)
    if sintel:
        return _read_sintel_disparity(filename)
    if tartanair:
        return _read_tartanair_disp(filename)
    if instereo2k:
        return _read_instereo2k_disp(filename)
    if crestereo:
        return _read_crestereo_disp(filename)
    if fallingthings:
        return _read_fallingthings_disp(filename)
    if argoverse:
        return _read_argoverse_disp(filename)
    if raw_disp_png:
        # pixel values are used as-is, only cast to float32
        return np.array(Image.open(filename)).astype(np.float32)

    # No flag given: fall back to the file extension
    if filename.endswith('png'):
        # KITTI
        return _read_kitti_disp(filename)
    if filename.endswith('npy'):
        return np.load(filename)

    raise Exception('Invalid disparity file format!')


def _read_pfm(file):
    """Read a PFM (portable float map) file.

    Args:
        file: path of the PFM file to read.

    Returns:
        A ``(data, scale)`` tuple. ``data`` is shaped (height, width, 3) for a
        'PF' (color) file and (height, width) for a 'Pf' (greyscale) file,
        flipped vertically relative to the order stored on disk. ``scale`` is
        the absolute value of the scale line in the header.

    Raises:
        Exception: if the first line is neither 'PF' nor 'Pf', or if the
            dimensions line does not match "<width> <height>".
    """
    # The context manager guarantees the handle is closed on every path,
    # including the header-validation errors below.
    with open(file, 'rb') as pfm:
        header = pfm.readline().rstrip().decode("ascii")
        if header == 'PF':
            color = True
        elif header == 'Pf':
            color = False
        else:
            raise Exception('Not a PFM file.')

        dim_match = re.match(r'^(\d+)\s(\d+)\s$', pfm.readline().decode("ascii"))
        if not dim_match:
            raise Exception('Malformed PFM header.')
        width, height = map(int, dim_match.groups())

        # The sign of the scale line encodes the byte order of the payload.
        scale = float(pfm.readline().decode("ascii").rstrip())
        if scale < 0:  # little-endian
            endian = '<'
            scale = -scale
        else:
            endian = '>'  # big-endian

        # Everything after the three header lines is raw 32-bit float data.
        data = np.fromfile(pfm, endian + 'f')

    shape = (height, width, 3) if color else (height, width)
    data = np.reshape(data, shape)
    # Rows are flipped vertically after loading.
    data = np.flipud(data)
    return data, scale


def write_pfm(file, image, scale=1):
    """Write a float32 array to a PFM (portable float map) file.

    Args:
        file: destination path; it is created or truncated.
        image: float32 array shaped H x W x 3 (written as color, 'PF') or
            H x W / H x W x 1 (written as greyscale, 'Pf').
        scale: value for the header's scale line. It is negated when the data
            is little-endian, which is how the header signals byte order.

    Raises:
        Exception: if ``image`` is not float32 or has an unsupported shape.
            Validation happens before the file is opened, so an invalid image
            never creates or truncates ``file``.
    """
    if image.dtype.name != 'float32':
        raise Exception('Image dtype must be float32.')

    ndim = len(image.shape)
    if ndim == 3 and image.shape[2] == 3:  # color image
        color = True
    elif ndim == 2 or (ndim == 3 and image.shape[2] == 1):  # greyscale
        color = False
    else:
        raise Exception(
            'Image must have H x W x 3, H x W x 1 or H x W dimensions.')

    # Rows are written in vertically flipped order.
    image = np.flipud(image)

    endian = image.dtype.byteorder
    if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
        scale = -scale

    # The context manager flushes and closes the file deterministically.
    with open(file, 'wb') as pfm:
        pfm.write(b'PF\n' if color else b'Pf\n')
        pfm.write(b'%d %d\n' % (image.shape[1], image.shape[0]))
        pfm.write(b'%f\n' % scale)
        image.tofile(pfm)


def _read_kitti_disp(filename):
    """Read a KITTI disparity PNG: pixel values cast to float32, divided by 256."""
    raw_pixels = np.array(Image.open(filename))
    return raw_pixels.astype(np.float32) / 256.


def _read_vkitti2_disp(filename):
    """Read a VKITTI2 depth image and convert it to a float32 disparity map.

    Depth is stored in centimetres and converted to metres here; disparity is
    ``baseline * focal_length / depth``. Pixels whose depth is not strictly
    between 0 and 655 m are treated as invalid and set to 0.000001.
    """
    focal_length = 725.0087  # in pixels
    baseline = 0.532725  # meter

    # read depth (stored in cm) and convert to meters
    depth_cm = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
    depth_m = (depth_cm / 100).astype(np.float32)

    # depth is clipped to 655.35m for sky, so anything at or beyond 655 is invalid
    invalid = ~((depth_m > 0) & (depth_m < 655))

    # convert to disparity
    disparity = baseline * focal_length / depth_m

    # mark invalid pixels with a very small value
    disparity[invalid] = 0.000001
    return disparity


def _read_sintel_disparity(filename):
    """ Return disparity decoded from the R, G and B channels of filename.

    The value is ``R * 4 + G / 2**6 + B / 2**14`` with each channel first
    cast to float32.
    """
    encoded = np.array(Image.open(filename))

    # pull the first three channels out as float32 planes
    red, green, blue = (encoded[:, :, channel].astype('float32')
                        for channel in range(3))

    return red * 4 + green / (2 ** 6) + blue / (2 ** 14)


def _read_tartanair_disp(filename):
    """Load a TartanAir depth array (``.npy``) and return ``80.0 / depth``.

    Infinitely distant objects such as the sky carry a large depth value
    (e.g. 10000), so they map to a disparity close to zero.
    """
    # NOTE(review): 80.0 is presumably focal_length * baseline for this
    # dataset -- confirm against the dataset's camera parameters.
    return 80.0 / np.load(filename)


def _read_instereo2k_disp(filename):
    """Read an InStereo2K disparity image: pixels cast to float32, divided by 100."""
    raw_pixels = np.array(Image.open(filename))
    return raw_pixels.astype(np.float32) / 100.


def _read_crestereo_disp(filename):
    """Read a CREStereo disparity image: pixels cast to float32, divided by 32."""
    raw_pixels = np.array(Image.open(filename))
    scaled = raw_pixels.astype(np.float32) / 32.
    return scaled


def _read_fallingthings_disp(filename):
    """Read a FallingThings depth image and convert it to disparity.

    The focal length ``fx`` comes from the first camera entry of
    ``_camera_settings.json`` in the same directory as ``filename``; the
    result is ``(fx * 6.0 * 100) / depth`` with depth cast to float32.
    """
    # the image is loaded before the settings file, so a missing image is
    # reported first
    depth_pixels = np.array(Image.open(filename))

    settings_path = os.path.join(os.path.dirname(filename), '_camera_settings.json')
    with open(settings_path, 'r') as settings_file:
        settings = json.load(settings_file)
    first_camera = settings['camera_settings'][0]
    fx = first_camera['intrinsic_settings']['fx']

    # NOTE(review): 6.0 * 100 looks like a baseline with a unit conversion --
    # confirm against the dataset documentation.
    return (fx * 6.0 * 100) / depth_pixels.astype(np.float32)


def _read_argoverse_disp(filename):
    """Read an Argoverse disparity image with OpenCV: float32 values divided by 256."""
    read_flags = cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH
    raw_pixels = cv2.imread(filename, read_flags)
    return np.float32(raw_pixels) / 256.


def extract_video(video_name):
    """Decode every frame of a video into a list of RGB images.

    Args:
        video_name: path (or other source accepted by ``cv2.VideoCapture``)
            of the video to read.

    Returns:
        ``(imgs, fps)``: the list of frames converted from BGR to RGB, in
        read order, and the frame rate reported by the capture.

    Raises:
        AssertionError: if the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_name)
    try:
        assert cap.isOpened(), f'Failed to load video file {video_name}'
        # get video info
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps = cap.get(cv2.CAP_PROP_FPS)

        print('video size (hxw): %dx%d' % (size[1], size[0]))
        print('fps: %d' % fps)

        imgs = []
        while cap.isOpened():
            # get frames
            flag, img = cap.read()
            if not flag:
                # no more frames (or a read failure): stop decoding
                break
            # to rgb format
            imgs.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    finally:
        # always free the underlying decoder / file handle, even on errors
        cap.release()

    return imgs, fps