# whites/IRN

 
			
							# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""util."""
import sys
import numpy as np
import cv2
from PIL import Image

# Magic number expected as the first float32 value of a Sintel depth file;
# depth_read_sintel compares the file's leading value against it.
TAG_FLOAT = 202021.25
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably the character form of the same file tag -- confirm before use.
TAG_CHAR = 'PIEH'


def read_image(path):
    """Read an image file and return it as an RGB array scaled to 0-1.

    Args:
        path (str): path to the image file.

    Returns:
        array: RGB image with values in the range 0-1.

    Raises:
        OSError: if OpenCV cannot load an image from ``path`` (missing file,
            unreadable file or unsupported format).
    """
    img = cv2.imread(path)

    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque AttributeError on ``img.ndim``.
    if img is None:
        raise OSError("Cannot read image file: {}".format(path))

    # Expand single-channel data to three channels before the BGR -> RGB swap.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0


def write_depth(path, depth, bits=1):
    """Write a depth map to ``<path>.pfm`` and a normalised ``<path>.png``.

    The PFM file stores the depth values cast to float32. The PNG stores the
    depth linearly rescaled so that its minimum maps to 0 and its maximum to
    ``2 ** (8 * bits) - 1``.

    Args:
        path (str): output file path without extension.
        depth (array): depth map.
        bits (int, optional): bytes per PNG sample. 1 writes uint8, 2 writes
            uint16; for any other value no PNG is written. Defaults to 1.

    Returns:
        array: the rescaled depth map (before the integer cast used for the
        PNG). All zeros when the depth map is constant.
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    max_val = (2 ** (8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        # Constant depth map: the range is zero, so normalising would divide
        # by zero. Emit zeros with the input's dtype instead.
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return out


def write_pfm(path, image, scale=1):
    """Write an image to a PFM file.

    The input is validated before the file is opened, so an invalid image
    does not leave an empty or truncated file behind.

    Args:
        path (str): output file path.
        image (array): float32 data of shape H x W x 3, H x W x 1 or H x W.
        scale (int, optional): Scale. Defaults to 1. It is written negated
            when the data is little-endian.

    Raises:
        ValueError: if ``image`` is not float32 or has an unsupported shape.
    """
    if image.dtype.name != "float32":
        raise ValueError("Image dtype must be float32.")

    if image.ndim == 3 and image.shape[2] == 3:  # color image
        color = True
    elif image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):  # greyscale
        color = False
    else:
        raise ValueError("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # Rows are written in reverse (bottom-to-top) order.
    image = np.flipud(image)

    # A negative scale in the header marks little-endian sample data.
    endian = image.dtype.byteorder
    if endian == "<" or (endian == "=" and sys.byteorder == "little"):
        scale = -scale

    with open(path, "wb") as file:
        # Parenthesised so that .encode() applies to whichever tag is chosen;
        # the file is binary and only accepts bytes.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)


def depth_read_kitti(filename):
    """Read a depth map stored as an integer PNG and convert it to float.

    Args:
        filename: path or file object accepted by ``PIL.Image.open``.

    Returns:
        array: float64 depth map equal to the raw pixel values divided by
        256, with pixels whose raw value is 0 set to -1.

    Raises:
        AssertionError: if no pixel value exceeds 255 (presumably a guard
            against loading an 8-bit image by mistake).
    """
    depth_png = np.array(Image.open(filename), dtype=int)
    assert np.max(depth_png) > 255

    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every NumPy version.
    depth = depth_png.astype(np.float64) / 256.
    depth[depth_png == 0] = -1.
    return depth


def depth_read_sintel(filename):
    """Read depth data from a Sintel depth file and return it as a numpy array.

    The file layout read here is: one float32 tag (must equal ``TAG_FLOAT``),
    int32 width, int32 height, then the remaining bytes as float32 samples.

    Args:
        filename (str): path to the depth file.

    Returns:
        array: float32 depth map of shape (height, width).

    Raises:
        AssertionError: if the tag does not match or the size is implausible.
    """
    # Context manager so the handle is closed even when an assertion or the
    # reshape fails.
    with open(filename, 'rb') as f:
        check = np.fromfile(f, dtype=np.float32, count=1)[0]
        assert check == TAG_FLOAT, ' depth_read:: Wrong tag in flow file ' \
                                   '(should be: {0}, is: {1}). Big-endian machine? ' \
            .format(TAG_FLOAT, check)
        width = np.fromfile(f, dtype=np.int32, count=1)[0]
        height = np.fromfile(f, dtype=np.int32, count=1)[0]
        size = width * height
        assert width > 0 and height > 0 and 1 < size < 100000000, \
            ' depth_read:: Wrong input size (width = {0}, height = {1}).' \
            .format(width, height)
        depth = np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width))
    return depth


class BadPixelMetric:
    """Depth error metric for predictions given as disparity.

    The prediction is first aligned to the inverse of the target with a
    per-sample least-squares scale and shift, then inverted to depth and
    compared against the target. Inputs are reduced over axes (1, 2), i.e.
    they are expected to be 3-D arrays with one leading sample axis.

    Supported ``model`` values:
        * 'NYU', 'TUM', 'KITTI': percentage of masked pixels whose
          max(pred / target, target / pred) exceeds ``threshold``.
        * 'sintel', 'ETH3D': mean absolute relative depth error over the
          masked pixels (not multiplied by 100).
    """

    def __init__(self, threshold=1.25, depth_cap=10, model='NYU'):
        """Store the metric configuration.

        Args:
            threshold (float): ratio above which a pixel counts as bad.
            depth_cap (float): maximum depth; aligned disparities below
                ``1 / depth_cap`` are clamped to that value.
            model (str): dataset name selecting the error definition.
        """
        self.__threshold = threshold
        self.__depth_cap = depth_cap
        self.__model = model

    def compute_scale_and_shift(self, prediction, target, mask):
        """Solve per-sample least squares for ``scale * prediction + shift ~ target``.

        Only pixels weighted by ``mask`` contribute. Samples whose 2x2 normal
        matrix has a non-positive determinant keep scale = shift = 0.

        Returns:
            tuple: (scale, shift), one entry per sample.
        """
        # system matrix: A = [[a_00, a_01], [a_10, a_11]], with a_10 == a_01
        a_00 = np.sum(mask * prediction * prediction, (1, 2))
        a_01 = np.sum(mask * prediction, (1, 2))
        a_11 = np.sum(mask, (1, 2))

        # right hand side: b = [b_0, b_1]
        b_0 = np.sum(mask * prediction * target, (1, 2))
        b_1 = np.sum(mask * target, (1, 2))

        # solution: x = A^-1 . b = [[a_11, -a_01], [-a_10, a_00]] / (a_00 * a_11 - a_01 * a_10) . b
        x_0 = np.zeros_like(b_0)
        x_1 = np.zeros_like(b_1)

        det = a_00 * a_11 - a_01 * a_01
        # A needs to be a positive definite matrix.
        valid = det > 0

        x_0[valid] = (a_11[valid] * b_0[valid] - a_01[valid] * b_1[valid]) / det[valid]
        x_1[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid]

        return x_0, x_1

    def __call__(self, prediction, target, mask):
        """Evaluate the metric.

        Args:
            prediction (array): predicted disparity.
            target (array): ground-truth depth.
            mask (array): 1 where the target is valid.

        Returns:
            float: mean over samples of the per-sample error (times 100 for
            the 'NYU' / 'TUM' / 'KITTI' bad-pixel ratio).

        Raises:
            ValueError: if the configured model is not supported.
        """
        ratio_models = ('NYU', 'TUM', 'KITTI')
        relative_models = ('sintel', 'ETH3D')
        if self.__model not in ratio_models + relative_models:
            raise ValueError("Unsupported model: {}".format(self.__model))

        valid = mask == 1

        # transform predicted disparity to aligned depth
        target_disparity = np.zeros_like(target)
        target_disparity[valid] = 1.0 / target[valid]

        scale, shift = self.compute_scale_and_shift(prediction, target_disparity, mask)
        prediction_aligned = scale.reshape((-1, 1, 1)) * prediction + shift.reshape((-1, 1, 1))

        # Clamp small disparities so the inverted depth never exceeds depth_cap.
        disparity_cap = 1.0 / self.__depth_cap
        prediction_aligned[prediction_aligned < disparity_cap] = disparity_cap

        prediction_depth = 1.0 / prediction_aligned

        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        err = np.zeros_like(prediction_depth, dtype=np.float64)

        if self.__model in ratio_models:
            # bad pixel
            err[valid] = np.maximum(
                prediction_depth[valid] / target[valid],
                target[valid] / prediction_depth[valid],
            )
            err[valid] = (err[valid] > self.__threshold)

            p = np.sum(err, (1, 2)) / np.sum(mask, (1, 2))
            return 100 * np.mean(p)

        # relative error ('sintel' / 'ETH3D')
        err[valid] = np.abs((prediction_depth[valid] - target[valid]) / target[valid])
        err_sum = np.sum(err, (1, 2))
        mask_sum = np.sum(mask, (1, 2))
        print('err_sum is ', err_sum)
        print('mask_sum is ', mask_sum)
        # Decide per sample: a sample without valid pixels contributes 0.
        # A plain ``if mask_sum == 0`` is ambiguous for more than one sample.
        p = np.zeros_like(err_sum)
        nonempty = mask_sum > 0
        p[nonempty] = err_sum[nonempty] / mask_sum[nonempty]
        return np.mean(p)