whites
/
IRN

 
			
							# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""eval midas."""
import glob
import csv
import os
import struct
import json
import numpy as np
from mindspore import Tensor
from mindspore import context
from mindspore import dtype as mstype
from mindspore.train import serialization
import mindspore.ops as ops
from src.util import depth_read_kitti, depth_read_sintel, BadPixelMetric
from src.midas_net import MidasNet
from src.config import config
from src.utils import transforms
from scipy.io import loadmat
import cv2
from PIL import Image
import h5py


def eval_Kitti(data_path, net):
    """
    eval Kitti.
    Return the value, loss.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="lower_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    metric = BadPixelMetric(1.25, 80, 'KITTI')
    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(data_path, '*', 'image', '*.png'))
    num = 0
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = np.array(Image.open(file_name)).astype(float)  # (436,1024,3)
        image = image / 255
        print(file_name)
        all_path = file_name.split('/')
        depth_path_name = all_path[-1].split('.')[0]

        depth = depth_read_kitti(os.path.join(data_path, all_path[-3], 'depth', depth_path_name + '.png'))  # (436,1024)
        mask = (depth > 0) & (depth < 80)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # print('transform later', sample['image'].shape)
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        expand_dims = ops.ExpandDims()
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    print(f"Kitti bad pixel: {loss_sum / num:.3f}")
    return loss_sum / num


def eval_TUM(datapath, net):
    """
    eval TUM.
    Return the value, loss.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    # get data
    metric = BadPixelMetric(1.25, 10, 'TUM')
    loss_sum = 0
    sample = {}
    file_path = glob.glob(os.path.join(datapath, '*_person', 'associate.txt'))

    num = 0
    for ind in file_path:
        all_path = ind.split('/')

        for line in open(ind):
            num += 1
            print(f"processing: {num}")
            data = line.split('\n')[0].split(' ')
            image_path = os.path.join(datapath, all_path[-2], data[0])  # (480,640,3)
            depth_path = os.path.join(datapath, all_path[-2], data[1])  # (480,640,3)
            image = cv2.imread(image_path) / 255
            depth = cv2.imread(depth_path)[:, :, 0] / 5000
            mask = (depth > 0) & (depth < 10)
            print('mask is ', np.unique(mask))
            sample['image'] = image
            sample["depth"] = depth
            sample["mask"] = mask

            sample = img_input_1(sample)
            sample = img_input_2(sample)
            sample = img_input_3(sample)

            sample['image'] = Tensor([sample["image"]], mstype.float32)
            sample['depth'] = Tensor([sample["depth"]], mstype.float32)
            sample['mask'] = Tensor([sample["mask"]], mstype.int32)

            print(sample['image'].shape, sample['depth'].shape)
            prediction = net(sample['image'])
            mask = sample['mask'].asnumpy()
            depth = sample['depth'].asnumpy()
            expand_dims = ops.ExpandDims()
            prediction = expand_dims(prediction, 0)
            print(prediction.shape, mask.shape)
            resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
            prediction = resize_bilinear(prediction)
            prediction = np.squeeze(prediction.asnumpy())

            loss = metric(prediction, depth, mask)

            print('loss is ', loss)
            loss_sum += loss

    print(f"TUM bad pixel: {loss_sum / num:.2f}")

    return loss_sum / num


def eval_Sintel(datapath, net):
    """
    eval Sintel.
    Return the value, loss.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    # get data
    metric = BadPixelMetric(1.25, 72, 'sintel')
    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(datapath, 'final_left', '*', '*.png'))

    num = 0
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = np.array(Image.open(file_name)).astype(float)  # (436,1024,3)
        image = image / 255
        print(file_name)
        all_path = file_name.split('/')
        depth_path_name = all_path[-1].split('.')[0]

        depth = depth_read_sintel(os.path.join(datapath, 'depth', all_path[-2], depth_path_name + '.dpt'))  # (436,1024)

        mask1 = np.array(Image.open(os.path.join(datapath, 'occlusions', all_path[-2], all_path[-1]))).astype(int)
        mask1 = mask1 / 255

        mask = (mask1 == 1) & (depth > 0) & (depth < 72)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        expand_dims = ops.ExpandDims()
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    print(f"sintel bad pixel: {loss_sum / len(image_path):.3f}")
    return loss_sum / len(image_path)


def eval_ETH3D(datapath, net):
    """
    eval ETH3D.
    Return the value, loss.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=True,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    metric = BadPixelMetric(1.25, 72, 'ETH3D')

    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(datapath, '*', 'images', 'dslr_images', '*.JPG'))
    num = 0
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = cv2.imread(file_name) / 255
        all_path = file_name.split('/')
        depth_path = os.path.join(datapath, all_path[-4], "ground_truth_depth", 'dslr_images', all_path[-1])
        depth = []
        with open(depth_path, 'rb') as f:
            data = f.read(4)
            while data:
                depth.append(struct.unpack('f', data))
                data = f.read(4)
            depth = np.reshape(np.array(depth), (4032, -1))
        mask = (depth > 0) & (depth < 72)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask

        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        expand_dims = ops.ExpandDims()
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    print(f"ETH3D bad pixel: {loss_sum / num:.3f}")

    return loss_sum / num


def eval_DIW(datapath, net):
    """
    eval DIW.
    Return the value, loss.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=True,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    loss_sum = 0
    num = 0
    sample = {}
    file_path = os.path.join(datapath, 'DIW_Annotations', 'DIW_test.csv')
    with open(file_path) as f:
        reader = list(csv.reader(f))
        for (i, row) in enumerate(reader):
            if i % 2 == 0:
                path = row[0].split('/')
                sample['file_name'] = os.path.join(datapath, path[-2], path[-1])
                sample['image'] = cv2.imread(sample['file_name']) / 255
            else:
                sample['depths'] = row
                if not os.path.exists(sample['file_name']):
                    continue
                num += 1  # 图片个数+1
                print(f"processing: {num}")
                sample = img_input_1(sample)
                sample = img_input_2(sample)
                sample = img_input_3(sample)
                sample['image'] = Tensor([sample["image"]], mstype.float32)
                prediction = net(sample['image'])
                shape_w, shape_h = [int(sample['depths'][-2]), int(sample['depths'][-1])]
                expand_dims = ops.ExpandDims()
                prediction = expand_dims(prediction, 0)
                resize_bilinear = ops.ResizeBilinear((shape_h, shape_w))
                prediction = resize_bilinear(prediction)
                prediction = np.squeeze(prediction.asnumpy())

                pixtel_a = prediction[int(sample['depths'][0]) - 1][int(sample['depths'][1]) - 1]
                pixtel_b = prediction[int(sample['depths'][2]) - 1][int(sample['depths'][3]) - 1]
                if pixtel_a > pixtel_b:
                    if sample['depths'][4] == '>':
                        loss_sum += 1
                if pixtel_a < pixtel_b:
                    if sample['depths'][4] == '<':
                        loss_sum += 1
                print(f"bad pixel: {(num - loss_sum) / num:.4f}")
    return (num - loss_sum) / num


def eval_NYU(datamat, splitmat, net):
    """
    eval NYU.
    Return the value, loss.
    """
    img_input_1 = Resize(config.img_width,
                         config.img_height,
                         resize_target=None,
                         keep_aspect_ratio=True,
                         ensure_multiple_of=32,
                         resize_method="upper_bound",
                         image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = PrepareForNet()

    # get data

    metric = BadPixelMetric(1.25, 10, 'NYU')
    loss_sum = 0
    sample = {}
    mat = loadmat(splitmat)
    indices = [ind[0] - 1 for ind in mat["testNdxs"]]
    num = 0
    with h5py.File(datamat, "r") as f:
        for ind in indices:
            num += 1
            print(num)
            image = np.swapaxes(f["images"][ind], 0, 2)
            image = image / 255
            depth = np.swapaxes(f["rawDepths"][ind], 0, 1)
            mask = (depth > 0) & (depth < 10)

            # mask = mask1
            sample['image'] = image
            sample["depth"] = depth
            sample["mask"] = mask
            sample = img_input_1(sample)
            sample = img_input_2(sample)
            sample = img_input_3(sample)
            sample['image'] = Tensor([sample["image"]], mstype.float32)
            sample['depth'] = Tensor([sample["depth"]], mstype.float32)
            sample['mask'] = Tensor([sample["mask"]], mstype.int32)

            print(sample['image'].shape, sample['depth'].shape)
            prediction = net(sample['image'])

            mask = sample['mask'].asnumpy()
            depth = sample['depth'].asnumpy()

            expand_dims = ops.ExpandDims()
            prediction = expand_dims(prediction, 0)
            resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
            prediction = resize_bilinear(prediction)
            prediction = np.squeeze(prediction.asnumpy())
            loss = metric(prediction, depth, mask)

            print('loss is ', loss)
            loss_sum += loss

    print(f"bad pixel: {loss_sum / num:.3f}")
    return loss_sum / num


def run_eval():
    """run."""
    datapath_TUM = config.train_data_dir+config.datapath_TUM
    datapath_Sintel = config.train_data_dir+config.datapath_Sintel
    datapath_ETH3D = config.train_data_dir+config.datapath_ETH3D
    datapath_Kitti = config.train_data_dir+config.datapath_Kitti
    datapath_DIW = config.train_data_dir+config.datapath_DIW
    datamat = config.train_data_dir+config.datapath_NYU[0]
    splitmat = config.train_data_dir+config.datapath_NYU[1]

    net = MidasNet()
    param_dict = serialization.load_checkpoint(config.ckpt_path)
    serialization.load_param_into_net(net, param_dict)
    results = {}
    if config.data_name == 'Sintel' or config.data_name == "all":
        result_sintel = eval_Sintel(datapath_Sintel, net)
        results['Sintel'] = result_sintel
    if config.data_name == 'Kitti' or config.data_name == "all":
        result_kitti = eval_Kitti(datapath_Kitti, net)
        results['Kitti'] = result_kitti
    if config.data_name == 'TUM' or config.data_name == "all":
        result_tum = eval_TUM(datapath_TUM, net)
        results['TUM'] = result_tum
    if config.data_name == 'DIW' or config.data_name == "all":
        result_DIW = eval_DIW(datapath_DIW, net)
        results['DIW'] = result_DIW
    if config.data_name == 'ETH3D' or config.data_name == "all":
        result_ETH3D = eval_ETH3D(datapath_ETH3D, net)
        results['ETH3D'] = result_ETH3D
    if config.data_name == 'NYU' or config.data_name == "all":
        result_NYU = eval_NYU(datamat, splitmat, net)
        results['NYU'] = result_NYU

    print(results)
    json.dump(results, open(config.ann_file, 'w'))


if __name__ == '__main__':
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
    run_eval()