#2 Upload file to ''

Merged
whites merged 1 commits from whites-patch-2 into master 1 year ago
  1. +453
    -0
      midas_eval.py

+ 453
- 0
midas_eval.py View File

@@ -0,0 +1,453 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""eval midas."""
import glob
import csv
import os
import struct
import json
import numpy as np
from mindspore import Tensor
from mindspore import context
from mindspore import dtype as mstype
from mindspore.train import serialization
import mindspore.ops as ops
from src.util import depth_read_kitti, depth_read_sintel, BadPixelMetric
from src.midas_net import MidasNet
from src.config import config
from src.utils import transforms
from scipy.io import loadmat
import cv2
from PIL import Image
import h5py


def eval_Kitti(data_path, net):
    """
    Evaluate the network on the KITTI dataset.

    Args:
        data_path (str): Root directory laid out as ``<scene>/image/*.png``
            with matching ground truth in ``<scene>/depth/*.png``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all evaluated images.

    Raises:
        RuntimeError: If no images are found under ``data_path``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="lower_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    metric = BadPixelMetric(1.25, 80, 'KITTI')
    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(data_path, '*', 'image', '*.png'))
    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = np.array(Image.open(file_name)).astype(float)  # (H, W, 3)
        image = image / 255
        print(file_name)
        all_path = file_name.split('/')
        depth_path_name = all_path[-1].split('.')[0]

        # Ground-truth depth lives in the sibling 'depth' folder of the scene.
        depth = depth_read_kitti(os.path.join(data_path, all_path[-3], 'depth', depth_path_name + '.png'))
        # Valid pixels: positive depth below the 80 m KITTI cap.
        mask = (depth > 0) & (depth < 80)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # Wrap in a list to add the batch dimension.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Upsample the prediction back to the ground-truth resolution.
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    if num == 0:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no KITTI images found under {data_path}")
    print(f"Kitti bad pixel: {loss_sum / num:.3f}")
    return loss_sum / num


def eval_TUM(datapath, net):
    """
    Evaluate the network on the TUM RGB-D dataset.

    Args:
        datapath (str): Root directory containing ``*_person/associate.txt``
            files; each line pairs an RGB image path with a depth image path.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all evaluated frames.

    Raises:
        RuntimeError: If no frames are found under ``datapath``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    # get data
    metric = BadPixelMetric(1.25, 10, 'TUM')
    loss_sum = 0
    sample = {}
    file_path = glob.glob(os.path.join(datapath, '*_person', 'associate.txt'))

    num = 0
    # Loop invariant: build the op once instead of once per frame.
    expand_dims = ops.ExpandDims()
    for ind in file_path:
        all_path = ind.split('/')

        # 'with' fixes the original leaked file handle from `open(ind)`.
        with open(ind) as associate_file:
            for line in associate_file:
                num += 1
                print(f"processing: {num}")
                data = line.split('\n')[0].split(' ')
                image_path = os.path.join(datapath, all_path[-2], data[0])
                depth_path = os.path.join(datapath, all_path[-2], data[1])
                image = cv2.imread(image_path) / 255
                # TUM depth PNGs encode metres * 5000 in each channel.
                depth = cv2.imread(depth_path)[:, :, 0] / 5000
                mask = (depth > 0) & (depth < 10)
                print('mask is ', np.unique(mask))
                sample['image'] = image
                sample["depth"] = depth
                sample["mask"] = mask

                sample = img_input_1(sample)
                sample = img_input_2(sample)
                sample = img_input_3(sample)

                # Wrap in a list to add the batch dimension.
                sample['image'] = Tensor([sample["image"]], mstype.float32)
                sample['depth'] = Tensor([sample["depth"]], mstype.float32)
                sample['mask'] = Tensor([sample["mask"]], mstype.int32)

                print(sample['image'].shape, sample['depth'].shape)
                prediction = net(sample['image'])
                mask = sample['mask'].asnumpy()
                depth = sample['depth'].asnumpy()
                # Upsample the prediction back to the ground-truth resolution.
                prediction = expand_dims(prediction, 0)
                print(prediction.shape, mask.shape)
                resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
                prediction = resize_bilinear(prediction)
                prediction = np.squeeze(prediction.asnumpy())

                loss = metric(prediction, depth, mask)

                print('loss is ', loss)
                loss_sum += loss

    if num == 0:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no TUM frames found under {datapath}")
    print(f"TUM bad pixel: {loss_sum / num:.2f}")

    return loss_sum / num


def eval_Sintel(datapath, net):
    """
    Evaluate the network on the MPI Sintel dataset.

    Args:
        datapath (str): Root directory containing ``final_left/<seq>/*.png``
            frames, ``depth/<seq>/*.dpt`` ground truth and
            ``occlusions/<seq>/*.png`` masks.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all images found.

    Raises:
        RuntimeError: If no images are found under ``datapath``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    # get data
    metric = BadPixelMetric(1.25, 72, 'sintel')
    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(datapath, 'final_left', '*', '*.png'))

    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = np.array(Image.open(file_name)).astype(float)  # (436, 1024, 3)
        image = image / 255
        print(file_name)
        all_path = file_name.split('/')
        depth_path_name = all_path[-1].split('.')[0]

        depth = depth_read_sintel(os.path.join(datapath, 'depth', all_path[-2], depth_path_name + '.dpt'))

        # Occlusion map: 255 marks visible pixels; normalize to {0, 1}.
        mask1 = np.array(Image.open(os.path.join(datapath, 'occlusions', all_path[-2], all_path[-1]))).astype(int)
        mask1 = mask1 / 255

        # Valid pixels: visible, positive depth, below the 72 m cap.
        mask = (mask1 == 1) & (depth > 0) & (depth < 72)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # Wrap in a list to add the batch dimension.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Upsample the prediction back to the ground-truth resolution.
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    if not image_path:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no Sintel images found under {datapath}")
    print(f"sintel bad pixel: {loss_sum / len(image_path):.3f}")
    return loss_sum / len(image_path)


def eval_ETH3D(datapath, net):
    """
    Evaluate the network on the ETH3D dataset.

    Args:
        datapath (str): Root directory containing
            ``<scene>/images/dslr_images/*.JPG`` and matching raw float32
            depth files under ``<scene>/ground_truth_depth/dslr_images/``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all evaluated images.

    Raises:
        RuntimeError: If no images are found under ``datapath``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=True,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    metric = BadPixelMetric(1.25, 72, 'ETH3D')

    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(datapath, '*', 'images', 'dslr_images', '*.JPG'))
    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = cv2.imread(file_name) / 255
        all_path = file_name.split('/')
        depth_path = os.path.join(datapath, all_path[-4], "ground_truth_depth", 'dslr_images', all_path[-1])
        # The ground truth is a raw stream of native-endian float32 values,
        # 4032 pixels per row. np.fromfile reads them in one C-level pass
        # (equivalent to the original per-4-byte struct.unpack loop).
        with open(depth_path, 'rb') as f:
            depth = np.fromfile(f, dtype=np.float32)
        depth = np.reshape(depth, (4032, -1))
        mask = (depth > 0) & (depth < 72)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask

        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # Wrap in a list to add the batch dimension.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Upsample the prediction back to the ground-truth resolution.
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    if num == 0:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no ETH3D images found under {datapath}")
    print(f"ETH3D bad pixel: {loss_sum / num:.3f}")

    return loss_sum / num


def eval_DIW(datapath, net):
    """
    Evaluate the network on the DIW (Depth in the Wild) dataset.

    The annotation CSV alternates rows: an even row carries the image path,
    the following odd row carries two annotated points, an ordinal relation
    ('<' or '>') and the original image width/height.

    Args:
        datapath (str): Root directory containing the images and
            ``DIW_Annotations/DIW_test.csv``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Fraction of point pairs whose predicted ordinal relation
        disagrees with the annotation (lower is better).
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=True,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    loss_sum = 0
    num = 0
    sample = {}
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    file_path = os.path.join(datapath, 'DIW_Annotations', 'DIW_test.csv')
    with open(file_path) as f:
        reader = list(csv.reader(f))
        for (i, row) in enumerate(reader):
            if i % 2 == 0:
                path = row[0].split('/')
                sample['file_name'] = os.path.join(datapath, path[-2], path[-1])
                # Bug fix: the original called cv2.imread unconditionally and
                # crashed on a missing file (None / 255) before the existence
                # check on the following row could skip it.
                if os.path.exists(sample['file_name']):
                    sample['image'] = cv2.imread(sample['file_name']) / 255
            else:
                sample['depths'] = row
                if not os.path.exists(sample['file_name']):
                    continue
                num += 1  # one more image evaluated
                print(f"processing: {num}")
                sample = img_input_1(sample)
                sample = img_input_2(sample)
                sample = img_input_3(sample)
                sample['image'] = Tensor([sample["image"]], mstype.float32)
                prediction = net(sample['image'])
                # Last two CSV fields are the original image width/height.
                shape_w, shape_h = [int(sample['depths'][-2]), int(sample['depths'][-1])]
                prediction = expand_dims(prediction, 0)
                resize_bilinear = ops.ResizeBilinear((shape_h, shape_w))
                prediction = resize_bilinear(prediction)
                prediction = np.squeeze(prediction.asnumpy())

                # Annotated coordinates are 1-based; compare predicted depths
                # at the two points against the ground-truth ordinal relation.
                pixtel_a = prediction[int(sample['depths'][0]) - 1][int(sample['depths'][1]) - 1]
                pixtel_b = prediction[int(sample['depths'][2]) - 1][int(sample['depths'][3]) - 1]
                if pixtel_a > pixtel_b:
                    if sample['depths'][4] == '>':
                        loss_sum += 1
                if pixtel_a < pixtel_b:
                    if sample['depths'][4] == '<':
                        loss_sum += 1
    print(f"bad pixel: {(num - loss_sum) / num:.4f}")
    return (num - loss_sum) / num


def eval_NYU(datamat, splitmat, net):
    """
    Evaluate the network on the NYU Depth v2 dataset.

    Args:
        datamat (str): Path to the NYU ``.mat`` file (HDF5) holding
            ``images`` and ``rawDepths``.
        splitmat (str): Path to the official split ``.mat`` file with
            1-based test indices in ``testNdxs``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over the test split.
    """
    # Bug fix: Resize / NormalizeImage / PrepareForNet were referenced without
    # the ``transforms.`` prefix used everywhere else, raising NameError.
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()

    # get data

    metric = BadPixelMetric(1.25, 10, 'NYU')
    loss_sum = 0
    sample = {}
    mat = loadmat(splitmat)
    # MATLAB indices are 1-based; convert to 0-based.
    indices = [ind[0] - 1 for ind in mat["testNdxs"]]
    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    with h5py.File(datamat, "r") as f:
        for ind in indices:
            num += 1
            print(num)
            # The .mat stores images transposed; swap axes back to (H, W, C).
            image = np.swapaxes(f["images"][ind], 0, 2)
            image = image / 255
            depth = np.swapaxes(f["rawDepths"][ind], 0, 1)
            mask = (depth > 0) & (depth < 10)

            sample['image'] = image
            sample["depth"] = depth
            sample["mask"] = mask
            sample = img_input_1(sample)
            sample = img_input_2(sample)
            sample = img_input_3(sample)
            # Wrap in a list to add the batch dimension.
            sample['image'] = Tensor([sample["image"]], mstype.float32)
            sample['depth'] = Tensor([sample["depth"]], mstype.float32)
            sample['mask'] = Tensor([sample["mask"]], mstype.int32)

            print(sample['image'].shape, sample['depth'].shape)
            prediction = net(sample['image'])

            mask = sample['mask'].asnumpy()
            depth = sample['depth'].asnumpy()

            # Upsample the prediction back to the ground-truth resolution.
            prediction = expand_dims(prediction, 0)
            resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
            prediction = resize_bilinear(prediction)
            prediction = np.squeeze(prediction.asnumpy())
            loss = metric(prediction, depth, mask)

            print('loss is ', loss)
            loss_sum += loss

    print(f"bad pixel: {loss_sum / num:.3f}")
    return loss_sum / num


def run_eval():
    """Load the checkpoint and evaluate on the dataset(s) selected by config.data_name.

    Runs each dataset whose name matches ``config.data_name`` (or all of them
    when it is ``"all"``), prints the collected results and writes them as
    JSON to ``config.ann_file``.
    """
    datapath_TUM = config.train_data_dir + config.datapath_TUM
    datapath_Sintel = config.train_data_dir + config.datapath_Sintel
    datapath_ETH3D = config.train_data_dir + config.datapath_ETH3D
    datapath_Kitti = config.train_data_dir + config.datapath_Kitti
    datapath_DIW = config.train_data_dir + config.datapath_DIW
    datamat = config.train_data_dir + config.datapath_NYU[0]
    splitmat = config.train_data_dir + config.datapath_NYU[1]

    net = MidasNet()
    param_dict = serialization.load_checkpoint(config.ckpt_path)
    serialization.load_param_into_net(net, param_dict)
    results = {}
    # Membership tests replace the original `== 'X' or == "all"` chains.
    if config.data_name in ('Sintel', 'all'):
        results['Sintel'] = eval_Sintel(datapath_Sintel, net)
    if config.data_name in ('Kitti', 'all'):
        results['Kitti'] = eval_Kitti(datapath_Kitti, net)
    if config.data_name in ('TUM', 'all'):
        results['TUM'] = eval_TUM(datapath_TUM, net)
    if config.data_name in ('DIW', 'all'):
        results['DIW'] = eval_DIW(datapath_DIW, net)
    if config.data_name in ('ETH3D', 'all'):
        results['ETH3D'] = eval_ETH3D(datapath_ETH3D, net)
    if config.data_name in ('NYU', 'all'):
        results['NYU'] = eval_NYU(datamat, splitmat, net)

    print(results)
    # 'with' fixes the original leaked file handle from json.dump(..., open(...)).
    with open(config.ann_file, 'w') as out_file:
        json.dump(results, out_file)


if __name__ == '__main__':
    # Configure graph-mode execution on the device chosen in config, then run.
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
    run_eval()

Loading…
Cancel
Save