|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """eval midas."""
- import glob
- import csv
- import os
- import struct
- import json
- import numpy as np
- from mindspore import Tensor
- from mindspore import context
- from mindspore import dtype as mstype
- from mindspore.train import serialization
- import mindspore.ops as ops
- from src.util import depth_read_kitti, depth_read_sintel, BadPixelMetric
- from src.midas_net import MidasNet
- from src.config import config
- from src.utils import transforms
- from scipy.io import loadmat
- import cv2
- from PIL import Image
- import h5py
-
-
def eval_Kitti(data_path, net):
    """
    Evaluate the network on the KITTI images found under ``data_path``.

    Images are collected from ``<data_path>/*/image/*.png``; the matching depth
    map is read with ``depth_read_kitti`` from
    ``<data_path>/<scene>/depth/<stem>.png``. Pixels whose depth lies strictly
    between 0 and 80 form the validity mask.

    Args:
        data_path: root directory of the KITTI evaluation data.
        net: callable network; it is applied to the prepared image tensor.

    Returns:
        The sum of the per-image ``BadPixelMetric(1.25, 80, 'KITTI')`` values
        divided by the number of images.
    """
    # The three preprocessing steps are applied to every sample in this order.
    preprocessing = (
        transforms.Resize(config.img_width,
                          config.img_height,
                          resize_target=None,
                          keep_aspect_ratio=True,
                          ensure_multiple_of=32,
                          resize_method="lower_bound",
                          image_interpolation_method=cv2.INTER_CUBIC),
        transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std),
        transforms.PrepareForNet(),
    )
    metric = BadPixelMetric(1.25, 80, 'KITTI')

    image_files = glob.glob(os.path.join(data_path, '*', 'image', '*.png'))
    total = len(image_files)
    loss_sum = 0
    sample = {}
    for index, file_name in enumerate(image_files, start=1):
        print(f"processing: {index} / {total}")
        image = np.array(Image.open(file_name)).astype(float) / 255
        print(file_name)

        # <data_path>/<scene>/image/<stem>.png -> <data_path>/<scene>/depth/<stem>.png
        parts = file_name.split('/')
        stem = parts[-1].split('.')[0]
        depth_file = os.path.join(data_path, parts[-3], 'depth', stem + '.png')
        depth = depth_read_kitti(depth_file)
        valid = (depth > 0) & (depth < 80)

        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = valid
        for transform in preprocessing:
            sample = transform(sample)

        # Wrapping each array in a list adds a leading batch axis.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Bring the prediction to the spatial size of the mask before scoring.
        prediction = ops.ExpandDims()(prediction, 0)
        prediction = ops.ResizeBilinear(mask.shape[1:])(prediction)
        loss = metric(np.squeeze(prediction.asnumpy()), depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    mean_loss = loss_sum / total
    print(f"Kitti bad pixel: {mean_loss:.3f}")
    return mean_loss
-
-
def eval_TUM(datapath, net):
    """
    Evaluate the network on the TUM sequences found under ``datapath``.

    Every ``<datapath>/*_person/associate.txt`` file is read line by line. Each
    line is split on single spaces; the first field is used as the image path
    and the second as the depth path, both relative to the sequence directory.
    Pixels whose depth lies strictly between 0 and 10 form the validity mask.

    Args:
        datapath: root directory that contains the ``*_person`` sequence folders.
        net: callable network; it is applied to the prepared image tensor.

    Returns:
        The sum of the per-sample ``BadPixelMetric(1.25, 10, 'TUM')`` values
        divided by the number of processed association lines.

    Raises:
        ZeroDivisionError: if no association line was processed.
    """
    preprocessing = (
        transforms.Resize(config.img_width,
                          config.img_height,
                          resize_target=None,
                          keep_aspect_ratio=True,
                          ensure_multiple_of=32,
                          resize_method="upper_bound",
                          image_interpolation_method=cv2.INTER_CUBIC),
        transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std),
        transforms.PrepareForNet(),
    )
    metric = BadPixelMetric(1.25, 10, 'TUM')
    expand_dims = ops.ExpandDims()

    loss_sum = 0
    num = 0
    sample = {}
    for association_file in glob.glob(os.path.join(datapath, '*_person', 'associate.txt')):
        # Name of the sequence directory that holds this associate.txt,
        # derived without assuming '/' is the path separator.
        sequence = os.path.normpath(association_file).split(os.sep)[-2]

        # The context manager closes the file even if a sample fails.
        with open(association_file) as associations:
            for line in associations:
                if not line.strip():
                    # A blank line carries no image/depth pair.
                    continue
                num += 1
                print(f"processing: {num}")
                data = line.split('\n')[0].split(' ')
                image_path = os.path.join(datapath, sequence, data[0])
                depth_path = os.path.join(datapath, sequence, data[1])

                # NOTE(review): cv2.imread with default flags returns an 8-bit
                # image in BGR channel order, while the KITTI/Sintel evaluators
                # load RGB through PIL -- confirm the channel order the
                # normalisation statistics expect.
                image = cv2.imread(image_path) / 255
                # NOTE(review): if the depth PNGs are 16-bit, the default flags
                # reduce them to 8 bits before the division by 5000 -- confirm
                # whether cv2.IMREAD_UNCHANGED is intended here.
                depth = cv2.imread(depth_path)[:, :, 0] / 5000
                mask = (depth > 0) & (depth < 10)
                print('mask is ', np.unique(mask))

                sample['image'] = image
                sample["depth"] = depth
                sample["mask"] = mask
                for transform in preprocessing:
                    sample = transform(sample)

                # Wrapping each array in a list adds a leading batch axis.
                sample['image'] = Tensor([sample["image"]], mstype.float32)
                sample['depth'] = Tensor([sample["depth"]], mstype.float32)
                sample['mask'] = Tensor([sample["mask"]], mstype.int32)

                print(sample['image'].shape, sample['depth'].shape)
                prediction = net(sample['image'])
                mask = sample['mask'].asnumpy()
                depth = sample['depth'].asnumpy()

                # Bring the prediction to the spatial size of the mask.
                prediction = expand_dims(prediction, 0)
                print(prediction.shape, mask.shape)
                prediction = ops.ResizeBilinear(mask.shape[1:])(prediction)
                prediction = np.squeeze(prediction.asnumpy())

                loss = metric(prediction, depth, mask)

                print('loss is ', loss)
                loss_sum += loss

    mean_loss = loss_sum / num
    print(f"TUM bad pixel: {mean_loss:.2f}")

    return mean_loss
-
-
def eval_Sintel(datapath, net):
    """
    Evaluate the network on the Sintel frames found under ``datapath``.

    Frames are collected from ``<datapath>/final_left/*/*.png``. For each frame
    the depth map is read with ``depth_read_sintel`` from
    ``<datapath>/depth/<scene>/<stem>.dpt`` and an additional mask image is
    loaded from ``<datapath>/occlusions/<scene>/<frame>``. A pixel is valid when
    that mask image equals 255 and the depth lies strictly between 0 and 72.

    Args:
        datapath: root directory of the Sintel evaluation data.
        net: callable network; it is applied to the prepared image tensor.

    Returns:
        The sum of the per-frame ``BadPixelMetric(1.25, 72, 'sintel')`` values
        divided by the number of frames.
    """
    # The three preprocessing steps are applied to every sample in this order.
    preprocessing = (
        transforms.Resize(config.img_width,
                          config.img_height,
                          resize_target=None,
                          keep_aspect_ratio=True,
                          ensure_multiple_of=32,
                          resize_method="upper_bound",
                          image_interpolation_method=cv2.INTER_CUBIC),
        transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std),
        transforms.PrepareForNet(),
    )
    metric = BadPixelMetric(1.25, 72, 'sintel')

    image_files = glob.glob(os.path.join(datapath, 'final_left', '*', '*.png'))
    total = len(image_files)
    loss_sum = 0
    sample = {}
    for index, file_name in enumerate(image_files, start=1):
        print(f"processing: {index} / {total}")
        image = np.array(Image.open(file_name)).astype(float) / 255
        print(file_name)

        # .../final_left/<scene>/<stem>.png
        parts = file_name.split('/')
        scene, frame = parts[-2], parts[-1]
        stem = frame.split('.')[0]

        depth = depth_read_sintel(os.path.join(datapath, 'depth', scene, stem + '.dpt'))

        occlusion_file = os.path.join(datapath, 'occlusions', scene, frame)
        occlusion = np.array(Image.open(occlusion_file)).astype(int) / 255

        valid = (occlusion == 1) & (depth > 0) & (depth < 72)

        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = valid
        for transform in preprocessing:
            sample = transform(sample)

        # Wrapping each array in a list adds a leading batch axis.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Bring the prediction to the spatial size of the mask before scoring.
        prediction = ops.ExpandDims()(prediction, 0)
        prediction = ops.ResizeBilinear(mask.shape[1:])(prediction)
        loss = metric(np.squeeze(prediction.asnumpy()), depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    mean_loss = loss_sum / total
    print(f"sintel bad pixel: {mean_loss:.3f}")
    return mean_loss
-
-
def eval_ETH3D(datapath, net):
    """
    Evaluate the network on the ETH3D images found under ``datapath``.

    Images are collected from ``<datapath>/*/images/dslr_images/*.JPG``. The
    depth map of an image is the file with the same name under
    ``<datapath>/<scene>/ground_truth_depth/dslr_images/``; it is read as a flat
    sequence of native-endian 32-bit floats and reshaped to 4032 rows. Pixels
    whose depth lies strictly between 0 and 72 form the validity mask.

    Args:
        datapath: root directory of the ETH3D evaluation data.
        net: callable network; it is applied to the prepared image tensor.

    Returns:
        The sum of the per-image ``BadPixelMetric(1.25, 72, 'ETH3D')`` values
        divided by the number of images.

    Raises:
        ZeroDivisionError: if no image matched the glob pattern.
    """
    preprocessing = (
        transforms.Resize(config.img_width,
                          config.img_height,
                          resize_target=True,
                          keep_aspect_ratio=True,
                          ensure_multiple_of=32,
                          resize_method="upper_bound",
                          image_interpolation_method=cv2.INTER_CUBIC),
        transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std),
        transforms.PrepareForNet(),
    )
    metric = BadPixelMetric(1.25, 72, 'ETH3D')
    expand_dims = ops.ExpandDims()

    loss_sum = 0
    sample = {}
    image_files = glob.glob(os.path.join(datapath, '*', 'images', 'dslr_images', '*.JPG'))
    num = 0
    for file_name in image_files:
        num += 1
        print(f"processing: {num} / {len(image_files)}")
        # NOTE(review): cv2.imread returns BGR channel order -- confirm this is
        # what the normalisation statistics expect.
        image = cv2.imread(file_name) / 255

        # <datapath>/<scene>/images/dslr_images/<name>.JPG, split without
        # assuming '/' is the path separator.
        parts = os.path.normpath(file_name).split(os.sep)
        depth_path = os.path.join(datapath, parts[-4], "ground_truth_depth", 'dslr_images', parts[-1])

        # Read the whole float32 buffer in one call instead of unpacking four
        # bytes at a time in Python; the cast keeps the float64 dtype that the
        # element-wise reader used to produce.
        depth = np.fromfile(depth_path, dtype=np.float32).astype(np.float64)
        depth = np.reshape(depth, (4032, -1))
        mask = (depth > 0) & (depth < 72)

        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        for transform in preprocessing:
            sample = transform(sample)

        # Wrapping each array in a list adds a leading batch axis.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Bring the prediction to the spatial size of the mask before scoring.
        prediction = expand_dims(prediction, 0)
        prediction = ops.ResizeBilinear(mask.shape[1:])(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    mean_loss = loss_sum / num
    print(f"ETH3D bad pixel: {mean_loss:.3f}")

    return mean_loss
-
-
def eval_DIW(datapath, net):
    """
    Evaluate the network on the DIW test annotations under ``datapath``.

    ``<datapath>/DIW_Annotations/DIW_test.csv`` is read as alternating rows: an
    even row whose first column is an image path, followed by an odd row with
    the annotation for that image. From the annotation row, columns 0-3 are
    used as 1-based indices of two points in the prediction, column 4 is the
    relation sign (``'>'`` or ``'<'``) and the last two columns are the width
    and height the prediction is resized to. Images that do not exist on disk
    are skipped.

    Args:
        datapath: root directory of the DIW data.
        net: callable network; it is applied to the prepared image tensor.

    Returns:
        ``(num - matches) / num`` where ``num`` is the number of evaluated
        images and ``matches`` counts the images for which the comparison of
        the two predicted values agrees with the annotated sign.

    Raises:
        ZeroDivisionError: if no annotated image was found on disk.
    """
    preprocessing = (
        transforms.Resize(config.img_width,
                          config.img_height,
                          resize_target=True,
                          keep_aspect_ratio=True,
                          ensure_multiple_of=32,
                          resize_method="upper_bound",
                          image_interpolation_method=cv2.INTER_CUBIC),
        transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std),
        transforms.PrepareForNet(),
    )
    expand_dims = ops.ExpandDims()

    loss_sum = 0
    num = 0
    file_path = os.path.join(datapath, 'DIW_Annotations', 'DIW_test.csv')
    with open(file_path, newline='') as f:
        rows = list(csv.reader(f))

    # Even rows name the image, the following odd row annotates it.
    for name_row, depths in zip(rows[0::2], rows[1::2]):
        path = name_row[0].split('/')
        file_name = os.path.join(datapath, path[-2], path[-1])
        # Check before reading: cv2.imread returns None for a missing file and
        # the division below would raise before the sample could be skipped.
        if not os.path.exists(file_name):
            continue
        num += 1  # one more image evaluated
        print(f"processing: {num}")

        sample = {
            'file_name': file_name,
            'image': cv2.imread(file_name) / 255,
            'depths': depths,
        }
        for transform in preprocessing:
            sample = transform(sample)

        # Wrapping the array in a list adds a leading batch axis.
        prediction = net(Tensor([sample["image"]], mstype.float32))

        # Resize the prediction to the size stored in the annotation row.
        shape_w, shape_h = int(depths[-2]), int(depths[-1])
        prediction = expand_dims(prediction, 0)
        prediction = ops.ResizeBilinear((shape_h, shape_w))(prediction)
        prediction = np.squeeze(prediction.asnumpy())

        # The annotation indices are 1-based; the first of each pair selects the
        # row (presumably y, then x -- TODO confirm against the DIW format).
        pixel_a = prediction[int(depths[0]) - 1][int(depths[1]) - 1]
        pixel_b = prediction[int(depths[2]) - 1][int(depths[3]) - 1]
        relation = depths[4]
        if (pixel_a > pixel_b and relation == '>') or (pixel_a < pixel_b and relation == '<'):
            loss_sum += 1

    bad_ratio = (num - loss_sum) / num
    print(f"bad pixel: {bad_ratio:.4f}")
    return bad_ratio
-
-
def eval_NYU(datamat, splitmat, net):
    """
    Evaluate the network on the NYU test split.

    The test indices are taken from the ``testNdxs`` entry of ``splitmat``
    (each index is decremented by one before use). Images and depth maps are
    read from the ``images`` and ``rawDepths`` datasets of the HDF5 file
    ``datamat``. Pixels whose depth lies strictly between 0 and 10 form the
    validity mask.

    Args:
        datamat: path of the HDF5 file with ``images`` and ``rawDepths``.
        splitmat: path of the ``.mat`` file that contains ``testNdxs``.
        net: callable network; it is applied to the prepared image tensor.

    Returns:
        The sum of the per-image ``BadPixelMetric(1.25, 10, 'NYU')`` values
        divided by the number of test images.

    Raises:
        ZeroDivisionError: if the split contains no test index.
    """
    # The transform classes live in the ``transforms`` module; the file does not
    # import them as bare names.
    preprocessing = (
        transforms.Resize(config.img_width,
                          config.img_height,
                          resize_target=None,
                          keep_aspect_ratio=True,
                          ensure_multiple_of=32,
                          resize_method="upper_bound",
                          image_interpolation_method=cv2.INTER_CUBIC),
        transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std),
        transforms.PrepareForNet(),
    )
    metric = BadPixelMetric(1.25, 10, 'NYU')
    expand_dims = ops.ExpandDims()

    loss_sum = 0
    sample = {}
    mat = loadmat(splitmat)
    # Shift the stored indices down by one (presumably 1-based MATLAB indices).
    indices = [ind[0] - 1 for ind in mat["testNdxs"]]
    num = 0
    with h5py.File(datamat, "r") as f:
        for ind in indices:
            num += 1
            print(num)
            # Swap the stored axis order so the image is indexed like the depth
            # map (assumes images are stored channel-first -- TODO confirm).
            image = np.swapaxes(f["images"][ind], 0, 2)
            image = image / 255
            depth = np.swapaxes(f["rawDepths"][ind], 0, 1)
            mask = (depth > 0) & (depth < 10)

            sample['image'] = image
            sample["depth"] = depth
            sample["mask"] = mask
            for transform in preprocessing:
                sample = transform(sample)

            # Wrapping each array in a list adds a leading batch axis.
            sample['image'] = Tensor([sample["image"]], mstype.float32)
            sample['depth'] = Tensor([sample["depth"]], mstype.float32)
            sample['mask'] = Tensor([sample["mask"]], mstype.int32)

            print(sample['image'].shape, sample['depth'].shape)
            prediction = net(sample['image'])

            mask = sample['mask'].asnumpy()
            depth = sample['depth'].asnumpy()

            # Bring the prediction to the spatial size of the mask.
            prediction = expand_dims(prediction, 0)
            prediction = ops.ResizeBilinear(mask.shape[1:])(prediction)
            prediction = np.squeeze(prediction.asnumpy())
            loss = metric(prediction, depth, mask)

            print('loss is ', loss)
            loss_sum += loss

    mean_loss = loss_sum / num
    print(f"bad pixel: {mean_loss:.3f}")
    return mean_loss
-
-
def run_eval():
    """
    Load the checkpoint and run the evaluations selected by ``config.data_name``.

    ``config.data_name`` selects one data set by name (``'Sintel'``,
    ``'Kitti'``, ``'TUM'``, ``'DIW'``, ``'ETH3D'`` or ``'NYU'``); ``'all'`` runs
    every evaluation in that order. The collected results are printed and
    written as JSON to ``config.ann_file``.
    """
    root = config.train_data_dir
    # (result key, evaluator, positional path arguments), in execution order.
    # Data set paths are appended to the root by plain string concatenation.
    evaluations = (
        ('Sintel', eval_Sintel, (root + config.datapath_Sintel,)),
        ('Kitti', eval_Kitti, (root + config.datapath_Kitti,)),
        ('TUM', eval_TUM, (root + config.datapath_TUM,)),
        ('DIW', eval_DIW, (root + config.datapath_DIW,)),
        ('ETH3D', eval_ETH3D, (root + config.datapath_ETH3D,)),
        ('NYU', eval_NYU, (root + config.datapath_NYU[0], root + config.datapath_NYU[1])),
    )

    net = MidasNet()
    param_dict = serialization.load_checkpoint(config.ckpt_path)
    serialization.load_param_into_net(net, param_dict)

    results = {}
    for name, evaluate, paths in evaluations:
        if config.data_name in (name, "all"):
            # float() keeps the value JSON-serialisable even when the metric
            # yields a NumPy scalar such as float32.
            results[name] = float(evaluate(*paths, net))

    print(results)
    # The context manager flushes and closes the result file deterministically.
    with open(config.ann_file, 'w') as result_file:
        json.dump(results, result_file)
-
-
if __name__ == '__main__':
    # Configure the MindSpore execution context from the project config, then
    # run the selected evaluations.
    context.set_context(
        mode=context.GRAPH_MODE,
        device_target=config.device_target,
        device_id=config.device_id,
    )
    run_eval()
|