#2 Upload file to ''

Merged
whites merged 1 commits from whites-patch-2 into master 1 year ago
  1. +453
    -0
      midas_eval.py

+ 453
- 0
midas_eval.py View File

@@ -0,0 +1,453 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""eval midas."""
import glob
import csv
import os
import struct
import json
import numpy as np
from mindspore import Tensor
from mindspore import context
from mindspore import dtype as mstype
from mindspore.train import serialization
import mindspore.ops as ops
from src.util import depth_read_kitti, depth_read_sintel, BadPixelMetric
from src.midas_net import MidasNet
from src.config import config
from src.utils import transforms
from scipy.io import loadmat
import cv2
from PIL import Image
import h5py


def eval_Kitti(data_path, net):
    """
    Evaluate the network on the KITTI dataset.

    Args:
        data_path (str): Root directory laid out as ``<scene>/image/*.png``
            with matching ground truth in ``<scene>/depth/*.png``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all evaluated images.

    Raises:
        RuntimeError: If no images are found under ``data_path``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="lower_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    metric = BadPixelMetric(1.25, 80, 'KITTI')
    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(data_path, '*', 'image', '*.png'))
    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = np.array(Image.open(file_name)).astype(float)  # (H, W, 3)
        image = image / 255
        print(file_name)
        all_path = file_name.split('/')
        depth_path_name = all_path[-1].split('.')[0]

        # Ground-truth depth lives in the sibling 'depth' folder of the scene.
        depth = depth_read_kitti(os.path.join(data_path, all_path[-3], 'depth', depth_path_name + '.png'))
        # Valid pixels: positive depth below the 80 m KITTI cap.
        mask = (depth > 0) & (depth < 80)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # Wrap in a list to add the batch dimension.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Upsample the prediction back to the ground-truth resolution.
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    if num == 0:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no KITTI images found under {data_path}")
    print(f"Kitti bad pixel: {loss_sum / num:.3f}")
    return loss_sum / num


def eval_TUM(datapath, net):
    """
    Evaluate the network on the TUM RGB-D dataset.

    Args:
        datapath (str): Root directory containing ``*_person/associate.txt``
            files; each line pairs an RGB image path with a depth image path.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all evaluated frames.

    Raises:
        RuntimeError: If no frames are found under ``datapath``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    # get data
    metric = BadPixelMetric(1.25, 10, 'TUM')
    loss_sum = 0
    sample = {}
    file_path = glob.glob(os.path.join(datapath, '*_person', 'associate.txt'))

    num = 0
    # Loop invariant: build the op once instead of once per frame.
    expand_dims = ops.ExpandDims()
    for ind in file_path:
        all_path = ind.split('/')

        # 'with' fixes the original leaked file handle from `open(ind)`.
        with open(ind) as associate_file:
            for line in associate_file:
                num += 1
                print(f"processing: {num}")
                data = line.split('\n')[0].split(' ')
                image_path = os.path.join(datapath, all_path[-2], data[0])
                depth_path = os.path.join(datapath, all_path[-2], data[1])
                image = cv2.imread(image_path) / 255
                # TUM depth PNGs encode metres * 5000 in each channel.
                depth = cv2.imread(depth_path)[:, :, 0] / 5000
                mask = (depth > 0) & (depth < 10)
                print('mask is ', np.unique(mask))
                sample['image'] = image
                sample["depth"] = depth
                sample["mask"] = mask

                sample = img_input_1(sample)
                sample = img_input_2(sample)
                sample = img_input_3(sample)

                # Wrap in a list to add the batch dimension.
                sample['image'] = Tensor([sample["image"]], mstype.float32)
                sample['depth'] = Tensor([sample["depth"]], mstype.float32)
                sample['mask'] = Tensor([sample["mask"]], mstype.int32)

                print(sample['image'].shape, sample['depth'].shape)
                prediction = net(sample['image'])
                mask = sample['mask'].asnumpy()
                depth = sample['depth'].asnumpy()
                # Upsample the prediction back to the ground-truth resolution.
                prediction = expand_dims(prediction, 0)
                print(prediction.shape, mask.shape)
                resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
                prediction = resize_bilinear(prediction)
                prediction = np.squeeze(prediction.asnumpy())

                loss = metric(prediction, depth, mask)

                print('loss is ', loss)
                loss_sum += loss

    if num == 0:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no TUM frames found under {datapath}")
    print(f"TUM bad pixel: {loss_sum / num:.2f}")

    return loss_sum / num


def eval_Sintel(datapath, net):
    """
    Evaluate the network on the MPI Sintel dataset.

    Args:
        datapath (str): Root directory containing ``final_left/<seq>/*.png``
            frames, ``depth/<seq>/*.dpt`` ground truth and
            ``occlusions/<seq>/*.png`` masks.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all images found.

    Raises:
        RuntimeError: If no images are found under ``datapath``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    # get data
    metric = BadPixelMetric(1.25, 72, 'sintel')
    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(datapath, 'final_left', '*', '*.png'))

    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = np.array(Image.open(file_name)).astype(float)  # (436, 1024, 3)
        image = image / 255
        print(file_name)
        all_path = file_name.split('/')
        depth_path_name = all_path[-1].split('.')[0]

        depth = depth_read_sintel(os.path.join(datapath, 'depth', all_path[-2], depth_path_name + '.dpt'))

        # Occlusion map: 255 marks visible pixels; normalize to {0, 1}.
        mask1 = np.array(Image.open(os.path.join(datapath, 'occlusions', all_path[-2], all_path[-1]))).astype(int)
        mask1 = mask1 / 255

        # Valid pixels: visible, positive depth, below the 72 m cap.
        mask = (mask1 == 1) & (depth > 0) & (depth < 72)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask
        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # Wrap in a list to add the batch dimension.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        print(sample['image'].shape, sample['depth'].shape)
        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Upsample the prediction back to the ground-truth resolution.
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    if not image_path:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no Sintel images found under {datapath}")
    print(f"sintel bad pixel: {loss_sum / len(image_path):.3f}")
    return loss_sum / len(image_path)


def eval_ETH3D(datapath, net):
    """
    Evaluate the network on the ETH3D dataset.

    Args:
        datapath (str): Root directory containing
            ``<scene>/images/dslr_images/*.JPG`` and matching raw float32
            depth files under ``<scene>/ground_truth_depth/dslr_images/``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over all evaluated images.

    Raises:
        RuntimeError: If no images are found under ``datapath``.
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=True,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    metric = BadPixelMetric(1.25, 72, 'ETH3D')

    loss_sum = 0
    sample = {}
    image_path = glob.glob(os.path.join(datapath, '*', 'images', 'dslr_images', '*.JPG'))
    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    for file_name in image_path:
        num += 1
        print(f"processing: {num} / {len(image_path)}")
        image = cv2.imread(file_name) / 255
        all_path = file_name.split('/')
        depth_path = os.path.join(datapath, all_path[-4], "ground_truth_depth", 'dslr_images', all_path[-1])
        # The ground truth is a raw stream of native-endian float32 values,
        # 4032 pixels per row. np.fromfile reads them in one C-level pass
        # (equivalent to the original per-4-byte struct.unpack loop).
        with open(depth_path, 'rb') as f:
            depth = np.fromfile(f, dtype=np.float32)
        depth = np.reshape(depth, (4032, -1))
        mask = (depth > 0) & (depth < 72)
        sample['image'] = image
        sample["depth"] = depth
        sample["mask"] = mask

        sample = img_input_1(sample)
        sample = img_input_2(sample)
        sample = img_input_3(sample)
        # Wrap in a list to add the batch dimension.
        sample['image'] = Tensor([sample["image"]], mstype.float32)
        sample['depth'] = Tensor([sample["depth"]], mstype.float32)
        sample['mask'] = Tensor([sample["mask"]], mstype.int32)

        prediction = net(sample['image'])

        mask = sample['mask'].asnumpy()
        depth = sample['depth'].asnumpy()

        # Upsample the prediction back to the ground-truth resolution.
        prediction = expand_dims(prediction, 0)
        resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
        prediction = resize_bilinear(prediction)
        prediction = np.squeeze(prediction.asnumpy())
        loss = metric(prediction, depth, mask)

        print('loss is ', loss)
        loss_sum += loss

    if num == 0:
        # Fail with a clear message instead of ZeroDivisionError below.
        raise RuntimeError(f"no ETH3D images found under {datapath}")
    print(f"ETH3D bad pixel: {loss_sum / num:.3f}")

    return loss_sum / num


def eval_DIW(datapath, net):
    """
    Evaluate the network on the DIW (Depth in the Wild) dataset.

    The annotation CSV alternates rows: an even row carries the image path,
    the following odd row carries two annotated points, an ordinal relation
    ('<' or '>') and the original image width/height.

    Args:
        datapath (str): Root directory containing the images and
            ``DIW_Annotations/DIW_test.csv``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Fraction of point pairs whose predicted ordinal relation
        disagrees with the annotation (lower is better).
    """
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=True,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()
    loss_sum = 0
    num = 0
    sample = {}
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    file_path = os.path.join(datapath, 'DIW_Annotations', 'DIW_test.csv')
    with open(file_path) as f:
        reader = list(csv.reader(f))
        for (i, row) in enumerate(reader):
            if i % 2 == 0:
                path = row[0].split('/')
                sample['file_name'] = os.path.join(datapath, path[-2], path[-1])
                # Bug fix: the original called cv2.imread unconditionally and
                # crashed on a missing file (None / 255) before the existence
                # check on the following row could skip it.
                if os.path.exists(sample['file_name']):
                    sample['image'] = cv2.imread(sample['file_name']) / 255
            else:
                sample['depths'] = row
                if not os.path.exists(sample['file_name']):
                    continue
                num += 1  # one more image evaluated
                print(f"processing: {num}")
                sample = img_input_1(sample)
                sample = img_input_2(sample)
                sample = img_input_3(sample)
                sample['image'] = Tensor([sample["image"]], mstype.float32)
                prediction = net(sample['image'])
                # Last two CSV fields are the original image width/height.
                shape_w, shape_h = [int(sample['depths'][-2]), int(sample['depths'][-1])]
                prediction = expand_dims(prediction, 0)
                resize_bilinear = ops.ResizeBilinear((shape_h, shape_w))
                prediction = resize_bilinear(prediction)
                prediction = np.squeeze(prediction.asnumpy())

                # Annotated coordinates are 1-based; compare predicted depths
                # at the two points against the ground-truth ordinal relation.
                pixtel_a = prediction[int(sample['depths'][0]) - 1][int(sample['depths'][1]) - 1]
                pixtel_b = prediction[int(sample['depths'][2]) - 1][int(sample['depths'][3]) - 1]
                if pixtel_a > pixtel_b:
                    if sample['depths'][4] == '>':
                        loss_sum += 1
                if pixtel_a < pixtel_b:
                    if sample['depths'][4] == '<':
                        loss_sum += 1
    print(f"bad pixel: {(num - loss_sum) / num:.4f}")
    return (num - loss_sum) / num


def eval_NYU(datamat, splitmat, net):
    """
    Evaluate the network on the NYU Depth v2 dataset.

    Args:
        datamat (str): Path to the NYU ``.mat`` file (HDF5) holding
            ``images`` and ``rawDepths``.
        splitmat (str): Path to the official split ``.mat`` file with
            1-based test indices in ``testNdxs``.
        net: Trained MiDaS network; called as ``net(image_tensor)``.

    Returns:
        float: Mean bad-pixel loss over the test split.
    """
    # Bug fix: Resize / NormalizeImage / PrepareForNet were referenced without
    # the ``transforms.`` prefix used everywhere else, raising NameError.
    img_input_1 = transforms.Resize(config.img_width,
                                    config.img_height,
                                    resize_target=None,
                                    keep_aspect_ratio=True,
                                    ensure_multiple_of=32,
                                    resize_method="upper_bound",
                                    image_interpolation_method=cv2.INTER_CUBIC)
    img_input_2 = transforms.NormalizeImage(mean=config.nm_img_mean, std=config.nm_img_std)
    img_input_3 = transforms.PrepareForNet()

    # get data

    metric = BadPixelMetric(1.25, 10, 'NYU')
    loss_sum = 0
    sample = {}
    mat = loadmat(splitmat)
    # MATLAB indices are 1-based; convert to 0-based.
    indices = [ind[0] - 1 for ind in mat["testNdxs"]]
    num = 0
    # Loop invariant: build the op once instead of once per image.
    expand_dims = ops.ExpandDims()
    with h5py.File(datamat, "r") as f:
        for ind in indices:
            num += 1
            print(num)
            # The .mat stores images transposed; swap axes back to (H, W, C).
            image = np.swapaxes(f["images"][ind], 0, 2)
            image = image / 255
            depth = np.swapaxes(f["rawDepths"][ind], 0, 1)
            mask = (depth > 0) & (depth < 10)

            sample['image'] = image
            sample["depth"] = depth
            sample["mask"] = mask
            sample = img_input_1(sample)
            sample = img_input_2(sample)
            sample = img_input_3(sample)
            # Wrap in a list to add the batch dimension.
            sample['image'] = Tensor([sample["image"]], mstype.float32)
            sample['depth'] = Tensor([sample["depth"]], mstype.float32)
            sample['mask'] = Tensor([sample["mask"]], mstype.int32)

            print(sample['image'].shape, sample['depth'].shape)
            prediction = net(sample['image'])

            mask = sample['mask'].asnumpy()
            depth = sample['depth'].asnumpy()

            # Upsample the prediction back to the ground-truth resolution.
            prediction = expand_dims(prediction, 0)
            resize_bilinear = ops.ResizeBilinear(mask.shape[1:])
            prediction = resize_bilinear(prediction)
            prediction = np.squeeze(prediction.asnumpy())
            loss = metric(prediction, depth, mask)

            print('loss is ', loss)
            loss_sum += loss

    print(f"bad pixel: {loss_sum / num:.3f}")
    return loss_sum / num


def run_eval():
    """Load the checkpoint and evaluate on the dataset(s) selected by config.data_name.

    Runs each dataset whose name matches ``config.data_name`` (or all of them
    when it is ``"all"``), prints the collected results and writes them as
    JSON to ``config.ann_file``.
    """
    datapath_TUM = config.train_data_dir + config.datapath_TUM
    datapath_Sintel = config.train_data_dir + config.datapath_Sintel
    datapath_ETH3D = config.train_data_dir + config.datapath_ETH3D
    datapath_Kitti = config.train_data_dir + config.datapath_Kitti
    datapath_DIW = config.train_data_dir + config.datapath_DIW
    datamat = config.train_data_dir + config.datapath_NYU[0]
    splitmat = config.train_data_dir + config.datapath_NYU[1]

    net = MidasNet()
    param_dict = serialization.load_checkpoint(config.ckpt_path)
    serialization.load_param_into_net(net, param_dict)
    results = {}
    # Membership tests replace the original `== 'X' or == "all"` chains.
    if config.data_name in ('Sintel', 'all'):
        results['Sintel'] = eval_Sintel(datapath_Sintel, net)
    if config.data_name in ('Kitti', 'all'):
        results['Kitti'] = eval_Kitti(datapath_Kitti, net)
    if config.data_name in ('TUM', 'all'):
        results['TUM'] = eval_TUM(datapath_TUM, net)
    if config.data_name in ('DIW', 'all'):
        results['DIW'] = eval_DIW(datapath_DIW, net)
    if config.data_name in ('ETH3D', 'all'):
        results['ETH3D'] = eval_ETH3D(datapath_ETH3D, net)
    if config.data_name in ('NYU', 'all'):
        results['NYU'] = eval_NYU(datamat, splitmat, net)

    print(results)
    # 'with' fixes the original leaked file handle from json.dump(..., open(...)).
    with open(config.ann_file, 'w') as out_file:
        json.dump(results, out_file)


if __name__ == '__main__':
    # Configure graph-mode execution on the device chosen in config, then run.
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
    run_eval()

Loading…
Cancel
Save