import os
import argparse

import cv2
import numpy as np
import paddle
from paddle.io import DataLoader
from tqdm import tqdm

import datasets
from model.core import build_model
from model.layers import disp_to_depth
from utils import readlines, load_weight_file

cv2.setNumThreads(0)  # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)


splits_dir = os.path.join(os.path.dirname(__file__), "splits")
file_dir = os.path.dirname(__file__)

# Models which were trained with stereo supervision were trained with a nominal
# baseline of 0.1 units. The KITTI rig has a baseline of 54cm. Therefore,
# to convert our stereo predictions to real-world scale we multiply our depths by 5.4.
STEREO_SCALE_FACTOR = 5.4
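# Worked example (illustrative): a predicted depth of 10.0 nominal units maps to
# 10.0 * 5.4 = 54 m, since 0.54 m (real baseline) / 0.1 units (training baseline) = 5.4.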


def compute_errors(gt, pred):
    """Computation of error metrics between predicted and ground-truth depths
    """
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rmse = (gt - pred) ** 2
    rmse = np.sqrt(rmse.mean())

    rmse_log = (np.log(gt) - np.log(pred)) ** 2
    rmse_log = np.sqrt(rmse_log.mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)

    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
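
# Quick sanity check (assumed toy values, not from any benchmark):
#   gt = np.array([10.0, 20.0]); pred = np.array([11.0, 18.0])
#   compute_errors(gt, pred) -> abs_rel = mean(1/10, 2/20) = 0.1, and a1 = 1.0
#   because both ratios (1.10 and ~1.11) fall below the 1.25 threshold.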


def batch_post_process_disparity(l_disp, r_disp):
    """Apply the disparity post-processing method as introduced in Monodepthv1
    """
    _, h, w = l_disp.shape
    m_disp = 0.5 * (l_disp + r_disp)
    # Horizontal coordinate in [0, 1] for every pixel, broadcast over the batch
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    # Ramp masks that favour each prediction near the image border where it is
    # better constrained, blending with the mean everywhere else
    l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
    r_mask = l_mask[:, :, ::-1]
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
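
# Note on the call convention (see its use in evaluate() below): l_disp is the
# prediction for the original image and r_disp is the prediction for the
# horizontally flipped image, flipped back, so the masks weight whichever pass
# is more reliable near each border.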


def evaluate(opt, models=None):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        if models is None:
            opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
            assert os.path.isdir(opt.load_weights_folder), \
                "Cannot find a folder at {}".format(opt.load_weights_folder)
            print("-> Loading weights from {}".format(opt.load_weights_folder))
            encoder_path = os.path.join(opt.load_weights_folder, "encoder")
            decoder_path = os.path.join(opt.load_weights_folder, "depth")

            encoder_dict = load_weight_file(encoder_path)
            decoder_dict = load_weight_file(decoder_path)
            models, _ = build_model(opt)
            # Only load encoder weights whose keys exist in the freshly built model
            model_dict = models["encoder"].state_dict()
            models["encoder"].load_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
            models["depth"].load_dict(decoder_dict)

        models["encoder"].eval()
        models["depth"].eval()

        filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        img_ext = '.png' if opt.png else '.jpg'
        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           opt.height, opt.width,
                                           [0], 4, is_train=False, img_ext=img_ext)

        dataloader = DataLoader(dataset, batch_size=16, shuffle=False,
                                num_workers=opt.num_workers, drop_last=False)

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            opt.width, opt.height))

        with paddle.no_grad():
            for data in tqdm(dataloader):
                input_color = data[("color", 0, 0)]

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes:
                    # one on the original image and one on its horizontal flip
                    input_color = paddle.concat((input_color, paddle.flip(input_color, [3])), 0)

                output = models["depth"](models["encoder"](input_color))

                # disp_to_depth rescales the sigmoid output to a disparity whose
                # reciprocal lies in [opt.min_depth, opt.max_depth]
                pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    # First half of the batch holds the original images, second half the
                    # flipped ones; flip those back before blending the two passes
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            # The KITTI depth benchmark expects 16-bit PNGs storing depth * 256
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            # Standard crop used for Eigen-split evaluation (the "Garg crop")
            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            # Monocular predictions are scale-ambiguous, so align each one to the
            # ground truth via the ratio of medians
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")

    # Return abs_rel as the headline metric
    return mean_errors[0]


class MonodepthOptions:
    def __init__(self):
        self.parser = argparse.ArgumentParser(description="Monodepthv2 options")
        # CFG
        self.parser.add_argument("--type",
                                 type=str,
                                 help="model type",
                                 default="MonoDepthv2")

        # PATHS
        self.parser.add_argument("--data_path",
                                 type=str,
                                 help="path to the training data",
                                 default=os.path.join(file_dir, "kitti_data"))

        # TRAINING options
        self.parser.add_argument("--num_gpus",
                                 type=int,
                                 help="number of gpus used in training",
                                 default=1)
        self.parser.add_argument("--seed",
                                 type=int,
                                 help="seed used in training",
                                 default=210)
        self.parser.add_argument("--model_name",
                                 type=str,
                                 help="the name of the folder to save the model in",
                                 default="mdp")
        self.parser.add_argument("--split",
                                 type=str,
                                 help="which training split to use",
                                 choices=["eigen_zhou", "eigen_full", "odom", "benchmark"],
                                 default="eigen_zhou")
        self.parser.add_argument("--num_layers",
                                 type=int,
                                 help="number of resnet layers",
                                 default=18,
                                 choices=[18, 34, 50, 101, 152])
        self.parser.add_argument("--freeze_bn",
                                 action="store_true",
                                 help="freeze the running mean and running variance of all bn layers")
        self.parser.add_argument("--dataset",
                                 type=str,
                                 help="dataset to train on",
                                 default="kitti",
                                 choices=["kitti", "kitti_odom", "kitti_depth", "kitti_test"])
        self.parser.add_argument("--png",
                                 help="if set, trains from raw KITTI png files (instead of jpgs)",
                                 action="store_true")
        self.parser.add_argument("--height",
                                 type=int,
                                 help="input image height",
                                 default=192)
        self.parser.add_argument("--width",
                                 type=int,
                                 help="input image width",
                                 default=640)
        self.parser.add_argument("--disparity_smoothness",
                                 type=float,
                                 help="disparity smoothness weight",
                                 default=1e-3)
        self.parser.add_argument("--scales",
                                 nargs="+",
                                 type=int,
                                 help="scales used in the loss",
                                 default=[0, 1, 2, 3])
        self.parser.add_argument("--min_depth",
                                 type=float,
                                 help="minimum depth",
                                 default=0.1)
        self.parser.add_argument("--max_depth",
                                 type=float,
                                 help="maximum depth",
                                 default=100.0)
        self.parser.add_argument("--use_stereo",
                                 help="if set, uses stereo pair for training",
                                 action="store_true")
        self.parser.add_argument("--frame_ids",
                                 nargs="+",
                                 type=int,
                                 help="frames to load",
                                 default=[0, -1, 1])

        # ABLATION options
        self.parser.add_argument("--v1_multiscale",
                                 help="if set, uses monodepth v1 multiscale",
                                 action="store_true")
        self.parser.add_argument("--avg_reprojection",
                                 help="if set, uses average reprojection loss",
                                 action="store_true")
        self.parser.add_argument("--disable_automasking",
                                 help="if set, doesn't do auto-masking",
                                 action="store_true")
        self.parser.add_argument("--predictive_mask",
                                 help="if set, uses a predictive masking scheme as in Zhou et al",
                                 action="store_true")
        self.parser.add_argument("--no_ssim",
                                 help="if set, disables ssim in the loss",
                                 action="store_true")
        self.parser.add_argument("--weights_init",
                                 type=str,
                                 help="choose from pretrained (paddle pretrained weights), scratch, "
                                      "or a path to a custom weight file",
                                 default="pretrained")
        self.parser.add_argument("--pose_model_input",
                                 type=str,
                                 help="how many images the pose network gets",
                                 default="pairs",
                                 choices=["pairs", "all"])
        self.parser.add_argument("--pose_model_type",
                                 type=str,
                                 help="posecnn, separate_resnet, or shared",
                                 default="separate_resnet",
                                 choices=["posecnn", "separate_resnet", "shared"])

        # SYSTEM options
        self.parser.add_argument("--num_workers",
                                 type=int,
                                 help="number of dataloader workers",
                                 default=1)

        # LOADING options
        self.parser.add_argument("--load_weights_folder",
                                 type=str,
                                 help="name of model to load")
        self.parser.add_argument("--models_to_load",
                                 nargs="+",
                                 type=str,
                                 help="models to load",
                                 default=["encoder", "depth", "pose_encoder", "pose"])

        # EVALUATION options
        self.parser.add_argument("--eval_stereo",
                                 help="if set evaluates in stereo mode",
                                 action="store_true")
        self.parser.add_argument("--eval_mono",
                                 help="if set evaluates in mono mode",
                                 action="store_true")
        self.parser.add_argument("--disable_median_scaling",
                                 help="if set disables median scaling in evaluation",
                                 action="store_true")
        self.parser.add_argument("--pred_depth_scale_factor",
                                 help="if set multiplies predictions by this number",
                                 type=float,
                                 default=1)
        self.parser.add_argument("--ext_disp_to_eval",
                                 type=str,
                                 help="optional path to a .npy disparities file to evaluate")
        self.parser.add_argument("--eval_split",
                                 type=str,
                                 default="eigen",
                                 choices=["eigen", "eigen_benchmark", "benchmark", "odom_9", "odom_10"],
                                 help="which split to run eval on")
        self.parser.add_argument("--save_pred_disps",
                                 help="if set saves predicted disparities",
                                 action="store_true")
        self.parser.add_argument("--no_eval",
                                 help="if set disables evaluation",
                                 action="store_true")
        self.parser.add_argument("--eval_eigen_to_benchmark",
                                 help="if set, assume we are loading eigen results from npy but "
                                      "want to evaluate using the new benchmark",
                                 action="store_true")
        self.parser.add_argument("--eval_out_dir",
                                 help="if set will output the disparities to this folder",
                                 type=str)
        self.parser.add_argument("--post_process",
                                 help="if set will perform the flipping post processing "
                                      "from the original monodepth v1 paper",
                                 action="store_true")

    def parse(self):
        self.options = self.parser.parse_args()
        return self.options


if __name__ == "__main__":
    options = MonodepthOptions()
    opts = options.parse()
    evaluate(opts)
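
# Example invocation (illustrative; the script name and paths below are
# placeholders, not taken from the repository):
#   python evaluate_depth.py --data_path kitti_data \
#       --load_weights_folder ~/models/weights --eval_mono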