|
- """Meters."""
-
- from collections import deque
-
- import numpy as np
- import xnas.logger.logging as logging
- import torch
-
- from xnas.core.config import cfg
- from xnas.logger.timer import Timer
-
-
- logger = logging.get_logger(__name__)
-
-
def time_string(seconds):
    """Format a duration in seconds as a fixed-width ``DD,HH:MM:SS`` string."""
    total = int(seconds)
    days = total // 86400
    hrs = (total % 86400) // 3600
    mins = (total % 3600) // 60
    secs = total % 60
    return "{0:02},{1:02}:{2:02}:{3:02}".format(days, hrs, mins, secs)
-
-
def topk_errors(preds, labels, ks):
    """Return the top-k error (as a percentage) for each k in ``ks``.

    ``preds`` is a (batch, classes) score tensor and ``labels`` a (batch,)
    tensor of class indices.
    """
    err_str = "Batch dim of predictions and labels must match"
    assert preds.size(0) == labels.size(0), err_str
    batch_size = preds.size(0)
    # Indices of the max(ks) highest-scoring classes per sample, best first.
    top_inds = torch.topk(preds, max(ks), dim=1, largest=True, sorted=True)[1]
    # Arrange as (max_k, batch) so row i holds every sample's i-th best guess.
    top_inds = top_inds.t()
    # hits[i, j] is True iff the i-th guess for sample j is the true label.
    hits = top_inds.eq(labels.view(1, -1).expand_as(top_inds))
    errors = []
    for k in ks:
        num_correct = hits[:k, :].contiguous().view(-1).float().sum()
        errors.append((1.0 - num_correct / batch_size) * 100.0)
    return errors
-
-
def accuracy(output, target, topk=(1,)):
    """Return the precision@k (as a fraction in [0, 1]) for each k in ``topk``.

    ``target`` may be a vector of class indices or a one-hot matrix.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Collapse one-hot targets to class indices.
    if target.ndimension() > 1:
        target = target.max(1)[1]

    # (batch, maxk) indices of the top predictions, transposed to (maxk, batch).
    pred = output.topk(maxk, 1, True, True)[1].t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    # For each k, the fraction of samples whose label is among the top k.
    return [
        correct[:k].reshape(-1).float().sum(0) / batch_size for k in topk
    ]
-
-
def gpu_mem_usage():
    """Return the peak GPU memory allocated on the current device, in MB."""
    peak_bytes = torch.cuda.max_memory_allocated()
    return peak_bytes / 1024 / 1024
-
-
class AverageMeter(object):
    """Tracks the latest value and a running average of a scalar.

    Adapted from the PyTorch ImageNet example.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        # Latest value, running mean, running sum, and sample count.
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # ``val`` is a per-sample average over ``n`` samples.
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
-
-
class ScalarMeter(object):
    """Tracks a scalar over a sliding window plus global totals.

    Adapted from Detectron. Window statistics cover only the most recent
    ``window_size`` values; the global average covers every value added.
    """

    def __init__(self, window_size):
        # Fixed-capacity window of the most recent values.
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def reset(self):
        self.deque.clear()
        self.total = 0.0
        self.count = 0

    def add_value(self, value):
        self.deque.append(value)
        self.total += value
        self.count += 1

    def get_win_median(self):
        # Median over the current window only.
        return np.median(self.deque)

    def get_win_avg(self):
        # Mean over the current window only.
        return np.mean(self.deque)

    def get_global_avg(self):
        # Mean over every value ever added, regardless of window size.
        return self.total / self.count
-
-
class TrainMeter(object):
    """Tracks and logs per-iteration and per-epoch training statistics."""

    def __init__(self, epoch_iters):
        total_epochs = cfg.OPTIM.MAX_EPOCH + cfg.OPTIM.WARMUP_EPOCH
        self.epoch_iters = epoch_iters
        self.max_iter = total_epochs * epoch_iters
        self.iter_timer = Timer()
        # Loss smoothed over a window, plus an epoch-wide running total.
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.lr = None
        # Minibatch errors smoothed over a window.
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Epoch-wide misclassification counts and sample total.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def reset(self, timer=False):
        """Clear all epoch statistics; also reset the timer if requested."""
        if timer:
            self.iter_timer.reset()
        self.loss.reset()
        self.loss_total = 0.0
        self.lr = None
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        """Start timing the current iteration."""
        self.iter_timer.tic()

    def iter_toc(self):
        """Stop timing the current iteration."""
        self.iter_timer.toc()

    def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
        """Record one minibatch's errors, loss, and learning rate."""
        # Windowed minibatch stats.
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.loss.add_value(loss)
        self.lr = lr
        # Epoch aggregates, weighted by minibatch size.
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.loss_total += loss * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        """Return a dict of windowed stats for the current iteration."""
        total_epochs = cfg.OPTIM.MAX_EPOCH + cfg.OPTIM.WARMUP_EPOCH
        iters_done = cur_epoch * self.epoch_iters + cur_iter + 1
        eta_sec = self.iter_timer.average_time * (self.max_iter - iters_done)
        return {
            "epoch": "{}/{}".format(cur_epoch + 1, total_epochs),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "eta": time_string(eta_sec),
            "top1_err": self.mb_top1_err.get_win_avg(),
            "top5_err": self.mb_top5_err.get_win_avg(),
            "loss": self.loss.get_win_avg(),
            "lr": self.lr,
            "mem": int(np.ceil(gpu_mem_usage())),
        }

    def log_iter_stats(self, cur_epoch, cur_iter):
        """Log iteration stats every cfg.LOG_PERIOD iterations."""
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "train_iter"))

    def get_epoch_stats(self, cur_epoch):
        """Return a dict of sample-weighted stats for the whole epoch."""
        total_epochs = cfg.OPTIM.MAX_EPOCH + cfg.OPTIM.WARMUP_EPOCH
        iters_done = (cur_epoch + 1) * self.epoch_iters
        eta_sec = self.iter_timer.average_time * (self.max_iter - iters_done)
        return {
            "epoch": "{}/{}".format(cur_epoch + 1, total_epochs),
            "time_avg": self.iter_timer.average_time,
            "eta": time_string(eta_sec),
            "top1_err": self.num_top1_mis / self.num_samples,
            "top5_err": self.num_top5_mis / self.num_samples,
            "loss": self.loss_total / self.num_samples,
            "lr": self.lr,
            "mem": int(np.ceil(gpu_mem_usage())),
        }

    def log_epoch_stats(self, cur_epoch):
        """Log the aggregated stats for the finished epoch."""
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "train_epoch"))
-
-
class TestMeter(object):
    """Tracks and logs per-iteration and per-epoch testing statistics."""

    def __init__(self, max_iter):
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Minibatch errors smoothed over a window.
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Best (lowest) errors seen over any full pass of the test set.
        self.min_top1_err = 100.0
        self.min_top5_err = 100.0
        # Epoch-wide misclassification counts and sample total.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def reset(self, min_errs=False):
        """Clear epoch statistics; also reset best errors if requested."""
        if min_errs:
            self.min_top1_err = 100.0
            self.min_top5_err = 100.0
        self.iter_timer.reset()
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        """Start timing the current iteration."""
        self.iter_timer.tic()

    def iter_toc(self):
        """Stop timing the current iteration."""
        self.iter_timer.toc()

    def update_stats(self, top1_err, top5_err, mb_size):
        """Record one minibatch's errors, weighted by minibatch size."""
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        """Return a dict of windowed stats for the current iteration."""
        total_epochs = cfg.OPTIM.MAX_EPOCH + cfg.OPTIM.WARMUP_EPOCH
        return {
            "epoch": "{}/{}".format(cur_epoch + 1, total_epochs),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "top1_err": self.mb_top1_err.get_win_avg(),
            "top5_err": self.mb_top5_err.get_win_avg(),
            "mem": int(np.ceil(gpu_mem_usage())),
        }

    def log_iter_stats(self, cur_epoch, cur_iter):
        """Log iteration stats every cfg.LOG_PERIOD iterations."""
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "test_iter"))

    def get_epoch_stats(self, cur_epoch):
        """Return epoch-wide errors and update the running best errors."""
        top1_err = self.num_top1_mis / self.num_samples
        top5_err = self.num_top5_mis / self.num_samples
        # Track the best full-test-set errors seen so far.
        self.min_top1_err = min(self.min_top1_err, top1_err)
        self.min_top5_err = min(self.min_top5_err, top5_err)
        total_epochs = cfg.OPTIM.MAX_EPOCH + cfg.OPTIM.WARMUP_EPOCH
        return {
            "epoch": "{}/{}".format(cur_epoch + 1, total_epochs),
            "top1_err": top1_err,
            "top5_err": top5_err,
            "min_top1_err": self.min_top1_err,
            "min_top5_err": self.min_top5_err,
            "mem": int(np.ceil(gpu_mem_usage())),
        }

    def log_epoch_stats(self, cur_epoch):
        """Log the aggregated stats for the finished test epoch."""
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "test_epoch"))
|