|
- import os
- import cv2
- import numpy as np
- import pickle
- import torch
- from utils import mkdir
- import pdb
-
-
def resize_cam(cam, size=(224, 224)):
    """
    Resize a class activation map and min-max normalize it to [0, 1].

    :param cam: activation map as a numpy array accepted by ``cv2.resize``
    :param size: two-element target size, forwarded to ``cv2.resize`` as
        ``(size[0], size[1])``
    :return: resized map rescaled so its minimum is 0 and its maximum is 1;
        an all-zero map when the resized input is constant
    """
    cam = cv2.resize(cam, (size[0], size[1]))
    cam_min, cam_max = cam.min(), cam.max()
    shifted = cam - cam_min
    span = cam_max - cam_min
    if span == 0:
        # Flat map: ``shifted`` is all zeros, so dividing by 1 returns zeros
        # instead of the NaNs that 0 / 0 would produce.
        span = 1
    return shifted / span
-
-
def blend_cam(image, cam, es_box=(0, 0, 1, 1)):
    """
    Overlay a colorized activation map on an image.

    The map is zeroed outside ``es_box``, scaled by 255, cast to uint8 and
    colorized with the JET colormap. The result is mixed with ``image`` using
    weights 0.2 (image) and 0.8 (heatmap).

    :param image: image array broadcast-compatible with the heatmap
        (presumably H x W x 3 -- confirm against the caller)
    :param cam: 2-D activation map (assumed to lie in [0, 1] -- TODO confirm)
    :param es_box: box ``(x1, y1, x2, y2)``; the map is kept only inside
        rows ``y1:y2`` and columns ``x1:x2``
    :return: tuple ``(blend, heatmap)``
    """
    x1, y1, x2, y2 = es_box

    # Binary mask that keeps the activation only inside the box.
    mask = np.zeros_like(cam)
    mask[y1:y2, x1:x2] = 1

    cam_u8 = (cam * mask * 255.).astype(np.uint8)
    heatmap = cv2.applyColorMap(cam_u8, cv2.COLORMAP_JET)
    blend = image * 0.2 + heatmap * 0.8

    return blend, heatmap
-
-
def get_bboxes(cam, cam_thr=0.2):
    """
    Estimate a bounding box from an activation map.

    The map is scaled by 255 and cast to uint8. Pixels below
    ``cam_thr * max`` are zeroed, and the bounding rectangle of the
    largest-area contour is returned.

    :param cam: single-channel activation map (assumed to lie in [0, 1] --
        TODO confirm against the caller)
    :param cam_thr: float threshold, as a fraction of the map maximum
    :return: estimated box ``[x1, y1, x2, y2]``; ``[0, 0, 1, 1]`` when no
        contour is found
    """
    cam = (cam * 255.).astype(np.uint8)
    map_thr = cam_thr * np.max(cam)

    _, thr_gray_heatmap = cv2.threshold(cam,
                                        int(map_thr), 255,
                                        cv2.THRESH_TOZERO)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(thr_gray_heatmap,
                                cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return [0, 0, 1, 1]

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return [x, y, x + w, y + h]
-
-
def tensor2image(input, image_mean, image_std):
    """
    Convert a batch of normalized image tensors into displayable arrays.

    Undoes a ``(x - mean) / std`` normalization, moves channels last,
    reverses the channel order and scales by 255.

    :param input: CPU tensor of shape (N, C, H, W) that does not require grad
    :param image_mean: per-channel mean used for normalization (length C)
    :param image_std: per-channel std used for normalization (length C)
    :return: numpy array of shape (N, H, W, C) with the channel axis reversed
        and values multiplied by 255
    """
    image_mean = torch.reshape(torch.tensor(image_mean), (1, -1, 1, 1))
    image_std = torch.reshape(torch.tensor(image_std), (1, -1, 1, 1))
    # Inverse of (x - mean) / std is x * std + mean.
    image = input * image_std + image_mean
    image = image.numpy().transpose(0, 2, 3, 1)
    # Reverse the channel axis (e.g. RGB -> BGR) and rescale.
    image = image[:, :, :, ::-1] * 255
    return image
-
-
def calculate_IOU(boxA, boxB):
    """
    Compute the intersection over union of two boxes.

    Boxes are ``[x1, y1, x2, y2]`` with inclusive pixel coordinates, hence
    the ``+ 1`` in every width and height.

    :param boxA: first box
    :param boxB: second box
    :return: IoU as a float; 0.0 when the boxes do not overlap
    """
    # corners of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each side at zero: for disjoint boxes both sides can be negative
    # and their product would otherwise be a positive "intersection".
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # areas of the two input rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # intersection divided by union (sum of areas minus the intersection)
    return interArea / float(boxAArea + boxBArea - interArea)
-
-
def draw_bbox(image, iou, gt_box, pred_box, gt_score, is_top1=False):
    """
    Draw ground-truth and predicted boxes, plus the IoU as a percentage,
    on a copy of an image.

    :param image: image array to annotate; it is copied, so the input is
        left unmodified
    :param iou: IoU to print, either a scalar or a one-element array
    :param gt_box: ground-truth boxes ``[x1, y1, x2, y2]``, either a single
        box or an (N, 4) array
    :param pred_box: predicted box ``[x1, y1, x2, y2]``
    :param gt_score: currently unused; kept for interface compatibility
    :param is_top1: currently unused; kept for interface compatibility
    :return: annotated copy of ``image``
    """

    def _corners(box):
        # Cast to plain Python ints so the call does not depend on how cv2
        # handles numpy scalar types.
        return (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))

    def _draw_boxes(img, gt_boxes, predicted, gt_color=(0, 0, 255), pred_color=(0, 255, 0)):
        for single_gt in gt_boxes:
            top_left, bottom_right = _corners(single_gt)
            cv2.rectangle(img, top_left, bottom_right, gt_color, 2)
        top_left, bottom_right = _corners(predicted)
        cv2.rectangle(img, top_left, bottom_right, pred_color, 2)
        return img

    def _mark_target(img, text='target', pos=(25, 25), size=2):
        cv2.putText(img, text, pos, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), size)
        return img

    boxed_image = image.copy()

    # Accept both a single 1-D box and an (N, 4) array of boxes.
    gt_boxes = np.asarray(gt_box).reshape(-1, 4)
    boxed_image = _draw_boxes(boxed_image, gt_boxes, pred_box)

    # ``iou`` may arrive as a one-element array; format a plain float.
    iou_value = float(np.asarray(iou).reshape(-1)[0])
    _mark_target(boxed_image, '%.1f' % (iou_value * 100), (140, 30), 2)

    return boxed_image
-
def evaluate_cls_loc(input, cls_label, bbox_label, logits, cams, image_names, cfg, epoch):
    """
    Evaluate classification and localization for one batch and optionally
    save visualizations.

    :param input: input tensors of the model
    :param cls_label: class label tensor (one label per sample)
    :param bbox_label: per-sample strings of space-separated box coordinates,
        four numbers per ground-truth box
    :param logits: classification scores, indexed (sample, class)
    :param cams: cam of all the classes, indexed (sample, class, h, w)
    :param image_names: names of images, used as relative save paths
    :param cfg: configurations; reads DATA.IMAGE_MEAN, DATA.IMAGE_STD,
        DATA.CROP_SIZE, MODEL.CAM_THR, TEST.SAVE_BOXED_IMAGE, BASIC.SAVE_DIR
    :param epoch: epoch, used as a sub-directory name when saving images
    :return: eleven per-sample lists:
        cls_top1, cls_top5, loc_top1, loc_top5 (from ``cls_loc_err``),
        loc_gt_known (1 if the box from the ground-truth class CAM reaches
        IoU >= 0.5 with any ground-truth box, else 0), and the top-1 error
        breakdown top1_loc_right, top1_loc_cls, top1_loc_mins,
        top1_loc_part, top1_loc_more, top1_loc_wrong
    """
    cls_top1 = []
    cls_top5 = []
    loc_top1 = []
    loc_top5 = []
    loc_gt_known = []
    top1_loc_right = []
    top1_loc_cls = []
    top1_loc_mins = []
    top1_loc_part = []
    top1_loc_more = []
    top1_loc_wrong = []

    # label, top1 and top5 results
    cls_label = cls_label.tolist()
    cls_scores = logits.tolist()
    _, top1_idx = logits.topk(1, 1, True, True)
    top1_idx = top1_idx.tolist()
    _, top5_idx = logits.topk(5, 1, True, True)
    top5_idx = top5_idx.tolist()

    crop_size = cfg.DATA.CROP_SIZE
    cam_thr = cfg.MODEL.CAM_THR
    save_images = cfg.TEST.SAVE_BOXED_IMAGE

    batch = cams.shape[0]
    # De-normalized images are only needed for the saved visualizations.
    image = None
    if save_images:
        image = tensor2image(input.clone().detach().cpu(), cfg.DATA.IMAGE_MEAN, cfg.DATA.IMAGE_STD)

    for b in range(batch):
        gt_bbox = list(map(float, bbox_label[b].strip().split(' ')))

        # top-1 / top-5 classification and localization
        top_bboxes, _ = get_topk_boxes(top5_idx[b], cams[b], crop_size, threshold=cam_thr)
        topk_cls, topk_loc, wrong_details = cls_loc_err(top_bboxes, cls_label[b], gt_bbox, topk=(1, 5))
        cls_top1_b, cls_top5_b = topk_cls
        loc_top1_b, loc_top5_b = topk_loc
        cls_top1.append(cls_top1_b)
        cls_top5.append(cls_top5_b)
        loc_top1.append(loc_top1_b)
        loc_top5.append(loc_top5_b)

        # top-1 error breakdown; "right" is whatever no error flag claimed
        cls_wrong, multi_instances, region_part, region_more, region_wrong = wrong_details
        right = 1 - (cls_wrong + multi_instances + region_part + region_more + region_wrong)
        top1_loc_right.append(right)
        top1_loc_cls.append(cls_wrong)
        top1_loc_mins.append(multi_instances)
        top1_loc_part.append(region_part)
        top1_loc_more.append(region_more)
        top1_loc_wrong.append(region_wrong)

        # gt_known: localize with the CAM of the ground-truth class
        cam_b = cams[b, [cls_label[b]], :, :]
        cam_b = torch.mean(cam_b, dim=0, keepdim=True)
        cam_b = cam_b.detach().cpu().numpy().transpose(1, 2, 0)

        # Resize and Normalize CAM
        cam_b = resize_cam(cam_b, size=(crop_size, crop_size))

        # Estimate BBOX
        estimated_bbox = get_bboxes(cam_b, cam_thr=cam_thr)

        # Best IoU over all ground-truth boxes, kept as a plain float
        # (cal_iou returns an array).
        gt_box_cnt = len(gt_bbox) // 4
        iou = 0.0
        for i in range(gt_box_cnt):
            gt_box = gt_bbox[i * 4:(i + 1) * 4]
            iou = max(iou, float(np.max(cal_iou(estimated_bbox, gt_box))))

        loc_gt_known.append(1 if iou >= 0.5 else 0)

        if not save_images:
            continue

        # Get blended image
        blend, heatmap = blend_cam(image[b], cam_b, estimated_bbox)
        # Get boxed image
        top1_score = cls_scores[b][top1_idx[b][0]]  # score of the top-1 predicted class
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
        gt_boxes_int = np.array(gt_bbox).reshape(-1, 4).astype(int)
        boxed_image = draw_bbox(blend, iou, gt_boxes_int, estimated_bbox, top1_score, False)

        # save result; create the actual parent directory of the target file
        # so that flat and nested image names both work
        save_path = os.path.join(cfg.BASIC.SAVE_DIR, 'boxed_image', str(epoch), image_names[b])
        mkdir(os.path.dirname(save_path))
        cv2.imwrite(save_path, boxed_image)

    return cls_top1, cls_top5, loc_top1, loc_top5, loc_gt_known, top1_loc_right, top1_loc_cls, top1_loc_mins, \
        top1_loc_part, top1_loc_more, top1_loc_wrong
-
-
def get_topk_boxes(cls_inds, cam_map, crop_size, topk=(1, 5), threshold=0.2, ):
    """
    Estimate one bounding box per candidate class from its activation map.

    :param cls_inds: class indices, in ranking order
    :param cam_map: tensor of activation maps indexed (class, h, w)
    :param crop_size: side length the maps are resized to
    :param topk: for each ``k`` the first ``k`` boxes are returned as a group
    :param threshold: CAM threshold forwarded to ``get_bboxes``
    :return: tuple ``(groups, maps)``; ``groups`` holds, per entry of
        ``topk``, a list of ``[class_index, x1, y1, x2, y2]`` and ``maps``
        holds the resized, normalized map of every class in ``cls_inds``
    """
    target_size = (crop_size, crop_size)
    labelled_boxes = []
    resized_maps = []

    for class_idx in cls_inds:
        # Select one class, keep a channel axis, and move it last.
        single_cam = cam_map[[class_idx], :, :].detach().cpu().numpy()
        single_cam = resize_cam(single_cam.transpose(1, 2, 0), size=target_size)
        resized_maps.append(single_cam.copy())

        box = get_bboxes(single_cam, cam_thr=threshold)
        labelled_boxes.append([class_idx] + box)

    groups = [labelled_boxes[:limit] for limit in topk]
    return groups, resized_maps
-
def cls_loc_err(topk_boxes, gt_label, gt_boxes, topk=(1,), iou_th=0.5):
    """
    Score classification and localization for each top-k group of boxes.

    :param topk_boxes: one list per entry of ``topk``; each element is
        ``[class_index, x1, y1, x2, y2]``
    :param gt_label: ground-truth class index
    :param gt_boxes: flat sequence of ground-truth coordinates, four numbers
        per box
    :param topk: the k values the groups correspond to (only the length is
        checked)
    :param iou_th: IoU a box must exceed to count as correctly localized
    :return: tuple ``(topk_cls, topk_loc, wrong_details)``; the first two are
        lists of 0.0/1.0 per group, ``wrong_details`` is the
        ``get_badcase_detail`` result for a single-box (top-1) group, or
        ``None`` when no group holds exactly one box
    """
    assert len(topk_boxes) == len(topk)
    gt_box_cnt = len(gt_boxes) // 4
    topk_loc = []
    topk_cls = []
    # Bound up front so the return cannot hit an unassigned name when no
    # group is a top-1 group.
    wrong_details = None
    for topk_box in topk_boxes:
        loc_acc = 0
        cls_acc = 0
        for cls_box in topk_box:
            # Find the ground-truth box that best overlaps this prediction.
            max_iou = 0
            max_gt_id = 0
            for i in range(gt_box_cnt):
                gt_box = gt_boxes[i * 4:(i + 1) * 4]
                # cal_iou returns an array; reduce it to a plain float.
                iou_i = float(np.max(cal_iou(cls_box[1:], gt_box)))
                if iou_i > max_iou:
                    max_iou = iou_i
                    max_gt_id = i
            if len(topk_box) == 1:
                wrong_details = get_badcase_detail(cls_box, gt_boxes, gt_label, max_iou, max_gt_id)
            if cls_box[0] == gt_label:
                cls_acc = 1
                if max_iou > iou_th:
                    loc_acc = 1
                    break
        topk_loc.append(float(loc_acc))
        topk_cls.append(float(cls_acc))
    return topk_cls, topk_loc, wrong_details
-
def cal_iou(box1, box2, method='iou'):
    """
    Compute an overlap ratio between boxes given as ``[x1, y1, x2, y2]``
    with inclusive coordinates.

    A 1-D box is treated as a single row. 2-D inputs of shape [N, 4] are
    combined element-wise under numpy broadcasting.

    :param box1: one box or an [N, 4] array of boxes
    :param box2: one box or an [N, 4] array of boxes
    :param method: ``'iog'`` divides the intersection by the area of
        ``box2``, ``'iob'`` by the area of ``box1``; any other value gives
        intersection over union
    :return: numpy array with one ratio per row
    """
    def _as_rows(box):
        arr = np.asarray(box, dtype=float)
        return arr[np.newaxis, :] if arr.ndim == 1 else arr

    a = _as_rows(box1)
    b = _as_rows(box2)

    # Overlap extent along each axis; negative means the boxes are disjoint.
    overlap_w = np.minimum(a[:, 2], b[:, 2]) - np.maximum(a[:, 0], b[:, 0]) + 1
    overlap_h = np.minimum(a[:, 3], b[:, 3]) - np.maximum(a[:, 1], b[:, 1]) + 1
    inter = np.clip(overlap_w, 0.0, None) * np.clip(overlap_h, 0.0, None)

    area_a = (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1)
    area_b = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)

    if method == 'iog':
        denominator = area_b
    elif method == 'iob':
        denominator = area_a
    else:
        denominator = area_a + area_b - inter
    return inter / denominator
-
def get_badcase_detail(top1_bbox, gt_bboxes, gt_label, max_iou, max_gt_id):
    """
    Classify why a top-1 prediction failed.

    At most one flag of the returned 5-tuple
    ``(cls_wrong, multi_instances, region_part, region_more, region_wrong)``
    is 1; all zeros means the prediction counts as correct.

    :param top1_bbox: ``[class_index, x1, y1, x2, y2]`` of the top-1 box
    :param gt_bboxes: flat sequence of ground-truth coordinates, four numbers
        per box
    :param gt_label: ground-truth class index
    :param max_iou: best IoU between the predicted box and any ground truth
    :param max_gt_id: index of the ground-truth box that gave ``max_iou``
    :return: the 5-tuple of 0/1 flags described above
    """
    predicted_label = top1_bbox[0]
    predicted_box = top1_bbox[1:]

    # Wrong class: no localization analysis is done.
    if int(predicted_label) != gt_label:
        return 1, 0, 0, 0, 0

    # Right class and sufficient overlap: no error.
    if max_iou > 0.5:
        return 0, 0, 0, 0, 0

    # multi_instances error: the box covers (IoG > 0.3) more than one
    # ground-truth object.
    n_gt = len(gt_bboxes) // 4
    if n_gt > 1:
        coverage = [
            cal_iou(predicted_box, gt_bboxes[j * 4:(j + 1) * 4], method='iog')
            for j in range(n_gt)
        ]
        if sum(np.array(coverage) > 0.3) > 1:
            return 0, 1, 0, 0, 0

    # Compare against the best-matching ground-truth box.
    best_gt = gt_bboxes[max_gt_id * 4:(max_gt_id + 1) * 4]
    iog = cal_iou(predicted_box, best_gt, method='iog')
    iob = cal_iou(predicted_box, best_gt, method='iob')

    # region part error: most of the predicted box lies inside the object.
    if iob > 0.5:
        return 0, 0, 1, 0, 0
    # region more error: the predicted box covers most of the object.
    if iog >= 0.7:
        return 0, 0, 0, 1, 0
    # anything else is a plain mislocalization
    return 0, 0, 0, 0, 1
|