|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
-
- """ eval my model """
-
- import json
- import time
- import os
- import argparse
- import math
- import random
- import numpy as np
- import cv2
- from tqdm import tqdm
- import mindspore as ms
- from mindspore import load_checkpoint, load_param_into_net
- from mindspore import context
- import pycocotools
- from pycocotools.coco import COCO
- from pycocotools.cocoeval import COCOeval
- from src.SOLO.layers.modules.loss_614 import mask_iou_net
- from src.SOLO.yolactpp import Yolact
- from.src.config import COCO_CLASSES, COCO_LABEL_MAP_EVAL
-
- MEANS = (103.94, 116.78, 123.68)
- STD = (57.38, 57.12, 58.40)
-
- parser = argparse.ArgumentParser(description='SOLO COCO Evaluation')
- parser.add_argument('--pre_trained',
- default='/data/weights/SOLO_plus_resnet50_800000.ckpt', type=str,
- help='Trained state_dict file path to open. If "interrupt", this will open the interrupt file.')
- parser.add_argument('--bbox_det_file', default='results/bbox_detections.json', type=str,
- help='The output file for coco bbox results if --coco_results is set.')
- parser.add_argument('--mask_det_file', default='results/mask_detections.json', type=str,
- help='The output file for coco mask results if --coco_results is set.')
- parser.add_argument('--seed', default=None, type=int,
- help='The seed to pass into random.seed. Note: this is"\
- "only really for the shuffle and does not (I think) affect cuda stuff.')
- parser.add_argument('--score_threshold', default=0, type=float,
- help='Detections with a score under this threshold"\
- "will not be considered. This currently only works in display mode.')
- parser.add_argument('--valid_img_path', default="/data/coco2017/val2017", type=str)
- parser.add_argument('--gt_ann_file', default="/data/coco2017/annotations/instances_val2017.json", type=str)
- parser.add_argument('--eval_type', default='both', choices=['bbox', 'mask', 'both'], type=str)
- parser.add_argument("--device_id", type=int, default=1, help="Device id, default is 0.")
- args = parser.parse_args()
-
- if args.seed is not None:
- random.seed(args.seed)
-
- context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
- device_id=args.device_id, enable_reduce_precision=True)
-
- iou_thresholds = [x / 100 for x in range(50, 100, 5)]
- coco_cats = {} # Call prep_coco_cats to fill this
- coco_cats_inv = {}
- color_cache = {}
-
- def prep_coco_cats():
- """ Prepare inverted table for category id lookup given a coco cats object. """
- for coco_cat_id, transformed_cat_id_p1 in get_label_map().items():
- transformed_cat_id = transformed_cat_id_p1 - 1
- coco_cats[transformed_cat_id] = coco_cat_id
- coco_cats_inv[coco_cat_id] = transformed_cat_id
-
- def get_label_map():
- return COCO_LABEL_MAP_EVAL
-
- class COCOAnnotationTransform():
- """Transforms a COCO annotation into a Tensor of bbox coords and label index
- Initialized with a dictionary lookup of classnames to indexes
- """
- def __init__(self):
- self.label_map = get_label_map()
-
- def __call__(self, target, width, height):
- """
- Args:
- target (dict): COCO target json annotation as a python dict
- height (int): height
- width (int): width
- Returns:
- a list containing lists of bounding boxes [bbox coords, class idx]
- """
- scale = np.array([width, height, width, height])
- res = []
- for obj in target:
- if 'bbox' in obj:
- bbox = obj['bbox']
- label_idx = obj['category_id']
- if label_idx >= 0:
- label_idx = self.label_map[label_idx] - 1
- final_box = list(np.array([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]])/scale)
- final_box.append(label_idx)
- res += [final_box] # [xmin, ymin, xmax, ymax, label_idx]
- else:
- print("No bbox found for object ", obj)
-
- return res
-
-
- class CocoDataset():
- """ Operate on the dataset """
- def __init__(self, image_path, info_file, transform=None,
- dataset_name='MS COCO', has_gt=True):
-
- self.root = image_path
- self.coco = COCO(info_file)
-
- self.ids = list(self.coco.imgToAnns.keys())
-
- if self.ids.isspace() or not has_gt:
- self.ids = list(self.coco.imgs.keys())
-
- self.transform = transform
- self.target_transform = COCOAnnotationTransform()
-
- self.name = dataset_name
- self.has_gt = has_gt
-
- def __len__(self):
- return len(self.ids)
-
- def pull_anno(self, img_id):
- '''Returns the original annotation of image at index
-
- Note: not using self.__getitem__(), as any transformations passed in
- could mess up this functionality.
-
- Return:
- list: [img_id, [(label, bbox coords),...]]
- eg: ('001718', [('dog', (96, 13, 438, 332))])
- '''
-
- ann_ids = self.coco.getAnnIds(imgIds=img_id)
- return self.coco.loadAnns(ann_ids)
-
- def pull_image(self, img_id):
- '''Returns the original image object at index in PIL form
-
- Note: not using self.__getitem__(), as any transformations passed in
- could mess up this functionality.
-
- Argument:
-
- Return:
- img info . ndarray
- '''
-
- path = self.coco.loadImgs(img_id)[0]['file_name']
- return cv2.imread(os.path.join(self.root, path), cv2.IMREAD_COLOR)
-
- def pull_item(self, img_id):
- """
- Returns:
- tuple: Tuple (image, target, masks, height, width, crowd).
- target is the object returned by ``coco.loadAnns``.
- Note that if no crowd annotations exist, crowd will be None
-
- """
-
- if self.has_gt:
- ann_ids = self.coco.getAnnIds(imgIds=img_id)
-
- # Target has {'segmentation', 'area', iscrowd', 'image_id', 'bbox', 'category_id'}
- target = [x for x in self.coco.loadAnns(ann_ids) if x['image_id'] == img_id]
- else:
- target = []
-
- # Separate out crowd annotations. These are annotations that signify a large crowd of
- # objects of said class, where there is no annotation for each individual object. Both
- # during testing and training, consider these crowds as neutral.
- crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])]
- target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]
- num_crowds = len(crowd)
-
- for x in crowd:
- x['category_id'] = -1
-
- # This is so we ensure that all crowd annotations are at the end of the array
- target += crowd
-
- # The split here is to have compatibility with both COCO2014 and 2017 annotations.
- # In 2014, images have the pattern COCO_{train/val}2014_%012d.jpg, while in 2017 it's %012d.jpg.
- # Our script downloads the images as %012d.jpg so convert accordingly.
- file_name = self.coco.loadImgs(img_id)[0]['file_name']
-
- if file_name.startswith('COCO'): # ????
- file_name = file_name.split('_')[-1]
-
- path = os.path.join(self.root, file_name)
-
- img = cv2.imread(path)
- height, width, _ = img.shape
-
- if target.isspace() is not True:
- # Pool all the masks for this image into one [num_objects,height,width] matrix
- masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
- masks = np.vstack(masks)
- masks = masks.reshape(-1, height, width)
-
- if self.target_transform is not None and target.isspace() is not True:
- target = self.target_transform(target, width, height)
-
- if self.transform is not None:
- if target.isspace() is not True:
- target = np.array(target)
- img, masks, boxes, labels = self.transform(img, masks, target[:, :4],
- {'num_crowds': num_crowds, 'labels': target[:, 4]})
-
- # I stored num_crowds in labels so I didn't have to modify the entirety of augmentations
- num_crowds = labels['num_crowds']
- labels = labels['labels']
-
- target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
- else:
- img, _, _, _ = self.transform(img, np.zeros((1, height, width), dtype=np.float),
- np.array([[0, 0, 1, 1]]),
- {'num_crowds': 0, 'labels': np.array([0])})
- masks = None
- target = None
-
- if target[0].isspace() is True:
- print('Warning: Augmentation output an example with no ground truth. Resampling...')
- return self.pull_item(random.randint(0, len(self.ids) - 1))
-
- return img.transpose([2, 0, 1]), target, masks, height, width, num_crowds
-
- def sanitize_coordinates(_x1, _x2, img_size: int, padding: int = 0):
- """
- Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size.
- Also converts from relative to absolute coordinates and casts the results to long tensors.
-
- If cast is false, the result won't be cast to longs.
- Warning: this does things in-place behind the scenes so copy if necessary.
- """
- _x1 = _x1 * img_size
- _x2 = _x2 * img_size
-
- x1 = np.minimum(_x1, _x2)
- x2 = np.maximum(_x1, _x2)
- x1 = np.clip(x1 - padding, a_min=0, a_max=None)
- x2 = np.clip(x2 + padding, a_min=None, a_max=img_size)
-
- return x1, x2
-
- # done.
- def crop(masks, boxes, padding: int = 1):
- """
- "Crop" predicted masks by zeroing out everything not in the predicted bbox.
- Vectorized by Chong (thanks Chong).
-
- Args:
- - masks should be a size [h, w, n] tensor of masks
- - boxes should be a size [n, 4] tensor of bbox coords in relative point form
- """
- h, w, n = masks.shape
- # done
- x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding)
- y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding)
-
- np.arange(w, dtype=x1.dtype)
- rows = np.arange(w, dtype=x1.dtype).reshape((1, -1, 1)).repeat(repeats=h, axis=0).repeat(repeats=n, axis=2)
- cols = np.arange(h, dtype=x1.dtype).reshape((-1, 1, 1)).repeat(repeats=w, axis=1).repeat(repeats=n, axis=2)
-
- masks_left = rows >= x1.reshape((1, 1, -1))
- masks_right = rows < x2.reshape((1, 1, -1))
- masks_up = cols >= y1.reshape((1, 1, -1))
- masks_down = cols < y2.reshape((1, 1, -1))
-
- crop_mask = masks_left * masks_right * masks_up * masks_down
-
- return masks * crop_mask.astype(np.float32)
-
-
- def postprocess(det_output, w, h, batch_idx=0, crop_masks=True, score_threshold=0.15):
- """
- Postprocesses the output of SOLO on testing mode into a format that makes sense,
- accounting for all the possible configuration settings.
-
- Args:
- - det_output: The lost of dicts that Detect outputs.
- - w: The real with of the image.
- - h: The real height of the image.
- - batch_idx: If you have multiple images for this batch, the image's index in the batch.
- - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see torch.nn.functional.interpolate)
-
- Returns 4 torch Tensors (in the following order):
- - classes [num_det]: The class idx for each detection.
- - scores [num_det]: The confidence score for each detection.
- - boxes [num_det, 4]: The bounding box for each detection in absolute point form.
- - masks [num_det, h, w]: Full image masks for each detection.
- """
-
- dets = det_output[batch_idx]
- dets = dets['detection'] # dict, contains 5 tensor
-
- if dets is None:
- return [np.array([]), np.array([]), np.array([]), np.array([])] # Warning, this is 4 copies of the same thing
-
- for k in dets:
- dets[k] = dets[k].asnumpy()
-
- if score_threshold > 0:
- keep = dets['score'] > score_threshold # ndarray
-
- for k in dets:
- if k != 'proto':
- dets[k] = dets[k][keep]
- # modified
- if dets['score'].shape[0] == 0:
- return [np.array([]), np.array([]), np.array([]), np.array([])]
-
- # Actually extract everything from dets now
- classes = dets['class'] #[100,] need to be expand to [100,1]
- boxes = dets['box']
- scores = dets['score']
- masks = dets['mask']
-
- # At this points masks is only the coefficients
- proto_data = dets['proto']
-
- masks = np.dot(proto_data, masks.T) # [138, 138, 32] * [32, 100] -> [138, 138, 100]
- masks = 1 / (1 + np.exp(-masks)) # sigmoid
-
- # Crop masks before upsampling because you know why
- # done.
- if crop_masks: # True
- masks = crop(masks, boxes)
-
- # Permute into the correct output shape [num_dets, proto_h, proto_w]
- masks = masks.transpose((2, 0, 1))
-
- # =========== maskiou_net ==================
- masks = np.transpose(cv2.resize(np.transpose(masks, (2, 1, 0)), (h, w), interpolation=cv2.INTER_LINEAR), (2, 1, 0)) # it is bilinear interpolate
-
- # Binarize the masks.
- # this op will create a array like [[1,0,0,1...,1],[...]], all elements below 0.5 will be replaced by 0, otherwise 1.
- masks = np.greater(masks, 0.5).astype(np.float32)
-
- boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w)
- boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h)
-
- return classes, scores, boxes, masks
-
-
- def gather_numpy(self, dim, index):
-
- data_swaped = np.swapaxes(self, 0, dim)
- index_swaped = np.swapaxes(index, 0, dim)
- gathered = np.choose(index_swaped, data_swaped)
- return np.swapaxes(gathered, 0, dim)
-
-
- def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections):
- """ postprocess """
- # classes [100,], scores [100, ], boxes [100,4], masks [100, H, W]
- classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=True, score_threshold=args.threshold)
- if classes.shape.isspace() is True:
- return
-
- classes = list(classes.astype(int))
-
- if isinstance(scores, list):
- box_scores = list(scores[0].astype(float))
- mask_scores = list(scores[1].astype(float))
- else:
- scores = list(scores.astype(float))
- box_scores = scores
- mask_scores = scores
- masks = masks.reshape((-1, h * w))
-
- # ============== output json module ====================
- # add bboxes abd masks into detection obj.
- masks = masks.reshape((-1, h, w))
- for i in range(masks.shape[0]):
- # Make sure that the bounding box actually makes sense and a mask was produced
- if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
- detections.add_bbox(image_id, classes[i], boxes[i, :], box_scores[i])
- detections.add_mask(image_id, classes[i], masks[i, :, :], mask_scores[i])
- return
-
- class APDataObject:
- """
- Stores all the information necessary to calculate the AP for one IoU and one class.
- Note: I type annotated this because why not.
- """
-
- def __init__(self):
- self.data_points = []
- self.num_gt_positives = 0
-
- def push(self, score: float, is_true: bool):
- self.data_points.append((score, is_true))
-
- def add_gt_positives(self, num_positives: int):
- """ Call this once per image. """
- self.num_gt_positives += num_positives
-
- def is_empty(self) -> bool:
- return len(self.data_points) == 0 and self.num_gt_positives == 0
-
- def get_ap(self) -> float:
- """ Warning: result not cached. """
-
- if self.num_gt_positives == 0:
- return 0
-
- # Sort descending by score
- self.data_points.sort(key=lambda x: -x[0])
-
- precisions = []
- recalls = []
- num_true = 0
- num_false = 0
-
- # Compute the precision-recall curve. The x axis is recalls and the y axis precisions.
- for datum in self.data_points:
- # datum[1] is whether the detection a true or false positive
- if datum[1]:
- num_true += 1
- else:
- num_false += 1
-
- precision = num_true / (num_true + num_false)
- recall = num_true / self.num_gt_positives
-
- precisions.append(precision)
- recalls.append(recall)
-
- # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))]
- # Basically, remove any temporary dips from the curve.
- # At least that's what I think, idk. COCOEval did it so I do too.
- for i in range(len(precisions) - 1, 0, -1):
- if precisions[i] > precisions[i - 1]:
- precisions[i - 1] = precisions[i]
-
- # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars.
- y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00
- x_range = np.array([x / 100 for x in range(101)])
- recalls = np.array(recalls)
-
- # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range.
- # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009).
- # I approximate the integral this way, because that's how COCOEval does it.
- indices = np.searchsorted(recalls, x_range, side='left')
- for bar_idx, precision_idx in enumerate(indices):
- if precision_idx < len(precisions):
- y_range[bar_idx] = precisions[precision_idx]
-
- # Finally compute the riemann sum to get our integral.
- # avg([precision(x) for x in 0:0.01:1])
- return sum(y_range) / len(y_range)
-
- def get_coco_cat(transformed_cat_id):
- """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """
- return coco_cats[transformed_cat_id]
-
- class Detections:
- """ Detect the model """
- def __init__(self):
- self.bbox_data = []
- self.mask_data = []
-
- def add_bbox(self, image_id: int, category_id: int, bbox: list, score: float):
- """ x1 y1 x2 y2 -> x1 y1 w h """
- bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
-
- # Round to the nearest 10th to avoid huge file sizes, as COCO suggests
- bbox = [round(float(x) * 10) / 10 for x in bbox]
-
- self.bbox_data.append({
- 'image_id': int(image_id),
- 'category_id': get_coco_cat(int(category_id)),
- 'bbox': bbox,
- 'score': float(score)
- })
-
- def add_mask(self, image_id: int, category_id: int, segmentation: np.ndarray, score: float):
- """ The segmentation should be the full mask, the size of the image and with size [h, w]. """
- rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8)))
- rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings
-
- self.mask_data.append({
- 'image_id': int(image_id),
- 'category_id': get_coco_cat(int(category_id)),
- 'segmentation': rle,
- 'score': float(score)
- })
-
- def dump(self):
- dump_arguments = [
- (self.bbox_data, args.bbox_det_file),
- (self.mask_data, args.mask_det_file)
- ]
-
- for data, path in dump_arguments:
- with open(path, 'w') as f:
- json.dump(data, f)
-
- def eval_network(net, dataset):
- """ eval """
- net.detect.use_fast_nms = True
- net.detect.use_cross_class_nms = False
-
- dataset_size = len(dataset)
- print("dataset_size : {}".format(dataset_size))
-
- # For each class and iou, stores tuples (score, isPositive)
- # Index ap_data[type][iouIdx][classIdx]
- ap_data = {
- 'box': [[APDataObject() for _ in COCO_CLASSES] for _ in iou_thresholds],
- 'mask': [[APDataObject() for _ in COCO_CLASSES] for _ in iou_thresholds]
- }
- detections = Detections()
-
- for image_idx in tqdm(dataset.ids):
- # ============== load data =================
- # img : tensor([3,550,550]) in source code, now ndarray
- img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)
- img = np.expand_dims(img, axis=0) # [1,3,550,550]
-
- cur_time = time.time()
- preds = net(ms.Tensor(img).astype(ms.float32))
- print("forward consume {} s. ".format(time.time()-cur_time))
-
- prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, image_idx, detections)
-
- print('Dumping detections...')
- detections.dump()
-
- print("Dump json success. ")
-
- def _eval():
- """ _eval """
- eval_bbox = (args.eval_type in ('bbox', 'both')) # both
- eval_mask = (args.eval_type in ('mask', 'both'))
-
- print('Loading annotations...')
- gt_annotations = COCO(args.gt_ann_file)
- if eval_bbox:
- bbox_dets = gt_annotations.loadRes(args.bbox_det_file)
- if eval_mask:
- mask_dets = gt_annotations.loadRes(args.mask_det_file)
-
- if eval_bbox:
- print('\nEvaluating BBoxes:')
- bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
- bbox_eval.evaluate()
- bbox_eval.accumulate()
- bbox_eval.summarize()
-
- if eval_mask:
- print('\nEvaluating Masks:')
- bbox_eval = COCOeval(gt_annotations, mask_dets, 'segm')
- bbox_eval.evaluate()
- bbox_eval.accumulate()
- bbox_eval.summarize()
-
-
-
-
- class ConvertFromInts():
- def __call__(self, image, masks=None, boxes=None, labels=None):
- return image.astype(np.float32), masks, boxes, labels
-
-
-
- class Resize():
- """ If preserve_aspect_ratio is true, this resizes to an approximate area of max_size * max_size """
-
- @staticmethod
- def calc_size_preserve_ar(img_w, img_h, max_size):
- """ I matched this one out on the piece of paper. Resulting width*height = approx max_size^2 """
- ratio = math.sqrt(img_w / img_h)
- w = max_size * ratio
- h = max_size / ratio
- return int(w), int(h)
-
- def __init__(self, resize_gt=True):
- self.resize_gt = resize_gt
- self.max_size = 550
- self.preserve_aspect_ratio = False
-
- def __call__(self, image, masks, boxes, labels=None):
- img_h, img_w, _ = image.shape
-
- if self.preserve_aspect_ratio:
- width, height = Resize.calc_size_preserve_ar(img_w, img_h, self.max_size)
- else:
- width, height = self.max_size, self.max_size
-
- image = cv2.resize(image, (width, height))
-
- if self.resize_gt:
- # Act like each object is a color channel
- masks = masks.transpose((1, 2, 0))
- masks = cv2.resize(masks, (width, height))
-
- # OpenCV resizes a (w,h,1) array to (s,s), so fix that
- if len(masks.shape) == 2:
- masks = np.expand_dims(masks, 0)
- else:
- masks = masks.transpose((2, 0, 1))
-
- # Scale bounding boxes (which are currently absolute coordinates)
- boxes[:, [0, 2]] *= (width / img_w)
- boxes[:, [1, 3]] *= (height / img_h)
-
- # Discard boxes that are smaller than we'd like
- w = boxes[:, 2] - boxes[:, 0]
- h = boxes[:, 3] - boxes[:, 1]
-
- keep = (w > 4/550) * (h > 4/550)
- masks = masks[keep]
- boxes = boxes[keep]
- labels['labels'] = labels['labels'][keep]
- labels['num_crowds'] = (labels['labels'] < 0).sum()
-
- return image, masks, boxes, labels
-
-
- class Compose():
- """Composes several augmentations together.
- Args:
- transforms (List[Transform]): list of transforms to compose.
- Example:
- # >>> augmentations.Compose([
- # >>> transforms.CenterCrop(10),
- # >>> transforms.ToTensor(),
- # >>> ])
- """
-
- def __init__(self, transforms):
- self.transforms = transforms
-
- def __call__(self, img, masks=None, boxes=None, labels=None):
- for t in self.transforms:
- img, masks, boxes, labels = t(img, masks, boxes, labels)
- return img, masks, boxes, labels
-
-
-
- class BackboneTransform():
- """
- Transforms a BRG image made of floats in the range [0, 255] to whatever
- input the current backbone network needs.
-
- transform is a transform config object (see config.py).
- in_channel_order is probably 'BGR' but you do you, kid.
- """
- def __init__(self, mean, std, in_channel_order):
- self.mean = np.array(mean, dtype=np.float32)
- self.std = np.array(std, dtype=np.float32)
-
- # Here I use "Algorithms an Coding" to convert string permutations to numbers
- self.channel_map = {c: idx for idx, c in enumerate(in_channel_order)}
- self.channel_permutation = [self.channel_map[c] for c in 'RGB']
-
- def __call__(self, img, masks=None, boxes=None, labels=None):
-
- img = img.astype(np.float32)
- img = (img - self.mean) / self.std
- img = img[:, :, self.channel_permutation]
-
- return img.astype(np.float32), masks, boxes, labels
-
-
-
- class BaseTransform():
- """BaseTransform"""
- def __init__(self, mean=MEANS, std=STD):
- self.augment = Compose([
- ConvertFromInts(),
- Resize(resize_gt=False),
- BackboneTransform(mean, std, 'BGR')
- ])
-
- def __call__(self, img, masks=None, boxes=None, labels=None):
- return self.augment(img, masks, boxes, labels)
-
-
- class ToAbsoluteCoords():
- def __call__(self, image, masks=None, boxes=None, labels=None):
- boxes[:, 0] *= width
- boxes[:, 2] *= width
- boxes[:, 1] *= height
- boxes[:, 3] *= height
-
- return image, masks, boxes, labels
-
-
- if __name__ == '__main__':
- parse_args()
-
- if not os.path.exists(args.bbox_det_file) or not os.path.exists(args.mask_det_file):
-
- datasets = CocoDataset(args.valid_img_path, args.gt_ann_file, transform=BaseTransform(), has_gt=True)
-
- prep_coco_cats()
-
- network = SOLO()
- mask_iou_net = mask_iou_net()
-
- ckpt_path = args.pre_trained
-
- if ckpt_path:
- param_dict = load_checkpoint(ckpt_path)
- load_param_into_net(network, param_dict)
-
- eval_network(net, datasets)
-
- # eval bbox and masks json
- _eval()
|