|
-
- # Copyright 2022 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
-
- import numpy as np
- from mindspore import ops
- from mindspore.common.tensor import Tensor
-
def point_form(boxes):
    """Turn center-size boxes into corner (point-form) boxes.

    Args:
        boxes: 2-D array whose four columns are (cx, cy, w, h).
    Return:
        2-D array with columns (xmin, ymin, xmax, ymax), built by
        concatenating the two corner pairs along axis 1.
    """
    centers = boxes[:, :2]
    half_extent = boxes[:, 2:] / 2
    top_left = centers - half_extent      # xmin, ymin
    bottom_right = centers + half_extent  # xmax, ymax
    return np.concatenate((top_left, bottom_right), axis=1)
-
def center_size(boxes):
    """Convert point-form boxes to center-size (cx, cy, w, h) form.

    Args:
        boxes: 2-D array whose four columns are (xmin, ymin, xmax, ymax).
    Return:
        2-D array with columns (cx, cy, w, h).
    """
    centers = (boxes[:, 2:] + boxes[:, :2]) / 2  # cx, cy
    sizes = boxes[:, 2:] - boxes[:, :2]          # w, h
    # Both parts must be wrapped in ONE sequence: np.concatenate takes the
    # arrays as its first argument and the axis as its second.
    return np.concatenate((centers, sizes), axis=1)
-
def intersect(box_a, box_b):
    """Compute the pairwise intersection area between two sets of boxes.

    Both corner sets are expanded to a common [A, B, 2] shape:
        [A, 2] -> [A, 1, 2] -> [A, B, 2]
        [B, 2] -> [1, B, 2] -> [A, B, 2]
    so that every box of box_a is compared with every box of box_b.

    Args:
        box_a: bounding boxes in point form, Shape: [A,4].
        box_b: bounding boxes in point form, Shape: [B,4].
    Return:
        intersection area, Shape: [A,B].
    """
    num_a, num_b = box_a.shape[0], box_b.shape[0]
    pair_shape = (num_a, num_b, 2)

    # Lower-right corner of the overlap: the smaller of the two max corners.
    a_max = np.broadcast_to(box_a[:, np.newaxis, 2:], pair_shape)
    b_max = np.broadcast_to(box_b[np.newaxis, :, 2:], pair_shape)
    lower_right = np.minimum(a_max, b_max)

    # Upper-left corner of the overlap: the larger of the two min corners.
    a_min = np.broadcast_to(box_a[:, np.newaxis, :2], pair_shape)
    b_min = np.broadcast_to(box_b[np.newaxis, :, :2], pair_shape)
    upper_left = np.maximum(a_min, b_min)

    # Negative extents mean the boxes do not overlap on that axis.
    extent = np.clip(lower_right - upper_left, 0, None)
    return extent[:, :, 0] * extent[:, :, 1]
-
def jaccard(box_a, box_b):
    """Compute the pairwise jaccard overlap (IoU) of two sets of boxes.

    The jaccard overlap is the intersection over union:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    Args:
        box_a: bounding boxes in point form, Shape: [num_objects,4]
        box_b: bounding boxes in point form, Shape: [num_priors,4]
    Return:
        jaccard overlap, Shape: [box_a.shape[0], box_b.shape[0]]
    """
    overlap = intersect(box_a, box_b)

    # Per-box areas, laid out as a column (A) and a row (B) and then
    # expanded to the [A,B] shape of the intersection matrix.
    size_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    size_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])
    area_a = np.broadcast_to(size_a[:, np.newaxis], overlap.shape)
    area_b = np.broadcast_to(size_b[np.newaxis, :], overlap.shape)

    return overlap / (area_a + area_b - overlap)  # [A,B]
-
def matrix_iou(a, b):
    """Return the pairwise IoU of boxes a and b (numpy version).

    Args:
        a: 2-D array of point-form boxes (xmin, ymin, xmax, ymax).
        b: 2-D array of point-form boxes (xmin, ymin, xmax, ymax).
    Return:
        2-D array where entry [i, j] is the IoU of a[i] with b[j].
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # A pair only overlaps when the clipped box has positive extent on
    # both axes; the boolean mask zeroes out every other pair.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter_area = np.prod(bottom_right - top_left, axis=2) * has_overlap

    size_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    size_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union_area = size_a[:, np.newaxis] + size_b - inter_area
    return inter_area / union_area
-
def match(threshold, truths, priors, variances, labels):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, then return the encoded location
    targets and the per-prior class labels.

    Args:
        threshold: (float) The overlap threshold used when matching boxes.
        truths: Ground truth boxes in point form, Shape: [num_obj,4].
        priors: Prior boxes in center-size form, Shape: [num_priors,4].
        variances: Variances of the prior boxes; passed through to encode(),
            which reads variances[0] and variances[1].
        labels: All the class labels for the image, Shape: [num_obj].
    Return:
        loc: encoded location targets, Shape: [num_priors,4].
        conf: integer class label per prior, Shape: [num_priors]; priors
            whose best overlap is below threshold are set to 0 (background).
    """
    # Jaccard index of every ground truth against every prior:
    # Shape [num_obj, num_priors].
    overlaps = jaccard(truths, point_form(priors))

    # (Bipartite Matching)
    # [num_obj] best prior for each ground truth
    best_prior_idx = np.argmax(overlaps, axis=1)
    # [num_priors] best ground truth for each prior
    best_truth_idx = np.argmax(overlaps, axis=0)
    best_truth_overlap = np.max(overlaps, axis=0)

    # IoU is at most 1, so writing 2.0 marks each ground truth's best prior
    # as matched for any threshold up to 2.
    best_truth_overlap[best_prior_idx] = 2.0

    # Ensure every ground truth is matched with its prior of max overlap.
    # Kept as an explicit loop: when two ground truths share a prior the
    # later one must win, which fancy-index assignment does not guarantee.
    for truth_index, prior_index in enumerate(best_prior_idx):
        best_truth_idx[prior_index] = truth_index

    matches = truths[best_truth_idx]            # Shape: [num_priors,4]
    conf = labels.astype(int)[best_truth_idx]   # Shape: [num_priors]
    conf[best_truth_overlap < threshold] = 0    # label as background
    loc = encode(matches, priors, variances)
    return loc, conf
-
def encode(matched, priors, variances):
    """Encode matched ground truth boxes as regression targets relative to
    their prior boxes, scaled by the prior-box variances.

    Args:
        matched: Ground truth coords for each prior in point form,
            Shape: [num_priors,4].
        priors: Prior boxes in center-size form, Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes; index 0 scales
            the center offsets and index 1 scales the log size ratios.
    Return:
        encoded boxes, Shape: [num_priors,4]
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Offset between the matched box center and the prior center,
    # normalized by the prior size and the center variance.
    center_offset = (matched[:, :2] + matched[:, 2:]) / 2 - prior_centers
    center_offset /= (variances[0] * prior_sizes)

    # Log of the matched/prior size ratio; the small epsilon keeps the log
    # finite when a matched box has zero width or height.
    size_ratio = (matched[:, 2:] - matched[:, :2]) / prior_sizes
    log_size = np.log(size_ratio + 1e-10) / variances[1]

    # Target for smooth_l1_loss.
    return np.concatenate((center_offset, log_size), axis=1)  # [num_priors,4]
-
def decode(loc, priors, variances):
    """Decode location predictions back into boxes, undoing the encoding
    applied for offset regression at train time.

    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors: Prior boxes in center-size form, Shape: [num_priors,4];
            wrapped in a Tensor before use.
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions with columns
        (xmin, ymin, xmax, ymax)
    """
    priors = Tensor(priors)
    prior_xy = priors[:, :2]
    prior_wh = priors[:, 2:]

    # Recover the predicted center and size from the encoded offsets.
    center = prior_xy + loc[:, :2] * variances[0] * prior_wh
    size = prior_wh * ops.Exp()(loc[:, 2:] * variances[1])
    boxes = ops.Concat(axis=1)((center, size))

    # Center-size -> point form: shift the center to the min corner first,
    # then add the size to the min corner to obtain the max corner.
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes
-
def log_sum_exp(x):
    """Compute log(sum(exp(x))) along axis 1, keeping the reduced dimension.

    The value of x.max() is subtracted before exponentiating and added back
    after the log. Used to determine the unaveraged confidence loss across
    all examples in a batch.

    Args:
        x (tensor): conf_preds from conf layers
    """
    x_max = x.max()
    shifted_exp = ops.Exp()(x - x_max)
    row_sum = ops.ReduceSum(keep_dims=True)(shifted_exp, 1)
    return ops.Log()(row_sum) + x_max
|