OpenModelZoo
/
RetinaFace_ResNet50

 
			
							# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Utils."""
from itertools import product
import math
import numpy as np


def prior_box(image_sizes, min_sizes, steps, clip=False):
    """prior box"""
    feature_maps = [
        [math.ceil(image_sizes[0] / step), math.ceil(image_sizes[1] / step)]
        for step in steps]

    anchors = []
    for k, f in enumerate(feature_maps):
        for i, j in product(range(f[0]), range(f[1])):
            for min_size in min_sizes[k]:
                s_kx = min_size / image_sizes[1]
                s_ky = min_size / image_sizes[0]
                cx = (j + 0.5) * steps[k] / image_sizes[1]
                cy = (i + 0.5) * steps[k] / image_sizes[0]
                anchors += [cx, cy, s_kx, s_ky]

    output = np.asarray(anchors).reshape([-1, 4]).astype(np.float32)

    if clip:
        output = np.clip(output, 0, 1)

    return output

def center_point_2_box(boxes):
    return np.concatenate((boxes[:, 0:2] - boxes[:, 2:4] / 2,
                           boxes[:, 0:2] + boxes[:, 2:4] / 2), axis=1)

def compute_intersect(a, b):
    A = a.shape[0]
    B = b.shape[0]
    max_xy = np.minimum(
        np.broadcast_to(np.expand_dims(a[:, 2:4], 1), [A, B, 2]),
        np.broadcast_to(np.expand_dims(b[:, 2:4], 0), [A, B, 2]))
    min_xy = np.maximum(
        np.broadcast_to(np.expand_dims(a[:, 0:2], 1), [A, B, 2]),
        np.broadcast_to(np.expand_dims(b[:, 0:2], 0), [A, B, 2]))
    inter = np.maximum((max_xy - min_xy), np.zeros_like(max_xy - min_xy))
    return inter[:, :, 0] * inter[:, :, 1]

def compute_overlaps(a, b):
    inter = compute_intersect(a, b)
    area_a = np.broadcast_to(
        np.expand_dims(
            (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), 1),
        np.shape(inter))
    area_b = np.broadcast_to(
        np.expand_dims(
            (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]), 0),
        np.shape(inter))
    union = area_a + area_b - inter
    return inter / union

def match(threshold, boxes, priors, var, labels, landms):

    overlaps = compute_overlaps(boxes, center_point_2_box(priors))

    best_prior_overlap = overlaps.max(1, keepdims=True)
    best_prior_idx = np.argsort(-overlaps, axis=1)[:, 0:1]

    valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
    best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
    if best_prior_idx_filter.shape[0] <= 0:
        loc = np.zeros((priors.shape[0], 4), dtype=np.float32)
        conf = np.zeros((priors.shape[0],), dtype=np.int32)
        landm = np.zeros((priors.shape[0], 10), dtype=np.float32)
        return loc, conf, landm

    best_truth_overlap = overlaps.max(0, keepdims=True)
    best_truth_idx = np.argsort(-overlaps, axis=0)[:1, :]

    best_truth_idx = best_truth_idx.squeeze(0)
    best_truth_overlap = best_truth_overlap.squeeze(0)
    best_prior_idx = best_prior_idx.squeeze(1)
    best_prior_idx_filter = best_prior_idx_filter.squeeze(1)
    best_truth_overlap[best_prior_idx_filter] = 2

    for j in range(best_prior_idx.shape[0]):
        best_truth_idx[best_prior_idx[j]] = j

    matches = boxes[best_truth_idx]

    # encode boxes
    offset_cxcy = (matches[:, 0:2] + matches[:, 2:4]) / 2 - priors[:, 0:2]
    offset_cxcy /= (var[0] * priors[:, 2:4])
    wh = (matches[:, 2:4] - matches[:, 0:2]) / priors[:, 2:4]
    wh[wh == 0] = 1e-12
    wh = np.log(wh) / var[1]
    loc = np.concatenate([offset_cxcy, wh], axis=1)


    conf = labels[best_truth_idx]
    conf[best_truth_overlap < threshold] = 0

    matches_landm = landms[best_truth_idx]

    # encode landms
    matched = np.reshape(matches_landm, [-1, 5, 2])
    priors = np.broadcast_to(np.expand_dims(priors, 1), [priors.shape[0], 5, 4])
    offset_cxcy = matched[:, :, 0:2] - priors[:, :, 0:2]
    offset_cxcy /= (priors[:, :, 2:4] * var[0])
    landm = np.reshape(offset_cxcy, [-1, 10])


    return loc, np.array(conf, dtype=np.int32), landm


class bbox_encode():
    def __init__(self, cfg):
        self.match_thresh = cfg['match_thresh']
        self.variances = cfg['variance']
        self.priors = prior_box((cfg['image_size'], cfg['image_size']),
                                [[16, 32], [64, 128], [256, 512]],
                                [8, 16, 32],
                                cfg['clip'])

    def __call__(self, image, targets):

        boxes = targets[:, :4]
        labels = targets[:, -1]
        landms = targets[:, 4:14]
        priors = self.priors

        loc_t, conf_t, landm_t = match(self.match_thresh, boxes, priors, self.variances, labels, landms)

        return image, loc_t, conf_t, landm_t

def decode_bbox(bbox, priors, var):
    boxes = np.concatenate((
        priors[:, 0:2] + bbox[:, 0:2] * var[0] * priors[:, 2:4],
        priors[:, 2:4] * np.exp(bbox[:, 2:4] * var[1])), axis=1)  # (xc, yc, w, h)
    boxes[:, :2] -= boxes[:, 2:] / 2    # (x0, y0, w, h)
    boxes[:, 2:] += boxes[:, :2]        # (x0, y0, x1, y1)
    return boxes

def decode_landm(landm, priors, var):

    return np.concatenate((priors[:, 0:2] + landm[:, 0:2] * var[0] * priors[:, 2:4],
                           priors[:, 0:2] + landm[:, 2:4] * var[0] * priors[:, 2:4],
                           priors[:, 0:2] + landm[:, 4:6] * var[0] * priors[:, 2:4],
                           priors[:, 0:2] + landm[:, 6:8] * var[0] * priors[:, 2:4],
                           priors[:, 0:2] + landm[:, 8:10] * var[0] * priors[:, 2:4],
                           ), axis=1)