OpenModelZoo
/
masktextspotter

 
			
							# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MaskRcnn ROIAlign module."""

import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore.nn import layer as L
from mindspore.common.tensor import Tensor


class ROIAlign(nn.Cell):
    """
    Extract RoI features from mulitiple feature map.

    Args:
        out_size_h (int) - RoI height.
        out_size_w (int) - RoI width.
        spatial_scale (int) - RoI spatial scale.
        sample_num (int) - RoI sample number.
        roi_align_mode (int)- RoI align mode
    """

    def __init__(self, out_size_h, out_size_w, spatial_scale, sample_num=0, roi_align_mode=1):
        super(ROIAlign, self).__init__()

        self.out_size = (out_size_h, out_size_w)
        self.spatial_scale = float(spatial_scale)
        self.sample_num = int(sample_num)
        self.align_op = P.ROIAlign(
            self.out_size[0],
            self.out_size[1],
            self.spatial_scale,
            self.sample_num,
            roi_align_mode,
        )

    def construct(self, features, rois):
        return self.align_op(features, rois)

    def __repr__(self):
        format_str = self.__class__.__name__
        format_str += "(out_size={}, spatial_scale={}, sample_num={}".format(
            self.out_size, self.spatial_scale, self.sample_num
        )
        return format_str


class SingleRoIExtractor(nn.Cell):
    """
    Extract RoI features from a single level feature map.

    If there are multiple input feature levels, each RoI is mapped to a level
    according to its scale.

    Args:
        config (dict): Config
        roi_layer (dict): Specify RoI layer type and arguments.
        out_channels (int): Output channels of RoI layers.
        featmap_strides (int): Strides of input feature maps.
        batch_size (int)： Batchsize.
        finest_scale (int): Scale threshold of mapping to level 0.
        mask (bool): Specify ROIAlign for cls or mask branch
    """

    def __init__(self,
                 config,
                 roi_layer,
                 out_channels,
                 featmap_strides,
                 batch_size=1,
                 finest_scale=56,
                 mask=False):
        super(SingleRoIExtractor, self).__init__()
        cfg = config
        self.train_batch_size = batch_size
        self.out_channels = out_channels
        self.featmap_strides = featmap_strides
        self.num_levels = len(self.featmap_strides)
        self.out_size = (
            config.roi_layer.mask_out_size if mask else config.roi_layer.out_size
        )
        self.mask = mask
        self.sample_num = config.roi_layer.sample_num
        self.roi_layers = self.build_roi_layers(self.featmap_strides)
        self.roi_layers = L.CellList(self.roi_layers)

        self.sqrt = P.Sqrt()
        self.log = P.Log()
        self.finest_scale_ = finest_scale
        self.clamp = C.clip_by_value

        self.cast = P.Cast()
        self.equal = P.Equal()
        self.select = P.Select()

        _mode_16 = False
        self.dtype = np.float16 if _mode_16 else np.float32
        self.ms_dtype = mstype.float16 if _mode_16 else mstype.float32
        self.set_train_local(cfg, training=True)

    def set_train_local(self, config, training=True):
        """Set training flag."""
        self.training_local = training

        cfg = config
        # Init tensor
        roi_sample_num = (
            cfg.num_expected_pos_stage2 if self.mask else cfg.roi_sample_num
        )
        self.batch_size = roi_sample_num if self.training_local else cfg.rpn_max_num
        self.batch_size = (
            self.train_batch_size * self.batch_size
            if self.training_local
            else cfg.test_batch_size * self.batch_size
        )
        self.ones = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype))
        finest_scale = (
            np.array(np.ones((self.batch_size, 1)), dtype=self.dtype)
            * self.finest_scale_
        )
        self.finest_scale = Tensor(finest_scale)
        self.epslion = Tensor(
            np.array(np.ones((self.batch_size, 1)), dtype=self.dtype) * self.dtype(1e-6)
        )
        self.zeros = Tensor(np.array(np.zeros((self.batch_size, 1)), dtype=np.int32))
        self.max_levels = Tensor(
            np.array(np.ones((self.batch_size, 1)), dtype=np.int32)
            * (self.num_levels - 1)
        )
        self.twos = Tensor(
            np.array(np.ones((self.batch_size, 1)), dtype=self.dtype) * 2
        )
        self.res_ = Tensor(
            np.array(
                np.zeros(
                    (
                        self.batch_size,
                        self.out_channels,
                        self.out_size[0],
                        self.out_size[1],
                    )
                ),
                dtype=self.dtype,
            )
        )

    def num_inputs(self):
        return len(self.featmap_strides)

    def init_weights(self):
        pass

    def log2(self, value):
        return self.log(value) / self.log(self.twos)

    def build_roi_layers(self, featmap_strides):
        roi_layers = []
        for s in featmap_strides:
            layer_cls = ROIAlign(
                self.out_size[0],
                self.out_size[1],
                spatial_scale=1 / s,
                sample_num=self.sample_num,
                roi_align_mode=0,
            )
            roi_layers.append(layer_cls)
        return roi_layers

    def _c_map_roi_levels(self, rois):
        """Map rois to corresponding feature levels by scales.

        - scale < finest_scale * 2: level 0
        - finest_scale * 2 <= scale < finest_scale * 4: level 1
        - finest_scale * 4 <= scale < finest_scale * 8: level 2
        - scale >= finest_scale * 8: level 3

        Args:
            rois (Tensor): Input RoIs, shape (k, 5).
            num_levels (int): Total level number.

        Returns:
            Tensor: Level index (0-based) of each RoI, shape (k, )
        """
        scale = self.sqrt(rois[::, 3:4:1] - rois[::, 1:2:1] + self.ones) * self.sqrt(
            rois[::, 4:5:1] - rois[::, 2:3:1] + self.ones
        )

        target_lvls = self.log2(scale / self.finest_scale + self.epslion)
        target_lvls = P.Floor()(target_lvls)
        target_lvls = self.cast(target_lvls, mstype.int32)
        target_lvls = self.clamp(target_lvls, self.zeros, self.max_levels)

        return target_lvls

    def construct(self, rois, feat1, feat2, feat3, feat4):
        feats = (feat1, feat2, feat3, feat4)
        res = self.res_
        target_lvls = self._c_map_roi_levels(rois)
        for i in range(self.num_levels):
            mask = self.equal(target_lvls, P.ScalarToArray()(i))
            mask = P.Reshape()(mask, (-1, 1, 1, 1))
            roi_feats_t = self.roi_layers[i](feats[i], rois)
            mask = self.cast(
                P.Tile()(
                    self.cast(mask, mstype.int32),
                    (1, 256, self.out_size[0], self.out_size[1]),
                ),
                mstype.bool_,
            )
            res = self.select(mask, roi_feats_t, res)

        return res