|
- # Copyright 2020-2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """MaskRcnn Rcnn classification and box regression network."""
-
- import numpy as np
- import mindspore.common.dtype as mstype
- import mindspore.nn as nn
- from mindspore.ops import operations as P
- from mindspore.common.tensor import Tensor
- from mindspore.common.initializer import initializer
- from mindspore.common.parameter import Parameter
- from mindspore import context
-
-
class DenseNoTranpose(nn.Cell):
    """Fully-connected layer storing its weight as (in, out) so MatMul needs no transpose.

    Args:
        input_channels (int): size of each input sample.
        output_channels (int): size of each output sample.
        weight_init: initializer spec forwarded to ``initializer`` for the weight.
    """

    def __init__(self, input_channels, output_channels, weight_init):
        super(DenseNoTranpose, self).__init__()
        # Parameter attribute names ("weight"/"bias") are kept stable so that
        # checkpoint parameter names do not change.
        self.weight = Parameter(
            initializer(weight_init, [input_channels, output_channels], mstype.float32)
        )
        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float32))
        self.matmul = P.MatMul(transpose_b=False)
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        """Compute ``x @ weight + bias``."""
        return self.bias_add(self.matmul(x, self.weight), self.bias)
-
-
class FpnCls(nn.Cell):
    """Shared two-FC head producing classification scores and box regression deltas.

    Args:
        input_channels (int): channel count of the ROI-pooled feature map.
        output_channels (int): width of the two shared hidden FC layers.
        num_classes (int): number of detection classes (box head emits 4 values per class).
        char_classes (int): accepted for interface compatibility; not used in this head.
        pool_size (tuple): (height, width) of the ROI-pooled feature map.
    """

    def __init__(self, input_channels, output_channels,
                 num_classes, char_classes, pool_size):
        super(FpnCls, self).__init__()

        # Dense layers run in fp16 on Ascend for speed; fp32 elsewhere.
        if context.get_context("device_target") == "Ascend":
            self.cast_type = mstype.float16
        else:
            self.cast_type = mstype.float32

        representation_size = input_channels * pool_size[0] * pool_size[1]

        def make_fc(in_ch, out_ch):
            # Plain dense layer (bias, no activation) computed in self.cast_type.
            return nn.Dense(
                in_ch,
                out_ch,
                weight_init="normal",
                bias_init="zeros",
                has_bias=True,
                activation=None,
            ).to_float(self.cast_type)

        self.fc6 = make_fc(representation_size, output_channels)
        self.fc7 = make_fc(output_channels, output_channels)
        self.cls_score = make_fc(output_channels, num_classes)
        self.bbox_pred = make_fc(output_channels, num_classes * 4)

        self.relu = P.ReLU()
        self.flatten = P.Flatten()

    def construct(self, x):
        """Return ``(cls_scores, reg_scores)`` for the flattened ROI features."""
        # Two shared FC layers with ReLU, then the two sibling output heads.
        hidden = self.relu(self.fc6(self.flatten(x)))
        hidden = self.relu(self.fc7(hidden))
        return self.cls_score(hidden), self.bbox_pred(hidden)
class RcnnCls(nn.Cell):
    """
    Rcnn for classification and box regression subnet.

    Args:
        config (dict) - Config.
        batch_size (int) - Batchsize.
        num_classes (int) - Class number.
        char_classes (int) - Character class number; stored and forwarded to FpnCls.
        target_means (list) - Means for encode function. Default: (0., 0., 0., 0.).
        target_stds (list) - Stds for encode function. Default: (0.1, 0.1, 0.2, 0.2).

    Returns:
        Tuple, tuple of output tensor.

    Examples:
        RcnnCls(config=config, representation_size = 1024, batch_size=2, num_classes = 81, \
        target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2))
    """

    def __init__(
        self,
        config,
        batch_size,
        num_classes,
        char_classes,
        target_means=(0.0, 0.0, 0.0, 0.0),
        target_stds=(0.1, 0.1, 0.2, 0.2),
    ):
        super(RcnnCls, self).__init__()
        cfg = config

        # fp16 on Ascend, fp32 otherwise — mirrors the cast choice in FpnCls.
        if context.get_context("device_target") == "Ascend":
            self.cast_type = mstype.float16
            self.np_cast_type = np.float16
        else:
            self.cast_type = mstype.float32
            self.np_cast_type = np.float32

        # Scalar loss weights taken from config (not applied inside loss();
        # presumably applied by the caller — TODO confirm against the network wrapper).
        self.rcnn_loss_cls_weight = Tensor(
            np.array(cfg.rcnn_loss_cls_weight).astype(self.np_cast_type)
        )
        self.rcnn_loss_reg_weight = Tensor(
            np.array(cfg.rcnn_loss_reg_weight).astype(self.np_cast_type)
        )
        self.rcnn_fc_out_channels = cfg.rcnn_fc_out_channels
        self.target_means = target_means
        self.target_stds = target_stds
        self.num_classes = num_classes
        self.char_classes = char_classes

        self.in_channels = cfg.rcnn_in_channels
        self.train_batch_size = batch_size
        self.test_batch_size = cfg.test_batch_size

        # Shared FC head that emits classification scores and box deltas.
        self.box_feature_extractor = FpnCls(
            self.in_channels,
            self.rcnn_fc_out_channels,
            self.num_classes,
            self.char_classes,
            cfg.roi_layer.out_size,
        )
        self.relu = P.ReLU()
        self.logicaland = P.LogicalAnd()
        self.loss_cls = P.SoftmaxCrossEntropyWithLogits()
        self.loss_bbox = P.SmoothL1Loss(beta=1.0)
        self.loss_mask = P.SigmoidCrossEntropyWithLogits()
        self.reshape = P.Reshape()
        self.onehot = P.OneHot()
        self.greater = P.Greater()
        self.cast = P.Cast()
        self.sum_loss = P.ReduceSum()
        self.tile = P.Tile()
        self.expandims = P.ExpandDims()

        self.gather = P.GatherNd()
        self.argmax = P.ArgMaxWithValue(axis=1)

        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.value = Tensor(1.0, self.cast_type)

        # Total sampled ROIs per training step (positives + negatives, all images).
        self.num_bboxes = (
            cfg.num_expected_pos_stage2 + cfg.num_expected_neg_stage2
        ) * batch_size

        # Mask of ones with column 0 (background class) zeroed; used to exclude
        # the background class from the regression loss.
        rmv_first = np.ones((self.num_bboxes, self.num_classes))
        rmv_first[:, 0] = np.zeros((self.num_bboxes,))
        self.rmv_first_tensor = Tensor(rmv_first.astype(self.np_cast_type))

        self.num_bboxes_test = cfg.rpn_max_num * cfg.test_batch_size

    def construct(self, featuremap, bbox_targets, labels, mask):
        """Run the box head; return (loss_cls, loss_reg) in training, raw scores otherwise.

        Args:
            featuremap: ROI-pooled features fed to the FC head.
            bbox_targets: per-ROI regression targets (class-agnostic; tiled per class below).
            labels: per-ROI integer class labels (0 = background).
            mask: per-ROI validity mask for the sampled proposals.
        """
        x_cls, x_reg = self.box_feature_extractor(featuremap)

        if self.training:
            # Regression weight = class label for valid foreground ROIs, 0 otherwise.
            bbox_weights = (
                self.cast(self.logicaland(self.greater(labels, 0), mask), mstype.int32)
                * labels
            )
            # One-hot labels for the softmax cross-entropy loss.
            labels = self.cast(
                self.onehot(labels, self.num_classes, self.on_value, self.off_value),
                self.cast_type,
            )
            # Replicate the (per-ROI) box target across all classes so it lines up
            # with the per-class box predictions.
            bbox_targets = self.tile(
                self.expandims(bbox_targets, 1), (1, self.num_classes, 1)
            )

            loss_cls, loss_reg = self.loss(
                x_cls, x_reg, bbox_targets, bbox_weights, labels, mask
            )
            out = (loss_cls, loss_reg)
        else:
            out = (x_cls, x_reg)

        return out

    def loss(self, cls_score, bbox_pred, bbox_targets, bbox_weights, labels, weights):
        """Loss method.

        Computes the masked, normalized classification loss and the per-class
        SmoothL1 regression loss over the sampled ROIs.
        """
        # loss_cls: softmax cross-entropy per ROI, masked by the validity weights
        # and normalized by the number of valid ROIs (epsilon avoids divide-by-zero).
        loss_cls, _ = self.loss_cls(cls_score, labels)
        weights = self.cast(weights, self.cast_type)
        loss_cls = loss_cls * weights
        loss_cls = self.sum_loss(loss_cls, (0,)) / (self.sum_loss(weights, (0,)) + 1e-5)

        # loss_reg: one-hot the class-valued bbox_weights so only the ROI's own
        # class column contributes to the regression loss.
        bbox_weights = self.cast(
            self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value),
            self.cast_type,
        )
        bbox_weights = (
            bbox_weights * self.rmv_first_tensor
        )  # * self.rmv_first_tensor exclude background
        # Reshape flat predictions to (num_bboxes, num_classes, 4) for per-class loss.
        pos_bbox_pred = self.reshape(bbox_pred, (self.num_bboxes, -1, 4))
        loss_reg = self.loss_bbox(pos_bbox_pred, bbox_targets)
        loss_reg = self.sum_loss(loss_reg, (2,))
        loss_reg = loss_reg * bbox_weights
        loss_reg = loss_reg / (self.sum_loss(weights, (0,)) + 1e-5)
        loss_reg = self.sum_loss(loss_reg, (0, 1))

        return loss_cls, loss_reg
|