OpenModelZoo
/
AdvancedEAST

 
			
							# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
dataset processing.
"""
import mindspore
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore.ops import ResizeNearestNeighbor
from mindspore import Tensor, ParameterTuple, Parameter
from mindspore.common.initializer import initializer, TruncatedNormal
from mindspore.train.serialization import load_checkpoint, load_param_into_net
import numpy as np

from src.vgg import Vgg
from src.config import config as cfg


vgg_cfg = {
    '11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    '13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    '16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    '19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg16(num_classes=1000, args=None, phase="train"):
    """
    Get Vgg16 neural network with batch normalization.

    Args:
        num_classes (int): Class numbers. Default: 1000.
        args(namespace): param for net init.
        phase(str): train or test mode.

    Returns:
        Cell, cell instance of Vgg16 neural network with batch normalization.

    Examples:
        >>> vgg16(num_classes=1000, args=args)
    """

    if args is None:
        from src.config import cifar_cfg
        args = cifar_cfg
    net = Vgg(vgg_cfg['16'], num_classes=num_classes, args=args, batch_norm=args.batch_norm, phase=phase)
    return net

class AdvancedEast(nn.Cell):
    """
    East model
    Args:
            args
    """

    def __init__(self, args):
        super(AdvancedEast, self).__init__()
        self.device_target = args.device_target
        if self.device_target == 'GPU':

            self.vgg16 = vgg16()
            if args.is_train:
                param_dict = load_checkpoint(cfg.vgg_weights)
                load_param_into_net(self.vgg16, param_dict)

            self.bn1 = nn.BatchNorm2d(1024, momentum=0.99, eps=1e-3)
            self.conv1 = nn.Conv2d(1024, 128, 1, weight_init='XavierUniform', has_bias=True)
            self.relu1 = nn.ReLU()

            self.bn2 = nn.BatchNorm2d(128, momentum=0.99, eps=1e-3)
            self.conv2 = nn.Conv2d(128, 128, 3, padding=1, pad_mode='pad', weight_init='XavierUniform')
            self.relu2 = nn.ReLU()

            self.bn3 = nn.BatchNorm2d(384, momentum=0.99, eps=1e-3)
            self.conv3 = nn.Conv2d(384, 64, 1, weight_init='XavierUniform', has_bias=True)
            self.relu3 = nn.ReLU()

            self.bn4 = nn.BatchNorm2d(64, momentum=0.99, eps=1e-3)
            self.conv4 = nn.Conv2d(64, 64, 3, padding=1, pad_mode='pad', weight_init='XavierUniform')
            self.relu4 = nn.ReLU()

            self.bn5 = nn.BatchNorm2d(192, momentum=0.99, eps=1e-3)
            self.conv5 = nn.Conv2d(192, 32, 1, weight_init='XavierUniform', has_bias=True)
            self.relu5 = nn.ReLU()

            self.bn6 = nn.BatchNorm2d(32, momentum=0.99, eps=1e-3)
            self.conv6 = nn.Conv2d(32, 32, 3, padding=1, pad_mode='pad', weight_init='XavierUniform', has_bias=True)
            self.relu6 = nn.ReLU()

            self.bn7 = nn.BatchNorm2d(32, momentum=0.99, eps=1e-3)
            self.conv7 = nn.Conv2d(32, 32, 3, padding=1, pad_mode='pad', weight_init='XavierUniform', has_bias=True)
            self.relu7 = nn.ReLU()

            self.cat = P.Concat(axis=1)

            self.conv8 = nn.Conv2d(32, 1, 1, weight_init='XavierUniform', has_bias=True)
            self.conv9 = nn.Conv2d(32, 2, 1, weight_init='XavierUniform', has_bias=True)
            self.conv10 = nn.Conv2d(32, 4, 1, weight_init='XavierUniform', has_bias=True)
        else:
            if args.is_train:
                vgg_dict = np.load(cfg.vgg_npy, encoding='latin1', allow_pickle=True).item()
            shape_dict = {
                'conv1_1': [64, 3, 3, 3],
                'conv1_2': [64, 64, 3, 3],
                'conv2_1': [128, 64, 3, 3],
                'conv2_2': [128, 128, 3, 3],
                'conv3_1': [256, 128, 3, 3],
                'conv3_2': [256, 256, 3, 3],
                'conv3_3': [256, 256, 3, 3],
                'conv4_1': [512, 256, 3, 3],
                'conv4_2': [512, 512, 3, 3],
                'conv4_3': [512, 512, 3, 3],
                'conv5_1': [512, 512, 3, 3],
                'conv5_2': [512, 512, 3, 3],
                'conv5_3': [512, 512, 3, 3],
            }

            def get_var(name, idx):
                value = vgg_dict[name][idx]
                if idx == 0:
                    value = np.transpose(value, [3, 2, 0, 1])
                var = Tensor(value)
                return var

            def get_conv_var(name):
                filters = get_var(name, 0)
                biases = get_var(name, 1)
                return filters, biases

            class VGG_Conv(nn.Cell):
                """
                VGG16 network definition.
                """

                def __init__(self, name):
                    super(VGG_Conv, self).__init__()
                    if args.is_train:
                        filters, conv_biases = get_conv_var(name)
                        out_channels, in_channels, filter_size, _ = filters.shape
                    else:
                        out_channels, in_channels, filter_size, _ = shape_dict[name]
                    self.conv2d = P.Conv2D(out_channels, filter_size, pad_mode='same', mode=1)
                    self.bias_add = P.BiasAdd()
                    self.weight = Parameter(initializer(filters if args.is_train else TruncatedNormal(),
                                                        [out_channels, in_channels, filter_size, filter_size]),
                                            name='weight')
                    self.bias = Parameter(initializer(conv_biases if args.is_train else TruncatedNormal(),
                                                      [out_channels]), name='bias')
                    self.relu = P.ReLU()
                    self.gn = nn.GroupNorm(32, out_channels)

                def construct(self, x):
                    output = self.conv2d(x, self.weight)
                    output = self.bias_add(output, self.bias)
                    output = self.gn(output)
                    output = self.relu(output)
                    return output

            self.conv1_1 = VGG_Conv('conv1_1')
            self.conv1_2 = VGG_Conv('conv1_2')
            self.pool1 = nn.MaxPool2d(2, 2)
            self.conv2_1 = VGG_Conv('conv2_1')
            self.conv2_2 = VGG_Conv('conv2_2')
            self.pool2 = nn.MaxPool2d(2, 2)
            self.conv3_1 = VGG_Conv('conv3_1')
            self.conv3_2 = VGG_Conv('conv3_2')
            self.conv3_3 = VGG_Conv('conv3_3')
            self.pool3 = nn.MaxPool2d(2, 2)
            self.conv4_1 = VGG_Conv('conv4_1')
            self.conv4_2 = VGG_Conv('conv4_2')
            self.conv4_3 = VGG_Conv('conv4_3')
            self.pool4 = nn.MaxPool2d(2, 2)
            self.conv5_1 = VGG_Conv('conv5_1')
            self.conv5_2 = VGG_Conv('conv5_2')
            self.conv5_3 = VGG_Conv('conv5_3')
            self.pool5 = nn.MaxPool2d(2, 2)
            self.merging1 = self.merging(i=2)
            self.merging2 = self.merging(i=3)
            self.merging3 = self.merging(i=4)
            self.last_bn = nn.GroupNorm(16, 32)
            self.conv_last = nn.Conv2d(32, 32, kernel_size=3, stride=1, has_bias=True, weight_init='XavierUniform')
            self.inside_score_conv = nn.Conv2d(32, 1, kernel_size=1, stride=1, has_bias=True,
                                               weight_init='XavierUniform')
            self.side_v_angle_conv = nn.Conv2d(32, 2, kernel_size=1, stride=1, has_bias=True,
                                               weight_init='XavierUniform')
            self.side_v_coord_conv = nn.Conv2d(32, 4, kernel_size=1, stride=1, has_bias=True,
                                               weight_init='XavierUniform')
            self.op_concat = P.Concat(axis=1)
            self.relu = P.ReLU()

    def merging(self, i=2):
        """
        def  merge layer
        """
        in_size = {'2': 1024, '3': 384, '4': 192}
        layers = [
            nn.Conv2d(in_size[str(i)], 128 // 2 ** (i - 2), kernel_size=1, stride=1, has_bias=True,
                      weight_init='XavierUniform'),
            nn.GroupNorm(16, 128 // 2 ** (i - 2)),
            nn.ReLU(),
            nn.Conv2d(128 // 2 ** (i - 2), 128 // 2 ** (i - 2), kernel_size=3, stride=1, has_bias=True,
                      weight_init='XavierUniform'),
            nn.GroupNorm(16, 128 // 2 ** (i - 2)),
            nn.ReLU()]
        return nn.SequentialCell(layers)

    def construct(self, x):
        """
            forward func
        """
        if self.device_target == 'GPU':
            l2, l3, l4, l5 = self.vgg16(x)
            h = l5

            _, _, h_, w_ = P.Shape()(h)
            g = ResizeNearestNeighbor((h_ * 2, w_ * 2))(h)
            c = self.cat((g, l4))

            c = self.bn1(c)
            c = self.conv1(c)
            c = self.relu1(c)

            h = self.bn2(c)
            h = self.conv2(h)
            h = self.relu2(h)

            _, _, h_, w_ = P.Shape()(h)
            g = ResizeNearestNeighbor((h_ * 2, w_ * 2))(h)
            c = self.cat((g, l3))

            c = self.bn3(c)
            c = self.conv3(c)
            c = self.relu3(c)

            h = self.bn4(c)
            h = self.conv4(h)  # bs 64 w/8 h/8
            h = self.relu4(h)

            _, _, h_, w_ = P.Shape()(h)
            g = ResizeNearestNeighbor((h_ * 2, w_ * 2))(h)
            c = self.cat((g, l2))

            c = self.bn5(c)
            c = self.conv5(c)
            c = self.relu5(c)

            h = self.bn6(c)
            h = self.conv6(h)  # bs 32 w/4 h/4
            h = self.relu6(h)

            g = self.bn7(h)
            g = self.conv7(g)  # bs 32 w/4 h/4
            g = self.relu7(g)
            # get output

            inside_score = self.conv8(g)
            side_v_code = self.conv9(g)
            side_v_coord = self.conv10(g)
            east_detect = self.cat((inside_score, side_v_code, side_v_coord))
        else:
            f4 = self.conv1_1(x)
            f4 = self.conv1_2(f4)
            f4 = self.pool1(f4)
            f4 = self.conv2_1(f4)
            f4 = self.conv2_2(f4)
            f4 = self.pool2(f4)
            f3 = self.conv3_1(f4)
            f3 = self.conv3_2(f3)
            f3 = self.conv3_3(f3)
            f3 = self.pool3(f3)
            f2 = self.conv4_1(f3)
            f2 = self.conv4_2(f2)
            f2 = self.conv4_3(f2)
            f2 = self.pool4(f2)
            f1 = self.conv5_1(f2)
            f1 = self.conv5_2(f1)
            f1 = self.conv5_3(f1)
            f1 = self.pool5(f1)
            h1 = f1
            _, _, h_, w_ = P.Shape()(h1)
            H1 = P.ResizeNearestNeighbor((h_ * 2, w_ * 2))(h1)
            concat1 = self.op_concat((H1, f2))
            h2 = self.merging1(concat1)
            _, _, h_, w_ = P.Shape()(h2)
            H2 = P.ResizeNearestNeighbor((h_ * 2, w_ * 2))(h2)
            concat2 = self.op_concat((H2, f3))
            h3 = self.merging2(concat2)
            _, _, h_, w_ = P.Shape()(h3)
            H3 = P.ResizeNearestNeighbor((h_ * 2, w_ * 2))(h3)
            concat3 = self.op_concat((H3, f4))
            h4 = self.merging3(concat3)
            before_output = self.relu(self.last_bn(self.conv_last(h4)))
            inside_score = self.inside_score_conv(before_output)
            side_v_angle = self.side_v_angle_conv(before_output)
            side_v_coord = self.side_v_coord_conv(before_output)
            east_detect = self.op_concat((inside_score, side_v_coord, side_v_angle))

        return east_detect


class EastWithLossCell(nn.Cell):
    """
    loss
    """

    def __init__(self, network):
        super(EastWithLossCell, self).__init__()
        self.East_network = network
        self.cat = P.Concat(axis=1)

    def dice_loss(self, gt_score, pred_score):
        """dice_loss1"""
        inter = P.ReduceSum()(gt_score * pred_score)
        union = P.ReduceSum()(gt_score) + P.ReduceSum()(pred_score) + 1e-5
        return 1. - (2 * (inter / union))

    def dice_loss2(self, gt_score, pred_score, mask):
        """dice_loss2"""
        inter = P.ReduceSum()(gt_score * pred_score * mask)
        union = P.ReduceSum()(gt_score * mask) + P.ReduceSum()(pred_score * mask) + 1e-5
        return 1. - (2 * (inter / union))

    def quad_loss(self, y_true, y_pred,
                  lambda_inside_score_loss=0.2,
                  lambda_side_vertex_code_loss=0.1,
                  lambda_side_vertex_coord_loss=1.0,
                  epsilon=1e-4):
        """quad loss"""
        y_true = P.Transpose()(y_true, (0, 2, 3, 1))
        y_pred = P.Transpose()(y_pred, (0, 2, 3, 1))
        logits = y_pred[:, :, :, :1]
        labels = y_true[:, :, :, :1]
        predicts = P.Sigmoid()(logits)
        inside_score_loss = self.dice_loss(labels, predicts)
        inside_score_loss = inside_score_loss * lambda_inside_score_loss
        # loss for side_vertex_code
        vertex_logitsp = P.Sigmoid()(y_pred[:, :, :, 1:2])
        vertex_labelsp = y_true[:, :, :, 1:2]
        vertex_logitsn = P.Sigmoid()(y_pred[:, :, :, 2:3])
        vertex_labelsn = y_true[:, :, :, 2:3]
        labels2 = y_true[:, :, :, 1:2]
        side_vertex_code_lossp = self.dice_loss2(vertex_labelsp, vertex_logitsp, labels)
        side_vertex_code_lossn = self.dice_loss2(vertex_labelsn, vertex_logitsn, labels2)
        side_vertex_code_loss = (side_vertex_code_lossp + side_vertex_code_lossn) * lambda_side_vertex_code_loss
        # loss for side_vertex_coord delta
        g_hat = y_pred[:, :, :, 3:]  # N*W*H*8
        g_true = y_true[:, :, :, 3:]
        vertex_weights = P.Cast()(P.Equal()(y_true[:, :, :, 1], 1), mindspore.float32)

        pixel_wise_smooth_l1norm = self.smooth_l1_loss(g_hat, g_true, vertex_weights)
        side_vertex_coord_loss = P.ReduceSum()(pixel_wise_smooth_l1norm) / (
            P.ReduceSum()(vertex_weights) + epsilon)
        side_vertex_coord_loss = side_vertex_coord_loss * lambda_side_vertex_coord_loss
        return inside_score_loss + side_vertex_code_loss + side_vertex_coord_loss

    def smooth_l1_loss(self, prediction_tensor, target_tensor, weights):
        """smooth l1 loss"""
        n_q = P.Reshape()(self.quad_norm(target_tensor), weights.shape)
        diff = P.SmoothL1Loss()(prediction_tensor, target_tensor)
        pixel_wise_smooth_l1norm = P.ReduceSum()(diff, -1) / n_q * weights
        return pixel_wise_smooth_l1norm

    def quad_norm(self, g_true, epsilon=1e-4):
        """ quad norm"""
        shape = g_true.shape
        delta_xy_matrix = P.Reshape()(g_true, (shape[0] * shape[1] * shape[2], 2, 2))
        diff = delta_xy_matrix[:, 0:1, :] - delta_xy_matrix[:, 1:2, :]
        square = diff * diff
        distance = P.Sqrt()(P.ReduceSum()(square, -1))
        distance = distance * 4.0
        distance = distance + epsilon
        return P.Reshape()(distance, (shape[0], shape[1], shape[2]))

    def construct(self, image, label):
        y_pred = self.East_network(image)
        loss = self.quad_loss(label, y_pred)
        return loss


class TrainStepWrap(nn.Cell):
    """
    train net
    """

    def __init__(self, network):
        super(TrainStepWrap, self).__init__()
        self.network = network
        self.network.set_train()
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = 1.0

    def construct(self, image, label):
        weights = self.weights
        loss = self.network(image, label)
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = self.grad(self.network, weights)(image, label, sens)
        self.optimizer(grads)
        return loss


def get_AdvancedEast_net(args):
    """
    Get network of wide&deep model.
    """
    AdvancedEast_net = AdvancedEast(args)
    loss_net = EastWithLossCell(AdvancedEast_net)
    train_net = TrainStepWrap(loss_net)
    return loss_net, train_net