OpenModelZoo
/
R2Plus1D

 
			
			   
				 
					
						
						
							
							'''
Spatio-temporal ((2+1)D) convolution modules implemented with MindSpore 3D convolutions.
'''

import math

from myutils import _triple

import mindspore as ms
import mindspore.nn as nn

#from mindspore.ops.operations.nn_ops import Conv3D
from mindspore.nn import Conv3d
from mindspore.ops import Shape, Reshape
#from mindspore.nn.layer.normalization import BatchNorm3d

class BatchNorm3d(nn.Cell):
    '''
    Batch normalization for 5D inputs, built on top of nn.BatchNorm2d.

    The input is reshaped from 5D to 4D by merging axes 2 and 3, normalized
    by a BatchNorm2d layer, and reshaped back to its original shape.

    Args:
        num_features: number of features passed to nn.BatchNorm2d
            (presumably the size of axis 1 of the input, i.e. channels).
        momentum: forwarded unchanged to nn.BatchNorm2d. Default: 0.98.
        use_batch_statistics: forwarded unchanged to nn.BatchNorm2d.
            Default: True.
            NOTE(review): per the MindSpore documentation, True makes the
            layer use the statistics of the current batch; pass None if the
            tracked running statistics should be used during evaluation.
    '''
    def __init__(self, num_features, momentum=0.98, use_batch_statistics=True):
        super().__init__()
        self.reshape = Reshape()
        self.shape = Shape()
        self.bn2d = nn.BatchNorm2d(
            num_features,
            momentum=momentum,
            data_format="NCHW",
            use_batch_statistics=use_batch_statistics,
        )

    def construct(self, x):
        '''Normalize a 5D tensor and return a tensor of the same shape.'''
        x_shape = self.shape(x)
        # Merge axes 2 and 3 so the 2D batch norm can consume the tensor.
        merged_shape = (x_shape[0], x_shape[1], x_shape[2] * x_shape[3], x_shape[4])
        bn2d_out = self.bn2d(self.reshape(x, merged_shape))
        # Restore the original 5D layout.
        return self.reshape(bn2d_out, x_shape)

class SpatioTemporalConv(nn.Cell):
    '''
    SpatioTemporalConv unit re-implemented with Huawei MindSpore.

    Factorizes a 3D convolution into a spatial convolution (kernel
    [1, kh, kw]) followed by batch norm and ReLU, and then a temporal
    convolution (kernel [kt, 1, 1]).

    Args:
        in_channels: number of channels of the input.
        out_channels: number of channels produced by the temporal conv.
        kernel_size: int or triple; index 0 is used for the temporal conv,
            indices 1 and 2 for the spatial conv.
        stride: int or triple, split between the two convs like kernel_size.
            Default: 1.
        padding: int or triple, split between the two convs like
            kernel_size; each value is applied on both sides of its axis.
            Default: 0.
        bias: whether both convolutions have a bias term. Default: True.
    '''
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        super(SpatioTemporalConv, self).__init__()

        # if ints are entered, convert them to iterables, 1 -> [1, 1, 1]
        kernel_size = _triple(kernel_size)
        stride = _triple(stride)
        padding = _triple(padding)

        # decomposing the parameters into spatial and temporal components by
        # masking out the values with the defaults on the axis that
        # won't be convolved over. This is necessary to avoid unintentional
        # behavior such as padding being added twice
        spatial_kernel_size = (1, kernel_size[1], kernel_size[2])
        spatial_stride = (1, stride[1], stride[2])
        # six-value padding: two values per axis, zero on the first axis
        spatial_padding = (0, 0, padding[1], padding[1], padding[2], padding[2])

        temporal_kernel_size = (kernel_size[0], 1, 1)
        temporal_stride = (stride[0], 1, 1)
        temporal_padding = (padding[0], padding[0], 0, 0, 0, 0)

        # compute the number of intermediary channels (M) using formula
        # from the paper section 3.5
        full_conv_params = kernel_size[0] * kernel_size[1] * kernel_size[2] * in_channels * out_channels
        factored_params_per_channel = kernel_size[1] * kernel_size[2] * in_channels + kernel_size[0] * out_channels
        intermed_channels = int(math.floor(full_conv_params / factored_params_per_channel))

        # the spatial conv is effectively a 2D conv due to the
        # spatial_kernel_size, followed by batch_norm and ReLU
        self.spatial_conv = Conv3d(
            in_channels=in_channels,
            out_channels=intermed_channels,
            kernel_size=spatial_kernel_size,
            pad_mode='pad',
            padding=spatial_padding,
            stride=spatial_stride,
            weight_init='he_normal',
            has_bias=bias,  # honor the constructor argument (was hard-coded to True)
            data_format="NCDHW",
        )
        self.bn = BatchNorm3d(num_features=intermed_channels)
        self.relu = nn.ReLU()

        # the temporal conv is effectively a 1D conv, but has batch norm
        # and ReLU added inside the model constructor, not here. This is an
        # intentional design choice, to allow this module to externally act
        # identical to a standard Conv3D, so it can be reused easily in any
        # other codebase
        self.temporal_conv = Conv3d(
            in_channels=intermed_channels,
            out_channels=out_channels,
            kernel_size=temporal_kernel_size,
            pad_mode='pad',
            padding=temporal_padding,
            stride=temporal_stride,
            weight_init='he_normal',
            has_bias=bias,  # honor the constructor argument (was hard-coded to True)
            data_format="NCDHW",
        )

    def construct(self, x):
        '''Apply spatial conv -> batch norm -> ReLU -> temporal conv.'''
        x = self.relu(self.bn(self.spatial_conv(x)))
        x = self.temporal_conv(x)
        return x

class R2Plus1dStem(nn.Cell):
    '''
    R2Plus1dStem unit re-implemented with Huawei MindSpore.

    Applies a spatial convolution (kernel [1, kh, kw]) and a temporal
    convolution (kernel [kt, 1, 1]), each followed by batch norm and ReLU.

    Args:
        in_channels: number of channels of the input.
        out_channels: number of channels produced by the temporal conv.
        kernel_size: int or triple; index 0 is used for the temporal conv,
            indices 1 and 2 for the spatial conv.
        stride: int or triple, split between the two convs like kernel_size.
            Default: 1.
        padding: int or triple, split between the two convs like
            kernel_size; each value is applied on both sides of its axis.
            Default: 0.
        bias: whether both convolutions have a bias term. Default: False.
        intermed_channels: number of channels between the spatial and the
            temporal conv. Default: 45.
    '''
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=False,
                 intermed_channels=45):
        super(R2Plus1dStem, self).__init__()

        # if ints are entered, convert them to iterables, 1 -> [1, 1, 1]
        kernel_size = _triple(kernel_size)
        stride = _triple(stride)
        padding = _triple(padding)

        # decomposing the parameters into spatial and temporal components by
        # masking out the values with the defaults on the axis that
        # won't be convolved over. This is necessary to avoid unintentional
        # behavior such as padding being added twice
        spatial_kernel_size = (1, kernel_size[1], kernel_size[2])
        spatial_stride = (1, stride[1], stride[2])
        # six-value padding: two values per axis, zero on the first axis
        spatial_padding = (0, 0, padding[1], padding[1], padding[2], padding[2])

        temporal_kernel_size = (kernel_size[0], 1, 1)
        temporal_stride = (stride[0], 1, 1)
        temporal_padding = (padding[0], padding[0], 0, 0, 0, 0)

        # Unlike SpatioTemporalConv, the stem does not derive the number of
        # intermediary channels from the paper's formula; it uses the
        # `intermed_channels` argument (default 45).
        # NOTE(review): 45 presumably mirrors the reference R(2+1)D stem;
        # confirm before loading pretrained weights with another value.

        # the spatial conv is effectively a 2D conv due to the
        # spatial_kernel_size, followed by batch_norm and ReLU
        self.spatial_conv = Conv3d(
            in_channels=in_channels,
            out_channels=intermed_channels,
            kernel_size=spatial_kernel_size,
            pad_mode='pad',
            padding=spatial_padding,
            stride=spatial_stride,
            weight_init='he_normal',
            has_bias=bias,  # honor the constructor argument (was hard-coded to False)
            data_format="NCDHW",
        )
        self.bn = BatchNorm3d(num_features=intermed_channels)
        self.relu = nn.ReLU()

        # the temporal conv is effectively a 1D conv; in the stem it is
        # followed by its own batch norm (bn2) and the shared ReLU
        self.temporal_conv = Conv3d(
            in_channels=intermed_channels,
            out_channels=out_channels,
            kernel_size=temporal_kernel_size,
            pad_mode='pad',
            padding=temporal_padding,
            stride=temporal_stride,
            weight_init='he_normal',
            has_bias=bias,  # honor the constructor argument (was hard-coded to False)
            data_format="NCDHW",
        )
        self.bn2 = BatchNorm3d(num_features=out_channels)

    def construct(self, x):
        '''Apply (spatial conv -> BN -> ReLU) then (temporal conv -> BN -> ReLU).'''
        x = self.relu(self.bn(self.spatial_conv(x)))
        x = self.relu(self.bn2(self.temporal_conv(x)))
        return x