import torch
from torch import nn

from ..utils import constant_init, kaiming_init
from .registry import PLUGIN_LAYERS


def last_zero_init(m):
    """Zero-initialize the last layer of ``m`` (``m[-1]`` for a Sequential)."""
    if isinstance(m, nn.Sequential):
        constant_init(m[-1], val=0)
    else:
        constant_init(m, val=0)


@PLUGIN_LAYERS.register_module()
class ContextBlock(nn.Module):
    """ContextBlock module in GCNet.

    See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and
    Beyond' (https://arxiv.org/abs/1904.11492) for details.

    Args:
        in_channels (int): Channels of the input feature map.
        ratio (float): Ratio of channels of the transform bottleneck.
        pooling_type (str): Pooling method for context modeling.
            Options are 'att' and 'avg', which stand for attention pooling
            and average pooling respectively. Default: 'att'.
        fusion_types (Sequence[str]): Fusion methods for feature fusion.
            Options are 'channel_add' and 'channel_mul', which stand for
            channelwise addition and multiplication respectively.
            Default: ('channel_add',)
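
    Example:
        >>> # A minimal usage sketch; 64 channels and a 32x32 feature map
        >>> # are illustrative values, not defaults from the paper.
        >>> block = ContextBlock(in_channels=64, ratio=1. / 4)
        >>> x = torch.rand(2, 64, 32, 32)
        >>> out = block(x)
        >>> out.shape
        torch.Size([2, 64, 32, 32])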
- """

    _abbr_ = 'context_block'

    def __init__(self,
                 in_channels,
                 ratio,
                 pooling_type='att',
                 fusion_types=('channel_add', )):
        super(ContextBlock, self).__init__()
        assert pooling_type in ['avg', 'att']
        assert isinstance(fusion_types, (list, tuple))
        valid_fusion_types = ['channel_add', 'channel_mul']
        assert all([f in valid_fusion_types for f in fusion_types])
        assert len(fusion_types) > 0, 'at least one fusion should be used'
        self.in_channels = in_channels
        self.ratio = ratio
        self.planes = int(in_channels * ratio)
        self.pooling_type = pooling_type
        self.fusion_types = fusion_types
        if pooling_type == 'att':
            self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
            self.softmax = nn.Softmax(dim=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2d(1)
        if 'channel_add' in fusion_types:
            self.channel_add_conv = nn.Sequential(
                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
        else:
            self.channel_add_conv = None
        if 'channel_mul' in fusion_types:
            self.channel_mul_conv = nn.Sequential(
                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
        else:
            self.channel_mul_conv = None
        self.reset_parameters()

    def reset_parameters(self):
        if self.pooling_type == 'att':
            kaiming_init(self.conv_mask, mode='fan_in')
            self.conv_mask.inited = True

        if self.channel_add_conv is not None:
            last_zero_init(self.channel_add_conv)
        if self.channel_mul_conv is not None:
            last_zero_init(self.channel_mul_conv)
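
    # Note: with the zero-init above, the 'channel_add' branch initially adds
    # zeros, so the block starts as an identity mapping on that branch; the
    # 'channel_mul' branch starts at a sigmoid(0) = 0.5 scaling.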

    def spatial_pool(self, x):
        batch, channel, height, width = x.size()
        if self.pooling_type == 'att':
            input_x = x
            # [N, C, H * W]
            input_x = input_x.view(batch, channel, height * width)
            # [N, 1, C, H * W]
            input_x = input_x.unsqueeze(1)
            # [N, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N, 1, H * W]
            context_mask = context_mask.view(batch, 1, height * width)
            # [N, 1, H * W]
            context_mask = self.softmax(context_mask)
            # [N, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N, 1, C, 1]
            context = torch.matmul(input_x, context_mask)
            # [N, C, 1, 1]
            context = context.view(batch, channel, 1, 1)
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)

        return context
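
    # Note: in the 'att' branch above, the matmul computes a softmax-weighted
    # sum of features over all spatial positions. An equivalent einsum sketch
    # (an illustration, not part of the original implementation):
    #
    #     weights = self.softmax(self.conv_mask(x).view(batch, 1, -1))
    #     context = torch.einsum('ncs,nks->nck', x.view(batch, channel, -1),
    #                            weights).view(batch, channel, 1, 1)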

    def forward(self, x):
        # [N, C, 1, 1]
        context = self.spatial_pool(x)

        out = x
        if self.channel_mul_conv is not None:
            # [N, C, 1, 1]
            channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
            out = out * channel_mul_term
        if self.channel_add_conv is not None:
            # [N, C, 1, 1]
            channel_add_term = self.channel_add_conv(context)
            out = out + channel_add_term

        return out
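

# A minimal smoke-test sketch (illustrative only; run it from a context where
# the relative imports above resolve, e.g. the package's test suite):
#
#     block = ContextBlock(in_channels=64, ratio=1. / 16,
#                          pooling_type='avg',
#                          fusion_types=('channel_add', 'channel_mul'))
#     feat = torch.rand(1, 64, 20, 20)
#     assert block(feat).shape == feat.shape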