|
- #!/usr/bin/env python3
- # -*- coding:utf-8 -*-
- # Copyright (c) Megvii, Inc. and its affiliates.
-
- import torch
- import torch.nn as nn
- from .ASFF import ASFF
- from .backbone import BaseConv, CSPDarknet, CSPLayer, DWConv, Bottleneck,Focus
-
-
- # class R_stem(nn.Module):
- # def __init__(self,out_channels,shortcut=True,depthwise=False,groups=1,act="silu"):
- # super().__init__()
- # Conv = DWConv if depthwise else BaseConv
- # self.conv1=BaseConv(3, out_channels, ksize=7, stride=2, act=act)
- # self.maxpooling=nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
- # # self.maxpooling = BaseConv(out_channels, out_channels, ksize=3, stride=2, act=act)
- # self.conv2=Conv(out_channels, 2*out_channels, ksize=5, stride=1, act=act)
- # self.block=Bottleneck(2*out_channels, 2*out_channels, shortcut, 1.0,depthwise,act=act)
- # # self.block=CSPLayer(out_channels * 2, out_channels * 2, n=2, depthwise=depthwise, act=act)
- # #first fusion way:saf
- # self.conv3=BaseConv(2*out_channels, 1, ksize=3, stride=1, act=act)
- # self.dw1 = BaseConv(1, 1, ksize=3, stride=2, act=act)
- # self.dw3 = BaseConv(1, 1, ksize=3, stride=2, act=act)
- # self.dw5 = BaseConv(1, 1, ksize=3, stride=2, act=act)
- # self.conv_saf1=nn.Conv2d(1,1,kernel_size=3,stride=1,padding=1,groups=groups)
- # self.conv_saf3=nn.Conv2d(1,1,kernel_size=3,stride=1,padding=1,groups=groups)
- # self.conv_saf5=nn.Conv2d(1,1,kernel_size=3,stride=1,padding=1,groups=groups)
-
- # #another fusion model struction
- # # self.conv1= BaseConv(3, out_channels, ksize=3, stride=1, act=act)
- # # self.conv2 = BaseConv(out_channels, out_channels, ksize=3, stride=1, act=act)
- # # self.maxpooling1 = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
- # # self.conv3 = BaseConv(out_channels, 4*out_channels, ksize=3, stride=1, act=act)
- # # self.conv4 = BaseConv(4*out_channels, 4*out_channels, ksize=3, stride=1, act=act)
- # # self.maxpooling2 = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
- # # self.conv5= BaseConv(4*out_channels, 2 * out_channels, ksize=3, stride=1, act=act)
- # # self.conv6 = BaseConv(2 * out_channels, 2 * out_channels, ksize=3, stride=1, act=act)
-
-
-
-
- # def forward(self,x):
- # #第一种结构
- # x=self.conv1(x)
- # # x = self.maxpooling(x)
- # x=self.conv2(x)
- # x=self.block(x)
- # x=self.conv3(x)
- # x = self.maxpooling(x)
- # #第二种结构
- # # x = self.conv1(x)
- # # x = self.conv2(x)
- # # x = self.maxpooling1(x)
- # # x = self.conv3(x)
- # # x = self.conv4(x)
- # # x = self.maxpooling2(x)
- # # x = self.conv5(x)
- # # x = self.conv6(x)
- # # # first fusion way:saf
- # x_dw1=self.dw1(x)
- # x_dw3 = self.dw3(x_dw1)
- # x_dw5 = self.dw5(x_dw3)
- # x_saf1=self.conv_saf1(x_dw1)
- # x_saf1=torch.sigmoid(x_saf1)
- # x_saf3=self.conv_saf3(x_dw3)
- # x_saf3=torch.sigmoid(x_saf3)
- # x_saf5=self.conv_saf5(x_dw5)
- # x_saf5=torch.sigmoid(x_saf5)
- # # x=x_saf1+x_saf3+x_saf5
- # # out = torch.div(x, 3)
- # out=(x_saf1,x_saf3,x_saf5)
-
- # return out
-
-
- # class R_stem(nn.Module):
- # def __init__(self, out_channels, shortcut=True, depthwise=False, groups=1, act="silu"):
-
- # super().__init__()
- # Conv = DWConv if depthwise else BaseConv
- # self.conv2=Conv(out_channels, out_channels * 2, 3, 2, act=act)
- # self.block2 = Bottleneck(2 * out_channels, 2 * out_channels, shortcut, 1.0, depthwise, act=act)
- # # self.maxpooling=nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
- # # self.maxpooling = BaseConv(out_channels, out_channels, ksize=3, stride=2, act=act)
- # self.conv3=Conv(2*out_channels, out_channels * 4, 3, 2, act=act)
- # self.block3 = Bottleneck(4 * out_channels, 4 * out_channels, shortcut, 1.0, depthwise, act=act)
- # self.conv4 = Conv(4 * out_channels, out_channels * 8, 3, 2, act=act)
- # self.block4 = Bottleneck(8 * out_channels, 8 * out_channels, shortcut, 1.0, depthwise, act=act)
- # self.conv5 = Conv(8 * out_channels, out_channels * 16, 3, 2, act=act)
- # self.block5 = Bottleneck(16 * out_channels, 16 * out_channels, shortcut, 1.0, depthwise, act=act)
- # self.conv_rf3 = nn.Conv2d(4 * out_channels, 1, kernel_size=3, stride=1, padding=1, groups=groups)
- # self.conv_rf4 = nn.Conv2d(8 * out_channels, 1, kernel_size=3, stride=1, padding=1, groups=groups)
- # self.conv_rf5 = nn.Conv2d(16 * out_channels, 1, kernel_size=3, stride=1, padding=1, groups=groups)
-
-
- # def forward(self,x):
- # x=self.conv2(x)
- # x=self.block2(x)
- # x = self.conv3(x)
- # x_3 = self.block3(x)
- # out_3=self.conv_rf3(x_3)
- # out_3 = torch.sigmoid(out_3)
- # x = self.conv4(x_3)
- # x_4 = self.block4(x)
- # out_4 = self.conv_rf4(x_4)
- # out_4 = torch.sigmoid(out_4)
- # x = self.conv5(x_4)
- # x_5 = self.block5(x)
- # out_5 = self.conv_rf5(x_5)
- # out_5 = torch.sigmoid(out_5)
- # return (out_3,out_4,out_5)
-
-
class YOLOXHead(nn.Module):
    """Decoupled YOLOX detection head.

    For each FPN level: a 1x1 stem conv compresses the level's channels to a
    shared hidden width, then two parallel branches (classification and
    regression) each apply two 3x3 convs before 1x1 prediction convs.

    Args:
        num_classes: number of object categories predicted per location.
        width: channel-width multiplier of the model variant.
        in_channels: channel counts of the incoming FPN feature maps.
        act: activation name forwarded to the conv blocks.
        depthwise: use depthwise-separable convs (DWConv) for the 3x3 stacks.
    """

    def __init__(self, num_classes, width=1.0, in_channels=[256, 512, 1024], act="silu", depthwise=False):
        super().__init__()
        Conv = DWConv if depthwise else BaseConv
        # Hidden width shared by every per-level branch (256 at width=1.0);
        # hoisted so `int(256 * width)` is not recomputed for each layer.
        hidden = int(256 * width)

        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        self.cls_preds = nn.ModuleList()
        self.reg_preds = nn.ModuleList()
        self.obj_preds = nn.ModuleList()
        self.stems = nn.ModuleList()

        for channels in in_channels:
            # 1x1 conv mapping this level's channels down to the hidden width.
            self.stems.append(
                BaseConv(in_channels=int(channels * width), out_channels=hidden, ksize=1, stride=1, act=act)
            )
            # Classification branch: two 3x3 convs, then a 1x1 class predictor.
            self.cls_convs.append(nn.Sequential(
                Conv(in_channels=hidden, out_channels=hidden, ksize=3, stride=1, act=act),
                Conv(in_channels=hidden, out_channels=hidden, ksize=3, stride=1, act=act),
            ))
            self.cls_preds.append(
                nn.Conv2d(in_channels=hidden, out_channels=num_classes, kernel_size=1, stride=1, padding=0)
            )
            # Regression branch: two 3x3 convs shared by the box and objectness heads.
            self.reg_convs.append(nn.Sequential(
                Conv(in_channels=hidden, out_channels=hidden, ksize=3, stride=1, act=act),
                Conv(in_channels=hidden, out_channels=hidden, ksize=3, stride=1, act=act),
            ))
            self.reg_preds.append(
                nn.Conv2d(in_channels=hidden, out_channels=4, kernel_size=1, stride=1, padding=0)
            )
            self.obj_preds.append(
                nn.Conv2d(in_channels=hidden, out_channels=1, kernel_size=1, stride=1, padding=0)
            )

    def forward(self, inputs):
        """Run the head on each FPN level.

        Args:
            inputs: iterable of feature maps, one per level
                (e.g. 80x80, 40x40, 20x20 grids at width=1.0).

        Returns:
            list with one tensor per level; channels are ordered
            [reg(4), obj(1), cls(num_classes)].
        """
        outputs = []
        for k, x in enumerate(inputs):
            # Channel integration with the level's 1x1 stem conv.
            x = self.stems[k](x)

            # Classification branch -> per-class logits.
            cls_feat = self.cls_convs[k](x)
            cls_output = self.cls_preds[k](cls_feat)

            # Regression branch -> 4 box coefficients plus 1 objectness logit,
            # both predicted from the same regression features.
            reg_feat = self.reg_convs[k](x)
            reg_output = self.reg_preds[k](reg_feat)
            obj_output = self.obj_preds[k](reg_feat)

            outputs.append(torch.cat([reg_output, obj_output, cls_output], 1))
        return outputs
-
-
class YOLOXPAFPN(nn.Module):
    """CSPDarknet backbone plus PAN-FPN neck that fuses image and radar features.

    The image branch runs CSPDarknet internally; the caller supplies the radar
    branch's feature dict (``r_out_features``).  Matching levels are channel-
    concatenated and reduced with 1x1 convs (``dconv1``-``dconv3``), then a
    standard YOLOX PAN (top-down followed by bottom-up) produces three scales.

    Args:
        depth: depth multiplier for the CSP layers.
        width: channel-width multiplier.
        in_features: names of the backbone stages used as FPN inputs.
        in_channels: base channel counts of those stages (before ``width``).
        depthwise: use depthwise-separable convs in the downsampling convs.
        act: activation name forwarded to all conv blocks.
    """

    def __init__(self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"), in_channels=[256, 512, 1024],
                 depthwise=False, act="silu"):
        super().__init__()
        Conv = DWConv if depthwise else BaseConv
        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
        self.in_features = in_features

        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

        # 1x1 convs that halve the channels of each concatenated image+radar
        # level back down to the image level's channel count.
        self.dconv3 = BaseConv(int(in_channels[2] * width * 2), int(in_channels[2] * width), 1, 1, act=act)
        self.dconv2 = BaseConv(int(in_channels[1] * width * 2), int(in_channels[1] * width), 1, 1, act=act)
        self.dconv1 = BaseConv(int(in_channels[0] * width * 2), int(in_channels[0] * width), 1, 1, act=act)

        # -------------------------------------------#
        #   20, 20, 1024 -> 20, 20, 512
        # -------------------------------------------#
        self.lateral_conv0 = BaseConv(int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act)

        # -------------------------------------------#
        #   40, 40, 1024 -> 40, 40, 512
        # -------------------------------------------#
        self.C3_p4 = CSPLayer(
            int(2 * in_channels[1] * width),
            int(in_channels[1] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

        # -------------------------------------------#
        #   40, 40, 512 -> 40, 40, 256
        # -------------------------------------------#
        self.reduce_conv1 = BaseConv(int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act)
        # -------------------------------------------#
        #   80, 80, 512 -> 80, 80, 256
        # -------------------------------------------#
        self.C3_p3 = CSPLayer(
            int(2 * in_channels[0] * width),
            int(in_channels[0] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

        # -------------------------------------------#
        #   80, 80, 256 -> 40, 40, 256
        # -------------------------------------------#
        self.bu_conv2 = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act)
        # -------------------------------------------#
        #   40, 40, 256 -> 40, 40, 512
        # -------------------------------------------#
        self.C3_n3 = CSPLayer(
            int(2 * in_channels[0] * width),
            int(in_channels[1] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

        # -------------------------------------------#
        #   40, 40, 512 -> 20, 20, 512
        # -------------------------------------------#
        self.bu_conv1 = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act)
        # -------------------------------------------#
        #   20, 20, 1024 -> 20, 20, 1024
        # -------------------------------------------#
        self.C3_n4 = CSPLayer(
            int(2 * in_channels[1] * width),
            int(in_channels[2] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

    def forward(self, input, r_out_features):
        """Fuse image and radar features, then run the PAN neck.

        Args:
            input: camera image batch fed to the internal CSPDarknet.
            r_out_features: dict of radar features keyed like the backbone's
                output (must contain the names in ``self.in_features``, with
                the same spatial sizes and channel counts as the image levels).

        Returns:
            tuple ``(P3_out, P4_out, P5_out)`` at strides 8/16/32.
        """
        out_features = self.backbone.forward(input)
        [ifeat1, ifeat2, ifeat3] = [out_features[f] for f in self.in_features]
        [rfeat1, rfeat2, rfeat3] = [r_out_features[f] for f in self.in_features]

        # Per-level fusion: concat image + radar features along channels,
        # then 1x1 conv back down to the image channel count.
        feat1 = self.dconv1(torch.cat([ifeat1, rfeat1], 1))
        feat2 = self.dconv2(torch.cat([ifeat2, rfeat2], 1))
        feat3 = self.dconv3(torch.cat([ifeat3, rfeat3], 1))

        # ---------------- top-down path ----------------#
        # 20, 20, 1024 -> 20, 20, 512
        P5 = self.lateral_conv0(feat3)
        # 20, 20, 512 -> 40, 40, 512
        P5_upsample = self.upsample(P5)
        # 40, 40, 512 + 40, 40, 512 -> 40, 40, 1024
        P5_upsample = torch.cat([P5_upsample, feat2], 1)
        # 40, 40, 1024 -> 40, 40, 512
        P5_upsample = self.C3_p4(P5_upsample)

        # 40, 40, 512 -> 40, 40, 256
        P4 = self.reduce_conv1(P5_upsample)
        # 40, 40, 256 -> 80, 80, 256
        P4_upsample = self.upsample(P4)
        # 80, 80, 256 + 80, 80, 256 -> 80, 80, 512
        P4_upsample = torch.cat([P4_upsample, feat1], 1)
        # 80, 80, 512 -> 80, 80, 256
        P3_out = self.C3_p3(P4_upsample)

        # ---------------- bottom-up path ----------------#
        # 80, 80, 256 -> 40, 40, 256
        P3_downsample = self.bu_conv2(P3_out)
        # 40, 40, 256 + 40, 40, 256 -> 40, 40, 512
        P3_downsample = torch.cat([P3_downsample, P4], 1)
        # 40, 40, 512 -> 40, 40, 512
        P4_out = self.C3_n3(P3_downsample)

        # 40, 40, 512 -> 20, 20, 512
        P4_downsample = self.bu_conv1(P4_out)
        # 20, 20, 512 + 20, 20, 512 -> 20, 20, 1024
        P4_downsample = torch.cat([P4_downsample, P5], 1)
        # 20, 20, 1024 -> 20, 20, 1024
        P5_out = self.C3_n4(P4_downsample)

        return (P3_out, P4_out, P5_out)
-
-
def ra_fusion(img_feature, r_feature):
    """Fuse radar attention maps into image features with a gated residual.

    Each radar map multiplicatively gates its image feature, and the image
    feature is added back as a residual:

        out_i = img_i * radar_i + img_i

    Generalized from the original fixed three levels to any number of levels
    (for three inputs the result is identical).

    Args:
        img_feature: sequence of image feature tensors.
        r_feature: sequence of radar tensors, one per image feature,
            broadcastable against the matching image tensors.

    Returns:
        tuple of fused tensors, one per level.
    """
    return tuple(torch.mul(img, radar) + img for img, radar in zip(img_feature, r_feature))
-
class YoloxBody(nn.Module):
    """Full dual-stream YOLOX detector.

    A separate CSPDarknet (``r_backbone``) extracts radar-image features,
    which the fused PAFPN (``backbone``) combines with camera features;
    the decoupled ``head`` then predicts per-scale outputs.

    Args:
        num_classes: number of object categories.
        phi: model-size key ('nano', 'tiny', 's', 'm', 'l', 'x') selecting
            the depth/width multipliers.
    """

    def __init__(self, num_classes, phi):
        super().__init__()
        # Depth/width scaling presets keyed by variant name.
        depth_dict = {'nano': 0.33, 'tiny': 0.33, 's': 0.33, 'm': 0.67, 'l': 1.00, 'x': 1.33}
        width_dict = {'nano': 0.25, 'tiny': 0.375, 's': 0.50, 'm': 0.75, 'l': 1.00, 'x': 1.25}
        depth, width = depth_dict[phi], width_dict[phi]
        # Only the 'nano' variant uses depthwise-separable convolutions.
        depthwise = phi == 'nano'

        self.backbone = YOLOXPAFPN(depth, width, depthwise=depthwise)
        # Independent backbone for the radar image stream.
        self.r_backbone = CSPDarknet(depth, width, depthwise=depthwise)
        self.head = YOLOXHead(num_classes, width, depthwise=depthwise)

    def forward(self, x, rx):
        """Run the detector.

        Args:
            x: camera image batch.
            rx: radar image batch, spatially aligned with ``x``.

        Returns:
            list of per-scale head outputs (reg/obj/cls concatenated).
        """
        radar_image_features = self.r_backbone.forward(rx)
        fpn_outs = self.backbone.forward(x, radar_image_features)
        outputs = self.head.forward(fpn_outs)
        return outputs
|