Browse Source

update

master
liubinfzi@126.com 6 months ago
parent
commit
a94e646239
76 changed files with 3125 additions and 0 deletions
  1. +56
    -0
      Case_CIFAR10/README.md
  2. +21
    -0
      Case_CIFAR10/case1/LICENSE
  3. BIN
      Case_CIFAR10/case1/__pycache__/utils.cpython-37.pyc
  4. BIN
      Case_CIFAR10/case1/img/train.png
  5. +154
    -0
      Case_CIFAR10/case1/main.py
  6. +18
    -0
      Case_CIFAR10/case1/models/__init__.py
  7. BIN
      Case_CIFAR10/case1/models/__pycache__/__init__.cpython-37.pyc
  8. BIN
      Case_CIFAR10/case1/models/__pycache__/densenet.cpython-37.pyc
  9. BIN
      Case_CIFAR10/case1/models/__pycache__/dla.cpython-37.pyc
  10. BIN
      Case_CIFAR10/case1/models/__pycache__/dla_simple.cpython-37.pyc
  11. BIN
      Case_CIFAR10/case1/models/__pycache__/dpn.cpython-37.pyc
  12. BIN
      Case_CIFAR10/case1/models/__pycache__/efficientnet.cpython-37.pyc
  13. BIN
      Case_CIFAR10/case1/models/__pycache__/googlenet.cpython-37.pyc
  14. BIN
      Case_CIFAR10/case1/models/__pycache__/lenet.cpython-37.pyc
  15. BIN
      Case_CIFAR10/case1/models/__pycache__/mobilenet.cpython-37.pyc
  16. BIN
      Case_CIFAR10/case1/models/__pycache__/mobilenetv2.cpython-37.pyc
  17. BIN
      Case_CIFAR10/case1/models/__pycache__/pnasnet.cpython-37.pyc
  18. BIN
      Case_CIFAR10/case1/models/__pycache__/preact_resnet.cpython-37.pyc
  19. BIN
      Case_CIFAR10/case1/models/__pycache__/regnet.cpython-37.pyc
  20. BIN
      Case_CIFAR10/case1/models/__pycache__/resnet.cpython-37.pyc
  21. BIN
      Case_CIFAR10/case1/models/__pycache__/resnext.cpython-37.pyc
  22. BIN
      Case_CIFAR10/case1/models/__pycache__/senet.cpython-37.pyc
  23. BIN
      Case_CIFAR10/case1/models/__pycache__/shufflenet.cpython-37.pyc
  24. BIN
      Case_CIFAR10/case1/models/__pycache__/shufflenetv2.cpython-37.pyc
  25. BIN
      Case_CIFAR10/case1/models/__pycache__/vgg.cpython-37.pyc
  26. +107
    -0
      Case_CIFAR10/case1/models/densenet.py
  27. +135
    -0
      Case_CIFAR10/case1/models/dla.py
  28. +128
    -0
      Case_CIFAR10/case1/models/dla_simple.py
  29. +98
    -0
      Case_CIFAR10/case1/models/dpn.py
  30. +175
    -0
      Case_CIFAR10/case1/models/efficientnet.py
  31. +107
    -0
      Case_CIFAR10/case1/models/googlenet.py
  32. +23
    -0
      Case_CIFAR10/case1/models/lenet.py
  33. +61
    -0
      Case_CIFAR10/case1/models/mobilenet.py
  34. +86
    -0
      Case_CIFAR10/case1/models/mobilenetv2.py
  35. +125
    -0
      Case_CIFAR10/case1/models/pnasnet.py
  36. +118
    -0
      Case_CIFAR10/case1/models/preact_resnet.py
  37. +155
    -0
      Case_CIFAR10/case1/models/regnet.py
  38. +132
    -0
      Case_CIFAR10/case1/models/resnet.py
  39. +95
    -0
      Case_CIFAR10/case1/models/resnext.py
  40. +121
    -0
      Case_CIFAR10/case1/models/senet.py
  41. +109
    -0
      Case_CIFAR10/case1/models/shufflenet.py
  42. +162
    -0
      Case_CIFAR10/case1/models/shufflenetv2.py
  43. +47
    -0
      Case_CIFAR10/case1/models/vgg.py
  44. +124
    -0
      Case_CIFAR10/case1/utils.py
  45. BIN
      Case_CIFAR10/case2/__pycache__/mox_parser.cpython-37.pyc
  46. +39
    -0
      Case_CIFAR10/case2/inference.py
  47. +122
    -0
      Case_CIFAR10/case2/main.py
  48. +28
    -0
      Case_CIFAR10/case2/mox_parser.py
  49. +311
    -0
      Case_CIFAR10/case2/resnet.py
  50. +30
    -0
      Case_CIFAR10/case2/train.py
  51. BIN
      Case_CIFAR10/cifar.zip
  52. BIN
      Case_CIFAR10/教程/img/L14-1.gif
  53. BIN
      Case_CIFAR10/教程/img/L14-10.png
  54. BIN
      Case_CIFAR10/教程/img/L14-11.png
  55. BIN
      Case_CIFAR10/教程/img/L14-12.png
  56. BIN
      Case_CIFAR10/教程/img/L14-13.png
  57. BIN
      Case_CIFAR10/教程/img/L14-14.png
  58. BIN
      Case_CIFAR10/教程/img/L14-15.png
  59. BIN
      Case_CIFAR10/教程/img/L14-16.png
  60. BIN
      Case_CIFAR10/教程/img/L14-17.png
  61. BIN
      Case_CIFAR10/教程/img/L14-18.png
  62. BIN
      Case_CIFAR10/教程/img/L14-19.png
  63. BIN
      Case_CIFAR10/教程/img/L14-2.gif
  64. BIN
      Case_CIFAR10/教程/img/L14-20.png
  65. BIN
      Case_CIFAR10/教程/img/L14-21.png
  66. BIN
      Case_CIFAR10/教程/img/L14-22.png
  67. BIN
      Case_CIFAR10/教程/img/L14-3.gif
  68. BIN
      Case_CIFAR10/教程/img/L14-3.png
  69. BIN
      Case_CIFAR10/教程/img/L14-4.gif
  70. BIN
      Case_CIFAR10/教程/img/L14-4.png
  71. BIN
      Case_CIFAR10/教程/img/L14-5.png
  72. BIN
      Case_CIFAR10/教程/img/L14-6.png
  73. BIN
      Case_CIFAR10/教程/img/L14-7.png
  74. BIN
      Case_CIFAR10/教程/img/L14-8.png
  75. BIN
      Case_CIFAR10/教程/img/L14-9.png
  76. +238
    -0
      Case_CIFAR10/教程/教程.md

+ 56
- 0
Case_CIFAR10/README.md View File

@@ -0,0 +1,56 @@
# CIFAR-10图像识别项目实战

本实战项目是一个CIFAR-10的图像分类任务,基于CIFAR10的数据集和【PyTorch】,通过在云脑环境调试和训练模型,最终评估模型的准确率。

## 文件资料说明

- 《Case1》文件夹,是基于云脑1算力资源(CPU/GPU)进行任务调试与训练的代码文件

- 《Case2》文件夹,是基于云脑2算力资源(Ascend NPU)进行任务调试与训练的代码文件

- 《教程》文件夹,是本次项目实战的详细教程

- CIFAR.zip,是本次项目使用的数据集

## 前置条件
- Python 3.6+
- PyTorch 1.0+

## 云脑1(CPU/GPU)模型调试与训练
```
ls

#(相应代码放在/code下,相应数据集放在/dataset下)
cd /code/case1

python main.py
```
<div align="center">
<img src= ./case1/img/train.png width=100%>
</div>
<br>

## 云脑2(Ascend NPU)模型调试
```
#克隆代码,在代码页的HTTPS点击复制链接,在此处!git clone后面粘贴链接
!git clone

#解压数据集
!unzip cifar.zip

#运行代码
!python OpenI_test/case2/train.py --dataset_path ./cifar-10-batches-bin/
```

## 云脑2(Ascend NPU)模型训练

在新建训练任务中,输入指定文件为“case2/train.py”,数据集指定为cifar.zip,点击提交即可


## 部分模型准确率训练结果:

| Model | Acc. |
| ----------------- | ----------- |
| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% |
| [DLA](https://arxiv.org/pdf/1707.06484.pdf) | 95.47% |


+ 21
- 0
Case_CIFAR10/case1/LICENSE View File

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2017 liukuang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

BIN
Case_CIFAR10/case1/__pycache__/utils.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/img/train.png View File

Before After
Width: 1894  |  Height: 601  |  Size: 95 kB

+ 154
- 0
Case_CIFAR10/case1/main.py View File

@@ -0,0 +1,154 @@
'''Train CIFAR10 with PyTorch.'''
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import argparse

from models import *
from utils import progress_bar


parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
parser.add_argument('--resume', '-r', action='store_true',
help='resume from checkpoint')
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0 # best test accuracy
start_epoch = 0 # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
root='/dataset/cifar-10-batches-bin/', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
root='/dataset/cifar-10-verify-bin/', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck')

# Model
print('==> Building model..')
# net = VGG('VGG19')
# net = ResNet18()
# net = PreActResNet18()
# net = GoogLeNet()
# net = DenseNet121()
# net = ResNeXt29_2x64d()
# net = MobileNet()
# net = MobileNetV2()
# net = DPN92()
# net = ShuffleNetG2()
# net = SENet18()
# net = ShuffleNetV2(1)
# net = EfficientNetB0()
# net = RegNetX_200MF()
net = SimpleDLA()
net = net.to(device)
if device == 'cuda':
net = torch.nn.DataParallel(net)
cudnn.benchmark = True

if args.resume:
# Load checkpoint.
print('==> Resuming from checkpoint..')
assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
checkpoint = torch.load('./checkpoint/ckpt.pth')
net.load_state_dict(checkpoint['net'])
best_acc = checkpoint['acc']
start_epoch = checkpoint['epoch']

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr,
momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)


# Training
def train(epoch):
print('\nEpoch: %d' % epoch)
net.train()
train_loss = 0
correct = 0
total = 0
for batch_idx, (inputs, targets) in enumerate(trainloader):
inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()

train_loss += loss.item()
_, predicted = outputs.max(1)
total += targets.size(0)
correct += predicted.eq(targets).sum().item()

progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (train_loss/(batch_idx+1), 100.*correct/total, correct, total))


def test(epoch):
global best_acc
net.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(testloader):
inputs, targets = inputs.to(device), targets.to(device)
outputs = net(inputs)
loss = criterion(outputs, targets)

test_loss += loss.item()
_, predicted = outputs.max(1)
total += targets.size(0)
correct += predicted.eq(targets).sum().item()

progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

# Save checkpoint.
acc = 100.*correct/total
if acc > best_acc:
print('Saving..')
state = {
'net': net.state_dict(),
'acc': acc,
'epoch': epoch,
}
if not os.path.isdir('checkpoint'):
os.mkdir('checkpoint')
torch.save(state, './checkpoint/ckpt.pth')
best_acc = acc


for epoch in range(start_epoch, start_epoch+200):
train(epoch)
test(epoch)
scheduler.step()

+ 18
- 0
Case_CIFAR10/case1/models/__init__.py View File

@@ -0,0 +1,18 @@
from .vgg import *
from .dpn import *
from .lenet import *
from .senet import *
from .pnasnet import *
from .densenet import *
from .googlenet import *
from .shufflenet import *
from .shufflenetv2 import *
from .resnet import *
from .resnext import *
from .preact_resnet import *
from .mobilenet import *
from .mobilenetv2 import *
from .efficientnet import *
from .regnet import *
from .dla_simple import *
from .dla import *

BIN
Case_CIFAR10/case1/models/__pycache__/__init__.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/densenet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/dla.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/dla_simple.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/dpn.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/efficientnet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/googlenet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/lenet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/mobilenet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/mobilenetv2.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/pnasnet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/preact_resnet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/regnet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/resnet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/resnext.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/senet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/shufflenet.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/shufflenetv2.cpython-37.pyc View File


BIN
Case_CIFAR10/case1/models/__pycache__/vgg.cpython-37.pyc View File


+ 107
- 0
Case_CIFAR10/case1/models/densenet.py View File

@@ -0,0 +1,107 @@
'''DenseNet in PyTorch.'''
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
def __init__(self, in_planes, growth_rate):
super(Bottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(4*growth_rate)
self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)

def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat([out,x], 1)
return out


class Transition(nn.Module):
def __init__(self, in_planes, out_planes):
super(Transition, self).__init__()
self.bn = nn.BatchNorm2d(in_planes)
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

def forward(self, x):
out = self.conv(F.relu(self.bn(x)))
out = F.avg_pool2d(out, 2)
return out


class DenseNet(nn.Module):
def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
super(DenseNet, self).__init__()
self.growth_rate = growth_rate

num_planes = 2*growth_rate
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
num_planes += nblocks[0]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans1 = Transition(num_planes, out_planes)
num_planes = out_planes

self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
num_planes += nblocks[1]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans2 = Transition(num_planes, out_planes)
num_planes = out_planes

self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
num_planes += nblocks[2]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans3 = Transition(num_planes, out_planes)
num_planes = out_planes

self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
num_planes += nblocks[3]*growth_rate

self.bn = nn.BatchNorm2d(num_planes)
self.linear = nn.Linear(num_planes, num_classes)

def _make_dense_layers(self, block, in_planes, nblock):
layers = []
for i in range(nblock):
layers.append(block(in_planes, self.growth_rate))
in_planes += self.growth_rate
return nn.Sequential(*layers)

def forward(self, x):
out = self.conv1(x)
out = self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.trans3(self.dense3(out))
out = self.dense4(out)
out = F.avg_pool2d(F.relu(self.bn(out)), 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out

def DenseNet121():
return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)

def DenseNet169():
return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)

def DenseNet201():
return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)

def DenseNet161():
return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)

def densenet_cifar():
return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)

def test():
net = densenet_cifar()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)

# test()

+ 135
- 0
Case_CIFAR10/case1/models/dla.py View File

@@ -0,0 +1,135 @@
'''DLA in PyTorch.

Reference:
Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
expansion = 1

def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, kernel_size,
stride=1, padding=(kernel_size - 1) // 2, bias=False)
self.bn = nn.BatchNorm2d(out_channels)

def forward(self, xs):
x = torch.cat(xs, 1)
out = F.relu(self.bn(self.conv(x)))
return out


class Tree(nn.Module):
def __init__(self, block, in_channels, out_channels, level=1, stride=1):
super(Tree, self).__init__()
self.level = level
if level == 1:
self.root = Root(2*out_channels, out_channels)
self.left_node = block(in_channels, out_channels, stride=stride)
self.right_node = block(out_channels, out_channels, stride=1)
else:
self.root = Root((level+2)*out_channels, out_channels)
for i in reversed(range(1, level)):
subtree = Tree(block, in_channels, out_channels,
level=i, stride=stride)
self.__setattr__('level_%d' % i, subtree)
self.prev_root = block(in_channels, out_channels, stride=stride)
self.left_node = block(out_channels, out_channels, stride=1)
self.right_node = block(out_channels, out_channels, stride=1)

def forward(self, x):
xs = [self.prev_root(x)] if self.level > 1 else []
for i in reversed(range(1, self.level)):
level_i = self.__getattr__('level_%d' % i)
x = level_i(x)
xs.append(x)
x = self.left_node(x)
xs.append(x)
x = self.right_node(x)
xs.append(x)
out = self.root(xs)
return out


class DLA(nn.Module):
def __init__(self, block=BasicBlock, num_classes=10):
super(DLA, self).__init__()
self.base = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)

self.layer1 = nn.Sequential(
nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)

self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32),
nn.ReLU(True)
)

self.layer3 = Tree(block, 32, 64, level=1, stride=1)
self.layer4 = Tree(block, 64, 128, level=2, stride=2)
self.layer5 = Tree(block, 128, 256, level=2, stride=2)
self.layer6 = Tree(block, 256, 512, level=1, stride=2)
self.linear = nn.Linear(512, num_classes)

def forward(self, x):
out = self.base(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = self.layer6(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def test():
net = DLA()
print(net)
x = torch.randn(1, 3, 32, 32)
y = net(x)
print(y.size())


if __name__ == '__main__':
test()

+ 128
- 0
Case_CIFAR10/case1/models/dla_simple.py View File

@@ -0,0 +1,128 @@
'''Simplified version of DLA in PyTorch.

Note this implementation is not identical to the original paper version.
But it seems works fine.

See dla.py for the original paper version.

Reference:
Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
expansion = 1

def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, kernel_size,
stride=1, padding=(kernel_size - 1) // 2, bias=False)
self.bn = nn.BatchNorm2d(out_channels)

def forward(self, xs):
x = torch.cat(xs, 1)
out = F.relu(self.bn(self.conv(x)))
return out


class Tree(nn.Module):
def __init__(self, block, in_channels, out_channels, level=1, stride=1):
super(Tree, self).__init__()
self.root = Root(2*out_channels, out_channels)
if level == 1:
self.left_tree = block(in_channels, out_channels, stride=stride)
self.right_tree = block(out_channels, out_channels, stride=1)
else:
self.left_tree = Tree(block, in_channels,
out_channels, level=level-1, stride=stride)
self.right_tree = Tree(block, out_channels,
out_channels, level=level-1, stride=1)

def forward(self, x):
out1 = self.left_tree(x)
out2 = self.right_tree(out1)
out = self.root([out1, out2])
return out


class SimpleDLA(nn.Module):
def __init__(self, block=BasicBlock, num_classes=10):
super(SimpleDLA, self).__init__()
self.base = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)

self.layer1 = nn.Sequential(
nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)

self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32),
nn.ReLU(True)
)

self.layer3 = Tree(block, 32, 64, level=1, stride=1)
self.layer4 = Tree(block, 64, 128, level=2, stride=2)
self.layer5 = Tree(block, 128, 256, level=2, stride=2)
self.layer6 = Tree(block, 256, 512, level=1, stride=2)
self.linear = nn.Linear(512, num_classes)

def forward(self, x):
out = self.base(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = self.layer6(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def test():
net = SimpleDLA()
print(net)
x = torch.randn(1, 3, 32, 32)
y = net(x)
print(y.size())


if __name__ == '__main__':
test()

+ 98
- 0
Case_CIFAR10/case1/models/dpn.py View File

@@ -0,0 +1,98 @@
'''Dual Path Networks in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
super(Bottleneck, self).__init__()
self.out_planes = out_planes
self.dense_depth = dense_depth

self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
self.bn2 = nn.BatchNorm2d(in_planes)
self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)

self.shortcut = nn.Sequential()
if first_layer:
self.shortcut = nn.Sequential(
nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_planes+dense_depth)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
x = self.shortcut(x)
d = self.out_planes
out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
out = F.relu(out)
return out


class DPN(nn.Module):
def __init__(self, cfg):
super(DPN, self).__init__()
in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']

self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.last_planes = 64
self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)

def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for i,stride in enumerate(strides):
layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
self.last_planes = out_planes + (i+2) * dense_depth
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def DPN26():
cfg = {
'in_planes': (96,192,384,768),
'out_planes': (256,512,1024,2048),
'num_blocks': (2,2,2,2),
'dense_depth': (16,32,24,128)
}
return DPN(cfg)

def DPN92():
cfg = {
'in_planes': (96,192,384,768),
'out_planes': (256,512,1024,2048),
'num_blocks': (3,4,20,3),
'dense_depth': (16,32,24,128)
}
return DPN(cfg)


def test():
net = DPN92()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)

# test()

+ 175
- 0
Case_CIFAR10/case1/models/efficientnet.py View File

@@ -0,0 +1,175 @@
'''EfficientNet in PyTorch.

Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".

Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


def swish(x):
return x * x.sigmoid()


def drop_connect(x, drop_ratio):
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x


class SE(nn.Module):
'''Squeeze-and-Excitation block with Swish.'''

def __init__(self, in_channels, se_channels):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_channels, se_channels,
kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_channels, in_channels,
kernel_size=1, bias=True)

def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = swish(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out


class Block(nn.Module):
'''expansion + depthwise + pointwise + squeeze-excitation'''

def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
expand_ratio=1,
se_ratio=0.,
drop_rate=0.):
super(Block, self).__init__()
self.stride = stride
self.drop_rate = drop_rate
self.expand_ratio = expand_ratio

# Expansion
channels = expand_ratio * in_channels
self.conv1 = nn.Conv2d(in_channels,
channels,
kernel_size=1,
stride=1,
padding=0,
bias=False)
self.bn1 = nn.BatchNorm2d(channels)

# Depthwise conv
self.conv2 = nn.Conv2d(channels,
channels,
kernel_size=kernel_size,
stride=stride,
padding=(1 if kernel_size == 3 else 2),
groups=channels,
bias=False)
self.bn2 = nn.BatchNorm2d(channels)

# SE layers
se_channels = int(in_channels * se_ratio)
self.se = SE(channels, se_channels)

# Output
self.conv3 = nn.Conv2d(channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
bias=False)
self.bn3 = nn.BatchNorm2d(out_channels)

# Skip connection if in and out shapes are the same (MV-V2 style)
self.has_skip = (stride == 1) and (in_channels == out_channels)

def forward(self, x):
out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
out = swish(self.bn2(self.conv2(out)))
out = self.se(out)
out = self.bn3(self.conv3(out))
if self.has_skip:
if self.training and self.drop_rate > 0:
out = drop_connect(out, self.drop_rate)
out = out + x
return out


class EfficientNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(EfficientNet, self).__init__()
self.cfg = cfg
self.conv1 = nn.Conv2d(3,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_channels=32)
self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

def _make_layers(self, in_channels):
layers = []
cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
'stride']]
b = 0
blocks = sum(self.cfg['num_blocks'])
for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
strides = [stride] + [1] * (num_blocks - 1)
for stride in strides:
drop_rate = self.cfg['drop_connect_rate'] * b / blocks
layers.append(
Block(in_channels,
out_channels,
kernel_size,
stride,
expansion,
se_ratio=0.25,
drop_rate=drop_rate))
in_channels = out_channels
return nn.Sequential(*layers)

def forward(self, x):
out = swish(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.adaptive_avg_pool2d(out, 1)
out = out.view(out.size(0), -1)
dropout_rate = self.cfg['dropout_rate']
if self.training and dropout_rate > 0:
out = F.dropout(out, p=dropout_rate)
out = self.linear(out)
return out


def EfficientNetB0():
cfg = {
'num_blocks': [1, 2, 2, 3, 3, 4, 1],
'expansion': [1, 6, 6, 6, 6, 6, 6],
'out_channels': [16, 24, 40, 80, 112, 192, 320],
'kernel_size': [3, 3, 5, 3, 5, 5, 3],
'stride': [1, 2, 2, 2, 1, 2, 1],
'dropout_rate': 0.2,
'drop_connect_rate': 0.2,
}
return EfficientNet(cfg)


def test():
net = EfficientNetB0()
x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)


if __name__ == '__main__':
test()

+ 107
- 0
Case_CIFAR10/case1/models/googlenet.py View File

@@ -0,0 +1,107 @@
'''GoogLeNet with PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Inception(nn.Module):
def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
super(Inception, self).__init__()
# 1x1 conv branch
self.b1 = nn.Sequential(
nn.Conv2d(in_planes, n1x1, kernel_size=1),
nn.BatchNorm2d(n1x1),
nn.ReLU(True),
)

# 1x1 conv -> 3x3 conv branch
self.b2 = nn.Sequential(
nn.Conv2d(in_planes, n3x3red, kernel_size=1),
nn.BatchNorm2d(n3x3red),
nn.ReLU(True),
nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
nn.BatchNorm2d(n3x3),
nn.ReLU(True),
)

# 1x1 conv -> 5x5 conv branch
self.b3 = nn.Sequential(
nn.Conv2d(in_planes, n5x5red, kernel_size=1),
nn.BatchNorm2d(n5x5red),
nn.ReLU(True),
nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
)

# 3x3 pool -> 1x1 conv branch
self.b4 = nn.Sequential(
nn.MaxPool2d(3, stride=1, padding=1),
nn.Conv2d(in_planes, pool_planes, kernel_size=1),
nn.BatchNorm2d(pool_planes),
nn.ReLU(True),
)

def forward(self, x):
y1 = self.b1(x)
y2 = self.b2(x)
y3 = self.b3(x)
y4 = self.b4(x)
return torch.cat([y1,y2,y3,y4], 1)


class GoogLeNet(nn.Module):
def __init__(self):
super(GoogLeNet, self).__init__()
self.pre_layers = nn.Sequential(
nn.Conv2d(3, 192, kernel_size=3, padding=1),
nn.BatchNorm2d(192),
nn.ReLU(True),
)

self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

self.avgpool = nn.AvgPool2d(8, stride=1)
self.linear = nn.Linear(1024, 10)

def forward(self, x):
out = self.pre_layers(x)
out = self.a3(out)
out = self.b3(out)
out = self.maxpool(out)
out = self.a4(out)
out = self.b4(out)
out = self.c4(out)
out = self.d4(out)
out = self.e4(out)
out = self.maxpool(out)
out = self.a5(out)
out = self.b5(out)
out = self.avgpool(out)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def test():
net = GoogLeNet()
x = torch.randn(1,3,32,32)
y = net(x)
print(y.size())

# test()

+ 23
- 0
Case_CIFAR10/case1/models/lenet.py View File

@@ -0,0 +1,23 @@
'''LeNet in PyTorch.'''
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)

def forward(self, x):
out = F.relu(self.conv1(x))
out = F.max_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.max_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out

+ 61
- 0
Case_CIFAR10/case1/models/mobilenet.py View File

@@ -0,0 +1,61 @@
'''MobileNet in PyTorch.

See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
'''Depthwise conv + Pointwise conv'''
def __init__(self, in_planes, out_planes, stride=1):
super(Block, self).__init__()
self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(out_planes)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
return out


class MobileNet(nn.Module):
# (128,2) means conv planes=128, conv stride=2, by default conv stride=1
cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

def __init__(self, num_classes=10):
super(MobileNet, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_planes=32)
self.linear = nn.Linear(1024, num_classes)

def _make_layers(self, in_planes):
layers = []
for x in self.cfg:
out_planes = x if isinstance(x, int) else x[0]
stride = 1 if isinstance(x, int) else x[1]
layers.append(Block(in_planes, out_planes, stride))
in_planes = out_planes
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.avg_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def test():
net = MobileNet()
x = torch.randn(1,3,32,32)
y = net(x)
print(y.size())

# test()

+ 86
- 0
Case_CIFAR10/case1/models/mobilenetv2.py View File

@@ -0,0 +1,86 @@
'''MobileNetV2 in PyTorch.

See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
'''expand + depthwise + pointwise'''
def __init__(self, in_planes, out_planes, expansion, stride):
super(Block, self).__init__()
self.stride = stride

planes = expansion * in_planes
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes)

self.shortcut = nn.Sequential()
if stride == 1 and in_planes != out_planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(out_planes),
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out = out + self.shortcut(x) if self.stride==1 else out
return out


class MobileNetV2(nn.Module):
# (expansion, out_planes, num_blocks, stride)
cfg = [(1, 16, 1, 1),
(6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1)]

def __init__(self, num_classes=10):
super(MobileNetV2, self).__init__()
# NOTE: change conv1 stride 2 -> 1 for CIFAR10
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_planes=32)
self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(1280)
self.linear = nn.Linear(1280, num_classes)

def _make_layers(self, in_planes):
layers = []
for expansion, out_planes, num_blocks, stride in self.cfg:
strides = [stride] + [1]*(num_blocks-1)
for stride in strides:
layers.append(Block(in_planes, out_planes, expansion, stride))
in_planes = out_planes
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.relu(self.bn2(self.conv2(out)))
# NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def test():
net = MobileNetV2()
x = torch.randn(2,3,32,32)
y = net(x)
print(y.size())

# test()

+ 125
- 0
Case_CIFAR10/case1/models/pnasnet.py View File

@@ -0,0 +1,125 @@
'''PNASNet in PyTorch.

Paper: Progressive Neural Architecture Search
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class SepConv(nn.Module):
'''Separable Convolution.'''
def __init__(self, in_planes, out_planes, kernel_size, stride):
super(SepConv, self).__init__()
self.conv1 = nn.Conv2d(in_planes, out_planes,
kernel_size, stride,
padding=(kernel_size-1)//2,
bias=False, groups=in_planes)
self.bn1 = nn.BatchNorm2d(out_planes)

def forward(self, x):
return self.bn1(self.conv1(x))


class CellA(nn.Module):
def __init__(self, in_planes, out_planes, stride=1):
super(CellA, self).__init__()
self.stride = stride
self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
if stride==2:
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(out_planes)

def forward(self, x):
y1 = self.sep_conv1(x)
y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
if self.stride==2:
y2 = self.bn1(self.conv1(y2))
return F.relu(y1+y2)

class CellB(nn.Module):
def __init__(self, in_planes, out_planes, stride=1):
super(CellB, self).__init__()
self.stride = stride
# Left branch
self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
# Right branch
self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
if stride==2:
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(out_planes)
# Reduce channels
self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(out_planes)

def forward(self, x):
# Left branch
y1 = self.sep_conv1(x)
y2 = self.sep_conv2(x)
# Right branch
y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
if self.stride==2:
y3 = self.bn1(self.conv1(y3))
y4 = self.sep_conv3(x)
# Concat & reduce channels
b1 = F.relu(y1+y2)
b2 = F.relu(y3+y4)
y = torch.cat([b1,b2], 1)
return F.relu(self.bn2(self.conv2(y)))

class PNASNet(nn.Module):
def __init__(self, cell_type, num_cells, num_planes):
super(PNASNet, self).__init__()
self.in_planes = num_planes
self.cell_type = cell_type

self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(num_planes)

self.layer1 = self._make_layer(num_planes, num_cells=6)
self.layer2 = self._downsample(num_planes*2)
self.layer3 = self._make_layer(num_planes*2, num_cells=6)
self.layer4 = self._downsample(num_planes*4)
self.layer5 = self._make_layer(num_planes*4, num_cells=6)

self.linear = nn.Linear(num_planes*4, 10)

def _make_layer(self, planes, num_cells):
layers = []
for _ in range(num_cells):
layers.append(self.cell_type(self.in_planes, planes, stride=1))
self.in_planes = planes
return nn.Sequential(*layers)

def _downsample(self, planes):
layer = self.cell_type(self.in_planes, planes, stride=2)
self.in_planes = planes
return layer

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = F.avg_pool2d(out, 8)
out = self.linear(out.view(out.size(0), -1))
return out


def PNASNetA():
return PNASNet(CellA, num_cells=6, num_planes=44)

def PNASNetB():
return PNASNet(CellB, num_cells=6, num_planes=32)


def test():
net = PNASNetB()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)

# test()

+ 118
- 0
Case_CIFAR10/case1/models/preact_resnet.py View File

@@ -0,0 +1,118 @@
'''Pre-activation ResNet in PyTorch.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class PreActBlock(nn.Module):
'''Pre-activation version of the BasicBlock.'''
expansion = 1

def __init__(self, in_planes, planes, stride=1):
super(PreActBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
)

def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out += shortcut
return out


class PreActBottleneck(nn.Module):
'''Pre-activation version of the original Bottleneck module.'''
expansion = 4

def __init__(self, in_planes, planes, stride=1):
super(PreActBottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)

if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
)

def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out = self.conv3(F.relu(self.bn3(out)))
out += shortcut
return out


class PreActResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(PreActResNet, self).__init__()
self.in_planes = 64

self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)

def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)

def forward(self, x):
out = self.conv1(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def PreActResNet18():
return PreActResNet(PreActBlock, [2,2,2,2])

def PreActResNet34():
return PreActResNet(PreActBlock, [3,4,6,3])

def PreActResNet50():
return PreActResNet(PreActBottleneck, [3,4,6,3])

def PreActResNet101():
return PreActResNet(PreActBottleneck, [3,4,23,3])

def PreActResNet152():
return PreActResNet(PreActBottleneck, [3,8,36,3])


def test():
net = PreActResNet18()
y = net((torch.randn(1,3,32,32)))
print(y.size())

# test()

+ 155
- 0
Case_CIFAR10/case1/models/regnet.py View File

@@ -0,0 +1,155 @@
'''RegNet in PyTorch.

Paper: "Designing Network Design Spaces".

Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class SE(nn.Module):
'''Squeeze-and-Excitation block.'''

def __init__(self, in_planes, se_planes):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)

def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = F.relu(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out


class Block(nn.Module):
def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
super(Block, self).__init__()
# 1x1
w_b = int(round(w_out * bottleneck_ratio))
self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(w_b)
# 3x3
num_groups = w_b // group_width
self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
stride=stride, padding=1, groups=num_groups, bias=False)
self.bn2 = nn.BatchNorm2d(w_b)
# se
self.with_se = se_ratio > 0
if self.with_se:
w_se = int(round(w_in * se_ratio))
self.se = SE(w_b, w_se)
# 1x1
self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(w_out)

self.shortcut = nn.Sequential()
if stride != 1 or w_in != w_out:
self.shortcut = nn.Sequential(
nn.Conv2d(w_in, w_out,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(w_out)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
if self.with_se:
out = self.se(out)
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class RegNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(RegNet, self).__init__()
self.cfg = cfg
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(0)
self.layer2 = self._make_layer(1)
self.layer3 = self._make_layer(2)
self.layer4 = self._make_layer(3)
self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)

def _make_layer(self, idx):
depth = self.cfg['depths'][idx]
width = self.cfg['widths'][idx]
stride = self.cfg['strides'][idx]
group_width = self.cfg['group_width']
bottleneck_ratio = self.cfg['bottleneck_ratio']
se_ratio = self.cfg['se_ratio']

layers = []
for i in range(depth):
s = stride if i == 0 else 1
layers.append(Block(self.in_planes, width,
s, group_width, bottleneck_ratio, se_ratio))
self.in_planes = width
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.adaptive_avg_pool2d(out, (1, 1))
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def RegNetX_200MF():
cfg = {
'depths': [1, 1, 4, 7],
'widths': [24, 56, 152, 368],
'strides': [1, 1, 2, 2],
'group_width': 8,
'bottleneck_ratio': 1,
'se_ratio': 0,
}
return RegNet(cfg)


def RegNetX_400MF():
cfg = {
'depths': [1, 2, 7, 12],
'widths': [32, 64, 160, 384],
'strides': [1, 1, 2, 2],
'group_width': 16,
'bottleneck_ratio': 1,
'se_ratio': 0,
}
return RegNet(cfg)


def RegNetY_400MF():
cfg = {
'depths': [1, 2, 7, 12],
'widths': [32, 64, 160, 384],
'strides': [1, 1, 2, 2],
'group_width': 16,
'bottleneck_ratio': 1,
'se_ratio': 0.25,
}
return RegNet(cfg)


def test():
net = RegNetX_200MF()
print(net)
x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)


if __name__ == '__main__':
test()

+ 132
- 0
Case_CIFAR10/case1/models/resnet.py View File

@@ -0,0 +1,132 @@
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
expansion = 1

def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class Bottleneck(nn.Module):
expansion = 4

def __init__(self, in_planes, planes, stride=1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion *
planes, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*planes)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class ResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 64

self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)

def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def ResNet18():
return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
net = ResNet18()
y = net(torch.randn(1, 3, 32, 32))
print(y.size())

# test()

+ 95
- 0
Case_CIFAR10/case1/models/resnext.py View File

@@ -0,0 +1,95 @@
'''ResNeXt in PyTorch.

See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
'''Grouped convolution block.'''
expansion = 2

def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
super(Block, self).__init__()
group_width = cardinality * bottleneck_width
self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(group_width)
self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
self.bn2 = nn.BatchNorm2d(group_width)
self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*group_width)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*group_width:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*group_width)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class ResNeXt(nn.Module):
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
super(ResNeXt, self).__init__()
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.in_planes = 64

self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(num_blocks[0], 1)
self.layer2 = self._make_layer(num_blocks[1], 2)
self.layer3 = self._make_layer(num_blocks[2], 2)
# self.layer4 = self._make_layer(num_blocks[3], 2)
self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

def _make_layer(self, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
# Increase bottleneck_width by 2 after each stage.
self.bottleneck_width *= 2
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
# out = self.layer4(out)
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def ResNeXt29_2x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)

def ResNeXt29_4x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)

def ResNeXt29_8x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)

def ResNeXt29_32x4d():
return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)

def test_resnext():
net = ResNeXt29_2x64d()
x = torch.randn(1,3,32,32)
y = net(x)
print(y.size())

# test_resnext()

+ 121
- 0
Case_CIFAR10/case1/models/senet.py View File

@@ -0,0 +1,121 @@
'''SENet in PyTorch.

SENet is the winner of ImageNet-2017. The paper is not released yet.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes)
)

# SE layers
self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear
self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))

# Squeeze
w = F.avg_pool2d(out, out.size(2))
w = F.relu(self.fc1(w))
w = F.sigmoid(self.fc2(w))
# Excitation
out = out * w # New broadcasting feature from v0.2!

out += self.shortcut(x)
out = F.relu(out)
return out


class PreActBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(PreActBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
)

# SE layers
self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))

# Squeeze
w = F.avg_pool2d(out, out.size(2))
w = F.relu(self.fc1(w))
w = F.sigmoid(self.fc2(w))
# Excitation
out = out * w

out += shortcut
return out


class SENet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(SENet, self).__init__()
self.in_planes = 64

self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512, num_classes)

def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out


def SENet18():
return SENet(PreActBlock, [2,2,2,2])


def test():
net = SENet18()
y = net(torch.randn(1,3,32,32))
print(y.size())

# test()

+ 109
- 0
Case_CIFAR10/case1/models/shufflenet.py View File

@@ -0,0 +1,109 @@
'''ShuffleNet in PyTorch.

See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class ShuffleBlock(nn.Module):
def __init__(self, groups):
super(ShuffleBlock, self).__init__()
self.groups = groups

def forward(self, x):
'''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
N,C,H,W = x.size()
g = self.groups
return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)


class Bottleneck(nn.Module):
def __init__(self, in_planes, out_planes, stride, groups):
super(Bottleneck, self).__init__()
self.stride = stride

mid_planes = out_planes/4
g = 1 if in_planes==24 else groups
self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
self.bn1 = nn.BatchNorm2d(mid_planes)
self.shuffle1 = ShuffleBlock(groups=g)
self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
self.bn2 = nn.BatchNorm2d(mid_planes)
self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes)

self.shortcut = nn.Sequential()
if stride == 2:
self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
res = self.shortcut(x)
out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
return out


class ShuffleNet(nn.Module):
def __init__(self, cfg):
super(ShuffleNet, self).__init__()
out_planes = cfg['out_planes']
num_blocks = cfg['num_blocks']
groups = cfg['groups']

self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_planes = 24
self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
self.linear = nn.Linear(out_planes[2], 10)

def _make_layer(self, out_planes, num_blocks, groups):
layers = []
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = self.in_planes if i == 0 else 0
layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
self.in_planes = out_planes
return nn.Sequential(*layers)

def forward(self, x):