#9 dev

Merged
zhengxiawu merged 6 commits from dev into master 2 years ago
  1. +4 -0      .gitignore
  2. +13 -8     README.md
  3. +17 -0     configs/search/RMINAS/darts_cifar10.yaml
  4. +17 -0     configs/search/RMINAS/darts_cifar100.yaml
  5. +17 -0     configs/search/RMINAS/darts_imagenet.yaml
  6. +15 -0     configs/search/RMINAS/nb201_cifar10.yaml
  7. +15 -0     configs/search/RMINAS/nb201_cifar100.yaml
  8. +15 -0     configs/search/RMINAS/nb201_imagenet16.yaml
  9. +73 -0     doc/RMI_NAS.md
  10. BIN       doc/images/nasbench201.png
  11. BIN       doc/images/normal.png
  12. BIN       doc/images/reduce.png
  13. +74 -0    search/RMINAS/README.md
  14. +209 -0   search/RMINAS/RMINAS_darts.py
  15. +154 -0   search/RMINAS/RMINAS_mb_imagenet.py
  16. +220 -0   search/RMINAS/RMINAS_nb201.py
  17. +5 -0     search/RMINAS/download_weight.sh
  18. +18 -0    xnas/core/config.py
  19. +270 -0   xnas/search_algorithm/RMINAS/sampler/RF_sampling.py
  20. +1 -0     xnas/search_algorithm/RMINAS/sampler/available_archs.txt
  21. +100 -0   xnas/search_algorithm/RMINAS/sampler/sampling.py
  22. +46 -0    xnas/search_algorithm/RMINAS/sampler/sampling_darts.py
  23. +253 -0   xnas/search_algorithm/RMINAS/teacher_model/fbresnet_imagenet/fbresnet.py
  24. +133 -0   xnas/search_algorithm/RMINAS/teacher_model/resnet101_cifar100/resnet.py
  25. +173 -0   xnas/search_algorithm/RMINAS/teacher_model/resnet20_cifar10/resnet.py
  26. +182 -0   xnas/search_algorithm/RMINAS/utils/RMI_torch.py
  27. +207 -0   xnas/search_algorithm/RMINAS/utils/get_accuracy.ipynb
  28. +182 -0   xnas/search_algorithm/RMINAS/utils/imagenet16120_loader.py
  29. +91 -0    xnas/search_algorithm/RMINAS/utils/loader.py
  30. +1 -1     xnas/search_space/DARTS/cnn.py
  31. +195 -0   xnas/search_space/RMINAS/DARTS/darts_cnn.py
  32. +222 -0   xnas/search_space/RMINAS/DARTS/darts_img.py
  33. +52 -0    xnas/search_space/RMINAS/DARTS/darts_plot.py
  34. +104 -0   xnas/search_space/RMINAS/DARTS/pcdarts_op.py
  35. +151 -0   xnas/search_space/RMINAS/MBConv/mb_v3_cnn.py
  36. +274 -0   xnas/search_space/RMINAS/NB201/geno.py
  37. +554 -0   xnas/search_space/RMINAS/NB201/ops.py
  38. +203 -0   xnas/search_space/RMINAS/NB201/utils.py

+ 4
- 0
.gitignore View File

@@ -17,6 +17,10 @@ test.py
test.ipynb
nohup*.out

# model weights
*.pth
*.th

# C extensions
*.so



+ 13
- 8
README.md View File

@@ -11,11 +11,11 @@ This project is now supported by PengCheng Lab
### Beta

- DARTS
`python search/DARTS.py --cfg configs/search/DARTS.yaml`
- `python search/DARTS.py --cfg configs/search/DARTS.yaml`
- PCDARTS
`python search/PDARTS.py --cfg configs/search/PDARTS.yaml`
- `python search/PDARTS.py --cfg configs/search/PDARTS.yaml`
- PDARTS
`python search/PCDARTS.py --cfg configs/search/PCDARTS.yaml`
- `python search/PCDARTS.py --cfg configs/search/PCDARTS.yaml`
- SNG
- ASNG
- MDENAS
@@ -23,11 +23,16 @@ This project is now supported by PengCheng Lab
- MIGONAS
- GridSearch
- DrNAS
`python search/DrNAS/nb201space.py --cfg configs/search/DrNAS/nb201_cifar10_Dirichlet.yaml`
`python search/DrNAS/nb201space.py --cfg configs/search/DrNAS/nb201_cifar100_Dirichlet.yaml`
`python search/DrNAS/DARTSspace.py --cfg configs/search/DrNAS/DARTS_cifar10.yaml`
- `python search/DrNAS/nb201space.py --cfg configs/search/DrNAS/nb201_cifar10_Dirichlet.yaml`
- `python search/DrNAS/nb201space.py --cfg configs/search/DrNAS/nb201_cifar100_Dirichlet.yaml`
- `python search/DrNAS/DARTSspace.py --cfg configs/search/DrNAS/DARTS_cifar10.yaml`
- TENAS
`python search/TENAS.py --cfg configs/search/TENAS/nb201_cifar10.yaml`
- `python search/TENAS.py --cfg configs/search/TENAS/nb201_cifar10.yaml`
- RMINAS
- `./search/RMINAS/download_weight.sh # prepare weights of teacher models`
- `python search/RMINAS/RMINAS_nb201.py --cfg configs/search/RMINAS/nb201_cifar10.yaml`
- `python search/RMINAS/RMINAS_darts.py --cfg configs/search/RMINAS/darts_cifar10.yaml`


## Supported Search Spaces

@@ -125,7 +130,7 @@ We reimplement several widely used NAS methods including:
| dynamic_SNG |2 |2.927 |0.0 |24.13 |96.87|473.156 |- |78.07 |cell-based |
| dynamic_SNG |3 |2.724 |0.0 |28.07 |97.45|442.826 |- |77.68 |cell-based |
| dynamic_SNG |4 |3.323 |0.0 |31.85 |96.65|528.784 |- |79.78 |cell-based |
| RMINAS |- |- |1.92 |31.9 |97.36|- |- |- |cell-based |

### TODO



+ 17
- 0
configs/search/RMINAS/darts_cifar10.yaml View File

@@ -0,0 +1,17 @@
RNG_SEED: 2
SEARCH:
DATASET: 'cifar10'
NUM_CLASSES: 10
IM_SIZE: 32
DATA_LOADER:
BACKEND: 'custom'
OUT_DIR: 'experiment/train_test'
OPTIM:
BASE_LR: 0.025
MOMENTUM: 0.9
WEIGHT_DECAY: 0.0003
MAX_EPOCH: 250
TRAIN:
BATCH_SIZE: 128
CHANNELS: 16
LAYERS: 8

+ 17
- 0
configs/search/RMINAS/darts_cifar100.yaml View File

@@ -0,0 +1,17 @@
RNG_SEED: 2
SEARCH:
DATASET: 'cifar100'
NUM_CLASSES: 100
IM_SIZE: 32
DATA_LOADER:
BACKEND: 'custom'
OUT_DIR: 'experiment/train_test'
OPTIM:
BASE_LR: 0.025
MOMENTUM: 0.9
WEIGHT_DECAY: 0.0003
MAX_EPOCH: 250
TRAIN:
BATCH_SIZE: 128
CHANNELS: 16
LAYERS: 8

+ 17
- 0
configs/search/RMINAS/darts_imagenet.yaml View File

@@ -0,0 +1,17 @@
RNG_SEED: 2
SEARCH:
DATASET: 'imagenet'
NUM_CLASSES: 1000
IM_SIZE: 32
DATA_LOADER:
BACKEND: 'custom'
OUT_DIR: 'experiment/train_test'
OPTIM:
BASE_LR: 0.025
MOMENTUM: 0.9
WEIGHT_DECAY: 0.0003
MAX_EPOCH: 250
TRAIN:
BATCH_SIZE: 32
CHANNELS: 16
LAYERS: 8

+ 15
- 0
configs/search/RMINAS/nb201_cifar10.yaml View File

@@ -0,0 +1,15 @@
RNG_SEED: 7
SEARCH:
DATASET: 'cifar10'
NUM_CLASSES: 10
DATA_LOADER:
BACKEND: 'custom'
OUT_DIR: 'experiment/nb201_train'
OPTIM:
BASE_LR: 0.1
MOMENTUM: 0.9
WEIGHT_DECAY: 0.0005
MAX_EPOCH: 150
TRAIN:
BATCH_SIZE: 32
CHECKPOINT_PERIOD: 10

+ 15
- 0
configs/search/RMINAS/nb201_cifar100.yaml View File

@@ -0,0 +1,15 @@
RNG_SEED: 7
SEARCH:
DATASET: 'cifar100'
NUM_CLASSES: 100
DATA_LOADER:
BACKEND: 'custom'
OUT_DIR: 'experiment/nb201_train'
OPTIM:
BASE_LR: 0.1
MOMENTUM: 0.9
WEIGHT_DECAY: 0.0005
MAX_EPOCH: 150
TRAIN:
BATCH_SIZE: 32
CHECKPOINT_PERIOD: 10

+ 15
- 0
configs/search/RMINAS/nb201_imagenet16.yaml View File

@@ -0,0 +1,15 @@
RNG_SEED: 7
SEARCH:
DATASET: 'imagenet16_120'
NUM_CLASSES: 120
DATA_LOADER:
BACKEND: 'custom'
OUT_DIR: 'experiment/nb201_train'
OPTIM:
BASE_LR: 0.1
MOMENTUM: 0.9
WEIGHT_DECAY: 0.0005
MAX_EPOCH: 150
TRAIN:
BATCH_SIZE: 32
CHECKPOINT_PERIOD: 10

+ 73
- 0
doc/RMI_NAS.md View File

@@ -0,0 +1,73 @@
## Introduction

Code for paper: **Neural Architecture Search with Representation Mutual Information**

RMI-NAS is an efficient architecture search method based on Representation Mutual Information (RMI) theory. It aims to speed up performance evaluation by ranking architectures with RMI, an accurate and effective indicator for NAS. RMI-NAS uses only one batch of data to complete training and generalizes well to different search spaces. For more details, please refer to our paper.
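The ranking indicator is computed from feature similarity on that single batch. The snippet below is only an illustrative sketch of a linear CKA similarity between two flattened feature maps; the repository's actual implementation lives in `xnas/search_algorithm/RMINAS/utils/RMI_torch.py`, and the helper names here are hypothetical.

```python
import torch

def gram_linear(x):
    # x: (n, d) features of one batch, flattened per example
    return x @ x.t()

def center_gram(gram):
    # double-center the Gram matrix: H G H with H = I - (1/n) * ones
    n = gram.size(0)
    unit = torch.ones(n, n, device=gram.device) / n
    return gram - unit @ gram - gram @ unit + unit @ gram @ unit

def linear_cka(gram_x, gram_y):
    gx, gy = center_gram(gram_x), center_gram(gram_y)
    return (gx * gy).sum() / (gx.norm() * gy.norm())

# toy example: similarity between student and teacher features of one batch
f_student, f_teacher = torch.randn(128, 256), torch.randn(128, 256)
print(linear_cka(gram_linear(f_student), gram_linear(f_teacher)))
```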



## Usage

### Installation

```bash
git clone https://github.com/MAC-AutoML/XNAS.git
cd XNAS
# set root path
export PYTHONPATH=$PYTHONPATH:/Path/to/XNAS
```

The file [`NAS-Bench-201-v1_0-e61699.pth`](https://drive.google.com/open?id=1SKW0Cu0u8-gb18zDpaAGi0f74UdXeGKs) is required because we use a previous version of `NAS-Bench-201`. Download it and put it into the `utils` directory.
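For reference, the search scripts load this benchmark file through the `nas_201_api` package, roughly as below (the path mirrors `search/RMINAS/RMINAS_nb201.py`; adjust it to wherever you place the file):

```python
from nas_201_api import NASBench201API as API

# path used in search/RMINAS/RMINAS_nb201.py; change it to your own location
nb201_api = API('./data/NAS-Bench-201-v1_0-e61699.pth')
print(nb201_api.arch(0))  # genotype string of architecture index 0
```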

#### Search

```bash
# download weight files for teacher models
./search/RMINAS/download_weight.sh
# NAS-Bench-201 + CIFAR-10
python search/RMINAS/RMINAS_nb201.py --cfg configs/search/RMINAS/nb201_cifar10.yaml
# NAS-Bench-201 + CIFAR-100
python search/RMINAS/RMINAS_nb201.py --cfg configs/search/RMINAS/nb201_cifar100.yaml
# NAS-Bench-201 + ImageNet
python search/RMINAS/RMINAS_nb201.py --cfg configs/search/RMINAS/nb201_imagenet16.yaml
# DARTS + CIFAR-10
python search/RMINAS/RMINAS_darts.py --cfg configs/search/RMINAS/darts_cifar10.yaml
# DARTS + CIFAR-100
python search/RMINAS/RMINAS_darts.py --cfg configs/search/RMINAS/darts_cifar100.yaml
# DARTS + ImageNet
python search/RMINAS/RMINAS_darts.py --cfg configs/search/RMINAS/darts_imagenet.yaml
```
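
All of the scripts above share the same random-forest-guided loop. The outline below is a simplified sketch whose names follow `RMINAS_nb201.py`; details such as the architecture encoding, `cfg`/`logger` setup, and the per-script `train_arch` function are omitted.

```python
from xnas.search_algorithm.RMINAS.sampler.RF_sampling import RF_suggest

# cfg, logger and train_arch come from the surrounding search script
RFS = RF_suggest(space='nasbench201', logger=logger,
                 thres_rate=cfg.RMINAS.RF_THRESRATE, seed=cfg.RNG_SEED)

# 1) warm-up: train cfg.RMINAS.RF_WARMUP randomly sampled archs on one batch
for arch in RFS.warmup_samples(cfg.RMINAS.RF_WARMUP):
    RFS.trained_arch.append({'arch': arch, 'loss': train_arch(arch)})
RFS.Warmup()                       # fit the random forest on the warm-up data

# 2) sampling: propose archs until cfg.RMINAS.RF_SUCC "good" ones are found
sampling_cnt = 0
while sampling_cnt < cfg.RMINAS.RF_SUCC:
    arch = RFS.fitting_samples()
    RFS.trained_arch.append({'arch': arch, 'loss': train_arch(arch)})
    sampling_cnt += RFS.Fitting()  # refit; reports whether the arch beat the loss threshold

# 3) aggregate the best sampled archs into the final architecture
best_arch = RFS.optimal_arch(method='sum', top=50)
```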

## Results

### Results on NAS-Bench-201

| Method | Search Cost<br />(seconds) | CIFAR-10 <br />Test Acc.(%) | CIFAR-100 <br />Test Acc.(%) | ImageNet16-120 <br />Test Acc.(%) |
| ----------- | -------------------------- | --------------------------- | ---------------------------- | --------------------------------- |
| RL | 27870.7 | 93.85±0.37 | 71.71±1.09 | 45.24±1.18 |
| DARTS-V2 | 35781.8 | 54.30±0.00 | 15.61±0.00 | 16.32±0.00 |
| GDAS | 31609.8 | 93.61±0.09 | 70.70±0.30 | 41.71±0.98 |
| FairNAS | 9845.0 | 93.23±0.18 | 71.00±1.46 | 42.19±0.31 |
| **RMI-NAS** | **1258.2** | **94.28±0.10** | **73.36±0.19** | **46.34±0.00** |

![img.png](images/nasbench201.png)
### Results on DARTS

| Method | Search Cost<br />(gpu-days) | CIFAR-10 <br />Test Error (%)<br />(paper) | CIFAR-10 <br />Test Error (%)<br />(retrain) |
| ----------- |-----------------------------| ---------------------------------------- | ------------------------------------------ |
| AmoebaNet-B | 3150 | 2.55±0.05 | - |
| NASNet-A | 1800 | 2.65 | - |
| DARTS (1st) | 0.4 | 3.00±0.14 | 2.75 |
| DARTS (2nd) | 1 | 2.76±0.09 | 2.60 |
| SNAS | 1.5 | 2.85±0.02 | 2.68 |
| PC-DARTS | 1 | 2.57±0.07 | 2.71±0.11 |
| FairDARTS-D | 0.4 | 2.54±0.05 | 2.71 |
| **RMI-NAS** | **0.08** | - | 2.64±0.04 |

Comparison with other methods in the DARTS search space. We also report retrained results under exactly the same settings to ensure a fair comparison. Our method delivers comparable accuracy with a substantial reduction in time consumption.

#### Normal cell
![img.png](images/normal.png)

#### Reduce cell
![img.png](images/reduce.png)

BIN
doc/images/nasbench201.png View File

Width: 729  |  Height: 649  |  Size: 55 KiB

BIN
doc/images/normal.png View File

Width: 1208  |  Height: 538  |  Size: 86 KiB

BIN
doc/images/reduce.png View File

Width: 1208  |  Height: 538  |  Size: 86 KiB

+ 74
- 0
search/RMINAS/README.md View File

@@ -0,0 +1,74 @@
## Introduction

Code for paper: **Neural Architecture Search with Representation Mutual Information**

RMI-NAS is an efficient architecture search method based on Representation Mutual Information (RMI) theory. It aims to speed up performance evaluation by ranking architectures with RMI, an accurate and effective indicator for NAS. RMI-NAS uses only one batch of data to complete training and generalizes well to different search spaces. For more details, please refer to our paper.
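The single-batch training objective mixes a CKA-based feature term with an ordinary cross-entropy term. The sketch below mirrors how the search scripts combine them; `beta` corresponds to `RMINAS.LOSS_BETA` (default 0.80), and `cka_loss_fn` stands in for the `CKA_loss` module defined in the scripts.

```python
import torch.nn as nn

def mixed_loss(student_feats, teacher_feats, logits, labels, cka_loss_fn, beta=0.80):
    """beta corresponds to cfg.RMINAS.LOSS_BETA in xnas/core/config.py."""
    loss_cka = cka_loss_fn(student_feats, teacher_feats)  # feature-similarity term vs. the teacher
    loss_ce = nn.CrossEntropyLoss()(logits, labels)       # ordinary classification term
    return beta * loss_cka + (1.0 - beta) * loss_ce
```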

## Results

### Results on NAS-Bench-201

| Method | Search Cost<br />(seconds) | CIFAR-10 <br />Test Acc.(%) | CIFAR-100 <br />Test Acc.(%) | ImageNet16-120 <br />Test Acc.(%) |
| ----------- | -------------------------- | --------------------------- | ---------------------------- | --------------------------------- |
| RL | 27870.7 | 93.85±0.37 | 71.71±1.09 | 45.24±1.18 |
| DARTS-V2 | 35781.8 | 54.30±0.00 | 15.61±0.00 | 16.32±0.00 |
| GDAS | 31609.8 | 93.61±0.09 | 70.70±0.30 | 41.71±0.98 |
| FairNAS | 9845.0 | 93.23±0.18 | 71.00±1.46 | 42.19±0.31 |
| **RMI-NAS** | **1258.2** | **94.28±0.10** | **73.36±0.19** | **46.34±0.00** |

Our method shows significant efficiency and accuracy improvements.

### Results on DARTS

| Method | Search Cost<br />(gpu-days) | CIFAR-10 <br />Test Error (%)<br />(paper) | CIFAR-10 <br />Test Error (%)<br />(retrain) |
| ----------- | -------------------------- | ---------------------------------------- | ------------------------------------------ |
| AmoebaNet-B | 3150 | 2.55±0.05 | - |
| NASNet-A | 1800 | 2.65 | - |
| DARTS (1st) | 0.4 | 3.00±0.14 | 2.75 |
| DARTS (2nd) | 1 | 2.76±0.09 | 2.60 |
| SNAS | 1.5 | 2.85±0.02 | 2.68 |
| PC-DARTS | 1 | 2.57±0.07 | 2.71±0.11 |
| FairDARTS-D | 0.4 | 2.54±0.05 | 2.71 |
| **RMI-NAS** | **0.08** | - | 2.64±0.04 |

Comparison with other methods in the DARTS search space. We also report retrained results under exactly the same settings to ensure a fair comparison. Our method delivers comparable accuracy with a substantial reduction in time consumption.



## Usage

#### Install RMI-NAS

Our code uses functions from the XNAS repository, which must be installed first.

```bash
# install XNAS
git clone https://github.com/MAC-AutoML/XNAS.git
export PYTHONPATH=$PYTHONPATH:/PATH/to/XNAS

# prepare environment for RMI-NAS (conda)
conda env create --file environment.yaml

# download weight files for teacher models
chmod +x search/RMINAS/download_weight.sh
bash search/RMINAS/download_weight.sh
```

The file [`NAS-Bench-201-v1_0-e61699.pth`](https://drive.google.com/open?id=1SKW0Cu0u8-gb18zDpaAGi0f74UdXeGKs) is required because we use a previous version of `NAS-Bench-201`. Download it and put it into the `utils` directory.
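Architectures in this benchmark are addressed by index; the sampler converts the genotype string returned by the API into the 6x5 one-hot array that the random forest operates on. A small illustration (file path and index are arbitrary):

```python
from nas_201_api import NASBench201API as API
import xnas.search_algorithm.RMINAS.sampler.sampling as sampling

api = API('./data/NAS-Bench-201-v1_0-e61699.pth')  # adjust to your file location
geno_str = api.arch(123)                     # e.g. '|none~0|+|nor_conv_1x1~0|none~1|+|...'
arch_arr = sampling.genostr2array(geno_str)  # (6, 5) one-hot encoding of the 6 cell edges
print(geno_str)
print(arch_arr)
```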

#### Search

```bash
# NAS-Bench-201 + CIFAR-10
python search/RMINAS/RMINAS_nb201.py --cfg configs/search/RMINAS/nb201_cifar10.yaml

# DARTS + CIFAR-100 + specific exp path
python search/RMINAS/RMINAS_darts.py --cfg configs/search/RMINAS/darts_cifar100.yaml OUT_DIR experiments/
```


## Related work

[NAS-Bench-201](https://github.com/D-X-Y/NAS-Bench-201)

[XNAS](https://github.com/MAC-AutoML/XNAS)

+ 209
- 0
search/RMINAS/RMINAS_darts.py View File

@@ -0,0 +1,209 @@
import time
import numpy as np

import xnas.search_algorithm.RMINAS.utils.RMI_torch as RMI
from xnas.search_algorithm.RMINAS.sampler.RF_sampling import RF_suggest
import xnas.search_algorithm.RMINAS.sampler.sampling_darts as sampling

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim

import xnas.core.config as config
import xnas.core.logging as logging
from xnas.core.config import cfg
from xnas.core.trainer import setup_env

from xnas.search_space.RMINAS.DARTS.darts_cnn import AugmentCNN, geno_from_alpha, reformat_DARTS


class CKA_loss(nn.Module):
    """Sums (1 - linear CKA similarity) over the three compared feature maps."""
    def __init__(self, datasize):
        super(CKA_loss, self).__init__()
        self.datasize = datasize

    def forward(self, features_1, features_2):
        # CKA similarity between each pair of flattened student/teacher feature maps
        s = []
        for i in range(len(features_1)):
            s.append(RMI.tensor_cka(RMI.tensor_gram_linear(features_1[i].view(self.datasize, -1)),
                                    RMI.tensor_gram_linear(features_2[i].view(self.datasize, -1))))
        return torch.sum(3 - s[0] - s[1] - s[2])


def main():
logger = logging.get_logger(__name__)
# Load config and check
config.load_cfg_fom_args()
config.assert_and_infer_cfg()
cfg.freeze()
setup_env()
print(cfg.SEARCH.DATASET)
# assert cfg.SEARCH.DATASET in ['cifar10', 'cifar100'], 'dataset error'
assert cfg.SEARCH.DATASET in ['cifar10', 'cifar100', 'imagenet'], 'dataset error'
if cfg.SEARCH.DATASET == 'imagenet':
print('='*30+' NOTE '+'='*30)
print('Our method does not directly search in ImageNet.')
print('Only partial tests have been conducted, please use with caution.')
print('='*66)

if cfg.SEARCH.DATASET == 'cifar10':
from xnas.search_algorithm.RMINAS.utils.loader import cifar10_data
import xnas.search_algorithm.RMINAS.teacher_model.resnet20_cifar10.resnet as resnet
"""Data preparing"""
more_data_X, more_data_y = cifar10_data(cfg.TRAIN.BATCH_SIZE, cfg.DATA_LOADER.NUM_WORKERS)

"""ResNet codes"""
checkpoint_res = torch.load('xnas/search_algorithm/RMINAS/teacher_model/resnet20_cifar10/resnet20.th')
model_res = torch.nn.DataParallel(resnet.__dict__['resnet20']())
model_res.cuda()
model_res.load_state_dict(checkpoint_res['state_dict'])
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
more_logits = model_res(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)
data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res = model_res.module.feature_extractor(data_X)

elif cfg.SEARCH.DATASET == 'cifar100':
from xnas.search_algorithm.RMINAS.utils.loader import cifar100_data
from xnas.search_algorithm.RMINAS.teacher_model.resnet101_cifar100.resnet import resnet101
"""Data preparing"""
more_data_X, more_data_y = cifar100_data(cfg.TRAIN.BATCH_SIZE, cfg.DATA_LOADER.NUM_WORKERS)
"""ResNet codes"""
net = resnet101()
net.load_state_dict(torch.load('xnas/search_algorithm/RMINAS/teacher_model/resnet101_cifar100/resnet101.pth'))
net.cuda()
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
more_logits = net(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)
data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res = net.feature_extractor(data_X)

elif cfg.SEARCH.DATASET == 'imagenet':
from xnas.search_algorithm.RMINAS.utils.loader import imagenet_data
import xnas.search_algorithm.RMINAS.teacher_model.fbresnet_imagenet.fbresnet as fbresnet
"""Data preparing"""
more_data_X, more_data_y = imagenet_data(cfg.TRAIN.BATCH_SIZE, cfg.DATA_LOADER.NUM_WORKERS, '/media/DATASET/ILSVRC2012/')
"""ResNet codes"""
model_res = fbresnet.fbresnet152()
model_res.cuda()
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
more_logits = model_res(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)
data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res = model_res.features_extractor(data_X)

RFS = RF_suggest(space='darts', logger=logger, thres_rate=cfg.RMINAS.RF_THRESRATE, seed=cfg.RNG_SEED)
# loss function
loss_fun_cka = CKA_loss(data_X.size()[0])
loss_fun_cka = loss_fun_cka.requires_grad_()
loss_fun_cka.cuda()
loss_fun_log = torch.nn.CrossEntropyLoss().cuda()
def train_arch(genotype):
s_time = time.time()
model = AugmentCNN(
cfg.SEARCH.IM_SIZE,
cfg.SEARCH.INPUT_CHANNEL,
cfg.TRAIN.CHANNELS,
cfg.SEARCH.NUM_CLASSES,
cfg.TRAIN.LAYERS,
False, # don't use auxiliary head
genotype)
model.cuda()
model.train()
# weights optimizer
optimizer = torch.optim.SGD(
model.parameters(),
cfg.OPTIM.BASE_LR,
momentum=cfg.OPTIM.MOMENTUM,
weight_decay=cfg.OPTIM.WEIGHT_DECAY)

for cur_epoch in range(1, cfg.OPTIM.MAX_EPOCH+1):
optimizer.zero_grad()

features, logits, aux_logits = model(data_X)
loss_cka = loss_fun_cka(features, feature_res)
loss_logits = loss_fun_log(logits, data_y)
loss = cfg.RMINAS.LOSS_BETA * loss_cka + (1-cfg.RMINAS.LOSS_BETA)*loss_logits
loss.backward()

optimizer.step()

if cur_epoch == cfg.OPTIM.MAX_EPOCH:
logger.info("training arch cost: {}".format(time.time()-s_time))
return loss.cpu().detach().numpy()
start_time = time.time()
trained_arch, trained_loss = [], []

# ====== Warmup ======
warmup_samples = RFS.warmup_samples(cfg.RMINAS.RF_WARMUP)
logger.info("Warming up with {} archs".format(cfg.RMINAS.RF_WARMUP))
for sample in warmup_samples:
sample_alpha = sampling.ransug2alpha(sample) # shape=(28, 8)
sample_geno = geno_from_alpha(sample_alpha) # type=Genotype
# if cfg.SEARCH.DATASET == 'imagenet' :
# sample_geno = reformat_DARTS(sample_geno)
mixed_loss = train_arch(sample_geno)
mixed_loss = np.inf if np.isnan(mixed_loss) else mixed_loss
trained_arch.append(str(sample_geno))
trained_loss.append(mixed_loss)
RFS.trained_arch.append({'arch':sample, 'loss':mixed_loss})
RFS.Warmup()
logger.info('warmup time cost: {}'.format(str(time.time() - start_time)))
# ====== RF Sampling ======
sampling_time = time.time()
sampling_cnt = 0
while sampling_cnt < cfg.RMINAS.RF_SUCC:
sample = RFS.fitting_samples()
sample_alpha = sampling.ransug2alpha(sample) # shape=(28, 8)
sample_geno = geno_from_alpha(sample_alpha) # type=Genotype
# if cfg.SEARCH.DATASET == 'imagenet' :
# sample_geno = reformat_DARTS(sample_geno)
mixed_loss = train_arch(sample_geno)
mixed_loss = np.inf if np.isnan(mixed_loss) else mixed_loss
trained_arch.append(str(sample_geno))
trained_loss.append(mixed_loss)
RFS.trained_arch.append({'arch':sample, 'loss':mixed_loss})
sampling_cnt += RFS.Fitting()
if sampling_cnt >= cfg.RMINAS.RF_SUCC:
logger.info('successfully sampling good archs for {} times'.format(sampling_cnt))
else:
logger.info('failed sampling good archs for only {} times'.format(sampling_cnt))
logger.info('RF sampling time cost: {}'.format(str(time.time() - sampling_time)))
# ====== Evaluation ======
logger.info('Total time cost:{}'.format(str(time.time() - start_time)))
logger.info('Actual training times: {}'.format(len(trained_arch)))
op_sample = RFS.optimal_arch(method='sum', top=50)
op_alpha = torch.from_numpy(np.r_[op_sample, op_sample])
op_geno = reformat_DARTS(geno_from_alpha(op_alpha))
logger.info('Searched architecture@top50:\n{}'.format(str(op_geno)))

if __name__ == "__main__":
main()


+ 154
- 0
search/RMINAS/RMINAS_mb_imagenet.py View File

@@ -0,0 +1,154 @@
import numpy as np
import random
import os
import time

import torch
import torch.nn as nn
from torch.optim import lr_scheduler
import torch.utils
import torchvision.datasets as dset
import torchvision.transforms as transforms

import xnas.core.logging as logging
import xnas.core.config as config

from xnas.core.utils import one_hot_to_index
from xnas.core.trainer import setup_env
from xnas.core.config import cfg

from xnas.search_space.RMINAS.MBConv.mb_v3_cnn import MobileNetV3
import xnas.search_algorithm.RMINAS.utils.RMI_torch as RMI
from xnas.search_algorithm.RMINAS.sampler.RF_sampling import RF_suggest

from xnas.search_algorithm.RMINAS.utils.loader import imagenet_data

import xnas.search_algorithm.RMINAS.teacher_model.fbresnet_imagenet.fbresnet as fbresnet


# NOTE: this code is not fully tested.
# OBSERVE_EPO = 250
# RF_WARMUP = 200


class CKA_loss(nn.Module):
    """Sums (1 - linear CKA similarity) over the three compared feature maps."""
    def __init__(self, datasize):
        super(CKA_loss, self).__init__()
        self.datasize = datasize

    def forward(self, features_1, features_2):
        # CKA similarity between each pair of flattened student/teacher feature maps
        s = []
        for i in range(len(features_1)):
            s.append(RMI.tensor_cka(RMI.tensor_gram_linear(features_1[i].view(self.datasize, -1)),
                                    RMI.tensor_gram_linear(features_2[i].view(self.datasize, -1))))
        return torch.sum(3 - s[0] - s[1] - s[2])

def main():
# Load config and check
config.load_cfg_fom_args()
config.assert_and_infer_cfg()
cfg.freeze()
setup_env()

logger = logging.get_logger(__name__)
"""Data preparing"""
more_data_X, more_data_y = imagenet_data(cfg.TRAIN.BATCH_SIZE, cfg.DATA_LOADER.NUM_WORKERS, '/media/DATASET/ILSVRC2012/')
"""ResNet codes"""
model_res = fbresnet.fbresnet152()
model_res.cuda()
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
more_logits = model_res(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)

data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res = model_res.features_extractor(data_X)
RFS = RF_suggest(space='mb', logger=logger, thres_rate=cfg.RMINAS.RF_THRESRATE, seed=cfg.RNG_SEED)
# loss function
loss_fun_cka = CKA_loss(data_X.size()[0])
loss_fun_cka = loss_fun_cka.requires_grad_()
loss_fun_cka.cuda()
loss_fun_log = torch.nn.CrossEntropyLoss().cuda()
def train_arch(sample):
model = MobileNetV3(n_classes=1000)
model.cuda()
w_optim = torch.optim.SGD(model.parameters(),
cfg.OPTIM.BASE_LR,
momentum=cfg.OPTIM.MOMENTUM,
weight_decay=cfg.OPTIM.WEIGHT_DECAY)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(w_optim, cfg.OPTIM.MAX_EPOCH, eta_min=cfg.OPTIM.MIN_LR)
model.train()
logger.info("Sampling: {}".format(one_hot_to_index(sample)))
for cur_epoch in range(1, cfg.OPTIM.MAX_EPOCH+1):
lr = w_optim.param_groups[0]['lr']

logits, features = model(data_X, sample)
loss_cka = loss_fun_cka(features, feature_res)
loss_logits = loss_fun_log(logits, data_y)
loss = cfg.RMINAS.LOSS_BETA * loss_cka + (1-cfg.RMINAS.LOSS_BETA)*loss_logits

w_optim.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), cfg.OPTIM.GRAD_CLIP)
w_optim.step()

lr_scheduler.step()
if cur_epoch == cfg.OPTIM.MAX_EPOCH:
return loss.cpu().detach().numpy()
start_time = time.time()

# ====== Warmup ======
warmup_samples = RFS.warmup_samples(cfg.RMINAS.RF_WARMUP)
logger.info("Warming up with {} archs".format(cfg.RMINAS.RF_WARMUP))
for sample in warmup_samples:
mixed_loss = train_arch(sample)
mixed_loss = np.inf if np.isnan(mixed_loss) else mixed_loss
RFS.trained_arch.append({'arch':sample, 'loss':mixed_loss})
# print(str(sample_geno), mixed_loss)
RFS.Warmup()
logger.info('warmup time cost: {}'.format(str(time.time() - start_time)))
# ====== RF Sampling ======
sampling_time = time.time()
sampling_cnt = 0
while sampling_cnt < cfg.RMINAS.RF_SUCC:
sample = RFS.fitting_samples()
mixed_loss = train_arch(sample)
mixed_loss = np.inf if np.isnan(mixed_loss) else mixed_loss
RFS.trained_arch.append({'arch':sample, 'loss':mixed_loss})
# print(str(sample_geno), mixed_loss)
sampling_cnt += RFS.Fitting()
if sampling_cnt >= cfg.RMINAS.RF_SUCC:
logger.info('successfully sampling good archs for {} times'.format(sampling_cnt))
else:
logger.info('failed sampling good archs for only {} times'.format(sampling_cnt))
logger.info('RF sampling time cost: {}'.format(str(time.time() - sampling_time)))
# ====== Evaluation ======
logger.info('Total time cost:{}'.format(str(time.time() - start_time)))
logger.info('Actual training times: {}'.format(len(RFS.trained_arch)))
op_sample = RFS.optimal_arch(method='sum', top=30)
logger.info('Searched architecture@top30:\n{}'.format(str(op_sample)))
# logger.info(model.genotype(torch.Tensor(op_sample)))

if __name__ == "__main__":
main()

+ 220
- 0
search/RMINAS/RMINAS_nb201.py View File

@@ -0,0 +1,220 @@
import time
import random
import numpy as np

import xnas.search_algorithm.RMINAS.utils.RMI_torch as RMI
from xnas.search_algorithm.RMINAS.sampler.RF_sampling import RF_suggest
import xnas.search_algorithm.RMINAS.sampler.sampling as sampling
from xnas.search_space.RMINAS.NB201.utils import *
from nas_201_api import NASBench201API as api

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim

import xnas.core.config as config
import xnas.core.logging as logging
from xnas.core.config import cfg
from xnas.core.trainer import setup_env


class CKA_loss(nn.Module):
    """Sums (1 - linear CKA similarity) over the three compared feature maps."""
    def __init__(self, datasize):
        super(CKA_loss, self).__init__()
        self.datasize = datasize

    def forward(self, features_1, features_2):
        # CKA similarity between each pair of flattened student/teacher feature maps
        s = []
        for i in range(len(features_1)):
            s.append(RMI.tensor_cka(RMI.tensor_gram_linear(features_1[i].view(self.datasize, -1)),
                                    RMI.tensor_gram_linear(features_2[i].view(self.datasize, -1))))
        return torch.sum(3 - s[0] - s[1] - s[2])


def main():
logger = logging.get_logger(__name__)

# Load config and check
config.load_cfg_fom_args()
config.assert_and_infer_cfg()
cfg.freeze()
setup_env()
print(cfg.SEARCH.DATASET)
assert cfg.SEARCH.DATASET in ['cifar10', 'cifar100', 'imagenet16_120'], 'dataset error'

if cfg.SEARCH.DATASET == 'cifar10':
from xnas.search_algorithm.RMINAS.utils.loader import cifar10_data
import xnas.search_algorithm.RMINAS.teacher_model.resnet20_cifar10.resnet as resnet

"""Data preparing"""
more_data_X, more_data_y = cifar10_data(cfg.TRAIN.BATCH_SIZE, cfg.DATA_LOADER.NUM_WORKERS)

"""ResNet codes"""
checkpoint_res = torch.load('xnas/search_algorithm/RMINAS/teacher_model/resnet20_cifar10/resnet20.th')
model_res = torch.nn.DataParallel(resnet.__dict__['resnet20']())
model_res.cuda()
model_res.load_state_dict(checkpoint_res['state_dict'])
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
more_logits = model_res(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)
data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res = model_res.module.feature_extractor(data_X)
elif cfg.SEARCH.DATASET == 'cifar100':
from xnas.search_algorithm.RMINAS.utils.loader import cifar100_data
from xnas.search_algorithm.RMINAS.teacher_model.resnet101_cifar100.resnet import resnet101

"""Data preparing"""
more_data_X, more_data_y = cifar100_data(cfg.TRAIN.BATCH_SIZE, cfg.DATA_LOADER.NUM_WORKERS)

"""ResNet codes"""
model_res = resnet101()
model_res.load_state_dict(torch.load('xnas/search_algorithm/RMINAS/teacher_model/resnet101_cifar100/resnet101.pth'))
model_res.cuda()
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
more_logits = model_res(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)
data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res = model_res.feature_extractor(data_X)
elif cfg.SEARCH.DATASET == 'imagenet16_120':
import xnas.search_algorithm.RMINAS.utils.imagenet16120_loader as imagenetloader
from xnas.search_space.RMINAS.NB201.geno import Structure as cellstructure
from nas_201_api import ResultsCount

"""Data preparing"""
train_loader, _ = imagenetloader.get_loader(batch_size=cfg.TRAIN.BATCH_SIZE*16)
target_i = random.randint(0, len(train_loader)-1)
more_data_X, more_data_y = None, None
for i, (more_data_X, more_data_y) in enumerate(train_loader):
if i == target_i:
break
more_data_X = more_data_X.cuda()
more_data_y = more_data_y.cuda()

"""Teacher Network: using best arch searched from cifar10 and weight from nb201."""
filename = 'xnas/search_algorithm/RMINAS/teacher_model/nb201model_imagenet16120/009930-FULL.pth'
xdata = torch.load(filename)
odata = xdata['full']['all_results'][('ImageNet16-120', 777)]
result = ResultsCount.create_from_state_dict(odata)
result.get_net_param()
arch_config = result.get_config(cellstructure.str2structure) # create the network with params
net_config = dict2config(arch_config, None)
network = get_cell_based_tiny_net(net_config)
network.load_state_dict(result.get_net_param())
network.cuda()
"""selecting well-performed data."""
with torch.no_grad():
ce_loss = torch.nn.CrossEntropyLoss(reduction='none').cuda()
_, more_logits = network(more_data_X)
_, indices = torch.topk(-ce_loss(more_logits, more_data_y).cpu().detach(), cfg.TRAIN.BATCH_SIZE)
data_y = torch.Tensor([more_data_y[i] for i in indices]).long().cuda()
data_X = torch.Tensor([more_data_X[i].cpu().numpy() for i in indices]).cuda()
with torch.no_grad():
feature_res, _ = network(data_X)
"""Codes: build from config file."""
nb201_api = api('./data/NAS-Bench-201-v1_0-e61699.pth')
RFS = RF_suggest(space='nasbench201', logger=logger, api=nb201_api, thres_rate=cfg.RMINAS.RF_THRESRATE, seed=cfg.RNG_SEED)

# loss function
loss_fun_cka = CKA_loss(data_X.size()[0])
loss_fun_cka = loss_fun_cka.requires_grad_()
loss_fun_cka.cuda()
loss_fun_log = torch.nn.CrossEntropyLoss().cuda()
def train_arch(arch_index):
# get arch
arch_config = {
'name': 'infer.tiny',
'C': 16, 'N': 5,
'arch_str':nb201_api.arch(arch_index),
'num_classes': cfg.SEARCH.NUM_CLASSES}
net_config = dict2config(arch_config, None)
model = get_cell_based_tiny_net(net_config)
model.cuda()
model.train()

# weights optimizer
optimizer = torch.optim.SGD(
model.parameters(),
cfg.OPTIM.BASE_LR,
momentum=cfg.OPTIM.MOMENTUM,
weight_decay=cfg.OPTIM.WEIGHT_DECAY)

for cur_epoch in range(1, cfg.OPTIM.MAX_EPOCH+1):
optimizer.zero_grad()
features, logits = model(data_X)
loss_logits = loss_fun_log(logits, data_y)
loss_cka = loss_fun_cka(features, feature_res)
loss = cfg.RMINAS.LOSS_BETA * loss_cka + (1-cfg.RMINAS.LOSS_BETA)*loss_logits
loss.backward()

optimizer.step()
if cur_epoch == cfg.OPTIM.MAX_EPOCH:
logger.info('Arch:{} Loss:{}'.format(str(arch_index), str(loss.cpu().detach().numpy())))
return loss.cpu().detach().numpy()
start_time = time.time()
trained_loss = []
# ====== Warmup ======
warmup_samples = RFS.warmup_samples(cfg.RMINAS.RF_WARMUP)
logger.info("Warming up with {} archs".format(cfg.RMINAS.RF_WARMUP))
for arch_index in warmup_samples:
mixed_loss = train_arch(arch_index)
mixed_loss = np.inf if np.isnan(mixed_loss) else mixed_loss
trained_loss.append(mixed_loss)
arch_arr = sampling.genostr2array(nb201_api.arch(arch_index))
RFS.trained_arch.append({'arch':arch_arr, 'loss':mixed_loss})
RFS.trained_arch_index.append(arch_index)
# print(arch_index, mixed_loss)
RFS.Warmup()
logger.info('warmup time cost: {}'.format(str(time.time() - start_time)))
# ====== RF Sampling ======
sampling_time = time.time()
sampling_cnt= 0
while sampling_cnt < cfg.RMINAS.RF_SUCC:
arch_index = RFS.fitting_samples()
assert arch_index not in list(RFS.trained_arch_index), "RFS.trained_arch_index error"
mixed_loss = train_arch(arch_index)
mixed_loss = np.inf if np.isnan(mixed_loss) else mixed_loss
RFS.trained_arch_index.append(arch_index)
trained_loss.append(mixed_loss)
arch_arr = sampling.genostr2array(nb201_api.arch(arch_index))
RFS.trained_arch.append({'arch':arch_arr, 'loss':mixed_loss})
# print(arch_index, mixed_loss)
sampling_cnt += RFS.Fitting()
if sampling_cnt >= cfg.RMINAS.RF_SUCC:
logger.info('successfully sampling good archs for {} times'.format(sampling_cnt))
else:
logger.info('failed sampling good archs for only {} times'.format(sampling_cnt))
logger.info('RF sampling time cost:{}'.format(str(time.time() - sampling_time)))
# ====== Evaluation ======
logger.info('Total time cost: {}'.format(str(time.time() - start_time)))
logger.info('Actual training times: {}'.format(len(RFS.trained_arch_index)))
logger.info('Searched architecture:\n{}'.format(str(RFS.optimal_arch(method='sum', top=50))))
# logger.info('Searched architecture:\n{}'.format(str(RFS.optimal_arch(method='greedy', top=50))))

if __name__ == '__main__':
main()

+ 5
- 0
search/RMINAS/download_weight.sh View File

@@ -0,0 +1,5 @@
(cd xnas/search_algorithm/RMINAS/teacher_model/resnet20_cifar10 && wget http://cdn.thrase.cn/rmi/resnet20.th)
(cd xnas/search_algorithm/RMINAS/teacher_model/nb201model_imagenet16120 && wget http://cdn.thrase.cn/rmi/009930-FULL.pth)
(cd xnas/search_algorithm/RMINAS/teacher_model/fbresnet_imagenet && wget http://cdn.thrase.cn/rmi/fbresnet152.pth)
(cd xnas/search_algorithm/RMINAS/teacher_model/resnet101_cifar100 && wget http://cdn.thrase.cn/rmi/resnet101.pth)
echo "Finish downloading weight files."

+ 18
- 0
xnas/core/config.py View File

@@ -462,6 +462,24 @@ _C.TENAS.REPEAT = 3
_C.TENAS.PRUNE_NUMBER = 1


# ------------------------------------------------------------------------------------ #
# RMINAS options
# ------------------------------------------------------------------------------------ #
_C.RMINAS = CfgNode()

# beta of mixed loss
_C.RMINAS.LOSS_BETA = 0.80

# number of archs for random forest warming up
_C.RMINAS.RF_WARMUP = 100

# threshold of random forest to choose good archs
_C.RMINAS.RF_THRESRATE = 0.05

# number of good archs when random forest terminates
_C.RMINAS.RF_SUCC = 100
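
# Example: these options can be overridden per experiment from a YAML config
# (hypothetical snippet; the values shown are the defaults defined above,
# merged through the CfgNode mechanism used by this file):
#
#   RMINAS:
#     LOSS_BETA: 0.80
#     RF_WARMUP: 100
#     RF_THRESRATE: 0.05
#     RF_SUCC: 100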



def dump_cfg():
"""Dumps the config to the output directory."""


+ 270
- 0
xnas/search_algorithm/RMINAS/sampler/RF_sampling.py View File

@@ -0,0 +1,270 @@
import numpy as np
import pickle
import copy
import time
# import torch.nn as nn
import scipy
import torch
from scipy.stats import ks_2samp
from scipy import stats
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier

import xnas.search_algorithm.RMINAS.sampler.sampling as sampling

def softmax(x):
"""Compute softmax values for each sets of scores in x."""
return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

class RF_suggest():
def __init__(self, space, logger, api=None, thres_rate=0.05, batch=1000, seed=10):
np.random.seed(seed)
self.sampled_history = [] # list[arch_index] / list[arch.ravel()]
self.trained_arch = [] # list[dict{'arch':arch, 'loss':loss}]
self.trained_arch_index = []
self.thres_rate = thres_rate
self.loss_thres = 0.
self.batch = batch
self.space = space
self.logger = logger
self.times_suggest = 0 # without warmup
if self.space == 'nasbench201':
self.api = api
self.max_space = 15625
self.num_estimator = 30
elif self.space == 'darts':
self.num_estimator = 98
elif self.space == 'mb':
self.num_estimator = 140
self.model = RandomForestClassifier(n_estimators=self.num_estimator)
def _update_lossthres(self):
losses = [i['loss'] for i in self.trained_arch]
# losses_wo_inf = []
# for i in losses:
# if not np.isinf(i):
# losses_wo_inf.append(i)
self.loss_thres = np.quantile(losses, self.thres_rate) + 1e-9
self.logger.info("CKA loss_thres: {}".format(self.loss_thres))
good_arch = (np.array(losses) < self.loss_thres).tolist()
assert np.sum(good_arch) > 1, "not enough good architectures"
def _index2arch_nb201(self, index):
assert self.space == 'nasbench201', 'api mismatch'
_arch_str = self.api.arch(index)
_arch_arr = sampling.genostr2array(_arch_str)
return _arch_arr
def _trainedarch2xy(self):
features = []
labels = []
for i in self.trained_arch:
features.append(i['arch'].ravel())
labels.append(i['loss'] < self.loss_thres if self.loss_thres else False)
return features, labels
def warmup_samples(self, num_warmup):
if self.space == 'nasbench201':
sampled = list(np.random.choice(self.max_space, size=num_warmup, replace=False))
self.sampled_history = copy.deepcopy(sampled)
return sampled
elif self.space == 'darts':
return [self._single_sample() for _ in range(num_warmup)]
elif self.space == 'mb':
return [self._single_sample() for _ in range(num_warmup)]
def _single_sample(self, unique=True):
if self.space == 'nasbench201':
assert len(self.sampled_history) < self.max_space, "error: oversampled"
while True:
sample = np.random.randint(self.max_space)
if sample not in self.sampled_history:
self.sampled_history.append(sample)
return sample
elif self.space == 'darts':
if unique:
while True:
sample = np.zeros((14, 7)) # 14 edges, 7 ops
node_ids = np.asarray([np.random.choice(range(x,x+i+2), size=2, replace=False) for i, x in enumerate((0,2,5,9))]).ravel() # choose which 8 edges to keep
op = np.random.multinomial(1,[1/7.]*7, size=8) # one of the 7 meaningful ops for each of the 8 chosen edges
sample[node_ids] = op
if str(sample) not in self.sampled_history:
self.sampled_history.append(str(sample))
return sample
else:
sample = np.zeros((14, 7)) # 14 edges, 7 ops
node_ids = np.asarray([np.random.choice(range(x,x+i+2), size=2, replace=False) for i, x in enumerate((0,2,5,9))]).ravel() # choose which 8 edges to keep
op = np.random.multinomial(1,[1/7.]*7, size=8) # one of the 7 meaningful ops for each of the 8 chosen edges
sample[node_ids] = op
return sample
elif self.space == 'mb':
if unique:
while True:
c = np.zeros((20, 7))
for i in range(20):
j = np.random.randint(7)
c[i, j] = True
if str(c) not in self.sampled_history:
self.sampled_history.append(str(c))
return c
else:
c = np.zeros((20, 7))
for i in range(20):
j = np.random.randint(7)
c[i, j] = True
return c

def Warmup(self):
self._update_lossthres()
features, labels = self._trainedarch2xy()
self.model.fit(np.asarray(features, dtype='float'), np.asarray(labels, dtype='float'))
def fitting_samples(self):
self.times_suggest += 1
start_time = time.time()
if self.space == 'nasbench201':
_sample_indexes = np.random.choice(self.max_space, size=self.batch, replace=False)
_sample_archs = []
_sample_archs_idx = []
for i in _sample_indexes:
if i not in self.trained_arch_index:
_sample_archs.append(self._index2arch_nb201(i).ravel())
_sample_archs_idx.append(i)
# print("sample {} archs/batch, cost time: {}".format(len(_sample_archs), time.time()-start_time))
_sample_archs = np.array(_sample_archs)
best_id = np.argmax(self.model.predict_proba(_sample_archs)[:,1])
best_arch_id = _sample_archs_idx[best_id]
return best_arch_id
elif self.space == 'darts':
_sample_batch = np.array([self._single_sample(unique=False).ravel() for _ in range(self.batch)])
_tmp_trained_arch = [str(i['arch'].ravel()) for i in self.trained_arch]
_sample_archs = []
for i in _sample_batch:
if str(i) not in _tmp_trained_arch:
_sample_archs.append(i)
# print("sample {} archs/batch, cost time: {}".format(len(_sample_archs), time.time()-start_time))
best_id = np.argmax(self.model.predict_proba(_sample_archs)[:,1])
best_arch = _sample_archs[best_id].reshape((14, 7))
return best_arch
elif self.space == 'mb':
_sample_batch = np.array([self._single_sample(unique=False).ravel() for _ in range(self.batch)])
_tmp_trained_arch = [str(i['arch'].ravel()) for i in self.trained_arch]
_sample_archs = []
for i in _sample_batch:
if str(i) not in _tmp_trained_arch:
_sample_archs.append(i)
# print("sample {} archs/batch, cost time: {}".format(len(_sample_archs), time.time()-start_time))
best_id = np.argmax(self.model.predict_proba(_sample_archs)[:,1])
best_arch = _sample_archs[best_id].reshape((20, 7))
return best_arch
def Fitting(self):
# Called after adding data into trained_arch list.
loss = self.trained_arch[-1]['loss']
features, labels = self._trainedarch2xy()
self.model.fit(np.asarray(features, dtype='float'), np.asarray(labels, dtype='float'))
return loss < self.loss_thres if self.loss_thres else False
def optimal_arch(self, method, top=300, use_softmax=True):
assert method in ['sum', 'greedy'], 'method error.'
# with open('RF_sampling.pkl', 'wb') as f:
# pickle.dump((self.loss_thres, self.trained_arch, self.sampled_history), f)
self.logger.info("#times suggest: {}".format(self.times_suggest))
_tmp_trained_arch = [i['arch'].ravel() for i in self.trained_arch]
# self.logger.info("Unique archs {} in total archs {}".format(len(np.unique(_tmp_trained_arch, axis=0)), len(self.trained_arch)))
estimate_archs_tmp = []
for i in self.trained_arch:
if (i['loss'] < self.loss_thres if self.loss_thres else False):
estimate_archs_tmp.append(i)
self.logger.info("#arch < CKA loss_thres: {}".format(len(estimate_archs_tmp)))

_est_archs_sort = sorted(estimate_archs_tmp, key=lambda d: d['loss'])
estimate_archs = []
if top>len(_est_archs_sort):
self.logger.info('top>all, using all archs.')
for i in range(min(top, len(_est_archs_sort))):
estimate_archs.append(_est_archs_sort[i]['arch'])
if self.space == 'nasbench201':
result = []
if method == 'sum':
all_sum = estimate_archs[0]
for i in estimate_archs[1:]:
all_sum = np.add(all_sum, i)
# print(all_sum)
sum_max = list(np.argmax(all_sum, axis=1))
result = copy.deepcopy(sum_max)
elif method == 'greedy':
path_info =[[[0 for _ in range(5)] for _ in range(5)] for _ in range(6)]
for i in estimate_archs:
for j in range(1, 6):
path_info[j][np.argmax(i[j-1])][np.argmax(i[j])] += 1
_esti_arch_0 = [0]*5
for i in estimate_archs:
_esti_arch_0 = np.add(i[0], _esti_arch_0)

startindex = np.argmax(_esti_arch_0)
path_max = [startindex]
for i in range(1, 6):
# path_max.append(np.argmax(path_info[i][path_max[i-1]]))
# one more step
max_op_sum = np.max(path_info[i][path_max[i-1]])
_tmp_max_idx = []
for j in range(5):
if path_info[i][path_max[i-1]][j] == max_op_sum:
_tmp_max_idx.append(j)
if len(_tmp_max_idx) == 1 or i==5:
path_max.append(np.argmax(path_info[i][path_max[i-1]]))
else:
_next_step = np.array([np.sum(path_info[i+1][j]) for j in _tmp_max_idx])
_chosen_op = _tmp_max_idx[np.argmax(_next_step)]
path_max.append(_chosen_op)
self.logger.info("path info:\n{}".format(str(path_info)))
result = copy.deepcopy(path_max)
_tmp_np = np.array(result)
op_arr = np.zeros((_tmp_np.size, 5))
op_arr[np.arange(_tmp_np.size),_tmp_np] = 1
return op_arr
elif self.space == 'darts':
assert method == 'sum', 'only sum is supported in darts.'
all_sum = estimate_archs[0]
for i in estimate_archs[1:]:
all_sum = np.add(all_sum, i)
if use_softmax:
all_sum = softmax(all_sum)
sum_max = np.argmax(all_sum, axis=1)
start_index = 0
end_index = 0
for i in range(2, 6):
end_index += i
_, top_index = torch.topk(torch.from_numpy(sum_max[start_index:end_index]), 2)
mask = list(set(range(i)) - set(list(top_index.numpy())))
for j in mask:
sum_max[start_index+j] = 7
start_index = end_index
# print(sum_max)
_tmp_np = np.array(sum_max)
op_arr = np.zeros((_tmp_np.size, 8))
op_arr[np.arange(_tmp_np.size),_tmp_np] = 1
return op_arr
elif self.space == 'mb':
assert method == 'sum', 'only sum is supported in mb.'
all_sum = estimate_archs[0]
for i in estimate_archs[1:]:
all_sum = np.add(all_sum, i)
print(all_sum)
if use_softmax:
all_sum = softmax(all_sum)
sum_max = np.argmax(all_sum, axis=1)
print(sum_max)
_tmp_np = np.array(sum_max)
op_arr = np.zeros((_tmp_np.size, 7))
op_arr[np.arange(_tmp_np.size),_tmp_np] = 1
return op_arr

+ 1
- 0
xnas/search_algorithm/RMINAS/sampler/available_archs.txt
File diff suppressed because it is too large
View File


+ 100
- 0
xnas/search_algorithm/RMINAS/sampler/sampling.py View File

@@ -0,0 +1,100 @@
import random
import numpy as np
from scipy import stats

true_list = []
with open('xnas/search_algorithm/RMINAS/sampler/available_archs.txt', 'r') as f:
true_list = eval(f.readline())

def random_sampling(times):
sample_list = []
if times > sum(true_list):
print('can only sample {} times.'.format(sum(true_list)))
times = sum(true_list)
for _ in range(times):
i = random.randint(0, 15624)
while (not true_list[i]) or (i in sample_list):
i = random.randint(0, 15624)
sample_list.append(i)
return sample_list

def genostr2array(geno_str):
# |none~0|+|nor_conv_1x1~0|none~1|+|avg_pool_3x3~0|skip_connect~1|nor_conv_3x3~2|
OPS = ["none", "skip_connect", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3"]
_tmp = geno_str.split('|')
_tmp2 = []
for i in range(len(_tmp)):
if i in [1,3,4,6,7,8]:
_tmp2.append(_tmp[i][:-2])
_tmp_np = np.array([0]*6)
for i in range(6):
_tmp_np[i] = OPS.index(_tmp2[i])
_tmp_oh = np.zeros((_tmp_np.size, 5))
_tmp_oh[np.arange(_tmp_np.size),_tmp_np] = 1
return _tmp_oh

def array2genostr(arr):
OPS = ["none", "skip_connect", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3"]
"""[[1. 0. 0. 0. 0.]
[0. 0. 1. 0. 0.]
[1. 0. 0. 0. 0.]
[0. 0. 0. 0. 1.]
[0. 1. 0. 0. 0.]
[0. 0. 0. 1. 0.]]"""
idx = [list(i).index(1.) for i in arr]
op = [OPS[x] for x in idx]
mixed = '|' + op[0] + '~0|+|' + op[1] + '~0|' + op[2] + '~1|+|' + op[3] + '~0|' + op[4] + '~1|' + op[5] + '~2|'
return mixed

def base_transform(n, x):
a=[0,1,2,3,4,5,6,7,8,9,'A','b','C','D','E','F']
b=[]
while True:
s=n//x
y=n%x
b=b+[y]
if s==0:
break
n=s
b.reverse()
zero_arr = [0]*(6-len(b))
return zero_arr+b

def array_morearch(arr, distance):
"""[[1. 0. 0. 0. 0.]
[0. 0. 1. 0. 0.]
[1. 0. 0. 0. 0.]
[0. 0. 0. 0. 1.]
[0. 1. 0. 0. 0.]
[0. 0. 0. 1. 0.]]"""
am = list(arr.argmax(axis=1)) # [0,2,0,4,1,3]
morearch = []
if distance == 1:
for i in range(len(am)):
for j in range(5):
if am[i]!=j:
_tmp = am[:]
_tmp[i] = j
_tmp_np = np.array(_tmp)
_tmp_oh = np.zeros((_tmp_np.size, 5))
_tmp_oh[np.arange(_tmp_np.size),_tmp_np] = 1
morearch.append(_tmp_oh)
else:
for i in range(15625):
arr = base_transform(i, 5)
if distance == 6-sum([arr[i]==am[i] for i in range(6)]):
_tmp_np = np.array(arr)
_tmp_oh = np.zeros((_tmp_np.size, 5))
_tmp_oh[np.arange(_tmp_np.size),_tmp_np] = 1
morearch.append(_tmp_oh)
# morearch.append(arr)
return morearch



# test_arr = np.array([[1., 0., 0., 0., 0.],
# [0., 0., 1., 0., 0.],
# [1., 0., 0., 0., 0.],
# [0., 0., 0., 0., 1.],
# [0., 1., 0., 0., 0.],
# [0., 0., 0., 1., 0.]])

+ 46
- 0
xnas/search_algorithm/RMINAS/sampler/sampling_darts.py View File

@@ -0,0 +1,46 @@
import numpy as np
import torch
from collections import namedtuple

basic_op_list = ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect', 'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5', 'none']
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

def random_suggest():
sample = np.zeros((14, 7)) # 14 edges, 7 ops
node_ids = np.asarray([np.random.choice(range(x,x+i+2), size=2, replace=False) for i, x in enumerate((0,2,5,9))]).ravel() # choose which 8 edges to keep
op = np.random.multinomial(1,[1/7.]*7, size=8) # one of the 7 meaningful ops for each of the 8 chosen edges
sample[node_ids] = op
return sample

def ransug2alpha(suggest_sample):
b = np.c_[suggest_sample, np.zeros(14)]
return torch.from_numpy(np.r_[b,b])

def geno2147array(genotype):
"""
Genotype(normal=[[('max_pool_3x3', 0), ('dil_conv_3x3', 1)], [('max_pool_3x3', 0), ('dil_conv_5x5', 1)], [('avg_pool_3x3', 1), ('dil_conv_3x3', 0)], [('dil_conv_3x3', 0), ('sep_conv_3x3', 3)]], normal_concat=range(2, 6), reduce=[[('max_pool_3x3', 0), ('dil_conv_3x3', 1)], [('max_pool_3x3', 0), ('dil_conv_5x5', 1)], [('avg_pool_3x3', 1), ('dil_conv_3x3', 0)], [('dil_conv_3x3', 0), ('sep_conv_3x3', 3)]], reduce_concat=range(2, 6))
"""
genotype = eval(genotype)
sample = np.zeros([28, 7])
norm_gene = genotype[0]
reduce_gene = genotype[2]
num_select = list(range(2, 6))
for j, _gene in enumerate([norm_gene, reduce_gene]):
for i, node in enumerate(_gene):
for op in node:
op_name = op[0]
op_id = op[1]
if i == 0:
true_id = op_id + j * 14
else:
if i == 1:
_temp = num_select[0]
else:
_temp = sum(num_select[0:i])
true_id = op_id + _temp + j * 14
sample[true_id, basic_op_list.index(op_name)] = 1
# for i in range(28):
# if np.sum(sample[i, :]) == 0:
# sample[i, 7] = 1
return sample[0:14]

+ 253
- 0
xnas/search_algorithm/RMINAS/teacher_model/fbresnet_imagenet/fbresnet.py View File

@@ -0,0 +1,253 @@
"""code from https://github.com/Cadene/pretrained-models.pytorch.git"""

from __future__ import print_function, division, absolute_import
import torch.nn as nn
import torch.nn.functional as F
import math
import torch.utils.model_zoo as model_zoo
import torch

WEIGHT_PATH = 'teacher_model/fbresnet_imagenet/fbresnet152.pth'

__all__ = ['FBResNet',
#'fbresnet18', 'fbresnet34', 'fbresnet50', 'fbresnet101',
'fbresnet152']

pretrained_settings = {
'fbresnet152': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/fbresnet152-2e20f6b4.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
}
}


def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=True)


class BasicBlock(nn.Module):
expansion = 1

def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride

def forward(self, x):
residual = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)

if self.downsample is not None:
residual = self.downsample(x)

out += residual
out = self.relu(out)

return out


class Bottleneck(nn.Module):
expansion = 4

def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride

def forward(self, x):
residual = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)

out = self.conv3(out)
out = self.bn3(out)

if self.downsample is not None:
residual = self.downsample(x)

out += residual
out = self.relu(out)

return out

class FBResNet(nn.Module):

def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
# Special attributs
self.input_space = None
self.input_size = (299, 299, 3)
self.mean = None
self.std = None
super(FBResNet, self).__init__()
# Modules
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=True)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.last_linear = nn.Linear(512 * block.expansion, num_classes)

for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()

def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=True),
nn.BatchNorm2d(planes * block.expansion),
)

layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))

return nn.Sequential(*layers)

def features(self, input):
x = self.conv1(input)
self.conv1_input = x.clone()
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def features_extractor(self, input):
features = []
x = self.conv1(input)
self.conv1_input = x.clone()
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
features.append(x)
x = self.layer1(x)
x = self.layer2(x)
features.append(x)
x = self.layer3(x)
x = self.layer4(x)
features.append(x)
return features

def logits(self, features):
adaptiveAvgPoolWidth = features.shape[2]
x = F.avg_pool2d(features, kernel_size=adaptiveAvgPoolWidth)
x = x.view(x.size(0), -1)
x = self.last_linear(x)
return x

def forward(self, input):
x = self.features(input)
x = self.logits(x)
return x


def fbresnet18(num_classes=1000):
"""Constructs a ResNet-18 model.

Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = FBResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
return model


def fbresnet34(num_classes=1000):
"""Constructs a ResNet-34 model.

Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = FBResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
return model


def fbresnet50(num_classes=1000):
"""Constructs a ResNet-50 model.

Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = FBResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
return model


def fbresnet101(num_classes=1000):
"""Constructs a ResNet-101 model.

Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = FBResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
return model


def fbresnet152(num_classes=1000, pretrained='imagenet'):
"""Constructs a ResNet-152 model.

Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = FBResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['fbresnet152'][pretrained]
assert num_classes == settings['num_classes'], \
"num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
model.load_state_dict(torch.load(WEIGHT_PATH))
model.input_space = settings['input_space']
model.input_size = settings['input_size']
model.input_range = settings['input_range']
model.mean = settings['mean']
model.std = settings['std']
return model

+ 133
- 0
xnas/search_algorithm/RMINAS/teacher_model/resnet101_cifar100/resnet.py View File

@@ -0,0 +1,133 @@
"""
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
"""

import torch
import torch.nn as nn

class BasicBlock(nn.Module):
expansion = 1

def __init__(self, in_channels, out_channels, stride=1):
super().__init__()

self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)

self.shortcut = nn.Sequential()
if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)

def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))

class BottleNeck(nn.Module):
expansion = 4
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion),
)

self.shortcut = nn.Sequential()

if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion)
)

def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))

class ResNet(nn.Module):

def __init__(self, block, num_block, num_classes=100):
super().__init__()

self.in_channels = 64

self.conv1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True))
        # we use a different input size than the original paper,
        # so conv2_x's stride is 1
self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)

def _make_layer(self, block, out_channels, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion

return nn.Sequential(*layers)

def forward(self, x):
output = self.conv1(x)
output = self.conv2_x(output)
output = self.conv3_x(output)
output = self.conv4_x(output)
output = self.conv5_x(output)
output = self.avg_pool(output)
output = output.view(output.size(0), -1)
output = self.fc(output)

return output
def feature_extractor(self, x):
features = []
output = self.conv1(x)
output = self.conv2_x(output)
features.append(output)
output = self.conv3_x(output)
output = self.conv4_x(output)
features.append(output)
output = self.conv5_x(output)
features.append(output)
# output = self.avg_pool(output)
# output = output.view(output.size(0), -1)
# output = self.fc(output)

return features

def resnet18():
return ResNet(BasicBlock, [2, 2, 2, 2])

def resnet34():
return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
return ResNet(BottleNeck, [3, 4, 6, 3])

def resnet101():
return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
return ResNet(BottleNeck, [3, 8, 36, 3])




+ 173
- 0
xnas/search_algorithm/RMINAS/teacher_model/resnet20_cifar10/resnet.py View File

@@ -0,0 +1,173 @@
'''
Properly implemented ResNet-s for CIFAR10 as described in paper [1].

The implementation and structure of this file are heavily influenced by [2],
which is implemented for ImageNet and doesn't have option A for identity.
Moreover, most of the implementations on the web are copy-pasted from
torchvision's resnet and have the wrong number of params.

Proper ResNet-s for CIFAR10 (for fair comparison, etc.) have the following
number of layers and parameters:

name | layers | params
ResNet20 | 20 | 0.27M
ResNet32 | 32 | 0.46M
ResNet44 | 44 | 0.66M
ResNet56 | 56 | 0.85M
ResNet110 | 110 | 1.7M
ResNet1202| 1202 | 19.4M

which this implementation indeed has.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
[2] https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

If you use this implementation in your work, please don't forget to mention the
author, Yerlan Idelbayev.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.autograd import Variable

__all__ = ['ResNet', 'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110', 'resnet1202']

def _weights_init(m):
classname = m.__class__.__name__
#print(classname)
if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
def __init__(self, lambd):
super(LambdaLayer, self).__init__()
self.lambd = lambd

def forward(self, x):
return self.lambd(x)


class BasicBlock(nn.Module):
expansion = 1

def __init__(self, in_planes, planes, stride=1, option='A'):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
if option == 'A':
"""
For CIFAR10 ResNet paper uses option A.
"""
self.shortcut = LambdaLayer(lambda x:
F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
elif option == 'B':
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion * planes)
)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class ResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 16

self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
self.linear = nn.Linear(64, num_classes)

self.apply(_weights_init)

def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion

return nn.Sequential(*layers)

def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, out.size()[3])
out = out.view(out.size(0), -1)
out = self.linear(out)
return out

def feature_extractor(self, x):
features = []
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
features.append(out)
out = self.layer2(out)
features.append(out)
out = self.layer3(out)
features.append(out)
# out = F.avg_pool2d(out, out.size()[3])
# out = out.view(out.size(0), -1)
# out = self.linear(out)
return features


def resnet20():
return ResNet(BasicBlock, [3, 3, 3])


def resnet32():
return ResNet(BasicBlock, [5, 5, 5])


def resnet44():
return ResNet(BasicBlock, [7, 7, 7])


def resnet56():
return ResNet(BasicBlock, [9, 9, 9])


def resnet110():
return ResNet(BasicBlock, [18, 18, 18])


def resnet1202():
return ResNet(BasicBlock, [200, 200, 200])


def test(net):
import numpy as np
total_params = 0

for x in filter(lambda p: p.requires_grad, net.parameters()):
total_params += np.prod(x.data.numpy().shape)
print("Total number of params", total_params)
print("Total layers", len(list(filter(lambda p: p.requires_grad and len(p.data.size())>1, net.parameters()))))


if __name__ == "__main__":
for net_name in __all__:
if net_name.startswith('resnet'):
print(net_name)
test(globals()[net_name]())
print()
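
    # Illustrative extra check (not part of the original PR): RMINAS consumes
    # feature_extractor(), which returns the outputs of the three stages of the
    # CIFAR-10 teacher. Shapes below assume a 32x32 CIFAR-sized input batch.
    net = resnet20()
    net.eval()
    with torch.no_grad():
        feats = net.feature_extractor(torch.randn(2, 3, 32, 32))
    print([tuple(f.shape) for f in feats])  # (2,16,32,32), (2,32,16,16), (2,64,8,8)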

+ 182
- 0
xnas/search_algorithm/RMINAS/utils/RMI_torch.py View File

@@ -0,0 +1,182 @@
import numpy as np
import torch
import sys
import pdb

def gram_linear(x):  # this can be used for both numpy arrays and tensors
"""Compute Gram (kernel) matrix for a linear kernel.

Args:
x: A num_examples x num_features matrix of features.

Returns:
A num_examples x num_examples Gram matrix of examples.
"""
return x.dot(x.T)

def tensor_gram_linear(x):
return torch.mm(x, x.T)

def gram_rbf(x, threshold=1.0):
"""Compute Gram (kernel) matrix for an RBF kernel.

Args:
x: A num_examples x num_features matrix of features.
threshold: Fraction of median Euclidean distance to use as RBF kernel
bandwidth. (This is the heuristic we use in the paper. There are other
possible ways to set the bandwidth; we didn't try them.)

Returns:
A num_examples x num_examples Gram matrix of examples.
"""
dot_products = x.dot(x.T)
sq_norms = np.diag(dot_products)
sq_distances = -2 * dot_products + sq_norms[:, None] + sq_norms[None, :]
sq_median_distance = np.median(sq_distances)
return np.exp(-sq_distances / (2 * threshold ** 2 * sq_median_distance))


def center_gram(gram, unbiased=False):
"""Center a symmetric Gram matrix.

    This is equivalent to centering the (possibly infinite-dimensional) features
induced by the kernel before computing the Gram matrix.

Args:
gram: A num_examples x num_examples symmetric matrix.
unbiased: Whether to adjust the Gram matrix in order to compute an unbiased
estimate of HSIC. Note that this estimator may be negative.

Returns:
A symmetric matrix with centered columns and rows.
"""
if not np.allclose(gram, gram.T):
raise ValueError('Input must be a symmetric matrix.')
gram = gram.copy()

if unbiased:
# This formulation of the U-statistic, from Szekely, G. J., & Rizzo, M.
# L. (2014). Partial distance correlation with methods for dissimilarities.
# The Annals of Statistics, 42(6), 2382-2412, seems to be more numerically
# stable than the alternative from Song et al. (2007).
n = gram.shape[0]
np.fill_diagonal(gram, 0)
means = np.sum(gram, 0, dtype=np.float64) / (n - 2)
means -= np.sum(means) / (2 * (n - 1))
gram -= means[:, None]
gram -= means[None, :]
np.fill_diagonal(gram, 0)
else:#(256, 256)
means = np.mean(gram, 0, dtype=np.float64) #(256,)
means -= np.mean(means) / 2 #(256,)
gram -= means[:, None] ##(256, 256)
gram -= means[None, :] ##(256, 256)

return gram

def tensor_center_gram(gram, unbiased=False):
# if not torch.allclose(gram, gram.T):
# raise ValueError('Input must be a symmetric matrix.')
if unbiased:
n = gram.shape[0]
        gram.fill_diagonal_(0)  # in-place: torch tensors only provide fill_diagonal_
means = torch.sum(gram, 0, dtype=torch.float64) / (n-2)
means -= torch.sum(means) / (2 * (n-1))
gram -= means[:, None]
gram -= means[None, :]
        gram.fill_diagonal_(0)
else:
means = torch.mean(gram, 0, dtype=torch.float64)
means -= torch.mean(means) / 2
gram -= means[:, None]
gram -= means[None, :]
return gram

def cka(gram_x, gram_y, debiased=False):
"""Compute CKA.

Args:
gram_x: A num_examples x num_examples Gram matrix.
gram_y: A num_examples x num_examples Gram matrix.
debiased: Use unbiased estimator of HSIC. CKA may still be biased.

Returns:
The value of CKA between X and Y.
"""
gram_x = center_gram(gram_x, unbiased=debiased)
gram_y = center_gram(gram_y, unbiased=debiased)

# Note: To obtain HSIC, this should be divided by (n-1)**2 (biased variant) or
# n*(n-3) (unbiased variant), but this cancels for CKA.
scaled_hsic = gram_x.ravel().dot(gram_y.ravel())

normalization_x = np.linalg.norm(gram_x)
normalization_y = np.linalg.norm(gram_y)
return scaled_hsic / (normalization_x * normalization_y)

def _ravel(gram):
return torch.reshape(gram, (-1,))

def tensor_cka(gram_x, gram_y, debiased=False):
gram_x = tensor_center_gram(gram_x, unbiased=debiased)
gram_y = tensor_center_gram(gram_y, unbiased=debiased)
# scaled_hsic = gram_x.ravel().dot(gram_y.ravel())
scaled_hsic = _ravel(gram_x).dot(_ravel(gram_y)) # works under pytorch 1.5. Same for below.
# normalization_x = torch.linalg.norm(gram_x)
# normalization_y = torch.linalg.norm(gram_y)
normalization_x = torch.norm(gram_x)
normalization_y = torch.norm(gram_y)
return scaled_hsic / (normalization_x * normalization_y)

def _debiased_dot_product_similarity_helper(
xty, sum_squared_rows_x, sum_squared_rows_y, squared_norm_x, squared_norm_y,
n):
"""Helper for computing debiased dot product similarity (i.e. linear HSIC)."""
# This formula can be derived by manipulating the unbiased estimator from
# Song et al. (2007).
return (
xty - n / (n - 2.) * sum_squared_rows_x.dot(sum_squared_rows_y)
+ squared_norm_x * squared_norm_y / ((n - 1) * (n - 2)))

def feature_space_linear_cka(features_x, features_y, debiased=False):
"""Compute CKA with a linear kernel, in feature space.

This is typically faster than computing the Gram matrix when there are fewer
features than examples.

Args:
features_x: A num_examples x num_features matrix of features.
features_y: A num_examples x num_features matrix of features.
debiased: Use unbiased estimator of dot product similarity. CKA may still be
biased. Note that this estimator may be negative.

Returns:
The value of CKA between X and Y.
"""
features_x = features_x - np.mean(features_x, 0, keepdims=True)
features_y = features_y - np.mean(features_y, 0, keepdims=True)

dot_product_similarity = np.linalg.norm(features_x.T.dot(features_y)) ** 2
normalization_x = np.linalg.norm(features_x.T.dot(features_x))
normalization_y = np.linalg.norm(features_y.T.dot(features_y))

if debiased:
n = features_x.shape[0]
# Equivalent to np.sum(features_x ** 2, 1) but avoids an intermediate array.
sum_squared_rows_x = np.einsum('ij,ij->i', features_x, features_x)
sum_squared_rows_y = np.einsum('ij,ij->i', features_y, features_y)
squared_norm_x = np.sum(sum_squared_rows_x)
squared_norm_y = np.sum(sum_squared_rows_y)

dot_product_similarity = _debiased_dot_product_similarity_helper(
dot_product_similarity, sum_squared_rows_x, sum_squared_rows_y,
squared_norm_x, squared_norm_y, n)
normalization_x = np.sqrt(_debiased_dot_product_similarity_helper(
normalization_x ** 2, sum_squared_rows_x, sum_squared_rows_x,
squared_norm_x, squared_norm_x, n))
normalization_y = np.sqrt(_debiased_dot_product_similarity_helper(
normalization_y ** 2, sum_squared_rows_y, sum_squared_rows_y,
squared_norm_y, squared_norm_y, n))

return dot_product_similarity / (normalization_x * normalization_y)
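

if __name__ == "__main__":
    # Minimal self-check (illustrative, not part of the original PR): linear-kernel
    # CKA computed with the numpy path and the torch path should agree, and CKA of
    # two strongly correlated feature matrices should be close to 1.
    rng = np.random.RandomState(0)
    feats_a = rng.randn(64, 32)
    feats_b = feats_a + 0.1 * rng.randn(64, 32)

    cka_np = cka(gram_linear(feats_a), gram_linear(feats_b))
    cka_th = tensor_cka(tensor_gram_linear(torch.from_numpy(feats_a)),
                        tensor_gram_linear(torch.from_numpy(feats_b)))
    print('numpy CKA : {:.4f}'.format(cka_np))
    print('torch CKA : {:.4f}'.format(cka_th.item()))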

+ 207
- 0
xnas/search_algorithm/RMINAS/utils/get_accuracy.ipynb View File

@@ -0,0 +1,207 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"try to create the NAS-Bench-201 api from ./NAS-Bench-201-v1_0-e61699.pth\n",
"done.\n"
]
}
],
"source": [
"import copy\n",
"from scipy import stats\n",
"import numpy as np\n",
"import sampler.sampling as sampling\n",
"from nas_201_api import NASBench201API as api\n",
"\n",
"nb201_api = api('./NAS-Bench-201-v1_0-e61699.pth')\n",
"print('done.')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"\n",
"def get_acc_valid(dataset, index):\n",
" \"\"\"dataset in 'cifar10', 'cifar100', 'imgagenet'.\"\"\"\n",
" strings = nb201_api.query_by_arch(nb201_api.arch(index))\n",
" strings = strings.split('\\n')\n",
" if dataset == 'cifar10':\n",
" cifar10_valid_res = strings[3]\n",
" startpoint = cifar10_valid_res.find('valid : [loss = ') + len('valid : [loss = ')\n",
" toppoint = cifar10_valid_res.find('top1 = ', startpoint) + len('top1 = ')\n",
" endpoint = cifar10_valid_res.find('%]', toppoint)\n",
" ans = cifar10_valid_res[toppoint:endpoint]\n",
" return float(ans)\n",
" elif dataset == 'cifar100':\n",
" cifar100_res = strings[7]\n",
" startpoint = cifar100_res.find('valid : [loss = ') + len('valid : [loss = ')\n",
" toppoint = cifar100_res.find('top1 = ', startpoint) + len('top1 = ')\n",
" endpoint = cifar100_res.find('%]', toppoint)\n",
" ans = cifar100_res[toppoint:endpoint]\n",
" return float(ans)\n",
" elif dataset == 'imagenet':\n",
" imagenet_res = strings[9]\n",
" startpoint = imagenet_res.find('valid : [loss = ') + len('valid : [loss = ')\n",
" toppoint = imagenet_res.find('top1 = ', startpoint) + len('top1 = ')\n",
" endpoint = imagenet_res.find('%]', toppoint)\n",
" ans = imagenet_res[toppoint:endpoint]\n",
" return float(ans)\n",
" else:\n",
" print('dataset error')\n",
" exit(1)\n",
"\n",
"def get_acc_test(dataset, index):\n",
" \"\"\"dataset in 'cifar10', 'cifar100', 'imgagenet'.\"\"\"\n",
" strings = nb201_api.query_by_arch(nb201_api.arch(index))\n",
" strings = strings.split('\\n')\n",
" if dataset == 'cifar10':\n",
" cifar10_test_res = strings[5]\n",
" startpoint = cifar10_test_res.find('test : [loss = ') + len('test : [loss = ')\n",
" toppoint = cifar10_test_res.find('top1 = ', startpoint) + len('top1 = ')\n",
" endpoint = cifar10_test_res.find('%]', toppoint)\n",
" ans = cifar10_test_res[toppoint:endpoint]\n",
" return float(ans)\n",
" elif dataset == 'cifar100':\n",
" cifar100_res = strings[7]\n",
" startpoint = cifar100_res.find('test : [loss = ') + len('test : [loss = ')\n",
" toppoint = cifar100_res.find('top1 = ', startpoint) + len('top1 = ')\n",
" endpoint = cifar100_res.find('%]', toppoint)\n",
" ans = cifar100_res[toppoint:endpoint]\n",
" return float(ans)\n",
" elif dataset == 'imagenet':\n",
" imagenet_res = strings[9]\n",
" startpoint = imagenet_res.find('test : [loss = ') + len('test : [loss = ')\n",
" toppoint = imagenet_res.find('top1 = ', startpoint) + len('top1 = ')\n",
" endpoint = imagenet_res.find('%]', toppoint)\n",
" ans = imagenet_res[toppoint:endpoint]\n",
" return float(ans)\n",
" else:\n",
" print('dataset error')\n",
" exit(1)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"cifar10_valid = []\n",
"cifar100_valid = []\n",
"imagenet_valid = []\n",
"\n",
"cifar10_test = []\n",
"cifar100_test = []\n",
"imagenet_test = []\n",
"\n",
"for i in range(15625):\n",
" cifar10_valid.append(get_acc_valid('cifar10', i))\n",
" cifar100_valid.append(get_acc_valid('cifar100', i))\n",
" imagenet_valid.append(get_acc_valid('imagenet', i))\n",
" cifar10_test.append(get_acc_test('cifar10', i))\n",
" cifar100_test.append(get_acc_test('cifar100', i))\n",
" imagenet_test.append(get_acc_test('imagenet', i))\n",
" \n",
"\n",
"cifar10_valid_sort = copy.deepcopy(cifar10_valid)\n",
"cifar10_valid_sort.sort(reverse=True)\n",
"cifar100_valid_sort = copy.deepcopy(cifar100_valid)\n",
"cifar100_valid_sort.sort(reverse=True)\n",
"imagenet_valid_sort = copy.deepcopy(imagenet_valid)\n",
"imagenet_valid_sort.sort(reverse=True)\n",
"\n",
"cifar10_test_sort = copy.deepcopy(cifar10_test)\n",
"cifar10_test_sort.sort(reverse=True)\n",
"cifar100_test_sort = copy.deepcopy(cifar100_test)\n",
"cifar100_test_sort.sort(reverse=True)\n",
"imagenet_test_sort = copy.deepcopy(imagenet_test)\n",
"imagenet_test_sort.sort(reverse=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"valid\n",
"acc_cifar10: 91.22, rank_cifar10: 43\n",
"acc_cifar100: 71.97, rank_cifar100: 45\n",
"acc_imgnet16: 45.59, rank_imgnet16: 71\n",
"test\n",
"acc_cifar10: 93.98, rank_cifar10: 48\n",
"acc_cifar100: 71.69, rank_cifar100: 79\n",
"acc_imgnet16: 45.82, rank_imgnet16: 66\n"
]
}
],
"source": [
"res = [2,3,3,1,0,3]\n",
"\n",
"\n",
"import numpy as np\n",
"\n",
"def array2genostr(arr):\n",
" OPS = [\"none\", \"skip_connect\", \"nor_conv_1x1\", \"nor_conv_3x3\", \"avg_pool_3x3\"]\n",
" idx = [list(i).index(1.) for i in arr]\n",
" op = [OPS[x] for x in idx]\n",
" mixed = '|' + op[0] + '~0|+|' + op[1] + '~0|' + op[2] + '~1|+|' + op[3] + '~0|' + op[4] + '~1|' + op[5] + '~2|'\n",
" return mixed\n",
"\n",
"_tmp_np = np.array(res)\n",
"_tmp_oh = np.zeros((_tmp_np.size, 5))\n",
"_tmp_oh[np.arange(_tmp_np.size),_tmp_np] = 1\n",
"# print(_tmp_oh)\n",
"geno_str = array2genostr(_tmp_oh)\n",
"# print(geno_str)\n",
"index = nb201_api.query_index_by_arch(geno_str)\n",
"# print(index)\n",
"\n",
"print('valid')\n",
"print('acc_cifar10: {}, rank_cifar10: {}'.format(cifar10_valid[index], cifar10_valid_sort.index(cifar10_valid[index])))\n",
"print('acc_cifar100: {}, rank_cifar100: {}'.format(cifar100_valid[index], cifar100_valid_sort.index(cifar100_valid[index])))\n",
"print('acc_imgnet16: {}, rank_imgnet16: {}'.format(imagenet_valid[index], imagenet_valid_sort.index(imagenet_valid[index])))\n",
"\n",
"print('test')\n",
"print('acc_cifar10: {}, rank_cifar10: {}'.format(cifar10_test[index], cifar10_test_sort.index(cifar10_test[index])))\n",
"print('acc_cifar100: {}, rank_cifar100: {}'.format(cifar100_test[index], cifar100_test_sort.index(cifar100_test[index])))\n",
"print('acc_imgnet16: {}, rank_imgnet16: {}'.format(imagenet_test[index], imagenet_test_sort.index(imagenet_test[index])))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

+ 182
- 0
xnas/search_algorithm/RMINAS/utils/imagenet16120_loader.py View File

@@ -0,0 +1,182 @@
import os, sys, torch
import numpy as np
import torchvision.datasets as dset
import torchvision.transforms as transforms
from copy import deepcopy
from PIL import Image

import hashlib
import pickle
import torch.utils.data as data

DATA_PATH = 'data/ImageNet16'

def calculate_md5(fpath, chunk_size=1024 * 1024):
md5 = hashlib.md5()
with open(fpath, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""):
md5.update(chunk)
return md5.hexdigest()


def check_md5(fpath, md5, **kwargs):
return md5 == calculate_md5(fpath, **kwargs)


def check_integrity(fpath, md5=None):
if not os.path.isfile(fpath):
return False
if md5 is None:
return True
else:
return check_md5(fpath, md5)

class ImageNet16(data.Dataset):
# http://image-net.org/download-images
# A Downsampled Variant of ImageNet as an Alternative to the CIFAR datasets
# https://arxiv.org/pdf/1707.08819.pdf

train_list = [
["train_data_batch_1", "27846dcaa50de8e21a7d1a35f30f0e91"],
["train_data_batch_2", "c7254a054e0e795c69120a5727050e3f"],
["train_data_batch_3", "4333d3df2e5ffb114b05d2ffc19b1e87"],
["train_data_batch_4", "1620cdf193304f4a92677b695d70d10f"],
["train_data_batch_5", "348b3c2fdbb3940c4e9e834affd3b18d"],
["train_data_batch_6", "6e765307c242a1b3d7d5ef9139b48945"],
["train_data_batch_7", "564926d8cbf8fc4818ba23d2faac7564"],
["train_data_batch_8", "f4755871f718ccb653440b9dd0ebac66"],
["train_data_batch_9", "bb6dd660c38c58552125b1a92f86b5d4"],
["train_data_batch_10", "8f03f34ac4b42271a294f91bf480f29b"],
]
valid_list = [
["val_data", "3410e3017fdaefba8d5073aaa65e4bd6"],
]

def __init__(self, root, train, transform, use_num_of_class_only=None):
self.root = root
self.transform = transform
self.train = train # training set or valid set
if not self._check_integrity():
raise RuntimeError("Dataset not found or corrupted.")

if self.train:
downloaded_list = self.train_list
else:
downloaded_list = self.valid_list
self.data = []
self.targets = []

# now load the picked numpy arrays
for i, (file_name, checksum) in enumerate(downloaded_list):
file_path = os.path.join(self.root, file_name)
# print ('Load {:}/{:02d}-th : {:}'.format(i, len(downloaded_list), file_path))
with open(file_path, "rb") as f:
if sys.version_info[0] == 2:
entry = pickle.load(f)
else:
entry = pickle.load(f, encoding="latin1")
self.data.append(entry["data"])
self.targets.extend(entry["labels"])
self.data = np.vstack(self.data).reshape(-1, 3, 16, 16)
self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC
if use_num_of_class_only is not None:
assert (
isinstance(use_num_of_class_only, int)
and use_num_of_class_only > 0
and use_num_of_class_only < 1000
), "invalid use_num_of_class_only : {:}".format(use_num_of_class_only)
new_data, new_targets = [], []
for I, L in zip(self.data, self.targets):
if 1 <= L <= use_num_of_class_only:
new_data.append(I)
new_targets.append(L)
self.data = new_data
self.targets = new_targets

def __repr__(self):
return "{name}({num} images, {classes} classes)".format(
name=self.__class__.__name__,
num=len(self.data),
classes=len(set(self.targets)),
)

def __getitem__(self, index):
img, target = self.data[index], self.targets[index] - 1

img = Image.fromarray(img)

if self.transform is not None:
img = self.transform(img)

return img, target

def __len__(self):
return len(self.data)

def _check_integrity(self):
root = self.root
for fentry in self.train_list + self.valid_list:
filename, md5 = fentry[0], fentry[1]
fpath = os.path.join(root, filename)
if not check_integrity(fpath, md5):
return False
return True

class CUTOUT(object):
def __init__(self, length):
self.length = length

def __repr__(self):
return "{name}(length={length})".format(
name=self.__class__.__name__, **self.__dict__
)

def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)

y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)

mask[y1:y2, x1:x2] = 0.0
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img

def get_loader(cutout=0, batch_size=32, workers=8):
mean = [x / 255 for x in [122.68, 116.66, 104.01]]
std = [x / 255 for x in [63.22, 61.26, 65.09]]
lists = [
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(16, padding=2),
transforms.ToTensor(),
transforms.Normalize(mean, std),
]
if cutout > 0:
lists += [CUTOUT(cutout)]
train_transform = transforms.Compose(lists)
test_transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize(mean, std)]
)
# xshape = (1, 3, 16, 16)

train_data = ImageNet16(DATA_PATH, True, train_transform, 120)
test_data = ImageNet16(DATA_PATH, False, test_transform, 120)
assert len(train_data) == 151700 and len(test_data) == 6000
# assert len(train_data) == 151700
num_classes = 120
train_loader = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True, num_workers=workers)
valid_loader = torch.utils.data.DataLoader(test_data, batch_size, num_workers=workers)
# return train_data, test_data, xshape, num_classes
return train_loader, valid_loader
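
if __name__ == "__main__":
    # Illustrative usage (not part of the original PR); it assumes the downsampled
    # ImageNet16 batch files are already present under DATA_PATH.
    train_loader, valid_loader = get_loader(cutout=0, batch_size=32, workers=4)
    images, labels = next(iter(train_loader))
    print(images.shape, labels.shape)  # torch.Size([32, 3, 16, 16]) torch.Size([32])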

+ 91
- 0
xnas/search_algorithm/RMINAS/utils/loader.py View File

@@ -0,0 +1,91 @@
import os
import random
import torch
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import CIFAR100
from torchvision.datasets import ImageFolder


def cifar10_data(batchsize, workers):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])

_train_loader = torch.utils.data.DataLoader(
CIFAR10(root='./data', train=True, transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, 4),
transforms.ToTensor(),
normalize,
]), download=True),
batch_size=batchsize*16, shuffle=True,
num_workers=workers, pin_memory=True)

target_i = random.randint(0, len(_train_loader)-1)
more_data_X, more_data_y = None, None
for i, (more_data_X, more_data_y) in enumerate(_train_loader):
if i == target_i:
break
more_data_X = more_data_X.cuda()
more_data_y = more_data_y.cuda()
return more_data_X, more_data_y


def cifar100_data(batchsize, workers):
CIFAR100_TRAIN_MEAN = (
0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR100_TRAIN_STD = (
0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

transform_train = transforms.Compose([
# transforms.ToPILImage(),
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(15),
transforms.ToTensor(),
transforms.Normalize(CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD)
])
cifar100_training = CIFAR100(
root='./data', train=True, download=True, transform=transform_train)
cifar100_training_loader = torch.utils.data.DataLoader(
cifar100_training, shuffle=True,
batch_size=batchsize*16, num_workers=workers)

target_i = random.randint(0, len(cifar100_training_loader)-1)
more_data_X, more_data_y = None, None
for i, (more_data_X, more_data_y) in enumerate(cifar100_training_loader):
if i == target_i:
break
more_data_X = more_data_X.cuda()
more_data_y = more_data_y.cuda()
return more_data_X, more_data_y

def imagenet_data(batchsize, workers, data_dir='/gdata/ImageNet2012/'):
"""Data preparing"""
traindir = os.path.join(data_dir, 'train')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_data = ImageFolder(
traindir,
transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(
brightness=0.4,
contrast=0.4,
saturation=0.4,
hue=0.2),
transforms.ToTensor(),
normalize,
]))

train_loader = torch.utils.data.DataLoader(
train_data, batch_size=batchsize*16, shuffle=True, pin_memory=True, num_workers=workers)

target_i = random.randint(0, len(train_loader)-1)
more_data_X, more_data_y = None, None
for i, (more_data_X, more_data_y) in enumerate(train_loader):
if i == target_i:
break
more_data_X = more_data_X.cuda()
more_data_y = more_data_y.cuda()
return more_data_X, more_data_y
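
if __name__ == "__main__":
    # Illustrative usage (not part of the original PR); a CUDA device is assumed
    # because the helpers move the randomly sampled batch to the GPU.
    data_X, data_y = cifar10_data(batchsize=16, workers=4)
    print(data_X.shape, data_y.shape)  # one randomly chosen batch of batchsize*16 images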

+ 1
- 1
xnas/search_space/DARTS/cnn.py View File

@@ -146,7 +146,7 @@ class DartsCNN(nn.Module):
sample[true_id, self.basic_op_list.index(op_name)] = 1
for i in range(self.all_edges):
if np.sum(sample[i, :]) == 0:
sample[i, 7] = 1
sample[i, len(self.basic_op_list)-1] = 1
return sample

def _node_index(self, n_nodes, input_nodes=2, start_index=0):


+ 195
- 0
xnas/search_space/RMINAS/DARTS/darts_cnn.py View File

@@ -0,0 +1,195 @@
from xnas.search_space.cellbased_basic_ops import *
import xnas.search_space.cellbased_basic_genotypes as gt

basic_op_list = ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect', 'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5', 'none']

# Augmented DARTS

def geno_from_alpha(theta):
Genotype = namedtuple(
'Genotype', 'normal normal_concat reduce reduce_concat')
theta_norm = darts_weight_unpack(
theta[0:14], 4)
theta_reduce = darts_weight_unpack(
theta[14:], 4)
gene_normal = parse_from_numpy(
theta_norm, k=2, basic_op_list=basic_op_list)
gene_reduce = parse_from_numpy(
theta_reduce, k=2, basic_op_list=basic_op_list)
concat = range(2, 6) # concat all intermediate nodes
return Genotype(normal=gene_normal, normal_concat=concat,
reduce=gene_reduce, reduce_concat=concat)

def reformat_DARTS(genotype):
"""
format genotype for DARTS-like
from:
Genotype(normal=[[('sep_conv_3x3', 1), ('sep_conv_5x5', 0)], [('sep_conv_3x3', 2), ('max_pool_3x3', 1)], [('sep_conv_3x3', 3), ('dil_conv_3x3', 2)], [('dil_conv_5x5', 4), ('dil_conv_5x5', 3)]], normal_concat=range(2, 6), reduce=[[('max_pool_3x3', 0), ('sep_conv_5x5', 1)], [('max_pool_3x3', 0), ('dil_conv_5x5', 2)], [('max_pool_3x3', 0), ('sep_conv_5x5', 1)], [('dil_conv_5x5', 4), ('max_pool_3x3', 0)]], reduce_concat=range(2, 6))
to:
Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])
"""
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
_normal = []
_reduce = []
for i in genotype.normal:
for j in i:
_normal.append(j)
for i in genotype.reduce:
for j in i:
_reduce.append(j)
_normal_concat = [i for i in genotype.normal_concat]
_reduce_concat = [i for i in genotype.reduce_concat]
r_genotype = Genotype(
normal=_normal,
normal_concat=_normal_concat,
reduce=_reduce,
reduce_concat=_reduce_concat
)
return r_genotype

class AuxiliaryHead(nn.Module):
""" Auxiliary head in 2/3 place of network to let the gradient flow well """
def __init__(self, input_size, C, n_classes):
""" assuming input size 7x7 or 8x8 """
# assert input_size in [7, 8]
super().__init__()
if input_size in [7, 8]:
self.net = nn.Sequential(
nn.ReLU(inplace=True),
nn.AvgPool2d(5, stride=input_size-5, padding=0, count_include_pad=False), # 2x2 out
nn.Conv2d(C, 128, kernel_size=1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, kernel_size=2, bias=False), # 1x1 out
nn.BatchNorm2d(768),
nn.ReLU(inplace=True))
else:
self.net = nn.Sequential(
nn.ReLU(inplace=True),
nn.AdaptiveAvgPool2d((2, 2)),
nn.Conv2d(C, 128, kernel_size=1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, kernel_size=2, bias=False), # 1x1 out
nn.BatchNorm2d(768),
nn.ReLU(inplace=True))
self.linear = nn.Linear(768, n_classes)

def forward(self, x):
out = self.net(x)
out = out.view(out.size(0), -1) # flatten
logits = self.linear(out)
return logits


class AugmentCell(nn.Module):
""" Cell for augmentation
Each edge is discrete.
"""
def __init__(self, genotype, C_pp, C_p, C, reduction_p, reduction):
super().__init__()
self.reduction = reduction
self.n_nodes = len(genotype.normal)

if reduction_p:
self.preproc0 = FactorizedReduce(C_pp, C)
else:
self.preproc0 = StdConv(C_pp, C, 1, 1, 0)
self.preproc1 = StdConv(C_p, C, 1, 1, 0)

# generate dag
if reduction:
gene = genotype.reduce
self.concat = genotype.reduce_concat
else:
gene = genotype.normal
self.concat = genotype.normal_concat

self.dag = gt.to_dag(C, gene, reduction)

def forward(self, s0, s1):
s0 = self.preproc0(s0)
s1 = self.preproc1(s1)

states = [s0, s1]
for edges in self.dag:
s_cur = sum(op(states[op.s_idx]) for op in edges)
states.append(s_cur)

s_out = torch.cat([states[i] for i in self.concat], dim=1)

return s_out


class AugmentCNN(nn.Module):
""" Augmented CNN model """
def __init__(self, input_size, C_in, C, n_classes, n_layers, auxiliary, genotype,
stem_multiplier=3):
"""
Args:
input_size: size of height and width (assuming height = width)
C_in: # of input channels
C: # of starting model channels
"""
super().__init__()
self.C_in = C_in
self.C = C
self.n_classes = n_classes
self.n_layers = n_layers
# self.genotype = gt.from_str(genotype)
self.genotype = genotype
# aux head position
self.aux_pos = 2*n_layers//3 if auxiliary else -1

C_cur = stem_multiplier * C
self.stem = nn.Sequential(
nn.Conv2d(C_in, C_cur, 3, 1, 1, bias=False),
nn.BatchNorm2d(C_cur)
)

C_pp, C_p, C_cur = C_cur, C_cur, C

self.cells = nn.ModuleList()
reduction_p = False
for i in range(n_layers):
if i in [n_layers//3, 2*n_layers//3]:
C_cur *= 2
reduction = True
else:
reduction = False

cell = AugmentCell(self.genotype, C_pp, C_p, C_cur, reduction_p, reduction)
reduction_p = reduction
self.cells.append(cell)
C_cur_out = C_cur * len(cell.concat)
C_pp, C_p = C_p, C_cur_out

if i == self.aux_pos:
# [!] this auxiliary head is ignored in computing parameter size
# by the name 'aux_head'
self.aux_head = AuxiliaryHead(input_size//4, C_p, n_classes)

self.gap = nn.AdaptiveAvgPool2d(1)
self.linear = nn.Linear(C_p, n_classes)

def forward(self, x):
s0 = s1 = self.stem(x)
features = []
aux_logits = None
for i, cell in enumerate(self.cells):
s0, s1 = s1, cell(s0, s1)
if i in [int(self.n_layers//3-1), int(2*self.n_layers//3-1), int(self.n_layers-1)]:
features.append(s1)
if i == self.aux_pos and self.training:
aux_logits = self.aux_head(s1)
out = self.gap(s1)
out = out.view(out.size(0), -1) # flatten
logits = self.linear(out)
return features, logits, aux_logits

def drop_path_prob(self, p):
""" Set drop path probability """
for module in self.modules():
if isinstance(module, DropPath_):
module.p = p
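
if __name__ == "__main__":
    # Illustrative round trip (not part of the original PR): turn a random
    # 28 x len(basic_op_list) architecture-weight matrix into a genotype and
    # flatten it into the standard DARTS format. This assumes the helpers
    # darts_weight_unpack/parse_from_numpy imported above accept a NumPy array.
    import numpy as np
    alpha = np.random.rand(28, len(basic_op_list))
    genotype = geno_from_alpha(alpha)
    print(reformat_DARTS(genotype))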

+ 222
- 0
xnas/search_space/RMINAS/DARTS/darts_img.py View File

@@ -0,0 +1,222 @@
import torch
import torch.nn as nn
from xnas.search_space.RMINAS.DARTS.pcdarts_op import *  # absolute import so this module resolves inside the xnas package
from torch.autograd import Variable

def drop_path(x, drop_prob):
if drop_prob > 0.:
keep_prob = 1.-drop_prob
mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob))
x.div_(keep_prob)
x.mul_(mask)
return x

class Cell(nn.Module):

def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
super(Cell, self).__init__()
print(C_prev_prev, C_prev, C)

if reduction_prev:
self.preprocess0 = FactorizedReduce(C_prev_prev, C)
else:
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
if reduction:
op_names, indices = zip(*genotype.reduce)
concat = genotype.reduce_concat
else:
op_names, indices = zip(*genotype.normal)
concat = genotype.normal_concat
self._compile(C, op_names, indices, concat, reduction)

def _compile(self, C, op_names, indices, concat, reduction):
assert len(op_names) == len(indices)
self._steps = len(op_names) // 2
self._concat = concat
self.multiplier = len(concat)

self._ops = nn.ModuleList()
for name, index in zip(op_names, indices):
stride = 2 if reduction and index < 2 else 1
op = OPS[name](C, stride, True)
self._ops += [op]
self._indices = indices

def forward(self, s0, s1, drop_prob):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)

states = [s0, s1]
for i in range(self._steps):
h1 = states[self._indices[2*i]]
h2 = states[self._indices[2*i+1]]
op1 = self._ops[2*i]
op2 = self._ops[2*i+1]
h1 = op1(h1)
h2 = op2(h2)
if self.training and drop_prob > 0.:
if not isinstance(op1, Identity):
h1 = drop_path(h1, drop_prob)
if not isinstance(op2, Identity):
h2 = drop_path(h2, drop_prob)
s = h1 + h2
states += [s]
return torch.cat([states[i] for i in self._concat], dim=1)


class AuxiliaryHeadCIFAR(nn.Module):

def __init__(self, C, num_classes):
"""assuming input size 8x8"""
super(AuxiliaryHeadCIFAR, self).__init__()
self.features = nn.Sequential(
nn.ReLU(inplace=True),
nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
nn.Conv2d(C, 128, 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, 2, bias=False),
nn.BatchNorm2d(768),
nn.ReLU(inplace=True)
)
self.classifier = nn.Linear(768, num_classes)

def forward(self, x):
x = self.features(x)
x = self.classifier(x.view(x.size(0),-1))
return x


class AuxiliaryHeadImageNet(nn.Module):

def __init__(self, C, num_classes):
"""assuming input size 14x14"""
super(AuxiliaryHeadImageNet, self).__init__()
self.features = nn.Sequential(
nn.ReLU(inplace=True),
nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
nn.Conv2d(C, 128, 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, 2, bias=False),
# NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
# Commenting it out for consistency with the experiments in the paper.
# nn.BatchNorm2d(768),
nn.ReLU(inplace=True)
)
self.classifier = nn.Linear(768, num_classes)

def forward(self, x):
x = self.features(x)
x = self.classifier(x.view(x.size(0),-1))
return x


class NetworkCIFAR(nn.Module):

def __init__(self, C, num_classes, layers, auxiliary, genotype):
super(NetworkCIFAR, self).__init__()
self._layers = layers
self._auxiliary = auxiliary

stem_multiplier = 3
C_curr = stem_multiplier*C
self.stem = nn.Sequential(
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
nn.BatchNorm2d(C_curr)
)
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
self.cells = nn.ModuleList()
reduction_prev = False
for i in range(layers):
if i in [layers//3, 2*layers//3]:
C_curr *= 2
reduction = True
else:
reduction = False
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
reduction_prev = reduction
self.cells += [cell]
C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
if i == 2*layers//3:
C_to_auxiliary = C_prev

if auxiliary:
self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)

def forward(self, input):
logits_aux = None
s0 = s1 = self.stem(input)
for i, cell in enumerate(self.cells):
s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
if i == 2*self._layers//3:
if self._auxiliary and self.training:
logits_aux = self.auxiliary_head(s1)
out = self.global_pooling(s1)
logits = self.classifier(out.view(out.size(0),-1))
return logits, logits_aux


class NetworkImageNet(nn.Module):

def __init__(self, C, num_classes, layers, auxiliary, genotype):
super(NetworkImageNet, self).__init__()
self._layers = layers
self._auxiliary = auxiliary

self.stem0 = nn.Sequential(
nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C // 2),
nn.ReLU(inplace=True),
nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C),
)

self.stem1 = nn.Sequential(
nn.ReLU(inplace=True),
nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C),
)

C_prev_prev, C_prev, C_curr = C, C, C

self.cells = nn.ModuleList()
reduction_prev = True
for i in range(layers):
if i in [layers // 3, 2 * layers // 3]:
C_curr *= 2
reduction = True
else:
reduction = False
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
reduction_prev = reduction
self.cells += [cell]
C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
if i == 2 * layers // 3:
C_to_auxiliary = C_prev

if auxiliary:
self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
self.global_pooling = nn.AvgPool2d(7)
self.classifier = nn.Linear(C_prev, num_classes)

def forward(self, input):
logits_aux = None
features = []
s0 = self.stem0(input)
s1 = self.stem1(s0)
for i, cell in enumerate(self.cells):
s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
if i in [int(self._layers//3-1), int(2*self._layers//3-1), int(self._layers-1)]:
features.append(s1)
if i == 2 * self._layers // 3:
if self._auxiliary and self.training:
logits_aux = self.auxiliary_head(s1)
out = self.global_pooling(s1)
logits = self.classifier(out.view(out.size(0), -1))
return features, logits, logits_aux

+ 52
- 0
xnas/search_space/RMINAS/DARTS/darts_plot.py View File

@@ -0,0 +1,52 @@
from collections import namedtuple
from graphviz import Digraph
import sys

Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# assign the genotype so the plotting calls in __main__ below can use it
genotype = Genotype(normal=[('sep_conv_5x5', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('avg_pool_3x3', 3), ('dil_conv_5x5', 2), ('avg_pool_3x3', 4)], normal_concat=[2, 3, 4, 5], reduce=[('sep_conv_5x5', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('avg_pool_3x3', 3), ('dil_conv_5x5', 2), ('avg_pool_3x3', 4)], reduce_concat=[2, 3, 4, 5])

def plot(genotype, filename):
g = Digraph(
format='pdf',
edge_attr=dict(fontsize='20', fontname="times"),
node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
engine='dot')
g.body.extend(['rankdir=LR'])

g.node("c_{k-2}", fillcolor='darkseagreen2')
g.node("c_{k-1}", fillcolor='darkseagreen2')
assert len(genotype) % 2 == 0
steps = len(genotype) // 2

for i in range(steps):
g.node(str(i), fillcolor='lightblue')

for i in range(steps):
for k in [2*i, 2*i + 1]:
op, j = genotype[k]
if j == 0:
u = "c_{k-2}"
elif j == 1:
u = "c_{k-1}"
else:
u = str(j-2)
v = str(i)
g.edge(u, v, label=op, fillcolor="gray")

g.node("c_{k}", fillcolor='palegoldenrod')
for i in range(steps):
g.edge(str(i), "c_{k}", fillcolor="gray")

g.render(filename, view=False)


if __name__ == '__main__':
# try:
# genotype = eval('genotypes.{}'.format(genotype))
# except AttributeError:
# print("{} is not specified in genotypes.py".format(genotype_name))
# sys.exit(1)

plot(genotype.normal, "normal")
plot(genotype.reduce, "reduction")

+ 104
- 0
xnas/search_space/RMINAS/DARTS/pcdarts_op.py View File

@@ -0,0 +1,104 @@
import torch
import torch.nn as nn

OPS = {
'none' : lambda C, stride, affine: Zero(stride),
'avg_pool_3x3' : lambda C, stride, affine: nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
'max_pool_3x3' : lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
'conv_7x1_1x7' : lambda C, stride, affine: nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
nn.Conv2d(C, C, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
nn.BatchNorm2d(C, affine=affine)
),
}

class ReLUConvBN(nn.Module):

def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(ReLUConvBN, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
nn.BatchNorm2d(C_out, affine=affine)
)

def forward(self, x):
return self.op(x)

class DilConv(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
super(DilConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_out, affine=affine),
)

def forward(self, x):
return self.op(x)


class SepConv(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(SepConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_in, affine=affine),
nn.ReLU(inplace=False),
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, groups=C_in, bias=False),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_out, affine=affine),
)

def forward(self, x):
return self.op(x)


class Identity(nn.Module):

def __init__(self):
super(Identity, self).__init__()

def forward(self, x):
return x


class Zero(nn.Module):

def __init__(self, stride):
super(Zero, self).__init__()
self.stride = stride

def forward(self, x):
if self.stride == 1:
return x.mul(0.)
return x[:,:,::self.stride,::self.stride].mul(0.)


class FactorizedReduce(nn.Module):

def __init__(self, C_in, C_out, affine=True):
super(FactorizedReduce, self).__init__()
assert C_out % 2 == 0
self.relu = nn.ReLU(inplace=False)
self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
self.bn = nn.BatchNorm2d(C_out, affine=affine)

def forward(self, x):
x = self.relu(x)
out = torch.cat([self.conv_1(x), self.conv_2(x[:,:,1:,1:])], dim=1)
out = self.bn(out)
return out

+ 151
- 0
xnas/search_space/RMINAS/MBConv/mb_v3_cnn.py View File

@@ -0,0 +1,151 @@
from xnas.search_space.mb_ops import *
from xnas.search_space.proxyless_cnn import ProxylessNASNets
from xnas.search_space.utils import profile, make_divisible
import json
import xnas.core.logging as logging
import numpy as np
import os
from xnas.core.config import cfg

logger = logging.get_logger(__name__)


class MobileNetV3(MyNetwork):

def __init__(self, n_classes=1000, width_mult=1.2, depth=4):
super(MobileNetV3, self).__init__()

self.width_mult = width_mult
self.depth = depth
self.conv_candidates = [
'3x3_MBConv3', '3x3_MBConv6',
'5x5_MBConv3', '5x5_MBConv6',
'7x7_MBConv3', '7x7_MBConv6',
] if len(cfg.MB.BASIC_OP) == 0 else cfg.MB.BASIC_OP

# ofa
self.base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

final_expand_width = make_divisible(
self.base_stage_width[-2] * self.width_mult, 8)
last_channel = make_divisible(
self.base_stage_width[-1] * self.width_mult, 8)

self.stride_stages = [1, 2, 2, 2, 1, 2] if len(
cfg.MB.STRIDE_STAGES) == 0 else cfg.MB.STRIDE_STAGES
self.act_stages = ['relu', 'relu', 'relu', 'h_swish',
'h_swish', 'h_swish'] if len(cfg.MB.ACT_STAGES) == 0 else cfg.MB.ACT_STAGES
self.se_stages = [False, False, True, False, True, True] if len(
cfg.MB.SE_STAGES) == 0 else cfg.MB.SE_STAGES
n_block_list = [1] + [self.depth] * 5
width_list = []
for base_width in self.base_stage_width[:-2]:
width = make_divisible(base_width * self.width_mult, 8)
width_list.append(width)
input_channel = width_list[0]

# first conv layer
first_conv = ConvLayer(
3, input_channel, kernel_size=3, stride=2, act_func='h_swish')

# first block
first_block_conv = MBInvertedConvLayer(
in_channels=input_channel, out_channels=input_channel, kernel_size=3, stride=self.stride_stages[0],
expand_ratio=1, act_func=self.act_stages[0], use_se=self.se_stages[0],
)
first_block = MobileInvertedResidualBlock(
first_block_conv, IdentityLayer(input_channel, input_channel))

# inverted residual blocks
blocks = nn.ModuleList()
blocks.append(first_block)
feature_dim = input_channel
self.candidate_ops = []

for width, n_block, s, act_func, use_se in zip(width_list[1:], n_block_list[1:],
self.stride_stages[1:], self.act_stages[1:], self.se_stages[1:]):

for i in range(n_block):
if i == 0:
stride = s
else:
stride = 1
# conv
if stride == 1 and feature_dim == width:
modified_conv_candidates = self.conv_candidates + ['Zero']
else:
modified_conv_candidates = self.conv_candidates + \
['3x3_MBConv1']
self.candidate_ops.append(modified_conv_candidates)
conv_op = MixedEdge(candidate_ops=build_candidate_ops(
modified_conv_candidates, feature_dim, width, stride, 'weight_bn_act',
act_func=act_func, use_se=use_se), )
if stride == 1 and feature_dim == width:
shortcut = IdentityLayer(feature_dim, feature_dim)
else:
shortcut = None
blocks.append(MobileInvertedResidualBlock(conv_op, shortcut))
feature_dim = width
# final expand layer, feature mix layer & classifier
final_expand_layer = ConvLayer(
feature_dim, final_expand_width, kernel_size=1, act_func='h_swish')
feature_mix_layer = ConvLayer(
final_expand_width, last_channel, kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
)
classifier = LinearLayer(last_channel, n_classes)

self.first_conv = first_conv
self.blocks = blocks
self.final_expand_layer = final_expand_layer
self.feature_mix_layer = feature_mix_layer
self.classifier = classifier
self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)

self.all_edges = len(self.blocks) - 1
self.num_edges = len(self.blocks) - 1
self.num_ops = len(self.conv_candidates) + 1

""" MyNetwork required methods """

@staticmethod
def name():
return 'OFAMobileNetV3'

def forward(self, x, sample):
features = []
# first conv
x = self.first_conv(x)
assert len(self.blocks) - 1 == len(sample)
for i in range(len(self.blocks[1:])):
this_block_conv = self.blocks[i+1].mobile_inverted_conv
if isinstance(this_block_conv, MixedEdge):
# one hot like vector
this_block_conv.active_vector = sample[i]
else:
raise NotImplementedError
for k,block in enumerate(self.blocks):
x = block(x)
if k in [4,12,20]:
features.append(x)
x = self.final_expand_layer(x)
x = self.global_avg_pooling(x)
x = self.feature_mix_layer(x)
x = x.view(x.size(0), -1) # flatten
x = self.classifier(x)
return x, features

def genotype(self, theta):
genotype = []
for i in range(theta.shape[0]):
genotype.append(self.candidate_ops[i][np.argmax(theta[i])])
return genotype


def _MobileNetV3CNN():
# remember to add cuda() for it.
return MobileNetV3(
n_classes=cfg.SEARCH.NUM_CLASSES,
width_mult=cfg.MB.WIDTH_MULTI,
depth=cfg.MB.DEPTH)
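
if __name__ == "__main__":
    # Illustrative forward pass (not part of the original PR). It assumes the
    # MB.* defaults registered in xnas.core.config are usable as-is and that
    # MixedEdge accepts a NumPy one-hot row as its active_vector.
    import torch
    net = MobileNetV3(n_classes=10, width_mult=1.2, depth=4)
    sample = np.eye(net.num_ops)[np.random.choice(net.num_ops, net.num_edges)]
    logits, features = net(torch.randn(2, 3, 224, 224), sample)
    print(logits.shape, [tuple(f.shape) for f in features])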

+ 274
- 0
xnas/search_space/RMINAS/NB201/geno.py View File

@@ -0,0 +1,274 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################

from copy import deepcopy

def get_combination(space, num):
combs = []
for i in range(num):
if i == 0:
for func in space:
combs.append([(func, i)])
else:
new_combs = []
for string in combs:
for func in space:
xstring = string + [(func, i)]
new_combs.append(xstring)
combs = new_combs
return combs


class Structure:
def __init__(self, genotype):
assert isinstance(genotype, list) or isinstance(
genotype, tuple
), "invalid class of genotype : {:}".format(type(genotype))
self.node_num = len(genotype) + 1
self.nodes = []
self.node_N = []
for idx, node_info in enumerate(genotype):
assert isinstance(node_info, list) or isinstance(
node_info, tuple
), "invalid class of node_info : {:}".format(type(node_info))
assert len(node_info) >= 1, "invalid length : {:}".format(len(node_info))
for node_in in node_info:
assert isinstance(node_in, list) or isinstance(
node_in, tuple
), "invalid class of in-node : {:}".format(type(node_in))
assert (
len(node_in) == 2 and node_in[1] <= idx
), "invalid in-node : {:}".format(node_in)
self.node_N.append(len(node_info))
self.nodes.append(tuple(deepcopy(node_info)))

def tolist(self, remove_str):
        # convert this class to a list; if remove_str is 'none', the 'none' operation is removed.
        # note that we re-order the input nodes in this function
        # returns (genotype_list, success); if success is False, the genotype is not a valid connectivity
genotypes = []
for node_info in self.nodes:
node_info = list(node_info)
node_info = sorted(node_info, key=lambda x: (x[1], x[0]))
node_info = tuple(filter(lambda x: x[0] != remove_str, node_info))
if len(node_info) == 0:
return None, False
genotypes.append(node_info)
return genotypes, True

def node(self, index):
assert index > 0 and index <= len(self), "invalid index={:} < {:}".format(
index, len(self)
)
return self.nodes[index]

def tostr(self):
strings = []
for node_info in self.nodes:
string = "|".join([x[0] + "~{:}".format(x[1]) for x in node_info])
string = "|{:}|".format(string)
strings.append(string)
return "+".join(strings)

def check_valid(self):
nodes = {0: True}
for i, node_info in enumerate(self.nodes):
sums = []
for op, xin in node_info:
if op == "none" or nodes[xin] is False:
x = False
else:
x = True
sums.append(x)
nodes[i + 1] = sum(sums) > 0
return nodes[len(self.nodes)]

def to_unique_str(self, consider_zero=False):
        # this is used to identify isomorphic cells, which requires prior knowledge of the operations
# two operations are special, i.e., none and skip_connect
nodes = {0: "0"}
for i_node, node_info in enumerate(self.nodes):
cur_node = []
for op, xin in node_info:
if consider_zero is None:
x = "(" + nodes[xin] + ")" + "@{:}".format(op)
elif consider_zero:
if op == "none" or nodes[xin] == "#":
x = "#" # zero
elif op == "skip_connect":
x = nodes[xin]
else:
x = "(" + nodes[xin] + ")" + "@{:}".format(op)
else:
if op == "skip_connect":
x = nodes[xin]
else:
x = "(" + nodes[xin] + ")" + "@{:}".format(op)
cur_node.append(x)
nodes[i_node + 1] = "+".join(sorted(cur_node))
return nodes[len(self.nodes)]

def check_valid_op(self, op_names):
for node_info in self.nodes:
for inode_edge in node_info:
# assert inode_edge[0] in op_names, 'invalid op-name : {:}'.format(inode_edge[0])
if inode_edge[0] not in op_names:
return False
return True

def __repr__(self):
return "{name}({node_num} nodes with {node_info})".format(
name=self.__class__.__name__, node_info=self.tostr(), **self.__dict__
)

def __len__(self):
return len(self.nodes) + 1

def __getitem__(self, index):
return self.nodes[index]

@staticmethod
def str2structure(xstr):
if isinstance(xstr, Structure):
return xstr
assert isinstance(xstr, str), "must take string (not {:}) as input".format(
type(xstr)
)
nodestrs = xstr.split("+")
genotypes = []
for i, node_str in enumerate(nodestrs):
inputs = list(filter(lambda x: x != "", node_str.split("|")))
for xinput in inputs:
assert len(xinput.split("~")) == 2, "invalid input length : {:}".format(
xinput
)
inputs = (xi.split("~") for xi in inputs)
input_infos = tuple((op, int(IDX)) for (op, IDX) in inputs)
genotypes.append(input_infos)
return Structure(genotypes)

@staticmethod
def str2fullstructure(xstr, default_name="none"):
assert isinstance(xstr, str), "must take string (not {:}) as input".format(
type(xstr)
)
nodestrs = xstr.split("+")
genotypes = []
for i, node_str in enumerate(nodestrs):
inputs = list(filter(lambda x: x != "", node_str.split("|")))
for xinput in inputs:
assert len(xinput.split("~")) == 2, "invalid input length : {:}".format(
xinput
)
inputs = (xi.split("~") for xi in inputs)
input_infos = list((op, int(IDX)) for (op, IDX) in inputs)
all_in_nodes = list(x[1] for x in input_infos)
for j in range(i):
if j not in all_in_nodes:
input_infos.append((default_name, j))
node_info = sorted(input_infos, key=lambda x: (x[1], x[0]))
genotypes.append(tuple(node_info))
return Structure(genotypes)

@staticmethod
def gen_all(search_space, num, return_ori):
assert isinstance(search_space, list) or isinstance(
search_space, tuple
), "invalid class of search-space : {:}".format(type(search_space))
assert (
num >= 2
), "There should be at least two nodes in a neural cell instead of {:}".format(
num
)
all_archs = get_combination(search_space, 1)
for i, arch in enumerate(all_archs):
all_archs[i] = [tuple(arch)]

for inode in range(2, num):
cur_nodes = get_combination(search_space, inode)
new_all_archs = []
for previous_arch in all_archs:
for cur_node in cur_nodes:
new_all_archs.append(previous_arch + [tuple(cur_node)])
all_archs = new_all_archs
if return_ori:
return all_archs
else:
return [Structure(x) for x in all_archs]


ResNet_CODE = Structure(
[
(("nor_conv_3x3", 0),), # node-1
(("nor_conv_3x3", 1),), # node-2
(("skip_connect", 0), ("skip_connect", 2)),
] # node-3
)

AllConv3x3_CODE = Structure(
[
(("nor_conv_3x3", 0),), # node-1
(("nor_conv_3x3", 0), ("nor_conv_3x3", 1)), # node-2
(("nor_conv_3x3", 0), ("nor_conv_3x3", 1), ("nor_conv_3x3", 2)),
] # node-3
)

AllFull_CODE = Structure(
[
(
("skip_connect", 0),
("nor_conv_1x1", 0),
("nor_conv_3x3", 0),
("avg_pool_3x3", 0),
), # node-1
(
("skip_connect", 0),
("nor_conv_1x1", 0),
("nor_conv_3x3", 0),
("avg_pool_3x3", 0),
("skip_connect", 1),
("nor_conv_1x1", 1),
("nor_conv_3x3", 1),
("avg_pool_3x3", 1),
), # node-2
(
("skip_connect", 0),
("nor_conv_1x1", 0),
("nor_conv_3x3", 0),
("avg_pool_3x3", 0),
("skip_connect", 1),
("nor_conv_1x1", 1),
("nor_conv_3x3", 1),
("avg_pool_3x3", 1),
("skip_connect", 2),
("nor_conv_1x1", 2),
("nor_conv_3x3", 2),
("avg_pool_3x3", 2),
),
] # node-3
)

AllConv1x1_CODE = Structure(
[
(("nor_conv_1x1", 0),), # node-1
(("nor_conv_1x1", 0), ("nor_conv_1x1", 1)), # node-2
(("nor_conv_1x1", 0), ("nor_conv_1x1", 1), ("nor_conv_1x1", 2)),
] # node-3
)

AllIdentity_CODE = Structure(
[
(("skip_connect", 0),), # node-1
(("skip_connect", 0), ("skip_connect", 1)), # node-2
(("skip_connect", 0), ("skip_connect", 1), ("skip_connect", 2)),
] # node-3
)

architectures = {
"resnet": ResNet_CODE,
"all_c3x3": AllConv3x3_CODE,
"all_c1x1": AllConv1x1_CODE,
"all_idnt": AllIdentity_CODE,
"all_full": AllFull_CODE,
}
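

# Usage sketch (illustration only, not part of the original file): round-tripping
# the NB201 string encoding through Structure and the preset codes above.
if __name__ == "__main__":
    example = "|nor_conv_1x1~0|+|none~0|none~1|+|none~0|none~1|skip_connect~2|"
    arch = Structure.str2structure(example)        # parse the string encoding
    print(len(arch))                               # node count, input node included
    print(arch.to_unique_str(consider_zero=True))  # canonical form for isomorphism checks
    print(architectures["resnet"].tostr())         # preset cell mimicking a residual block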

+ 554
- 0
xnas/search_space/RMINAS/NB201/ops.py View File

@@ -0,0 +1,554 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################

import torch
import torch.nn as nn

__all__ = ["OPS", "RAW_OP_CLASSES", "ResNetBasicblock", "SearchSpaceNames"]

OPS = {
"none": lambda C_in, C_out, stride, affine, track_running_stats: Zero(
C_in, C_out, stride
),
"avg_pool_3x3": lambda C_in, C_out, stride, affine, track_running_stats: POOLING(
C_in, C_out, stride, "avg", affine, track_running_stats
),
"max_pool_3x3": lambda C_in, C_out, stride, affine, track_running_stats: POOLING(
C_in, C_out, stride, "max", affine, track_running_stats
),
"nor_conv_7x7": lambda C_in, C_out, stride, affine, track_running_stats: ReLUConvBN(
C_in,
C_out,
(7, 7),
(stride, stride),
(3, 3),
(1, 1),
affine,
track_running_stats,
),
"nor_conv_3x3": lambda C_in, C_out, stride, affine, track_running_stats: ReLUConvBN(
C_in,
C_out,
(3, 3),
(stride, stride),
(1, 1),
(1, 1),
affine,
track_running_stats,
),
"nor_conv_1x1": lambda C_in, C_out, stride, affine, track_running_stats: ReLUConvBN(
C_in,
C_out,
(1, 1),
(stride, stride),
(0, 0),
(1, 1),
affine,
track_running_stats,
),
"dua_sepc_3x3": lambda C_in, C_out, stride, affine, track_running_stats: DualSepConv(
C_in,
C_out,
(3, 3),
(stride, stride),
(1, 1),
(1, 1),
affine,
track_running_stats,
),
"dua_sepc_5x5": lambda C_in, C_out, stride, affine, track_running_stats: DualSepConv(
C_in,
C_out,
(5, 5),
(stride, stride),
(2, 2),
(1, 1),
affine,
track_running_stats,
),
"dil_sepc_3x3": lambda C_in, C_out, stride, affine, track_running_stats: SepConv(
C_in,
C_out,
(3, 3),
(stride, stride),
(2, 2),
(2, 2),
affine,
track_running_stats,
),
"dil_sepc_5x5": lambda C_in, C_out, stride, affine, track_running_stats: SepConv(
C_in,
C_out,
(5, 5),
(stride, stride),
(4, 4),
(2, 2),
affine,
track_running_stats,
),
"skip_connect": lambda C_in, C_out, stride, affine, track_running_stats: Identity()
if stride == 1 and C_in == C_out
else FactorizedReduce(C_in, C_out, stride, affine, track_running_stats),
}

CONNECT_NAS_BENCHMARK = ["none", "skip_connect", "nor_conv_3x3"]
NAS_BENCH_201 = ["none", "skip_connect", "nor_conv_1x1", "nor_conv_3x3", "avg_pool_3x3"]
DARTS_SPACE = [
"none",
"skip_connect",
"dua_sepc_3x3",
"dua_sepc_5x5",
"dil_sepc_3x3",
"dil_sepc_5x5",
"avg_pool_3x3",
"max_pool_3x3",
]

SearchSpaceNames = {
"connect-nas": CONNECT_NAS_BENCHMARK,
"nats-bench": NAS_BENCH_201,
"nas-bench-201": NAS_BENCH_201,
"darts": DARTS_SPACE,
}


class ReLUConvBN(nn.Module):
def __init__(
self,
C_in,
C_out,
kernel_size,
stride,
padding,
dilation,
affine,
track_running_stats=True,
):
super(ReLUConvBN, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C_in,
C_out,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=not affine,
),
nn.BatchNorm2d(
C_out, affine=affine, track_running_stats=track_running_stats
),
)

def forward(self, x):
return self.op(x)


class SepConv(nn.Module):
def __init__(
self,
C_in,
C_out,
kernel_size,
stride,
padding,
dilation,
affine,
track_running_stats=True,
):
super(SepConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C_in,
C_in,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=C_in,
bias=False,
),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=not affine),
nn.BatchNorm2d(
C_out, affine=affine, track_running_stats=track_running_stats
),
)

def forward(self, x):
return self.op(x)


class DualSepConv(nn.Module):
def __init__(
self,
C_in,
C_out,
kernel_size,
stride,
padding,
dilation,
affine,
track_running_stats=True,
):
super(DualSepConv, self).__init__()
self.op_a = SepConv(
C_in,
C_in,
kernel_size,
stride,
padding,
dilation,
affine,
track_running_stats,
)
self.op_b = SepConv(
C_in, C_out, kernel_size, 1, padding, dilation, affine, track_running_stats
)

def forward(self, x):
x = self.op_a(x)
x = self.op_b(x)
return x


class ResNetBasicblock(nn.Module):
def __init__(self, inplanes, planes, stride, affine=True, track_running_stats=True):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, "invalid stride {:}".format(stride)
self.conv_a = ReLUConvBN(
inplanes, planes, 3, stride, 1, 1, affine, track_running_stats
)
self.conv_b = ReLUConvBN(
planes, planes, 3, 1, 1, 1, affine, track_running_stats
)
if stride == 2:
self.downsample = nn.Sequential(
nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
nn.Conv2d(
inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False
),
)
elif inplanes != planes:
self.downsample = ReLUConvBN(
inplanes, planes, 1, 1, 0, 1, affine, track_running_stats
)
else:
self.downsample = None
self.in_dim = inplanes
self.out_dim = planes
self.stride = stride
self.num_conv = 2

def extra_repr(self):
string = "{name}(inC={in_dim}, outC={out_dim}, stride={stride})".format(
name=self.__class__.__name__, **self.__dict__
)
return string

def forward(self, inputs):

basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)

if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
return residual + basicblock


class POOLING(nn.Module):
def __init__(
self, C_in, C_out, stride, mode, affine=True, track_running_stats=True
):
super(POOLING, self).__init__()
if C_in == C_out:
self.preprocess = None
else:
self.preprocess = ReLUConvBN(
C_in, C_out, 1, 1, 0, 1, affine, track_running_stats
)
if mode == "avg":
self.op = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False)
elif mode == "max":
self.op = nn.MaxPool2d(3, stride=stride, padding=1)
else:
raise ValueError("Invalid mode={:} in POOLING".format(mode))

def forward(self, inputs):
if self.preprocess:
x = self.preprocess(inputs)
else:
x = inputs
return self.op(x)


class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()

def forward(self, x):
return x


class Zero(nn.Module):
def __init__(self, C_in, C_out, stride):
super(Zero, self).__init__()
self.C_in = C_in
self.C_out = C_out
self.stride = stride
self.is_zero = True

def forward(self, x):
if self.C_in == self.C_out:
if self.stride == 1:
return x.mul(0.0)
else:
return x[:, :, :: self.stride, :: self.stride].mul(0.0)
else:
shape = list(x.shape)
shape[1] = self.C_out
zeros = x.new_zeros(shape, dtype=x.dtype, device=x.device)
return zeros

def extra_repr(self):
return "C_in={C_in}, C_out={C_out}, stride={stride}".format(**self.__dict__)


class FactorizedReduce(nn.Module):
def __init__(self, C_in, C_out, stride, affine, track_running_stats):
super(FactorizedReduce, self).__init__()
self.stride = stride
self.C_in = C_in
self.C_out = C_out
self.relu = nn.ReLU(inplace=False)
if stride == 2:
# assert C_out % 2 == 0, 'C_out : {:}'.format(C_out)
C_outs = [C_out // 2, C_out - C_out // 2]
self.convs = nn.ModuleList()
for i in range(2):
self.convs.append(
nn.Conv2d(
C_in, C_outs[i], 1, stride=stride, padding=0, bias=not affine
)
)
self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
elif stride == 1:
self.conv = nn.Conv2d(
C_in, C_out, 1, stride=stride, padding=0, bias=not affine
)
else:
raise ValueError("Invalid stride : {:}".format(stride))
self.bn = nn.BatchNorm2d(
C_out, affine=affine, track_running_stats=track_running_stats
)

def forward(self, x):
if self.stride == 2:
x = self.relu(x)
y = self.pad(x)
out = torch.cat([self.convs[0](x), self.convs[1](y[:, :, 1:, 1:])], dim=1)
else:
out = self.conv(x)
out = self.bn(out)
return out

def extra_repr(self):
return "C_in={C_in}, C_out={C_out}, stride={stride}".format(**self.__dict__)


# Auto-ReID: Searching for a Part-Aware ConvNet for Person Re-Identification, ICCV 2019
class PartAwareOp(nn.Module):
def __init__(self, C_in, C_out, stride, part=4):
super().__init__()
        self.part = part  # number of horizontal stripes the feature map is split into
self.hidden = C_in // 3
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.local_conv_list = nn.ModuleList()
for i in range(self.part):
self.local_conv_list.append(
nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, self.hidden, 1),
nn.BatchNorm2d(self.hidden, affine=True),
)
)
self.W_K = nn.Linear(self.hidden, self.hidden)
self.W_Q = nn.Linear(self.hidden, self.hidden)

        # FactorizedReduce in this file takes affine / track_running_stats explicitly;
        # they are fixed to True here, matching the BatchNorm layers above.
        if stride == 2:
            self.last = FactorizedReduce(C_in + self.hidden, C_out, 2, True, True)
        elif stride == 1:
            self.last = FactorizedReduce(C_in + self.hidden, C_out, 1, True, True)
        else:
            raise ValueError("Invalid Stride : {:}".format(stride))

def forward(self, x):
batch, C, H, W = x.size()
assert H >= self.part, "input size too small : {:} vs {:}".format(
x.shape, self.part
)
IHs = [0]
for i in range(self.part):
IHs.append(min(H, int((i + 1) * (float(H) / self.part))))
local_feat_list = []
for i in range(self.part):
feature = x[:, :, IHs[i] : IHs[i + 1], :]
xfeax = self.avg_pool(feature)
xfea = self.local_conv_list[i](xfeax)
local_feat_list.append(xfea)
part_feature = torch.cat(local_feat_list, dim=2).view(batch, -1, self.part)
part_feature = part_feature.transpose(1, 2).contiguous()
part_K = self.W_K(part_feature)
part_Q = self.W_Q(part_feature).transpose(1, 2).contiguous()
weight_att = torch.bmm(part_K, part_Q)
attention = torch.softmax(weight_att, dim=2)
aggreateF = torch.bmm(attention, part_feature).transpose(1, 2).contiguous()
features = []
for i in range(self.part):
feature = aggreateF[:, :, i : i + 1].expand(
batch, self.hidden, IHs[i + 1] - IHs[i]
)
feature = feature.view(batch, self.hidden, IHs[i + 1] - IHs[i], 1)
features.append(feature)
features = torch.cat(features, dim=2).expand(batch, self.hidden, H, W)
final_fea = torch.cat((x, features), dim=1)
outputs = self.last(final_fea)
return outputs


def drop_path(x, drop_prob):
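    # Stochastic drop-path: with probability drop_prob an entire sample's path is
    # zeroed out (per-sample mask of shape (B, 1, 1, 1)); surviving samples are
    # rescaled by 1/keep_prob so the expected activation magnitude is unchanged.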
if drop_prob > 0.0:
keep_prob = 1.0 - drop_prob
mask = x.new_zeros(x.size(0), 1, 1, 1)
mask = mask.bernoulli_(keep_prob)
x = torch.div(x, keep_prob)
x.mul_(mask)
return x


# Searching for A Robust Neural Architecture in Four GPU Hours
class GDAS_Reduction_Cell(nn.Module):
def __init__(
self, C_prev_prev, C_prev, C, reduction_prev, affine, track_running_stats
):
super(GDAS_Reduction_Cell, self).__init__()
if reduction_prev:
self.preprocess0 = FactorizedReduce(
C_prev_prev, C, 2, affine, track_running_stats
)
else:
self.preprocess0 = ReLUConvBN(
C_prev_prev, C, 1, 1, 0, 1, affine, track_running_stats
)
self.preprocess1 = ReLUConvBN(
C_prev, C, 1, 1, 0, 1, affine, track_running_stats
)

self.reduction = True
self.ops1 = nn.ModuleList(
[
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C,
C,
(1, 3),
stride=(1, 2),
padding=(0, 1),
groups=8,
bias=not affine,
),
nn.Conv2d(
C,
C,
(3, 1),
stride=(2, 1),
padding=(1, 0),
groups=8,
bias=not affine,
),
nn.BatchNorm2d(
C, affine=affine, track_running_stats=track_running_stats
),
nn.ReLU(inplace=False),
nn.Conv2d(C, C, 1, stride=1, padding=0, bias=not affine),
nn.BatchNorm2d(
C, affine=affine, track_running_stats=track_running_stats
),
),
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C,
C,
(1, 3),
stride=(1, 2),
padding=(0, 1),
groups=8,
bias=not affine,
),
nn.Conv2d(
C,
C,
(3, 1),
stride=(2, 1),
padding=(1, 0),
groups=8,
bias=not affine,
),
nn.BatchNorm2d(
C, affine=affine, track_running_stats=track_running_stats
),
nn.ReLU(inplace=False),
nn.Conv2d(C, C, 1, stride=1, padding=0, bias=not affine),
nn.BatchNorm2d(
C, affine=affine, track_running_stats=track_running_stats
),
),
]
)

self.ops2 = nn.ModuleList(
[
nn.Sequential(
nn.MaxPool2d(3, stride=2, padding=1),
nn.BatchNorm2d(
C, affine=affine, track_running_stats=track_running_stats
),
),
nn.Sequential(
nn.MaxPool2d(3, stride=2, padding=1),
nn.BatchNorm2d(
C, affine=affine, track_running_stats=track_running_stats
),
),
]
)

@property
def multiplier(self):
return 4

def forward(self, s0, s1, drop_prob=-1):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)

X0 = self.ops1[0](s0)
X1 = self.ops1[1](s1)
if self.training and drop_prob > 0.0:
X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)

# X2 = self.ops2[0] (X0+X1)
X2 = self.ops2[0](s0)
X3 = self.ops2[1](s1)
if self.training and drop_prob > 0.0:
X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
return torch.cat([X0, X1, X2, X3], dim=1)


# To manage the useful classes in this file.
RAW_OP_CLASSES = {"gdas_reduction": GDAS_Reduction_Cell}
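

# Usage sketch (illustration only, not part of the original file): every entry in
# OPS shares the (C_in, C_out, stride, affine, track_running_stats) signature, so a
# cell can instantiate any candidate operation uniformly.
if __name__ == "__main__":
    x = torch.randn(2, 16, 32, 32)
    conv = OPS["nor_conv_3x3"](16, 16, 1, True, True)
    pool = OPS["avg_pool_3x3"](16, 32, 2, True, True)
    print(conv(x).shape)  # torch.Size([2, 16, 32, 32])
    print(pool(x).shape)  # torch.Size([2, 32, 16, 16])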

+ 203
- 0
xnas/search_space/RMINAS/NB201/utils.py View File

@@ -0,0 +1,203 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################

from collections import namedtuple
import torch.nn as nn
from xnas.search_space.RMINAS.NB201.ops import OPS, ResNetBasicblock

from copy import deepcopy

from xnas.search_space.RMINAS.NB201.geno import Structure as CellStructure


def str2lists(arch_str):
    """
    This function shows how to read the string-based architecture encoding.
    It is the same as the `str2structure` func in `AutoDL-Projects/lib/models/cell_searchs/genotypes.py`
    :param
      arch_str: a string describing the architecture topology, such as
                |nor_conv_1x1~0|+|none~0|none~1|+|none~0|none~1|skip_connect~2|
    :return: a list of tuples; each tuple contains multiple (op, input_node_index) pairs.
    :usage
      arch = str2lists('|nor_conv_1x1~0|+|none~0|none~1|+|none~0|none~1|skip_connect~2|')
      print('there are {:} nodes in this arch'.format(len(arch) + 1))  # arch is a list
      for i, node in enumerate(arch):
          print('the {:}-th node is the sum of these {:} nodes with op: {:}'.format(i + 1, len(node), node))
    """
    node_strs = arch_str.split("+")
    genotypes = []
    for i, node_str in enumerate(node_strs):
        inputs = list(filter(lambda x: x != "", node_str.split("|")))
        for xinput in inputs:
            assert len(xinput.split("~")) == 2, "invalid input length : {:}".format(
                xinput
            )
        inputs = (xi.split("~") for xi in inputs)
        input_infos = tuple((op, int(IDX)) for (op, IDX) in inputs)
        genotypes.append(input_infos)
    return genotypes

def dict2config(xdict, logger):
assert isinstance(xdict, dict), "invalid type : {:}".format(type(xdict))
Arguments = namedtuple("Configure", " ".join(xdict.keys()))
content = Arguments(**xdict)
if hasattr(logger, "log"):
logger.log("{:}".format(content))
return content

def config2dict(content):
return content._asdict()

def get_cell_based_tiny_net(config):

if hasattr(config, "genotype"):
genotype = config.genotype
elif hasattr(config, "arch_str"):
genotype = CellStructure.str2structure(config.arch_str)
else:
raise ValueError(
"Can not find genotype from this config : {:}".format(config)
)
return TinyNetwork(config.C, config.N, genotype, config.num_classes)


# Cell for NAS-Bench-201
class InferCell(nn.Module):
def __init__(
self, genotype, C_in, C_out, stride, affine=True, track_running_stats=True
):
super(InferCell, self).__init__()

self.layers = nn.ModuleList()
self.node_IN = []
self.node_IX = []
self.genotype = deepcopy(genotype)
for i in range(1, len(genotype)):
node_info = genotype[i - 1]
cur_index = []
cur_innod = []
for (op_name, op_in) in node_info:
if op_in == 0:
layer = OPS[op_name](
C_in, C_out, stride, affine, track_running_stats
)
else:
layer = OPS[op_name](C_out, C_out, 1, affine, track_running_stats)
cur_index.append(len(self.layers))
cur_innod.append(op_in)
self.layers.append(layer)
self.node_IX.append(cur_index)
self.node_IN.append(cur_innod)
self.nodes = len(genotype)
self.in_dim = C_in
self.out_dim = C_out

def extra_repr(self):
string = "info :: nodes={nodes}, inC={in_dim}, outC={out_dim}".format(
**self.__dict__
)
laystr = []
for i, (node_layers, node_innods) in enumerate(zip(self.node_IX, self.node_IN)):
y = [
"I{:}-L{:}".format(_ii, _il)
for _il, _ii in zip(node_layers, node_innods)
]
x = "{:}<-({:})".format(i + 1, ",".join(y))
laystr.append(x)
return (
string
+ ", [{:}]".format(" | ".join(laystr))
+ ", {:}".format(self.genotype.tostr())
)

def forward(self, inputs):
nodes = [inputs]
for i, (node_layers, node_innods) in enumerate(zip(self.node_IX, self.node_IN)):
node_feature = sum(
self.layers[_il](nodes[_ii])
for _il, _ii in zip(node_layers, node_innods)
)
nodes.append(node_feature)
return nodes[-1]


# The macro structure for architectures in NAS-Bench-201
class TinyNetwork(nn.Module):
def __init__(self, C, N, genotype, num_classes):
super(TinyNetwork, self).__init__()
self._C = C
self._layerN = N
# self._datasize = datasize
# self._feature_res = feature_res

self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(C)
)

layer_channels = [C] * N + [C * 2] + [C * 2] * N + [C * 4] + [C * 4] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N

C_prev = C
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(
zip(layer_channels, layer_reductions)
):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2, True)
else:
cell = InferCell(genotype, C_prev, C_curr, 1)
self.cells.append(cell)
C_prev = cell.out_dim
self._Layer = len(self.cells)

self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)

def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += "\n {:02d}/{:02d} :: {:}".format(
i, len(self.cells), cell.extra_repr()
)
return string

def extra_repr(self):
return "{name}(C={_C}, N={_layerN}, L={_Layer})".format(
name=self.__class__.__name__, **self.__dict__
        )

    def feature_extractor(self, inputs):
features = []
feature = self.stem(inputs)
features.append(feature)
for i, cell in enumerate(self.cells):
feature = cell(feature)
features.append(feature)
out = self.lastact(feature)
features.append(out)
return features

    def forward(self, inputs):
        feature = self.stem(inputs)

        for i, cell in enumerate(self.cells):
            feature = cell(feature)
            if i == 4:
                tensor1 = feature
            elif i == 10:
                tensor2 = feature
        feature = self.lastact(feature)
        tensor3 = feature

        # intermediate feature maps after cells 4 and 10 (the ends of the first two
        # stages when N=5), plus the final activation
        features = [tensor1, tensor2, tensor3]

out = self.global_pooling(feature)
out = out.view(out.size(0), -1)
logits = self.classifier(out)

return features, logits
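

# Usage sketch (illustration only, not part of the original file): building a
# NB201 network from an architecture string via dict2config / get_cell_based_tiny_net
# and reading the intermediate feature maps returned by forward().
if __name__ == "__main__":
    import torch
    cfg = dict2config(
        {
            "C": 16,
            "N": 5,
            "arch_str": "|nor_conv_3x3~0|+|nor_conv_3x3~0|none~1|+|skip_connect~0|none~1|nor_conv_3x3~2|",
            "num_classes": 10,
        },
        None,  # no logger
    )
    net = get_cell_based_tiny_net(cfg)
    features, logits = net(torch.randn(2, 3, 32, 32))
    print([f.shape for f in features], logits.shape)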
