# -*- coding:utf-8 -*-
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import os
import shutil

import argparse

# Training settings
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Example')
# The datasets are expected under /dataset
parser.add_argument('--traindata', default="/dataset", help='path to train dataset')
parser.add_argument('--testdata', default="/dataset", help='path to test dataset')
parser.add_argument('--epoch_size', type=int, default=5, help='number of epochs to train')
parser.add_argument('--batch_size', type=int, default=256, help='mini-batch size for the DataLoaders')

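# Example invocation (a sketch; the script filename is illustrative, the flags are the
# ones defined above):
#   python cifar10_cnn.py --traindata /dataset --testdata /dataset --epoch_size 5 --batch_size 256
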
- """
- torch.nn是专门为神经网络设计的模块化接口。nn构建于autograd之上,可以用来定义和运行神经网络。
- nn.Module是nn中十分重要的类,包含网络各层的定义及forward方法。
- 定义自已的网络:
- 需要继承nn.Module类,并实现forward方法。
- 一般把网络中具有可学习参数的层放在构造函数__init__()中,
- 不具有可学习参数的层(如ReLU)可放在构造函数中,也可不放在构造函数中(而在forward中使用nn.functional来代替)。
- 只要在nn.Module的子类中定义了forward函数,backward函数就会被自动实现(利用Autograd)。
- 注:Pytorch基于nn.Module构建的模型中,只支持mini-batch的Variable输入方式,
- 比如,只有一张输入图片,也需要变成 N x C x H x W 的形式:
- input_image = torch.FloatTensor(1, 28, 28)
- input_image = input_image.unsqueeze(0) # 1 x 1 x 28 x 28
- """
class Unit(nn.Module):  # as explained above, the class must inherit from nn.Module
    """Conv3x3 -> BatchNorm -> ReLU block."""
    def __init__(self, inc, ouc):
        super(Unit, self).__init__()
        self.unit_net = nn.Sequential(nn.Conv2d(inc, ouc, kernel_size=3, padding=1),
                                      nn.BatchNorm2d(ouc),
                                      nn.ReLU())

    def forward(self, x):
        return self.unit_net(x)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.net = nn.Sequential(Unit(3, 32),      # input: 3 x 32 x 32
                                 Unit(32, 32),
                                 Unit(32, 32),

                                 nn.MaxPool2d(2),  # -> 16 x 16

                                 Unit(32, 64),
                                 Unit(64, 64),
                                 Unit(64, 64),
                                 Unit(64, 64),

                                 nn.MaxPool2d(2),  # -> 8 x 8

                                 Unit(64, 128),
                                 Unit(128, 128),
                                 Unit(128, 128),
                                 Unit(128, 128),

                                 nn.MaxPool2d(2),  # -> 4 x 4

                                 Unit(128, 128),
                                 Unit(128, 128),
                                 Unit(128, 128),

                                 nn.AvgPool2d(4)   # -> 1 x 1
                                 )
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        y = self.net(x)
        y = y.view(-1, 128)  # flatten N x 128 x 1 x 1 to N x 128
        return self.fc(y)

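# Quick shape sanity check (an illustrative sketch, not part of the training flow):
# a CIFAR10 batch is N x 3 x 32 x 32 and the network maps it to N x 10 class scores.
#   x = torch.randn(2, 3, 32, 32)
#   assert Net()(x).shape == (2, 10)
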
def adjust_lr_rate(optimizer, epoch):
    """Step decay: divide the base learning rate by 10 every 30 epochs."""
    lr = 0.001
    if epoch > 180:
        lr = lr / 1000000
    elif epoch > 150:
        lr = lr / 100000
    elif epoch > 120:
        lr = lr / 10000
    elif epoch > 90:
        lr = lr / 1000
    elif epoch > 60:
        lr = lr / 100
    elif epoch > 30:
        lr = lr / 10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

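# Roughly equivalent built-in alternative (a sketch, not what this script uses):
# torch.optim.lr_scheduler.StepLR multiplies the lr by gamma every step_size epochs.
#   scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
#   for epoch in range(num_epoch):
#       ...  # train one epoch
#       scheduler.step()
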
def test(module, test_dataloader):  # evaluate on the CIFAR10 test set (10,000 images)
    test_acc = 0
    module.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        for j, (imgs, labels) in enumerate(test_dataloader):  # one batch at a time
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                labels = labels.cuda()
            outs = module(imgs)
            # The loss is only needed to update weights during training, so it is skipped here.
            _, prediction = torch.max(outs, 1)
            test_acc += torch.sum(prediction == labels)
    test_acc = test_acc.cpu().item() / len(test_dataloader.dataset)
    return test_acc

def train(num_epoch, module, optimizer, loss_f, train_dataloader, test_dataloader):  # train on the 50,000-image CIFAR10 training set
    param_path = r'/model/cifar10_cnn.pkl'
    tmp_param_path = r'/model/cifar10_cnn_temp.pkl'
    if os.path.exists(param_path):
        module.load_state_dict(torch.load(param_path))  # resume from an earlier checkpoint
    best_acc = 0
    for epoch in range(num_epoch):
        print("epoch", epoch)
        train_loss = 0
        train_acc = 0
        module.train()
        for i, (imgs, labels) in enumerate(train_dataloader):  # one mini-batch at a time
            # print('labels:', labels)  # each label is a digit 0-9 identifying the class
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                labels = labels.cuda()
            outs = module(imgs)
            loss = loss_f(outs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Notes on extracting the scalar loss value:
            #   loss and loss.data are tensors on the GPU; loss.cpu() and loss.cpu().data are on the CPU.
            #   loss.cpu().data[0] raises IndexError (0-dim tensor: use tensor.item() instead).
            #   loss.cpu().numpy() raises RuntimeError (call detach() first on a tensor that requires grad).
            #   loss.cpu().detach().numpy(), loss.cpu().data.numpy(), loss.cpu().item() and loss.item()
            #   all return the plain number.
            train_loss += loss.cpu().item() * imgs.size(0)  # imgs.size(0) is the batch size
            # For classification, torch.max(outs, 1) gives the index of the largest score per row,
            # i.e. the predicted class 0-9.
            _, prediction = torch.max(outs, 1)
            train_acc += torch.sum(prediction == labels)

        adjust_lr_rate(optimizer, epoch)
        train_loss = train_loss / len(train_dataloader.dataset)
        # item() is needed so the division yields a float rather than an integer tensor
        train_acc = train_acc.cpu().item() / len(train_dataloader.dataset)

        # After each epoch, evaluate on the test set.
        test_acc = test(module, test_dataloader)
        if test_acc > best_acc:
            best_acc = test_acc
            if os.path.exists(tmp_param_path):
                shutil.copyfile(tmp_param_path, param_path)  # keep a known-good copy in case a save is interrupted
            torch.save(module.state_dict(), tmp_param_path)
            print("save " + tmp_param_path)
        print('Epoch:', epoch, 'Train_Loss:', train_loss, 'Train_Acc:', train_acc, 'Test_Acc:', test_acc)

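# A fuller checkpoint is sometimes useful for resuming training; a sketch of the idea
# (an alternative layout, not what train() above writes):
#   torch.save({'epoch': epoch,
#               'model_state': module.state_dict(),
#               'optim_state': optimizer.state_dict(),
#               'best_acc': best_acc}, tmp_param_path)
#   # restore later with module.load_state_dict(checkpoint['model_state']), etc.
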
def getModel():
    return Net()

if __name__ == '__main__':
    args, unknown = parser.parse_known_args()
    # log output
    print('cuda is available:{}'.format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Transforms for the training set (with light augmentation).
    train_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(),     # random horizontal flip
        transforms.RandomCrop(32, padding=4),  # random crop after padding
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # train=True loads the training split
    train_set = CIFAR10(args.traindata, train=True, transform=train_transforms, download=False)
    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    print("load train set finished.")
    # Transforms for the test set (no augmentation).
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # train=False loads the test split
    test_set = CIFAR10(args.testdata, train=False, transform=test_transforms, download=False)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    print("load test set finished.")

    module = Net().to(device)

    optimizer = Adam(module.parameters(), lr=0.001, weight_decay=0.0001)
    loss_f = nn.CrossEntropyLoss()  # cross-entropy loss for classification

    train(args.epoch_size, module, optimizer, loss_f, train_dataloader, test_dataloader)
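
    # Inference sketch (illustrative; it reuses the checkpoint path written by train()):
    #   net = getModel()
    #   net.load_state_dict(torch.load('/model/cifar10_cnn.pkl', map_location='cpu'))
    #   net.eval()
    #   with torch.no_grad():
    #       img, label = test_set[0]            # a single 3 x 32 x 32 tensor
    #       scores = net(img.unsqueeze(0))      # add the batch dimension: 1 x 3 x 32 x 32
    #       predicted_class = scores.argmax(1).item()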