@@ -65,6 +65,7 @@ class Model(Abstract_PBT_Model):
# --- interior of Model.__init__ (the def line is outside this hunk) ---
self.device = torch.device(kwargs.get('device', 'cpu'))
# shuffle=False on both loaders keeps batch order deterministic across
# PBT population members -- presumably intentional; TODO confirm.
self.train_loader = DataLoader(Model.trn_dataset, batch_size=64, shuffle=False,num_workers=0)
self.test_loader = DataLoader(Model.tst_dataset, batch_size=64, shuffle=False, num_workers=0)
# Persistent iterator so training can resume mid-epoch between calls.
self.iter_train_loader = iter(self.train_loader)
self.net = ConvNet().to(self.device)
# self.opt_wrap = lambda params: optim.SGD(self.net.parameters(), lr=lr, momentum=momentum)
# NOTE(review): lr=0.11 looks like an unusual initial value -- verify it
# is the intended PBT starting hyperparameter and not a typo for 0.1.
self.opt = optim.SGD(self.net.parameters(), lr=0.11, momentum=0.9)
@@ -73,8 +74,8 @@ class Model(Abstract_PBT_Model):
self.ready = False # not ready
self.history_hp = [] # for record strategy
self.trajectory_hp = []
# NOTE(review): stripped-diff residue -- the next two lines are the OLD
# (score-based) fields and the two after them the NEW (loss-based)
# replacements; only one pair should survive in the real file.
self.trajectory_score = [] # record this individual's score over time
self.history_loss = [] # score history after applying the hp-strategy (weight transfer considered)
self.trajectory_loss = [] # record this individual's loss over time
self.history_loss = [] # loss history after applying the hp-strategy (weight transfer considered)
def __len__(self):
    """One training epoch equals this many batches (delegates to the train DataLoader)."""
    batches_per_epoch = len(self.train_loader)
    return batches_per_epoch
@@ -86,43 +87,34 @@ class Model(Abstract_PBT_Model):
# --- interior of a hyperparameter setter (its def line is outside this
# hunk): writes value v under key hyperparam_name (e.g. 'lr',
# 'momentum') into every optimizer param group. ---
for param_group in self.opt.param_groups:
param_group[hyperparam_name] = v
# NOTE(review): stripped-diff residue -- this is the OLD signature of
# step(); the replacement definition appears further down.
def step(self, num): # train need training(optimizer)
def _one_step(self, **kwargs) -> float:
    """Run one optimizer step on the next training batch.

    Pulls a batch from the persistent iterator (restarting it when the
    epoch is exhausted), computes the loss, and applies one SGD update.

    Returns:
        The batch loss as a float, or ``np.inf`` when the loss is
        non-finite -- in that case the optimizer step is skipped so
        NaN/inf gradients never reach the weights.
    """
    try:
        inp, target = next(self.iter_train_loader)
    except StopIteration:
        # Epoch exhausted: restart the (non-shuffled) train loader.
        self.iter_train_loader = iter(self.train_loader)
        inp, target = next(self.iter_train_loader)
    inp = inp.to(self.device)
    target = target.to(self.device)
    output = self.net(inp)
    loss = self.loss_fn(output, target)
    loss_value = loss.item()  # hoist: item() syncs with the device once
    # FIX: check isfinite, not just isnan -- an inf loss previously
    # slipped past the guard and backward() poisoned the weights with
    # NaN gradients. The caller (step) already treats any non-finite
    # return as failure via np.isfinite, so this stays consistent.
    if not np.isfinite(loss_value):
        print("Loss is not finite.")
        return np.inf
        # raise LossIsNaN
    self.opt.zero_grad()
    loss.backward()
    self.opt.step()
    return loss_value
def step(self, num, **kwargs): # train need training(optimizer)
# NOTE(review): stripped-diff residue -- this body contains BOTH the OLD
# implementation (the while/for scan immediately below) and the NEW one
# (the final for-loop delegating to self._one_step). As merged here the
# data would be trained on twice per call; exactly one version should
# survive in the real file.
self.net.train()
# OLD version: scan the loader from scratch, skipping already-consumed
# batches [0, st) and training on the next `num` batches.
st = self.step_num % len(self.train_loader)
ed = st + num
it = 0
while it < ed:
for (inp, target) in (self.train_loader):
if it < st:
it += 1
continue
# it += 1
inp = inp.to(self.device)
target = target.to(self.device)
output = self.net(inp)
loss = self.loss_fn(output, target)
if np.isnan(loss.item()):
print("Loss is NaN.")
# On NaN: burn the remaining step budget and abort the scan.
self.step_num += ed - it
it = ed
break
# raise LossIsNaN
self.opt.zero_grad()
loss.backward()
self.opt.step()
self.step_num += 1
it += 1
if ed == it:
break
# inp, target = next(self.train_loader)
# if self.step_num % len(Model.trn_dataset) == 0:
# self.ready = True
# NEW version: delegate each batch to _one_step; on a non-finite loss,
# charge the unused step budget to step_num and bail out.
for it in range(num):
loss: float = self._one_step(**kwargs)
if not np.isfinite(loss):
self.step_num += num - it
return
self.step_num += 1
def evaluate(self): # val no training need(optimizer)
correct = 0
# NOTE(review): the diff hunk header below marks skipped context -- the
# evaluation loop over self.test_loader (and any no-grad guard) is not
# visible here; only its interior lines follow.
@@ -134,16 +126,19 @@ class Model(Abstract_PBT_Model):
output = self.net(inp)
correct += (output.max(1)[1] == target).sum().cpu().item()
acc = 100 * correct / len(self.tst_dataset)
# NOTE(review): stripped-diff residue -- the next four lines are the
# OLD (score-based) bookkeeping, the four after them the NEW
# (loss-based, where loss = -accuracy) replacement; keep only one set.
self.score = -1 if np.isnan(acc) else acc
self.trajectory_score.append((self.step_num, self.score ))
self.history_loss.append((self.step_num, -self.score ))
return -self.score
self.loss = np.inf if np.isnan(acc) else - acc
self.trajectory_loss.append((self.step_num, self.loss ))
self.history_loss.append((self.step_num, self.loss ))
return self.loss
def load_checkpoint(self, checkpoint):
    """Restore network and optimizer state from a checkpoint mapping.

    `checkpoint` carries the two entries produced by save_checkpoint():
    'model_state_dict' and 'optim_state_dict'.
    """
    model_state = checkpoint['model_state_dict']
    optim_state = checkpoint['optim_state_dict']
    self.net.load_state_dict(model_state)
    self.opt.load_state_dict(optim_state)
def save_checkpoint(self):
    """Package the current model/optimizer state as a checkpoint dict.

    Optional serialization to disk; the returned mapping is what
    load_checkpoint() expects back.
    """
    # Snapshot both the learnable weights and the optimizer internals
    # (momentum buffers, lr, ...) so a peer can be restored exactly.
    return {
        'model_state_dict': self.net.state_dict(),
        'optim_state_dict': self.opt.state_dict(),
    }