--- a/examples/PopulationBasedTraining/mnist_model.py
+++ b/examples/PopulationBasedTraining/mnist_model.py
@@ -65,6 +65,7 @@ class Model(Abstract_PBT_Model):
        self.device = torch.device(kwargs.get('device', 'cpu'))
        self.train_loader = DataLoader(Model.trn_dataset, batch_size=64, shuffle=False,num_workers=0)
        self.test_loader = DataLoader(Model.tst_dataset, batch_size=64, shuffle=False, num_workers=0)
        self.iter_train_loader = iter(self.train_loader)
        self.net = ConvNet().to(self.device)
        # self.opt_wrap = lambda params: optim.SGD(self.net.parameters(), lr=lr, momentum=momentum)
        self.opt = optim.SGD(self.net.parameters(), lr=0.11, momentum=0.9)
@@ -73,8 +74,8 @@ class Model(Abstract_PBT_Model):
        self.ready = False # not ready
        self.history_hp = [] # for record strategy
        self.trajectory_hp = []
        self.trajectory_score = [] # 记录该个体score过程
        self.history_loss = [] # 记录使用了（考虑权重迁移）hp-stategy后的score过程
        self.trajectory_loss = [] # 记录该个体loss过程
        self.history_loss = [] # 记录使用了（考虑权重迁移）hp-stategy后的loss过程

    def __len__(self): # one epoch has how many batchs
        return len(self.train_loader)
@@ -86,43 +87,34 @@ class Model(Abstract_PBT_Model):
            for param_group in self.opt.param_groups:
                param_group[hyperparam_name] = v

    def step(self, num): # train need training(optimizer)
    def _one_step(self, **kwargs) -> float:
        try:
            inp, target = next(self.iter_train_loader)
        except StopIteration:
            self.iter_train_loader = iter(self.train_loader)
            inp, target = next(self.iter_train_loader)
        inp = inp.to(self.device)
        target = target.to(self.device)
        output = self.net(inp)
        loss = self.loss_fn(output, target)
        if np.isnan(loss.item()):
            print("Loss is NaN.")
            return np.inf
        # raise LossIsNaN
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        return loss.item()

    def step(self, num, **kwargs): # train need training(optimizer)
        """Train the network for ``num`` optimizer steps and advance ``self.step_num``.

        NOTE(review): this span contains two training loops back to back -- an
        in-line loop over ``self.train_loader`` and a loop that delegates to
        ``self._one_step``. It looks like the first is the pre-refactor body and
        the second its replacement; as written both would run. Confirm that only
        one of them is meant to remain.

        Args:
            num: Number of training steps requested.
            **kwargs: Forwarded unchanged to ``self._one_step``.
        """
        self.net.train()
        # First loop: resume at batch offset `st` within the epoch and stop at `ed`.
        st = self.step_num % len(self.train_loader)
        ed = st + num
        it = 0
        while it < ed:

            for (inp, target) in (self.train_loader):

                # Skip batches until the resume offset has been reached.
                if it < st:
                    it += 1
                    continue
                # it += 1
                inp = inp.to(self.device)
                target = target.to(self.device)
                output = self.net(inp)
                loss = self.loss_fn(output, target)
                if np.isnan(loss.item()):
                    print("Loss is NaN.")
                    # Count the remaining steps as done and leave both loops.
                    self.step_num += ed - it
                    it = ed
                    break
                    # raise LossIsNaN
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()
                self.step_num += 1
                it += 1
                if ed == it:
                    break
        # inp, target = next(self.train_loader)


        # if self.step_num % len(Model.trn_dataset) == 0:
        #     self.ready = True


        # Second loop: one `_one_step` call per requested step.
        for it in range(num):
            loss: float = self._one_step(**kwargs)
            if not np.isfinite(loss):
                # Non-finite loss: credit the remaining steps to step_num and stop.
                self.step_num += num - it
                return
            self.step_num += 1
        

    def evaluate(self): # val no training need(optimizer)
        correct = 0
@@ -134,16 +126,19 @@ class Model(Abstract_PBT_Model):
                output = self.net(inp)
                correct += (output.max(1)[1] == target).sum().cpu().item()
        acc = 100 * correct / len(self.tst_dataset)
        self.score = -1 if np.isnan(acc) else acc
        self.trajectory_score.append((self.step_num, self.score))
        self.history_loss.append((self.step_num, -self.score))
        return -self.score
        self.loss = np.inf if np.isnan(acc) else -acc
        self.trajectory_loss.append((self.step_num, self.loss))
        self.history_loss.append((self.step_num, self.loss))
        return self.loss

    def load_checkpoint(self, checkpoint):
        """Restore network and optimizer state from ``checkpoint``.

        Args:
            checkpoint: Mapping with the keys ``'model_state_dict'`` and
                ``'optim_state_dict'``, each passed to the matching
                ``load_state_dict`` call.
        """
        net_state = checkpoint['model_state_dict']
        self.net.load_state_dict(net_state)
        opt_state = checkpoint['optim_state_dict']
        self.opt.load_state_dict(opt_state)

    def save_checkpoint(self):
        """Collect the current network and optimizer state into a dict.

        Returns:
            A dict with ``'model_state_dict'`` (from ``self.net.state_dict()``)
            and ``'optim_state_dict'`` (from ``self.opt.state_dict()``). Nothing
            is written to disk by this method.
        """
        checkpoint = {}
        checkpoint['model_state_dict'] = self.net.state_dict()
        checkpoint['optim_state_dict'] = self.opt.state_dict()
        return checkpoint
--- a/examples/PopulationBasedTraining/mnist_pbt.py
+++ b/examples/PopulationBasedTraining/mnist_pbt.py
@@ -30,6 +30,9 @@ class MnistPBT(PBT):
            top_id = self.rng.choice(top_ids)
            checkpoint = population_model[top_id].save_checkpoint()
            population_model[bot_id].load_checkpoint(checkpoint)
            # Keep the dataloader iterators synchronized (when loss is NaN or on early stopping)
            population_model[bot_id].iter_train_loader = population_model[top_id].iter_train_loader
            
            self.population_hp_array[bot_id] = self.population_hp_array[
                top_id].copy()
            # explore
@@ -45,7 +48,7 @@ class MnistPBT(PBT):
            # x_unwarped = DenseConfiguration.array_to_dict(self.space, x_array)
            # self.population_hp[bot_id] = x_unwarped
            population_model[bot_id].history_hp = copy.copy(
                population_model[top_id].history_loss)
                population_model[top_id].history_hp)
            population_model[bot_id].history_loss = copy.copy(
                population_model[top_id].history_loss)
            population_model[bot_id].update_hp(new_config.get_dictionary())
@@ -53,12 +56,12 @@ class MnistPBT(PBT):
        
 if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    epoch_num = 100
    epoch_num = 10
    rng = np.random.RandomState(42)
    
    config_space = Model.get_configuration_space(rng.randint(MAXINT))
    # define black box optimizer
    pbt = MnistPBT(space=config_space, pop_size=2, seed=rng.randint(MAXINT))
    pbt = MnistPBT(space=config_space, pop_size=5, seed=rng.randint(MAXINT))

    population_model = [
        Model(seed=rng.randint(MAXINT), device=device) for _ in range(pbt.pop_size)
@@ -74,7 +77,7 @@ if __name__ == "__main__":
    for i in range(pbt.pop_size):
        desc_data = np.array(population_model[i].history_loss)
        desc_data[:, 0] /= len(population_model[-1])
        ax1.plot(desc_data[:, 0], desc_data[:, 1], alpha=0.5)
        ax1.plot(desc_data[:, 0], -desc_data[:, 1], alpha=0.5)
    ax1.set_xlabel("epoch")
    ax1.set_ylabel("score")
    # for i in range(self.pop_size):
@@ -86,15 +89,15 @@ if __name__ == "__main__":
    for i in range(pbt.pop_size):
        desc_data = np.array([[x[0], x[-1]['lr']] for x in population_model[i].history_hp])
        desc_data[:, 0] /= len(population_model[-1])
        desc_data = np.append(desc_data, [[pbt.epoch, desc_data[-1, 1]]], axis=0)
        desc_data = np.append(desc_data, [[epoch_num, desc_data[-1, 1]]], axis=0)
        ax2.plot(desc_data[:, 0], desc_data[:, 1], label='best individual' if i==best_individual_index else None)
    ax2.set_xlabel("epoch")
    ax2.set_ylabel("lr")
    plt.legend()
    plt.suptitle("PBT search (lr, momentum) in MNIST")
    plt.tight_layout()
    plt.savefig('./out/PBT_mnist.png')
    # plt.savefig('./a.png')
    plt.show()

    print('-----\nBest hyper-param strategy: {}'.format(pbt.population_model[best_individual_index].history_hp))
    print('final score: {}'.format(-pbt.population_model[best_individual_index].history_loss[-1]))
    print('-----\nBest hyper-param strategy: {}'.format(population_model[best_individual_index].history_hp))
    print('final -score: {}'.format(population_model[best_individual_index].history_loss[-1]))
--- a/examples/PopulationBasedTraining/toy_model.py
+++ b/examples/PopulationBasedTraining/toy_model.py
@@ -20,8 +20,8 @@ class Model(Abstract_PBT_Model):
        self.opt = SGD([self.theta], lr=0.01)
        self.history_hp = []  # for record strategy
        self.trajectory_hp = []
        self.trajectory_loss = []  # 记录该个体score过程
        self.history_loss = []  # 记录使用了（考虑权重迁移）hp-stategy后的score过程
        self.trajectory_loss = []  # 记录该个体loss过程
        self.history_loss = []  # 记录使用了（考虑权重迁移）hp-stategy后的loss过程
        self.hp = torch.empty(2, device=self.device)
        self.obj_val_func = lambda theta: 1.2 - (theta**2).sum()
        self.obj_train_func = lambda theta, h: 1.2 - ((h * theta)**2).sum()
--- a/xbbo/problem/pbt_toy.py
+++ b/xbbo/problem/pbt_toy.py
@@ -1,99 +0,0 @@
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch import optim
 from torch.autograd import Variable
 from torch.nn.parameter import Parameter
 from torchvision import datasets, transforms
 from torch.utils.data import DataLoader, Dataset

 from xbbo.core.constants import MAXINT

 from xbbo.core import TestFunction

 class LossIsNaN(Exception):
    """Exception type for a NaN loss.

    In this file it is only referenced from a commented-out ``raise``.
    """




 class Model(TestFunction):
    """Toy population-based-training problem over a 2-element parameter ``theta``.

    ``step`` runs SGD on ``obj_train_func`` (which depends on the
    hyper-parameters ``h1``/``h2`` stored in ``self.hp``), while ``evaluate``
    reports ``obj_val_func`` of the current ``theta``.
    """

    def __init__(self, cfg, seed, **kwargs):
        """Build the toy model.

        Args:
            cfg: Configuration object; stored on ``self.cfg`` and not otherwise
                read here.
            seed: Seed forwarded to ``TestFunction.__init__``.
            **kwargs: ``device`` (default ``'cpu'``) selects the torch device.
        """
        self.cfg = cfg
        # presumably TestFunction.__init__ creates self.rng from `seed` -- confirm
        super().__init__(seed=seed)

        self.api_config = self._load_api_config()
        # torch.seed() accepts no arguments (it reseeds non-deterministically),
        # so the former torch.seed(<int>) call raised TypeError. manual_seed is
        # the API that takes an explicit seed.
        torch.manual_seed(self.rng.randint(MAXINT))
        self.device = torch.device(kwargs.get('device', 'cpu'))

        self.theta = Parameter(torch.FloatTensor([0.9, 0.9]).to(self.device))
        self.opt = optim.SGD([self.theta], lr=0.01)
        self.step_num = 0
        self.history_hp = []  # (step_num, params) records of the hp strategy
        self.trajectory_hp = []
        self.trajectory_loss = []  # (step_num, loss) trajectory of this individual
        # (step_num, loss) trajectory after applying the hp strategy
        # (accounting for weight transfer between individuals)
        self.history_loss = []
        self.hp = torch.empty(2, device=self.device)
        self.obj_val_func = lambda theta: 1.2 - (theta ** 2).sum()
        self.obj_train_func = lambda theta, h: 1.2 - ((h * theta) ** 2).sum()

        self.trajectory_theta = []

    def __len__(self):
        """Return the number of batches per epoch, which is fixed at 1 here."""
        return 1

    def _snapshot_theta(self):
        """Return an independent NumPy copy of the current ``theta``.

        ``Tensor.numpy()`` shares memory with a CPU tensor, and the optimizer
        updates ``theta`` in place, so without the copy every stored snapshot
        would end up showing the latest value.
        """
        return self.theta.detach().cpu().numpy().copy()

    def update_hp(self, params: dict):
        """Record and apply a new hyper-parameter setting.

        Args:
            params: Mapping providing at least the keys ``'h1'`` and ``'h2'``.
        """
        # The hyper-parameters change at this step; a score recorded at this
        # step is the result of all hyper-parameters used *before* it.
        self.history_hp.append((self.step_num, params))
        self.trajectory_hp.append((self.step_num, params))
        self.trajectory_theta.append(self._snapshot_theta())
        self.hp[0] = params['h1']
        self.hp[1] = params['h2']

    def step(self, num):
        """Run up to ``num`` SGD steps on the training objective.

        Each iteration stores a snapshot of ``theta`` first. If the loss is
        NaN, a message is printed, ``step_num`` is incremented once and the
        method returns early without an optimizer update.

        Args:
            num: Number of training steps requested.
        """
        for _ in range(num):
            self.trajectory_theta.append(self._snapshot_theta())
            loss = self.obj_train_func(self.theta, self.hp)
            if np.isnan(loss.item()):
                print("Loss is NaN.")
                self.step_num += 1
                return
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            self.step_num += 1

    def evaluate(self):
        """Evaluate the validation objective without tracking gradients.

        Returns:
            ``obj_val_func(theta)`` as a float, or ``np.inf`` when that value
            is NaN. The value is also stored on ``self.loss`` and appended,
            together with ``step_num``, to both loss histories.
        """
        with torch.no_grad():
            loss = self.obj_val_func(self.theta).item()
        self.loss = np.inf if np.isnan(loss) else loss
        self.trajectory_loss.append((self.step_num, self.loss))
        self.history_loss.append((self.step_num, self.loss))
        return self.loss

    def load_checkpoint(self, checkpoint):
        """Replace ``theta`` with the tensor stored under ``'model_state_dict'``.

        Optimizer state is not restored by this method.
        """
        with torch.no_grad():
            self.theta.set_(checkpoint['model_state_dict'])

    def save_checkpoint(self):
        """Return a dict holding a cloned copy of ``theta``'s data."""
        checkpoint = dict(model_state_dict=self.theta.data.clone())
        return checkpoint

    def _load_api_config(self):
        """Return the search-space description for ``h1`` and ``h2``."""
        return {
            'h1': {
                'type': 'float', 'warp': 'linear', 'range': [0, 1]},
            'h2': {
                'type': 'float', 'warp': 'linear', 'range': [0, 1]
            }
        }

--- a/xbbo/search_algorithm/pbt_optimizer.py
+++ b/xbbo/search_algorithm/pbt_optimizer.py
@@ -137,6 +137,7 @@ class PBT(AbstractOptimizer):
                for i in range(self.pop_size):
                    population_model[i].evaluate()
                losses = [net.loss for net in population_model]
                assert np.any(np.isfinite(losses)), "ERROR: At Least 1 loss is finite"
                if finished:
                    break
                # Update respective config