#57 fix transfer-bo bug

Merged
isleizhang merged 1 commits from mathcoder/XBBO:dev into dev 1 year ago
  1. +0
    -0
      examples/transfer/README.md
  2. +63
    -0
      examples/transfer/transfer_bo.py
  3. +0
    -101
      examples/transfer_bo.py
  4. +5
    -2
      xbbo/acquisition_function/transfer/taf.py
  5. +2
    -2
      xbbo/problem/base.py
  6. +0
    -1
      xbbo/problem/fast_example_problem.py
  7. +370
    -0
      xbbo/problem/transfer_problem.py
  8. +86
    -71
      xbbo/search_algorithm/transfer_bo_optimizer.py
  9. +0
    -19
      xbbo/surrogate/gaussian_process.py
  10. +1
    -1
      xbbo/surrogate/transfer/weight_stategy.py
  11. +21
    -1
      xbbo/utils/util.py

+ 0
- 0
examples/transfer/README.md View File


+ 63
- 0
examples/transfer/transfer_bo.py View File

@@ -0,0 +1,63 @@
import numpy as np
# import matplotlib.pyplot as plt
from ConfigSpace import ConfigurationSpace
from xbbo.configspace.space import DenseConfiguration
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from ConfigSpace.conditions import LessThanCondition

# from xbbo.search_algorithm.transfer_tst_optimizer import TransferBO
# from xbbo.search_algorithm.transfer_taf_optimizer import TransferBO
# from xbbo.search_algorithm.transfer_rgpe_mean_optimizer import TransferBO
# from xbbo.search_algorithm.transfer_taf_rgpe_optimizer import TransferBO
# from xbbo.search_algorithm.transfer_RMoGP_optimizer import TransferBO
from xbbo.search_algorithm.transfer_bo_optimizer import TransferBO

from xbbo.problem.transfer_problem import BenchName, TransferBenchmark
from xbbo.core.constants import MAXINT

if __name__ == "__main__":
    # Number of suggest / evaluate / observe rounds spent on the target task.
    MAX_CALL = 30
    rng = np.random.RandomState(42)

    # Offline benchmark: "A9A" is the target task, the other tasks found in
    # ./data/svm provide the historical (source) observations.
    transfer_bench = TransferBenchmark(
        bench_name=BenchName.TST,
        target_task_name="A9A",
        data_path_root='./data',
        data_base_name='svm',
        rng=rng.randint(MAXINT),
    )
    # Alternative: tabular DeepAR benchmark.
    # transfer_bench = TransferBenchmark(bench_name=BenchName.Table_deepar,
    #                                    target_task_name="wiki-rolling",
    #                                    data_path_root='./data/offline_evaluations',
    #                                    data_base_name='DeepAR.csv.zip',
    #                                    rng=rng.randint(MAXINT))
    cs = transfer_bench.get_configuration_space()
    old_D_X, old_D_y = transfer_bench.get_old_data()

    # ---- Optimizer variants that use transfer (enable exactly one) ----
    # hpopt = TransferBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='tst', acq_func='ei', weight_srategy='kernel', acq_opt='rs')  # TST-R
    # TAF
    hpopt = TransferBO(
        space=cs,
        seed=rng.randint(MAXINT),
        suggest_limit=MAX_CALL,
        initial_design='sobol',
        surrogate='gp',
        acq_func='taf',
        weight_srategy='kernel',
        acq_opt='rs',
    )
    # hpopt = TransferBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='tst', acq_func='ei', weight_srategy='rw', acq_opt='rs')  # RGPE(mean)
    # hpopt = TransferBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='gp', acq_func='taf', weight_srategy='rw', acq_opt='rs')  # TAF(rw)
    # hpopt = TransferBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='gp', acq_func='mogp', weight_srategy='rw', acq_opt='rs')  # RMoGP
    # ---- Variant without transfer ----
    # hpopt = TransferBO(space=cs,
    #                    seed=rng.randint(MAXINT),
    #                    suggest_limit=MAX_CALL,
    #                    initial_design='sobol',
    #                    surrogate='gp',
    #                    acq_func='ei',
    #                    weight_srategy='kernel',
    #                    acq_opt='rs')  # vanilla BO

    # The surrogate / acquisition objects are built from the source data here,
    # so this must run before the first suggest().
    hpopt.get_transfer_knowledge(old_D_X, old_D_y)

    # ---- Begin BO-loop ----
    for _ in range(MAX_CALL):
        # suggest
        trial_list = hpopt.suggest()
        trial = trial_list[0]
        # evaluate
        obs = transfer_bench(trial.config_dict)
        # observe
        trial.add_observe_value(obs)
        hpopt.observe(trial_list=trial_list)

        print(obs)

    print('find best (value, config):{}'.format(hpopt.trials.get_best()))

+ 0
- 101
examples/transfer_bo.py View File

@@ -1,101 +0,0 @@
import numpy as np
# import matplotlib.pyplot as plt
from ConfigSpace import ConfigurationSpace
from xbbo.configspace.space import DenseConfiguration
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from ConfigSpace.conditions import LessThanCondition

# from xbbo.search_algorithm.transfer_tst_optimizer import SMBO
# from xbbo.search_algorithm.transfer_taf_optimizer import SMBO
# from xbbo.search_algorithm.transfer_rgpe_mean_optimizer import SMBO
# from xbbo.search_algorithm.transfer_taf_rgpe_optimizer import SMBO
# from xbbo.search_algorithm.transfer_RMoGP_optimizer import SMBO
from xbbo.search_algorithm.transfer_bo_optimizer import SMBO

from xbbo.problem.offline_hp import Model
from xbbo.core.constants import MAXINT
from xbbo.surrogate.transfer.base_surrogate import BaseModel

def rosenbrock_2d(x):
    """Evaluate the 2-dimensional Rosenbrock toy function.

    ``x`` is a mapping holding the coordinate ``"x0"`` and, optionally,
    ``"x1"``.  When ``"x1"`` is absent the value of ``"x0"`` is used for it.
    The function is a common optimization test problem whose minimum value
    of zero is reached at ``x0 == x1 == 1``; the intended search interval for
    both inputs is [-5, 10].
    """
    first = x["x0"]
    # Fall back to x0 when x1 is missing from the mapping.
    second = x.get('x1', first)

    valley_term = (second - first ** 2.) ** 2.
    offset_term = (1 - first) ** 2.
    return 100. * valley_term + offset_term

def branin(config):
    """Evaluate the Branin test function at ``config['x1']``, ``config['x2']``."""
    x1 = config['x1']
    x2 = config['x2']
    # Coefficients of the Branin formula.
    quad_coef = 5.1 / (4 * np.pi ** 2)
    lin_coef = 5 / np.pi
    cos_scale = 10 * (1 - 1 / (8 * np.pi))

    parabola = (x2 - quad_coef * x1 ** 2 + lin_coef * x1 - 6) ** 2
    return parabola + cos_scale * np.cos(x1) + 10

def build_space(rng):
    """Build the conditional 2-d search space used with ``rosenbrock_2d``.

    ``rng`` must offer ``randint``; it is used to draw the seed of the space.
    """
    space = ConfigurationSpace(seed=rng.randint(MAXINT))
    hp_x0 = UniformFloatHyperparameter("x0", -5, 10, default_value=-3)
    hp_x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=-4)
    space.add_hyperparameters([hp_x0, hp_x1])
    # x1 is conditional on x0 (less-than condition with threshold 1).
    space.add_condition(LessThanCondition(hp_x1, hp_x0, 1.))
    return space

def build_branin_space(rng):
    """Build the 2-d search space for ``branin``: x1 in [-5, 10], x2 in [0, 15].

    ``rng`` must offer ``randint``; it is used to draw the seed of the space.
    """
    space = ConfigurationSpace(seed=rng.randint(MAXINT))
    space.add_hyperparameters([
        UniformFloatHyperparameter("x1", -5, 10, default_value=0),
        UniformFloatHyperparameter("x2", 0, 15, default_value=0),
    ])
    return space

if __name__ == "__main__":
    # Number of suggest / evaluate / observe rounds.
    MAX_CALL = 30
    rng = np.random.RandomState(42)

    # Offline model whose task 'a6a' is the one being optimized.
    test_model = Model(rng.randint(MAXINT), test_task='a6a', )

    # One float hyperparameter per entry reported by the offline model.
    cs = ConfigurationSpace(seed=rng.randint(MAXINT))
    confs = test_model.get_api_config()
    for name in confs:
        bounds = confs[name]['range']
        cs.add_hyperparameter(UniformFloatHyperparameter(name, bounds[0], bounds[1]))
    blackbox_func = test_model.evaluate

    # One pre-trained base model per historical data set.
    base_models = []
    for task_idx in range(len(test_model.old_D_x)):
        base_model = BaseModel(cs, rng=rng, do_optimize=False)
        base_models.append(base_model)
        base_model.train(test_model.old_D_x[task_idx], test_model.old_D_y[task_idx])

    # ---- Optimizer variants that use transfer (enable exactly one) ----
    # hpopt = SMBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='gp', acq_func='ei', weight_srategy='kernel', acq_opt='rs', base_models=base_models)  # vanilla BO
    # hpopt = SMBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='tst', acq_func='ei', weight_srategy='kernel', acq_opt='rs', base_models=base_models)  # TST-R
    # hpopt = SMBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='gp', acq_func='taf', weight_srategy='kernel', acq_opt='rs', base_models=base_models)  # TAF
    # hpopt = SMBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='tst', acq_func='ei', weight_srategy='rw', acq_opt='rs', base_models=base_models)  # RGPE(mean)
    # hpopt = SMBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='gp', acq_func='taf', weight_srategy='rw', acq_opt='rs', base_models=base_models)  # TAF(rw)
    # RMoGP
    hpopt = SMBO(
        space=cs,
        seed=rng.randint(MAXINT),
        suggest_limit=MAX_CALL,
        initial_design='sobol',
        surrogate='gp',
        acq_func='mogp',
        weight_srategy='rw',
        acq_opt='rs',
        base_models=base_models,
    )
    # ---- Variant without transfer ----
    # hpopt = SMBO(space=cs, seed=rng.randint(MAXINT), suggest_limit=MAX_CALL, initial_design='sobol', surrogate='gp', acq_opt='rs_ls', base_models=[])

    # Example call of the black-box function
    def_value = blackbox_func(cs.get_default_configuration())
    print("Default Value: %.2f" % def_value)

    # ---- Begin BO-loop ----
    for _ in range(MAX_CALL):
        # suggest
        trial_list = hpopt.suggest()
        trial = trial_list[0]
        # evaluate
        value = blackbox_func(trial.config_dict)
        # observe
        trial.add_observe_value(observe_value=value)
        hpopt.observe(trial_list=trial_list)
        print(value)
    # plt.plot(hpopt.trials.get_history()[0])
    # plt.savefig('./out/rosenbrock_bo_gp.png')
    # plt.show()
    print('find best value:{}'.format(hpopt.trials.get_best()[0]))


+ 5
- 2
xbbo/acquisition_function/transfer/taf.py View File

@@ -95,8 +95,11 @@ class TAF_AcqFunc(AbstractAcquisitionFunction):
# denominator = self.selfWeight
f *= self.selfWeight
for d in range(len(self.weight)):
f += self.weight[d] * (self.base_incuments[d] - self.pre_weight_model[d].predict(
X, None)[0]).clip(0)
base_pred_mean = self.pre_weight_model[d].predict(
X, None)[0]
if len(base_pred_mean.shape) == 1:
base_pred_mean = np.expand_dims(base_pred_mean, axis=-1)
f += self.weight[d] * (self.base_incuments[d] - base_pred_mean).clip(0)
# denominator += self.weight[d]

return f


+ 2
- 2
xbbo/problem/base.py View File

@@ -1,5 +1,5 @@
import abc
import numpy as np
import ConfigSpace
from xbbo.core.constants import Key

@@ -23,7 +23,7 @@ class AbstractBenchmark(object, metaclass=abc.ABCMeta):
self.counter = 0

self.rng = create_rng(rng)
self.configuration_space = self.get_configuration_space()
# self.configuration_space = self.get_configuration_space()

@abc.abstractmethod
def objective_function(self, configuration, **kwargs):


+ 0
- 1
xbbo/problem/fast_example_problem.py View File

@@ -16,7 +16,6 @@ from xbbo.core.constants import MAXINT, Key
class Ackley(AbstractBenchmark):
def __init__(self, dim=10, rng=np.random.RandomState(42)):
self.dims = dim
self.rng = rng
self.keys = ["x_{}".format(i) for i in range(self.dims)]
super().__init__(rng)
self.get_configuration_space()


+ 370
- 0
xbbo/problem/transfer_problem.py View File

@@ -0,0 +1,370 @@
from abc import abstractmethod
import os
from enum import Enum
from typing import Tuple, List, Callable
import numpy as np
import pandas as pd
from pathlib import Path
from ConfigSpace import ConfigurationSpace
import ConfigSpace as CS
from ConfigSpace.conditions import InCondition, LessThanCondition
from ConfigSpace.hyperparameters import \
CategoricalHyperparameter, UniformFloatHyperparameter, UniformIntegerHyperparameter
from xbbo.core.constants import MAXINT, Key
from xbbo.problem.base import AbstractBenchmark

class BenchName(Enum):
    """Identifiers of the offline transfer benchmarks that can be loaded."""

    # Benchmark stored as one plain-text file per task.
    TST = 0
    # surrogate = 1
    # Tabular benchmarks stored as a single CSV file with a ``task`` column.
    Table_deepar = 1
    Table_fcnet = 2
    Table_xgboost = 3
    Table_nas102 = 4

# Process-wide cache that prevents the same data set from being loaded twice.
# Keys are built by ``TransferData._cache_key``.
CACHE_DATA = {}


class TransferData():
    """Base class of the offline data loaders used by the transfer benchmarks.

    Subclasses implement ``_load_data`` (the actual reading/parsing),
    ``get_configuration_space`` and ``download_data``.  ``load_data`` wraps
    ``_load_data`` with a process-wide cache.
    """

    def __init__(self, bench_name:int, data_path_root:str, data_base_name:str, target_task_name:str) -> None:
        """Remember which benchmark / data set / target task this loader serves.

        :param bench_name: identifier of the benchmark family.
        :param data_path_root: directory below which the data is stored.
        :param data_base_name: name of the data set inside ``data_path_root``.
        :param target_task_name: name of the task treated as the target.
        """
        self.bench_name = bench_name
        self.data_base_name = data_base_name
        self.data_path_root = data_path_root
        self.target_task_name = target_task_name

    def _cache_key(self):
        """Return the ``CACHE_DATA`` key identifying this loader's data.

        Besides the loader class and the target task, the key contains the
        benchmark and the data location, so two loaders that share a target
        task name but read different data sets do not receive each other's
        data.  Options that only subclasses know about are not part of it.
        """
        return (type(self), self.bench_name, self.data_path_root,
                self.data_base_name, self.target_task_name)

    def load_data(self,):
        """Return the result of ``_load_data``, computing it at most once per key."""
        key = self._cache_key()
        # Membership test instead of truthiness: the cached value may be an
        # object without a well-defined truth value.
        if key not in CACHE_DATA:
            CACHE_DATA[key] = self._load_data()
        return CACHE_DATA[key]

    def _load_data(self):
        """Read and return the data; to be provided by subclasses."""
        pass

    def get_configuration_space(self,):
        """Return the search space of the data; to be provided by subclasses."""
        pass

    # NOTE: this class does not use an ABC metaclass, so the decorator only
    # marks the intent and does not prevent instantiation.
    @abstractmethod
    def download_data(self,):
        """Fetch the data set; to be provided by subclasses."""
        pass

    @property
    def hp_names(self,):
        """Names of the hyperparameter columns; ``None`` in the base class."""
        return None


class TST_Data(TransferData):
    """Loader for benchmarks stored as one plain-text file per task.

    Every file below ``<data_path_root>/<data_base_name>`` is one task; the
    file whose name equals ``target_task_name`` is the target task, all other
    files are source tasks.
    """

    # NOTE(review): the default ``rng`` is created once at definition time and
    # is therefore shared by all instances that rely on the default.
    def __init__(self, bench_name:int,data_path_root:str, data_base_name:str,target_task_name:str, download=True, sparse=False, hp_num=3,min_max_features=False, rng=np.random.RandomState(), **kwargs) -> None:
        """Store the loader options; no data is read here.

        :param download: if false, a missing data directory fails an assertion
            instead of calling ``download_data``.
        :param sparse: selects how rows are filtered (see ``_load_data``).
        :param hp_num: number of hyperparameter columns kept per row.
        :param min_max_features: min-max scale the features over all tasks.
        :param rng: random state used to seed the configuration space.
        """
        super().__init__(bench_name,data_path_root, data_base_name, target_task_name)
        self.data_path = os.path.join(self.data_path_root, data_base_name)
        self.min_max_features = min_max_features
        self.sparse = sparse
        self.hp_num = hp_num
        self.download = download
        self.url = "https://git.openi.org.cn/isleizhang/BBO-Datasets/datasets"
        self.rng = rng
        self.hp_keys = ['C', 'gamma', 'd']

    def _load_data(self):
        """Parse all task files.

        Each non-empty line is a space separated list of floats.  Column 0 is
        the label, columns 1-3 are skipped as features (column 1 doubles as a
        row filter) and the following ``hp_num`` columns are the features.
        Labels are negated (the original code marks this as the conversion to
        a minimization problem).

        :return: ``(source_X, source_y, target_X, target_y)`` where the first
            two are lists with one array per source task.
        :raises ValueError: if there is no file named ``target_task_name``.
        """
        if not os.path.exists(self.data_path):
            assert self.download, 'ERROR: "{}" not exits.'.format(self.data_path)
            self.download_data()
        # os.listdir() yields entries in arbitrary order; sorting keeps the
        # order of the source tasks reproducible across machines.
        filenames = sorted(os.listdir(self.data_path))
        if self.target_task_name not in filenames:
            raise ValueError('target task "{}" not found in "{}"'.format(
                self.target_task_name, self.data_path))

        n_cols = self.hp_num + 1 + 3  # label + 3 skipped columns + features
        datasets_hp = []
        datasets_label = []
        for filename in filenames:
            with open(os.path.join(self.data_path, filename), 'r') as f:
                insts = [
                    [float(v) for v in line.strip().split(' ')][:n_cols]
                    for line in f
                    if line.strip()  # tolerate blank lines
                ]
            # ``np.float`` no longer exists in current NumPy; the builtin
            # ``float`` is the same dtype.
            datasets = np.asarray(insts, dtype=float)
            if self.sparse:
                mask = datasets[:, 1] == 1
                datasets_hp.append(datasets[mask, 1+3:])
                datasets_label.append(-datasets[mask, 0:1])
            else:
                features = datasets[:, 1:]
                mask = features[:, 0].astype(np.bool_)
                datasets_hp.append(features[mask, 3:])
                datasets_label.append(-datasets[mask, 0:1])

        if self.min_max_features:
            # min-max scaling of input features, fitted on all tasks together
            from sklearn.preprocessing import MinMaxScaler
            scaler = MinMaxScaler().fit(np.vstack(datasets_hp))
            datasets_hp = [scaler.transform(X) for X in datasets_hp]

        # Split the target task off; what remains are the source tasks.
        test_idx = filenames.index(self.target_task_name)
        test_task = datasets_hp.pop(test_idx)
        test_task_label = datasets_label.pop(test_idx)
        return (datasets_hp, datasets_label, test_task, test_task_label)

    def get_configuration_space(self):
        """Return (and build on first use) the space with C, gamma and d."""
        if hasattr(self, "configuration_space"):
            return self.configuration_space
        self.configuration_space = ConfigurationSpace(seed=self.rng.randint(MAXINT))
        x0 = UniformFloatHyperparameter("C", -1, 1)
        x1 = UniformFloatHyperparameter("gamma", -1, 1)
        x2 = UniformFloatHyperparameter("d", 0, 1)
        self.configuration_space.add_hyperparameters([x0, x1, x2])
        return self.configuration_space

    @property
    def hp_names(self,):
        """Hyperparameter names in the column order of the loaded features."""
        return self.hp_keys

    def download_data(self):
        """Automatic download is not available; tell the user where to get the data."""
        raise NotImplementedError("plese download {} in {}".format(self.url, self.data_path))

class Table_Data(TransferData):
    """Loader for tabular benchmarks stored as one CSV file.

    The CSV needs a ``task`` column, hyperparameter columns whose names start
    with ``hp_`` and the metric column listed in ``error_metric`` for the
    selected benchmark.
    """

    blackbox_tasks = {
        BenchName.Table_nas102: [
            'cifar10',
            'cifar100',
            'ImageNet16-120'
        ],
        BenchName.Table_fcnet: [
            'naval',
            'parkinsons',
            'protein',
            'slice',
        ],
        BenchName.Table_deepar: [
            'm4-Hourly',
            'm4-Daily',
            'm4-Weekly',
            'm4-Monthly',
            'm4-Quarterly',
            'm4-Yearly',
            'electricity',
            'exchange-rate',
            'solar',
            'traffic',
        ],
        BenchName.Table_xgboost: [
            'a6a',
            'australian',
            'german.numer',
            'heart',
            'ijcnn1',
            'madelon',
            'skin_nonskin',
            'spambase',
            'svmguide1',
            'w6a'
        ],
    }

    # Column that holds the value to minimize, per benchmark.
    error_metric = {
        BenchName.Table_deepar: 'metric_CRPS',
        BenchName.Table_fcnet: 'metric_error',
        BenchName.Table_nas102: 'metric_error',
        BenchName.Table_xgboost: 'metric_error',
    }

    # NOTE(review): the default ``rng`` is created once at definition time and
    # is therefore shared by all instances that rely on the default.
    def __init__(self, bench_name:int,data_path_root:str, data_base_name:str, target_task_name:str, download=True, sparse=False, hp_num=3,min_max_features=False, rng=np.random.RandomState(), **kwargs) -> None:
        """Store the loader options; no data is read here.

        :param download: if false, a missing data file fails an assertion
            instead of calling ``download_data``.
        :param min_max_features: min-max scale the features over all tasks.
        :param rng: random state used to seed the configuration space.
        :raises KeyError: if ``bench_name`` has no entry in ``error_metric``.
        """
        super().__init__(bench_name,data_path_root, data_base_name, target_task_name)
        self.data_base_name = data_base_name
        self.data_path = os.path.join(data_path_root, data_base_name)
        self.min_max_features = min_max_features
        self.sparse = sparse
        self.hp_num = hp_num
        self.download = download
        self.url = "https://git.openi.org.cn/isleizhang/BBO-Datasets/datasets"
        self.rng = rng
        self._metric_col = self.error_metric[bench_name]

    @staticmethod
    def _make_one_hot_encoder():
        """Build a dense one-hot encoder on both old and new scikit-learn."""
        from sklearn.preprocessing import OneHotEncoder
        try:
            return OneHotEncoder(handle_unknown='ignore', sparse_output=False)
        except TypeError:
            # scikit-learn < 1.2 only knows the former ``sparse`` keyword.
            return OneHotEncoder(handle_unknown='ignore', sparse=False)

    @staticmethod
    def _make_hp_keys(n_features):
        """Generic names ``hp_0 .. hp_{n-1}`` for the (possibly encoded) features."""
        return [f'hp_{i}' for i in range(n_features)]

    def _load_data(self):
        """Read the CSV and split it into source tasks and the target task.

        :return: list ``[source_Xs, source_ys, target_X, target_y]`` where the
            first two entries are tuples with one array per source task.
        """
        if not os.path.exists(self.data_path):
            assert self.download, 'ERROR: "{}" not exits.'.format(self.data_path)
            self.download_data()
        df = pd.read_csv(self.data_path)

        assert self.target_task_name in df.task.unique()
        assert self._metric_col in df.columns

        hp_cols = [c for c in sorted(df.columns) if c.startswith("hp_")]
        Xy_dict = {}
        for task in sorted(df.task.unique()):
            mask = df.loc[:, 'task'] == task
            X = df.loc[mask, hp_cols].values
            y = df.loc[mask, self._metric_col].values
            if len(y.shape) == 1:
                y = np.expand_dims(y, axis=1)  # labels are kept as a column
            Xy_dict[task] = X, y

        # todo it would be better done as a post-processing step
        if self.bench_name in [BenchName.Table_fcnet, BenchName.Table_nas102]:
            # applies onehot encoding to *all* hp columns as all hps are categories for those two blackboxes
            # it would be nice to detect column types or pass it as an argument
            enc = self._make_one_hot_encoder()
            enc.fit(df.loc[:, hp_cols])
            for task, (X, y) in Xy_dict.items():
                Xy_dict[task] = enc.transform(X), y

        if self.min_max_features:
            # min-max scaling of input features, fitted on all tasks together
            from sklearn.preprocessing import MinMaxScaler
            X = np.vstack([X for (X, y) in Xy_dict.values()])
            scaler = MinMaxScaler().fit(X)
            Xy_dict = {t: (scaler.transform(X), y) for (t, (X, y)) in Xy_dict.items()}

        Xys_train = [Xy_dict[t] for t in df.task.unique() if t != self.target_task_name]
        X_test, y_test = Xy_dict[self.target_task_name]
        self.hp_keys = self._make_hp_keys(X_test.shape[1])
        # Keep the 4-element layout even when the file has no source task.
        Xs_train, ys_train = zip(*Xys_train) if Xys_train else ((), ())
        return [Xs_train, ys_train, X_test, y_test]

    def get_configuration_space(self):
        """Return (and build on first use) a [0, 1] float space, one axis per feature."""
        if hasattr(self, "configuration_space"):
            return self.configuration_space
        self.configuration_space = ConfigurationSpace(seed=self.rng.randint(MAXINT))
        for name in self.hp_names:
            x = UniformFloatHyperparameter(name, 0, 1)
            self.configuration_space.add_hyperparameter(x)
        return self.configuration_space

    @property
    def hp_names(self,):
        """Feature names in column order.

        ``hp_keys`` is normally set by ``_load_data``.  That method does not
        run on this instance when ``load_data`` is answered from the cache
        (or has not been called yet), so the names are derived from the
        loaded target features in that case.
        """
        if not hasattr(self, 'hp_keys'):
            X_test = self.load_data()[2]
            self.hp_keys = self._make_hp_keys(X_test.shape[1])
        return self.hp_keys

    def download_data(self):
        """Automatic download is not available; tell the user where to get the data."""
        raise NotImplementedError("plese download {} in {}".format(self.url, self.data_path))
        # download_and_extract_archive(self.url, download_root=self.data_path_root, filename=self.data_path, remove_finished=True)
# download_and_extract_archive(self.url, download_root=self.data_path_root, filename=self.data_path, remove_finished=True)

class TransferBenchmark(AbstractBenchmark):
    """Offline benchmark for transfer HPO.

    The target task is served through a nearest-neighbour lookup over its
    recorded evaluations (``BlackboxOffline``); the evaluations of all other
    tasks are exposed as historical data via ``get_old_data``.
    """

    def __init__(self, bench_name:int, data_base_name:str, target_task_name:str, rng=np.random.RandomState(), normalize_y=False, data_path_root='./data',**kwargs):
        """Load the data set and prepare the target-task lookup.

        :param bench_name: ``BenchName`` member selecting the loader.
        :param data_base_name: data set name below ``data_path_root``.
        :param target_task_name: task used as the optimization target.
        :param rng: seed or random state forwarded to the base class.
        :param normalize_y: min-max normalize the historical labels per task
            and the returned objective values of the target task.
        :param data_path_root: directory that contains the data set.
        :param kwargs: forwarded to the data loader.
        :raises NotImplementedError: for an unsupported ``bench_name``.
        """
        # np.random.seed(cfg.GENERAL.random_seed)
        self.bench_name = bench_name
        self.target_task_name = target_task_name
        self.normalize_y = normalize_y
        self.data_path_root = data_path_root
        super().__init__(rng)
        if bench_name == BenchName.TST:
            self.data_loader = TST_Data(bench_name=bench_name,data_path_root=data_path_root, data_base_name=data_base_name,target_task_name=target_task_name, rng=self.rng,**kwargs)
        elif bench_name in [BenchName.Table_deepar, BenchName.Table_fcnet, BenchName.Table_nas102, BenchName.Table_xgboost]:
            self.data_loader = Table_Data(bench_name=bench_name,data_path_root=data_path_root, data_base_name=data_base_name,target_task_name=target_task_name,min_max_features=True, rng=self.rng,**kwargs)
        else:
            # ``NotImplemented`` is a constant, not an exception class.
            raise NotImplementedError("unsupported bench_name: {!r}".format(bench_name))

        self._old_D_x, self._old_D_y, _new_D_x, _new_D_y = self.data_loader.load_data()
        if normalize_y:
            # Must happen after loading: the labels do not exist before.  A
            # new list is built so the loader's cached data stays untouched.
            self._old_D_y = [self._min_max_normalize(y) for y in self._old_D_y]
        self._bbfunc = BlackboxOffline(_new_D_x, _new_D_y)
        self._best_f = np.min(_new_D_y).item()
        self._f_range = np.max(_new_D_y).item() - self._best_f
        # axis=None flattens before sorting; sorting an (n, 1) column along
        # its last axis would leave the values unsorted.
        self._sorted_new_D_y = np.sort(_new_D_y, axis=None)
        self.get_configuration_space()
        # Column permutation that brings the data columns (ordered like
        # ``hp_names``) into the hyperparameter order of the space.
        self.idxs = []
        for name in self.data_loader.hp_names:
            self.idxs.append(self.configuration_space.get_idx_by_hyperparameter_name(name))
        self.idxs = np.argsort(self.idxs)

    @staticmethod
    def _min_max_normalize(y):
        """Scale ``y`` to [0, 1]; a constant ``y`` maps to all zeros."""
        low = y.min()
        span = y.max() - low
        if span == 0:
            return np.zeros_like(y, dtype=float)
        return (y - low) / span

    @AbstractBenchmark._check_configuration
    def objective_function(self, config, **kwargs):
        """Look up the recorded value closest to ``config``.

        :return: ``{Key.FUNC_VALUE: value}``; the value is rescaled with the
            target task's best value and range when ``normalize_y`` is set.
        """
        f = self._bbfunc(np.asarray([config[k] for k in self.data_loader.hp_names])).item()
        y = (f - self._best_f) / self._f_range if self.normalize_y else f

        return {Key.FUNC_VALUE: y}

    @AbstractBenchmark._check_configuration
    def objective_function_test(self, config, **kwargs):
        """Same as ``objective_function``; there is no separate test split."""
        return self.objective_function(config, **kwargs)

    def get_configuration_space(self,):
        """Return the loader's configuration space, cached on first use."""
        if hasattr(self, "configuration_space"):
            return self.configuration_space
        self.configuration_space = self.data_loader.get_configuration_space()
        return self.configuration_space

    def get_old_data(self):
        """Return ``(features, labels)`` of the source tasks.

        The feature columns are reordered to the hyperparameter order of the
        configuration space.
        """
        return np.take(self._old_D_x, self.idxs, axis=-1), self._old_D_y

    def get_old_configurations(self,):
        """Return the source data with the features wrapped in a ``Configuration``.

        NOTE(review): the vector passed here holds the data of all source
        tasks at once; confirm that ``CS.Configuration`` accepts that.
        """
        return CS.Configuration(self.configuration_space, vector=np.take(self._old_D_x,self.idxs, axis=-1)), self._old_D_y

    @staticmethod
    def get_meta_information():
        """Return a short description of this benchmark."""
        return {'name': 'Test Function: Transfer blackbox benchmark'}

class Blackbox:
    """Callable wrapper around ``eval_fun`` that asserts input/output shapes."""

    def __init__(
            self,
            input_dim: int,
            output_dim: int,
            eval_fun: Callable[[np.array], np.array],
    ):
        """
        :param input_dim: length of the vectors accepted by ``__call__``
        :param output_dim: length of the vectors returned by ``__call__``
        :param eval_fun: function mapping shape (input_dim,) to (output_dim,)
        """
        self.eval_fun = eval_fun
        self.input_dim, self.output_dim = input_dim, output_dim

    def __call__(self, x: np.array) -> np.array:
        """
        Evaluate the wrapped function at one point.

        :param x: shape (input_dim,)
        :return: shape (output_dim,)
        """
        expected_in = (self.input_dim,)
        assert x.shape == expected_in
        result = self.eval_fun(x)
        expected_out = (self.output_dim,)
        assert result.shape == expected_out
        return result


class BlackboxOffline(Blackbox):
    def __init__(
            self,
            X: np.array,
            y: np.array,
    ):
        """
        A blackbox backed by a table of known evaluations.
        A query point is answered with the recorded output of the closest
        known point (1-nearest-neighbour regression).
        The input and output dimensions are taken from the shapes of X and y.
        :param X: list of arguments evaluated, shape (n, input_dim)
        :param y: list of outputs evaluated, shape (n, output_dim)
        """
        assert len(X) == len(y)
        # Unpacking also enforces that both arrays are 2-dimensional.
        _, n_inputs = X.shape
        _, n_outputs = y.shape

        from sklearn.neighbors import KNeighborsRegressor
        nearest = KNeighborsRegressor(n_neighbors=1).fit(X, y)

        def _lookup(x):
            # predict() expects a batch; query one row and unwrap the result.
            return nearest.predict(x.reshape(1, -1))[0]

        super().__init__(
            input_dim=n_inputs,
            output_dim=n_outputs,
            eval_fun=_lookup,
        )
if __name__ == "__main__":
    # Smoke test: load the DeepAR table and evaluate the default configuration.
    bench = TransferBenchmark(
        bench_name=BenchName.Table_deepar,
        data_base_name='DeepAR.csv.zip',
        target_task_name="m4-Hourly",
        data_path_root='./data/offline_evaluations',
    )
    cs = bench.get_configuration_space()
    default_config = cs.get_default_configuration()
    bench(default_config)

+ 86
- 71
xbbo/search_algorithm/transfer_bo_optimizer.py View File

@@ -17,11 +17,12 @@ logger = logging.getLogger(__name__)


@alg_register.register('bo-transfer')
class SMBO(AbstractOptimizer):
class TransferBO(AbstractOptimizer):
def __init__(self,
space: DenseConfigurationSpace,
space,
seed: int = 42,
initial_design: str = 'sobol',
init_budget: int = None,
suggest_limit: int = np.inf,
surrogate: str = 'gp',
acq_func: str = 'ei',
@@ -37,81 +38,27 @@ class SMBO(AbstractOptimizer):
suggest_limit=suggest_limit,
**kwargs)
self.predict_x_best = predict_x_best
self.dimension = self.space.get_dimensions()
self.dimension = self.space.get_dimensions(sparse=True)

self.initial_design = ALL_avaliable_design[initial_design](
self.space, self.rng, ta_run_limit=suggest_limit, **kwargs)
self.space,
self.rng,
ta_run_limit=suggest_limit,
init_budget=init_budget,
**kwargs)
self.init_budget = self.initial_design.init_budget
self.hp_num = len(self.space)
self.initial_design_configs = self.initial_design.select_configurations(
)
self.trials = Trials(space,dim=self.dimension)
self.trials = Trials(space, dim=self.dimension)

# self.rho = kwargs.get("rho", 1)
self.bandwidth = kwargs.get("bandwdth", 0.1)
self.base_models = kwargs.get("base_models")
if self.base_models:
assert isinstance(self.base_models[0], BaseModel)
if surrogate == 'gp':
self.surrogate_model = GPR_sklearn(self.space, rng=self.rng)
elif surrogate == 'tst':
self.surrogate_model = TST_surrogate(self.space,
self.base_models,
rng=self.rng)
else:
raise NotImplementedError()
if weight_srategy == 'kernel':
self.weight_sratety = KernelRegress(self.space, self.base_models,
self.surrogate_model, self.rng)
elif weight_srategy == 'rw':
self.weight_sratety = RankingWeight(self.space,
self.base_models,
self.surrogate_model,
self.rng,
budget=suggest_limit,
is_purn=True)
elif weight_srategy == 'zero':
self.weight_sratety = ZeroWeight(self.space, self.base_models,
self.surrogate_model, self.rng)
else:
raise NotImplementedError()

if acq_func == 'mogp':
self.acquisition_func = MoGP_AcqFunc(self.surrogate_model,
self.base_models, self.rng)
elif acq_func == 'taf':
self.acquisition_func = TAF_AcqFunc(self.surrogate_model,
self.base_models, self.rng)
elif acq_func == 'ei':
self.acquisition_func = EI_AcqFunc(self.surrogate_model, self.rng)
else:
raise NotImplementedError()

if acq_opt == 'ls':
self.acq_maximizer = LocalSearch(self.acquisition_func, self.space,
self.rng)
elif acq_opt == 'rs':
self.acq_maximizer = RandomSearch(self.acquisition_func,
self.space, self.rng)
elif acq_opt == 'rs_ls':
self.acq_maximizer = InterleavedLocalAndRandomSearch(
self.acquisition_func, self.space, self.rng)
elif acq_opt == 'scipy':
self.acq_maximizer = ScipyOptimizer(self.acquisition_func,
self.space, self.rng)
elif acq_opt == 'scipy_global':
self.acq_maximizer = ScipyGlobalOptimizer(self.acquisition_func,
self.space, self.rng)
elif acq_opt == 'r_scipy':
self.acq_maximizer = RandomScipyOptimizer(self.acquisition_func,
self.space, self.rng)
else:
raise ValueError('acq_opt {} not in {}'.format(
acq_opt,
['ls', 'rs', 'rs_ls', 'scipy', 'scipy_global', 'r_scipy']))
logger.info(
"Execute Bayesian optimization...\n [Using ({})surrogate, ({})acquisition function, ({})acquisition optmizer]"
.format(surrogate, acq_func, acq_opt))
self._surrogate = surrogate
self._acq_func = acq_func
self._weight_srategy = weight_srategy
self._acq_opt = acq_opt
self._suggest_limit = suggest_limit

def _suggest(self, n_suggestions=1):
trial_list = []
@@ -126,7 +73,7 @@ class SMBO(AbstractOptimizer):
trial_list.append(
Trial(configuration=config,
config_dict=config.get_dictionary(),
array=config.get_array(sparse=False)))
array=config.get_array(sparse=True)))
else:
# update target surrogate model
self.surrogate_model.train(
@@ -160,7 +107,7 @@ class SMBO(AbstractOptimizer):
trial_list.append(
Trial(configuration=config,
config_dict=config.get_dictionary(),
array=config.get_array(sparse=False)))
array=config.get_array(sparse=True)))
_idx += 1

break
@@ -216,5 +163,73 @@ class SMBO(AbstractOptimizer):

return x_best_array, best_observation

def get_transfer_knowledge(self, old_D_X, old_D_y):
    """Train the base models and assemble the transfer-BO components.

    ``old_D_X[i]`` / ``old_D_y[i]`` are the training inputs / targets of the
    i-th historical task.  One ``BaseModel`` is fitted per task; afterwards
    the target surrogate, the weighting strategy, the acquisition function
    and the acquisition optimizer are created according to the option names
    given to the constructor.

    Raises ``NotImplementedError`` for an unknown surrogate, weighting
    strategy or acquisition function, and ``ValueError`` for an unknown
    acquisition optimizer.
    """
    # One base model per historical task.
    self.base_models = []
    for task_idx in range(len(old_D_X)):
        base_model = BaseModel(self.space, rng=self.rng, do_optimize=False)
        self.base_models.append(base_model)
        base_model.train(old_D_X[task_idx], old_D_y[task_idx])
    # self.base_models = kwargs.get("base_models")
    if self.base_models:
        assert isinstance(self.base_models[0], BaseModel)

    # Target surrogate.
    surrogate_builders = {
        'gp': lambda: GPR_sklearn(self.space, rng=self.rng),
        'tst': lambda: TST_surrogate(self.space,
                                     self.base_models,
                                     rng=self.rng),
    }
    if self._surrogate not in surrogate_builders:
        raise NotImplementedError()
    self.surrogate_model = surrogate_builders[self._surrogate]()

    # Strategy that weights the base models against the target surrogate.
    weight_builders = {
        'kernel': lambda: KernelRegress(self.space, self.base_models,
                                        self.surrogate_model, self.rng),
        'rw': lambda: RankingWeight(self.space,
                                    self.base_models,
                                    self.surrogate_model,
                                    self.rng,
                                    budget=self._suggest_limit,
                                    is_purn=True),
        'zero': lambda: ZeroWeight(self.space, self.base_models,
                                   self.surrogate_model, self.rng),
    }
    if self._weight_srategy not in weight_builders:
        raise NotImplementedError()
    self.weight_sratety = weight_builders[self._weight_srategy]()

    # Acquisition function.
    acq_builders = {
        'mogp': lambda: MoGP_AcqFunc(self.surrogate_model,
                                     self.base_models, self.rng),
        'taf': lambda: TAF_AcqFunc(self.surrogate_model,
                                   self.base_models, self.rng),
        'ei': lambda: EI_AcqFunc(self.surrogate_model, self.rng),
    }
    if self._acq_func not in acq_builders:
        raise NotImplementedError()
    self.acquisition_func = acq_builders[self._acq_func]()

    # Acquisition optimizer; all candidates share one constructor signature.
    maximizers = {
        'ls': LocalSearch,
        'rs': RandomSearch,
        'rs_ls': InterleavedLocalAndRandomSearch,
        'scipy': ScipyOptimizer,
        'scipy_global': ScipyGlobalOptimizer,
        'r_scipy': RandomScipyOptimizer,
    }
    if self._acq_opt not in maximizers:
        raise ValueError('acq_opt {} not in {}'.format(
            self._acq_opt, list(maximizers)))
    self.acq_maximizer = maximizers[self._acq_opt](
        self.acquisition_func, self.space, self.rng)

    logger.info(
        "Execute Bayesian optimization...\n [Using ({})surrogate, ({})acquisition function, ({})acquisition optmizer]"
        .format(self._surrogate, self._acq_func, self._acq_opt))

opt_class = SMBO
opt_class = TransferBO

+ 0
- 19
xbbo/surrogate/gaussian_process.py View File

@@ -404,22 +404,3 @@ class GPR_sklearn(BaseGP):
f_opt_star = f_opt
theta_star = theta
return theta_star

# def _set_has_conditions(self) -> None:
# has_conditions = len(self.configspace.get_conditions()) > 0
# to_visit = []
# to_visit.append(self.kernel)
# while len(to_visit) > 0:
# current_param = to_visit.pop(0)
# if isinstance(current_param,
# sklearn.gaussian_process.kernels.KernelOperator):
# to_visit.insert(0, current_param.k1)
# to_visit.insert(1, current_param.k2)
# current_param.has_conditions = has_conditions
# elif isinstance(current_param,
# sklearn.gaussian_process.kernels.Kernel):
# current_param.has_conditions = has_conditions
# else:
# raise ValueError(current_param)



+ 1
- 1
xbbo/surrogate/transfer/weight_stategy.py View File

@@ -173,7 +173,7 @@ class KernelRegress(ABCWeightStategy):
base_model_means = []
for model in self.base_models:
base_model_means.append(
model._predict_normalize(trials.get_sparse_array(), None)[0])
model._predict_normalize(trials.get_array(), None)[0])
if not base_model_means:
return []
base_model_means = np.stack(base_model_means) # [model, obs_num, 1]


+ 21
- 1
xbbo/utils/util.py View File

@@ -1,10 +1,13 @@
from typing import Optional
import numpy as np
import pickle, os, json
from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
OrdinalHyperparameter, Constant,
UniformFloatHyperparameter,
UniformIntegerHyperparameter)
import urllib

import tqdm

def dumpOBJ(path, filename, obj):
with open(os.path.join(path, filename), 'wb') as f:
@@ -105,4 +108,21 @@ def create_rng(rng):
return np.random.RandomState(rng)
else:
raise ValueError("%s is neither a number nor a RandomState. "
"Initializing RandomState failed")
"Initializing RandomState failed")
def download_and_extract_archive(
    url: str,
    download_root: str,
    extract_root: Optional[str] = None,
    filename: Optional[str] = None,
    remove_finished: bool = False,
) -> None:
    """Download a zip archive and unpack it.

    The download and extraction use the standard library instead of shell
    command strings, so paths or URLs containing spaces or shell
    metacharacters are handled safely and failures raise instead of being
    ignored.

    :param url: location of the archive.
    :param download_root: default extraction directory (``~`` is expanded).
    :param extract_root: directory to unpack into; defaults to
        ``download_root``.
    :param filename: path the archive is saved to; defaults to the last path
        component of ``url``.  NOTE(review): as before, this path is used
        as given and is not placed below ``download_root`` - confirm that
        this is intended.
    :param remove_finished: delete the downloaded archive afterwards.
    :raises OSError: if the download or a file operation fails
        (``urllib.error.URLError`` is a subclass).
    :raises zipfile.BadZipFile: if the downloaded file is not a zip archive.
    """
    # Local imports keep this function independent of the module's imports.
    import urllib.request
    import zipfile

    download_root = os.path.expanduser(download_root)
    if extract_root is None:
        extract_root = download_root
    if not filename:
        filename = os.path.basename(url)

    try:
        urllib.request.urlretrieve(url, filename)
        with zipfile.ZipFile(filename) as archive:
            archive.extractall(extract_root)
    finally:
        # Clean up on failure as well, so no partial download is left behind.
        if remove_finished and os.path.exists(filename):
            os.remove(filename)

Loading…
Cancel
Save