def test_init_only_scenario_quality(self):
    smbo = SMAC4AC(self.scenario).solver
    self.assertIsInstance(smbo.model, RandomForestWithInstances)
    self.assertIsInstance(smbo.rh2EPM, RunHistory2EPM4Cost)
    self.assertIsInstance(smbo.acquisition_func, EI)
def test_init_only_scenario_quality(self):
    epils = EPILS(self.scenario).solver
    self.assertIsInstance(epils.model, RandomForestWithInstances)
    self.assertIsInstance(epils.rh2EPM, RunHistory2EPM4Cost)
    self.assertIsInstance(epils.acquisition_func, EI)
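# Illustrative usage sketch (not from the source): constructing the facade
# that the tests above exercise. Import paths and scenario keys are
# assumptions based on typical SMAC3 setups and may differ between versions.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.scenario.scenario import Scenario
from smac.facade.smac_ac_facade import SMAC4AC

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", 0.0, 1.0))
scenario = Scenario({"run_obj": "quality", "runcount-limit": 50, "cs": cs,
                     "deterministic": "true"})
smac = SMAC4AC(scenario=scenario, tae_runner=lambda cfg: cfg["x"] ** 2)
# smac.solver.model / .rh2EPM / .acquisition_func are the defaults asserted above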
        Returns
        -------
        np.ndarray
        """
        # Subtract the difference between the percentile and the minimum
        min_y = self.min_y - (self.perc - self.min_y)
        # linear scaling
        if min_y == self.max_y:
            min_y *= 1 - 10 ** -10  # prevent dividing by zero
        values = (values - min_y) / (self.max_y - min_y)
        values = np.sqrt(values)
        return values
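# For intuition, a toy rerun of the scaling above (min_y, max_y and perc stand
# in for the statistics the transformer derives from the runhistory; the
# numbers here are made up):
import numpy as np

min_y, max_y, perc = 0.2, 2.0, 0.5
min_y = min_y - (perc - min_y)                # shift below the observed minimum: -0.1
values = np.array([0.2, 1.0, 2.0])
scaled = (values - min_y) / (max_y - min_y)   # linear map into (0, 1]
print(np.sqrt(scaled))                        # sqrt compresses differences among large costs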
class RunHistory2EPM4LogScaledCost(RunHistory2EPM4Cost):
    """Transform response values by linearly scaling them between zero and one
    and then applying a log transformation."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform function response values.

        Transform the response values by linearly scaling them between zero
        and one and then using the log transformation.

        Parameters
        ----------
        values : np.ndarray
            Response values to be transformed.

        Returns
        -------
        np.ndarray
        """
        # Body completed here, assumed to mirror the linear scaling above
        # with log in place of sqrt
        min_y = self.min_y - (self.perc - self.min_y)
        if min_y == self.max_y:
            min_y *= 1 - 10 ** -10  # prevent dividing by zero
        values = (values - min_y) / (self.max_y - min_y)
        values = np.log(values)
        return values
if use_epm and not self.block_epm:
    for entry in traj:
        time.append(entry["wallclock_time"])
        configs.append(entry["incumbent"])
        # self.logger.debug('Time: %d Runs: %d', time[-1], len(rh.get_runs_for_config(configs[-1])))
    self.logger.debug("Using %d samples (%d distinct) from trajectory.", len(time), len(set(configs)))
    # Initialize EPM
    if validator.epm:  # not log as validator epm is trained on cost, not log cost
        epm = validator.epm
    else:
        self.logger.debug("No EPM passed! Training new one from runhistory.")
        # Train random forest and transform training data (from given rh)
        # Not using validator because we want to plot uncertainties
        rh2epm = RunHistory2EPM4Cost(num_params=len(self.scenario.cs.get_hyperparameters()),
                                     scenario=self.scenario)
        X, y = rh2epm.transform(rh)
        self.logger.debug("Training model with data of shape X: %s, y: %s", str(X.shape), str(y.shape))
        types, bounds = get_types(self.scenario.cs, self.scenario.feature_array)
        epm = RandomForestWithInstances(self.scenario.cs,
                                        types=types,
                                        bounds=bounds,
                                        seed=self.rng.randint(MAXINT),
                                        instance_features=self.scenario.feature_array,
                                        ratio_features=1.0)
        epm.train(X, y)
    config_array = convert_configurations_to_array(configs)
    mean, var = epm.predict_marginalized_over_instances(config_array)
    var = np.zeros(mean.shape)
    # We don't want to show the uncertainty of the model but uncertainty over multiple optimizer runs.
    # This variance is computed in an outer loop.
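# Hedged sketch of that outer loop (the names below are assumptions, not from
# the source): stack the per-run mean predictions and take the variance across
# optimizer runs at each trajectory point.
import numpy as np

means_per_run = np.stack(all_run_means)   # hypothetical list of per-run mean arrays
mean_over_runs = means_per_run.mean(axis=0)
var_over_runs = means_per_run.var(axis=0)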
time, configs = [], []
for entry in traj:
    time.append(entry["wallclock_time"])
    configs.append(entry["incumbent"])
self.logger.debug("Using %d samples (%d distinct) from trajectory.",
                  len(time), len(set(configs)))
if validator.epm:
    epm = validator.epm
else:
    self.logger.debug("No EPM passed! Training new one from runhistory.")
    # Train random forest and transform training data (from given rh)
    # Not using validator because we want to plot uncertainties
    rh2epm = RunHistory2EPM4Cost(num_params=len(self.scenario.cs.get_hyperparameters()),
                                 scenario=self.scenario)
    X, y = rh2epm.transform(rh)
    self.logger.debug("Training model with data of shape X: %s, y: %s",
                      str(X.shape), str(y.shape))
    types, bounds = get_types(self.scenario.cs, self.scenario.feature_array)
    epm = RandomForestWithInstances(types=types,
                                    bounds=bounds,
                                    instance_features=self.scenario.feature_array,
                                    # seed=self.rng.randint(MAXINT),
                                    ratio_features=1.0)
    epm.train(X, y)
## not necessary right now since the EPM only knows the features
## of the training instances
# use only training instances
    ratio_features=1.0,
)
# Use imputor if objective is runtime
imputor = None
impute_state = None
impute_censored_data = False
if self.scen.run_obj == 'runtime':
    threshold = self.scen.cutoff * self.scen.par_factor
    imputor = RFRImputator(rng=self.rng,
                           cutoff=self.scen.cutoff,
                           threshold=threshold,
                           model=self.epm)
    impute_censored_data = True
    impute_state = [StatusType.CAPPED]
# Transform training data (from given rh)
rh2epm = RunHistory2EPM4Cost(num_params=len(self.scen.cs.get_hyperparameters()),
                             scenario=self.scen, rng=self.rng,
                             impute_censored_data=impute_censored_data,
                             imputor=imputor,
                             impute_state=impute_state)
X, y = rh2epm.transform(runhistory)
self.logger.debug("Training model with data of shape X: %s, y: %s",
                  str(X.shape), str(y.shape))
# Train random forest
self.epm.train(X, y)
# Predict desired runs
runs, rh_epm = self._get_runs(config_mode, instance_mode, repetitions, runhistory)
feature_array_size = len(self.scen.cs.get_hyperparameters())
if self.scen.feature_array is not None:
    feature_array_size += self.scen.feature_array.shape[1]
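# Hedged sketch of the input layout that feature_array_size describes
# (`config` and `instance_idx` are hypothetical): each EPM input row is the
# configuration vector followed by the instance features.
x = np.empty(feature_array_size)
x[:len(self.scen.cs.get_hyperparameters())] = config.get_array()
x[len(self.scen.cs.get_hyperparameters()):] = self.scen.feature_array[instance_idx]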
        Returns the input values.

        Parameters
        ----------
        values : np.ndarray
            Response values to be transformed.

        Returns
        -------
        np.ndarray
        """
        return values
class RunHistory2EPM4LogCost(RunHistory2EPM4Cost):
    """Transform response values with a log transformation."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform function response values.

        Transforms the response values by using a log transformation.

        Parameters
        ----------
        values : np.ndarray
            Response values to be transformed.

        Returns
        -------
        np.ndarray
        """
        # Body completed here: plain log transform, assuming strictly
        # positive response values
        values = np.log(values)
        return values
    acq = EI(model=model, par=conf.get("par_ei", 0))
elif conf["acq_func"] == "LCB":
    acq = LCB(model=model, par=conf.get("par_lcb", 0.05))
elif conf["acq_func"] == "PI":
    acq = PI(model=model, par=conf.get("par_pi", 0))
elif conf["acq_func"] == "LogEI":
    # par value should be in log-space
    acq = LogEI(model=model, par=conf.get("par_logei", 0))
else:
    raise ValueError(conf['acq_func'])
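# Example of the configuration dict this dispatch expects (illustrative
# values, covering only the keys read above):
conf = {"acq_func": "LCB", "par_lcb": 0.05}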
num_params = len(self.scenario.cs.get_hyperparameters())
success_states = [StatusType.SUCCESS, StatusType.CRASHED]
# TODO: only designed for black-box problems without instances
if conf["y_transform"] == "y":
    rh2epm = RunHistory2EPM4Cost(scenario=self.scenario,
                                 num_params=num_params,
                                 success_states=success_states,
                                 impute_censored_data=False,
                                 impute_state=None)
elif conf["y_transform"] == "log_scaled":
    rh2epm = RunHistory2EPM4LogScaledCost(scenario=self.scenario,
                                          num_params=num_params,
                                          success_states=success_states,
                                          impute_censored_data=False,
                                          impute_state=None)
elif conf["y_transform"] == "inv_scaled":
    rh2epm = RunHistory2EPM4InvScaledCost(scenario=self.scenario,
                                          num_params=num_params,
                                          success_states=success_states,
                                          impute_censored_data=False,
                                          impute_state=None)
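# The same dict also selects the runhistory-to-EPM transformation, e.g.
# (illustrative):
conf = {"acq_func": "EI", "par_ei": 0, "y_transform": "log_scaled"}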
for entry in traj:
    time.append(entry["wallclock_time"])
    configs.append(entry["incumbent"])
    # self.logger.debug('Time: %d Runs: %d', time[-1],
    #                   len(rh.get_runs_for_config(configs[-1])))
self.logger.debug("Using %d samples (%d distinct) from trajectory.",
                  len(time), len(set(configs)))
if validator.epm:  # not log as validator epm is trained on cost, not log cost
    epm = validator.epm
else:
    self.logger.debug("No EPM passed! Training new one from runhistory.")
    # Train random forest and transform training data (from given rh)
    # Not using validator because we want to plot uncertainties
    rh2epm = RunHistory2EPM4Cost(num_params=len(self.scenario.cs.get_hyperparameters()),
                                 scenario=self.scenario)
    X, y = rh2epm.transform(rh)
    self.logger.debug("Training model with data of shape X: %s, y: %s",
                      str(X.shape), str(y.shape))
    types, bounds = get_types(self.scenario.cs, self.scenario.feature_array)
    epm = RandomForestWithInstances(types=types,
                                    bounds=bounds,
                                    instance_features=self.scenario.feature_array,
                                    # seed=self.rng.randint(MAXINT),
                                    ratio_features=1.0)
    epm.train(X, y)
config_array = convert_configurations_to_array(configs)
mean, var = epm.predict_marginalized_over_instances(config_array)
var = np.zeros(mean.shape)
# We don't want to show the uncertainty of the model but uncertainty over multiple optimizer runs.
# This variance is computed in an outer loop.