Fix PriorAcquisitionFunction #1185

Merged · 9 commits · Jan 13, 2025
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@

## Bugfixes
- Fix kwargs for DifferentialEvolution (#1187)
- Fix PiBo implementation (#1076)

# 2.2.1

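For context on the entry above: PriorAcquisitionFunction implements πBO-style prior-weighted acquisition, and the fix below concerns how the prior's influence decays. A sketch of the weighting as implemented in this PR, with $t$ the number of iterations after the initial design, $\beta$ the `decay_beta` argument, and $\epsilon$ the prior floor:

$$\alpha_{\pi}(x) = \alpha(x) \cdot \big(\pi(x) + \epsilon\big)^{\beta / (t + 1)}$$

so the prior $\pi(x)$ dominates early and the raw acquisition value $\alpha(x)$ takes over as $t$ grows.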
77 changes: 72 additions & 5 deletions examples/1_basics/6_priors.py
@@ -20,6 +20,7 @@
    ConfigurationSpace,
    NormalFloatHyperparameter,
    UniformIntegerHyperparameter,
+    UniformFloatHyperparameter,
)
from sklearn.datasets import load_digits
from sklearn.exceptions import ConvergenceWarning
@@ -38,7 +39,7 @@

class MLP:
    @property
-    def configspace(self) -> ConfigurationSpace:
+    def prior_configspace(self) -> ConfigurationSpace:
        # Build Configuration Space which defines all parameters and their ranges.
        # To illustrate different parameter types,
        # we use continuous, integer and categorical parameters.
@@ -100,7 +101,67 @@ def configspace(self) -> ConfigurationSpace:
        )

        # Add all hyperparameters at once:
-        cs.add([n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init])
+        cs.add(
+            [n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]
+        )

        return cs

+    @property
+    def configspace(self) -> ConfigurationSpace:
+        # Build Configuration Space which defines all parameters and their ranges.
+        # To illustrate different parameter types,
+        # we use continuous, integer and categorical parameters.
+        cs = ConfigurationSpace()
+
+        # We do not have an educated belief on the number of layers beforehand
+        n_layer = UniformIntegerHyperparameter(
+            "n_layer",
+            lower=1,
+            upper=5,
+        )
+
+        # Define network width without a specific prior
+        n_neurons = UniformIntegerHyperparameter(
+            "n_neurons",
+            lower=8,
+            upper=256,
+        )
+
+        # Define activation functions without specific weights
+        activation = CategoricalHyperparameter(
+            "activation",
+            ["logistic", "tanh", "relu"],
+            default_value="relu",
+        )
+
+        # Define optimizer without specific weights
+        optimizer = CategoricalHyperparameter(
+            "optimizer",
+            ["sgd", "adam"],
+            default_value="adam",
+        )
+
+        # Define batch size without specific distribution
+        batch_size = UniformIntegerHyperparameter(
+            "batch_size",
+            16,
+            512,
+            default_value=128,
+        )
+
+        # Define learning rate range without log-normal prior
+        learning_rate_init = UniformFloatHyperparameter(
+            "learning_rate_init",
+            lower=1e-5,
+            upper=1.0,
+            default_value=1e-3,
+        )
+
+        # Add all hyperparameters at once:
+        cs.add(
+            [n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]
+        )
+
+        return cs
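
For contrast with the uniform space above, a minimal sketch of how the prior-aware counterparts in `prior_configspace` can be declared with ConfigSpace; the mu/sigma/weights values here are illustrative assumptions, not necessarily the example's actual numbers:

import numpy as np
from ConfigSpace import CategoricalHyperparameter, NormalFloatHyperparameter

# Belief: the optimal learning rate sits around 1e-3, on a log scale.
learning_rate_init = NormalFloatHyperparameter(
    "learning_rate_init",
    mu=np.log(1e-3),   # assumed prior mean (log space)
    sigma=np.log(10),  # assumed prior width
    lower=1e-5,
    upper=1.0,
    default_value=1e-3,
    log=True,
)

# Belief about categoricals is expressed through weights.
activation = CategoricalHyperparameter(
    "activation",
    ["logistic", "tanh", "relu"],
    weights=[1, 1, 3],  # assumed weighting in favor of relu
    default_value="relu",
)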

@@ -119,8 +180,12 @@ def train(self, config: Configuration, seed: int = 0) -> float:
            )

            # Returns the 5-fold cross validation accuracy
-            cv = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)  # to make CV splits consistent
-            score = cross_val_score(classifier, digits.data, digits.target, cv=cv, error_score="raise")
+            cv = StratifiedKFold(
+                n_splits=5, random_state=seed, shuffle=True
+            )  # to make CV splits consistent
+            score = cross_val_score(
+                classifier, digits.data, digits.target, cv=cv, error_score="raise"
+            )

        return 1 - np.mean(score)

@@ -140,7 +205,9 @@ def train(self, config: Configuration, seed: int = 0) -> float:

    # We define the prior acquisition function, which conducts the optimization using priors over the optimum
    acquisition_function = PriorAcquisitionFunction(
-        acquisition_function=HyperparameterOptimizationFacade.get_acquisition_function(scenario),
+        acquisition_function=HyperparameterOptimizationFacade.get_acquisition_function(
+            scenario
+        ),
        decay_beta=scenario.n_trials / 10,  # Proven solid value
    )
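
The acquisition function is then handed to the facade; a minimal sketch assuming the surrounding example's `mlp = MLP()` object and standard SMAC3 facade arguments:

    smac = HyperparameterOptimizationFacade(
        scenario,
        mlp.train,
        acquisition_function=acquisition_function,
        overwrite=True,
    )
    incumbent = smac.optimize()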

19 changes: 15 additions & 4 deletions smac/acquisition/function/prior_acquisition_function.py
@@ -72,6 +72,9 @@ def __init__(
            acquisition_type = self._acquisition_function

        self._rescale = isinstance(acquisition_type, (LCB, TS))
+
+        # Variables needed to adapt the weighting of the prior
+        self._initial_design_size = None
        self._iteration_number = 0

    @property
@@ -116,7 +119,12 @@ def _update(self, **kwargs: Any) -> None:
            Current incumbent value.
        """
        assert "eta" in kwargs
-        self._iteration_number += 1
+
+        # Compute initial design size
+        if self._initial_design_size is None:
+            self._initial_design_size = kwargs["num_data"]
+
+        self._iteration_number = kwargs["num_data"] - self._initial_design_size
        self._eta = kwargs["eta"]

        assert self.model is not None
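
The effect of this change, as a standalone sketch (sizes illustrative): the iteration counter now starts at zero once the initial design is exhausted, instead of counting every update call:

def iteration_number(num_data: int, initial_design_size: int) -> int:
    # Mirrors _update: the first call fixes the initial design size,
    # and iterations count only the data gathered beyond it.
    return num_data - initial_design_size

assert iteration_number(5, 5) == 0  # first BO iteration after a 5-point initial design
assert iteration_number(8, 5) == 3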
@@ -146,8 +154,10 @@ def _compute_prior(self, X: np.ndarray) -> np.ndarray:
        for parameter, X_col in zip(self._hyperparameters.values(), X.T):
            if self._discretize and isinstance(parameter, FloatHyperparameter):
                assert self._discrete_bins_factor is not None
-                number_of_bins = int(np.ceil(self._discrete_bins_factor * self._decay_beta / self._iteration_number))
-                prior_values *= self._compute_discretized_pdf(parameter, X_col, number_of_bins) + self._prior_floor
+                number_of_bins = int(
+                    np.ceil(self._discrete_bins_factor * self._decay_beta / (self._iteration_number + 1))
+                )
+                prior_values *= self._compute_discretized_pdf(parameter, X_col, number_of_bins)
            else:
                prior_values *= parameter._pdf(X_col[:, np.newaxis])
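
Note that the `+ self._prior_floor` term is no longer added per hyperparameter here; the floor is applied once to the product in `_compute` below. The `+ 1` offset also keeps the bin count finite at iteration zero. A quick numeric sketch with assumed values `discrete_bins_factor=2.0` and `decay_beta=5.0`:

import numpy as np

for t in range(4):
    bins = int(np.ceil(2.0 * 5.0 / (t + 1)))
    print(t, bins)  # t=0 -> 10, t=1 -> 5, t=2 -> 4, t=3 -> 3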

@@ -178,6 +188,7 @@ def _compute_discretized_pdf(
            The user prior over the optimum for the parameter at hand.
        """
        # Evaluates the actual pdf on all the relevant points
+        # Replace deprecated method
        pdf_values = hyperparameter._pdf(X_col[:, np.newaxis])

        # Retrieves the largest value of the pdf in the domain
@@ -221,6 +232,6 @@ def _compute(self, X: np.ndarray) -> np.ndarray:
        acq_values = self._acquisition_function._compute(X)

        prior_values = self._compute_prior(X) + self._prior_floor
-        decayed_prior_values = np.power(prior_values, self._decay_beta / self._iteration_number)
+        decayed_prior_values = np.power(prior_values, self._decay_beta / (self._iteration_number + 1))

        return acq_values * decayed_prior_values
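
A minimal numpy sketch of the corrected decay (values assumed): the exponent now starts at beta/1 at iteration zero, whereas the old counter was incremented on every update call, including those made during the initial design:

import numpy as np

prior_values = np.array([0.5, 1.0, 2.0]) + 1e-12  # pdf values plus prior floor
decay_beta = 10.0
for t in range(3):
    decayed = np.power(prior_values, decay_beta / (t + 1))
    print(t, decayed)  # every entry drifts toward 1.0 as t grows, fading the prior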
22 changes: 11 additions & 11 deletions tests/test_acquisition/test_functions.py
@@ -298,16 +298,16 @@ def test_prior_init_ts(prior_model, acq_ts, beta):

def test_prior_update(prior_model, acquisition_function, beta):
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
-    paf.update(model=prior_model, eta=2)
+    paf.update(model=prior_model, eta=2, num_data=10)
    assert paf._eta == 2
    assert paf._acquisition_function._eta == 2
-    assert paf._iteration_number == 1
+    assert paf._iteration_number == 0
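
(With the fix, the first update call fixes `_initial_design_size = 10`, so the counter is `10 - 10 = 0` rather than 1.)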


def test_prior_compute_prior_Nx1(prior_model, hyperparameter_dict, acquisition_function, beta):
    prior_model.update_prior(hyperparameter_dict)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
-    paf.update(model=prior_model, eta=1)
+    paf.update(model=prior_model, eta=1, num_data=1)

    X = np.array([0, 0.5, 1]).reshape(3, 1)
    prior_values = paf._compute_prior(X)
@@ -321,7 +321,7 @@ def test_prior_compute_prior_Nx1(prior_model, hyperparameter_dict, acquisition_f
def test_prior_compute_prior_NxD(prior_model, hyperparameter_dict, acquisition_function, beta):
    prior_model.update_prior(hyperparameter_dict)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
-    paf.update(model=prior_model, eta=1)
+    paf.update(model=prior_model, eta=1, num_data=1)

    X = np.array([[0, 0], [0, 1], [1, 1]])
    prior_values = paf._compute_prior(X)
@@ -339,7 +339,7 @@ def test_prior_compute_prior_1xD(prior_model, acquisition_function, beta):

    prior_model.update_prior(hyperparameter_dict)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
-    paf.update(model=prior_model, eta=1)
+    paf.update(model=prior_model, eta=1, num_data=1)

    X = np.array([[0.5, 0.5]])
    prior_values = paf._compute_prior(X)
@@ -351,7 +351,7 @@ def test_prior_compute_prior_1x1(prior_model, hyperparameter_dict, acquisition_f
def test_prior_compute_prior_1x1(prior_model, hyperparameter_dict, acquisition_function, beta):
    prior_model.update_prior(hyperparameter_dict)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
-    paf.update(model=prior_model, eta=1)
+    paf.update(model=prior_model, eta=1, num_data=1)

    X = np.array([0.5]).reshape(1, 1)
    prior_values = paf._compute_prior(X)
@@ -378,7 +378,7 @@ def hp_dict3(x0_prior, x1_prior, x2_prior):
def test_prior_1xD(hp_dict3, prior_model, acquisition_function, beta, prior_floor):
    prior_model.update_prior(hp_dict3)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor)
-    paf.update(model=prior_model, eta=1.0)
+    paf.update(model=prior_model, eta=1.0, num_data=1)
    configurations = [ConfigurationMock([1.0, 1.0, 1.0])]
    acq = paf(configurations)
    assert acq.shape == (1, 1)
@@ -391,7 +391,7 @@ def test_prior_1xD(hp_dict3, prior_model, acquisition_function, beta, prior_floo
def test_prior_NxD(hp_dict3, prior_model, acquisition_function, beta, prior_floor):
    prior_model.update_prior(hp_dict3)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor)
-    paf.update(model=prior_model, eta=1.0)
+    paf.update(model=prior_model, eta=1.0, num_data=1)

    # These are the exact same numbers as in the EI tests below
    configurations = [
@@ -449,15 +449,15 @@ def test_prior_NxD_TS(prior_model, hp_dict3, acq_ts, beta, prior_floor):
def test_prior_decay(hp_dict3, prior_model, acquisition_function, beta, prior_floor):
    prior_model.update_prior(hp_dict3)
    paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor)
-    paf.update(model=prior_model, eta=1.0)
+    paf.update(model=prior_model, eta=1.0, num_data=0)
    configurations = [ConfigurationMock([0.1, 0.1, 0.1])]

    for i in range(1, 6):
        prior_factor = np.power(0.2 * 1.0 * 1.8 + paf._prior_floor, beta / i)
        acq = paf(configurations)
        print(acq, 0.90020601136712231 * prior_factor)
        assert np.isclose(acq[0][0], 0.90020601136712231 * prior_factor)
-        paf.update(model=prior_model, eta=1.0)  # increase iteration number
+        paf.update(model=prior_model, eta=1.0, num_data=i)  # increase iteration number
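
(This matches the `+ 1` offset in `_compute`: at loop step `i` the function has `_iteration_number == i - 1`, so the decay exponent is `beta / i`, exactly the `prior_factor` being asserted.)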


def test_prior_discretize_pdf(prior_model, acquisition_function, beta, prior_floor):
@@ -467,7 +467,7 @@ def test_prior_discretize_pdf(prior_model, acquisition_function, beta, prior_flo
    paf = PriorAcquisitionFunction(
        acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor, discretize=True
    )
-    paf.update(model=prior_model, eta=1)
+    paf.update(model=prior_model, eta=1, num_data=1)

    number_of_bins_1 = 13
    number_of_bins_2 = 27521