Refactor(Optimizers): Use ask() instead of two-stage `load_optimization_state()` and `get_config_and_ids()` (#146)
eddiebergman authored Oct 9, 2024
1 parent 5ed2bf3 commit 38b91d5
Showing 61 changed files with 918 additions and 1,607 deletions.
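
The core of this refactor is an interface change in neps/optimizers/base_optimizer.py (diffed below): the two-stage protocol of `load_optimization_state()` followed by `get_config_and_ids()` is replaced by a single `ask()` method that receives every known trial and returns a `SampledConfig`. As a rough, non-authoritative sketch condensed from the signatures visible in this commit (bodies elided, class names invented for illustration):

```python
# Sketch only -- condensed from the signatures shown in the diffs below.
from collections.abc import Mapping
from typing import Any


class OldStyleOptimizer:
    # Before: the runtime loaded optimizer state and asked for a config in two steps.
    def load_optimization_state(
        self, previous_results, pending_evaluations, budget_info, optimizer_state
    ) -> None: ...

    def get_config_and_ids(self) -> tuple[Any, str, str | None]: ...


class NewStyleOptimizer:
    # After: a single ask() that sees all known trials and returns a
    # SampledConfig (id, config, previous_config_id).
    def ask(
        self, trials: Mapping[str, Any], budget_info, optimizer_state: dict[str, Any]
    ): ...
```

A concrete (hypothetical) subclass sketch is included after the base_optimizer.py diff further down.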
7 changes: 4 additions & 3 deletions .pre-commit-config.yaml
@@ -7,7 +7,7 @@ files: |
)/.*\.py$
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-added-large-files
files: ".*"
@@ -36,13 +36,14 @@ repos:
)/.*\.py$
additional_dependencies:
- "types-pyyaml"
- "types-requests"
args:
- "--no-warn-return-any" # Disable this because it doesn't know about 3rd party imports
- "--ignore-missing-imports"
- "--show-traceback"

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.2
rev: 0.29.3
hooks:
- id: check-github-workflows
files: '^github/workflows/.*\.ya?ml$'
@@ -51,7 +52,7 @@ repos:
files: '^\.github/dependabot\.ya?ml$'

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.6.5
rev: v0.6.9
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --no-cache]
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -174,7 +174,7 @@ traceback and the environment in which you are running, i.e. python version, OS,

Regression tests are run on each push to the repository to assure the performance of the optimizers don't degrade.

Currently, regression runs are recorded on JAHS-Bench-201 data for 2 tasks: `cifar10` and `fashion_mnist` and only for optimizers: `random_search`, `bayesian_optimization`, `mf_bayesian_optimization`, `regularized_evolution`.
Currently, regression runs are recorded on JAHS-Bench-201 data for 2 tasks: `cifar10` and `fashion_mnist` and only for optimizers: `random_search`, `bayesian_optimization`, `mf_bayesian_optimization`.
This information is stored in the `tests/regression_runner.py` as two lists: `TASKS`, `OPTIMIZERS`.
The recorded results are stored as a json dictionary in the `tests/losses.json` file.
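
For illustration, the two lists described above would have roughly the following shape after this commit (a hypothetical sketch, not the actual contents of `tests/regression_runner.py`):

```python
# Hypothetical shape of the lists described in CONTRIBUTING.md above;
# the real values live in tests/regression_runner.py.
TASKS = ["cifar10", "fashion_mnist"]
OPTIMIZERS = ["random_search", "bayesian_optimization", "mf_bayesian_optimization"]
```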

2 changes: 0 additions & 2 deletions neps/api.py
@@ -53,7 +53,6 @@ def run(
"priorband",
"mobster",
"asha",
"regularized_evolution",
]
| BaseOptimizer
| Path
@@ -278,7 +277,6 @@ def _run_args(
"priorband",
"mobster",
"asha",
"regularized_evolution",
]
| BaseOptimizer
) = "default",
30 changes: 13 additions & 17 deletions neps/optimizers/__init__.py
@@ -1,35 +1,31 @@
from collections.abc import Callable, Mapping
from functools import partial
from typing import TYPE_CHECKING

from .base_optimizer import BaseOptimizer
from .bayesian_optimization.optimizer import BayesianOptimization
from .grid_search.optimizer import GridSearch
from .multi_fidelity.hyperband import (
from neps.optimizers.base_optimizer import BaseOptimizer
from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization
from neps.optimizers.grid_search.optimizer import GridSearch
from neps.optimizers.multi_fidelity import (
IFBO,
MOBSTER,
AsynchronousHyperband,
Hyperband,
HyperbandCustomDefault,
)
from .multi_fidelity.ifbo import IFBO
from .multi_fidelity.successive_halving import (
AsynchronousSuccessiveHalving,
AsynchronousSuccessiveHalvingWithPriors,
Hyperband,
HyperbandCustomDefault,
SuccessiveHalving,
SuccessiveHalvingWithPriors,
)
from .multi_fidelity_prior.async_priorband import PriorBandAsha, PriorBandAshaHB
from .multi_fidelity_prior.priorband import PriorBand
from .random_search.optimizer import RandomSearch
from .regularized_evolution.optimizer import RegularizedEvolution
from neps.optimizers.multi_fidelity_prior import (
PriorBand,
PriorBandAsha,
PriorBandAshaHB,
)
from neps.optimizers.random_search.optimizer import RandomSearch

# TODO: Rename Searcher to Optimizer...
SearcherMapping: Mapping[str, Callable[..., BaseOptimizer]] = {
"bayesian_optimization": partial(BayesianOptimization, use_priors=False),
"pibo": partial(BayesianOptimization, use_priors=True),
"random_search": RandomSearch,
"regularized_evolution": RegularizedEvolution,
"assisted_regularized_evolution": partial(RegularizedEvolution, assisted=True),
"grid_search": GridSearch,
"successive_halving": SuccessiveHalving,
"successive_halving_prior": SuccessiveHalvingWithPriors,
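
`SearcherMapping` above is a name-to-constructor registry. As a hedged illustration of how such a registry is typically consumed (the actual NePS call site may differ; `build_optimizer` is a hypothetical helper, not part of this commit):

```python
from neps.optimizers import SearcherMapping


def build_optimizer(name: str, pipeline_space, **kwargs):
    """Hypothetical helper: look up a searcher by name and construct it."""
    try:
        constructor = SearcherMapping[name]  # a class or functools.partial
    except KeyError as e:
        raise ValueError(
            f"Unknown searcher {name!r}; available: {sorted(SearcherMapping)}"
        ) from e
    # BaseOptimizer subclasses take the search space as a keyword-only argument.
    return constructor(pipeline_space=pipeline_space, **kwargs)
```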
108 changes: 18 additions & 90 deletions neps/optimizers/base_optimizer.py
@@ -3,38 +3,42 @@
import logging
from abc import abstractmethod
from collections.abc import Mapping
from dataclasses import asdict, dataclass
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from neps.state.trial import Report, Trial
from neps.utils.data_loading import _get_cost, _get_learning_curve, _get_loss
from neps.utils.types import ERROR, ConfigResult, RawConfig, ResultDict

if TYPE_CHECKING:
from neps.search_spaces.search_space import SearchSpace
from neps.state.optimizer import BudgetInfo
from neps.utils.types import ERROR, ResultDict


@dataclass
class SampledConfig:
id: Trial.ID
id: str
config: Mapping[str, Any]
previous_config_id: Trial.ID | None = None
previous_config_id: str | None = None


class BaseOptimizer:
"""Base sampler class. Implements all the low-level work."""

# TODO: Remove a lot of these init params
# Ideally we just make this a `Protocol`, i.e. an interface
# and it has no functionality
def __init__(
self,
*,
pipeline_space: SearchSpace,
patience: int = 50,
logger: logging.Logger | None = None,
budget: int | float | None = None,
loss_value_on_error: float | None = None,
cost_value_on_error: float | None = None,
learning_curve_on_error: float | list[float] | None = None,
ignore_errors=False,
ignore_errors: bool = False,
) -> None:
if patience < 1:
raise ValueError("Patience should be at least 1")
@@ -50,107 +54,31 @@ def __init__(
self.ignore_errors = ignore_errors

@abstractmethod
def load_optimization_state(
self,
previous_results: dict[str, ConfigResult],
pending_evaluations: dict[str, SearchSpace],
budget_info: BudgetInfo | None,
optimizer_state: dict[str, Any],
) -> None:
raise NotImplementedError

@abstractmethod
def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]:
"""Sample a new configuration.
Returns:
config: serializable object representing the configuration
config_id: unique identifier for the configuration
previous_config_id: if provided, id of a previous on which this
configuration is based
"""
raise NotImplementedError

def ask(
self,
trials: Mapping[str, Trial],
budget_info: BudgetInfo | None,
optimizer_state: dict[str, Any],
) -> SampledConfig | tuple[SampledConfig, dict[str, Any]]:
) -> SampledConfig:
"""Sample a new configuration.
!!! note
The plan is this method replaces the two-step procedure of `load_optimization_state`
and `get_config_and_ids` in the future, replacing both with a single method `ask`
which would be easier for developer of NePS optimizers to implement.
!!! note
The `optimizer_state` right now is just a `dict` that optimizers are free to mutate
as desired. A `dict` is not ideal as its _stringly_ typed but this was the least
invasive way to add this at the moment. It's actually an existing feature no
optimizer uses except _cost-cooling_ which basically just took a value from
`budget_info`.
Ideally an optimizer overwriting this can decide what to return instead of having
to rely on them mutating it, however this is the best work-around I could come up with
for now.
Args:
trials: All of the trials that are known about.
budget_info: information about the budget
optimizer_state: extra state the optimizer would like to keep between calls
Returns:
SampledConfig: a sampled configuration
dict: state the optimizer would like to keep between calls
"""
completed: dict[Trial.ID, ConfigResult] = {}
pending: dict[Trial.ID, SearchSpace] = {}
for trial_id, trial in trials.items():
if trial.report is not None:
completed[trial_id] = ConfigResult(
id=trial_id,
config=self.pipeline_space.from_dict(trial.config),
result=trial.report,
# TODO: Better if we could just pass around this metadata
# object instead of converting to a dict each time.
metadata=asdict(trial.metadata),
)
elif trial.state in (
Trial.State.PENDING,
Trial.State.SUBMITTED,
Trial.State.EVALUATING,
):
pending[trial_id] = self.pipeline_space.from_dict(trial.config)

self.load_optimization_state(
previous_results=completed,
pending_evaluations=pending,
budget_info=budget_info,
optimizer_state=optimizer_state,
)
config, config_id, previous_config_id = self.get_config_and_ids()
return SampledConfig(
id=config_id, config=config, previous_config_id=previous_config_id
)

def update_state_post_evaluation(
self, state: dict[str, Any], report: Trial.Report
) -> dict[str, Any]:
# TODO: There's a slot in `OptimizerState` to store extra things
# required for the optimizer but is currently not used
# state["key"] = "value"
return state
...

def get_loss(self, result: ERROR | ResultDict | float | Report) -> float | ERROR:
"""Calls result.utils.get_loss() and passes the error handling through.
Please use self.get_loss() instead of get_loss() in all optimizer classes.
"""
# TODO(eddiebergman): This is a forward change for whenever we can have optimizers
# use `Trial` and `Report`, they already take care of this and save having to do this
# `_get_loss` at every call. We can also then just use `None` instead of the string `"error"`
# use `Trial` and `Report`, they already take care of this and save having to do
# this `_get_loss` at every call. We can also then just use `None` instead of
# the string `"error"`
if isinstance(result, Report):
return result.loss if result.loss is not None else "error"

@@ -165,8 +93,8 @@ def get_cost(self, result: ERROR | ResultDict | float | Report) -> float | ERROR
Please use self.get_cost() instead of get_cost() in all optimizer classes.
"""
# TODO(eddiebergman): This is a forward change for whenever we can have optimizers
# use `Trial` and `Report`, they already take care of this and save having to do this
# `_get_loss` at every call
# use `Trial` and `Report`, they already take care of this and save having to do
# this `_get_loss` at every call
if isinstance(result, Report):
return result.loss if result.loss is not None else "error"

@@ -183,8 +111,8 @@ def get_learning_curve(
Please use self.get_loss() instead of get_loss() in all optimizer classes.
"""
# TODO(eddiebergman): This is a forward change for whenever we can have optimizers
# use `Trial` and `Report`, they already take care of this and save having to do this
# `_get_loss` at every call
# use `Trial` and `Report`, they already take care of this and save having to do
# this `_get_loss` at every call
if isinstance(result, Report):
return result.learning_curve

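
With the two-stage bridge removed from `BaseOptimizer`, subclasses now implement `ask()` directly. Below is a minimal, hypothetical sketch of such a subclass under the signatures shown above; `SketchRandomOptimizer` is not part of this commit, and the `SearchSpace.sample()` / `hp_values()` calls are assumptions about the surrounding API rather than guaranteed interfaces:

```python
from __future__ import annotations

from collections.abc import Mapping
from typing import Any

from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig
from neps.state.optimizer import BudgetInfo
from neps.state.trial import Trial


class SketchRandomOptimizer(BaseOptimizer):
    """Hypothetical example: draws an independent random config on every ask()."""

    def ask(
        self,
        trials: Mapping[str, Trial],
        budget_info: BudgetInfo | None,
        optimizer_state: dict[str, Any],
    ) -> SampledConfig:
        # Use one past the number of known trials as the next config id.
        config_id = str(len(trials) + 1)
        # Assumed API: sample a configuration from the search space.
        config = self.pipeline_space.sample(
            patience=self.patience, ignore_fidelity=False
        )
        return SampledConfig(
            id=config_id,
            config=config.hp_values(),
            previous_config_id=None,
        )
```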
neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py
@@ -1,6 +1,9 @@
from collections.abc import Callable
from functools import partial

from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import (
BaseAcquisition,
)
from neps.optimizers.bayesian_optimization.acquisition_functions.ei import (
ComprehensiveExpectedImprovement,
)
@@ -23,8 +26,9 @@
augmented_ei=False,
log_ei=True,
),
## Uses the augmented EI heuristic and changed the in-fill criterion to the best test location with
## the highest *posterior mean*, which are preferred when the optimisation is noisy.
## Uses the augmented EI heuristic and changed the in-fill criterion to the best test
## location with the highest *posterior mean*, which are preferred when the
## optimisation is noisy.
"AEI": partial(
ComprehensiveExpectedImprovement,
in_fill="posterior",
Expand All @@ -41,4 +45,5 @@
"ComprehensiveExpectedImprovement",
"UpperConfidenceBound",
"DecayingPriorWeightedAcquisition",
"BaseAcquisition",
]