refactor: Use vulture to remove dead code (#147)
eddiebergman authored Oct 10, 2024
1 parent 38b91d5 commit bc9611a
Showing 47 changed files with 146 additions and 2,507 deletions.
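For context, vulture scans Python sources and reports code it believes is unused; it is normally run as a command-line tool over the package. A minimal sketch of such a run (the target path and confidence threshold are illustrative assumptions, not settings taken from this commit):

# Hypothetical invocation of vulture from Python, equivalent to
# running `vulture neps/ --min-confidence 80` on the shell.
import subprocess

subprocess.run(["vulture", "neps/", "--min-confidence", "80"], check=False)

Reported findings still need manual review, since dynamically referenced code can be flagged as dead even when it is reachable.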
11 changes: 0 additions & 11 deletions neps/env.py
@@ -39,17 +39,6 @@ def is_nullable(e: str) -> bool:
default=120,
)

JOBQUEUE_FILELOCK_POLL = get_env(
"NEPS_JOBQUEUE_FILELOCK_POLL",
parse=float,
default=0.05,
)
JOBQUEUE_FILELOCK_TIMEOUT = get_env(
"NEPS_JOBQUEUE_FILELOCK_TIMEOUT",
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
)

SEED_SNAPSHOT_FILELOCK_POLL = get_env(
"NEPS_SEED_SNAPSHOT_FILELOCK_POLL",
parse=float,
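The pattern above reads NePS settings from environment variables, with a parse callable turning the raw string into the final value; the FILELOCK_TIMEOUT entries treat a "nullable" string as meaning no timeout. A rough sketch of that parse behaviour (the accepted null spellings are an assumption, since is_nullable() is defined outside this hunk):

from __future__ import annotations

# Illustrative stand-in for the nullable timeout parsing shown above.
def parse_timeout(e: str) -> float | None:
    return None if e.strip().lower() in ("none", "null", "") else float(e)

assert parse_timeout("120") == 120.0
assert parse_timeout("None") is None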
4 changes: 0 additions & 4 deletions neps/exceptions.py
@@ -52,7 +52,3 @@ class WorkerRaiseError(NePSError):
Includes additional information on how to recover
"""


class SurrogateFailedToFitError(NePSError):
"""Raised when a surrogate model fails to fit."""
75 changes: 55 additions & 20 deletions neps/optimizers/base_optimizer.py
@@ -7,14 +7,68 @@
from typing import TYPE_CHECKING, Any

from neps.state.trial import Report, Trial
from neps.utils.data_loading import _get_cost, _get_learning_curve, _get_loss

if TYPE_CHECKING:
from neps.search_spaces.search_space import SearchSpace
from neps.state.optimizer import BudgetInfo
from neps.utils.types import ERROR, ResultDict


def _get_loss(
result: ERROR | ResultDict | float,
loss_value_on_error: float | None = None,
*,
ignore_errors: bool = False,
) -> ERROR | float:
if result == "error":
if ignore_errors:
return "error"

if loss_value_on_error is not None:
return loss_value_on_error

raise ValueError(
"An error happened during the execution of your run_pipeline function."
" You have three options: 1. If the error is expected and corresponds to"
" a loss value in your application (e.g., 0% accuracy), you can set"
" loss_value_on_error to some float. 2. If sometimes your pipeline"
" crashes randomly, you can set ignore_errors=True. 3. Fix your error."
)

if isinstance(result, dict):
return float(result["loss"])

assert isinstance(result, float)
return float(result)


def _get_cost(
result: ERROR | ResultDict | float,
cost_value_on_error: float | None = None,
*,
ignore_errors: bool = False,
) -> float | Any:
if result == "error":
if ignore_errors:
return "error"

if cost_value_on_error is None:
raise ValueError(
"An error happened during the execution of your run_pipeline function."
" You have three options: 1. If the error is expected and corresponds to"
" a cost value in your application, you can set"
" cost_value_on_error to some float. 2. If sometimes your pipeline"
" crashes randomly, you can set ignore_errors=True. 3. Fix your error."
)

return cost_value_on_error

if isinstance(result, Mapping):
return float(result["cost"])

return float(result)


@dataclass
class SampledConfig:
id: str
@@ -43,7 +97,6 @@ def __init__(
if patience < 1:
raise ValueError("Patience should be at least 1")

self.used_budget: float = 0.0
self.budget = budget
self.pipeline_space = pipeline_space
self.patience = patience
@@ -104,23 +157,5 @@ def get_cost(self, result: ERROR | ResultDict | float | Report) -> float | ERROR
ignore_errors=self.ignore_errors,
)

def get_learning_curve(
self, result: str | dict | float | Report
) -> list[float] | Any:
"""Calls result.utils.get_loss() and passes the error handling through.
Please use self.get_loss() instead of get_loss() in all optimizer classes.
"""
# TODO(eddiebergman): This is a forward change for whenever we can have optimizers
# use `Trial` and `Report`, they already take care of this and save having to do
# this `_get_loss` at every call
if isinstance(result, Report):
return result.learning_curve

return _get_learning_curve(
result,
learning_curve_on_error=self.learning_curve_on_error,
ignore_errors=self.ignore_errors,
)

def whoami(self) -> str:
return type(self).__name__
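The _get_loss and _get_cost helpers above, moved into neps/optimizers/base_optimizer.py by this commit, map a raw result onto a float, a configured fallback value, or the "error" sentinel. A small usage sketch, assuming the helpers remain importable from that module as shown in the diff:

# Illustrative only: how the relocated helper treats the three result shapes above.
from neps.optimizers.base_optimizer import _get_loss

assert _get_loss(0.42) == 0.42                              # plain float result
assert _get_loss({"loss": 0.1, "cost": 3.0}) == 0.1         # result dict
assert _get_loss("error", ignore_errors=True) == "error"    # errors passed through when ignored
# With ignore_errors=False and no loss_value_on_error, an "error" result raises ValueError.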
@@ -12,12 +12,7 @@
# TODO: Chop this the hell out, it's pretty bad
# We have much better and efficient ways to generate acquisition samples now
class RandomSampler(AcquisitionSampler):
def __init__(
self,
pipeline_space: SearchSpace,
patience: int = 100,
budget: int | None = None, # TODO: Remove
):
def __init__(self, pipeline_space: SearchSpace, patience: int = 100):
super().__init__(pipeline_space=pipeline_space, patience=patience)

def sample(self, acquisition_function: Callable | None = None) -> SearchSpace:
6 changes: 0 additions & 6 deletions neps/optimizers/multi_fidelity/hyperband.py
@@ -93,11 +93,6 @@ def __init__(
self.full_rung_trace.extend([s] * len(self.sh_brackets[s].full_rung_trace))
# book-keeping variables
self.current_sh_bracket: int = 0
self.old_history_len = None

def _update_state_counter(self) -> None:
# TODO: get rid of this dependency
self._counter += 1

def _update_sh_bracket_state(self) -> None:
# `load_results()` for each of the SH bracket objects are not called as they are
@@ -155,7 +150,6 @@ def ask(

# previous optimization run exists and needs to be loaded
self._load_previous_observations(completed)
self.total_fevals = len(trials)

# account for pending evaluations
self._handle_pending_evaluations(pending)
2 changes: 0 additions & 2 deletions neps/optimizers/multi_fidelity/ifbo.py
@@ -152,8 +152,6 @@ def __init__(
for cat_name, cat in space.categoricals.items()
},
)
self._border_sampler = Sampler.borders(len(params))
self._cached_border_configs: torch.Tensor | None = None

# Domain of fidelity values, i.e. what is given in the configs that we
# give to the user to evaluate at.
9 changes: 0 additions & 9 deletions neps/optimizers/multi_fidelity/successive_halving.py
@@ -148,10 +148,8 @@ def __init__(
self.rung_members: dict = {} # stores config IDs per rung
self.rung_members_performance: dict = {} # performances recorded per rung
self.rung_promotions: dict = {} # records a promotable config per rung
self.total_fevals = 0

# setup SH state counter
self._counter = 0
self.full_rung_trace = SuccessiveHalving._get_rung_trace(
self.rung_map, self.config_map
)
@@ -174,12 +172,6 @@ def _get_rung_trace(cls, rung_map: dict, config_map: dict) -> list[int]:
rung_trace.extend([rung] * config_map[rung])
return rung_trace

def get_incumbent_score(self) -> float:
y_star = np.inf # minimizing optimizer
if len(self.observed_configs):
y_star = self.observed_configs.perf.values.min()
return y_star

def _get_rung_map(self, s: int = 0) -> dict:
"""Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s."""
assert s <= self.stopping_rate_limit
@@ -352,7 +344,6 @@ def ask(

# previous optimization run exists and needs to be loaded
self._load_previous_observations(completed)
self.total_fevals = len(trials)

# account for pending evaluations
self._handle_pending_evaluations(pending)
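The _get_rung_map docstring above describes mapping rungs to fidelity values from the fidelity bounds, eta and s; in successive halving this spacing is geometric in eta. A rough sketch of that idea (the names and budget values are illustrative, not the class's exact attributes):

# Illustrative successive-halving rung spacing: fidelity grows by a factor of eta per rung.
import numpy as np

min_budget, max_budget, eta = 1.0, 27.0, 3
n_rungs = int(np.floor(np.log(max_budget / min_budget) / np.log(eta))) + 1
rung_map = {rung: min_budget * eta**rung for rung in range(n_rungs)}
# -> {0: 1.0, 1: 3.0, 2: 9.0, 3: 27.0}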
50 changes: 12 additions & 38 deletions neps/optimizers/multi_fidelity/utils.py
@@ -1,34 +1,12 @@
# type: ignore
from __future__ import annotations

from collections.abc import Sequence
from copy import deepcopy
from typing import TYPE_CHECKING, Any
from typing import Any

import numpy as np
import pandas as pd

if TYPE_CHECKING:
from neps.search_spaces.search_space import SearchSpace


def continuous_to_tabular(
config: SearchSpace, categorical_space: SearchSpace
) -> SearchSpace:
"""Convert the continuous parameters in the config into categorical ones based on
the categorical_space provided.
"""
result = config.clone()
for hp_name, _ in config.items():
if hp_name in categorical_space:
choices = np.array(categorical_space[hp_name].choices)
diffs = choices - config[hp_name].value
# NOTE: in case of a tie the first value in the choices array will be returned
closest = choices[np.abs(diffs).argmin()]
result[hp_name].set_value(closest)

return result


class MFObservedData:
"""(Under development).
@@ -77,11 +55,11 @@ def __init__(
self.df = pd.DataFrame([], columns=columns, index=index)

@property
def pending_condition(self):
def pending_condition(self) -> pd.Series:
return self.df[self.perf_col].isna()

@property
def error_condition(self):
def error_condition(self) -> pd.Series:
return self.df[self.perf_col] == "error"

@property
@@ -98,7 +76,7 @@ def pending_runs_index(self) -> pd.Index | pd.MultiIndex:
return self.df.loc[self.pending_condition].index

@property
def completed_runs(self):
def completed_runs(self) -> pd.DataFrame:
return self.df[~(self.pending_condition | self.error_condition)]

@property
@@ -116,7 +94,7 @@ def add_data(
index: tuple[int, ...] | Sequence[tuple[int, ...]] | Sequence[int] | int,
*,
error: bool = False,
):
) -> None:
"""Add data only if none of the indices are already existing in the DataFrame."""
# TODO: If index is only config_id extend it
if not isinstance(index, list):
@@ -143,7 +121,7 @@ def update_data(
index: tuple[int, ...] | Sequence[tuple[int, ...]] | Sequence[int] | int,
*,
error: bool = False,
):
) -> None:
"""Update data if all the indices already exist in the DataFrame."""
index_list = [index] if not isinstance(index, list) else index
if self.df.index.isin(index_list).sum() == len(index_list):
@@ -158,7 +136,7 @@ def update_data(
f"Given indices: {index_list}"
)

def get_learning_curves(self):
def get_learning_curves(self) -> pd.DataFrame:
return self.df.pivot_table(
index=self.df.index.names[0],
columns=self.df.index.names[1],
@@ -168,7 +146,7 @@ def get_learning_curves(self) -> pd.DataFrame:
def all_configs_list(self) -> list[Any]:
return self.df.loc[:, self.config_col].sort_index().values.tolist()

def get_best_learning_curve_id(self, *, maximize: bool = False):
def get_best_learning_curve_id(self, *, maximize: bool = False) -> int:
"""Returns a single configuration id of the best observed performance.
Note: this will always return the single best lowest ID
@@ -179,22 +157,22 @@ def get_best_learning_curve_id(self, *, maximize: bool = False):
return learning_curves.max(axis=1).idxmax()
return learning_curves.min(axis=1).idxmin()

def get_best_seen_performance(self, *, maximize: bool = False):
def get_best_seen_performance(self, *, maximize: bool = False) -> float:
learning_curves = self.get_learning_curves()
if maximize:
return learning_curves.max(axis=1).max()
return learning_curves.min(axis=1).min()

def add_budget_column(self):
def add_budget_column(self) -> pd.DataFrame:
combined_df = self.df.reset_index(level=1)
return combined_df.set_index(keys=[self.budget_idx], drop=False, append=True)

def reduce_to_max_seen_budgets(self):
def reduce_to_max_seen_budgets(self) -> pd.DataFrame:
self.df = self.df.sort_index()
combined_df = self.add_budget_column()
return combined_df.groupby(level=0).last()

def get_partial_configs_at_max_seen(self):
def get_partial_configs_at_max_seen(self) -> pd.Series:
return self.reduce_to_max_seen_budgets()[self.config_col]

def extract_learning_curve(
@@ -242,10 +220,6 @@ def get_max_observed_fidelity_level_per_config(self) -> pd.Series:
}
return pd.Series(max_z_observed)

@property
def token_ids(self) -> np.ndarray:
return self.df.index.values


if __name__ == "__main__":
# TODO: Either delete these or convert them to tests (karibbov)
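get_learning_curves above pivots MFObservedData.df, which is indexed by (config id, budget id), into one row per configuration with one column per budget. A small standalone pandas sketch of the same reshaping (the column and index names here are illustrative):

# Illustrative: pivot a (config_id, budget_id)-indexed frame into a learning-curve table,
# mirroring what get_learning_curves() does on MFObservedData.df.
import pandas as pd

idx = pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=["config_id", "budget_id"])
df = pd.DataFrame({"perf": [0.9, 0.7, 0.8]}, index=idx)
curves = df.reset_index().pivot_table(index="config_id", columns="budget_id", values="perf")
# curves.loc[0] holds config 0's performance per budget; budgets never evaluated appear as NaN.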
1 change: 0 additions & 1 deletion neps/optimizers/multi_fidelity_prior/async_priorband.py
@@ -275,7 +275,6 @@ def ask(

# previous optimization run exists and needs to be loaded
self._load_previous_observations(completed)
self.total_fevals = len(trials)

# account for pending evaluations
self._handle_pending_evaluations(pending)
36 changes: 0 additions & 36 deletions neps/optimizers/multi_fidelity_prior/priorband.py
@@ -406,39 +406,3 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]:
for _, sh in self.sh_brackets.items():
sh.sampling_args = self.sampling_args
return super().get_config_and_ids()


class PriorBandNoIncToPrior(PriorBand):
"""Disables incumbent sampling to replace with prior-based sampling.
This is equivalent to running HyperBand with Prior and Random sampling, where their
relationship is controlled by the `prior_weight_type` argument.
"""

def _set_sampling_weights_and_inc(self, rung: int) -> dict:
super()._set_sampling_weights_and_inc(rung)
# distributing the inc weight to the prior entirely
self.sampling_args["weights"]["prior"] += self.sampling_args["weights"]["inc"]
self.sampling_args["weights"]["inc"] = 0

return self.sampling_args


class PriorBandNoPriorToInc(PriorBand):
"""Disables prior based sampling to replace with incumbent-based sampling."""

def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
# cannot use prior in this version
self.pipeline_space.has_prior = False

def _set_sampling_weights_and_inc(self, rung: int) -> dict:
super()._set_sampling_weights_and_inc(rung)
# distributing the prior weight to the incumbent entirely
if self.sampling_args["weights"]["inc"] > 0:
self.sampling_args["weights"]["inc"] += self.sampling_args["weights"]["prior"]
self.sampling_args["weights"]["prior"] = 0
else:
self.sampling_args["weights"]["random"] = 1
self.sampling_args["weights"]["prior"] = 0
return self.sampling_args
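For reference, the removed PriorBandNoIncToPrior override above only redistributes sampling weights: the incumbent's share is folded into the prior. In effect (the weight values are made up for illustration):

# Illustrative: the weight shuffle performed by the removed _set_sampling_weights_and_inc override.
weights = {"prior": 0.4, "inc": 0.3, "random": 0.3}
weights["prior"] += weights["inc"]   # incumbent mass goes to the prior
weights["inc"] = 0
# -> {"prior": 0.7, "inc": 0, "random": 0.3}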