refactor: Use vulture to remove dead code (#147)
eddiebergman authored Oct 10, 2024
1 parent 38b91d5 commit bc9611a
Showing 47 changed files with 146 additions and 2,507 deletions.
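For context, vulture scans Python sources and reports code it believes is unused; it is normally run as a command-line tool over the package. A minimal sketch of such a run (the target path and confidence threshold are illustrative assumptions, not settings taken from this commit):

# Hypothetical invocation of vulture from Python, equivalent to
# running `vulture neps/ --min-confidence 80` on the shell.
import subprocess

subprocess.run(["vulture", "neps/", "--min-confidence", "80"], check=False)

Reported findings still need manual review, since dynamically referenced code can be flagged as dead even when it is reachable.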
11 changes: 0 additions & 11 deletions neps/env.py
@@ -39,17 +39,6 @@ def is_nullable(e: str) -> bool:
default=120,
)

JOBQUEUE_FILELOCK_POLL = get_env(
"NEPS_JOBQUEUE_FILELOCK_POLL",
parse=float,
default=0.05,
)
JOBQUEUE_FILELOCK_TIMEOUT = get_env(
"NEPS_JOBQUEUE_FILELOCK_TIMEOUT",
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
)

SEED_SNAPSHOT_FILELOCK_POLL = get_env(
"NEPS_SEED_SNAPSHOT_FILELOCK_POLL",
parse=float,
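The pattern above reads NePS settings from environment variables, with a parse callable turning the raw string into the final value; the FILELOCK_TIMEOUT entries treat a "nullable" string as meaning no timeout. A rough sketch of that parse behaviour (the accepted null spellings are an assumption, since is_nullable() is defined outside this hunk):

from __future__ import annotations

# Illustrative stand-in for the nullable timeout parsing shown above.
def parse_timeout(e: str) -> float | None:
    return None if e.strip().lower() in ("none", "null", "") else float(e)

assert parse_timeout("120") == 120.0
assert parse_timeout("None") is None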
4 changes: 0 additions & 4 deletions neps/exceptions.py
@@ -52,7 +52,3 @@ class WorkerRaiseError(NePSError):
Includes additional information on how to recover
"""


class SurrogateFailedToFitError(NePSError):
"""Raised when a surrogate model fails to fit."""
75 changes: 55 additions & 20 deletions neps/optimizers/base_optimizer.py
@@ -7,14 +7,68 @@
from typing import TYPE_CHECKING, Any

from neps.state.trial import Report, Trial
from neps.utils.data_loading import _get_cost, _get_learning_curve, _get_loss

if TYPE_CHECKING:
from neps.search_spaces.search_space import SearchSpace
from neps.state.optimizer import BudgetInfo
from neps.utils.types import ERROR, ResultDict


def _get_loss(
result: ERROR | ResultDict | float,
loss_value_on_error: float | None = None,
*,
ignore_errors: bool = False,
) -> ERROR | float:
if result == "error":
if ignore_errors:
return "error"

if loss_value_on_error is not None:
return loss_value_on_error

raise ValueError(
"An error happened during the execution of your run_pipeline function."
" You have three options: 1. If the error is expected and corresponds to"
" a loss value in your application (e.g., 0% accuracy), you can set"
" loss_value_on_error to some float. 2. If sometimes your pipeline"
" crashes randomly, you can set ignore_errors=True. 3. Fix your error."
)

if isinstance(result, dict):
return float(result["loss"])

assert isinstance(result, float)
return float(result)


def _get_cost(
result: ERROR | ResultDict | float,
cost_value_on_error: float | None = None,
*,
ignore_errors: bool = False,
) -> float | Any:
if result == "error":
if ignore_errors:
return "error"

if cost_value_on_error is None:
raise ValueError(
"An error happened during the execution of your run_pipeline function."
" You have three options: 1. If the error is expected and corresponds to"
" a cost value in your application, you can set"
" cost_value_on_error to some float. 2. If sometimes your pipeline"
" crashes randomly, you can set ignore_errors=True. 3. Fix your error."
)

return cost_value_on_error

if isinstance(result, Mapping):
return float(result["cost"])

return float(result)


@dataclass
class SampledConfig:
id: str
@@ -43,7 +97,6 @@ def __init__(
if patience < 1:
raise ValueError("Patience should be at least 1")

self.used_budget: float = 0.0
self.budget = budget
self.pipeline_space = pipeline_space
self.patience = patience
@@ -104,23 +157,5 @@ def get_cost(self, result: ERROR | ResultDict | float | Report) -> float | ERROR
ignore_errors=self.ignore_errors,
)

def get_learning_curve(
self, result: str | dict | float | Report
) -> list[float] | Any:
"""Calls result.utils.get_loss() and passes the error handling through.
Please use self.get_loss() instead of get_loss() in all optimizer classes.
"""
# TODO(eddiebergman): This is a forward change for whenever we can have optimizers
# use `Trial` and `Report`, they already take care of this and save having to do
# this `_get_loss` at every call
if isinstance(result, Report):
return result.learning_curve

return _get_learning_curve(
result,
learning_curve_on_error=self.learning_curve_on_error,
ignore_errors=self.ignore_errors,
)

def whoami(self) -> str:
return type(self).__name__
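The _get_loss and _get_cost helpers above, moved into neps/optimizers/base_optimizer.py by this commit, map a raw result onto a float, a configured fallback value, or the "error" sentinel. A small usage sketch, assuming the helpers remain importable from that module as shown in the diff:

# Illustrative only: how the relocated helper treats the three result shapes above.
from neps.optimizers.base_optimizer import _get_loss

assert _get_loss(0.42) == 0.42                              # plain float result
assert _get_loss({"loss": 0.1, "cost": 3.0}) == 0.1         # result dict
assert _get_loss("error", ignore_errors=True) == "error"    # errors passed through when ignored
# With ignore_errors=False and no loss_value_on_error, an "error" result raises ValueError.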
@@ -12,12 +12,7 @@
# TODO: Chop this the hell out, it's pretty bad
# We have much better and efficient ways to generate acquisition samples now
class RandomSampler(AcquisitionSampler):
def __init__(
self,
pipeline_space: SearchSpace,
patience: int = 100,
budget: int | None = None, # TODO: Remove
):
def __init__(self, pipeline_space: SearchSpace, patience: int = 100):
super().__init__(pipeline_space=pipeline_space, patience=patience)

def sample(self, acquisition_function: Callable | None = None) -> SearchSpace:
6 changes: 0 additions & 6 deletions neps/optimizers/multi_fidelity/hyperband.py
@@ -93,11 +93,6 @@ def __init__(
self.full_rung_trace.extend([s] * len(self.sh_brackets[s].full_rung_trace))
# book-keeping variables
self.current_sh_bracket: int = 0
self.old_history_len = None

def _update_state_counter(self) -> None:
# TODO: get rid of this dependency
self._counter += 1

def _update_sh_bracket_state(self) -> None:
# `load_results()` for each of the SH bracket objects are not called as they are
@@ -155,7 +150,6 @@ def ask(

# previous optimization run exists and needs to be loaded
self._load_previous_observations(completed)
self.total_fevals = len(trials)

# account for pending evaluations
self._handle_pending_evaluations(pending)
2 changes: 0 additions & 2 deletions neps/optimizers/multi_fidelity/ifbo.py
@@ -152,8 +152,6 @@ def __init__(
for cat_name, cat in space.categoricals.items()
},
)
self._border_sampler = Sampler.borders(len(params))
self._cached_border_configs: torch.Tensor | None = None

# Domain of fidelity values, i.e. what is given in the configs that we
# give to the user to evaluate at.
9 changes: 0 additions & 9 deletions neps/optimizers/multi_fidelity/successive_halving.py
@@ -148,10 +148,8 @@ def __init__(
self.rung_members: dict = {} # stores config IDs per rung
self.rung_members_performance: dict = {} # performances recorded per rung
self.rung_promotions: dict = {} # records a promotable config per rung
self.total_fevals = 0

# setup SH state counter
self._counter = 0
self.full_rung_trace = SuccessiveHalving._get_rung_trace(
self.rung_map, self.config_map
)
@@ -174,12 +172,6 @@ def _get_rung_trace(cls, rung_map: dict, config_map: dict) -> list[int]:
rung_trace.extend([rung] * config_map[rung])
return rung_trace

def get_incumbent_score(self) -> float:
y_star = np.inf # minimizing optimizer
if len(self.observed_configs):
y_star = self.observed_configs.perf.values.min()
return y_star

def _get_rung_map(self, s: int = 0) -> dict:
"""Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s."""
assert s <= self.stopping_rate_limit
@@ -352,7 +344,6 @@ def ask(

# previous optimization run exists and needs to be loaded
self._load_previous_observations(completed)
self.total_fevals = len(trials)

# account for pending evaluations
self._handle_pending_evaluations(pending)
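The _get_rung_map docstring above describes mapping rungs to fidelity values from the fidelity bounds, eta and s; in successive halving this spacing is geometric in eta. A rough sketch of that idea (the names and budget values are illustrative, not the class's exact attributes):

# Illustrative successive-halving rung spacing: fidelity grows by a factor of eta per rung.
import numpy as np

min_budget, max_budget, eta = 1.0, 27.0, 3
n_rungs = int(np.floor(np.log(max_budget / min_budget) / np.log(eta))) + 1
rung_map = {rung: min_budget * eta**rung for rung in range(n_rungs)}
# -> {0: 1.0, 1: 3.0, 2: 9.0, 3: 27.0}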
50 changes: 12 additions & 38 deletions neps/optimizers/multi_fidelity/utils.py
@@ -1,34 +1,12 @@
# type: ignore
from __future__ import annotations

from collections.abc import Sequence
from copy import deepcopy
from typing import TYPE_CHECKING, Any
from typing import Any

import numpy as np
import pandas as pd

if TYPE_CHECKING:
from neps.search_spaces.search_space import SearchSpace


def continuous_to_tabular(
config: SearchSpace, categorical_space: SearchSpace
) -> SearchSpace:
"""Convert the continuous parameters in the config into categorical ones based on
the categorical_space provided.
"""
result = config.clone()
for hp_name, _ in config.items():
if hp_name in categorical_space:
choices = np.array(categorical_space[hp_name].choices)
diffs = choices - config[hp_name].value
# NOTE: in case of a tie the first value in the choices array will be returned
closest = choices[np.abs(diffs).argmin()]
result[hp_name].set_value(closest)

return result


class MFObservedData:
"""(Under development).
@@ -77,11 +55,11 @@ def __init__(
self.df = pd.DataFrame([], columns=columns, index=index)

@property
def pending_condition(self):
def pending_condition(self) -> pd.Series:
return self.df[self.perf_col].isna()

@property
def error_condition(self):
def error_condition(self) -> pd.Series:
return self.df[self.perf_col] == "error"

@property
@@ -98,7 +76,7 @@ def pending_runs_index(self) -> pd.Index | pd.MultiIndex:
return self.df.loc[self.pending_condition].index

@property
def completed_runs(self):
def completed_runs(self) -> pd.DataFrame:
return self.df[~(self.pending_condition | self.error_condition)]

@property
@@ -116,7 +94,7 @@ def add_data(
index: tuple[int, ...] | Sequence[tuple[int, ...]] | Sequence[int] | int,
*,
error: bool = False,
):
) -> None:
"""Add data only if none of the indices are already existing in the DataFrame."""
# TODO: If index is only config_id extend it
if not isinstance(index, list):
@@ -143,7 +121,7 @@ def update_data(
index: tuple[int, ...] | Sequence[tuple[int, ...]] | Sequence[int] | int,
*,
error: bool = False,
):
) -> None:
"""Update data if all the indices already exist in the DataFrame."""
index_list = [index] if not isinstance(index, list) else index
if self.df.index.isin(index_list).sum() == len(index_list):
@@ -158,7 +136,7 @@ def update_data(
f"Given indices: {index_list}"
)

def get_learning_curves(self):
def get_learning_curves(self) -> pd.DataFrame:
return self.df.pivot_table(
index=self.df.index.names[0],
columns=self.df.index.names[1],
@@ -168,7 +146,7 @@ def get_learning_curves(self) -> pd.DataFrame:
def all_configs_list(self) -> list[Any]:
return self.df.loc[:, self.config_col].sort_index().values.tolist()

def get_best_learning_curve_id(self, *, maximize: bool = False):
def get_best_learning_curve_id(self, *, maximize: bool = False) -> int:
"""Returns a single configuration id of the best observed performance.
Note: this will always return the single best lowest ID
@@ -179,22 +157,22 @@ def get_best_learning_curve_id(self, *, maximize: bool = False):
return learning_curves.max(axis=1).idxmax()
return learning_curves.min(axis=1).idxmin()

def get_best_seen_performance(self, *, maximize: bool = False):
def get_best_seen_performance(self, *, maximize: bool = False) -> float:
learning_curves = self.get_learning_curves()
if maximize:
return learning_curves.max(axis=1).max()
return learning_curves.min(axis=1).min()

def add_budget_column(self):
def add_budget_column(self) -> pd.DataFrame:
combined_df = self.df.reset_index(level=1)
return combined_df.set_index(keys=[self.budget_idx], drop=False, append=True)

def reduce_to_max_seen_budgets(self):
def reduce_to_max_seen_budgets(self) -> pd.DataFrame:
self.df = self.df.sort_index()
combined_df = self.add_budget_column()
return combined_df.groupby(level=0).last()

def get_partial_configs_at_max_seen(self):
def get_partial_configs_at_max_seen(self) -> pd.Series:
return self.reduce_to_max_seen_budgets()[self.config_col]

def extract_learning_curve(
@@ -242,10 +220,6 @@ def get_max_observed_fidelity_level_per_config(self) -> pd.Series:
}
return pd.Series(max_z_observed)

@property
def token_ids(self) -> np.ndarray:
return self.df.index.values


if __name__ == "__main__":
# TODO: Either delete these or convert them to tests (karibbov)
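get_learning_curves above pivots MFObservedData.df, which is indexed by (config id, budget id), into one row per configuration with one column per budget. A small standalone pandas sketch of the same reshaping (the column and index names here are illustrative):

# Illustrative: pivot a (config_id, budget_id)-indexed frame into a learning-curve table,
# mirroring what get_learning_curves() does on MFObservedData.df.
import pandas as pd

idx = pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=["config_id", "budget_id"])
df = pd.DataFrame({"perf": [0.9, 0.7, 0.8]}, index=idx)
curves = df.reset_index().pivot_table(index="config_id", columns="budget_id", values="perf")
# curves.loc[0] holds config 0's performance per budget; budgets never evaluated appear as NaN.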
1 change: 0 additions & 1 deletion neps/optimizers/multi_fidelity_prior/async_priorband.py
@@ -275,7 +275,6 @@ def ask(

# previous optimization run exists and needs to be loaded
self._load_previous_observations(completed)
self.total_fevals = len(trials)

# account for pending evaluations
self._handle_pending_evaluations(pending)
36 changes: 0 additions & 36 deletions neps/optimizers/multi_fidelity_prior/priorband.py
@@ -406,39 +406,3 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]:
for _, sh in self.sh_brackets.items():
sh.sampling_args = self.sampling_args
return super().get_config_and_ids()


class PriorBandNoIncToPrior(PriorBand):
"""Disables incumbent sampling to replace with prior-based sampling.
This is equivalent to running HyperBand with Prior and Random sampling, where their
relationship is controlled by the `prior_weight_type` argument.
"""

def _set_sampling_weights_and_inc(self, rung: int) -> dict:
super()._set_sampling_weights_and_inc(rung)
# distributing the inc weight to the prior entirely
self.sampling_args["weights"]["prior"] += self.sampling_args["weights"]["inc"]
self.sampling_args["weights"]["inc"] = 0

return self.sampling_args


class PriorBandNoPriorToInc(PriorBand):
"""Disables prior based sampling to replace with incumbent-based sampling."""

def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
# cannot use prior in this version
self.pipeline_space.has_prior = False

def _set_sampling_weights_and_inc(self, rung: int) -> dict:
super()._set_sampling_weights_and_inc(rung)
# distributing the prior weight to the incumbent entirely
if self.sampling_args["weights"]["inc"] > 0:
self.sampling_args["weights"]["inc"] += self.sampling_args["weights"]["prior"]
self.sampling_args["weights"]["prior"] = 0
else:
self.sampling_args["weights"]["random"] = 1
self.sampling_args["weights"]["prior"] = 0
return self.sampling_args
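For reference, the removed PriorBandNoIncToPrior override above only redistributes sampling weights: the incumbent's share is folded into the prior. In effect (the weight values are made up for illustration):

# Illustrative: the weight shuffle performed by the removed _set_sampling_weights_and_inc override.
weights = {"prior": 0.4, "inc": 0.3, "random": 0.3}
weights["prior"] += weights["inc"]   # incumbent mass goes to the prior
weights["inc"] = 0
# -> {"prior": 0.7, "inc": 0, "random": 0.3}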