ListSurrogate

Summary: `Surrogate` for `ModelListGP`: constructs a `ModelListGP` with the specified sub-model classes under the hood.

Usage example:

```
m = BoTorchModel(
    surrogate=ListSurrogate(
        # Dictionary of which model class should be used to model which outcome.
        botorch_model_class_per_outcome={"m": SingleTaskGP, "n": SingleTaskGP}
    )
)
```

D23606005 then introduces default usage of this for cases where there are multiple X-s in `Xs` and they are not all the same (so we are not using batched multi-output).

Reviewed By: Balandat

Differential Revision: D23382535

fbshipit-source-id: ff524c658e09a8837f71646f309289cde4da8c33
sparks-baird · Sep 29, 2020 · fefc406 · fefc406
1 parent e3fd5a6
commit fefc406
Show file tree

Hide file tree

Showing 10 changed files with 519 additions and 94 deletions.
diff --git a/ax/models/tests/test_botorch_model.py b/ax/models/tests/test_botorch_model.py
@@ -5,16 +5,18 @@
 # LICENSE file in the root directory of this source tree.
 
 from itertools import chain
-from typing import Dict
 from unittest import mock
 
 import torch
 from ax.models.torch.botorch import BotorchModel, get_rounding_func
 from ax.models.torch.botorch_defaults import (
     get_and_fit_model,
+    get_chebyshev_scalarization,
     recommend_best_out_of_sample_point,
 )
+from ax.models.torch.utils import sample_simplex
 from ax.utils.common.testutils import TestCase
+from ax.utils.testing.torch_stubs import get_torch_test_data
 from botorch.acquisition.utils import get_infeasible_cost
 from botorch.models import FixedNoiseGP, ModelListGP
 from botorch.utils import get_objective_weights_transform
@@ -23,44 +25,24 @@
 from gpytorch.priors.lkj_prior import LKJCovariancePrior
 
 
-FIT_MODEL_MO_PATH = "ax.models.torch.botorch_defaults.fit_gpytorch_model"
-SAMPLE_SIMPLEX_UTIL_PATH = "ax.models.torch.utils.sample_simplex"
-SAMPLE_HYPERSPHERE_UTIL_PATH = "ax.models.torch.utils.sample_hypersphere"
+FIT_MODEL_MO_PATH = f"{get_and_fit_model.__module__}.fit_gpytorch_model"
+SAMPLE_SIMPLEX_UTIL_PATH = f"{sample_simplex.__module__}.sample_simplex"
+SAMPLE_HYPERSPHERE_UTIL_PATH = f"{sample_simplex.__module__}.sample_hypersphere"
 CHEBYSHEV_SCALARIZATION_PATH = (
-    "ax.models.torch.botorch_defaults.get_chebyshev_scalarization"
+    f"{get_chebyshev_scalarization.__module__}.get_chebyshev_scalarization"
 )
 
 
 def dummy_func(X: torch.Tensor) -> torch.Tensor:
     return X
 
 
-def _get_optimizer_kwargs() -> Dict[str, int]:
-    return {"num_restarts": 2, "raw_samples": 2, "maxiter": 2, "batch_limit": 1}
-
-
-def _get_torch_test_data(
-    dtype=torch.float, cuda=False, constant_noise=True, task_features=None
-):
-    device = torch.device("cuda") if cuda else torch.device("cpu")
-    Xs = [torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], dtype=dtype, device=device)]
-    Ys = [torch.tensor([[3.0], [4.0]], dtype=dtype, device=device)]
-    Yvars = [torch.tensor([[0.0], [2.0]], dtype=dtype, device=device)]
-    if constant_noise:
-        Yvars[0].fill_(1.0)
-    bounds = [(0.0, 1.0), (1.0, 4.0), (2.0, 5.0)]
-    feature_names = ["x1", "x2", "x3"]
-    task_features = [] if task_features is None else task_features
-    metric_names = ["y", "r"]
-    return Xs, Ys, Yvars, bounds, task_features, feature_names, metric_names
-
-
 class BotorchModelTest(TestCase):
     def test_fixed_rank_BotorchModel(self, dtype=torch.float, cuda=False):
-        Xs1, Ys1, Yvars1, bounds, _, fns, __package__ = _get_torch_test_data(
+        Xs1, Ys1, Yvars1, bounds, _, fns, __package__ = get_torch_test_data(
             dtype=dtype, cuda=cuda, constant_noise=True
         )
-        Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data(
+        Xs2, Ys2, Yvars2, _, _, _, _ = get_torch_test_data(
             dtype=dtype, cuda=cuda, constant_noise=True
         )
         model = BotorchModel(multitask_gp_ranks={"y": 2, "w": 1})
@@ -84,10 +66,10 @@ def test_fixed_rank_BotorchModel(self, dtype=torch.float, cuda=False):
         self.assertEqual(model_list[1]._rank, 1)
 
     def test_fixed_prior_BotorchModel(self, dtype=torch.float, cuda=False):
-        Xs1, Ys1, Yvars1, bounds, _, fns, __package__ = _get_torch_test_data(
+        Xs1, Ys1, Yvars1, bounds, _, fns, __package__ = get_torch_test_data(
             dtype=dtype, cuda=cuda, constant_noise=True
         )
-        Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data(
+        Xs2, Ys2, Yvars2, _, _, _, _ = get_torch_test_data(
             dtype=dtype, cuda=cuda, constant_noise=True
         )
         kwargs = {
@@ -131,10 +113,10 @@ def test_fixed_prior_BotorchModel(self, dtype=torch.float, cuda=False):
             )
 
     def test_BotorchModel(self, dtype=torch.float, cuda=False):
-        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data(
+        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = get_torch_test_data(
             dtype=dtype, cuda=cuda, constant_noise=True
         )
-        Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data(
+        Xs2, Ys2, Yvars2, _, _, _, _ = get_torch_test_data(
             dtype=dtype, cuda=cuda, constant_noise=True
         )
         model = BotorchModel()
@@ -457,7 +439,7 @@ def test_BotorchModel_double_cuda(self):
             self.test_BotorchModel(dtype=torch.double, cuda=True)
 
     def test_BotorchModelOneOutcome(self):
-        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data(
+        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = get_torch_test_data(
             dtype=torch.float, cuda=False, constant_noise=True
         )
         model = BotorchModel()
@@ -479,10 +461,10 @@ def test_BotorchModelOneOutcome(self):
         self.assertTrue(f_cov.shape == torch.Size([2, 1, 1]))
 
     def test_BotorchModelConstraints(self):
-        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data(
+        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = get_torch_test_data(
             dtype=torch.float, cuda=False, constant_noise=True
         )
-        Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data(
+        Xs2, Ys2, Yvars2, _, _, _, _ = get_torch_test_data(
             dtype=torch.float, cuda=False, constant_noise=True
         )
         # make infeasible

diff --git a/ax/models/torch/botorch_modular/acquisition.py b/ax/models/torch/botorch_modular/acquisition.py
@@ -6,9 +6,10 @@
 
 from __future__ import annotations
 
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 from ax.core.types import TConfig
+from ax.models.torch.botorch_modular.list_surrogate import ListSurrogate
 from ax.models.torch.botorch_modular.surrogate import Surrogate
 from ax.models.torch.utils import (
     _get_X_pending_and_observed,
@@ -18,9 +19,11 @@
 from ax.utils.common.constants import Keys
 from ax.utils.common.docutils import copy_doc
 from ax.utils.common.equality import Base
-from ax.utils.common.typeutils import not_none
+from ax.utils.common.typeutils import checked_cast, not_none
 from botorch.acquisition.acquisition import AcquisitionFunction
 from botorch.acquisition.analytic import AnalyticAcquisitionFunction
+from botorch.acquisition.objective import AcquisitionObjective
+from botorch.models.model import Model
 from botorch.optim.optimize import optimize_acqf
 from botorch.utils.containers import TrainingData
 from torch import Tensor
@@ -75,6 +78,9 @@ class Acquisition(Base):
     # class by default. `None` for the base `Acquisition` class, but can be
     # specified in subclasses.
     default_botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None
+    # BoTorch `AcquisitionFunction` class associated with this `Acquisition`
+    # instance. Determined during `__init__`, do not set manually.
+    _botorch_acqf_class: Type[AcquisitionFunction]
 
     def __init__(
         self,
@@ -95,13 +101,15 @@ def __init__(
                 "BoTorch `AcquisitionFunction`, so `botorch_acqf_class` "
                 "argument must be specified."
             )
-        botorch_acqf_class = not_none(
+        self._botorch_acqf_class = not_none(
             botorch_acqf_class or self.default_botorch_acqf_class
         )
         self.surrogate = surrogate
         self.options = options or {}
+        trd = self._extract_training_data(surrogate=surrogate)
+        Xs = [trd.X] if isinstance(trd, TrainingData) else [i.X for i in trd.values()]
         X_pending, X_observed = _get_X_pending_and_observed(
-            Xs=[self.surrogate.training_data.X],
+            Xs=Xs,
             pending_observations=pending_observations,
             objective_weights=objective_weights,
             outcome_constraints=outcome_constraints,
@@ -120,17 +128,12 @@ def __init__(
         else:
             model = self.surrogate.model
 
-        objective = get_botorch_objective(
+        objective = self._get_botorch_objective(
             model=model,
             objective_weights=objective_weights,
             outcome_constraints=outcome_constraints,
             X_observed=X_observed,
-            use_scalarized_objective=issubclass(
-                botorch_acqf_class, AnalyticAcquisitionFunction
-            ),
         )
-        # NOTE: Computing model dependencies might be handled entirely on
-        # BoTorch side.
         model_deps = self.compute_model_dependencies(
             surrogate=surrogate,
             bounds=bounds,
@@ -142,19 +145,16 @@ def __init__(
             target_fidelities=target_fidelities,
             options=self.options,
         )
-        data_deps = self.compute_data_dependencies(
-            training_data=self.surrogate.training_data
-        )
         # pyre-ignore[28]: Some kwargs are not expected in base `Model`
         # but are expected in its subclasses.
-        self.acqf = botorch_acqf_class(
+        self.acqf = self._botorch_acqf_class(
             model=model,
             objective=objective,
             X_pending=X_pending,
             X_baseline=X_observed,
             **self.options,
             **model_deps,
-            **data_deps,
+            **self.compute_data_dependencies(training_data=trd),
         )
 
     def optimize(
@@ -171,7 +171,7 @@ def optimize(
         candidates and their associated acquisition function values.
         """
         optimizer_options = optimizer_options or {}
-        # TODO: make use of `optimizer_class` when its added to BoTorch.
+        # NOTE: Could make use of `optimizer_class` when it's added to BoTorch.
         return optimize_acqf(
             self.acqf,
             bounds=bounds,
@@ -236,17 +236,85 @@ def compute_model_dependencies(
         """Computes inputs to acquisition function class based on the given
         surrogate model.
 
-        NOTE: May not be needed if model dependencies are handled entirely on
-        the BoTorch side.
+        NOTE: When subclassing `Acquisition` from a superclass where this
+        method returns a non-empty dictionary of kwargs to `AcquisitionFunction`,
+        call `super().compute_model_dependencies` and then update that
+        dictionary of options with the options for the subclass you are creating
+        (unless the superclass' model dependencies should not be propagated to
+        the subclass). See `MultiFidelityKnowledgeGradient.compute_model_dependencies`
+        for an example.
+
+        Args:
+            surrogate: The surrogate object containing the BoTorch `Model`,
+                with which this `Acquisition` is to be used.
+            bounds: A list of (lower, upper) tuples for each column of X in
+                the training data of the surrogate model.
+            objective_weights: The objective is to maximize a weighted sum of
+                the columns of f(x). These are the weights.
+            pending_observations: A list of tensors, each of which contains
+                points whose evaluation is pending (i.e. that have been
+                submitted for evaluation) for a given outcome. A list
+                of m (k_i x d) feature tensors X for m outcomes and k_i,
+                pending observations for outcome i.
+            outcome_constraints: A tuple of (A, b). For k outcome constraints
+                and m outputs at f(x), A is (k x m) and b is (k x 1) such that
+                A f(x) <= b. (Not used by single task models)
+            linear_constraints: A tuple of (A, b). For k linear constraints on
+                d-dimensional x, A is (k x d) and b is (k x 1) such that
+                A x <= b. (Not used by single task models)
+            fixed_features: A map {feature_index: value} for features that
+                should be fixed to a particular value during generation.
+            target_fidelities: Optional mapping from parameter name to its
+                target fidelity, applicable to fidelity parameters only.
+            options: The `options` kwarg dict, passed on initialization of
+                the `Acquisition` object.
+
+        Returns: A dictionary of surrogate model-dependent options, to be passed
+            as kwargs to BoTorch `AcquisitionFunction` constructor.
         """
         return {}
 
     @classmethod
-    def compute_data_dependencies(cls, training_data: TrainingData) -> Dict[str, Any]:
+    def compute_data_dependencies(
+        cls, training_data: Union[TrainingData, Dict[str, TrainingData]]
+    ) -> Dict[str, Any]:
         """Computes inputs to acquisition function class based on the given
         data in model's training data.
 
         NOTE: May not be needed if model dependencies are handled entirely on
         the BoTorch side.
+
+        Args:
+            training_data: Either a `TrainingData` for 1 outcome, or a mapping of
+                outcome name to respective `TrainingData` (if `ListSurrogate` is used).
+
+        Returns: A dictionary of training data-dependent options, to be passed
+            as kwargs to BoTorch `AcquisitionFunction` constructor.
         """
         return {}
+
+    def _get_botorch_objective(
+        self,
+        model: Model,
+        objective_weights: Tensor,
+        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        X_observed: Optional[Tensor] = None,
+    ) -> AcquisitionObjective:
+        return get_botorch_objective(
+            model=model,
+            objective_weights=objective_weights,
+            use_scalarized_objective=issubclass(
+                self._botorch_acqf_class, AnalyticAcquisitionFunction
+            ),
+            outcome_constraints=outcome_constraints,
+            X_observed=X_observed,
+        )
+
+    @classmethod
+    def _extract_training_data(
+        cls, surrogate: Surrogate
+    ) -> Union[TrainingData, Dict[str, TrainingData]]:
+        if isinstance(surrogate, ListSurrogate):
+            return checked_cast(dict, surrogate.training_data_per_outcome)
+        else:
+            return checked_cast(TrainingData, surrogate.training_data)