From a1a17d86a91921620d22796de3d029783d6b42b3 Mon Sep 17 00:00:00 2001 From: Rhys Goodall Date: Thu, 5 Dec 2024 17:23:08 -0500 Subject: [PATCH 1/5] wip: ax vizier transforms --- .../transforms/power_transform_y.py | 29 +- ax/modelbridge/transforms/sklearn_y.py | 723 ++++++++++++++++++ .../tests/test_power_y_transform.py | 20 +- .../tests/test_sklearn_y_transform.py | 219 ++++++ 4 files changed, 970 insertions(+), 21 deletions(-) create mode 100644 ax/modelbridge/transforms/sklearn_y.py create mode 100644 ax/modelbridge/transforms/tests/test_sklearn_y_transform.py diff --git a/ax/modelbridge/transforms/power_transform_y.py b/ax/modelbridge/transforms/power_transform_y.py index a8e79496b3f..e3425efc896 100644 --- a/ax/modelbridge/transforms/power_transform_y.py +++ b/ax/modelbridge/transforms/power_transform_y.py @@ -19,6 +19,7 @@ from ax.core.search_space import SearchSpace from ax.exceptions.core import DataRequiredError from ax.modelbridge.transforms.base import Transform +from ax.modelbridge.transforms.sklearn_y import _compute_sklearn_transforms from ax.modelbridge.transforms.utils import get_data, match_ci_width_truncated from ax.models.types import TConfig from ax.utils.common.logger import get_logger @@ -83,7 +84,11 @@ def __init__( Ys = get_data(observation_data=observation_data, metric_names=metric_names) self.metric_names: list[str] = list(Ys.keys()) # pyre-fixme[4]: Attribute must be annotated. - self.power_transforms = _compute_power_transforms(Ys=Ys) + self.power_transforms = _compute_sklearn_transforms( + Ys=Ys, + transformer=PowerTransformer, + transformer_kwargs={"method": "yeo-johnson"}, + ) # pyre-fixme[4]: Attribute must be annotated. self.inv_bounds = _compute_inverse_bounds(self.power_transforms, tol=1e-10) @@ -113,9 +118,11 @@ def _untransform_observation_data( for obsd in observation_data: for i, m in enumerate(obsd.metric_names): if m in self.metric_names: - l, u = self.inv_bounds[m] + lower_bound, upper_bound = self.inv_bounds[m] transform = self.power_transforms[m].inverse_transform - if not self.clip_mean and (obsd.means[i] < l or obsd.means[i] > u): + if not self.clip_mean and ( + obsd.means[i] < lower_bound or obsd.means[i] > upper_bound + ): raise ValueError( "Can't untransform mean outside the bounds without clipping" ) @@ -123,8 +130,8 @@ def _untransform_observation_data( mean=obsd.means[i], variance=obsd.covariance[i, i], transform=lambda y: transform(np.array(y, ndmin=2)), - lower_bound=l, - upper_bound=u, + lower_bound=lower_bound, + upper_bound=upper_bound, clip_mean=True, ) return observation_data @@ -172,18 +179,6 @@ def untransform_outcome_constraints( return outcome_constraints -def _compute_power_transforms( - Ys: dict[str, list[float]], -) -> dict[str, PowerTransformer]: - """Compute power transforms.""" - power_transforms = {} - for k, ys in Ys.items(): - y = np.array(ys)[:, None] # Need to unsqueeze the last dimension - pt = PowerTransformer(method="yeo-johnson").fit(y) - power_transforms[k] = pt - return power_transforms - - def _compute_inverse_bounds( power_transforms: dict[str, PowerTransformer], tol: float = 1e-10 ) -> dict[str, tuple[float, float]]: diff --git a/ax/modelbridge/transforms/sklearn_y.py b/ax/modelbridge/transforms/sklearn_y.py new file mode 100644 index 00000000000..520436f6db0 --- /dev/null +++ b/ax/modelbridge/transforms/sklearn_y.py @@ -0,0 +1,723 @@ +from __future__ import annotations + +from collections import defaultdict +from logging import Logger + +from typing import Any, Callable, TYPE_CHECKING + +import numpy as np 
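The per-metric fitting loop that `_compute_sklearn_transforms` generalizes is small enough to sketch standalone; a minimal, hedged version with invented data (only `PowerTransformer` is the real sklearn class here):

import numpy as np
from sklearn.preprocessing import PowerTransformer

# One transformer per metric, fit on a single-column (n_samples, 1) array,
# mirroring what _compute_power_transforms did and what the generic helper
# now does for any sklearn transformer.
Ys = {"m1": [1.0, 2.0, 3.0, 5.0], "m2": [0.1, 0.4, 0.2, 0.9]}
transforms = {
    name: PowerTransformer(method="yeo-johnson").fit(np.array(ys)[:, None])
    for name, ys in Ys.items()
}

# Round-trip sanity check: the warp is invertible on in-range values.
z = transforms["m1"].transform(np.array([[2.0]]))
assert np.allclose(transforms["m1"].inverse_transform(z), [[2.0]])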
+ +from ax.core.observation import Observation, ObservationData, ObservationFeatures +from ax.core.optimization_config import OptimizationConfig +from ax.core.outcome_constraint import OutcomeConstraint, ScalarizedOutcomeConstraint +from ax.core.search_space import SearchSpace +from ax.exceptions.core import DataRequiredError +from ax.modelbridge.transforms.base import Transform +from ax.modelbridge.transforms.utils import get_data, match_ci_width_truncated +from ax.models.types import TConfig +from ax.utils.common.logger import get_logger +from ax.utils.common.typeutils import checked_cast_list +from pyre_extensions import assert_is_instance + +from scipy import stats + +from sklearn.base import ( + _fit_context, + BaseEstimator, + OneToOneFeatureMixin, + TransformerMixin, +) +from sklearn.preprocessing import PowerTransformer +from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES + + +if TYPE_CHECKING: + # import as module to make sphinx-autodoc-typehints happy + from ax import modelbridge as modelbridge_module # noqa F401 + + +logger: Logger = get_logger(__name__) + + +class LogWarpingTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): + _parameter_constraints: dict = {"offset": [float], "copy": ["boolean"]} + + def __init__(self, *, offset=1.5, copy=True): + if offset <= 1: + raise ValueError("offset must be greater than 1") + self.offset = offset + self.copy = copy + + def _reset(self): + """Reset internal data-dependent state of the scaler, if necessary. + + __init__ parameters are not touched. + """ + # Checking one attribute is enough, because they are all set together + # in partial_fit + if hasattr(self, "labels_min_"): + del self.labels_min_ + del self.labels_max_ + + def fit(self, X, y=None, sample_weight=None): + # Reset internal state before fitting + self._reset() + return self.partial_fit(X, y, sample_weight) + + @_fit_context(prefer_skip_nested_validation=True) + def partial_fit(self, X, y=None, sample_weight=None): + X = self._check_input(X, in_fit=True, check_shape=False) + self.labels_min_ = np.nanmin(X, axis=0, keepdims=True) + self.labels_max_ = np.nanmax(X, axis=0, keepdims=True) + + return self + + def transform(self, X, copy=None): + check_is_fitted(self) + X = self._check_input(X, in_fit=False, check_shape=True) + + X[:, :] = (self.labels_max_ - X) / (self.labels_max_ - self.labels_min_) + X[:, :] = np.where( + np.isfinite(X), + 0.5 - (np.log1p(X * (self.offset - 1)) / np.log(self.offset)), + X, + ) + + return X + + def inverse_transform(self, X, copy=None): + check_is_fitted(self) + X = self._check_input(X, in_fit=False, check_shape=True) + + X[:, :] = self.labels_max_ - (np.exp(np.log(self.offset) * (0.5 - X)) - 1) * ( + self.labels_max_ - self.labels_min_ + ) / (self.offset - 1) + + return X + + def _check_input(self, X, in_fit, check_shape=False): + X = self._validate_data( + X, + ensure_2d=True, + dtype=FLOAT_DTYPES, + force_writeable=True, + copy=self.copy, + force_all_finite="allow-nan", + reset=in_fit, + ) + + if check_shape and not X.shape[1] == self.labels_min_.shape[1]: + n = self.labels_min_.shape[1] + m = X.shape[1] + raise ValueError( + "Input data has a different number of features " + f"than fitting data. 
Should have {n}, data has {m}" + ) + + return X + + def _more_tags(self): + return {"allow_nan": True} + + +class InfeasibleTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): + _parameter_constraints: dict = {"offset": [float], "copy": ["boolean"]} + + def __init__(self, *, copy=True): + self.copy = copy + + def _reset(self): + """Reset internal data-dependent state of the scaler, if necessary. + + __init__ parameters are not touched. + """ + # Checking one attribute is enough, because they are all set together + # in partial_fit + if hasattr(self, "warped_bad_value_"): + del self.warped_bad_value_ + del self.shift_ + + def fit(self, X, y=None, sample_weight=None): + # Reset internal state before fitting + self._reset() + return self.partial_fit(X, y, sample_weight) + + @_fit_context(prefer_skip_nested_validation=True) + def partial_fit(self, X, y=None, sample_weight=None): + X = self._check_input(X, in_fit=True, check_shape=False) + + if np.isnan(X).all(axis=0).any(): + raise ValueError( + "Cannot fit InfeasibleTransformer on all-NaN feature columns." + ) + + labels_range = np.nanmax(X, axis=0, keepdims=True) - np.nanmin( + X, axis=0, keepdims=True + ) + warped_bad_value = np.nanmin(X, axis=0, keepdims=True) - ( + 0.5 * labels_range + 1 + ) + num_feasible = X.shape[0] - np.isnan(X).sum(axis=0) + + # Estimate the relative frequency of feasible points + p_feasible = (0.5 + num_feasible) / (1 + X.shape[0]) + + self.warped_bad_value_ = warped_bad_value + self.shift_ = -np.nanmean(X, axis=0) * p_feasible - warped_bad_value * ( + 1 - p_feasible + ) + + return self + + def transform(self, X, copy=None): + check_is_fitted(self) + X = self._check_input(X, in_fit=False, check_shape=True) + + X[:, :] = np.where( + np.isnan(X), + self.warped_bad_value_, + X + self.shift_, + ) + + return X + + def inverse_transform(self, X, copy=None): + check_is_fitted(self) + X = self._check_input(X, in_fit=False, check_shape=True) + + X[:, :] = X - self.shift_ + return X + + def _check_input(self, X, in_fit, check_shape=False): + X = self._validate_data( + X, + ensure_2d=True, + dtype=FLOAT_DTYPES, + force_writeable=True, + copy=self.copy, + force_all_finite="allow-nan", + reset=in_fit, + ) + + if check_shape and not X.shape[1] == self.warped_bad_value_.shape[1]: + n = self.warped_bad_value_.shape[1] + m = X.shape[1] + raise ValueError( + "Input data has a different number of features " + f"than fitting data. 
Should have {n}, data has {m}" + ) + + return X + + def _more_tags(self): + return {"allow_nan": True} + + +class HalfRankTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): + _parameter_constraints: dict = {"copy": ["boolean"]} + + def __init__(self, *, copy=True): + self.copy = copy + + @_fit_context(prefer_skip_nested_validation=True) + def fit(self, X, y=None): + self._fit(X, y=y, force_transform=False) + return self + + @_fit_context(prefer_skip_nested_validation=True) + def fit_transform(self, X, y=None): + return self._fit(X, y, force_transform=True) + + def _get_std_above_median(self, unique_labels, median): + good_half = unique_labels[unique_labels >= median] + std = np.sqrt(((good_half - median) ** 2).mean()) + + if std == 0: + std = np.sqrt(((unique_labels - median) ** 2).mean()) + + if np.isnan(std): + std = np.abs(unique_labels - median).mean() + + return std + + def _fit(self, X, y=None, force_transform=False): + X = self._check_input(X, in_fit=True) + + if not self.copy and not force_transform: # if call from fit() + X = X.copy() # force copy so that fit does not change X inplace + + self.original_label_median_ = np.empty(X.shape[1]) + self.warped_labels_ = {} + self.unique_labels_ = {} + + for i, col in enumerate(X.T): + median = np.nanmedian(col) + + # Get finite values and their ranks for each batch + is_finite_mask = np.isfinite(col) + unique_labels, unique_indices = np.unique( + col[is_finite_mask], return_index=True + ) + + # Calculate rank quantiles + ranks = stats.rankdata(col, method="dense") + dedup_median_index = np.searchsorted(unique_labels, median) + denominator = 2 * dedup_median_index + ( + unique_labels[dedup_median_index] == median + ) + rank_quantile = (ranks - 0.5) / denominator + + above_median_std = self._get_std_above_median(unique_labels, median) + + # Apply transformation + rank_ppf = stats.norm.ppf(rank_quantile) * above_median_std * np.sqrt(2.0) + X[:, i] = np.where( + col < median, + rank_ppf + median, + col, + ) + + # save intermediate values for untransform + self.original_label_median_[i] = median + self.unique_labels_[i] = unique_labels + self.warped_labels_[i] = X[is_finite_mask, i][unique_indices] + + return X + + @staticmethod + def _extrapolate(col, unique_labels, warped_labels): + return warped_labels[0] - ( + (unique_labels[0] - col) / (unique_labels[-1] - unique_labels[0]) + ) * (warped_labels[-1] - warped_labels[0]) + + @staticmethod + def _extrapolate_inverse(col, unique_labels, warped_labels): + return unique_labels[0] + ( + (col - warped_labels[0]) / (warped_labels[-1] - warped_labels[0]) + ) * (unique_labels[-1] - unique_labels[0]) + + @staticmethod + def _expand_values_and_mask(below_median_indices, col, mask, values): + full_mask = np.full_like(col, False, dtype=bool) + extrapolate_indices = below_median_indices[mask] + full_mask[extrapolate_indices] = True + + full_values = np.zeros_like(col) + full_values[full_mask] = values + + return full_mask, full_values + + def transform(self, X): + check_is_fitted(self) + X = self._check_input(X, in_fit=False, check_shape=True) + + for i, col in enumerate(X.T): + median = self.original_label_median_[i] + warped_labels: np.ndarray = self.warped_labels_[i] + unique_labels: np.ndarray = self.unique_labels_[i] + + # Process values below median + below_median = col < median + if below_median.any(): + below_median_indices = np.where(below_median)[0] + + # 1) if the value is below the original minimum, we need to + # extrapolate outside the range + extrapolate_mask = 
col[below_median] < unique_labels.min() + extrapolated_values = self._extrapolate( + col[below_median][extrapolate_mask], unique_labels, warped_labels + ) + assert (extrapolated_values < warped_labels.min()).all() + + full_extrapolate_mask, full_extrapolate_values = ( + self._expand_values_and_mask( + below_median_indices, col, extrapolate_mask, extrapolated_values + ) + ) + + X[:, i] = np.where( + full_extrapolate_mask, + full_extrapolate_values, + col, + ) + + # 2) otherwise, find nearest original values and try to perform lookup + original_idx = np.searchsorted(unique_labels, col[below_median]) + + # Create indices for neighboring values + left_idx = np.clip(original_idx - 1, a_min=0, a_max=None) + right_idx = np.clip( + original_idx + 1, a_min=None, a_max=len(unique_labels) + ) + + # Gather neighboring values + candidates = np.stack( + [ + unique_labels[left_idx], + unique_labels[original_idx], + unique_labels[right_idx], + ], + axis=-1, + ) + + # Find nearest original values and perform lookup + best_idx = np.argmin( + np.abs(candidates - col[below_median][:, None]), axis=-1 + ) + lookup_mask = ( + np.isclose( + candidates[np.arange(len(best_idx)), best_idx], + col[below_median], + ) + & ~extrapolate_mask + ) + + full_lookup_mask, full_lookup_values = self._expand_values_and_mask( + below_median_indices, + col, + lookup_mask, + warped_labels[original_idx[lookup_mask]], + ) + + X[:, i] = np.where( + full_lookup_mask, + full_lookup_values, + col, + ) + + # 3) otherwise linearly interpolate between the nearest original values + interpolate_mask = ~(extrapolate_mask | lookup_mask) + interpolate_labels = col[below_median][interpolate_mask] + interpolate_idx = original_idx[interpolate_mask] + + lower_idx = interpolate_idx - 1 + upper_idx = interpolate_idx + + original_gap = unique_labels[upper_idx] - unique_labels[lower_idx] + warped_gap = warped_labels[upper_idx] - warped_labels[lower_idx] + + interpolated_values = np.where( + original_gap > 0, + warped_labels[lower_idx] + + (interpolate_labels - unique_labels[lower_idx]) + / original_gap + * warped_gap, + warped_labels[lower_idx], + ) + + full_interpolated_mask, full_interpolated_values = ( + self._expand_values_and_mask( + below_median_indices, col, interpolate_mask, interpolated_values + ) + ) + + X[:, i] = np.where( + full_interpolated_mask, + full_interpolated_values, + col, + ) + + return X + + def inverse_transform(self, X): + check_is_fitted(self) + X = self._check_input(X, in_fit=False, check_shape=True) + + for i, col in enumerate(X.T): + median = self.original_label_median_[i] + warped_labels: np.ndarray = self.warped_labels_[i] + unique_labels: np.ndarray = self.unique_labels_[i] + + # Process values below median + below_median = col < median + if below_median.any(): + below_median_indices = np.where(below_median)[0] + + # 1) if the value is below the original minimum, we need to + # extrapolate outside the range + extrapolate_mask = col[below_median] < warped_labels.min() + extrapolated_values = self._extrapolate_inverse( + col[below_median][extrapolate_mask], unique_labels, warped_labels + ) + assert (extrapolated_values < unique_labels.min()).all() + + full_extrapolate_mask, full_extrapolate_values = ( + self._expand_values_and_mask( + below_median_indices, col, extrapolate_mask, extrapolated_values + ) + ) + + X[:, i] = np.where(full_extrapolate_mask, full_extrapolate_values, col) + + # 2) otherwise, find nearest original values and try to perform lookup + warped_idx = np.searchsorted(warped_labels, col[below_median]) + + # 
Create indices for neighboring values + left_idx = np.clip(warped_idx - 1, a_min=0, a_max=None) + right_idx = np.clip( + warped_idx + 1, a_min=None, a_max=len(warped_labels) + ) + + # Gather neighboring values + candidates = np.stack( + [ + warped_labels[left_idx], + warped_labels[warped_idx], + warped_labels[right_idx], + ], + axis=-1, + ) + + # Find nearest original values and perform lookup + best_idx = np.argmin( + np.abs(candidates - col[below_median][:, None]), axis=-1 + ) + + lookup_mask = ( + np.isclose( + candidates[np.arange(len(best_idx)), best_idx], + col[below_median], + ) + & ~extrapolate_mask + ) + + full_lookup_mask, full_lookup_values = self._expand_values_and_mask( + below_median_indices, + col, + lookup_mask, + unique_labels[warped_idx[lookup_mask]], + ) + + X[:, i] = np.where( + full_lookup_mask, + full_lookup_values, + col, + ) + + # 3) otherwise linearly interpolate between the nearest original values + interpolate_mask = ~(extrapolate_mask | lookup_mask) + interpolate_labels = col[below_median][interpolate_mask] + interpolate_idx = warped_idx[interpolate_mask] + lower_idx = interpolate_idx - 1 + upper_idx = interpolate_idx + + original_gap = unique_labels[upper_idx] - unique_labels[lower_idx] + warped_gap = warped_labels[upper_idx] - warped_labels[lower_idx] + + interpolated_values = np.where( + warped_gap > 0, + unique_labels[lower_idx] + + (interpolate_labels - warped_labels[lower_idx]) + / warped_gap + * original_gap, + unique_labels[lower_idx], + ) + + full_interpolated_mask, full_interpolated_values = ( + self._expand_values_and_mask( + below_median_indices, col, interpolate_mask, interpolated_values + ) + ) + + X[:, i] = np.where( + full_interpolated_mask, + full_interpolated_values, + col, + ) + + return X + + def _check_input(self, X, in_fit, check_shape=False): + X = self._validate_data( + X, + ensure_2d=True, + dtype=FLOAT_DTYPES, + force_writeable=True, + copy=self.copy, + force_all_finite="allow-nan", + reset=in_fit, + ) + + if check_shape and not X.shape[1] == self.original_label_median_.shape[0]: + n = self.original_label_median_.shape[0] + m = X.shape[1] + raise ValueError( + f"Input data has a different number of features than fitting data. " + f"Should have {n}, data has {m}." 
+ ) + + return X + + def _more_tags(self): + return {"allow_nan": True} + + +def _compute_sklearn_transforms( + Ys: dict[str, list[float]], + transformer: Callable[[dict[str, Any]], TransformerMixin], + transformer_kwargs: dict[str, Any], +) -> dict[str, TransformerMixin]: + """Compute power transforms.""" + transforms = {} + for k, ys in Ys.items(): + y = np.array(ys)[:, None] # Need to unsqueeze the last dimension + hrt = transformer(**transformer_kwargs).fit(y) + transforms[k] = hrt + return transforms + + +def _compute_sklearn_inverse_bounds( + transforms: dict[str, TransformerMixin], + tol: float, +) -> dict[str, tuple[float, float]]: + """Compute inverse bounds for sklearn transforms.""" + inv_bounds = defaultdict() + for k, transformer in transforms.items(): + bounds = [-np.inf, np.inf] + if isinstance(transformer, PowerTransformer): + mu, sigma = ( + transformer._scaler.mean_.item(), + transformer._scaler.scale_.item(), + ) # pyre-ignore + lambda_ = transformer.lambdas_.item() # pyre-ignore + if lambda_ < -1 * tol: + bounds[1] = (-1.0 / lambda_ - mu) / sigma + elif lambda_ > 2.0 + tol: + bounds[0] = (1.0 / (2.0 - lambda_) - mu) / sigma + inv_bounds[k] = tuple(checked_cast_list(float, bounds)) + else: + inv_bounds[k] = bounds + return inv_bounds + + +class SklearnTransform(Transform): + """A transform that wraps a sklearn transformer.""" + + def __init__( + self, + search_space: SearchSpace | None = None, + observations: list[Observation] | None = None, + modelbridge: modelbridge_module.base.ModelBridge | None = None, + config: TConfig | None = None, + ) -> None: + """Initialize the ``SklearnTransform`` transform. + + Args: + search_space: The search space of the experiment. Unused. + observations: A list of observations from the experiment. + modelbridge: The `ModelBridge` within which the transform is used. Unused. + config: A dictionary of options to control the behavior of the transform. + Can contain the following keys: + - "metrics": A list of metric names to apply the transform to. If + omitted, all metrics found in `observations` are transformed. + - "transformer": A callable for the sklearn transformer to use. + - "transformer_kwargs": A dictionary of keyword arguments to pass to + the sklearn transformer. + - "clip_mean": A boolean indicating whether to clip the mean of the + transformed data to the bounds. + """ + if observations is None or len(observations) == 0: + raise DataRequiredError("SklearnTransform requires observations.") + # pyre-fixme[9]: Can't annotate config["metrics"] properly. + metric_names: list[str] | None = config.get("metrics", None) if config else None + self.clip_mean: bool = ( + assert_is_instance(config.get("clip_mean", True), bool) if config else True + ) + observation_data = [obs.data for obs in observations] + Ys = get_data(observation_data=observation_data, metric_names=metric_names) + self.metric_names: list[str] = list(Ys.keys()) + + # pyre-fixme[4]: Attribute must be annotated. + self.transforms = _compute_sklearn_transforms( + Ys=Ys, + transformer=config["transformer"], + transformer_kwargs=config["transformer_kwargs"], + ) + # pyre-fixme[4]: Attribute must be annotated. 
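A quick worked check of the bound arithmetic feeding `inv_bounds` below (numbers invented for illustration): for lambda < 0 the Yeo-Johnson image is (-inf, -1/lambda), and sklearn's standardization maps a raw bound b to (b - mu) / sigma.

import numpy as np

lambda_, mu, sigma = -2.0, 0.3, 1.7
upper = (-1.0 / lambda_ - mu) / sigma  # (-1 / -2.0 - 0.3) / 1.7
print(upper)  # (0.5 - 0.3) / 1.7 ~= 0.1176; means above this cannot be inverted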
+        self.inv_bounds = _compute_sklearn_inverse_bounds(self.transforms, tol=1e-10)
+
+    def _transform_observation_data(
+        self,
+        observation_data: list[ObservationData],
+    ) -> list[ObservationData]:
+        """Transform observation data in place."""
+        for obsd in observation_data:
+            for i, m in enumerate(obsd.metric_names):
+                if m in self.metric_names:
+                    transform = self.transforms[m].transform
+                    obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated(
+                        mean=obsd.means[i],
+                        variance=obsd.covariance[i, i],
+                        transform=lambda y: transform(np.array(y, ndmin=2)),
+                        lower_bound=-np.inf,
+                        upper_bound=np.inf,
+                    )
+        return observation_data
+
+    def _untransform_observation_data(
+        self,
+        observation_data: list[ObservationData],
+    ) -> list[ObservationData]:
+        """Untransform observation data in place."""
+        for obsd in observation_data:
+            for i, m in enumerate(obsd.metric_names):
+                if m in self.metric_names:
+                    lower_bound, upper_bound = self.inv_bounds[m]
+                    transform = self.transforms[m].inverse_transform
+                    if not self.clip_mean and (
+                        obsd.means[i] < lower_bound or obsd.means[i] > upper_bound
+                    ):
+                        raise ValueError(
+                            "Can't untransform mean outside the bounds without clipping"
+                        )
+                    obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated(
+                        mean=obsd.means[i],
+                        variance=obsd.covariance[i, i],
+                        transform=lambda y: transform(np.array(y, ndmin=2)),
+                        lower_bound=lower_bound,
+                        upper_bound=upper_bound,
+                        clip_mean=True,
+                    )
+        return observation_data
+
+    def transform_optimization_config(
+        self,
+        optimization_config: OptimizationConfig,
+        modelbridge: modelbridge_module.base.ModelBridge | None = None,
+        fixed_features: ObservationFeatures | None = None,
+    ) -> OptimizationConfig:
+        for c in optimization_config.all_constraints:
+            if isinstance(c, ScalarizedOutcomeConstraint):
+                c_metric_names = [metric.name for metric in c.metrics]
+                intersection = set(c_metric_names) & set(self.metric_names)
+                if intersection:
+                    raise NotImplementedError(
+                        f"{type(self).__name__} cannot be used for metric(s) "
+                        f"{intersection} that are part of a "
+                        "ScalarizedOutcomeConstraint."
+                    )
+            elif c.metric.name in self.metric_names:
+                if c.relative:
+                    raise ValueError(
+                        f"{type(self).__name__} cannot be applied to metric "
+                        f"{c.metric.name} since it is subject to a relative "
+                        "constraint."
+ ) + else: + transform = self.transforms[c.metric.name].transform + c.bound = transform(np.array(c.bound, ndmin=2)).item() + return optimization_config + + def untransform_outcome_constraints( + self, + outcome_constraints: list[OutcomeConstraint], + fixed_features: ObservationFeatures | None = None, + ) -> list[OutcomeConstraint]: + for c in outcome_constraints: + if isinstance(c, ScalarizedOutcomeConstraint): + raise ValueError("ScalarizedOutcomeConstraint not supported here") + elif c.metric.name in self.metric_names: + if c.relative: + raise ValueError("Relative constraints not supported here.") + else: + transform = self.transforms[c.metric.name].inverse_transform + c.bound = transform(np.array(c.bound, ndmin=2)).item() + return outcome_constraints diff --git a/ax/modelbridge/transforms/tests/test_power_y_transform.py b/ax/modelbridge/transforms/tests/test_power_y_transform.py index 15eb4f7fa0b..1206f0b2f1e 100644 --- a/ax/modelbridge/transforms/tests/test_power_y_transform.py +++ b/ax/modelbridge/transforms/tests/test_power_y_transform.py @@ -20,9 +20,9 @@ from ax.core.types import ComparisonOp from ax.modelbridge.transforms.power_transform_y import ( _compute_inverse_bounds, - _compute_power_transforms, PowerTransformY, ) +from ax.modelbridge.transforms.sklearn_y import _compute_sklearn_transforms from ax.modelbridge.transforms.utils import get_data, match_ci_width_truncated from ax.utils.common.testutils import TestCase from ax.utils.testing.core_stubs import get_observations_with_invalid_value @@ -103,7 +103,11 @@ def test_GetData(self) -> None: def test_ComputePowerTransform(self) -> None: Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"]) - pts = _compute_power_transforms(Ys) + pts = _compute_sklearn_transforms( + Ys, + transformer=PowerTransformer, + transformer_kwargs={"method": "yeo-johnson"}, + ) self.assertEqual(pts["m2"].method, "yeo-johnson") # pyre-fixme[16]: `PowerTransformer` has no attribute `lambdas_`. self.assertIsInstance(pts["m2"].lambdas_, np.ndarray) @@ -119,7 +123,11 @@ def test_ComputePowerTransform(self) -> None: def test_ComputeInverseBounds(self) -> None: Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"]) - pt = _compute_power_transforms(Ys)["m2"] + pt = _compute_sklearn_transforms( + Ys, + transformer=PowerTransformer, + transformer_kwargs={"method": "yeo-johnson"}, + )["m2"] # lambda < 0: im(f) = (-inf, -1/lambda) without standardization # pyre-fixme[16]: `PowerTransformer` has no attribute `lambdas_`. pt.lambdas_.fill(-2.5) @@ -144,7 +152,11 @@ def test_ComputeInverseBounds(self) -> None: def test_MatchCIWidth(self) -> None: Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"]) - pt = _compute_power_transforms(Ys) + pt = _compute_sklearn_transforms( + Ys, + transformer=PowerTransformer, + transformer_kwargs={"method": "yeo-johnson"}, + ) # pyre-fixme[16]: `PowerTransformer` has no attribute `lambdas_`. 
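The `lambdas_.fill(...)` pattern these tests use to force a specific branch of the bounds computation also works standalone; a minimal sketch with invented data:

import numpy as np
from sklearn.preprocessing import PowerTransformer

pt = PowerTransformer(method="yeo-johnson").fit(np.array([[0.1], [0.2], [0.9]]))
pt.lambdas_.fill(-2.5)  # pretend the fit produced lambda < 0
# With lambda < 0 the inverse is only defined up to the computed upper bound,
# so inverting a point far past it produces NaN, which the tests rely on:
print(pt.inverse_transform(np.array([[10.0]])))  # [[nan]]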
pt["m2"].lambdas_.fill(-3.0) bounds = _compute_inverse_bounds(pt)["m2"] diff --git a/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py b/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py new file mode 100644 index 00000000000..d000436de25 --- /dev/null +++ b/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py @@ -0,0 +1,219 @@ +import numpy as np +from ax.modelbridge.transforms.sklearn_y import ( + InfeasibleTransformer, + LogWarpingTransformer, +) +from ax.utils.common.testutils import TestCase + + +class LogWarpingTransformerTest(TestCase): + def test_init(self) -> None: + # Test valid initialization + transformer = LogWarpingTransformer(offset=1.5) + self.assertEqual(transformer.offset, 1.5) + self.assertTrue(transformer.copy) + + # Test invalid offset + with self.assertRaisesRegex(ValueError, "offset must be greater than 1"): + LogWarpingTransformer(offset=0.5) + + def test_transform_simple(self) -> None: + X = np.array([[1.0, 2.0], [3.0, 4.0]]) + transformer = LogWarpingTransformer() + transformer.fit(X) + X_transformed = transformer.transform(X) + + # Test shape preservation + self.assertEqual(X_transformed.shape, X.shape) + + # Test values are changed + self.assertFalse(np.allclose(X_transformed, X)) + + # Test inverse transform recovers original + X_recovered = transformer.inverse_transform(X_transformed) + self.assertTrue(np.allclose(X_recovered, X)) + + def test_nan_handling(self) -> None: + X = np.array([[1.0, np.nan], [3.0, 4.0], [np.nan, 2.0]]) + transformer = LogWarpingTransformer() + transformer.fit(X) + X_transformed = transformer.transform(X) + + # Test NaN values remain NaN + self.assertTrue(np.isnan(X_transformed[0, 1])) + self.assertTrue(np.isnan(X_transformed[2, 0])) + + # Test non-NaN values are transformed + self.assertFalse(np.isnan(X_transformed[0, 0])) + self.assertFalse(np.isnan(X_transformed[1, 0])) + self.assertFalse(np.isnan(X_transformed[1, 1])) + self.assertFalse(np.isnan(X_transformed[2, 1])) + + # Test inverse transform preserves NaN + X_recovered = transformer.inverse_transform(X_transformed) + self.assertTrue(np.isnan(X_recovered[0, 1])) + self.assertTrue(np.isnan(X_recovered[2, 0])) + + # Test non-NaN values are recovered correctly + X_no_nan = X[~np.isnan(X)] + X_recovered_no_nan = X_recovered[~np.isnan(X_recovered)] + self.assertTrue(np.allclose(X_no_nan, X_recovered_no_nan)) + + def test_transform_bounds(self) -> None: + # Test with values near bounds + X = np.array([[1.0, 10.0], [2.0, 20.0]]) + transformer = LogWarpingTransformer() + transformer.fit(X) + X_transformed = transformer.transform(X) + + # Test transformed values are bounded + self.assertTrue(np.all(X_transformed[np.isfinite(X_transformed)] <= 0.5)) + + # Test inverse transform recovers original values + X_recovered = transformer.inverse_transform(X_transformed) + self.assertTrue(np.allclose(X_recovered, X)) + + def test_input_validation(self) -> None: + transformer = LogWarpingTransformer() + + # Test 1D array raises error + with self.assertRaises(ValueError): + transformer.fit(np.array([1.0, 2.0])) + + # Test wrong shape in transform after fit + transformer.fit(np.array([[1.0, 2.0], [3.0, 4.0]])) + with self.assertRaises(ValueError): + transformer.transform(np.array([[1.0], [2.0]])) + + def test_copy_behavior(self) -> None: + X = np.array([[1.0, 2.0], [3.0, 4.0]]) + X_orig = X.copy() + + # Test with copy=True (default) + transformer = LogWarpingTransformer(copy=True) + transformer.fit(X) + self.assertTrue(np.array_equal(X, X_orig)) # Original should be 
unchanged + + # Test with copy=False + transformer = LogWarpingTransformer(copy=False) + X_transform = transformer.fit_transform(X) + self.assertFalse(np.array_equal(X, X_orig)) # Original should be modified + self.assertTrue(np.array_equal(X, X_transform)) # Should be the same object + + def test_partial_fit(self) -> None: + X1 = np.array([[1.0, 2.0], [3.0, 4.0]]) + X2 = np.array([[5.0, 6.0], [7.0, 8.0]]) + + transformer = LogWarpingTransformer() + transformer.partial_fit(X1) + + # Test that the transformer uses the full range of values + self.assertTrue(np.allclose(transformer.labels_min_, np.array([[1.0, 2.0]]))) + + transformer.partial_fit(X2) + self.assertTrue(np.allclose(transformer.labels_max_, np.array([[7.0, 8.0]]))) + + +class TestInfeasibleTransformer(TestCase): + def test_transform_basic(self) -> None: + """Test basic transformation with simple data.""" + X = np.array([[1.0, 2.0], [3.0, 4.0], [np.nan, 6.0]]) + transformer = InfeasibleTransformer() + transformer.fit(X) + + # Transform the data + X_transformed = transformer.transform(X) + + # Check that non-nan values are shifted + self.assertFalse(np.allclose(X_transformed[0:2], X[0:2])) + self.assertTrue(np.allclose(X_transformed[0:2] - transformer.shift_, X[0:2])) + + # Check that all values are finite + self.assertFalse(np.isnan(X_transformed).any()) + + # Check that previously nan values are replaced with warped_bad_value + self.assertEqual(X_transformed[2, 0], transformer.warped_bad_value_[0, 0]) + + # Check inverse transform + X_inverse = transformer.inverse_transform(X_transformed) + # Non-nan values should be recovered exactly + self.assertTrue(np.allclose(X_inverse[0:2], X[0:2])) + + def test_transform_all_nan_column(self) -> None: + """Test handling of columns that are all NaN.""" + X = np.array([[1.0, np.nan], [2.0, np.nan], [3.0, np.nan]]) + transformer = InfeasibleTransformer() + with self.assertRaisesRegex( + ValueError, "Cannot fit InfeasibleTransformer on all-NaN feature columns." 
+ ): + transformer.fit(X) + + def test_transform_single_value(self) -> None: + """Test transformation of single non-nan value.""" + X = np.array([[1.0], [np.nan], [np.nan]]) + transformer = InfeasibleTransformer() + transformer.fit(X) + + X_transformed = transformer.transform(X) + + # Check that transformation preserves the relative ordering + self.assertGreater(X_transformed[0, 0], X_transformed[1, 0]) + self.assertEqual(X_transformed[1, 0], X_transformed[2, 0]) + + def test_shape_validation(self) -> None: + """Test that the transformer validates input shapes.""" + X = np.array([[1.0, 2.0], [3.0, 4.0]]) + transformer = InfeasibleTransformer() + transformer.fit(X) + + # Try to transform data with wrong number of features + X_wrong_shape = np.array([[1.0], [2.0]]) + with self.assertRaisesRegex( + ValueError, "features, but InfeasibleTransformer is expecting" + ): + transformer.transform(X_wrong_shape) + + def test_copy_behavior(self) -> None: + """Test that copy parameter works as expected.""" + X = np.array([[1.0, 2.0], [3.0, 4.0]]) + X_orig = X.copy() + + # Test with copy=True (default) + transformer = InfeasibleTransformer(copy=True) + transformer.fit(X) + self.assertTrue(np.array_equal(X, X_orig)) # Original should be unchanged + + # Test with copy=False + transformer = InfeasibleTransformer(copy=False) + X_transform = transformer.fit_transform(X) + self.assertFalse(np.array_equal(X, X_orig)) # Original should be modified + self.assertTrue(np.array_equal(X, X_transform)) # Should be the same object + + def test_p_feasible_calculation(self) -> None: + """Test that p_feasible is calculated correctly.""" + X = np.array([[1.0, 2.0], [np.nan, 4.0], [5.0, np.nan]]) + transformer = InfeasibleTransformer() + transformer.fit(X) + + # For first column: 2 feasible out of 3 total + expected_p_feasible_1 = (0.5 + 2) / (1 + 3) + # For second column: 2 feasible out of 3 total + expected_p_feasible_2 = (0.5 + 2) / (1 + 3) + + # Calculate actual p_feasible from the shift formula + # shift = -mean(X) * p_feasible - warped_bad_value * (1 - p_feasible) + p_feasible_1 = -( + transformer.shift_[0, 0] + transformer.warped_bad_value_[0, 0] + ) / (np.nanmean(X[:, 0]) - transformer.warped_bad_value_[0, 0]) + p_feasible_2 = -( + transformer.shift_[0, 1] + transformer.warped_bad_value_[0, 1] + ) / (np.nanmean(X[:, 1]) - transformer.warped_bad_value_[0, 1]) + + self.assertTrue(np.allclose(p_feasible_1, expected_p_feasible_1)) + self.assertTrue(np.allclose(p_feasible_2, expected_p_feasible_2)) + + +if __name__ == "__main__": + import unittest + + unittest.main() From a74e313129802163ab7cf6da7aa406aa513b0540 Mon Sep 17 00:00:00 2001 From: Rhys Goodall Date: Thu, 5 Dec 2024 20:59:57 -0500 Subject: [PATCH 2/5] fea: add tests for halfrank --- ax/modelbridge/transforms/sklearn_y.py | 2 +- .../tests/test_sklearn_y_transform.py | 94 +++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/ax/modelbridge/transforms/sklearn_y.py b/ax/modelbridge/transforms/sklearn_y.py index 520436f6db0..2c6ed1e2784 100644 --- a/ax/modelbridge/transforms/sklearn_y.py +++ b/ax/modelbridge/transforms/sklearn_y.py @@ -260,7 +260,7 @@ def _fit(self, X, y=None, force_transform=False): ) # Calculate rank quantiles - ranks = stats.rankdata(col, method="dense") + ranks = stats.rankdata(col, method="dense", nan_policy="omit") dedup_median_index = np.searchsorted(unique_labels, median) denominator = 2 * dedup_median_index + ( unique_labels[dedup_median_index] == median diff --git 
a/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py b/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py index d000436de25..cc299a45a94 100644 --- a/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py +++ b/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py @@ -1,5 +1,6 @@ import numpy as np from ax.modelbridge.transforms.sklearn_y import ( + HalfRankTransformer, InfeasibleTransformer, LogWarpingTransformer, ) @@ -213,6 +214,99 @@ def test_p_feasible_calculation(self) -> None: self.assertTrue(np.allclose(p_feasible_2, expected_p_feasible_2)) +class TestHalfRankTransformer(TestCase): + def test_basic_transform(self) -> None: + """Test basic transformation with simple data.""" + X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]) + transformer = HalfRankTransformer() + X_transformed = transformer.fit_transform(X) + + # Test shape preservation + self.assertEqual(X_transformed.shape, X.shape) + + # Test that values above median are unchanged + medians = np.median(X, axis=0) + for i in range(X.shape[1]): + above_median_mask = X[:, i] >= medians[i] + self.assertTrue( + np.allclose( + X_transformed[above_median_mask, i], X[above_median_mask, i] + ) + ) + + # Test inverse transform recovers original + X_recovered = transformer.inverse_transform(X_transformed) + self.assertTrue(np.allclose(X_recovered, X)) + + def test_nan_handling(self) -> None: + """Test handling of NaN values.""" + X = np.array([[1.0, np.nan], [3.0, 4.0], [np.nan, 2.0], [5.0, 6.0]]) + transformer = HalfRankTransformer() + X_transformed = transformer.fit_transform(X) + + # Test NaN values remain NaN + self.assertTrue(np.isnan(X_transformed[0, 1])) + self.assertTrue(np.isnan(X_transformed[2, 0])) + + # Test non-NaN values are transformed + self.assertFalse(np.isnan(X_transformed[0, 0])) + self.assertFalse(np.isnan(X_transformed[1, 0])) + self.assertFalse(np.isnan(X_transformed[1, 1])) + self.assertFalse(np.isnan(X_transformed[3, 1])) + + # Test inverse transform preserves NaN and recovers original values + X_recovered = transformer.inverse_transform(X_transformed) + self.assertTrue(np.allclose(X_recovered[~np.isnan(X)], X[~np.isnan(X)])) + self.assertTrue(np.isnan(X_recovered[0, 1])) + self.assertTrue(np.isnan(X_recovered[2, 0])) + + def test_extrapolation(self) -> None: + """Test extrapolation for values below minimum.""" + X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) + transformer = HalfRankTransformer() + transformer.fit(X) + + # Test with values below minimum + X_test = np.array([[0.0, 1.0], [2.0, 3.0]]) + X_transformed = transformer.transform(X_test) + + # Values below minimum should be transformed + self.assertNotEqual(X_transformed[0, 0], X_test[0, 0]) + + # Test inverse transform recovers original values + X_recovered = transformer.inverse_transform(X_transformed) + self.assertTrue(np.allclose(X_recovered, X_test)) + + def test_copy_behavior(self) -> None: + """Test that copy parameter works as expected.""" + X = np.array([[1.0, 2.0], [3.0, 4.0]]) + X_orig = X.copy() + + # Test with copy=True (default) + transformer = HalfRankTransformer(copy=True) + transformer.fit(X) + self.assertTrue(np.array_equal(X, X_orig)) # Original should be unchanged + + # Test with copy=False + transformer = HalfRankTransformer(copy=False) + X_transform = transformer.fit_transform(X) + self.assertFalse(np.array_equal(X, X_orig)) # Original should be modified + self.assertTrue(np.array_equal(X, X_transform)) # Should be the same object + + def test_input_validation(self) -> None: + """Test 
input validation.""" + transformer = HalfRankTransformer() + + # Test 1D array raises error + with self.assertRaises(ValueError): + transformer.fit(np.array([1.0, 2.0])) + + # Test wrong shape in transform after fit + transformer.fit(np.array([[1.0, 2.0], [3.0, 4.0]])) + with self.assertRaises(ValueError): + transformer.transform(np.array([[1.0], [2.0]])) + + if __name__ == "__main__": import unittest From ecee7eac2c5a3658671671600606d33ac24a8d8b Mon Sep 17 00:00:00 2001 From: Rhys Goodall Date: Fri, 6 Dec 2024 11:12:34 -0500 Subject: [PATCH 3/5] refactor: clean-up the old PowerTransformY to use shared bones --- .../transforms/power_transform_y.py | 210 +--------------- ax/modelbridge/transforms/sklearn_y.py | 231 ++++++++++++++---- .../tests/test_power_y_transform.py | 13 +- .../tests/test_sklearn_y_transform.py | 6 - 4 files changed, 194 insertions(+), 266 deletions(-) diff --git a/ax/modelbridge/transforms/power_transform_y.py b/ax/modelbridge/transforms/power_transform_y.py index e3425efc896..a30971f58dc 100644 --- a/ax/modelbridge/transforms/power_transform_y.py +++ b/ax/modelbridge/transforms/power_transform_y.py @@ -6,210 +6,8 @@ # pyre-strict -from __future__ import annotations +"""This file is necessary for backwards compatibility with the old +PowerTransformer class. +""" -from collections import defaultdict -from logging import Logger -from typing import TYPE_CHECKING - -import numpy as np -from ax.core.observation import Observation, ObservationData, ObservationFeatures -from ax.core.optimization_config import OptimizationConfig -from ax.core.outcome_constraint import OutcomeConstraint, ScalarizedOutcomeConstraint -from ax.core.search_space import SearchSpace -from ax.exceptions.core import DataRequiredError -from ax.modelbridge.transforms.base import Transform -from ax.modelbridge.transforms.sklearn_y import _compute_sklearn_transforms -from ax.modelbridge.transforms.utils import get_data, match_ci_width_truncated -from ax.models.types import TConfig -from ax.utils.common.logger import get_logger -from ax.utils.common.typeutils import checked_cast_list -from pyre_extensions import assert_is_instance -from sklearn.preprocessing import PowerTransformer - -if TYPE_CHECKING: - # import as module to make sphinx-autodoc-typehints happy - from ax import modelbridge as modelbridge_module # noqa F401 - - -logger: Logger = get_logger(__name__) - - -class PowerTransformY(Transform): - """Transform the values to look as normally distributed as possible. - - This fits a power transform to the data with the goal of making the transformed - values look as normally distributed as possible. We use Yeo-Johnson - (https://www.stat.umn.edu/arc/yjpower.pdf), which can handle both positive and - negative values. - - While the transform seems to be quite robust, it probably makes sense to apply a - bit of winsorization and also standardize the inputs before applying the power - transform. The power transform will automatically standardize the data so the - data will remain standardized. - - The transform can't be inverted for all values, so we apply clipping to move - values to the image of the transform. This behavior can be controlled via the - `clip_mean` setting. - """ - - def __init__( - self, - search_space: SearchSpace | None = None, - observations: list[Observation] | None = None, - modelbridge: modelbridge_module.base.ModelBridge | None = None, - config: TConfig | None = None, - ) -> None: - """Initialize the ``PowerTransformY`` transform. 
- - Args: - search_space: The search space of the experiment. Unused. - observations: A list of observations from the experiment. - modelbridge: The `ModelBridge` within which the transform is used. Unused. - config: A dictionary of options to control the behavior of the transform. - Can contain the following keys: - - "metrics": A list of metric names to apply the transform to. If - omitted, all metrics found in `observations` are transformed. - - "clip_mean": Whether to clip the mean to the image of the transform. - Defaults to True. - """ - if observations is None or len(observations) == 0: - raise DataRequiredError("PowerTransformY requires observations.") - # pyre-fixme[9]: Can't annotate config["metrics"] properly. - metric_names: list[str] | None = config.get("metrics", None) if config else None - self.clip_mean: bool = ( - assert_is_instance(config.get("clip_mean", True), bool) if config else True - ) - observation_data = [obs.data for obs in observations] - Ys = get_data(observation_data=observation_data, metric_names=metric_names) - self.metric_names: list[str] = list(Ys.keys()) - # pyre-fixme[4]: Attribute must be annotated. - self.power_transforms = _compute_sklearn_transforms( - Ys=Ys, - transformer=PowerTransformer, - transformer_kwargs={"method": "yeo-johnson"}, - ) - # pyre-fixme[4]: Attribute must be annotated. - self.inv_bounds = _compute_inverse_bounds(self.power_transforms, tol=1e-10) - - def _transform_observation_data( - self, - observation_data: list[ObservationData], - ) -> list[ObservationData]: - """Winsorize observation data in place.""" - for obsd in observation_data: - for i, m in enumerate(obsd.metric_names): - if m in self.metric_names: - transform = self.power_transforms[m].transform - obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated( - mean=obsd.means[i], - variance=obsd.covariance[i, i], - transform=lambda y: transform(np.array(y, ndmin=2)), - lower_bound=-np.inf, - upper_bound=np.inf, - ) - return observation_data - - def _untransform_observation_data( - self, - observation_data: list[ObservationData], - ) -> list[ObservationData]: - """Winsorize observation data in place.""" - for obsd in observation_data: - for i, m in enumerate(obsd.metric_names): - if m in self.metric_names: - lower_bound, upper_bound = self.inv_bounds[m] - transform = self.power_transforms[m].inverse_transform - if not self.clip_mean and ( - obsd.means[i] < lower_bound or obsd.means[i] > upper_bound - ): - raise ValueError( - "Can't untransform mean outside the bounds without clipping" - ) - obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated( - mean=obsd.means[i], - variance=obsd.covariance[i, i], - transform=lambda y: transform(np.array(y, ndmin=2)), - lower_bound=lower_bound, - upper_bound=upper_bound, - clip_mean=True, - ) - return observation_data - - def transform_optimization_config( - self, - optimization_config: OptimizationConfig, - modelbridge: modelbridge_module.base.ModelBridge | None = None, - fixed_features: ObservationFeatures | None = None, - ) -> OptimizationConfig: - for c in optimization_config.all_constraints: - if isinstance(c, ScalarizedOutcomeConstraint): - c_metric_names = [metric.name for metric in c.metrics] - intersection = set(c_metric_names) & set(self.metric_names) - if intersection: - raise NotImplementedError( - f"PowerTransformY cannot be used for metric(s) {intersection} " - "that are part of a ScalarizedOutcomeConstraint." 
- ) - elif c.metric.name in self.metric_names: - if c.relative: - raise ValueError( - f"PowerTransformY cannot be applied to metric {c.metric.name} " - "since it is subject to a relative constraint." - ) - else: - transform = self.power_transforms[c.metric.name].transform - c.bound = transform(np.array(c.bound, ndmin=2)).item() - return optimization_config - - def untransform_outcome_constraints( - self, - outcome_constraints: list[OutcomeConstraint], - fixed_features: ObservationFeatures | None = None, - ) -> list[OutcomeConstraint]: - for c in outcome_constraints: - if isinstance(c, ScalarizedOutcomeConstraint): - raise ValueError("ScalarizedOutcomeConstraint not supported here") - elif c.metric.name in self.metric_names: - if c.relative: - raise ValueError("Relative constraints not supported here.") - else: - transform = self.power_transforms[c.metric.name].inverse_transform - c.bound = transform(np.array(c.bound, ndmin=2)).item() - return outcome_constraints - - -def _compute_inverse_bounds( - power_transforms: dict[str, PowerTransformer], tol: float = 1e-10 -) -> dict[str, tuple[float, float]]: - """Computes the image of the transform so we can clip when we untransform. - - The inverse of the Yeo-Johnson transform is given by: - if X >= 0 and lambda == 0: - X = exp(X_trans) - 1 - elif X >= 0 and lambda != 0: - X = (X_trans * lambda + 1) ** (1 / lambda) - 1 - elif X < 0 and lambda != 2: - X = 1 - (-(2 - lambda) * X_trans + 1) ** (1 / (2 - lambda)) - elif X < 0 and lambda == 2: - X = 1 - exp(-X_trans) - - We can break this down into three cases: - lambda < 0: X < -1 / lambda - 0 <= lambda <= 2: X is unbounded - lambda > 2: X > 1 / (2 - lambda) - - Sklearn standardizes the transformed values to have mean zero and standard - deviation 1, so we also need to account for this when we compute the bounds. 
- """ - inv_bounds = defaultdict() - for k, pt in power_transforms.items(): - bounds = [-np.inf, np.inf] - mu, sigma = pt._scaler.mean_.item(), pt._scaler.scale_.item() # pyre-ignore - lambda_ = pt.lambdas_.item() # pyre-ignore - if lambda_ < -1 * tol: - bounds[1] = (-1.0 / lambda_ - mu) / sigma - elif lambda_ > 2.0 + tol: - bounds[0] = (1.0 / (2.0 - lambda_) - mu) / sigma - inv_bounds[k] = tuple(checked_cast_list(float, bounds)) - return inv_bounds +from ax.modelbridge.transforms.sklearn_y import PowerTransformY # noqa: F401 diff --git a/ax/modelbridge/transforms/sklearn_y.py b/ax/modelbridge/transforms/sklearn_y.py index 2c6ed1e2784..9f296819e41 100644 --- a/ax/modelbridge/transforms/sklearn_y.py +++ b/ax/modelbridge/transforms/sklearn_y.py @@ -566,30 +566,6 @@ def _compute_sklearn_transforms( return transforms -def _compute_sklearn_inverse_bounds( - transforms: dict[str, TransformerMixin], - tol: float, -) -> dict[str, tuple[float, float]]: - """Compute inverse bounds for sklearn transforms.""" - inv_bounds = defaultdict() - for k, transformer in transforms.items(): - bounds = [-np.inf, np.inf] - if isinstance(transformer, PowerTransformer): - mu, sigma = ( - transformer._scaler.mean_.item(), - transformer._scaler.scale_.item(), - ) # pyre-ignore - lambda_ = transformer.lambdas_.item() # pyre-ignore - if lambda_ < -1 * tol: - bounds[1] = (-1.0 / lambda_ - mu) / sigma - elif lambda_ > 2.0 + tol: - bounds[0] = (1.0 / (2.0 - lambda_) - mu) / sigma - inv_bounds[k] = tuple(checked_cast_list(float, bounds)) - else: - inv_bounds[k] = bounds - return inv_bounds - - class SklearnTransform(Transform): """A transform that wraps a sklearn transformer.""" @@ -615,6 +591,9 @@ def __init__( the sklearn transformer. - "clip_mean": A boolean indicating whether to clip the mean of the transformed data to the bounds. + - "match_ci_width": A boolean indicating whether to match the width of + the confidence interval of the transformed data to the width of the + confidence interval of the original data. """ if observations is None or len(observations) == 0: raise DataRequiredError("SklearnTransform requires observations.") @@ -623,6 +602,11 @@ def __init__( self.clip_mean: bool = ( assert_is_instance(config.get("clip_mean", True), bool) if config else True ) + self.match_ci_width: bool = ( + assert_is_instance(config.get("match_ci_width", False), bool) + if config + else False + ) observation_data = [obs.data for obs in observations] Ys = get_data(observation_data=observation_data, metric_names=metric_names) self.metric_names: list[str] = list(Ys.keys()) @@ -634,7 +618,16 @@ def __init__( transformer_kwargs=config["transformer_kwargs"], ) # pyre-fixme[4]: Attribute must be annotated. 
-        self.inv_bounds = _compute_sklearn_inverse_bounds(self.transforms, tol=1e-10)
+        self.inv_bounds = self._compute_inverse_bounds(self.transforms)
+
+    @staticmethod
+    def _compute_inverse_bounds(
+        transforms: dict[str, TransformerMixin], **kwargs
+    ) -> dict[str, tuple[float, float]]:
+        inv_bounds = defaultdict()
+        for k, _t in transforms.items():
+            inv_bounds[k] = tuple(checked_cast_list(float, [-np.inf, np.inf]))
+        return inv_bounds
 
     def _transform_observation_data(
         self,
         observation_data: list[ObservationData],
     ) -> list[ObservationData]:
         """Transform observation data in place."""
         for obsd in observation_data:
             for i, m in enumerate(obsd.metric_names):
                 if m in self.metric_names:
                     transform = self.transforms[m].transform
-                    obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated(
-                        mean=obsd.means[i],
-                        variance=obsd.covariance[i, i],
-                        transform=lambda y: transform(np.array(y, ndmin=2)),
-                        lower_bound=-np.inf,
-                        upper_bound=np.inf,
-                    )
+                    if self.match_ci_width:
+                        lower_bound, upper_bound = self.inv_bounds[m]
+                        obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated(
+                            mean=obsd.means[i],
+                            variance=obsd.covariance[i, i],
+                            transform=lambda y: transform(np.array(y, ndmin=2)),
+                            lower_bound=lower_bound,
+                            upper_bound=upper_bound,
+                            clip_mean=self.clip_mean,
+                        )
+                    else:
+                        # Use .item() so a scalar, not a 1x1 array, is stored.
+                        obsd.means[i] = transform(
+                            np.array(obsd.means[i], ndmin=2)
+                        ).item()
         return observation_data
 
     def _untransform_observation_data(
         self,
         observation_data: list[ObservationData],
     ) -> list[ObservationData]:
         """Untransform observation data in place."""
         for obsd in observation_data:
             for i, m in enumerate(obsd.metric_names):
                 if m in self.metric_names:
-                    lower_bound, upper_bound = self.inv_bounds[m]
                     transform = self.transforms[m].inverse_transform
-                    if not self.clip_mean and (
-                        obsd.means[i] < lower_bound or obsd.means[i] > upper_bound
-                    ):
-                        raise ValueError(
-                            "Can't untransform mean outside the bounds without clipping"
-                        )
-                    obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated(
-                        mean=obsd.means[i],
-                        variance=obsd.covariance[i, i],
-                        transform=lambda y: transform(np.array(y, ndmin=2)),
-                        lower_bound=lower_bound,
-                        upper_bound=upper_bound,
-                        clip_mean=True,
-                    )
+                    if self.match_ci_width:
+                        lower_bound, upper_bound = self.inv_bounds[m]
+                        if not self.clip_mean and (
+                            obsd.means[i] < lower_bound or obsd.means[i] > upper_bound
+                        ):
+                            raise ValueError(
+                                "Can't untransform mean outside the bounds "
+                                "without clipping"
+                            )
+                        obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated(
+                            mean=obsd.means[i],
+                            variance=obsd.covariance[i, i],
+                            transform=lambda y: transform(np.array(y, ndmin=2)),
+                            lower_bound=lower_bound,
+                            upper_bound=upper_bound,
+                            clip_mean=True,
+                        )
+                    else:
+                        # Use .item() so a scalar, not a 1x1 array, is stored.
+                        obsd.means[i] = transform(
+                            np.array(obsd.means[i], ndmin=2)
+                        ).item()
         return observation_data
 
     def transform_optimization_config(
@@ -721,3 +723,140 @@ def untransform_outcome_constraints(
                     transform = self.transforms[c.metric.name].inverse_transform
                     c.bound = transform(np.array(c.bound, ndmin=2)).item()
         return outcome_constraints
+
+
+class PowerTransformY(SklearnTransform):
+    def __init__(
+        self,
+        search_space: SearchSpace | None = None,
+        observations: list[Observation] | None = None,
+        modelbridge: modelbridge_module.base.ModelBridge | None = None,
+        config: TConfig | None = None,
+    ) -> None:
+        config = config or {}
+        config["transformer"] = PowerTransformer
+        config["transformer_kwargs"] = {"method": "yeo-johnson"}
+        config["match_ci_width"] = True
+        config["clip_mean"] = True
+        super().__init__(search_space, observations, modelbridge, config)
+
+    @property
+    def power_transforms(self) -> dict[str, TransformerMixin]:
+        """Getter for power_transforms that returns transforms."""
+        return self.transforms
+
+    @power_transforms.setter
+    def power_transforms(self, value: dict[str, TransformerMixin]) -> None:
+        """Setter for power_transforms that sets transforms."""
+        self.transforms = value
+
+    @staticmethod
+    def _compute_inverse_bounds(
+        transforms: dict[str, PowerTransformer],
+        tol=1e-10,
+        **kwargs,
+    ) -> dict[str, tuple[float, float]]:
+        """Computes the image of the transform so we can clip when we untransform.
+
+        The inverse of the Yeo-Johnson transform is given by:
+        if X >= 0 and lambda == 0:
+            X = exp(X_trans) - 1
+        elif X >= 0 and lambda != 0:
+            X = (X_trans * lambda + 1) ** (1 / lambda) - 1
+        elif X < 0 and lambda != 2:
+            X = 1 - (-(2 - lambda) * X_trans + 1) ** (1 / (2 - lambda))
+        elif X < 0 and lambda == 2:
+            X = 1 - exp(-X_trans)
+
+        We can break this down into three cases:
+        lambda < 0: X < -1 / lambda
+        0 <= lambda <= 2: X is unbounded
+        lambda > 2: X > 1 / (2 - lambda)
+
+        Sklearn standardizes the transformed values to have mean zero and standard
+        deviation 1, so we also need to account for this when we compute the bounds.
+        """
+        inv_bounds = defaultdict()
+        for k, pt in transforms.items():
+            if not isinstance(pt, PowerTransformer):
+                raise ValueError(f"Unexpected transformer type: {type(pt)}")
+            bounds = [-np.inf, np.inf]
+            mu, sigma = pt._scaler.mean_.item(), pt._scaler.scale_.item()  # pyre-ignore
+            lambda_ = pt.lambdas_.item()  # pyre-ignore
+            if lambda_ < -1 * tol:
+                bounds[1] = (-1.0 / lambda_ - mu) / sigma
+            elif lambda_ > 2.0 + tol:
+                bounds[0] = (1.0 / (2.0 - lambda_) - mu) / sigma
+            inv_bounds[k] = tuple(checked_cast_list(float, bounds))
+        return inv_bounds
+
+
+class LogWarpingY(SklearnTransform):
+    def __init__(
+        self,
+        search_space: SearchSpace | None = None,
+        observations: list[Observation] | None = None,
+        modelbridge: modelbridge_module.base.ModelBridge | None = None,
+        config: TConfig | None = None,
+    ) -> None:
+        config = config or {}
+        config["transformer"] = LogWarpingTransformer()
+        config["match_ci_width"] = True
+        config["clip_mean"] = True
+        super().__init__(search_space, observations, modelbridge, config)
+
+    @staticmethod
+    def _compute_inverse_bounds(
+        transforms: dict[str, LogWarpingTransformer], **kwargs
+    ) -> dict[str, tuple[float, float]]:
+        inv_bounds = defaultdict()
+        for k, lt in transforms.items():
+            if not isinstance(lt, LogWarpingTransformer):
+                raise ValueError(f"Unexpected transformer type: {type(lt)}")
+            inv_bounds[k] = tuple(checked_cast_list(float, [-0.5, 0.5]))
+        return inv_bounds
+
+
+class InfeasibleY(SklearnTransform):
+    def __init__(
+        self,
+        search_space: SearchSpace | None = None,
+        observations: list[Observation] | None = None,
+        modelbridge: modelbridge_module.base.ModelBridge | None = None,
+        config: TConfig | None = None,
+    ) -> None:
+        config = config or {}
+        config["transformer"] = InfeasibleTransformer()
+        config["match_ci_width"] = True
+        config["clip_mean"] = True
+        super().__init__(search_space, observations, modelbridge, config)
+
+    @staticmethod
+    def _compute_inverse_bounds(
+        transforms: dict[str, InfeasibleTransformer], **kwargs
+    ) -> dict[str, tuple[float, float]]:
+        inv_bounds = defaultdict()
+        for k, it in transforms.items():
+            if not isinstance(it, InfeasibleTransformer):
+                raise ValueError(f"Unexpected transformer type: {type(it)}")
+            # If we see a value below the warped bad value that was assigned to
+            # infeasible points, it could blow up to an exceptionally large value
+            # when we match the CI width. Clip to the warped bad value to avoid
+            # this.
+ inv_bounds[k] = tuple( + checked_cast_list(float, [it.warped_bad_value_[0, 0], np.inf]) + ) + return inv_bounds + + +class HalfRankY(SklearnTransform): + def __init__( + self, + search_space: SearchSpace | None = None, + observations: list[Observation] | None = None, + modelbridge: modelbridge_module.base.ModelBridge | None = None, + config: TConfig | None = None, + ) -> None: + config = config or {} + config["transformer"] = HalfRankTransformer() + super().__init__(search_space, observations, modelbridge, config) diff --git a/ax/modelbridge/transforms/tests/test_power_y_transform.py b/ax/modelbridge/transforms/tests/test_power_y_transform.py index 1206f0b2f1e..4db9d1f708e 100644 --- a/ax/modelbridge/transforms/tests/test_power_y_transform.py +++ b/ax/modelbridge/transforms/tests/test_power_y_transform.py @@ -18,10 +18,7 @@ from ax.core.optimization_config import OptimizationConfig from ax.core.outcome_constraint import OutcomeConstraint, ScalarizedOutcomeConstraint from ax.core.types import ComparisonOp -from ax.modelbridge.transforms.power_transform_y import ( - _compute_inverse_bounds, - PowerTransformY, -) +from ax.modelbridge.transforms.power_transform_y import PowerTransformY from ax.modelbridge.transforms.sklearn_y import _compute_sklearn_transforms from ax.modelbridge.transforms.utils import get_data, match_ci_width_truncated from ax.utils.common.testutils import TestCase @@ -131,7 +128,7 @@ def test_ComputeInverseBounds(self) -> None: # lambda < 0: im(f) = (-inf, -1/lambda) without standardization # pyre-fixme[16]: `PowerTransformer` has no attribute `lambdas_`. pt.lambdas_.fill(-2.5) - bounds = _compute_inverse_bounds({"m2": pt})["m2"] + bounds = PowerTransformY._compute_inverse_bounds({"m2": pt})["m2"] self.assertEqual(bounds[0], -np.inf) # Make sure we got the boundary right left = pt.inverse_transform(np.array(bounds[1] - 0.01, ndmin=2)) @@ -139,11 +136,11 @@ def test_ComputeInverseBounds(self) -> None: self.assertTrue(isnan(right) and not isnan(left)) # 0 <= lambda <= 2: im(f) = R pt.lambdas_.fill(1.0) - bounds = _compute_inverse_bounds({"m2": pt})["m2"] + bounds = PowerTransformY._compute_inverse_bounds({"m2": pt})["m2"] self.assertTrue(bounds == (-np.inf, np.inf)) # lambda > 2: im(f) = (1 / (2 - lambda), inf) without standardization pt.lambdas_.fill(3.5) - bounds = _compute_inverse_bounds({"m2": pt})["m2"] + bounds = PowerTransformY._compute_inverse_bounds({"m2": pt})["m2"] self.assertEqual(bounds[1], np.inf) # Make sure we got the boundary right left = pt.inverse_transform(np.array(bounds[0] - 0.01, ndmin=2)) @@ -159,7 +156,7 @@ def test_MatchCIWidth(self) -> None: ) # pyre-fixme[16]: `PowerTransformer` has no attribute `lambdas_`. 
pt["m2"].lambdas_.fill(-3.0) - bounds = _compute_inverse_bounds(pt)["m2"] + bounds = PowerTransformY._compute_inverse_bounds(pt)["m2"] # Both will be NaN since we are far outside the bounds new_mean_1, new_var_1 = match_ci_width_truncated( diff --git a/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py b/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py index cc299a45a94..62840eb1fce 100644 --- a/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py +++ b/ax/modelbridge/transforms/tests/test_sklearn_y_transform.py @@ -305,9 +305,3 @@ def test_input_validation(self) -> None: transformer.fit(np.array([[1.0, 2.0], [3.0, 4.0]])) with self.assertRaises(ValueError): transformer.transform(np.array([[1.0], [2.0]])) - - -if __name__ == "__main__": - import unittest - - unittest.main() From b465c35fba4e6d1fdedb3e05e1247f9026b29fcf Mon Sep 17 00:00:00 2001 From: Rhys Goodall Date: Mon, 9 Dec 2024 13:45:51 -0500 Subject: [PATCH 4/5] wip: test logwarpingy --- ax/modelbridge/transforms/sklearn_y.py | 63 +++-- .../transforms/tests/test_log_warping_y.py | 249 ++++++++++++++++++ .../tests/test_power_y_transform.py | 7 +- 3 files changed, 287 insertions(+), 32 deletions(-) create mode 100644 ax/modelbridge/transforms/tests/test_log_warping_y.py diff --git a/ax/modelbridge/transforms/sklearn_y.py b/ax/modelbridge/transforms/sklearn_y.py index 9f296819e41..eaee514f583 100644 --- a/ax/modelbridge/transforms/sklearn_y.py +++ b/ax/modelbridge/transforms/sklearn_y.py @@ -77,6 +77,9 @@ def transform(self, X, copy=None): X = self._check_input(X, in_fit=False, check_shape=True) X[:, :] = (self.labels_max_ - X) / (self.labels_max_ - self.labels_min_) + # TODO use the taylor series expansion for -ve values of X such that we + # can avoid the divergence of the log function if bounds are much larger + # than maximum value of X. X[:, :] = np.where( np.isfinite(X), 0.5 - (np.log1p(X * (self.offset - 1)) / np.log(self.offset)), @@ -552,20 +555,6 @@ def _more_tags(self): return {"allow_nan": True} -def _compute_sklearn_transforms( - Ys: dict[str, list[float]], - transformer: Callable[[dict[str, Any]], TransformerMixin], - transformer_kwargs: dict[str, Any], -) -> dict[str, TransformerMixin]: - """Compute power transforms.""" - transforms = {} - for k, ys in Ys.items(): - y = np.array(ys)[:, None] # Need to unsqueeze the last dimension - hrt = transformer(**transformer_kwargs).fit(y) - transforms[k] = hrt - return transforms - - class SklearnTransform(Transform): """A transform that wraps a sklearn transformer.""" @@ -612,10 +601,10 @@ def __init__( self.metric_names: list[str] = list(Ys.keys()) # pyre-fixme[4]: Attribute must be annotated. - self.transforms = _compute_sklearn_transforms( + self.transforms = self._compute_sklearn_transforms( Ys=Ys, transformer=config["transformer"], - transformer_kwargs=config["transformer_kwargs"], + transformer_kwargs=config.get("transformer_kwargs", {}), ) # pyre-fixme[4]: Attribute must be annotated. 
self.inv_bounds = self._compute_inverse_bounds(self.transforms) @@ -629,6 +618,20 @@ def _compute_inverse_bounds( inv_bounds[k] = tuple(checked_cast_list(float, [-np.inf, np.inf])) return inv_bounds + @staticmethod + def _compute_sklearn_transforms( + Ys: dict[str, list[float]], + transformer: Callable[[dict[str, Any]], TransformerMixin], + transformer_kwargs: dict[str, Any], + ) -> dict[str, TransformerMixin]: + """Compute sklearn transforms.""" + transforms = {} + for k, ys in Ys.items(): + y = np.array(ys)[:, None] # Need to unsqueeze the last dimension + t = transformer(**transformer_kwargs).fit(y) + transforms[k] = t + return transforms + def _transform_observation_data( self, observation_data: list[ObservationData], @@ -639,14 +642,12 @@ def _transform_observation_data( if m in self.metric_names: transform = self.transforms[m].transform if self.match_ci_width: - lower_bound, upper_bound = self.inv_bounds[m] obsd.means[i], obsd.covariance[i, i] = match_ci_width_truncated( mean=obsd.means[i], variance=obsd.covariance[i, i], transform=lambda y: transform(np.array(y, ndmin=2)), - lower_bound=lower_bound, - upper_bound=upper_bound, - clip_mean=self.clip_mean, + lower_bound=-np.inf, + upper_bound=np.inf, ) else: obsd.means[i] = transform(np.array(obsd.means[i], ndmin=2)) @@ -694,14 +695,15 @@ def transform_optimization_config( intersection = set(c_metric_names) & set(self.metric_names) if intersection: raise NotImplementedError( - f"PowerTransformY cannot be used for metric(s) {intersection} " - "that are part of a ScalarizedOutcomeConstraint." + f"{self.__class__.__name__} cannot be used for metric(s) " + f"{intersection} that are part of a " + "ScalarizedOutcomeConstraint." ) elif c.metric.name in self.metric_names: if c.relative: raise ValueError( - f"PowerTransformY cannot be applied to metric {c.metric.name} " - "since it is subject to a relative constraint." + f"{self.__class__.__name__} cannot be applied to metric " + f"{c.metric.name} since it is subject to a relative constraint." ) else: transform = self.transforms[c.metric.name].transform @@ -800,7 +802,7 @@ def __init__( config: TConfig | None = None, ) -> None: config = config or {} - config["transformer"] = LogWarpingTransformer() + config["transformer"] = LogWarpingTransformer config["match_ci_width"] = True config["clip_mean"] = True super().__init__(search_space, observations, modelbridge, config) @@ -813,7 +815,12 @@ def _compute_inverse_bounds( for k, lt in transforms.items(): if not isinstance(lt, LogWarpingTransformer): raise ValueError(f"Unexpected transformer type: {type(lt)}") - inv_bounds[k] = tuple(checked_cast_list(float, [-0.5, 0.5])) + # match_ci_width_truncated uses a margin of 0.001 to clip the mean, so + # we need to add a small margin to the bounds to avoid clipping. 
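The match_ci_width_truncated helper used throughout this file is imported from ax.modelbridge.transforms.utils rather than defined in this patch. Conceptually it transforms the mean and rescales the variance so that a confidence interval keeps its width under a monotone transform. The stand-in below illustrates that idea only; it is not Ax's implementation, and the truncation and mean-clipping details are deliberately omitted:

import numpy as np

def match_ci_width_sketch(mean, variance, transform, factor=1.96):
    # Map the +/- factor * sd interval through the transform and back out
    # a variance whose interval has the transformed width.
    sd = np.sqrt(variance)
    lo, hi = transform(mean - factor * sd), transform(mean + factor * sd)
    return transform(mean), ((hi - lo) / (2 * factor)) ** 2

new_mean, new_var = match_ci_width_sketch(0.6, 0.03, np.log1p)
assert new_var < 0.03  # log1p compresses the interval around 0.6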
+ margin = 1e-3 + inv_bounds[k] = tuple( + checked_cast_list(float, [-0.5 - margin, 0.5 + margin]) + ) return inv_bounds @@ -826,7 +833,7 @@ def __init__( config: TConfig | None = None, ) -> None: config = config or {} - config["transformer"] = InfeasibleTransformer() + config["transformer"] = InfeasibleTransformer config["match_ci_width"] = True config["clip_mean"] = True super().__init__(search_space, observations, modelbridge, config) @@ -858,5 +865,5 @@ def __init__( config: TConfig | None = None, ) -> None: config = config or {} - config["transformer"] = HalfRankTransformer() + config["transformer"] = HalfRankTransformer super().__init__(search_space, observations, modelbridge, config) diff --git a/ax/modelbridge/transforms/tests/test_log_warping_y.py b/ax/modelbridge/transforms/tests/test_log_warping_y.py new file mode 100644 index 00000000000..1a78afde2b8 --- /dev/null +++ b/ax/modelbridge/transforms/tests/test_log_warping_y.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +from __future__ import annotations + +from copy import deepcopy + +import numpy as np +from ax.core.metric import Metric +from ax.core.objective import Objective +from ax.core.observation import Observation, ObservationData, ObservationFeatures +from ax.core.optimization_config import OptimizationConfig +from ax.core.outcome_constraint import OutcomeConstraint, ScalarizedOutcomeConstraint +from ax.core.types import ComparisonOp +from ax.modelbridge.transforms.sklearn_y import LogWarpingTransformer, LogWarpingY +from ax.utils.common.testutils import TestCase +from ax.utils.testing.core_stubs import get_observations_with_invalid_value + + +def get_constraint( + metric: Metric, bound: float, relative: bool +) -> list[OutcomeConstraint]: + return [ + OutcomeConstraint( + metric=metric, op=ComparisonOp.GEQ, bound=bound, relative=relative + ) + ] + + +class LogWarpingYTest(TestCase): + def setUp(self) -> None: + super().setUp() + self.obsd1 = ObservationData( + metric_names=["m1", "m2"], + means=np.array([0.6, 0.9]), + covariance=np.array([[0.03, 0.0], [0.0, 0.001]]), + ) + self.obsd2 = ObservationData( + metric_names=["m1", "m2"], + means=np.array([0.1, 0.4]), + covariance=np.array([[0.005, 0.0], [0.0, 0.05]]), + ) + self.obsd3 = ObservationData( + metric_names=["m1", "m2"], + means=np.array([0.9, 0.8]), + covariance=np.array([[0.02, 0.0], [0.0, 0.01]]), + ) + self.obsd_nan = ObservationData( + metric_names=["m1", "m2"], + means=np.array([0.3, 0.2]), + covariance=np.array([[float("nan"), 0.0], [0.0, float("nan")]]), + ) + self.observations = [ + Observation(features=ObservationFeatures({}), data=obsd) + for obsd in [self.obsd1, self.obsd2, self.obsd3, self.obsd_nan] + ] + + def test_Init(self) -> None: + shared_init_args = { + "search_space": None, + "observations": self.observations[:2], + } + # Init without a config + t = LogWarpingY(**shared_init_args) + self.assertTrue(t.clip_mean) + self.assertEqual(t.metric_names, ["m1", "m2"]) + + # Test init with config + for m in ["m1", "m2"]: + tf = LogWarpingY(**shared_init_args, config={"metrics": [m]}) + # tf.transforms should only exist for m and be a LogWarpingTransformer + self.assertIsInstance(tf.transforms, dict) + self.assertEqual([*tf.transforms], [m]) # Check keys + self.assertIsInstance(tf.transforms[m], LogWarpingTransformer) + # tf.inv_bounds should only exist for m 
and be a tuple of length 2 + self.assertIsInstance(tf.inv_bounds, dict) + self.assertEqual([*tf.inv_bounds], [m]) # Check keys + self.assertIsInstance(tf.inv_bounds[m], tuple) + margin = 1e-3 # TODO clean this up + self.assertEqual(tf.inv_bounds[m], (-0.5 - margin, 0.5 + margin)) + + def test_TransformAndUntransformOneMetric(self) -> None: + t = LogWarpingY( + search_space=None, + observations=deepcopy(self.observations[:2]), + config={"metrics": ["m1"]}, + ) + + # Transform the data and make sure we don't touch m2 + observation_data_tf = t._transform_observation_data( + deepcopy([self.obsd1, self.obsd2]) + ) + for obsd, obsd_orig in zip(observation_data_tf, [self.obsd1, self.obsd2]): + self.assertNotAlmostEqual(obsd.means[0], obsd_orig.means[0]) + self.assertNotAlmostEqual(obsd.covariance[0][0], obsd_orig.covariance[0][0]) + self.assertAlmostEqual(obsd.means[1], obsd_orig.means[1]) + self.assertAlmostEqual(obsd.covariance[1][1], obsd_orig.covariance[1][1]) + + # Untransform the data and make sure the means are the same + observation_data_untf = t._untransform_observation_data(observation_data_tf) + for obsd, obsd_orig in zip(observation_data_untf, [self.obsd1, self.obsd2]): + self.assertAlmostEqual(obsd.means[0], obsd_orig.means[0], places=4) + self.assertAlmostEqual(obsd.means[1], obsd_orig.means[1], places=4) + + # NaN covar values remain as NaNs + transformed_obsd_nan = t._transform_observation_data([deepcopy(self.obsd_nan)])[ + 0 + ] + cov_results = np.array(transformed_obsd_nan.covariance) + self.assertTrue(np.all(np.isnan(np.diag(cov_results)))) + untransformed = t._untransform_observation_data([transformed_obsd_nan])[0] + self.assertTrue( + np.array_equal( + untransformed.covariance, self.obsd_nan.covariance, equal_nan=True + ) + ) + + def test_TransformAndUntransformAllMetrics(self) -> None: + t = LogWarpingY( + search_space=None, + observations=deepcopy(self.observations[:2]), + config={"metrics": ["m1", "m2"]}, + ) + + observation_data_tf = t._transform_observation_data( + deepcopy([self.obsd1, self.obsd2]) + ) + for obsd, obsd_orig in zip(observation_data_tf, [self.obsd1, self.obsd2]): + for i in range(2): # Both metrics should be transformed + self.assertNotAlmostEqual(obsd.means[i], obsd_orig.means[i]) + self.assertNotAlmostEqual( + obsd.covariance[i][i], obsd_orig.covariance[i][i] + ) + + # Untransform the data and make sure the means are the same + observation_data_untf = t._untransform_observation_data(observation_data_tf) + for obsd, obsd_orig in zip(observation_data_untf, [self.obsd1, self.obsd2]): + for i in range(2): # Both metrics should be transformed + self.assertAlmostEqual(obsd.means[i], obsd_orig.means[i], places=4) + + # NaN covar values remain as NaNs + transformed_obsd_nan = t._transform_observation_data([deepcopy(self.obsd_nan)])[ + 0 + ] + cov_results = np.array(transformed_obsd_nan.covariance) + self.assertTrue(np.all(np.isnan(np.diag(cov_results)))) + + def test_TransformOptimizationConfig(self) -> None: + m1 = Metric(name="m1") + objective_m1 = Objective(metric=m1, minimize=False) + m2 = Metric(name="m2") + objective_m2 = Objective(metric=m2, minimize=False) + + # No constraints + oc = OptimizationConfig(objective=objective_m1, outcome_constraints=[]) + tf = LogWarpingY( + search_space=None, + observations=self.observations[:2], + config={"metrics": ["m1"]}, + ) + oc_tf = tf.transform_optimization_config(deepcopy(oc), None, None) + self.assertEqual(oc_tf, oc) + + # Output constraint on a different metric should not transform the bound + for bound in 
[-1.234, 0, 2.345]: + oc = OptimizationConfig( + objective=objective_m1, + outcome_constraints=get_constraint( + metric=m2, bound=bound, relative=False + ), + ) + oc_tf = tf.transform_optimization_config(deepcopy(oc), None, None) + self.assertEqual(oc_tf, oc) + + # Output constraint on the same metric should transform the bound + for bound in [-1.234, 0, 2.345]: + oc = OptimizationConfig( + objective=objective_m2, + outcome_constraints=get_constraint( + metric=m1, bound=bound, relative=False + ), + ) + oc_tf = tf.transform_optimization_config(deepcopy(oc), None, None) + oc_true = deepcopy(oc) + tf_bound = tf.transforms["m1"].transform(np.array(bound, ndmin=2)).item() + oc_true.outcome_constraints[0].bound = tf_bound + self.assertEqual(oc_tf, oc_true) + + # Check untransform of outcome constraint + cons = tf.untransform_outcome_constraints( + outcome_constraints=oc_tf.outcome_constraints, fixed_features=None + ) + self.assertEqual(cons, oc.outcome_constraints) + + # Relative constraints aren't supported + oc = OptimizationConfig( + objective=objective_m2, + outcome_constraints=get_constraint(metric=m1, bound=2.345, relative=True), + ) + with self.assertRaisesRegex( + ValueError, + "LogWarpingY cannot be applied to metric m1 since it is " + "subject to a relative constraint.", + ): + tf.transform_optimization_config(oc, None, None) + + # Untransform doesn't work if relative + with self.assertRaises(ValueError): + tf.untransform_outcome_constraints( + outcome_constraints=oc.outcome_constraints, + fixed_features=None, + ) + + # Support for scalarized outcome constraints isn't implemented + m3 = Metric(name="m3") + oc = OptimizationConfig( + objective=objective_m2, + outcome_constraints=[ + ScalarizedOutcomeConstraint( + metrics=[m1, m3], op=ComparisonOp.GEQ, bound=2.345, relative=False + ) + ], + ) + with self.assertRaises(NotImplementedError) as cm: + tf.transform_optimization_config(oc, None, None) + self.assertEqual( + "LogWarpingY cannot be used for metric(s) {'m1'} " + "that are part of a ScalarizedOutcomeConstraint.", + str(cm.exception), + ) + + def test_non_finite_data_raises(self) -> None: + for invalid_value in [float("nan"), float("inf")]: + observations = get_observations_with_invalid_value(invalid_value) + with self.assertRaisesRegex( + ValueError, f"Non-finite data found for metric m1: {invalid_value}" + ): + LogWarpingY(observations=observations, config={"metrics": ["m1"]}) + + +if __name__ == "__main__": + import unittest + + unittest.main() diff --git a/ax/modelbridge/transforms/tests/test_power_y_transform.py b/ax/modelbridge/transforms/tests/test_power_y_transform.py index 4db9d1f708e..b80db7c0279 100644 --- a/ax/modelbridge/transforms/tests/test_power_y_transform.py +++ b/ax/modelbridge/transforms/tests/test_power_y_transform.py @@ -19,7 +19,6 @@ from ax.core.outcome_constraint import OutcomeConstraint, ScalarizedOutcomeConstraint from ax.core.types import ComparisonOp from ax.modelbridge.transforms.power_transform_y import PowerTransformY -from ax.modelbridge.transforms.sklearn_y import _compute_sklearn_transforms from ax.modelbridge.transforms.utils import get_data, match_ci_width_truncated from ax.utils.common.testutils import TestCase from ax.utils.testing.core_stubs import get_observations_with_invalid_value @@ -100,7 +99,7 @@ def test_GetData(self) -> None: def test_ComputePowerTransform(self) -> None: Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"]) - pts = _compute_sklearn_transforms( + pts = PowerTransformY._compute_sklearn_transforms( Ys, 
transformer=PowerTransformer, transformer_kwargs={"method": "yeo-johnson"}, @@ -120,7 +119,7 @@ def test_ComputePowerTransform(self) -> None: def test_ComputeInverseBounds(self) -> None: Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"]) - pt = _compute_sklearn_transforms( + pt = PowerTransformY._compute_sklearn_transforms( Ys, transformer=PowerTransformer, transformer_kwargs={"method": "yeo-johnson"}, @@ -149,7 +148,7 @@ def test_ComputeInverseBounds(self) -> None: def test_MatchCIWidth(self) -> None: Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"]) - pt = _compute_sklearn_transforms( + pt = PowerTransformY._compute_sklearn_transforms( Ys, transformer=PowerTransformer, transformer_kwargs={"method": "yeo-johnson"}, From 319996486d7033363d7f01b18578eb7473e1a903 Mon Sep 17 00:00:00 2001 From: Rhys Goodall Date: Wed, 11 Dec 2024 14:13:48 -0500 Subject: [PATCH 5/5] fix: make the log-warping transform have unbounded image to avoid issues with transforming bounds. --- ax/modelbridge/transforms/sklearn_y.py | 67 ++++++++++--------- .../transforms/tests/test_log_warping_y.py | 3 +- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/ax/modelbridge/transforms/sklearn_y.py b/ax/modelbridge/transforms/sklearn_y.py index eaee514f583..eaddedd91d2 100644 --- a/ax/modelbridge/transforms/sklearn_y.py +++ b/ax/modelbridge/transforms/sklearn_y.py @@ -77,14 +77,21 @@ def transform(self, X, copy=None): X = self._check_input(X, in_fit=False, check_shape=True) X[:, :] = (self.labels_max_ - X) / (self.labels_max_ - self.labels_min_) - # TODO use the taylor series expansion for -ve values of X such that we - # can avoid the divergence of the log function if bounds are much larger - # than maximum value of X. + finite_mask = np.isfinite(X) + positive_mask = X >= 0 X[:, :] = np.where( - np.isfinite(X), + finite_mask & positive_mask, 0.5 - (np.log1p(X * (self.offset - 1)) / np.log(self.offset)), X, ) + # This isn't in the vizier implementation but allows us to handle the + # transformation of values of X that are larger than the maximum value + # of X seen in the fitting data. + X[:, :] = np.where( + finite_mask & ~positive_mask, + 0.5 - X * (self.offset - 1) / np.log(self.offset), + X, + ) return X @@ -92,10 +99,22 @@ def inverse_transform(self, X, copy=None): check_is_fitted(self) X = self._check_input(X, in_fit=False, check_shape=True) - X[:, :] = self.labels_max_ - (np.exp(np.log(self.offset) * (0.5 - X)) - 1) * ( - self.labels_max_ - self.labels_min_ - ) / (self.offset - 1) + above_max_mask = X > 0.5 + X[:, :] = np.where( + ~above_max_mask, + np.expm1(np.log(self.offset) * (0.5 - X)) / (self.offset - 1), + X, + ) + # This isn't in the vizier implementation but allows us to handle the + # inverse transformation of values of X that are larger than the maximum + # value of X seen in the fitting data. + X[:, :] = np.where( + above_max_mask, + (0.5 - X) * np.log(self.offset) / (self.offset - 1), + X, + ) + X[:, :] = self.labels_max_ - X * (self.labels_max_ - self.labels_min_) return X def _check_input(self, X, in_fit, check_shape=False): @@ -650,6 +669,9 @@ def _transform_observation_data( upper_bound=np.inf, ) else: + # TODO: for sklearn transformers that would have known + # variance/covariance transforms we should work out an + # interface to use them. 
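To sanity-check the piecewise extension introduced in this patch: the linear branch matches the log branch's value and slope at zero, so the extended map is continuous and strictly monotone on scaled values, and the two inverse branches undo it exactly. A self-contained round trip, where offset is again an assumed value and the inputs stay above -1/(offset - 1) so the masked-out log1p branch remains finite under np.where's eager evaluation:

import numpy as np

offset = 1.5  # assumed; the fitted transformer's value is not shown here

def fwd(s):
    # s is the scaled value (labels_max_ - X) / (labels_max_ - labels_min_).
    return np.where(
        s >= 0,
        0.5 - np.log1p(s * (offset - 1)) / np.log(offset),  # log branch
        0.5 - s * (offset - 1) / np.log(offset),  # linear extension
    )

def inv(t):
    return np.where(
        t <= 0.5,
        np.expm1(np.log(offset) * (0.5 - t)) / (offset - 1),
        (0.5 - t) * np.log(offset) / (offset - 1),
    )

s = np.linspace(-1.5, 2.0, 15)  # includes values outside the fitted range
assert np.allclose(inv(fwd(s)), s)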
obsd.means[i] = transform(np.array(obsd.means[i], ndmin=2))
 return observation_data
 
 def _untransform_observation_data(
@@ -677,9 +699,12 @@ def _untransform_observation_data(
 transform=lambda y: transform(np.array(y, ndmin=2)),
 lower_bound=lower_bound,
 upper_bound=upper_bound,
- clip_mean=True,
+ clip_mean=self.clip_mean,
 )
 else:
+ # TODO: for sklearn transformers that would have known
+ # variance/covariance inverse transforms we should work
+ # out an interface to use them.
 obsd.means[i] = transform(np.array(obsd.means[i], ndmin=2))
 return observation_data
 
 def transform_optimization_config(
@@ -804,25 +829,8 @@ def __init__(
 config = config or {}
 config["transformer"] = LogWarpingTransformer
 config["match_ci_width"] = True
- config["clip_mean"] = True
 super().__init__(search_space, observations, modelbridge, config)
 
- @staticmethod
- def _compute_inverse_bounds(
- transforms: dict[str, LogWarpingTransformer], **kwargs
- ) -> dict[str, tuple[float, float]]:
- inv_bounds = defaultdict()
- for k, lt in transforms.items():
- if not isinstance(lt, LogWarpingTransformer):
- raise ValueError(f"Unexpected transformer type: {type(lt)}")
- # match_ci_width_truncated uses a margin of 0.001 to clip the mean, so
- # we need to add a small margin to the bounds to avoid clipping.
- margin = 1e-3
- inv_bounds[k] = tuple(
- checked_cast_list(float, [-0.5 - margin, 0.5 + margin])
- )
- return inv_bounds
-
 
 class InfeasibleY(SklearnTransform):
 def __init__(
@@ -835,7 +843,6 @@ def __init__(
 config = config or {}
 config["transformer"] = InfeasibleTransformer
 config["match_ci_width"] = True
- config["clip_mean"] = True
 super().__init__(search_space, observations, modelbridge, config)
 
 @staticmethod
@@ -846,10 +853,10 @@ def _compute_inverse_bounds(
 for k, it in transforms.items():
 if not isinstance(it, InfeasibleTransformer):
 raise ValueError(f"Unexpected transformer type: {type(it)}")
- # If we encounter a value that is lower than the warped bad value, we
- # assign to infeasible values then it would potentially end up with
- # exceptionally large values when we match the CI width. Clip to the
- # warped bad value to avoid this.
+ # If we encounter a value that is lower than the warped bad value
+ # that we assign to infeasible values, then it makes sense to clip
+ # to the warped bad value so that no value is treated as worse
+ # than being infeasible.
 inv_bounds[k] = tuple(
 checked_cast_list(float, [it.warped_bad_value_[0, 0], np.inf])
 )
 return inv_bounds
diff --git a/ax/modelbridge/transforms/tests/test_log_warping_y.py b/ax/modelbridge/transforms/tests/test_log_warping_y.py
index 1a78afde2b8..102e9e754b8 100644
--- a/ax/modelbridge/transforms/tests/test_log_warping_y.py
+++ b/ax/modelbridge/transforms/tests/test_log_warping_y.py
@@ -81,8 +81,7 @@ def test_Init(self) -> None:
 self.assertIsInstance(tf.inv_bounds, dict)
 self.assertEqual([*tf.inv_bounds], [m]) # Check keys
 self.assertIsInstance(tf.inv_bounds[m], tuple)
- margin = 1e-3 # TODO clean this up
- self.assertEqual(tf.inv_bounds[m], (-0.5 - margin, 0.5 + margin))
+ self.assertEqual(tf.inv_bounds[m], (-np.inf, np.inf))
 def test_TransformAndUntransformOneMetric(self) -> None:
 t = LogWarpingY(
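Finally, a minimal end-to-end sketch of LogWarpingY as it stands after this patch series, mirroring the unit tests above; it assumes the patches apply cleanly and exercises the same private helpers the tests use:

from copy import deepcopy

import numpy as np
from ax.core.observation import Observation, ObservationData, ObservationFeatures
from ax.modelbridge.transforms.sklearn_y import LogWarpingY

obsds = [
    ObservationData(
        metric_names=["m1"],
        means=np.array([m]),
        covariance=np.array([[v]]),
    )
    for m, v in [(0.6, 0.03), (0.1, 0.005)]
]
observations = [
    Observation(features=ObservationFeatures({}), data=obsd) for obsd in obsds
]
t = LogWarpingY(
    search_space=None, observations=observations, config={"metrics": ["m1"]}
)

# The transform mutates observation data in place, hence the deepcopy
# (the unit tests do the same).
tf = t._transform_observation_data(deepcopy(obsds))
rt = t._untransform_observation_data(tf)
assert np.allclose([o.means[0] for o in rt], [0.6, 0.1], atol=1e-3)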