Refactor Inducing Point Selection to Use Allocator Classes for Enhanced Flexibility (#377) (#435)

Summary:
# Description
This PR proposes a solution to issue #378. Although the implementation is fully functional and tested, it is presented as an overall proposal, open to discussion and potential refinement.

### Key Changes
- **Refactor of `select_inducing_points` Function**:
  - **Previous Implementation**: Accepted `method` as a string (`"pivoted_chol"`, `"kmeans++"`, `"auto"`, `"sobol"`) and used conditional logic to select inducing points.
  - **New Implementation**: Accepts an `InducingPointAllocator` instance, with `AutoAllocator` as the default if `allocator` is `None`. This lets users pass allocator instances directly, aligning with the issue's goal of enabling flexible use of custom allocators like `GreedyImprovementReduction` (see the usage sketch below this list).

- **New `inducing_point_allocators.py` File**:
  - Introduces the classes `SobolAllocator`, `KMeansAllocator`, and `AutoAllocator`, all implementing the `InducingPointAllocator` interface from BoTorch. This modularizes the allocator logic, moving it out of `select_inducing_points` while following the established base-class structure.

- **Modifications to Models and Example Files**:
  - Updated `gp_classification.py`, `monotonic_projection_gp.py`, `monotonic_rejection_gp.py`, `semi_p.py`, and `example_problems.py` to handle allocator class instances rather than string-based methods, improving overall consistency.
  - Added imports for the new allocator classes to `__init__.py` so they are accessible across the codebase.

- **Updated Tests**:
  - Adjusted tests in `test_semi_p.py`, `test_utils.py`, and `test_config.py` to work with allocator classes instead of the previous string-based structure.
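
For concreteness, here is a usage sketch of the new interface, assembled from the diffs below. The bounds and training data are made up, and `KMeansAllocator()` taking no constructor arguments follows its use in `example_problems.py`:

```python
import torch

from aepsych.models import GPClassificationModel, KMeansAllocator
from aepsych.models.utils import select_inducing_points

X = torch.rand(500, 2)  # hypothetical training inputs
lb, ub = torch.zeros(2), torch.ones(2)  # hypothetical bounds

# Previously: select_inducing_points(..., method="kmeans++")
# Now: pass an allocator instance instead of a string.
inducing_points = select_inducing_points(
    allocator=KMeansAllocator(),
    inducing_size=100,
    X=X,
    bounds=torch.stack((lb, ub)),
)

# Models accept allocator instances through the same parameter; omitting it
# falls back to the AutoAllocator() default.
model = GPClassificationModel(
    lb=lb,
    ub=ub,
    inducing_size=100,
    inducing_point_method=KMeansAllocator(),
)
```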

### Additional Notes
This PR preserves most of the existing logic in `select_inducing_points` to keep changes minimal. Further work is still needed to confirm compatibility with additional BoTorch allocators and to support advanced configurations using `from_config` for custom allocator setups; I'd love to hear your feedback on the overall approach before moving forward with those refinements.
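
As a sketch of that config path: because the new allocators are imported in `aepsych.models.__init__` and that module is registered with `Config`, a class name in an INI config should resolve via `config.getobj`, with `AutoAllocator` as the fallback (see the `from_config` hunk in the `gp_classification.py` diff below). The config contents here are illustrative assumptions, not a tested configuration:

```python
from aepsych.config import Config
from aepsych.models import GPClassificationModel

# Hypothetical minimal config; depending on factory defaults, additional
# options may be required. lb/ub in [common] follow AEPsych convention.
config_str = """
[common]
lb = [0, 0]
ub = [1, 1]

[GPClassificationModel]
inducing_size = 100
inducing_point_method = KMeansAllocator
"""

model = GPClassificationModel.from_config(Config(config_str=config_str))
```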

Pull Request resolved: #435

Reviewed By: crasanders

Differential Revision: D65451912

Pulled By: JasonKChow

fbshipit-source-id: e0529e545e428ad94ef965cc9d642577cbeb2777
yalsaffar authored and facebook-github-bot committed Dec 5, 2024
1 parent d9ee675 commit 32af5c5
Showing 13 changed files with 1,427 additions and 103 deletions.
3 changes: 2 additions & 1 deletion aepsych/benchmark/example_problems.py

```diff
@@ -11,6 +11,7 @@
     novel_discrimination_testfun,
 )
 from aepsych.models import GPClassificationModel
+from aepsych.models.inducing_point_allocators import KMeansAllocator
 
 """The DiscrimLowDim, DiscrimHighDim, ContrastSensitivity6d, and Hartmann6Binary classes
 are copied from bernoulli_lse github repository (https://github.com/facebookresearch/bernoulli_lse)
@@ -109,7 +110,7 @@ def __init__(
             lb=self.bounds[0],
             ub=self.bounds[1],
             inducing_size=100,
-            inducing_point_method="kmeans++",
+            inducing_point_method=KMeansAllocator(),
         )
 
         self.m.fit(
```
14 changes: 14 additions & 0 deletions aepsych/models/__init__.py

```diff
@@ -10,6 +10,14 @@
 from ..config import Config
 from .gp_classification import GPBetaRegressionModel, GPClassificationModel
 from .gp_regression import GPRegressionModel
+from .inducing_point_allocators import (
+    AutoAllocator,
+    DummyAllocator,
+    FixedAllocator,
+    GreedyVarianceReduction,
+    KMeansAllocator,
+    SobolAllocator,
+)
 from .monotonic_projection_gp import MonotonicProjectionGP
 from .monotonic_rejection_gp import MonotonicRejectionGP
 from .multitask_regression import IndependentMultitaskGPRModel, MultitaskGPRModel
@@ -34,6 +42,12 @@
     "semi_p_posterior_transform",
     "GPBetaRegressionModel",
     "PairwiseProbitModel",
+    "AutoAllocator",
+    "KMeansAllocator",
+    "SobolAllocator",
+    "DummyAllocator",
+    "FixedAllocator",
+    "GreedyVarianceReduction",
 ]
 
 Config.register_module(sys.modules[__name__])
```
44 changes: 28 additions & 16 deletions aepsych/models/gp_classification.py

```diff
@@ -16,9 +16,15 @@
 from aepsych.config import Config
 from aepsych.factory.default import default_mean_covar_factory
 from aepsych.models.base import AEPsychModelDeviceMixin
+from aepsych.models.inducing_point_allocators import (
+    AutoAllocator,
+    DummyAllocator,
+    SobolAllocator,
+)
 from aepsych.models.utils import select_inducing_points
 from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
+from botorch.models.utils.inducing_point_allocators import InducingPointAllocator
 from gpytorch.likelihoods import BernoulliLikelihood, BetaLikelihood, Likelihood
 from gpytorch.models import ApproximateGP
 from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy
@@ -56,7 +62,7 @@ def __init__(
         likelihood: Optional[Likelihood] = None,
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
-        inducing_point_method: str = "auto",
+        inducing_point_method: InducingPointAllocator = AutoAllocator(),
         optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Initialize the GP Classification model
@@ -74,11 +80,8 @@
             inducing_size (int, optional): Number of inducing points. Defaults to 99.
             max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
                 there is no limit to the fitting time.
-            inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
-                If "sobol", a number of Sobol points equal to inducing_size will be selected.
-                If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
-                If "kmeans++", selects points by performing kmeans++ clustering on the training data.
-                If "auto", tries to determine the best method automatically.
+            inducing_point_method (InducingPointAllocator): The method to use for selecting inducing points.
+                Defaults to AutoAllocator().
             optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during
                 fitting. Assumes we are using L-BFGS-B.
         """
@@ -106,10 +109,10 @@ def __init__(
 
         # initialize to sobol before we have data
         inducing_points = select_inducing_points(
+            allocator=DummyAllocator(bounds=torch.stack((lb, ub))),
             inducing_size=self.inducing_size,
-            bounds=torch.stack((lb, ub)),
-            method="sobol",
         )
+        self.last_inducing_points_method = "DummyAllocator"
 
         variational_distribution = CholeskyVariationalDistribution(
             inducing_points.size(0), batch_shape=torch.Size([self._batch_size])
@@ -122,7 +125,6 @@
             learn_inducing_locations=False,
         )
         super().__init__(variational_strategy)
-
         if mean_module is None or covar_module is None:
             default_mean, default_covar = default_mean_covar_factory(
                 dim=self.dim, stimuli_per_trial=self.stimuli_per_trial
@@ -166,9 +168,14 @@ def from_config(cls, config: Config) -> GPClassificationModel:
         mean, covar = mean_covar_factory(config)
         max_fit_time = config.getfloat(classname, "max_fit_time", fallback=None)
 
-        inducing_point_method = config.get(
-            classname, "inducing_point_method", fallback="auto"
+        inducing_point_method_class = config.getobj(
+            classname, "inducing_point_method", fallback=AutoAllocator
         )
+        # Check if allocator class has a `from_config` method
+        if hasattr(inducing_point_method_class, "from_config"):
+            inducing_point_method = inducing_point_method_class.from_config(config)
+        else:
+            inducing_point_method = inducing_point_method_class()
 
         likelihood_cls = config.getobj(classname, "likelihood", fallback=None)
 
@@ -211,14 +218,16 @@ def _reset_variational_strategy(self) -> None:
         if self.train_inputs is not None:
             # remember original device
             device = self.device
-
             inducing_points = select_inducing_points(
+                allocator=self.inducing_point_method,
                 inducing_size=self.inducing_size,
                 covar_module=self.covar_module,
                 X=self.train_inputs[0],
                 bounds=self.bounds,
-                method=self.inducing_point_method,
             ).to(device)
+            self.last_inducing_points_method = (
+                self.inducing_point_method.__class__.__name__
+            )
 
             variational_distribution = CholeskyVariationalDistribution(
                 inducing_points.size(0), batch_shape=torch.Size([self._batch_size])
@@ -255,7 +264,10 @@ def fit(
         if not warmstart_hyperparams:
             self._reset_hyperparameters()
 
-        if not warmstart_induc:
+        if not warmstart_induc or (
+            self.last_inducing_points_method == "DummyAllocator"
+            and self.inducing_point_method.__class__.__name__ != "DummyAllocator"
+        ):
             self._reset_variational_strategy()
 
         n = train_y.shape[0]
@@ -360,7 +372,7 @@ def __init__(
         likelihood: Optional[Likelihood] = None,
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
-        inducing_point_method: str = "auto",
+        inducing_point_method: InducingPointAllocator = AutoAllocator(),
         optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Initialize the GP Beta Regression model
@@ -378,7 +390,7 @@
             inducing_size (int, optional): Number of inducing points. Defaults to 100.
             max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
                 there is no limit to the fitting time. Defaults to None.
-            inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+            inducing_point_method (InducingPointAllocator): The method to use to select the inducing points. If None, defaults to AutoAllocator().
         """
         if likelihood is None:
             likelihood = BetaLikelihood()
```
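
Because the parameter is now typed against BoTorch's `InducingPointAllocator` (see the import at the top of the `gp_classification.py` diff above), any conforming allocator can be dropped in. Below is a minimal hypothetical sketch, not part of this PR; it assumes the BoTorch base class requires `_get_quality_function` and that overriding `allocate_inducing_points` wholesale is sufficient:

```python
import torch
from botorch.models.utils.inducing_point_allocators import InducingPointAllocator


class RandomSubsetAllocator(InducingPointAllocator):
    """Hypothetical allocator that picks a random subset of the training inputs."""

    def _get_quality_function(self):
        # Required by the BoTorch ABC; unused here because
        # allocate_inducing_points is overridden wholesale.
        return None

    def allocate_inducing_points(
        self,
        inputs: torch.Tensor,
        covar_module: torch.nn.Module,
        num_inducing: int,
        input_batch_shape: torch.Size,
    ) -> torch.Tensor:
        # Sample rows without replacement, capped at the number of inputs.
        idx = torch.randperm(inputs.shape[0])[:num_inducing]
        return inputs[idx]
```

An instance would then be passed like the built-ins, e.g. `GPClassificationModel(..., inducing_point_method=RandomSubsetAllocator())`.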
[Diffs for the remaining 10 changed files are not shown.]
