From 2d63d2bf28a4e636747f192552fc299ccfd43771 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 18:27:21 +0100 Subject: [PATCH 01/39] Fix is_numeric typo in _FixedNumericalContinuousParameter --- baybe/parameters/numerical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index ba210de244..418d7b2598 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -155,7 +155,7 @@ def summary(self) -> dict: class _FixedNumericalContinuousParameter(ContinuousParameter): """Parameter class for fixed numerical parameters.""" - is_numeric: ClassVar[bool] = True + is_numerical: ClassVar[bool] = True # See base class. value: float = field(converter=float) From a84527ffa55089abbdb19b0aefad47996a9ffcbe Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 12 Mar 2026 22:29:45 +0100 Subject: [PATCH 02/39] Generalize subspace naming --- baybe/constraints/utils.py | 2 +- baybe/recommenders/pure/bayesian/botorch.py | 94 ++++++++++++------- baybe/searchspace/continuous.py | 49 +++++----- .../test_cardinality_constraint_continuous.py | 2 +- 4 files changed, 88 insertions(+), 59 deletions(-) diff --git a/baybe/constraints/utils.py b/baybe/constraints/utils.py index 22570f29b8..4556c39919 100644 --- a/baybe/constraints/utils.py +++ b/baybe/constraints/utils.py @@ -25,7 +25,7 @@ def is_cardinality_fulfilled( Returns: ``True`` if all cardinality constraints are fulfilled, ``False`` otherwise. """ - for c in subspace_continuous.constraints_cardinality: + for c in subspace_continuous.constraints_subspace_generating: # Get the activity thresholds for all parameters cols = df[c.parameters] thresholds = { diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 0f89b1f80f..2be9ce2a84 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -16,7 +16,6 @@ from typing_extensions import override from baybe.acquisition.acqfs import qThompsonSampling -from baybe.constraints import ContinuousCardinalityConstraint from baybe.constraints.utils import is_cardinality_fulfilled from baybe.exceptions import ( IncompatibilityError, @@ -91,11 +90,10 @@ class BotorchRecommender(BayesianRecommender): """ max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Threshold defining the maximum number of subspaces to consider for exhaustive - search in the presence of cardinality constraints. If the combinatorial number of - groupings into active and inactive parameters dictated by the constraints is greater - than this number, that many randomly selected combinations are selected for - optimization.""" + """Maximum number of subspaces to evaluate when subspace-generating constraints are + present (e.g., continuous cardinality constraints). 
If the total number of subspaces + exceeds this limit, a random subset of that size is sampled for optimization instead + of performing an exhaustive search.""" @sampling_percentage.validator def _validate_percentage( # noqa: DOC101, DOC103 @@ -227,35 +225,34 @@ def _recommend_continuous_torch( self, subspace_continuous: SubspaceContinuous, batch_size: int ) -> tuple[Tensor, Tensor]: """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_cardinality: - return self._recommend_continuous_with_cardinality_constraints( + if subspace_continuous.constraints_subspace_generating: + return self._recommend_continuous_with_subspaces( subspace_continuous, batch_size ) else: - return self._recommend_continuous_without_cardinality_constraints( + return self._recommend_continuous_without_subspaces( subspace_continuous, batch_size ) - def _recommend_continuous_with_cardinality_constraints( + def _recommend_continuous_with_subspaces( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space with cardinality constraints. + """Recommend from a continuous space with subspace-generating constraints. - This is achieved by considering the individual restricted subspaces that can be - obtained by splitting the parameters into sets of active and inactive - parameters, according to what is allowed by the cardinality constraints. + Optimizes the acquisition function across subspaces defined by constraints + (currently only cardinality constraints) and returns the best result. The specific collection of subspaces considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random selection thereof, depending on the upper bound specified by the corresponding recommender attribute. - In each of these spaces, the (in)activity assignment is fixed, so that the - cardinality constraints can be removed and a regular optimization can be - performed. The recommendation is then constructed from the combined optimization - results of the unconstrained spaces. + In each subspace, the constraint-imposed configuration is fixed, so that the + constraints can be removed and a regular optimization can be performed. The + recommendation is then constructed from the combined optimization results of the + unconstrained spaces. Args: subspace_continuous: The continuous subspace from which to generate @@ -266,27 +263,24 @@ def _recommend_continuous_with_cardinality_constraints( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has no cardinality constraints. + ValueError: If the continuous search space has no subspace-generating + constraints. """ - if not subspace_continuous.constraints_cardinality: + if not subspace_continuous.constraints_subspace_generating: raise ValueError( - f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " - f"expects a subspace with constraints of type " - f"'{ContinuousCardinalityConstraint.__name__}'. " + f"'{self._recommend_continuous_with_subspaces.__name__}' " + f"expects a subspace with subspace-generating constraints." 
) # Determine search scope based on number of inactive parameter combinations - exhaustive_search = ( - subspace_continuous.n_inactive_parameter_combinations - <= self.max_n_subspaces - ) + exhaustive_search = subspace_continuous.n_subspaces <= self.max_n_subspaces iterator: Iterable[Collection[str]] if exhaustive_search: # If manageable, evaluate all combinations of inactive parameters - iterator = subspace_continuous.inactive_parameter_combinations() + iterator = subspace_continuous.subspace_configurations() else: # Otherwise, draw a random subset of inactive parameter combinations - iterator = subspace_continuous._sample_inactive_parameters( + iterator = subspace_continuous._sample_subspace_configurations( self.max_n_subspaces ) @@ -315,12 +309,12 @@ def _recommend_continuous_with_cardinality_constraints( return points, acqf_value - def _recommend_continuous_without_cardinality_constraints( + def _recommend_continuous_without_subspaces( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without cardinality constraints. + """Recommend from a continuous search space without subspace decomposition. Args: subspace_continuous: The continuous subspace from which to generate @@ -331,16 +325,16 @@ def _recommend_continuous_without_cardinality_constraints( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has cardinality constraints. + ValueError: If the continuous search space has subspace-generating + constraints. """ import torch from botorch.optim import optimize_acqf - if subspace_continuous.constraints_cardinality: + if subspace_continuous.constraints_subspace_generating: raise ValueError( - f"'{self._recommend_continuous_without_cardinality_constraints.__name__}' " # noqa: E501 - f"expects a subspace without constraints of type " - f"'{ContinuousCardinalityConstraint.__name__}'. " + f"'{self._recommend_continuous_without_subspaces.__name__}' " + f"expects a subspace without subspace-generating constraints." ) fixed_parameters = { @@ -399,6 +393,34 @@ def _recommend_hybrid( searchspace: SearchSpace, candidates_exp: pd.DataFrame, batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a hybrid search space. + + Dispatches to the appropriate optimization routine depending on whether + the continuous part contains subspace-generating constraints. + + Args: + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + if searchspace.continuous.constraints_subspace_generating: + return self._recommend_hybrid_with_subspaces( + searchspace, candidates_exp, batch_size + ) + return self._recommend_hybrid_without_subspaces( + searchspace, candidates_exp, batch_size + ) + + def _recommend_hybrid_without_subspaces( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, ) -> pd.DataFrame: """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch. 
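The renaming above generalizes the former cardinality-specific routine: every subspace-generating constraint contributes a set of (in)activity assignments, each assignment defines a restricted subspace that can be optimized with the constraint removed, and the best result across all subspaces is returned. As a rough, self-contained sketch of that control flow — using plain itertools and a toy scoring function instead of the actual BayBE/BoTorch objects; all names below are illustrative only, not the library API:

```python
# Illustrative sketch only: enumerate the "inactive parameter" assignments
# implied by a max-cardinality constraint, optimize each restricted subspace
# with a stand-in routine, and keep the result with the best score.
from itertools import combinations


def inactive_combinations(parameters: list[str], max_cardinality: int):
    """Yield the parameter sets that must be fixed to zero ("inactive")."""
    n_inactive = max(len(parameters) - max_cardinality, 0)
    for inactive in combinations(parameters, n_inactive):
        yield frozenset(inactive)


def optimize_subspace(inactive: frozenset, parameters: list[str]):
    """Toy per-subspace optimizer returning (point, acquisition_value)."""
    point = {p: (0.0 if p in inactive else 0.7) for p in parameters}
    return point, sum(point.values())  # stand-in for the acquisition value


params = ["x1", "x2", "x3"]
results = [optimize_subspace(ia, params) for ia in inactive_combinations(params, 1)]
best_point, best_value = max(results, key=lambda r: r[1])
```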
diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 66ad32856f..d7d3fc7ba7 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -109,8 +109,10 @@ def __str__(self) -> str: return to_string(self.__class__.__name__, *fields) @property - def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...]: - """Cardinality constraints.""" + def constraints_subspace_generating( + self, + ) -> tuple[ContinuousCardinalityConstraint, ...]: + """Constraints generating subspaces for separate optimization.""" return tuple( c for c in self.constraints_nonlin @@ -144,18 +146,19 @@ def _validate_constraints_lin_ineq( ) @property - def n_inactive_parameter_combinations(self) -> int: - """The number of possible inactive parameter combinations.""" + def n_subspaces(self) -> int: + """The number of possible subspace configurations.""" return math.prod( - c.n_inactive_parameter_combinations for c in self.constraints_cardinality + c.n_inactive_parameter_combinations + for c in self.constraints_subspace_generating ) - def inactive_parameter_combinations(self) -> Iterator[frozenset[str]]: - """Get an iterator over all possible combinations of inactive parameters.""" + def subspace_configurations(self) -> Iterator[frozenset[str]]: + """Get an iterator over all possible subspace configurations.""" for combination in product( *[ con.inactive_parameter_combinations() - for con in self.constraints_cardinality + for con in self.constraints_subspace_generating ] ): yield frozenset(chain(*combination)) @@ -165,10 +168,10 @@ def _validate_constraints_nonlin(self, _, __) -> None: """Validate nonlinear constraints.""" # Note: The passed constraints are accessed indirectly through the property validate_cardinality_constraints_are_nonoverlapping( - self.constraints_cardinality + self.constraints_subspace_generating ) - for con in self.constraints_cardinality: + for con in self.constraints_subspace_generating: validate_cardinality_constraint_parameter_bounds(con, self.parameters) def to_searchspace(self) -> SearchSpace: @@ -311,9 +314,11 @@ def comp_rep_columns(self) -> tuple[str, ...]: return tuple(chain.from_iterable(p.comp_rep_columns for p in self.parameters)) @property - def parameter_names_in_cardinality_constraints(self) -> frozenset[str]: - """The names of all parameters affected by cardinality constraints.""" - names_per_constraint = (c.parameters for c in self.constraints_cardinality) + def parameter_names_in_subspace_constraints(self) -> frozenset[str]: + """The names of all parameters affected by subspace-generating constraints.""" + names_per_constraint = ( + c.parameters for c in self.constraints_subspace_generating + ) return frozenset(chain(*names_per_constraint)) @property @@ -391,7 +396,7 @@ def _enforce_cardinality_constraints( """ # Extract active parameters involved in cardinality constraints active_parameter_names = ( - self.parameter_names_in_cardinality_constraints.difference( + self.parameter_names_in_subspace_constraints.difference( inactive_parameter_names ) ) @@ -405,7 +410,9 @@ def _enforce_cardinality_constraints( elif p.name in active_parameter_names: constraints = [ - c for c in self.constraints_cardinality if p.name in c.parameters + c + for c in self.constraints_subspace_generating + if p.name in c.parameters ] # Constraint validation should have ensured that each parameter can @@ -481,7 +488,7 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame: if not self.is_constrained: return 
self._sample_from_bounds(batch_size, self.comp_rep_bounds.values) - if len(self.constraints_cardinality) == 0: + if len(self.constraints_subspace_generating) == 0: return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values) return self._sample_from_polytope_with_cardinality_constraints(batch_size) @@ -567,7 +574,7 @@ def _sample_from_polytope_with_cardinality_constraints( self, batch_size: int ) -> pd.DataFrame: """Draw random samples from a polytope with cardinality constraints.""" - if not self.constraints_cardinality: + if not self.constraints_subspace_generating: raise RuntimeError( f"This method should not be called without any constraints of type " f"'{ContinuousCardinalityConstraint.__name__}' in place. " @@ -584,7 +591,7 @@ def _sample_from_polytope_with_cardinality_constraints( while len(samples) < batch_size: # Randomly set some parameters inactive - inactive_params_sample = self._sample_inactive_parameters(1)[0] + inactive_params_sample = self._sample_subspace_configurations(1)[0] # Remove the inactive parameters from the search space. In the first # step, the active parameters get activated and inactive parameters are @@ -622,11 +629,11 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: - """Sample inactive parameters according to the given cardinality constraints.""" + def _sample_subspace_configurations(self, batch_size: int = 1) -> list[set[str]]: + """Sample subspace configurations according to the given constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) - for con in self.constraints_cardinality + for con in self.constraints_subspace_generating ] return [set(chain(*x)) for x in zip(*inactives_per_constraint)] diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 04e9e10cf9..0e54c850c8 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -66,7 +66,7 @@ def _validate_cardinality_constrained_batch( # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. max_cardinalities = [ - c.max_cardinality for c in subspace_continuous.constraints_cardinality + c.max_cardinality for c in subspace_continuous.constraints_subspace_generating ] if len(unique_row := batch.drop_duplicates()) == 1: assert (unique_row.iloc[0] == 0.0).all() and all( From 0d2a8132f007bc52f7a9a40519dd28400dc4d64b Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 12 Mar 2026 22:32:35 +0100 Subject: [PATCH 03/39] Extract _optimize_over_subspaces and add dispatch --- baybe/recommenders/pure/bayesian/botorch.py | 201 +++++++++++++++----- baybe/searchspace/continuous.py | 6 +- 2 files changed, 155 insertions(+), 52 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 2be9ce2a84..79b1a11410 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -5,7 +5,7 @@ import gc import math import warnings -from collections.abc import Collection, Iterable +from collections.abc import Callable, Collection, Iterable from typing import TYPE_CHECKING, Any, ClassVar import numpy as np @@ -136,6 +136,34 @@ def _recommend_discrete( ) -> pd.Index: """Generate recommendations from a discrete search space. 
+ Dispatches to the appropriate optimization routine depending on whether + subspace-generating constraints are present. Currently, no discrete + constraints generate subspaces, so this always routes to + ``_recommend_discrete_without_subspaces``. + + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. + """ + return self._recommend_discrete_without_subspaces( + subspace_discrete, candidates_exp, batch_size + ) + + def _recommend_discrete_without_subspaces( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Generate recommendations from a discrete search space. + Args: subspace_discrete: The discrete subspace from which to generate recommendations. @@ -272,25 +300,38 @@ def _recommend_continuous_with_subspaces( f"expects a subspace with subspace-generating constraints." ) - # Determine search scope based on number of inactive parameter combinations - exhaustive_search = subspace_continuous.n_subspaces <= self.max_n_subspaces - iterator: Iterable[Collection[str]] - if exhaustive_search: - # If manageable, evaluate all combinations of inactive parameters - iterator = subspace_continuous.subspace_configurations() + # Determine search scope based on number of subspace configurations + configs: Iterable[frozenset[str]] + if subspace_continuous.n_subspaces <= self.max_n_subspaces: + configs = subspace_continuous.subspace_configurations() else: - # Otherwise, draw a random subset of inactive parameter combinations - iterator = subspace_continuous._sample_subspace_configurations( + configs = subspace_continuous._sample_subspace_configurations( self.max_n_subspaces ) - # Create iterable of subspaces to be optimized - subspaces = ( - (subspace_continuous._enforce_cardinality_constraints(inactive_parameters)) - for inactive_parameters in iterator - ) + # Create closures for each subspace configuration + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[Tensor, Tensor]]: + def optimize() -> tuple[Tensor, Tensor]: + import torch + + sub = subspace_continuous._enforce_cardinality_constraints( + inactive_params + ) + # Note: We explicitly evaluate the acqf function for the batch + # because the object returned by the optimization routine may + # contain joint or individual acquisition values, depending on + # whether sequential or joint optimization is applied + p, _ = self._recommend_continuous_torch(sub, batch_size) + with torch.no_grad(): + acqf_value = self._botorch_acqf(p) + return p, acqf_value + + return optimize - points, acqf_value = self._optimize_continuous_subspaces(subspaces, batch_size) + callables = (make_callable(ip) for ip in configs) + points, acqf_value = self._optimize_over_subspaces(callables) # Check if any minimum cardinality constraints are violated if not is_cardinality_fulfilled( @@ -544,65 +585,125 @@ def _recommend_hybrid_without_subspaces( return rec_exp - def _optimize_continuous_subspaces( - self, subspaces: Iterable[SubspaceContinuous], batch_size: int - ) -> tuple[Tensor, Tensor]: - """Find the optimum candidates from multiple continuous subspaces. 
+ def _recommend_hybrid_with_subspaces( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.DataFrame: + """Recommend from a hybrid space with subspace-generating constraints. - Important: - Subspaces without feasible solutions will be silently ignored. If none of - the subspaces has a feasible solution, an exception will be raised. + Creates subspaces by enumerating/sampling inactive parameter configurations + for the continuous part, then runs hybrid optimization per subspace via + ``_recommend_hybrid_without_subspaces``. Args: - subspaces: The subspaces to consider for the optimization. - batch_size: The number of points to be recommended. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + from attrs import evolve + + subspace_c = searchspace.continuous + + # Determine exhaustive vs. sampling + configs: Iterable[frozenset[str]] + if subspace_c.n_subspaces <= self.max_n_subspaces: + configs = subspace_c.subspace_configurations() + else: + configs = subspace_c._sample_subspace_configurations(self.max_n_subspaces) + + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: + def optimize() -> tuple[pd.DataFrame, Tensor]: + import torch + + modified_cont = subspace_c._enforce_cardinality_constraints( + inactive_params + ) + modified_searchspace = evolve(searchspace, continuous=modified_cont) + rec = self._recommend_hybrid_without_subspaces( + modified_searchspace, candidates_exp, batch_size + ) + # Evaluate joint acquisition value on the recommended points + comp = modified_searchspace.transform(rec) + with torch.no_grad(): + acqf_value = self._botorch_acqf(to_tensor(comp.values).unsqueeze(0)) + return rec, acqf_value + + return optimize + + callables = (make_callable(ip) for ip in configs) + best_rec, _ = self._optimize_over_subspaces(callables) + + # Post-check minimum cardinality on continuous columns + if not is_cardinality_fulfilled( + best_rec[list(subspace_c.parameter_names)], + subspace_c, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization problem.", + MinimumCardinalityViolatedWarning, + ) + + return best_rec + + def _optimize_over_subspaces( + self, + subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + ) -> tuple[Any, Tensor]: + """Optimize across subspaces and return the result with the best acqf value. + + Each callable performs optimization for one subspace configuration and returns + a ``(result, acquisition_value)`` tuple. Subspaces that raise + ``InfeasibilityError`` are silently skipped. + + Args: + subspace_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one subspace and returns + ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the + subspace is infeasible. Raises: InfeasibilityError: If none of the subspaces has a feasible solution. Returns: - The batch of candidates and the corresponding acquisition value. + The result and acquisition value of the best subspace. 
""" - import torch from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError + results_all: list = [] acqf_values_all: list[Tensor] = [] - points_all: list[Tensor] = [] - for subspace in subspaces: + for optimize_fn in subspace_callables: try: - # Optimize the acquisition function - # Note: We explicitly evaluate the acqf function for the batch because - # the object returned by the optimization routine may contain joint or - # individual acquisition values, depending on the whether sequential - # or joint optimization is applied - p, _ = self._recommend_continuous_torch(subspace, batch_size) - with torch.no_grad(): - acqf = self._botorch_acqf(p) - - # Append optimization results - points_all.append(p) - acqf_values_all.append(acqf) - - # The optimization problem may be infeasible in certain subspaces - except BoInfeasibilityError: + result, acqf_value = optimize_fn() + results_all.append(result) + acqf_values_all.append(acqf_value) + except (BoInfeasibilityError, InfeasibilityError): pass - if not points_all: + if not results_all: raise InfeasibilityError( "No feasible solution could be found. Potentially the specified " "constraints are too restrictive, i.e. there may be too many " "constraints or thresholds may have been set too tightly. " - "Considered relaxing the constraints to improve the chances " + "Consider relaxing the constraints to improve the chances " "of finding a feasible solution." ) - # Find the best option f best_idx = np.argmax(acqf_values_all) - points = points_all[best_idx] - acqf_value = acqf_values_all[best_idx] - - return points, acqf_value + return results_all[best_idx], acqf_values_all[best_idx] # Collect leftover original slotted classes processed by `attrs.define` diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index d7d3fc7ba7..ba53c5b7d0 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -629,13 +629,15 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_subspace_configurations(self, batch_size: int = 1) -> list[set[str]]: + def _sample_subspace_configurations( + self, batch_size: int = 1 + ) -> list[frozenset[str]]: """Sample subspace configurations according to the given constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) for con in self.constraints_subspace_generating ] - return [set(chain(*x)) for x in zip(*inactives_per_constraint)] + return [frozenset(chain(*x)) for x in zip(*inactives_per_constraint)] def sample_from_full_factorial(self, batch_size: int = 1) -> pd.DataFrame: """Draw parameter configurations from the full factorial of the space. 
From dc8929eb71614dfeafa9f0019f2ccdd06562ae55 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 18:28:01 +0100 Subject: [PATCH 04/39] Add hybrid constraint tests --- .../test_cardinality_constraint_hybrid.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/constraints/test_cardinality_constraint_hybrid.py diff --git a/tests/constraints/test_cardinality_constraint_hybrid.py b/tests/constraints/test_cardinality_constraint_hybrid.py new file mode 100644 index 0000000000..bcbe115e58 --- /dev/null +++ b/tests/constraints/test_cardinality_constraint_hybrid.py @@ -0,0 +1,88 @@ +"""Tests for cardinality constraints in hybrid search spaces.""" + +import pytest + +from baybe.constraints.continuous import ContinuousCardinalityConstraint +from baybe.constraints.discrete import DiscreteCardinalityConstraint +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + NumericalDiscreteParameter, +) +from baybe.recommenders import BotorchRecommender +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 2 +MAX_CARDINALITY = 1 + +_discrete_params = [ + NumericalDiscreteParameter(f"d{i}", values=(0.0, 0.5, 1.0)) for i in range(2) +] +_continuous_params = [ + NumericalContinuousParameter(f"c{i}", bounds=(0, 1)) for i in range(2) +] + + +@pytest.mark.parametrize( + ("disc_params", "conti_params", "constraints"), + [ + pytest.param( + [NumericalDiscreteParameter("d", values=(0.0, 1.0))], + _continuous_params, + [ + ContinuousCardinalityConstraint( + parameters=[p.name for p in _continuous_params], + max_cardinality=MAX_CARDINALITY, + ) + ], + id="conti", + ), + pytest.param( + _discrete_params, + [NumericalContinuousParameter("c", bounds=(0, 1))], + [ + DiscreteCardinalityConstraint( + parameters=[p.name for p in _discrete_params], + max_cardinality=MAX_CARDINALITY, + ) + ], + id="disc", + ), + pytest.param( + _discrete_params, + _continuous_params, + [ + DiscreteCardinalityConstraint( + parameters=[p.name for p in _discrete_params], + max_cardinality=MAX_CARDINALITY, + ), + ContinuousCardinalityConstraint( + parameters=[p.name for p in _continuous_params], + max_cardinality=MAX_CARDINALITY, + ), + ], + id="hybrid", + ), + ], +) +def test_cardinality_constraint_hybrid(disc_params, conti_params, constraints): + """Cardinality constraints are respected in hybrid search spaces.""" + parameters = [*disc_params, *conti_params] + searchspace = SearchSpace.from_product(parameters, constraints) + target = NumericalTarget("t") + measurements = create_fake_input(parameters, [target]) + + rec = BotorchRecommender().recommend( + BATCH_SIZE, searchspace, target.to_objective(), measurements + ) + + for c in constraints: + if isinstance(c, ContinuousCardinalityConstraint): + assert is_cardinality_fulfilled( + rec, searchspace.continuous, check_minimum=False + ) + elif isinstance(c, DiscreteCardinalityConstraint): + n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) + assert (n_nonzero <= c.max_cardinality).all() From 1c3aaf73ebed95958bee0807c1d0adb638118e2c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 19:16:14 +0100 Subject: [PATCH 05/39] Filter by constraint type in cardinality utilities --- baybe/constraints/utils.py | 8 +++++++- .../constraints/test_cardinality_constraint_continuous.py | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git 
a/baybe/constraints/utils.py b/baybe/constraints/utils.py index 4556c39919..6d5e1e7378 100644 --- a/baybe/constraints/utils.py +++ b/baybe/constraints/utils.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.parameters.utils import is_inactive from baybe.searchspace import SubspaceContinuous @@ -25,7 +26,12 @@ def is_cardinality_fulfilled( Returns: ``True`` if all cardinality constraints are fulfilled, ``False`` otherwise. """ - for c in subspace_continuous.constraints_subspace_generating: + cardinality_constraints = [ + c + for c in subspace_continuous.constraints_subspace_generating + if isinstance(c, ContinuousCardinalityConstraint) + ] + for c in cardinality_constraints: # Get the activity thresholds for all parameters cols = df[c.parameters] thresholds = { diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 0e54c850c8..770e2ceab5 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -65,9 +65,12 @@ def _validate_cardinality_constrained_batch( # We thus include this check as a safety net for catching regressions. If it # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. - max_cardinalities = [ - c.max_cardinality for c in subspace_continuous.constraints_subspace_generating + cardinality_constraints = [ + c + for c in subspace_continuous.constraints_subspace_generating + if isinstance(c, ContinuousCardinalityConstraint) ] + max_cardinalities = [c.max_cardinality for c in cardinality_constraints] if len(unique_row := batch.drop_duplicates()) == 1: assert (unique_row.iloc[0] == 0.0).all() and all( max_cardinality == 0 for max_cardinality in max_cardinalities From c8928b8cc0aa2421ad519fc0a8c5503ffcbf4b39 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:57:44 +0100 Subject: [PATCH 06/39] Add DiscreteBatchConstraint class and validation --- baybe/constraints/__init__.py | 2 + baybe/constraints/discrete.py | 74 +++++++++++++++++++++++++++++++++ baybe/constraints/validation.py | 12 ++++++ 3 files changed, 88 insertions(+) diff --git a/baybe/constraints/__init__.py b/baybe/constraints/__init__.py index 8b92ecd6fe..40f4b33b0d 100644 --- a/baybe/constraints/__init__.py +++ b/baybe/constraints/__init__.py @@ -11,6 +11,7 @@ ) from baybe.constraints.discrete import ( DISCRETE_CONSTRAINTS_FILTERING_ORDER, + DiscreteBatchConstraint, DiscreteCardinalityConstraint, DiscreteCustomConstraint, DiscreteDependenciesConstraint, @@ -33,6 +34,7 @@ "ContinuousLinearEqualityConstraint", "ContinuousLinearInequalityConstraint", # --- Discrete constraints ---# + "DiscreteBatchConstraint", "DiscreteCardinalityConstraint", "DiscreteCustomConstraint", "DiscreteDependenciesConstraint", diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 497104049e..0307f898b4 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -7,6 +7,8 @@ from functools import reduce from typing import TYPE_CHECKING, Any, ClassVar, cast +import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from attrs.validators import in_, min_len @@ -424,6 +426,78 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: return df.index[mask_bad] +@define +class 
DiscreteBatchConstraint(DiscreteConstraint): + """Constraint ensuring all batch recommendations share the same parameter value. + + When this constraint is active, the recommender internally partitions the + candidate set into subspaces — one for each unique value of the constrained + parameter — obtains a full batch recommendation from each subspace, and + returns the batch with the highest joint acquisition value. + + This constraint is only effective with Bayesian recommenders that have access + to an acquisition function for comparing batches. It is not applied during + search space creation (all parameter values remain in the search space). + + Example: + If parameter ``Temperature`` has values ``[50, 100, 150]`` and a batch of + 10 is requested, the recommender will generate three candidate batches + (one all-50, one all-100, one all-150) and return the best one. + """ + + # Class variables + eval_during_creation: ClassVar[bool] = False + eval_during_modeling: ClassVar[bool] = True + + numerical_only: ClassVar[bool] = False + # See base class. + + def __attrs_post_init__(self): + """Validate that exactly one parameter is specified.""" + if len(self.parameters) != 1: + raise ValueError( + f"'{self.__class__.__name__}' requires exactly one parameter, " + f"but {len(self.parameters)} were provided: {self.parameters}." + ) + + @override + def get_invalid(self, data: pd.DataFrame) -> pd.Index: + """Get the indices of invalid rows. + + Always returns an empty index because this constraint operates at the + batch level, not the row level. Individual rows are never invalid; the + constraint is enforced at recommendation time by partitioning candidates + into subspaces. + + Args: + data: A dataframe where each row represents a parameter configuration. + + Returns: + An empty index. + """ + return pd.Index([]) + + def subspace_masks( + self, candidates_exp: pd.DataFrame + ) -> list[npt.NDArray[np.bool_]]: + """Return boolean masks defining the subspaces for this constraint. + + Each mask selects the rows in ``candidates_exp`` that belong to one + subspace, i.e. share the same value for the constrained parameter. + + Args: + candidates_exp: The experimental representation of candidate points. + + Returns: + A list of boolean masks, one per unique value of the constrained + parameter. + """ + param = self.parameters[0] + return [ + (candidates_exp[param] == v).values for v in candidates_exp[param].unique() + ] + + @define class DiscreteCardinalityConstraint(CardinalityConstraint, DiscreteConstraint): """Class for discrete cardinality constraints.""" diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index bd2bc7a89f..c0afa3b6fb 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -6,6 +6,7 @@ from baybe.constraints.base import Constraint from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.constraints.discrete import ( + DiscreteBatchConstraint, DiscreteDependenciesConstraint, ) from baybe.parameters import NumericalContinuousParameter @@ -27,6 +28,7 @@ def validate_constraints( # noqa: DOC101, DOC103 :class:`baybe.constraints.discrete.DiscreteDependenciesConstraint` declared. ValueError: If any two continuous cardinality constraints have an overlapping parameter set. + ValueError: If multiple batch constraints reference the same parameter. ValueError: If any constraint contains an invalid parameter name. ValueError: If any continuous constraint includes a discrete parameter. 
ValueError: If any discrete constraint includes a continuous parameter. @@ -45,6 +47,16 @@ def validate_constraints( # noqa: DOC101, DOC103 [con for con in constraints if isinstance(con, ContinuousCardinalityConstraint)] ) + batch_param_names = [ + c.parameters[0] for c in constraints if isinstance(c, DiscreteBatchConstraint) + ] + if duplicates := {n for n in batch_param_names if batch_param_names.count(n) > 1}: + raise ValueError( + f"Multiple '{DiscreteBatchConstraint.__name__}' instances reference " + f"the same parameter(s): {duplicates}. Each parameter can have at " + f"most one batch constraint." + ) + param_names_all = [p.name for p in parameters] param_names_discrete = [p.name for p in parameters if p.is_discrete] param_names_continuous = [p.name for p in parameters if p.is_continuous] From 812896888eb035e8e51b9a6211c0af0c964f6eeb Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:58:05 +0100 Subject: [PATCH 07/39] Add partition machinery to SubspaceDiscrete --- baybe/searchspace/discrete.py | 125 +++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 1dc094edb3..4a8336b4e0 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -3,12 +3,15 @@ from __future__ import annotations import gc +import random import warnings -from collections.abc import Collection, Sequence +from collections.abc import Collection, Iterator, Sequence +from itertools import islice from math import prod from typing import TYPE_CHECKING, Any import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from cattrs import IterableValidationError @@ -16,6 +19,7 @@ from baybe.constraints import DISCRETE_CONSTRAINTS_FILTERING_ORDER, validate_constraints from baybe.constraints.base import DiscreteConstraint +from baybe.constraints.discrete import DiscreteBatchConstraint from baybe.exceptions import DeprecationError from baybe.parameters import ( CategoricalEncoding, @@ -573,6 +577,125 @@ def estimate_product_space_size( comp_rep_shape=(n_rows, n_cols_comp), ) + @property + def constraints_subspace_generating( + self, + ) -> tuple[DiscreteBatchConstraint, ...]: + """Constraints generating subspaces for separate optimization.""" + return tuple( + c for c in self.constraints if isinstance(c, DiscreteBatchConstraint) + ) + + @property + def n_theoretical_subspaces(self) -> int: + """The theoretical number of possible subspace configurations. + + Returns 0 if no subspace-generating constraints exist, indicating that + no decomposition is needed. + """ + if not self.constraints_subspace_generating: + return 0 + return prod( + len(self.get_parameters_by_name([c.parameters[0]])[0].active_values) + for c in self.constraints_subspace_generating + ) + + def subspace_masks( # noqa: DOC404 + self, + candidates_exp: pd.DataFrame, + min_candidates: int | None = None, + *, + shuffle: bool = False, + replace: bool = False, + ) -> Iterator[npt.NDArray[np.bool_]]: + r"""Get an iterator over all possible subspace masks. + + Collects masks from each subspace-generating constraint, iterates the + Cartesian product, AND-reduces each combination, and yields feasible + combined masks. + + Args: + candidates_exp: The experimental representation of candidate points. + min_candidates: If provided, combined masks selecting fewer rows + are silently skipped. + shuffle: If ``True``, iterate in uniformly shuffled order. 
+ Has no effect when ``replace=True``. + replace: If ``True``, sample with replacement, producing an + infinite iterator where each draw is independent. Infeasible + indices are permanently excluded from the sampling pool. + + Yields: + A boolean mask selecting the subspace's rows. + """ + constraints = self.constraints_subspace_generating + if not constraints: + per_constraint: list[list[npt.NDArray[np.bool_]]] = [ + [np.ones(len(candidates_exp), dtype=bool)] + ] + else: + per_constraint = [c.subspace_masks(candidates_exp) for c in constraints] + + total = prod(len(masks) for masks in per_constraint) + + def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: + # Decompose flat index into per-constraint indices. + # Example with 3 constraints of subspace lengths [3, 2, 4]: + # flat_idx=11 -> divmod(11,3)=(3,2) -> A[2] + # divmod(3,2)=(1,1) -> B[1] + # divmod(1,4)=(0,1) -> C[1] + # Result: masks A[2] AND B[1] AND C[1] + masks = [] + remaining = flat_idx + for constraint_masks in per_constraint: + remaining, idx = divmod(remaining, len(constraint_masks)) + masks.append(constraint_masks[idx]) + return np.logical_and.reduce(masks) + + if replace: + candidates = list(range(total)) + while candidates: + idx_pos = random.randint(0, len(candidates) - 1) + flat_idx = candidates[idx_pos] + combined = _resolve_flat_idx(flat_idx) + if min_candidates is not None and combined.sum() < min_candidates: + candidates[idx_pos] = candidates[-1] + candidates.pop() + continue + yield combined + else: + order = list(range(total)) + if shuffle: + random.shuffle(order) + for flat_idx in order: + combined = _resolve_flat_idx(flat_idx) + if min_candidates is not None and combined.sum() < min_candidates: + continue + yield combined + + def sample_subspace_masks( + self, + candidates_exp: pd.DataFrame, + n: int, + min_candidates: int | None = None, + ) -> list[npt.NDArray[np.bool_]]: + """Sample subspace masks. + + Args: + candidates_exp: The experimental representation of candidate points. + n: Number of masks to sample. + min_candidates: If provided, subspaces with fewer matching + candidates are skipped. + + Returns: + A list of boolean masks. + """ + return list( + islice( + self.subspace_masks(candidates_exp, min_candidates, shuffle=True), + n, + ) + ) + def get_candidates(self) -> tuple[pd.DataFrame, pd.DataFrame]: """Return the set of candidate parameter settings that can be tested. From 5bb43cdea0ba2f1e8a55580fe72e28ab30af8fcc Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:18 +0100 Subject: [PATCH 08/39] Add shuffle/replace to SubspaceContinuous --- baybe/recommenders/pure/bayesian/botorch.py | 4 +- baybe/searchspace/continuous.py | 65 +++++++++++++++++---- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 79b1a11410..c449b9e662 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -302,7 +302,7 @@ def _recommend_continuous_with_subspaces( # Determine search scope based on number of subspace configurations configs: Iterable[frozenset[str]] - if subspace_continuous.n_subspaces <= self.max_n_subspaces: + if subspace_continuous.n_theoretical_subspaces <= self.max_n_subspaces: configs = subspace_continuous.subspace_configurations() else: configs = subspace_continuous._sample_subspace_configurations( @@ -612,7 +612,7 @@ def _recommend_hybrid_with_subspaces( # Determine exhaustive vs. 
sampling configs: Iterable[frozenset[str]] - if subspace_c.n_subspaces <= self.max_n_subspaces: + if subspace_c.n_theoretical_subspaces <= self.max_n_subspaces: configs = subspace_c.subspace_configurations() else: configs = subspace_c._sample_subspace_configurations(self.max_n_subspaces) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index ba53c5b7d0..dfa48ca1e1 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -4,8 +4,9 @@ import gc import math +import random from collections.abc import Collection, Iterator, Sequence -from itertools import chain, product +from itertools import chain from typing import TYPE_CHECKING, Any, cast import numpy as np @@ -146,22 +147,62 @@ def _validate_constraints_lin_ineq( ) @property - def n_subspaces(self) -> int: - """The number of possible subspace configurations.""" + def n_theoretical_subspaces(self) -> int: + """The theoretical number of possible subspace configurations. + + Returns 0 if no subspace-generating constraints exist, indicating that + no decomposition is needed. + """ + if not self.constraints_subspace_generating: + return 0 return math.prod( c.n_inactive_parameter_combinations for c in self.constraints_subspace_generating ) - def subspace_configurations(self) -> Iterator[frozenset[str]]: - """Get an iterator over all possible subspace configurations.""" - for combination in product( - *[ - con.inactive_parameter_combinations() - for con in self.constraints_subspace_generating - ] - ): - yield frozenset(chain(*combination)) + def subspace_configurations( # noqa: DOC404 + self, + *, + shuffle: bool = False, + replace: bool = False, + ) -> Iterator[frozenset[str]]: + """Get an iterator over all possible subspace configurations. + + Args: + shuffle: If ``True``, iterate in uniformly shuffled order. + Has no effect when ``replace=True``. + replace: If ``True``, sample with replacement, producing an + infinite iterator where each draw is independent. + + Yields: + A frozenset of inactive parameter names for the subspace. 
+ """ + per_constraint = [ + list(con.inactive_parameter_combinations()) + for con in self.constraints_subspace_generating + ] + + total = math.prod(len(v) for v in per_constraint) + + def _resolve_flat_idx(flat_idx: int) -> frozenset[str]: + combo = [] + remaining = flat_idx + for values in per_constraint: + remaining, idx = divmod(remaining, len(values)) + combo.append(values[idx]) + return frozenset(chain(*combo)) + + if replace: + candidates = list(range(total)) + while candidates: + idx_pos = random.randint(0, len(candidates) - 1) + yield _resolve_flat_idx(candidates[idx_pos]) + else: + order = list(range(total)) + if shuffle: + random.shuffle(order) + for flat_idx in order: + yield _resolve_flat_idx(flat_idx) @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: From f78dab382246b726909cb532e0dc834951feafbc Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:31 +0100 Subject: [PATCH 09/39] Add partition aggregation to SearchSpace --- baybe/searchspace/core.py | 102 +++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 8b0da30c92..a576991c6e 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -3,16 +3,21 @@ from __future__ import annotations import gc -from collections.abc import Iterable, Sequence +from collections import Counter +from collections.abc import Iterable, Iterator, Sequence from enum import Enum +from itertools import product from typing import cast +import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from typing_extensions import override from baybe.constraints import validate_constraints from baybe.constraints.base import Constraint +from baybe.exceptions import InfeasibilityError from baybe.parameters import TaskParameter from baybe.parameters.base import Parameter from baybe.searchspace.continuous import SubspaceContinuous @@ -284,6 +289,101 @@ def n_tasks(self) -> int: except StopIteration: return 1 + @property + def n_theoretical_subspaces(self) -> int: + """Total theoretical number of subspace configurations. + + Returns 0 if no subspace-generating constraints exist on either side. + When only one side has constraints, the other does not contribute to + the count. + """ + d = self.discrete.n_theoretical_subspaces + c = self.continuous.n_theoretical_subspaces + if d == 0 == c: + return 0 + return max(d, 1) * max(c, 1) + + def subspace_masks( # noqa: DOC404 + self, + candidates_exp: pd.DataFrame, + min_discrete_candidates: int | None = None, + ) -> Iterator[tuple[npt.NDArray[np.bool_], frozenset[str]]]: + r"""Get an iterator over all combined subspace configurations. + + Yields the Cartesian product of discrete masks and continuous + configurations. + + Args: + candidates_exp: The experimental representation of discrete candidates. + min_discrete_candidates: If provided, discrete subspaces with fewer + matching candidates are skipped. + + Yields: + A discrete mask and continuous inactive parameters pair. + """ + yield from product( + self.discrete.subspace_masks( + candidates_exp, min_candidates=min_discrete_candidates + ), + self.continuous.subspace_configurations(), + ) + + def sample_subspace_masks( + self, + candidates_exp: pd.DataFrame, + n: int, + min_discrete_candidates: int | None = None, + *, + max_rejections: int = 10, + ) -> list[tuple[npt.NDArray[np.bool_], frozenset[str]]]: + """Sample unique combined subspace configurations. 
+ + Zips two independent with-replacement iterators from the discrete and + continuous sides, producing random pairs from the Cartesian product. + Duplicate pairs are skipped. + + Args: + candidates_exp: The experimental representation of discrete candidates. + n: Number of unique configurations to sample. + min_discrete_candidates: If provided, discrete subspaces with fewer + matching candidates are excluded. + max_rejections: Maximum number of times a duplicate combination can + be drawn before raising ``InfeasibilityError``. + + Raises: + InfeasibilityError: If not enough unique subspace configurations + are available. + + Returns: + A list of ``(discrete_mask, continuous_inactive_params)`` tuples. + """ + d_iter = self.discrete.subspace_masks( + candidates_exp, + min_candidates=min_discrete_candidates, + shuffle=True, + replace=True, + ) + c_iter = self.continuous.subspace_configurations(shuffle=True, replace=True) + + counts: Counter[int] = Counter() + results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] + + for d_mask, c_config in zip(d_iter, c_iter): + key = hash((tuple(d_mask), c_config)) + counts[key] += 1 + if counts[key] > max_rejections + 1: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." + ) + if counts[key] > 1: + continue + results.append((d_mask, c_config)) + if len(results) >= n: + break + + return results + def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]: """Find a parameter's column indices in the computational representation. From 48efdfe568647330a32760f1a659868b7cc5698d Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:45 +0100 Subject: [PATCH 10/39] Wire recommenders for DiscreteBatchConstraint --- baybe/recommenders/pure/base.py | 25 +++- baybe/recommenders/pure/bayesian/botorch.py | 125 ++++++++++++++---- .../pure/nonpredictive/sampling.py | 45 +++++-- 3 files changed, 157 insertions(+), 38 deletions(-) diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 16eefe1016..499a93468b 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -11,7 +11,11 @@ from cattrs.gen import make_dict_unstructure_fn from typing_extensions import override -from baybe.exceptions import DeprecationError, NotEnoughPointsLeftError +from baybe.exceptions import ( + DeprecationError, + IncompatibilityError, + NotEnoughPointsLeftError, +) from baybe.objectives.base import Objective from baybe.recommenders.base import RecommenderProtocol from baybe.searchspace import SearchSpace @@ -38,6 +42,10 @@ class PureRecommender(ABC, RecommenderProtocol): compatibility: ClassVar[SearchSpaceType] """Class variable reflecting the search space compatibility.""" + supports_discrete_subspace_constraints: ClassVar[bool] = False + """Class variable indicating whether the recommender supports discrete + subspace-generating constraints.""" + _deprecated_allow_repeated_recommendations: bool = field( alias="allow_repeated_recommendations", default=None, @@ -259,6 +267,21 @@ def _recommend_with_discrete_parts( """ is_hybrid_space = searchspace.type is SearchSpaceType.HYBRID + # Check subspace-generating constraint support + if ( + searchspace.discrete.constraints_subspace_generating + and not self.supports_discrete_subspace_constraints + ): + constraint_types = { + type(c).__name__ + for c in searchspace.discrete.constraints_subspace_generating + } + raise IncompatibilityError( + 
f"'{self.__class__.__name__}' does not support discrete " + f"subspace-generating constraints. The search space contains: " + f"{constraint_types}." + ) + # Get discrete candidates candidates_exp, _ = searchspace.discrete.get_candidates() diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index c449b9e662..860ef3e73d 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -62,6 +62,9 @@ class BotorchRecommender(BayesianRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. + supports_discrete_subspace_constraints: ClassVar[bool] = True + # See base class. + # Object variables sequential_continuous: bool = field(default=True) """Flag defining whether to apply sequential greedy or batch optimization in @@ -137,9 +140,7 @@ def _recommend_discrete( """Generate recommendations from a discrete search space. Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. Currently, no discrete - constraints generate subspaces, so this always routes to - ``_recommend_discrete_without_subspaces``. + subspace-generating constraints are present. Args: subspace_discrete: The discrete subspace from which to generate @@ -152,10 +153,69 @@ def _recommend_discrete( The dataframe indices of the recommended points in the provided experimental representation. """ + if subspace_discrete.constraints_subspace_generating: + return self._recommend_discrete_with_subspaces( + subspace_discrete, candidates_exp, batch_size + ) return self._recommend_discrete_without_subspaces( subspace_discrete, candidates_exp, batch_size ) + def _recommend_discrete_with_subspaces( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Recommend from a discrete space with subspace-generating constraints. + + Partitions the candidate set according to subspace-generating constraints, + runs optimization on each feasible partition, and returns the batch with + the highest joint acquisition value. Subspaces with fewer candidates + than ``batch_size`` are skipped with a warning. + + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of candidates. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points. 
+ """ + import torch + + masks: Iterable[np.ndarray] + if subspace_discrete.n_theoretical_subspaces <= self.max_n_subspaces: + masks = subspace_discrete.subspace_masks( + candidates_exp, min_candidates=batch_size + ) + else: + masks = subspace_discrete.sample_subspace_masks( + candidates_exp, self.max_n_subspaces, min_candidates=batch_size + ) + + def make_callable( + mask: np.ndarray, + ) -> Callable[[], tuple[pd.Index, Tensor]]: + def optimize() -> tuple[pd.Index, Tensor]: + subset = candidates_exp.loc[mask] + + idxs = self._recommend_discrete_without_subspaces( + subspace_discrete, subset, batch_size + ) + + comp = subspace_discrete.transform(candidates_exp.loc[idxs]) + with torch.no_grad(): + acqf_value = self._botorch_acqf(to_tensor(comp).unsqueeze(0)) + return idxs, acqf_value + + return optimize + + callables = (make_callable(m) for m in masks) + best_idxs, _ = self._optimize_over_subspaces(callables) + return best_idxs + def _recommend_discrete_without_subspaces( self, subspace_discrete: SubspaceDiscrete, @@ -438,7 +498,7 @@ def _recommend_hybrid( """Generate recommendations from a hybrid search space. Dispatches to the appropriate optimization routine depending on whether - the continuous part contains subspace-generating constraints. + subspace-generating constraints are present. Args: searchspace: The search space in which the recommendations should be made. @@ -449,7 +509,10 @@ def _recommend_hybrid( Returns: The recommended points. """ - if searchspace.continuous.constraints_subspace_generating: + if ( + searchspace.discrete.constraints_subspace_generating + or searchspace.continuous.constraints_subspace_generating + ): return self._recommend_hybrid_with_subspaces( searchspace, candidates_exp, batch_size ) @@ -593,9 +656,10 @@ def _recommend_hybrid_with_subspaces( ) -> pd.DataFrame: """Recommend from a hybrid space with subspace-generating constraints. - Creates subspaces by enumerating/sampling inactive parameter configurations - for the continuous part, then runs hybrid optimization per subspace via - ``_recommend_hybrid_without_subspaces``. + Uses ``SearchSpace.subspace_configurations()`` to enumerate the Cartesian + product of discrete and continuous subspace configurations, capped at + ``max_n_subspaces`` total. Discrete subspaces with fewer candidates than + ``batch_size`` are pre-filtered. Args: searchspace: The search space in which the recommendations should be made. @@ -610,39 +674,51 @@ def _recommend_hybrid_with_subspaces( subspace_c = searchspace.continuous - # Determine exhaustive vs. sampling - configs: Iterable[frozenset[str]] - if subspace_c.n_theoretical_subspaces <= self.max_n_subspaces: - configs = subspace_c.subspace_configurations() + # Get combined configurations, capped at max_n_subspaces + # NOTE: No min_discrete_candidates filtering in hybrid spaces because + # optimize_acqf_mixed can produce multiple recommendations from a single + # discrete candidate by varying continuous parameters. 
+ combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] + if searchspace.n_theoretical_subspaces <= self.max_n_subspaces: + combined_masks = searchspace.subspace_masks(candidates_exp) else: - configs = subspace_c._sample_subspace_configurations(self.max_n_subspaces) + combined_masks = searchspace.sample_subspace_masks( + candidates_exp, self.max_n_subspaces + ) def make_callable( - inactive_params: Collection[str], + d_mask: np.ndarray, + c_inactive_params: frozenset[str], ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: def optimize() -> tuple[pd.DataFrame, Tensor]: import torch - modified_cont = subspace_c._enforce_cardinality_constraints( - inactive_params - ) - modified_searchspace = evolve(searchspace, continuous=modified_cont) + subset = candidates_exp.loc[d_mask] + + if c_inactive_params: + mod_cont = subspace_c._enforce_cardinality_constraints( + c_inactive_params + ) + else: + mod_cont = subspace_c + mod_searchspace = evolve(searchspace, continuous=mod_cont) + rec = self._recommend_hybrid_without_subspaces( - modified_searchspace, candidates_exp, batch_size + mod_searchspace, subset, batch_size ) - # Evaluate joint acquisition value on the recommended points - comp = modified_searchspace.transform(rec) + + comp = mod_searchspace.transform(rec) with torch.no_grad(): acqf_value = self._botorch_acqf(to_tensor(comp.values).unsqueeze(0)) return rec, acqf_value return optimize - callables = (make_callable(ip) for ip in configs) + callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) best_rec, _ = self._optimize_over_subspaces(callables) # Post-check minimum cardinality on continuous columns - if not is_cardinality_fulfilled( + if subspace_c.constraints_subspace_generating and not is_cardinality_fulfilled( best_rec[list(subspace_c.parameter_names)], subspace_c, check_maximum=False, @@ -652,7 +728,8 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: "This may occur when parameter ranges extend beyond zero in both " "directions, making the feasible region non-convex. For such " "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization problem.", + "enforced due to the complexity of the resulting optimization " + "problem.", MinimumCardinalityViolatedWarning, ) diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index acb5af55c3..b03485c454 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -9,6 +9,7 @@ from attrs.validators import instance_of from typing_extensions import override +from baybe.exceptions import InfeasibilityError from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender from baybe.searchspace import SearchSpace, SearchSpaceType, SubspaceDiscrete from baybe.settings import Settings, active_settings @@ -23,6 +24,9 @@ class RandomRecommender(NonPredictiveRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. + supports_discrete_subspace_constraints: ClassVar[bool] = True + # See base class. 
+ @override def _recommend_hybrid( self, @@ -30,22 +34,37 @@ def _recommend_hybrid( candidates_exp: pd.DataFrame, batch_size: int, ) -> pd.DataFrame: - if searchspace.type == SearchSpaceType.DISCRETE: - return candidates_exp.sample(batch_size) - - cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size) - if searchspace.type == SearchSpaceType.CONTINUOUS: - return cont_random - - disc_candidates, _ = searchspace.discrete.get_candidates() - - # TODO decide mechanism if number of possible discrete candidates is smaller - # than batch size - disc_random = disc_candidates.sample( + is_hybrid = searchspace.type is SearchSpaceType.HYBRID + + # Sample continuous part if applicable + if is_hybrid or searchspace.type is SearchSpaceType.CONTINUOUS: + cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size) + if searchspace.type is SearchSpaceType.CONTINUOUS: + return cont_random + + # Restrict to a random subspace if subspace-generating constraints are present + if searchspace.discrete.constraints_subspace_generating: + masks = searchspace.discrete.sample_subspace_masks( + candidates_exp, + n=1, + min_candidates=None if is_hybrid else batch_size, + ) + if not masks: + raise InfeasibilityError( + "No feasible subspace found for the given " + "subspace-generating constraints. All subspaces have fewer " + f"candidates than the requested {batch_size=}." + ) + candidates_exp = candidates_exp.loc[masks[0]] + + disc_random = candidates_exp.sample( n=batch_size, - replace=len(disc_candidates) < batch_size, + replace=is_hybrid or len(candidates_exp) < batch_size, ) + if not is_hybrid: + return disc_random + cont_random.index = disc_random.index return pd.concat([disc_random, cont_random], axis=1) From 6039bafa8081987691750b9c314c79a1ae0d4989 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:57 +0100 Subject: [PATCH 11/39] Add tests for DiscreteBatchConstraint --- tests/constraints/test_batch_constraint.py | 150 ++++++++++++++++++ .../test_cardinality_constraint_hybrid.py | 88 ---------- .../test_subspace_constraints_hybrid.py | 111 +++++++++++++ 3 files changed, 261 insertions(+), 88 deletions(-) create mode 100644 tests/constraints/test_batch_constraint.py delete mode 100644 tests/constraints/test_cardinality_constraint_hybrid.py create mode 100644 tests/constraints/test_subspace_constraints_hybrid.py diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py new file mode 100644 index 0000000000..e2a552c70d --- /dev/null +++ b/tests/constraints/test_batch_constraint.py @@ -0,0 +1,150 @@ +"""Tests for the discrete batch constraint.""" + +import pytest +from pytest import param + +from baybe.constraints.discrete import DiscreteBatchConstraint +from baybe.exceptions import IncompatibilityError, InfeasibilityError +from baybe.parameters.numerical import NumericalDiscreteParameter +from baybe.recommenders import BotorchRecommender +from baybe.recommenders.pure.nonpredictive.sampling import ( + FPSRecommender, + RandomRecommender, +) +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 3 +TARGET = NumericalTarget("y") + +_params = [ + NumericalDiscreteParameter("d0", values=(0.0, 0.5, 1.0)), + NumericalDiscreteParameter("d1", values=(0.0, 0.5, 1.0)), +] + + +@pytest.mark.parametrize( + ("constraints", "constrained_params", "batch_size"), + [ + param( + [DiscreteBatchConstraint(parameters=["d0"])], + ["d0"], + 
BATCH_SIZE, + id="single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + ["d0", "d1"], + 1, + id="multiple", + ), + ], +) +def test_batch_constraint_bayesian(constraints, constrained_params, batch_size): + """BotorchRecommender respects batch constraints.""" + searchspace = SearchSpace.from_product(_params, constraints) + measurements = create_fake_input(_params, [TARGET], n_rows=3) + + rec = BotorchRecommender().recommend( + batch_size, searchspace, TARGET.to_objective(), measurements + ) + assert rec.shape[0] == batch_size + for p in constrained_params: + assert rec[p].nunique() == 1 + + +def test_batch_constraint_random_recommender(): + """RandomRecommender respects the batch constraint.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + rec = RandomRecommender().recommend(BATCH_SIZE, searchspace) + assert rec["d0"].nunique() == 1 + assert rec.shape[0] == BATCH_SIZE + + +def test_batch_constraint_unsupported_recommender(): + """Unsupported recommenders raise IncompatibilityError.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + with pytest.raises(IncompatibilityError, match="does not support"): + FPSRecommender().recommend(BATCH_SIZE, searchspace) + + +def test_batch_constraint_validation_multi_param(): + """DiscreteBatchConstraint requires exactly one parameter.""" + with pytest.raises(ValueError, match="exactly one parameter"): + DiscreteBatchConstraint(parameters=["A", "B"]) + + +def test_batch_constraint_validation_duplicate(): + """Two batch constraints on the same parameter are rejected.""" + constraints = [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d0"]), + ] + with pytest.raises(ValueError, match="same parameter"): + SearchSpace.from_product(_params, constraints) + + +@pytest.mark.parametrize( + ("constraints", "expected"), + [ + param([], 0, id="none"), + param([DiscreteBatchConstraint(parameters=["d0"])], 3, id="single"), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + 9, + id="two", + ), + ], +) +def test_batch_constraint_n_theoretical_subspaces(constraints, expected): + """The n_theoretical_subspaces property returns the correct count.""" + assert ( + SearchSpace.from_product(_params, constraints).discrete.n_theoretical_subspaces + == expected + ) + + +def test_batch_constraint_all_subspaces_too_small(): + """All subspaces infeasible raises InfeasibilityError.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + measurements = create_fake_input(_params, [TARGET], n_rows=2) + + # Each d0 subspace has 3 candidates, batch_size=4 exceeds all + with pytest.raises(InfeasibilityError): + BotorchRecommender().recommend( + 4, searchspace, TARGET.to_objective(), measurements + ) + + +@pytest.mark.parametrize( + ("min_candidates", "expected_count"), + [ + param(None, 3, id="no_filter"), + param(4, 0, id="all_skipped"), + param(3, 3, id="all_retained"), + ], +) +def test_subspace_masks_min_candidates(min_candidates, expected_count): + """Subspace mask filtering by min_candidates.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + masks = list( + searchspace.discrete.subspace_masks( + searchspace.discrete.exp_rep, min_candidates=min_candidates + ) + ) + assert len(masks) == expected_count diff 
--git a/tests/constraints/test_cardinality_constraint_hybrid.py b/tests/constraints/test_cardinality_constraint_hybrid.py deleted file mode 100644 index bcbe115e58..0000000000 --- a/tests/constraints/test_cardinality_constraint_hybrid.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Tests for cardinality constraints in hybrid search spaces.""" - -import pytest - -from baybe.constraints.continuous import ContinuousCardinalityConstraint -from baybe.constraints.discrete import DiscreteCardinalityConstraint -from baybe.constraints.utils import is_cardinality_fulfilled -from baybe.parameters.numerical import ( - NumericalContinuousParameter, - NumericalDiscreteParameter, -) -from baybe.recommenders import BotorchRecommender -from baybe.searchspace import SearchSpace -from baybe.targets import NumericalTarget -from baybe.utils.dataframe import create_fake_input - -BATCH_SIZE = 2 -MAX_CARDINALITY = 1 - -_discrete_params = [ - NumericalDiscreteParameter(f"d{i}", values=(0.0, 0.5, 1.0)) for i in range(2) -] -_continuous_params = [ - NumericalContinuousParameter(f"c{i}", bounds=(0, 1)) for i in range(2) -] - - -@pytest.mark.parametrize( - ("disc_params", "conti_params", "constraints"), - [ - pytest.param( - [NumericalDiscreteParameter("d", values=(0.0, 1.0))], - _continuous_params, - [ - ContinuousCardinalityConstraint( - parameters=[p.name for p in _continuous_params], - max_cardinality=MAX_CARDINALITY, - ) - ], - id="conti", - ), - pytest.param( - _discrete_params, - [NumericalContinuousParameter("c", bounds=(0, 1))], - [ - DiscreteCardinalityConstraint( - parameters=[p.name for p in _discrete_params], - max_cardinality=MAX_CARDINALITY, - ) - ], - id="disc", - ), - pytest.param( - _discrete_params, - _continuous_params, - [ - DiscreteCardinalityConstraint( - parameters=[p.name for p in _discrete_params], - max_cardinality=MAX_CARDINALITY, - ), - ContinuousCardinalityConstraint( - parameters=[p.name for p in _continuous_params], - max_cardinality=MAX_CARDINALITY, - ), - ], - id="hybrid", - ), - ], -) -def test_cardinality_constraint_hybrid(disc_params, conti_params, constraints): - """Cardinality constraints are respected in hybrid search spaces.""" - parameters = [*disc_params, *conti_params] - searchspace = SearchSpace.from_product(parameters, constraints) - target = NumericalTarget("t") - measurements = create_fake_input(parameters, [target]) - - rec = BotorchRecommender().recommend( - BATCH_SIZE, searchspace, target.to_objective(), measurements - ) - - for c in constraints: - if isinstance(c, ContinuousCardinalityConstraint): - assert is_cardinality_fulfilled( - rec, searchspace.continuous, check_minimum=False - ) - elif isinstance(c, DiscreteCardinalityConstraint): - n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) - assert (n_nonzero <= c.max_cardinality).all() diff --git a/tests/constraints/test_subspace_constraints_hybrid.py b/tests/constraints/test_subspace_constraints_hybrid.py new file mode 100644 index 0000000000..420111950a --- /dev/null +++ b/tests/constraints/test_subspace_constraints_hybrid.py @@ -0,0 +1,111 @@ +"""Tests for subspace-generating constraints in hybrid search spaces.""" + +import pytest +from pytest import param + +from baybe.constraints.continuous import ContinuousCardinalityConstraint +from baybe.constraints.discrete import ( + DiscreteBatchConstraint, + DiscreteCardinalityConstraint, +) +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + NumericalDiscreteParameter, +) +from 
baybe.recommenders import BotorchRecommender +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 2 +MAX_CARDINALITY = 1 +TARGET = NumericalTarget("t") + +_discrete_params = [ + NumericalDiscreteParameter("d0", values=(0.0, 0.5, 1.0)), + NumericalDiscreteParameter("d1", values=(0.0, 0.5, 1.0)), +] +_continuous_params = [ + NumericalContinuousParameter("c0", bounds=(0, 1)), + NumericalContinuousParameter("c1", bounds=(0, 1)), +] +_all_params = [*_discrete_params, *_continuous_params] + + +@pytest.mark.parametrize( + "constraints", + [ + param( + [ + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ) + ], + id="continuous_cardinality", + ), + param( + [ + DiscreteCardinalityConstraint( + parameters=["d0", "d1"], max_cardinality=MAX_CARDINALITY + ) + ], + id="discrete_cardinality", + ), + param( + [ + DiscreteCardinalityConstraint( + parameters=["d0", "d1"], max_cardinality=MAX_CARDINALITY + ), + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ), + ], + id="both_cardinality", + ), + param( + [DiscreteBatchConstraint(parameters=["d0"])], + id="batch_single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + id="batch_multiple", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ), + ], + id="batch_and_cardinality", + ), + ], +) +def test_subspace_constraints_hybrid(constraints): + """Subspace-generating constraints are respected in hybrid search spaces.""" + searchspace = SearchSpace.from_product(_all_params, constraints) + measurements = create_fake_input(_all_params, [TARGET], n_rows=3) + + rec = BotorchRecommender().recommend( + BATCH_SIZE, searchspace, TARGET.to_objective(), measurements + ) + + for c in constraints: + if isinstance(c, ContinuousCardinalityConstraint): + assert is_cardinality_fulfilled( + rec, searchspace.continuous, check_minimum=False + ) + elif isinstance(c, DiscreteCardinalityConstraint): + n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) + assert (n_nonzero <= c.max_cardinality).all(), ( + f"Discrete cardinality constraint violated: {n_nonzero.tolist()}" + ) + elif isinstance(c, DiscreteBatchConstraint): + assert rec[c.parameters[0]].nunique() == 1, ( + f"Batch constraint violated for '{c.parameters[0]}': " + f"found {rec[c.parameters[0]].nunique()} unique values" + ) From b79643f464345127542ca29854bf1e9150149c5f Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 17:00:08 +0100 Subject: [PATCH 12/39] Add DiscreteBatchConstraint to constraints userguide --- docs/userguide/constraints.md | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index e66be3051c..dcc5597cf0 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -533,3 +533,50 @@ Due to the arbitrary nature of code and dependencies that can be used in the using a `DiscreteCustomConstraint` results in an error if you attempt to serialize the corresponding object or higher-level objects containing it. 
``` + +### DiscreteBatchConstraint +Unlike the other discrete constraints described above, the +{class}`~baybe.constraints.discrete.DiscreteBatchConstraint` does not filter candidates +from the search space. Instead, it controls how recommendations are generated at +batch level: it ensures that **all experiments in a recommended batch share the same +value** for the constrained parameter. + +This is useful, for example, when experiments in a batch must be run under shared +conditions. Consider a well plate experiment where each plate holds multiple samples +but only one temperature can be set per plate. If the optimizer recommends a batch of +experiments to fill one plate, all of them must use the same temperature. The +`DiscreteBatchConstraint` enforces this by internally partitioning the candidate space +into subspaces (one per temperature value), optimizing each subspace independently, and +selecting the batch with the highest expected utility. + +```python +from baybe.constraints import DiscreteBatchConstraint + +DiscreteBatchConstraint( + parameters=["Temperature"], # all batch entries will share the same temperature +) +``` + +Multiple batch constraints on different parameters can be combined. For instance, if +both the temperature and the solvent must be fixed across the plate, two constraints +can be specified: + +```python +DiscreteBatchConstraint(parameters=["Temperature"]) +DiscreteBatchConstraint(parameters=["Solvent"]) +``` + +In this case, each recommended batch will share both the same temperature and the same +solvent. The optimizer evaluates the Cartesian product of possible value combinations +and selects the best one. + +```{admonition} Recommender Compatibility +:class: warning +The `DiscreteBatchConstraint` is only effective with recommenders that can compare +batch-level outcomes, such as +{class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender` and +{class}`~baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender`. +Other recommenders will raise an +{class}`~baybe.exceptions.IncompatibilityError` if a search space with batch +constraints is used. +``` From 3417f9143347409b75c4046ac4f7da947ac5202c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:30:58 +0200 Subject: [PATCH 13/39] Adjust constraint property names --- baybe/constraints/utils.py | 8 +-- baybe/recommenders/pure/base.py | 15 +++--- baybe/recommenders/pure/bayesian/botorch.py | 54 +++++++++---------- .../pure/nonpredictive/sampling.py | 8 +-- baybe/searchspace/continuous.py | 49 ++++++++--------- baybe/searchspace/core.py | 10 ++-- baybe/searchspace/discrete.py | 14 ++--- .../test_cardinality_constraint_continuous.py | 7 +-- 8 files changed, 75 insertions(+), 90 deletions(-) diff --git a/baybe/constraints/utils.py b/baybe/constraints/utils.py index 6d5e1e7378..22570f29b8 100644 --- a/baybe/constraints/utils.py +++ b/baybe/constraints/utils.py @@ -3,7 +3,6 @@ import numpy as np import pandas as pd -from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.parameters.utils import is_inactive from baybe.searchspace import SubspaceContinuous @@ -26,12 +25,7 @@ def is_cardinality_fulfilled( Returns: ``True`` if all cardinality constraints are fulfilled, ``False`` otherwise. 
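
     Example:
         Illustrative call only (``recommendations`` stands for any dataframe
         holding recommended points for the subspace; it is not defined here):

         >>> is_cardinality_fulfilled(  # doctest: +SKIP
         ...     recommendations, subspace_continuous, check_minimum=False
         ... )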
""" - cardinality_constraints = [ - c - for c in subspace_continuous.constraints_subspace_generating - if isinstance(c, ContinuousCardinalityConstraint) - ] - for c in cardinality_constraints: + for c in subspace_continuous.constraints_cardinality: # Get the activity thresholds for all parameters cols = df[c.parameters] thresholds = { diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 499a93468b..813fc35d37 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -42,9 +42,9 @@ class PureRecommender(ABC, RecommenderProtocol): compatibility: ClassVar[SearchSpaceType] """Class variable reflecting the search space compatibility.""" - supports_discrete_subspace_constraints: ClassVar[bool] = False + supports_discrete_batch_constraints: ClassVar[bool] = False """Class variable indicating whether the recommender supports discrete - subspace-generating constraints.""" + batch constraints.""" _deprecated_allow_repeated_recommendations: bool = field( alias="allow_repeated_recommendations", @@ -267,18 +267,17 @@ def _recommend_with_discrete_parts( """ is_hybrid_space = searchspace.type is SearchSpaceType.HYBRID - # Check subspace-generating constraint support + # Check batch constraint support if ( - searchspace.discrete.constraints_subspace_generating - and not self.supports_discrete_subspace_constraints + searchspace.discrete.constraints_batch + and not self.supports_discrete_batch_constraints ): constraint_types = { - type(c).__name__ - for c in searchspace.discrete.constraints_subspace_generating + type(c).__name__ for c in searchspace.discrete.constraints_batch } raise IncompatibilityError( f"'{self.__class__.__name__}' does not support discrete " - f"subspace-generating constraints. The search space contains: " + f"batch constraints. The search space contains: " f"{constraint_types}." ) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 860ef3e73d..5ebd4ba687 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -62,7 +62,7 @@ class BotorchRecommender(BayesianRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. - supports_discrete_subspace_constraints: ClassVar[bool] = True + supports_discrete_batch_constraints: ClassVar[bool] = True # See base class. # Object variables @@ -153,7 +153,7 @@ def _recommend_discrete( The dataframe indices of the recommended points in the provided experimental representation. 
""" - if subspace_discrete.constraints_subspace_generating: + if subspace_discrete.constraints_batch: return self._recommend_discrete_with_subspaces( subspace_discrete, candidates_exp, batch_size ) @@ -313,24 +313,24 @@ def _recommend_continuous_torch( self, subspace_continuous: SubspaceContinuous, batch_size: int ) -> tuple[Tensor, Tensor]: """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_subspace_generating: - return self._recommend_continuous_with_subspaces( + if subspace_continuous.constraints_cardinality: + return self._recommend_continuous_with_cardinality_constraints( subspace_continuous, batch_size ) else: - return self._recommend_continuous_without_subspaces( + return self._recommend_continuous_without_cardinality_constraints( subspace_continuous, batch_size ) - def _recommend_continuous_with_subspaces( + def _recommend_continuous_with_cardinality_constraints( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous space with subspace-generating constraints. + """Recommend from a continuous space with cardinality constraints. - Optimizes the acquisition function across subspaces defined by constraints - (currently only cardinality constraints) and returns the best result. + Optimizes the acquisition function across subspaces defined by cardinality + constraints and returns the best result. The specific collection of subspaces considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random @@ -351,21 +351,21 @@ def _recommend_continuous_with_subspaces( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has no subspace-generating + ValueError: If the continuous search space has no cardinality constraints. """ - if not subspace_continuous.constraints_subspace_generating: + if not subspace_continuous.constraints_cardinality: raise ValueError( - f"'{self._recommend_continuous_with_subspaces.__name__}' " - f"expects a subspace with subspace-generating constraints." + f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " + f"expects a subspace with cardinality constraints." ) # Determine search scope based on number of subspace configurations configs: Iterable[frozenset[str]] if subspace_continuous.n_theoretical_subspaces <= self.max_n_subspaces: - configs = subspace_continuous.subspace_configurations() + configs = subspace_continuous.inactive_parameter_combinations() else: - configs = subspace_continuous._sample_subspace_configurations( + configs = subspace_continuous._sample_inactive_parameters( self.max_n_subspaces ) @@ -410,12 +410,12 @@ def optimize() -> tuple[Tensor, Tensor]: return points, acqf_value - def _recommend_continuous_without_subspaces( + def _recommend_continuous_without_cardinality_constraints( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without subspace decomposition. + """Recommend from a continuous search space without cardinality constraints. Args: subspace_continuous: The continuous subspace from which to generate @@ -426,16 +426,16 @@ def _recommend_continuous_without_subspaces( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has subspace-generating - constraints. + ValueError: If the continuous search space has cardinality constraints. 
""" import torch from botorch.optim import optimize_acqf - if subspace_continuous.constraints_subspace_generating: + if subspace_continuous.constraints_cardinality: + method = self._recommend_continuous_without_cardinality_constraints raise ValueError( - f"'{self._recommend_continuous_without_subspaces.__name__}' " - f"expects a subspace without subspace-generating constraints." + f"'{method.__name__}' expects a subspace " + f"without cardinality constraints." ) fixed_parameters = { @@ -510,8 +510,8 @@ def _recommend_hybrid( The recommended points. """ if ( - searchspace.discrete.constraints_subspace_generating - or searchspace.continuous.constraints_subspace_generating + searchspace.discrete.constraints_batch + or searchspace.continuous.constraints_cardinality ): return self._recommend_hybrid_with_subspaces( searchspace, candidates_exp, batch_size @@ -680,9 +680,9 @@ def _recommend_hybrid_with_subspaces( # discrete candidate by varying continuous parameters. combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] if searchspace.n_theoretical_subspaces <= self.max_n_subspaces: - combined_masks = searchspace.subspace_masks(candidates_exp) + combined_masks = searchspace.subspaces(candidates_exp) else: - combined_masks = searchspace.sample_subspace_masks( + combined_masks = searchspace.sample_subspaces( candidates_exp, self.max_n_subspaces ) @@ -718,7 +718,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: best_rec, _ = self._optimize_over_subspaces(callables) # Post-check minimum cardinality on continuous columns - if subspace_c.constraints_subspace_generating and not is_cardinality_fulfilled( + if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( best_rec[list(subspace_c.parameter_names)], subspace_c, check_maximum=False, diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index b03485c454..72f06b2b41 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -24,7 +24,7 @@ class RandomRecommender(NonPredictiveRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. - supports_discrete_subspace_constraints: ClassVar[bool] = True + supports_discrete_batch_constraints: ClassVar[bool] = True # See base class. @override @@ -42,8 +42,8 @@ def _recommend_hybrid( if searchspace.type is SearchSpaceType.CONTINUOUS: return cont_random - # Restrict to a random subspace if subspace-generating constraints are present - if searchspace.discrete.constraints_subspace_generating: + # Restrict to a random subspace if batch constraints are present + if searchspace.discrete.constraints_batch: masks = searchspace.discrete.sample_subspace_masks( candidates_exp, n=1, @@ -52,7 +52,7 @@ def _recommend_hybrid( if not masks: raise InfeasibilityError( "No feasible subspace found for the given " - "subspace-generating constraints. All subspaces have fewer " + "batch constraints. All subspaces have fewer " f"candidates than the requested {batch_size=}." 
) candidates_exp = candidates_exp.loc[masks[0]] diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index dfa48ca1e1..d47ea53132 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -110,10 +110,10 @@ def __str__(self) -> str: return to_string(self.__class__.__name__, *fields) @property - def constraints_subspace_generating( + def constraints_cardinality( self, ) -> tuple[ContinuousCardinalityConstraint, ...]: - """Constraints generating subspaces for separate optimization.""" + """The cardinality constraints of the subspace.""" return tuple( c for c in self.constraints_nonlin @@ -150,23 +150,22 @@ def _validate_constraints_lin_ineq( def n_theoretical_subspaces(self) -> int: """The theoretical number of possible subspace configurations. - Returns 0 if no subspace-generating constraints exist, indicating that + Returns 0 if no cardinality constraints exist, indicating that no decomposition is needed. """ - if not self.constraints_subspace_generating: + if not self.constraints_cardinality: return 0 return math.prod( - c.n_inactive_parameter_combinations - for c in self.constraints_subspace_generating + c.n_inactive_parameter_combinations for c in self.constraints_cardinality ) - def subspace_configurations( # noqa: DOC404 + def inactive_parameter_combinations( # noqa: DOC404 self, *, shuffle: bool = False, replace: bool = False, ) -> Iterator[frozenset[str]]: - """Get an iterator over all possible subspace configurations. + """Get an iterator over all possible inactive parameter combinations. Args: shuffle: If ``True``, iterate in uniformly shuffled order. @@ -179,7 +178,7 @@ def subspace_configurations( # noqa: DOC404 """ per_constraint = [ list(con.inactive_parameter_combinations()) - for con in self.constraints_subspace_generating + for con in self.constraints_cardinality ] total = math.prod(len(v) for v in per_constraint) @@ -209,10 +208,10 @@ def _validate_constraints_nonlin(self, _, __) -> None: """Validate nonlinear constraints.""" # Note: The passed constraints are accessed indirectly through the property validate_cardinality_constraints_are_nonoverlapping( - self.constraints_subspace_generating + self.constraints_cardinality ) - for con in self.constraints_subspace_generating: + for con in self.constraints_cardinality: validate_cardinality_constraint_parameter_bounds(con, self.parameters) def to_searchspace(self) -> SearchSpace: @@ -355,11 +354,9 @@ def comp_rep_columns(self) -> tuple[str, ...]: return tuple(chain.from_iterable(p.comp_rep_columns for p in self.parameters)) @property - def parameter_names_in_subspace_constraints(self) -> frozenset[str]: - """The names of all parameters affected by subspace-generating constraints.""" - names_per_constraint = ( - c.parameters for c in self.constraints_subspace_generating - ) + def parameter_names_in_cardinality_constraints(self) -> frozenset[str]: + """The names of all parameters affected by cardinality constraints.""" + names_per_constraint = (c.parameters for c in self.constraints_cardinality) return frozenset(chain(*names_per_constraint)) @property @@ -437,7 +434,7 @@ def _enforce_cardinality_constraints( """ # Extract active parameters involved in cardinality constraints active_parameter_names = ( - self.parameter_names_in_subspace_constraints.difference( + self.parameter_names_in_cardinality_constraints.difference( inactive_parameter_names ) ) @@ -451,9 +448,7 @@ def _enforce_cardinality_constraints( elif p.name in active_parameter_names: constraints = [ - c - for c in 
self.constraints_subspace_generating - if p.name in c.parameters + c for c in self.constraints_cardinality if p.name in c.parameters ] # Constraint validation should have ensured that each parameter can @@ -529,7 +524,7 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame: if not self.is_constrained: return self._sample_from_bounds(batch_size, self.comp_rep_bounds.values) - if len(self.constraints_subspace_generating) == 0: + if len(self.constraints_cardinality) == 0: return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values) return self._sample_from_polytope_with_cardinality_constraints(batch_size) @@ -615,7 +610,7 @@ def _sample_from_polytope_with_cardinality_constraints( self, batch_size: int ) -> pd.DataFrame: """Draw random samples from a polytope with cardinality constraints.""" - if not self.constraints_subspace_generating: + if not self.constraints_cardinality: raise RuntimeError( f"This method should not be called without any constraints of type " f"'{ContinuousCardinalityConstraint.__name__}' in place. " @@ -632,7 +627,7 @@ def _sample_from_polytope_with_cardinality_constraints( while len(samples) < batch_size: # Randomly set some parameters inactive - inactive_params_sample = self._sample_subspace_configurations(1)[0] + inactive_params_sample = self._sample_inactive_parameters(1)[0] # Remove the inactive parameters from the search space. In the first # step, the active parameters get activated and inactive parameters are @@ -670,13 +665,11 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_subspace_configurations( - self, batch_size: int = 1 - ) -> list[frozenset[str]]: - """Sample subspace configurations according to the given constraints.""" + def _sample_inactive_parameters(self, batch_size: int = 1) -> list[frozenset[str]]: + """Sample inactive parameter configurations from the cardinality constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) - for con in self.constraints_subspace_generating + for con in self.constraints_cardinality ] return [frozenset(chain(*x)) for x in zip(*inactives_per_constraint)] diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index a576991c6e..80aca6e102 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -303,7 +303,7 @@ def n_theoretical_subspaces(self) -> int: return 0 return max(d, 1) * max(c, 1) - def subspace_masks( # noqa: DOC404 + def subspaces( # noqa: DOC404 self, candidates_exp: pd.DataFrame, min_discrete_candidates: int | None = None, @@ -325,10 +325,10 @@ def subspace_masks( # noqa: DOC404 self.discrete.subspace_masks( candidates_exp, min_candidates=min_discrete_candidates ), - self.continuous.subspace_configurations(), + self.continuous.inactive_parameter_combinations(), ) - def sample_subspace_masks( + def sample_subspaces( self, candidates_exp: pd.DataFrame, n: int, @@ -363,7 +363,9 @@ def sample_subspace_masks( shuffle=True, replace=True, ) - c_iter = self.continuous.subspace_configurations(shuffle=True, replace=True) + c_iter = self.continuous.inactive_parameter_combinations( + shuffle=True, replace=True + ) counts: Counter[int] = Counter() results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 4a8336b4e0..a41bed31ed 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -578,10 +578,10 @@ def estimate_product_space_size( ) @property - def constraints_subspace_generating( + def 
constraints_batch( self, ) -> tuple[DiscreteBatchConstraint, ...]: - """Constraints generating subspaces for separate optimization.""" + """The batch constraints of the subspace.""" return tuple( c for c in self.constraints if isinstance(c, DiscreteBatchConstraint) ) @@ -590,14 +590,14 @@ def constraints_subspace_generating( def n_theoretical_subspaces(self) -> int: """The theoretical number of possible subspace configurations. - Returns 0 if no subspace-generating constraints exist, indicating that + Returns 0 if no batch constraints exist, indicating that no decomposition is needed. """ - if not self.constraints_subspace_generating: + if not self.constraints_batch: return 0 return prod( len(self.get_parameters_by_name([c.parameters[0]])[0].active_values) - for c in self.constraints_subspace_generating + for c in self.constraints_batch ) def subspace_masks( # noqa: DOC404 @@ -610,7 +610,7 @@ def subspace_masks( # noqa: DOC404 ) -> Iterator[npt.NDArray[np.bool_]]: r"""Get an iterator over all possible subspace masks. - Collects masks from each subspace-generating constraint, iterates the + Collects masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible combined masks. @@ -627,7 +627,7 @@ def subspace_masks( # noqa: DOC404 Yields: A boolean mask selecting the subspace's rows. """ - constraints = self.constraints_subspace_generating + constraints = self.constraints_batch if not constraints: per_constraint: list[list[npt.NDArray[np.bool_]]] = [ [np.ones(len(candidates_exp), dtype=bool)] diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 770e2ceab5..04e9e10cf9 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -65,12 +65,9 @@ def _validate_cardinality_constrained_batch( # We thus include this check as a safety net for catching regressions. If it # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. - cardinality_constraints = [ - c - for c in subspace_continuous.constraints_subspace_generating - if isinstance(c, ContinuousCardinalityConstraint) + max_cardinalities = [ + c.max_cardinality for c in subspace_continuous.constraints_cardinality ] - max_cardinalities = [c.max_cardinality for c in cardinality_constraints] if len(unique_row := batch.drop_duplicates()) == 1: assert (unique_row.iloc[0] == 0.0).all() and all( max_cardinality == 0 for max_cardinality in max_cardinalities From 77818e935cc75176ff96e4dd54dd7619d6e666d3 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:31:31 +0200 Subject: [PATCH 14/39] Improve docstring language --- baybe/constraints/discrete.py | 3 +-- baybe/recommenders/pure/bayesian/botorch.py | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 0307f898b4..1f35dab3b2 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -435,8 +435,7 @@ class DiscreteBatchConstraint(DiscreteConstraint): parameter — obtains a full batch recommendation from each subspace, and returns the batch with the highest joint acquisition value. - This constraint is only effective with Bayesian recommenders that have access - to an acquisition function for comparing batches. 
It is not applied during + This constraint is not supported by all recommenders. It is not applied during search space creation (all parameter values remain in the search space). Example: diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 5ebd4ba687..2d12da1262 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -172,7 +172,7 @@ def _recommend_discrete_with_subspaces( Partitions the candidate set according to subspace-generating constraints, runs optimization on each feasible partition, and returns the batch with the highest joint acquisition value. Subspaces with fewer candidates - than ``batch_size`` are skipped with a warning. + than ``batch_size`` are skipped. Args: subspace_discrete: The discrete subspace from which to generate @@ -656,10 +656,10 @@ def _recommend_hybrid_with_subspaces( ) -> pd.DataFrame: """Recommend from a hybrid space with subspace-generating constraints. - Uses ``SearchSpace.subspace_configurations()`` to enumerate the Cartesian + Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian product of discrete and continuous subspace configurations, capped at - ``max_n_subspaces`` total. Discrete subspaces with fewer candidates than - ``batch_size`` are pre-filtered. + ``max_n_subspaces`` total. In purely discrete search spaces, subspaces + with fewer candidates than ``batch_size`` are pre-filtered. Args: searchspace: The search space in which the recommendations should be made. From 4e76ccfc31ffff2221b2ba0ff3265db2d3e86a2e Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:31:56 +0200 Subject: [PATCH 15/39] Improve partition sampling --- baybe/searchspace/core.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 80aca6e102..1c35e4f61f 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -3,7 +3,6 @@ from __future__ import annotations import gc -from collections import Counter from collections.abc import Iterable, Iterator, Sequence from enum import Enum from itertools import product @@ -367,23 +366,32 @@ def sample_subspaces( shuffle=True, replace=True ) - counts: Counter[int] = Counter() + seen: set[tuple[bytes, frozenset[str]]] = set() results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] + rejections = 0 for d_mask, c_config in zip(d_iter, c_iter): - key = hash((tuple(d_mask), c_config)) - counts[key] += 1 - if counts[key] > max_rejections + 1: - raise InfeasibilityError( - f"Not enough unique subspace configurations available. " - f"Requested {n} but only {len(results)} could be found." - ) - if counts[key] > 1: + key = (d_mask.tobytes(), c_config) + if key in seen: + rejections += 1 + if rejections > max_rejections: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." + ) continue + seen.add(key) + rejections = 0 results.append((d_mask, c_config)) if len(results) >= n: break + if len(results) < n: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." 
+ ) + return results def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]: From c7597e8e78e364aa047ea38d8b7313450377e8e1 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:35:19 +0200 Subject: [PATCH 16/39] Split BotorchRecommender into submodules --- baybe/recommenders/naive.py | 4 +- baybe/recommenders/pure/bayesian/botorch.py | 787 ------------------ .../pure/bayesian/botorch/__init__.py | 7 + .../pure/bayesian/botorch/continuous.py | 209 +++++ .../pure/bayesian/botorch/core.py | 278 +++++++ .../pure/bayesian/botorch/discrete.py | 141 ++++ .../pure/bayesian/botorch/hybrid.py | 252 ++++++ docs/userguide/async.md | 2 +- docs/userguide/campaigns.md | 2 +- docs/userguide/constraints.md | 8 +- docs/userguide/getting_recommendations.md | 2 +- docs/userguide/recommenders.md | 12 +- .../probability_of_improvement.py | 4 +- 13 files changed, 904 insertions(+), 804 deletions(-) delete mode 100644 baybe/recommenders/pure/bayesian/botorch.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/__init__.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/continuous.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/core.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/discrete.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/hybrid.py diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index 8039755443..5b602d881b 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -41,11 +41,11 @@ class NaiveHybridSpaceRecommender(PureRecommender): # problem that might come up when implementing new subclasses of PureRecommender disc_recommender: PureRecommender = field(factory=BotorchRecommender) """The recommender used for the discrete subspace. Default: - :class:`baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`""" + :class:`baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`""" cont_recommender: BayesianRecommender = field(factory=BotorchRecommender) """The recommender used for the continuous subspace. 
Default: - :class:`baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`""" + :class:`baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`""" @override def recommend( diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py deleted file mode 100644 index 2d12da1262..0000000000 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ /dev/null @@ -1,787 +0,0 @@ -"""Botorch recommender.""" - -from __future__ import annotations - -import gc -import math -import warnings -from collections.abc import Callable, Collection, Iterable -from typing import TYPE_CHECKING, Any, ClassVar - -import numpy as np -import pandas as pd -from attrs import define, field, fields -from attrs.converters import optional as optional_c -from attrs.validators import ge, gt, instance_of -from typing_extensions import override - -from baybe.acquisition.acqfs import qThompsonSampling -from baybe.constraints.utils import is_cardinality_fulfilled -from baybe.exceptions import ( - IncompatibilityError, - IncompatibleAcquisitionFunctionError, - InfeasibilityError, - MinimumCardinalityViolatedWarning, -) -from baybe.parameters.numerical import _FixedNumericalContinuousParameter -from baybe.recommenders.pure.bayesian.base import BayesianRecommender -from baybe.searchspace import ( - SearchSpace, - SearchSpaceType, - SubspaceContinuous, - SubspaceDiscrete, -) -from baybe.utils.basic import flatten -from baybe.utils.conversion import to_string -from baybe.utils.dataframe import to_tensor -from baybe.utils.sampling_algorithms import ( - DiscreteSamplingMethod, - sample_numerical_df, -) - -if TYPE_CHECKING: - from torch import Tensor - - -@define(kw_only=True) -class BotorchRecommender(BayesianRecommender): - """A pure recommender utilizing Botorch's optimization machinery. - - This recommender makes use of Botorch's ``optimize_acqf_discrete``, - ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete, - continuous and hybrid search spaces, respectively. Accordingly, it can be applied to - all kinds of search spaces. - - Note: - In hybrid search spaces, the used algorithm performs a brute-force optimization - that can be computationally expensive. Thus, the behavior of the algorithm in - hybrid search spaces can be controlled via two additional parameters. - """ - - # Class variables - compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID - # See base class. - - supports_discrete_batch_constraints: ClassVar[bool] = True - # See base class. - - # Object variables - sequential_continuous: bool = field(default=True) - """Flag defining whether to apply sequential greedy or batch optimization in - **continuous** search spaces. In discrete/hybrid spaces, sequential greedy - optimization is applied automatically. - """ - - hybrid_sampler: DiscreteSamplingMethod | None = field( - converter=optional_c(DiscreteSamplingMethod), default=None - ) - """Strategy used for sampling the discrete subspace when performing hybrid search - space optimization.""" - - sampling_percentage: float = field(default=1.0) - """Percentage of discrete search space that is sampled when performing hybrid search - space optimization. Ignored when ``hybrid_sampler="None"``.""" - - n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10) - """Number of times gradient-based optimization is restarted from different initial - points. **Does not affect purely discrete optimization**. 
- """ - - n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64) - """Number of raw samples drawn for the initialization heuristic in gradient-based - optimization. **Does not affect purely discrete optimization**. - """ - - max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Maximum number of subspaces to evaluate when subspace-generating constraints are - present (e.g., continuous cardinality constraints). If the total number of subspaces - exceeds this limit, a random subset of that size is sampled for optimization instead - of performing an exhaustive search.""" - - @sampling_percentage.validator - def _validate_percentage( # noqa: DOC101, DOC103 - self, _: Any, value: float - ) -> None: - """Validate that the given value is in fact a percentage. - - Raises: - ValueError: If ``value`` is not between 0 and 1. - """ - if not 0 <= value <= 1: - raise ValueError( - f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" - ) - - @override - def __str__(self) -> str: - fields = [ - to_string("Surrogate", self._surrogate_model), - to_string( - "Acquisition function", self.acquisition_function, single_line=True - ), - to_string("Compatibility", self.compatibility, single_line=True), - to_string( - "Sequential continuous", self.sequential_continuous, single_line=True - ), - to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), - to_string( - "Sampling percentage", self.sampling_percentage, single_line=True - ), - ] - return to_string(self.__class__.__name__, *fields) - - @override - def _recommend_discrete( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Generate recommendations from a discrete search space. - - Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of all discrete candidate - points to be considered. - batch_size: The size of the recommendation batch. - - Returns: - The dataframe indices of the recommended points in the provided - experimental representation. - """ - if subspace_discrete.constraints_batch: - return self._recommend_discrete_with_subspaces( - subspace_discrete, candidates_exp, batch_size - ) - return self._recommend_discrete_without_subspaces( - subspace_discrete, candidates_exp, batch_size - ) - - def _recommend_discrete_with_subspaces( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Recommend from a discrete space with subspace-generating constraints. - - Partitions the candidate set according to subspace-generating constraints, - runs optimization on each feasible partition, and returns the batch with - the highest joint acquisition value. Subspaces with fewer candidates - than ``batch_size`` are skipped. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of candidates. - batch_size: The size of the recommendation batch. - - Returns: - The dataframe indices of the recommended points. 
- """ - import torch - - masks: Iterable[np.ndarray] - if subspace_discrete.n_theoretical_subspaces <= self.max_n_subspaces: - masks = subspace_discrete.subspace_masks( - candidates_exp, min_candidates=batch_size - ) - else: - masks = subspace_discrete.sample_subspace_masks( - candidates_exp, self.max_n_subspaces, min_candidates=batch_size - ) - - def make_callable( - mask: np.ndarray, - ) -> Callable[[], tuple[pd.Index, Tensor]]: - def optimize() -> tuple[pd.Index, Tensor]: - subset = candidates_exp.loc[mask] - - idxs = self._recommend_discrete_without_subspaces( - subspace_discrete, subset, batch_size - ) - - comp = subspace_discrete.transform(candidates_exp.loc[idxs]) - with torch.no_grad(): - acqf_value = self._botorch_acqf(to_tensor(comp).unsqueeze(0)) - return idxs, acqf_value - - return optimize - - callables = (make_callable(m) for m in masks) - best_idxs, _ = self._optimize_over_subspaces(callables) - return best_idxs - - def _recommend_discrete_without_subspaces( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Generate recommendations from a discrete search space. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of all discrete candidate - points to be considered. - batch_size: The size of the recommendation batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - The dataframe indices of the recommended points in the provided - experimental representation. - """ - assert self._objective is not None - acqf = self._get_acquisition_function(self._objective) - if batch_size > 1 and not acqf.supports_batching: - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." - ) - if batch_size > 1 and isinstance(acqf, qThompsonSampling): - raise IncompatibilityError( - "Thompson sampling currently only supports a batch size of 1." - ) - - from botorch.optim import optimize_acqf_discrete - - # determine the next set of points to be tested - candidates_comp = subspace_discrete.transform(candidates_exp) - points, _ = optimize_acqf_discrete( - self._botorch_acqf, batch_size, to_tensor(candidates_comp) - ) - - # retrieve the index of the points from the input dataframe - # IMPROVE: The merging procedure is conceptually similar to what - # `SearchSpace._match_measurement_with_searchspace_indices` does, though using - # a simpler matching logic. When refactoring the SearchSpace class to - # handle continuous parameters, a corresponding utility could be extracted. - idxs = pd.Index( - pd.merge( - pd.DataFrame(points, columns=candidates_comp.columns), - candidates_comp.reset_index(), - on=list(candidates_comp), - how="left", - )["index"] - ) - - return idxs - - @override - def _recommend_continuous( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> pd.DataFrame: - """Generate recommendations from a continuous search space. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - A dataframe containing the recommendations as individual rows. 
- """ - assert self._objective is not None - if ( - batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching - ): - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." - ) - - points, _ = self._recommend_continuous_torch(subspace_continuous, batch_size) - - return pd.DataFrame(points, columns=subspace_continuous.parameter_names) - - def _recommend_continuous_torch( - self, subspace_continuous: SubspaceContinuous, batch_size: int - ) -> tuple[Tensor, Tensor]: - """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_cardinality: - return self._recommend_continuous_with_cardinality_constraints( - subspace_continuous, batch_size - ) - else: - return self._recommend_continuous_without_cardinality_constraints( - subspace_continuous, batch_size - ) - - def _recommend_continuous_with_cardinality_constraints( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous space with cardinality constraints. - - Optimizes the acquisition function across subspaces defined by cardinality - constraints and returns the best result. - - The specific collection of subspaces considered by the recommender is obtained - as either the full combinatorial set of possible parameter splits or a random - selection thereof, depending on the upper bound specified by the corresponding - recommender attribute. - - In each subspace, the constraint-imposed configuration is fixed, so that the - constraints can be removed and a regular optimization can be performed. The - recommendation is then constructed from the combined optimization results of the - unconstrained spaces. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Returns: - The recommendations and corresponding acquisition values. - - Raises: - ValueError: If the continuous search space has no cardinality - constraints. - """ - if not subspace_continuous.constraints_cardinality: - raise ValueError( - f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " - f"expects a subspace with cardinality constraints." 
- ) - - # Determine search scope based on number of subspace configurations - configs: Iterable[frozenset[str]] - if subspace_continuous.n_theoretical_subspaces <= self.max_n_subspaces: - configs = subspace_continuous.inactive_parameter_combinations() - else: - configs = subspace_continuous._sample_inactive_parameters( - self.max_n_subspaces - ) - - # Create closures for each subspace configuration - def make_callable( - inactive_params: Collection[str], - ) -> Callable[[], tuple[Tensor, Tensor]]: - def optimize() -> tuple[Tensor, Tensor]: - import torch - - sub = subspace_continuous._enforce_cardinality_constraints( - inactive_params - ) - # Note: We explicitly evaluate the acqf function for the batch - # because the object returned by the optimization routine may - # contain joint or individual acquisition values, depending on - # whether sequential or joint optimization is applied - p, _ = self._recommend_continuous_torch(sub, batch_size) - with torch.no_grad(): - acqf_value = self._botorch_acqf(p) - return p, acqf_value - - return optimize - - callables = (make_callable(ip) for ip in configs) - points, acqf_value = self._optimize_over_subspaces(callables) - - # Check if any minimum cardinality constraints are violated - if not is_cardinality_fulfilled( - pd.DataFrame(points, columns=subspace_continuous.parameter_names), - subspace_continuous, - check_maximum=False, - ): - warnings.warn( - "At least one minimum cardinality constraint has been violated. " - "This may occur when parameter ranges extend beyond zero in both " - "directions, making the feasible region non-convex. For such " - "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization problem.", - MinimumCardinalityViolatedWarning, - ) - - return points, acqf_value - - def _recommend_continuous_without_cardinality_constraints( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without cardinality constraints. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Returns: - The recommendations and corresponding acquisition values. - - Raises: - ValueError: If the continuous search space has cardinality constraints. - """ - import torch - from botorch.optim import optimize_acqf - - if subspace_continuous.constraints_cardinality: - method = self._recommend_continuous_without_cardinality_constraints - raise ValueError( - f"'{method.__name__}' expects a subspace " - f"without cardinality constraints." - ) - - fixed_parameters = { - idx: p.value - for (idx, p) in enumerate(subspace_continuous.parameters) - if isinstance(p, _FixedNumericalContinuousParameter) - } - - # TODO: Add option for automatic choice once the "settings" PR is merged, - # which ships the necessary machinery - if ( - self.sequential_continuous - and subspace_continuous.has_interpoint_constraints - ): - raise IncompatibilityError( - f"Setting the " - f"'{fields(BotorchRecommender).sequential_continuous.name}' " - f"flag to ``True`` while interpoint constraints are present in the " - f"continuous subspace is not supported. " - ) - - # NOTE: The explicit `or None` conversion is added as an additional safety net - # because it is unclear if the corresponding presence checks for these - # arguments is correctly implemented in all invoked BoTorch subroutines. 
- # For details: https://github.com/pytorch/botorch/issues/2042 - points, acqf_values = optimize_acqf( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), - q=batch_size, - num_restarts=self.n_restarts, - raw_samples=self.n_raw_samples, - fixed_features=fixed_parameters or None, - equality_constraints=flatten( - c.to_botorch( - subspace_continuous.parameters, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in subspace_continuous.constraints_lin_eq - ) - or None, - inequality_constraints=flatten( - c.to_botorch( - subspace_continuous.parameters, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in subspace_continuous.constraints_lin_ineq - ) - or None, - sequential=self.sequential_continuous, - ) - return points, acqf_values - - @override - def _recommend_hybrid( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Generate recommendations from a hybrid search space. - - Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Returns: - The recommended points. - """ - if ( - searchspace.discrete.constraints_batch - or searchspace.continuous.constraints_cardinality - ): - return self._recommend_hybrid_with_subspaces( - searchspace, candidates_exp, batch_size - ) - return self._recommend_hybrid_without_subspaces( - searchspace, candidates_exp, batch_size - ) - - def _recommend_hybrid_without_subspaces( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch. - - This functions samples points from the discrete subspace, performs optimization - in the continuous subspace with these points being fixed and returns the best - found solution. - - **Important**: This performs a brute-force calculation by fixing every possible - assignment of discrete variables and optimizing the continuous subspace for - each of them. It is thus computationally expensive. - - **Note**: This function implicitly assumes that discrete search space parts in - the respective data frame come first and continuous parts come second. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - The recommended points. - """ - assert self._objective is not None - - # Interpoint constraints cannot be used with optimize_acqf_mixed, see - # https://github.com/meta-pytorch/botorch/issues/2996 - if searchspace.continuous.has_interpoint_constraints: - raise IncompatibilityError( - "Interpoint constraints are not available in hybrid spaces." - ) - if ( - batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching - ): - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." 
- ) - - import torch - from botorch.optim import optimize_acqf_mixed - - # Transform discrete candidates - candidates_comp = searchspace.discrete.transform(candidates_exp) - - # Calculate the number of samples from the given percentage - n_candidates = math.ceil(self.sampling_percentage * len(candidates_comp.index)) - - # Potential sampling of discrete candidates - if self.hybrid_sampler is not None: - candidates_comp = sample_numerical_df( - candidates_comp, n_candidates, method=self.hybrid_sampler - ) - - # Prepare all considered discrete configurations in the - # List[Dict[int, float]] format expected by BoTorch. - num_comp_columns = len(candidates_comp.columns) - candidates_comp.columns = list(range(num_comp_columns)) - fixed_features_list = candidates_comp.to_dict("records") - - # Actual call of the BoTorch optimization routine - # NOTE: The explicit `or None` conversion is added as an additional safety net - # because it is unclear if the corresponding presence checks for these - # arguments is correctly implemented in all invoked BoTorch subroutines. - # For details: https://github.com/pytorch/botorch/issues/2042 - points, _ = optimize_acqf_mixed( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), - q=batch_size, - num_restarts=self.n_restarts, - raw_samples=self.n_raw_samples, - fixed_features_list=fixed_features_list, # type: ignore[arg-type] - equality_constraints=flatten( - c.to_botorch( - searchspace.continuous.parameters, - idx_offset=len(candidates_comp.columns), - batch_size=batch_size if c.is_interpoint else None, - ) - for c in searchspace.continuous.constraints_lin_eq - ) - or None, - inequality_constraints=flatten( - c.to_botorch( - searchspace.continuous.parameters, - idx_offset=num_comp_columns, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in searchspace.continuous.constraints_lin_ineq - ) - or None, - ) - - # Align candidates with search space index. Done via including the search space - # index during the merge, which is used later for back-translation into the - # experimental representation - merged = pd.merge( - pd.DataFrame(points), - candidates_comp.reset_index(), - on=list(candidates_comp.columns), - how="left", - ).set_index("index") - - # Get experimental representation of discrete part - rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] - - # Combine discrete and continuous parts - rec_exp = pd.concat( - [ - rec_disc_exp, - merged.iloc[:, num_comp_columns:].set_axis( - searchspace.continuous.parameter_names, axis=1 - ), - ], - axis=1, - ) - - return rec_exp - - def _recommend_hybrid_with_subspaces( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Recommend from a hybrid space with subspace-generating constraints. - - Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian - product of discrete and continuous subspace configurations, capped at - ``max_n_subspaces`` total. In purely discrete search spaces, subspaces - with fewer candidates than ``batch_size`` are pre-filtered. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Returns: - The recommended points. 
- """ - from attrs import evolve - - subspace_c = searchspace.continuous - - # Get combined configurations, capped at max_n_subspaces - # NOTE: No min_discrete_candidates filtering in hybrid spaces because - # optimize_acqf_mixed can produce multiple recommendations from a single - # discrete candidate by varying continuous parameters. - combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] - if searchspace.n_theoretical_subspaces <= self.max_n_subspaces: - combined_masks = searchspace.subspaces(candidates_exp) - else: - combined_masks = searchspace.sample_subspaces( - candidates_exp, self.max_n_subspaces - ) - - def make_callable( - d_mask: np.ndarray, - c_inactive_params: frozenset[str], - ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: - def optimize() -> tuple[pd.DataFrame, Tensor]: - import torch - - subset = candidates_exp.loc[d_mask] - - if c_inactive_params: - mod_cont = subspace_c._enforce_cardinality_constraints( - c_inactive_params - ) - else: - mod_cont = subspace_c - mod_searchspace = evolve(searchspace, continuous=mod_cont) - - rec = self._recommend_hybrid_without_subspaces( - mod_searchspace, subset, batch_size - ) - - comp = mod_searchspace.transform(rec) - with torch.no_grad(): - acqf_value = self._botorch_acqf(to_tensor(comp.values).unsqueeze(0)) - return rec, acqf_value - - return optimize - - callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) - best_rec, _ = self._optimize_over_subspaces(callables) - - # Post-check minimum cardinality on continuous columns - if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( - best_rec[list(subspace_c.parameter_names)], - subspace_c, - check_maximum=False, - ): - warnings.warn( - "At least one minimum cardinality constraint has been violated. " - "This may occur when parameter ranges extend beyond zero in both " - "directions, making the feasible region non-convex. For such " - "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization " - "problem.", - MinimumCardinalityViolatedWarning, - ) - - return best_rec - - def _optimize_over_subspaces( - self, - subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], - ) -> tuple[Any, Tensor]: - """Optimize across subspaces and return the result with the best acqf value. - - Each callable performs optimization for one subspace configuration and returns - a ``(result, acquisition_value)`` tuple. Subspaces that raise - ``InfeasibilityError`` are silently skipped. - - Args: - subspace_callables: An iterable of zero-argument callables. Each callable - runs the optimization for one subspace and returns - ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the - subspace is infeasible. - - Raises: - InfeasibilityError: If none of the subspaces has a feasible solution. - - Returns: - The result and acquisition value of the best subspace. - """ - from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError - - results_all: list = [] - acqf_values_all: list[Tensor] = [] - - for optimize_fn in subspace_callables: - try: - result, acqf_value = optimize_fn() - results_all.append(result) - acqf_values_all.append(acqf_value) - except (BoInfeasibilityError, InfeasibilityError): - pass - - if not results_all: - raise InfeasibilityError( - "No feasible solution could be found. Potentially the specified " - "constraints are too restrictive, i.e. there may be too many " - "constraints or thresholds may have been set too tightly. 
" - "Consider relaxing the constraints to improve the chances " - "of finding a feasible solution." - ) - - best_idx = np.argmax(acqf_values_all) - return results_all[best_idx], acqf_values_all[best_idx] - - -# Collect leftover original slotted classes processed by `attrs.define` -gc.collect() diff --git a/baybe/recommenders/pure/bayesian/botorch/__init__.py b/baybe/recommenders/pure/bayesian/botorch/__init__.py new file mode 100644 index 0000000000..899b2c9a70 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/__init__.py @@ -0,0 +1,7 @@ +"""Botorch recommender.""" + +from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + +__all__ = [ + "BotorchRecommender", +] diff --git a/baybe/recommenders/pure/bayesian/botorch/continuous.py b/baybe/recommenders/pure/bayesian/botorch/continuous.py new file mode 100644 index 0000000000..380eb89f40 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/continuous.py @@ -0,0 +1,209 @@ +"""Continuous recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +import warnings +from collections.abc import Callable, Collection, Iterable +from typing import TYPE_CHECKING + +import pandas as pd +from attrs import fields + +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.exceptions import ( + IncompatibilityError, + MinimumCardinalityViolatedWarning, +) +from baybe.parameters.numerical import _FixedNumericalContinuousParameter +from baybe.searchspace import SubspaceContinuous +from baybe.utils.basic import flatten + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_continuous_torch( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Dispatcher selecting the continuous optimization routine.""" + if subspace_continuous.constraints_cardinality: + return recommend_continuous_with_cardinality_constraints( + recommender, subspace_continuous, batch_size + ) + else: + return recommend_continuous_without_cardinality_constraints( + recommender, subspace_continuous, batch_size + ) + + +def recommend_continuous_with_cardinality_constraints( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Recommend from a continuous space with cardinality constraints. + + Optimizes the acquisition function across subspaces defined by cardinality + constraints and returns the best result. + + The specific collection of subspaces considered by the recommender is obtained + as either the full combinatorial set of possible parameter splits or a random + selection thereof, depending on the upper bound specified by the corresponding + recommender attribute. + + In each subspace, the constraint-imposed configuration is fixed, so that the + constraints can be removed and a regular optimization can be performed. The + recommendation is then constructed from the combined optimization results of the + unconstrained spaces. + + Args: + recommender: The recommender instance. + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations and corresponding acquisition values. + + Raises: + ValueError: If the continuous search space has no cardinality + constraints. 
+ """ + if not subspace_continuous.constraints_cardinality: + raise ValueError( + f"'{recommend_continuous_with_cardinality_constraints.__name__}' " + f"expects a subspace with cardinality constraints." + ) + + # Determine search scope based on number of subspace configurations + configs: Iterable[frozenset[str]] + if subspace_continuous.n_theoretical_subspaces <= recommender.max_n_subspaces: + configs = subspace_continuous.inactive_parameter_combinations() + else: + configs = subspace_continuous._sample_inactive_parameters( + recommender.max_n_subspaces + ) + + # Create closures for each subspace configuration + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[Tensor, Tensor]]: + def optimize() -> tuple[Tensor, Tensor]: + import torch + + sub = subspace_continuous._enforce_cardinality_constraints(inactive_params) + # Note: We explicitly evaluate the acqf function for the batch + # because the object returned by the optimization routine may + # contain joint or individual acquisition values, depending on + # whether sequential or joint optimization is applied + p, _ = recommend_continuous_torch(recommender, sub, batch_size) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf(p) + return p, acqf_value + + return optimize + + callables = (make_callable(ip) for ip in configs) + points, acqf_value = recommender._optimize_over_subspaces(callables) + + # Check if any minimum cardinality constraints are violated + if not is_cardinality_fulfilled( + pd.DataFrame(points, columns=subspace_continuous.parameter_names), + subspace_continuous, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization problem.", + MinimumCardinalityViolatedWarning, + ) + + return points, acqf_value + + +def recommend_continuous_without_cardinality_constraints( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Recommend from a continuous search space without cardinality constraints. + + Args: + recommender: The recommender instance. + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations and corresponding acquisition values. + + Raises: + ValueError: If the continuous search space has cardinality constraints. + """ + import torch + from botorch.optim import optimize_acqf + + if subspace_continuous.constraints_cardinality: + raise ValueError( + f"'{recommend_continuous_without_cardinality_constraints.__name__}' " + f"expects a subspace without cardinality constraints." 
+ ) + + fixed_parameters = { + idx: p.value + for (idx, p) in enumerate(subspace_continuous.parameters) + if isinstance(p, _FixedNumericalContinuousParameter) + } + + # TODO: Add option for automatic choice once the "settings" PR is merged, + # which ships the necessary machinery + if ( + recommender.sequential_continuous + and subspace_continuous.has_interpoint_constraints + ): + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + raise IncompatibilityError( + f"Setting the " + f"'{fields(BotorchRecommender).sequential_continuous.name}' " + f"flag to ``True`` while interpoint constraints are present in the " + f"continuous subspace is not supported. " + ) + + # NOTE: The explicit `or None` conversion is added as an additional safety net + # because it is unclear if the corresponding presence checks for these + # arguments is correctly implemented in all invoked BoTorch subroutines. + # For details: https://github.com/pytorch/botorch/issues/2042 + points, acqf_values = optimize_acqf( + acq_function=recommender._botorch_acqf, + bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), + q=batch_size, + num_restarts=recommender.n_restarts, + raw_samples=recommender.n_raw_samples, + fixed_features=fixed_parameters or None, + equality_constraints=flatten( + c.to_botorch( + subspace_continuous.parameters, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in subspace_continuous.constraints_lin_eq + ) + or None, + inequality_constraints=flatten( + c.to_botorch( + subspace_continuous.parameters, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in subspace_continuous.constraints_lin_ineq + ) + or None, + sequential=recommender.sequential_continuous, + ) + return points, acqf_values diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py new file mode 100644 index 0000000000..bea0bc8fea --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -0,0 +1,278 @@ +"""Botorch recommender core.""" + +from __future__ import annotations + +import gc +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING, Any, ClassVar + +import numpy as np +import pandas as pd +from attrs import define, field +from attrs.converters import optional as optional_c +from attrs.validators import ge, gt, instance_of +from typing_extensions import override + +from baybe.exceptions import ( + IncompatibleAcquisitionFunctionError, + InfeasibilityError, +) +from baybe.recommenders.pure.bayesian.base import BayesianRecommender +from baybe.recommenders.pure.bayesian.botorch.continuous import ( + recommend_continuous_torch, +) +from baybe.recommenders.pure.bayesian.botorch.discrete import ( + recommend_discrete_with_subspaces, + recommend_discrete_without_subspaces, +) +from baybe.recommenders.pure.bayesian.botorch.hybrid import ( + recommend_hybrid_with_subspaces, + recommend_hybrid_without_subspaces, +) +from baybe.searchspace import ( + SearchSpace, + SearchSpaceType, + SubspaceContinuous, + SubspaceDiscrete, +) +from baybe.utils.conversion import to_string +from baybe.utils.sampling_algorithms import DiscreteSamplingMethod + +if TYPE_CHECKING: + from torch import Tensor + + +@define(kw_only=True) +class BotorchRecommender(BayesianRecommender): + """A pure recommender utilizing Botorch's optimization machinery. 
+ + This recommender makes use of Botorch's ``optimize_acqf_discrete``, + ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete, + continuous and hybrid search spaces, respectively. Accordingly, it can be applied to + all kinds of search spaces. + + Note: + In hybrid search spaces, the used algorithm performs a brute-force optimization + that can be computationally expensive. Thus, the behavior of the algorithm in + hybrid search spaces can be controlled via two additional parameters. + """ + + # Class variables + compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID + # See base class. + + supports_discrete_batch_constraints: ClassVar[bool] = True + # See base class. + + # Object variables + sequential_continuous: bool = field(default=True) + """Flag defining whether to apply sequential greedy or batch optimization in + **continuous** search spaces. In discrete/hybrid spaces, sequential greedy + optimization is applied automatically. + """ + + hybrid_sampler: DiscreteSamplingMethod | None = field( + converter=optional_c(DiscreteSamplingMethod), default=None + ) + """Strategy used for sampling the discrete subspace when performing hybrid search + space optimization.""" + + sampling_percentage: float = field(default=1.0) + """Percentage of discrete search space that is sampled when performing hybrid search + space optimization. Ignored when ``hybrid_sampler="None"``.""" + + n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10) + """Number of times gradient-based optimization is restarted from different initial + points. **Does not affect purely discrete optimization**. + """ + + n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64) + """Number of raw samples drawn for the initialization heuristic in gradient-based + optimization. **Does not affect purely discrete optimization**. + """ + + max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Maximum number of subspaces to evaluate when subspace-generating constraints are + present (e.g., continuous cardinality constraints). If the total number of subspaces + exceeds this limit, a random subset of that size is sampled for optimization instead + of performing an exhaustive search.""" + + @sampling_percentage.validator + def _validate_percentage( # noqa: DOC101, DOC103 + self, _: Any, value: float + ) -> None: + """Validate that the given value is in fact a percentage. + + Raises: + ValueError: If ``value`` is not between 0 and 1. + """ + if not 0 <= value <= 1: + raise ValueError( + f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" + ) + + @override + def __str__(self) -> str: + fields = [ + to_string("Surrogate", self._surrogate_model), + to_string( + "Acquisition function", self.acquisition_function, single_line=True + ), + to_string("Compatibility", self.compatibility, single_line=True), + to_string( + "Sequential continuous", self.sequential_continuous, single_line=True + ), + to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), + to_string( + "Sampling percentage", self.sampling_percentage, single_line=True + ), + ] + return to_string(self.__class__.__name__, *fields) + + @override + def _recommend_discrete( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Generate recommendations from a discrete search space. + + Dispatches to the appropriate optimization routine depending on whether + batch constraints are present. 
+ + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. + """ + if subspace_discrete.constraints_batch: + return recommend_discrete_with_subspaces( + self, subspace_discrete, candidates_exp, batch_size + ) + return recommend_discrete_without_subspaces( + self, subspace_discrete, candidates_exp, batch_size + ) + + @override + def _recommend_continuous( + self, + subspace_continuous: SubspaceContinuous, + batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a continuous search space. + + Args: + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + A dataframe containing the recommendations as individual rows. + """ + assert self._objective is not None + if ( + batch_size > 1 + and not self._get_acquisition_function(self._objective).supports_batching + ): + raise IncompatibleAcquisitionFunctionError( + f"The '{self.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + + points, _ = recommend_continuous_torch(self, subspace_continuous, batch_size) + + return pd.DataFrame(points, columns=subspace_continuous.parameter_names) + + @override + def _recommend_hybrid( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a hybrid search space. + + Dispatches to the appropriate optimization routine depending on whether + subspace-generating constraints are present. + + Args: + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + if ( + searchspace.discrete.constraints_batch + or searchspace.continuous.constraints_cardinality + ): + return recommend_hybrid_with_subspaces( + self, searchspace, candidates_exp, batch_size + ) + return recommend_hybrid_without_subspaces( + self, searchspace, candidates_exp, batch_size + ) + + def _optimize_over_subspaces( + self, + subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + ) -> tuple[Any, Tensor]: + """Optimize across subspaces and return the result with the best acqf value. + + Each callable performs optimization for one subspace configuration and returns + a ``(result, acquisition_value)`` tuple. Subspaces that raise + ``InfeasibilityError`` are silently skipped. + + Args: + subspace_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one subspace and returns + ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the + subspace is infeasible. + + Raises: + InfeasibilityError: If none of the subspaces has a feasible solution. + + Returns: + The result and acquisition value of the best subspace. 
+ """ + from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError + + results_all: list = [] + acqf_values_all: list[Tensor] = [] + + for optimize_fn in subspace_callables: + try: + result, acqf_value = optimize_fn() + results_all.append(result) + acqf_values_all.append(acqf_value) + except (BoInfeasibilityError, InfeasibilityError): + pass + + if not results_all: + raise InfeasibilityError( + "No feasible solution could be found. Potentially the specified " + "constraints are too restrictive, i.e. there may be too many " + "constraints or thresholds may have been set too tightly. " + "Consider relaxing the constraints to improve the chances " + "of finding a feasible solution." + ) + + best_idx = np.argmax(acqf_values_all) + return results_all[best_idx], acqf_values_all[best_idx] + + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py new file mode 100644 index 0000000000..40cfa61bd3 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -0,0 +1,141 @@ +"""Discrete recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd + +from baybe.searchspace import SubspaceDiscrete +from baybe.utils.dataframe import to_tensor + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_discrete_with_subspaces( + recommender: BotorchRecommender, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.Index: + """Recommend from a discrete space with batch constraints. + + Partitions the candidate set according to batch constraints, + runs optimization on each feasible partition, and returns the batch with + the highest joint acquisition value. Subspaces with fewer candidates + than ``batch_size`` are skipped. + + Args: + recommender: The recommender instance. + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of candidates. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points. 
+ """ + import torch + + masks: Iterable[np.ndarray] + if subspace_discrete.n_theoretical_subspaces <= recommender.max_n_subspaces: + masks = subspace_discrete.subspace_masks( + candidates_exp, min_candidates=batch_size + ) + else: + masks = subspace_discrete.sample_subspace_masks( + candidates_exp, recommender.max_n_subspaces, min_candidates=batch_size + ) + + def make_callable( + mask: np.ndarray, + ) -> Callable[[], tuple[pd.Index, Tensor]]: + def optimize() -> tuple[pd.Index, Tensor]: + subset = candidates_exp.loc[mask] + + idxs = recommend_discrete_without_subspaces( + recommender, subspace_discrete, subset, batch_size + ) + + comp = subspace_discrete.transform(candidates_exp.loc[idxs]) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf(to_tensor(comp).unsqueeze(0)) + return idxs, acqf_value + + return optimize + + callables = (make_callable(m) for m in masks) + best_idxs, _ = recommender._optimize_over_subspaces(callables) + return best_idxs + + +def recommend_discrete_without_subspaces( + recommender: BotorchRecommender, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.Index: + """Generate recommendations from a discrete search space. + + Args: + recommender: The recommender instance. + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. + """ + from baybe.acquisition.acqfs import qThompsonSampling + from baybe.exceptions import ( + IncompatibilityError, + IncompatibleAcquisitionFunctionError, + ) + + assert recommender._objective is not None + acqf = recommender._get_acquisition_function(recommender._objective) + if batch_size > 1 and not acqf.supports_batching: + raise IncompatibleAcquisitionFunctionError( + f"The '{recommender.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + if batch_size > 1 and isinstance(acqf, qThompsonSampling): + raise IncompatibilityError( + "Thompson sampling currently only supports a batch size of 1." + ) + + from botorch.optim import optimize_acqf_discrete + + # determine the next set of points to be tested + candidates_comp = subspace_discrete.transform(candidates_exp) + points, _ = optimize_acqf_discrete( + recommender._botorch_acqf, batch_size, to_tensor(candidates_comp) + ) + + # retrieve the index of the points from the input dataframe + # IMPROVE: The merging procedure is conceptually similar to what + # `SearchSpace._match_measurement_with_searchspace_indices` does, though using + # a simpler matching logic. When refactoring the SearchSpace class to + # handle continuous parameters, a corresponding utility could be extracted. 
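+    # In other words, each optimized point is matched back to the candidate row it
+    # originated from by joining on all computational columns; the recovered
+    # original indices are returned in the same order as `points`.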
+    idxs = pd.Index(
+        pd.merge(
+            pd.DataFrame(points, columns=candidates_comp.columns),
+            candidates_comp.reset_index(),
+            on=list(candidates_comp),
+            how="left",
+        )["index"]
+    )
+
+    return idxs
diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py
new file mode 100644
index 0000000000..631d4f24d8
--- /dev/null
+++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py
@@ -0,0 +1,252 @@
+"""Hybrid recommendation routines for BotorchRecommender."""
+
+from __future__ import annotations
+
+import math
+import warnings
+from collections.abc import Callable, Iterable
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+
+from baybe.constraints.utils import is_cardinality_fulfilled
+from baybe.exceptions import (
+    IncompatibilityError,
+    IncompatibleAcquisitionFunctionError,
+    MinimumCardinalityViolatedWarning,
+)
+from baybe.searchspace import SearchSpace
+from baybe.utils.basic import flatten
+from baybe.utils.dataframe import to_tensor
+from baybe.utils.sampling_algorithms import sample_numerical_df
+
+if TYPE_CHECKING:
+    from torch import Tensor
+
+    from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender
+
+
+def recommend_hybrid_without_subspaces(
+    recommender: BotorchRecommender,
+    searchspace: SearchSpace,
+    candidates_exp: pd.DataFrame,
+    batch_size: int,
+) -> pd.DataFrame:
+    """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch.
+
+    This function samples points from the discrete subspace, performs optimization
+    in the continuous subspace with these points held fixed, and returns the best
+    found solution.
+
+    **Important**: This performs a brute-force calculation by fixing every possible
+    assignment of discrete variables and optimizing the continuous subspace for
+    each of them. It is thus computationally expensive.
+
+    **Note**: This function implicitly assumes that discrete search space parts in
+    the respective dataframe come first and continuous parts come second.
+
+    Args:
+        recommender: The recommender instance.
+        searchspace: The search space in which the recommendations should be made.
+        candidates_exp: The experimental representation of the candidates
+            of the discrete subspace.
+        batch_size: The size of the calculated batch.
+
+    Raises:
+        IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition
+            function is used with a batch size > 1.
+
+    Returns:
+        The recommended points.
+    """
+    assert recommender._objective is not None
+
+    # Interpoint constraints cannot be used with optimize_acqf_mixed, see
+    # https://github.com/meta-pytorch/botorch/issues/2996
+    if searchspace.continuous.has_interpoint_constraints:
+        raise IncompatibilityError(
+            "Interpoint constraints are not available in hybrid spaces."
+        )
+    if (
+        batch_size > 1
+        and not recommender._get_acquisition_function(
+            recommender._objective
+        ).supports_batching
+    ):
+        raise IncompatibleAcquisitionFunctionError(
+            f"The '{recommender.__class__.__name__}' only works with Monte Carlo "
+            f"acquisition functions for batch sizes > 1."
+ ) + + import torch + from botorch.optim import optimize_acqf_mixed + + # Transform discrete candidates + candidates_comp = searchspace.discrete.transform(candidates_exp) + + # Calculate the number of samples from the given percentage + n_candidates = math.ceil( + recommender.sampling_percentage * len(candidates_comp.index) + ) + + # Potential sampling of discrete candidates + if recommender.hybrid_sampler is not None: + candidates_comp = sample_numerical_df( + candidates_comp, n_candidates, method=recommender.hybrid_sampler + ) + + # Prepare all considered discrete configurations in the + # List[Dict[int, float]] format expected by BoTorch. + num_comp_columns = len(candidates_comp.columns) + candidates_comp.columns = list(range(num_comp_columns)) + fixed_features_list = candidates_comp.to_dict("records") + + # Actual call of the BoTorch optimization routine + # NOTE: The explicit `or None` conversion is added as an additional safety net + # because it is unclear if the corresponding presence checks for these + # arguments is correctly implemented in all invoked BoTorch subroutines. + # For details: https://github.com/pytorch/botorch/issues/2042 + points, _ = optimize_acqf_mixed( + acq_function=recommender._botorch_acqf, + bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), + q=batch_size, + num_restarts=recommender.n_restarts, + raw_samples=recommender.n_raw_samples, + fixed_features_list=fixed_features_list, # type: ignore[arg-type] + equality_constraints=flatten( + c.to_botorch( + searchspace.continuous.parameters, + idx_offset=len(candidates_comp.columns), + batch_size=batch_size if c.is_interpoint else None, + ) + for c in searchspace.continuous.constraints_lin_eq + ) + or None, + inequality_constraints=flatten( + c.to_botorch( + searchspace.continuous.parameters, + idx_offset=num_comp_columns, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in searchspace.continuous.constraints_lin_ineq + ) + or None, + ) + + # Align candidates with search space index. Done via including the search space + # index during the merge, which is used later for back-translation into the + # experimental representation + merged = pd.merge( + pd.DataFrame(points), + candidates_comp.reset_index(), + on=list(candidates_comp.columns), + how="left", + ).set_index("index") + + # Get experimental representation of discrete part + rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] + + # Combine discrete and continuous parts + rec_exp = pd.concat( + [ + rec_disc_exp, + merged.iloc[:, num_comp_columns:].set_axis( + searchspace.continuous.parameter_names, axis=1 + ), + ], + axis=1, + ) + + return rec_exp + + +def recommend_hybrid_with_subspaces( + recommender: BotorchRecommender, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.DataFrame: + """Recommend from a hybrid space with subspace-generating constraints. + + Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian + product of discrete and continuous subspace configurations, capped at + ``max_n_subspaces`` total. In purely discrete search spaces, subspaces + with fewer candidates than ``batch_size`` are pre-filtered. + + Args: + recommender: The recommender instance. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. 
+ """ + from attrs import evolve + + subspace_c = searchspace.continuous + + # Get combined configurations, capped at max_n_subspaces + # NOTE: No min_discrete_candidates filtering in hybrid spaces because + # optimize_acqf_mixed can produce multiple recommendations from a single + # discrete candidate by varying continuous parameters. + combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] + if searchspace.n_theoretical_subspaces <= recommender.max_n_subspaces: + combined_masks = searchspace.subspaces(candidates_exp) + else: + combined_masks = searchspace.sample_subspaces( + candidates_exp, recommender.max_n_subspaces + ) + + def make_callable( + d_mask: np.ndarray, + c_inactive_params: frozenset[str], + ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: + def optimize() -> tuple[pd.DataFrame, Tensor]: + import torch + + subset = candidates_exp.loc[d_mask] + + if c_inactive_params: + mod_cont = subspace_c._enforce_cardinality_constraints( + c_inactive_params + ) + else: + mod_cont = subspace_c + mod_searchspace = evolve(searchspace, continuous=mod_cont) + + rec = recommend_hybrid_without_subspaces( + recommender, mod_searchspace, subset, batch_size + ) + + comp = mod_searchspace.transform(rec) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf( + to_tensor(comp.values).unsqueeze(0) + ) + return rec, acqf_value + + return optimize + + callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) + best_rec, _ = recommender._optimize_over_subspaces(callables) + + # Post-check minimum cardinality on continuous columns + if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( + best_rec[list(subspace_c.parameter_names)], + subspace_c, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization " + "problem.", + MinimumCardinalityViolatedWarning, + ) + + return best_rec diff --git a/docs/userguide/async.md b/docs/userguide/async.md index c590b0048a..a376fcf2c6 100644 --- a/docs/userguide/async.md +++ b/docs/userguide/async.md @@ -48,7 +48,7 @@ function with `pending_experiments` will result in an For technical reasons, not every recommender is able to make use of `pending_experiments`. For instance, -[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender) takes all pending experiments into account, even if they do not match exactly with points in the search space. By contrast, diff --git a/docs/userguide/campaigns.md b/docs/userguide/campaigns.md index 1c602409e4..26a242afba 100644 --- a/docs/userguide/campaigns.md +++ b/docs/userguide/campaigns.md @@ -96,7 +96,7 @@ used is strongly discouraged. **Note:** While the above distinction is true in the general case, it may not be relevant for all configured settings, for instance, when the used recommender is not capable of joint optimization. Currently, the -[BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +[BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender) is the only recommender available that performs joint optimization. 
``` diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index dcc5597cf0..d14e3dd831 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -130,7 +130,7 @@ to be aware of: - BayBE does not support to use both interpoint and cardinality constraints within the same search space. - When using interpoint constraints, candidate generation cannot be done -{attr}`sequentially `, +{attr}`sequentially `, and an error is raised when attempted. - Interpoint constraints are only supported in purely continuous spaces and are not available in hybrid spaces. @@ -169,8 +169,8 @@ settings, searching an optimal parameter configuration can quickly become infeas creating the need for approximation schemes: * The - {paramref}`BotorchRecommender.max_n_subspaces ` - attribute can be used to limit the number of subspaces considered during optimization. + {paramref}`BotorchRecommender.max_n_partitions ` + attribute can be used to limit the number of partitions considered during optimization. * When the ranges of cardinality-constrained parameters cover both positive and negative values, minimal cardinality requirements cannot always be guaranteed, potentially resulting in a {class}`~baybe.exceptions.MinimumCardinalityViolatedWarning`. @@ -574,7 +574,7 @@ and selects the best one. :class: warning The `DiscreteBatchConstraint` is only effective with recommenders that can compare batch-level outcomes, such as -{class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender` and +{class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender` and {class}`~baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender`. Other recommenders will raise an {class}`~baybe.exceptions.IncompatibilityError` if a search space with batch diff --git a/docs/userguide/getting_recommendations.md b/docs/userguide/getting_recommendations.md index c836882cc2..7d82a15a36 100644 --- a/docs/userguide/getting_recommendations.md +++ b/docs/userguide/getting_recommendations.md @@ -31,7 +31,7 @@ BayBE offers two entry points for requesting recommendations: {attr}`~baybe.recommenders.meta.base.MetaRecommender.is_stateful` property. ``` - For example, using the {class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`: + For example, using the {class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`: ~~~python recommender = BotorchRecommender() recommendation = recommender.recommend(batch_size, searchspace, objective, measurements) diff --git a/docs/userguide/recommenders.md b/docs/userguide/recommenders.md index 488ea4c297..fc4dadd7c1 100644 --- a/docs/userguide/recommenders.md +++ b/docs/userguide/recommenders.md @@ -21,7 +21,7 @@ The Bayesian recommenders in BayBE are built on the foundation of the class, offering an array of possibilities with internal surrogate models and support for various acquisition functions. -* The **[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender)** +* The **[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender)** is a powerful recommender based on BoTorch's optimization engine that can be applied to all kinds of search spaces. In continuous spaces, its `sequential_continuous` flag allows to choose between greedy sequential optimization and batch optimization as the @@ -32,16 +32,16 @@ for various acquisition functions. 
spaces, as it does gradient-based optimization in the continuous part of the space while exhaustively evaluating configurations of the discrete subspace. You can customize this behavior to only sample a certain percentage of the discrete subspace via the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.sampling_percentage` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.sampling_percentage` argument and to choose different sampling algorithms via the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.hybrid_sampler` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.hybrid_sampler` argument. The gradient-based optimization part can also further be controlled by the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.n_restarts` and - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.n_raw_samples` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.n_restarts` and + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.n_raw_samples` arguments. For details, please refer - to [BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender). + to [BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender). * The **[`NaiveHybridSpaceRecommender`](baybe.recommenders.naive.NaiveHybridSpaceRecommender)** can be applied to all search spaces, but is intended to be used in hybrid spaces. diff --git a/examples/Custom_Hooks/probability_of_improvement.py b/examples/Custom_Hooks/probability_of_improvement.py index 3ae7dd66ae..7834592d1d 100644 --- a/examples/Custom_Hooks/probability_of_improvement.py +++ b/examples/Custom_Hooks/probability_of_improvement.py @@ -4,7 +4,7 @@ # {func}`register_hooks ` utility can be used to # extract the *Probability of Improvement (PI)* from a running campaign: # * We define a hook that is compatible with the -# {meth}`BotorchRecommender.recommend ` +# {meth}`BotorchRecommender.recommend ` # interface and lets us extract the PI achieved after each experimental iteration, # * attach the hook to the recommender driving our campaign, # * and plot the evolving PI values after campaign completion. @@ -107,7 +107,7 @@ def extract_pi( ) # In this example, we use `MethodType` to bind the -# {meth}`BotorchRecommender.recommend ` +# {meth}`BotorchRecommender.recommend ` # **function** with our hook. # For more information, we refer to the [`basic example`](./basics.md) explaining the # hook mechanics. 
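The hunks above document the knobs that control this optimization. The following sketch is illustrative rather than part of the patch series: it assumes a standard BayBE installation, the numeric values are arbitrary, and `select_best` is a simplified stand-in for the internal `_optimize_over_subspaces` helper, with `RuntimeError` replacing BayBE's `InfeasibilityError`. It shows how the documented recommender attributes are set and what the subspace-selection logic boils down to.

```python
# Illustrative sketch only. Assumes a standard BayBE installation; values are arbitrary.
from baybe.recommenders import BotorchRecommender

# Keyword arguments correspond to the recommender attributes documented above
# (still named `max_n_subspaces` at this point in the series).
recommender = BotorchRecommender(
    sequential_continuous=True,  # greedy sequential optimization in continuous spaces
    n_restarts=20,               # restarts of the gradient-based optimization
    n_raw_samples=128,           # raw samples for the initialization heuristic
    max_n_subspaces=5,           # cap on the number of enumerated constraint subspaces
)
# A batch is then requested as usual, e.g.:
# recommendation = recommender.recommend(batch_size, searchspace, objective, measurements)


def select_best(optimizers):
    """Run each zero-argument optimizer and keep the result with the best value.

    Simplified stand-in for the subspace-selection pattern used internally:
    each callable optimizes one subspace and returns ``(result, acqf_value)``.
    """
    results = []
    for optimize in optimizers:
        try:
            results.append(optimize())
        except RuntimeError:  # stand-in for an infeasible subspace
            continue
    if not results:
        raise RuntimeError("No feasible subspace found.")
    return max(results, key=lambda pair: pair[1])
```

The next patch renames the attribute to `max_n_partitions` and keeps `max_n_subspaces` only as a deprecated alias.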
From 273567c57a45e67f2287a32101d7368a027bc120 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 22:02:11 +0200 Subject: [PATCH 17/39] Rename subspace to partition --- baybe/constraints/discrete.py | 12 ++-- .../pure/bayesian/botorch/continuous.py | 16 ++--- .../pure/bayesian/botorch/core.py | 70 +++++++++++++------ .../pure/bayesian/botorch/discrete.py | 18 ++--- .../pure/bayesian/botorch/hybrid.py | 26 +++---- .../pure/nonpredictive/sampling.py | 8 +-- baybe/searchspace/continuous.py | 4 +- baybe/searchspace/core.py | 30 ++++---- baybe/searchspace/discrete.py | 22 +++--- tests/constraints/test_batch_constraint.py | 18 ++--- ...y => test_partition_constraints_hybrid.py} | 6 +- 11 files changed, 127 insertions(+), 103 deletions(-) rename tests/constraints/{test_subspace_constraints_hybrid.py => test_partition_constraints_hybrid.py} (94%) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 1f35dab3b2..590ae8c0ab 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -431,8 +431,8 @@ class DiscreteBatchConstraint(DiscreteConstraint): """Constraint ensuring all batch recommendations share the same parameter value. When this constraint is active, the recommender internally partitions the - candidate set into subspaces — one for each unique value of the constrained - parameter — obtains a full batch recommendation from each subspace, and + candidate set into partitions — one for each unique value of the constrained + parameter — obtains a full batch recommendation from each partition, and returns the batch with the highest joint acquisition value. This constraint is not supported by all recommenders. It is not applied during @@ -466,7 +466,7 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: Always returns an empty index because this constraint operates at the batch level, not the row level. Individual rows are never invalid; the constraint is enforced at recommendation time by partitioning candidates - into subspaces. + into partitions. Args: data: A dataframe where each row represents a parameter configuration. @@ -476,13 +476,13 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: """ return pd.Index([]) - def subspace_masks( + def partition_masks( self, candidates_exp: pd.DataFrame ) -> list[npt.NDArray[np.bool_]]: - """Return boolean masks defining the subspaces for this constraint. + """Return boolean masks defining the partitions for this constraint. Each mask selects the rows in ``candidates_exp`` that belong to one - subspace, i.e. share the same value for the constrained parameter. + partition, i.e. share the same value for the constrained parameter. Args: candidates_exp: The experimental representation of candidate points. diff --git a/baybe/recommenders/pure/bayesian/botorch/continuous.py b/baybe/recommenders/pure/bayesian/botorch/continuous.py index 380eb89f40..252bdfde6f 100644 --- a/baybe/recommenders/pure/bayesian/botorch/continuous.py +++ b/baybe/recommenders/pure/bayesian/botorch/continuous.py @@ -47,15 +47,15 @@ def recommend_continuous_with_cardinality_constraints( ) -> tuple[Tensor, Tensor]: """Recommend from a continuous space with cardinality constraints. - Optimizes the acquisition function across subspaces defined by cardinality + Optimizes the acquisition function across partitions defined by cardinality constraints and returns the best result. 
- The specific collection of subspaces considered by the recommender is obtained + The specific collection of partitions considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random selection thereof, depending on the upper bound specified by the corresponding recommender attribute. - In each subspace, the constraint-imposed configuration is fixed, so that the + In each partition, the constraint-imposed configuration is fixed, so that the constraints can be removed and a regular optimization can be performed. The recommendation is then constructed from the combined optimization results of the unconstrained spaces. @@ -79,16 +79,16 @@ def recommend_continuous_with_cardinality_constraints( f"expects a subspace with cardinality constraints." ) - # Determine search scope based on number of subspace configurations + # Determine search scope based on number of partition configurations configs: Iterable[frozenset[str]] - if subspace_continuous.n_theoretical_subspaces <= recommender.max_n_subspaces: + if subspace_continuous.n_theoretical_partitions <= recommender.max_n_partitions: configs = subspace_continuous.inactive_parameter_combinations() else: configs = subspace_continuous._sample_inactive_parameters( - recommender.max_n_subspaces + recommender.max_n_partitions ) - # Create closures for each subspace configuration + # Create closures for each partition configuration def make_callable( inactive_params: Collection[str], ) -> Callable[[], tuple[Tensor, Tensor]]: @@ -108,7 +108,7 @@ def optimize() -> tuple[Tensor, Tensor]: return optimize callables = (make_callable(ip) for ip in configs) - points, acqf_value = recommender._optimize_over_subspaces(callables) + points, acqf_value = recommender._optimize_over_partitions(callables) # Check if any minimum cardinality constraints are violated if not is_cardinality_fulfilled( diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py index bea0bc8fea..2fcc5e759d 100644 --- a/baybe/recommenders/pure/bayesian/botorch/core.py +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -3,6 +3,7 @@ from __future__ import annotations import gc +import warnings from collections.abc import Callable, Iterable from typing import TYPE_CHECKING, Any, ClassVar @@ -22,12 +23,12 @@ recommend_continuous_torch, ) from baybe.recommenders.pure.bayesian.botorch.discrete import ( - recommend_discrete_with_subspaces, - recommend_discrete_without_subspaces, + recommend_discrete_with_partitions, + recommend_discrete_without_partitions, ) from baybe.recommenders.pure.bayesian.botorch.hybrid import ( - recommend_hybrid_with_subspaces, - recommend_hybrid_without_subspaces, + recommend_hybrid_with_partitions, + recommend_hybrid_without_partitions, ) from baybe.searchspace import ( SearchSpace, @@ -91,12 +92,35 @@ class BotorchRecommender(BayesianRecommender): optimization. **Does not affect purely discrete optimization**. """ - max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Maximum number of subspaces to evaluate when subspace-generating constraints are - present (e.g., continuous cardinality constraints). If the total number of subspaces + max_n_partitions: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Maximum number of partitions to evaluate when partitioning constraints are + present (e.g., continuous cardinality constraints). 
If the total number of + partitions exceeds this limit, a random subset of that size is sampled for optimization instead of performing an exhaustive search.""" + @property + def max_n_subspaces(self) -> int: + """Deprecated! Use ``max_n_partitions`` instead.""" + warnings.warn( + "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + return self.max_n_partitions + + @max_n_subspaces.setter + def max_n_subspaces(self, value: int) -> None: + """Deprecated! Use ``max_n_partitions`` instead.""" # noqa: D401 + warnings.warn( + "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + self.max_n_partitions = value + @sampling_percentage.validator def _validate_percentage( # noqa: DOC101, DOC103 self, _: Any, value: float @@ -153,10 +177,10 @@ def _recommend_discrete( experimental representation. """ if subspace_discrete.constraints_batch: - return recommend_discrete_with_subspaces( + return recommend_discrete_with_partitions( self, subspace_discrete, candidates_exp, batch_size ) - return recommend_discrete_without_subspaces( + return recommend_discrete_without_partitions( self, subspace_discrete, candidates_exp, batch_size ) @@ -204,7 +228,7 @@ def _recommend_hybrid( """Generate recommendations from a hybrid search space. Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. + partitioning constraints are present. Args: searchspace: The search space in which the recommendations should be made. @@ -219,41 +243,41 @@ def _recommend_hybrid( searchspace.discrete.constraints_batch or searchspace.continuous.constraints_cardinality ): - return recommend_hybrid_with_subspaces( + return recommend_hybrid_with_partitions( self, searchspace, candidates_exp, batch_size ) - return recommend_hybrid_without_subspaces( + return recommend_hybrid_without_partitions( self, searchspace, candidates_exp, batch_size ) - def _optimize_over_subspaces( + def _optimize_over_partitions( self, - subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + partition_callables: Iterable[Callable[[], tuple[Any, Tensor]]], ) -> tuple[Any, Tensor]: - """Optimize across subspaces and return the result with the best acqf value. + """Optimize across partitions and return the result with the best acqf value. - Each callable performs optimization for one subspace configuration and returns - a ``(result, acquisition_value)`` tuple. Subspaces that raise + Each callable performs optimization for one partition configuration and returns + a ``(result, acquisition_value)`` tuple. Partitions that raise ``InfeasibilityError`` are silently skipped. Args: - subspace_callables: An iterable of zero-argument callables. Each callable - runs the optimization for one subspace and returns + partition_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one partition and returns ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the - subspace is infeasible. + partition is infeasible. Raises: - InfeasibilityError: If none of the subspaces has a feasible solution. + InfeasibilityError: If none of the partitions has a feasible solution. Returns: - The result and acquisition value of the best subspace. + The result and acquisition value of the best partition. 
""" from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError results_all: list = [] acqf_values_all: list[Tensor] = [] - for optimize_fn in subspace_callables: + for optimize_fn in partition_callables: try: result, acqf_value = optimize_fn() results_all.append(result) diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py index 40cfa61bd3..21085195b0 100644 --- a/baybe/recommenders/pure/bayesian/botorch/discrete.py +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -17,7 +17,7 @@ from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender -def recommend_discrete_with_subspaces( +def recommend_discrete_with_partitions( recommender: BotorchRecommender, subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, @@ -27,7 +27,7 @@ def recommend_discrete_with_subspaces( Partitions the candidate set according to batch constraints, runs optimization on each feasible partition, and returns the batch with - the highest joint acquisition value. Subspaces with fewer candidates + the highest joint acquisition value. Partitions with fewer candidates than ``batch_size`` are skipped. Args: @@ -43,13 +43,13 @@ def recommend_discrete_with_subspaces( import torch masks: Iterable[np.ndarray] - if subspace_discrete.n_theoretical_subspaces <= recommender.max_n_subspaces: - masks = subspace_discrete.subspace_masks( + if subspace_discrete.n_theoretical_partitions <= recommender.max_n_partitions: + masks = subspace_discrete.partition_masks( candidates_exp, min_candidates=batch_size ) else: - masks = subspace_discrete.sample_subspace_masks( - candidates_exp, recommender.max_n_subspaces, min_candidates=batch_size + masks = subspace_discrete.sample_partition_masks( + candidates_exp, recommender.max_n_partitions, min_candidates=batch_size ) def make_callable( @@ -58,7 +58,7 @@ def make_callable( def optimize() -> tuple[pd.Index, Tensor]: subset = candidates_exp.loc[mask] - idxs = recommend_discrete_without_subspaces( + idxs = recommend_discrete_without_partitions( recommender, subspace_discrete, subset, batch_size ) @@ -70,11 +70,11 @@ def optimize() -> tuple[pd.Index, Tensor]: return optimize callables = (make_callable(m) for m in masks) - best_idxs, _ = recommender._optimize_over_subspaces(callables) + best_idxs, _ = recommender._optimize_over_partitions(callables) return best_idxs -def recommend_discrete_without_subspaces( +def recommend_discrete_without_partitions( recommender: BotorchRecommender, subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py index 631d4f24d8..d3eddd31f0 100644 --- a/baybe/recommenders/pure/bayesian/botorch/hybrid.py +++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py @@ -27,7 +27,7 @@ from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender -def recommend_hybrid_without_subspaces( +def recommend_hybrid_without_partitions( recommender: BotorchRecommender, searchspace: SearchSpace, candidates_exp: pd.DataFrame, @@ -161,17 +161,17 @@ def recommend_hybrid_without_subspaces( return rec_exp -def recommend_hybrid_with_subspaces( +def recommend_hybrid_with_partitions( recommender: BotorchRecommender, searchspace: SearchSpace, candidates_exp: pd.DataFrame, batch_size: int, ) -> pd.DataFrame: - """Recommend from a hybrid space with subspace-generating constraints. 
+ """Recommend from a hybrid space with partitioning constraints. - Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian - product of discrete and continuous subspace configurations, capped at - ``max_n_subspaces`` total. In purely discrete search spaces, subspaces + Uses ``SearchSpace.partitions()`` to enumerate the Cartesian + product of discrete and continuous partition configurations, capped at + ``max_n_partitions`` total. In purely discrete search spaces, partitions with fewer candidates than ``batch_size`` are pre-filtered. Args: @@ -188,16 +188,16 @@ def recommend_hybrid_with_subspaces( subspace_c = searchspace.continuous - # Get combined configurations, capped at max_n_subspaces + # Get combined configurations, capped at max_n_partitions # NOTE: No min_discrete_candidates filtering in hybrid spaces because # optimize_acqf_mixed can produce multiple recommendations from a single # discrete candidate by varying continuous parameters. combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] - if searchspace.n_theoretical_subspaces <= recommender.max_n_subspaces: - combined_masks = searchspace.subspaces(candidates_exp) + if searchspace.n_theoretical_partitions <= recommender.max_n_partitions: + combined_masks = searchspace.partitions(candidates_exp) else: - combined_masks = searchspace.sample_subspaces( - candidates_exp, recommender.max_n_subspaces + combined_masks = searchspace.sample_partitions( + candidates_exp, recommender.max_n_partitions ) def make_callable( @@ -217,7 +217,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: mod_cont = subspace_c mod_searchspace = evolve(searchspace, continuous=mod_cont) - rec = recommend_hybrid_without_subspaces( + rec = recommend_hybrid_without_partitions( recommender, mod_searchspace, subset, batch_size ) @@ -231,7 +231,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: return optimize callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) - best_rec, _ = recommender._optimize_over_subspaces(callables) + best_rec, _ = recommender._optimize_over_partitions(callables) # Post-check minimum cardinality on continuous columns if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index 72f06b2b41..d8e5156c72 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -42,17 +42,17 @@ def _recommend_hybrid( if searchspace.type is SearchSpaceType.CONTINUOUS: return cont_random - # Restrict to a random subspace if batch constraints are present + # Restrict to a random partition if batch constraints are present if searchspace.discrete.constraints_batch: - masks = searchspace.discrete.sample_subspace_masks( + masks = searchspace.discrete.sample_partition_masks( candidates_exp, n=1, min_candidates=None if is_hybrid else batch_size, ) if not masks: raise InfeasibilityError( - "No feasible subspace found for the given " - "batch constraints. All subspaces have fewer " + "No feasible partition found for the given " + "batch constraints. All partitions have fewer " f"candidates than the requested {batch_size=}." 
) candidates_exp = candidates_exp.loc[masks[0]] diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index d47ea53132..cafbaad909 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -147,8 +147,8 @@ def _validate_constraints_lin_ineq( ) @property - def n_theoretical_subspaces(self) -> int: - """The theoretical number of possible subspace configurations. + def n_theoretical_partitions(self) -> int: + """The theoretical number of possible partition configurations. Returns 0 if no cardinality constraints exist, indicating that no decomposition is needed. diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 1c35e4f61f..319f6dc712 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -289,45 +289,45 @@ def n_tasks(self) -> int: return 1 @property - def n_theoretical_subspaces(self) -> int: - """Total theoretical number of subspace configurations. + def n_theoretical_partitions(self) -> int: + """Total theoretical number of partition configurations. - Returns 0 if no subspace-generating constraints exist on either side. + Returns 0 if no partitioning constraints exist on either side. When only one side has constraints, the other does not contribute to the count. """ - d = self.discrete.n_theoretical_subspaces - c = self.continuous.n_theoretical_subspaces + d = self.discrete.n_theoretical_partitions + c = self.continuous.n_theoretical_partitions if d == 0 == c: return 0 return max(d, 1) * max(c, 1) - def subspaces( # noqa: DOC404 + def partitions( # noqa: DOC404 self, candidates_exp: pd.DataFrame, min_discrete_candidates: int | None = None, ) -> Iterator[tuple[npt.NDArray[np.bool_], frozenset[str]]]: - r"""Get an iterator over all combined subspace configurations. + r"""Get an iterator over all combined partition configurations. Yields the Cartesian product of discrete masks and continuous configurations. Args: candidates_exp: The experimental representation of discrete candidates. - min_discrete_candidates: If provided, discrete subspaces with fewer + min_discrete_candidates: If provided, discrete partitions with fewer matching candidates are skipped. Yields: A discrete mask and continuous inactive parameters pair. """ yield from product( - self.discrete.subspace_masks( + self.discrete.partition_masks( candidates_exp, min_candidates=min_discrete_candidates ), self.continuous.inactive_parameter_combinations(), ) - def sample_subspaces( + def sample_partitions( self, candidates_exp: pd.DataFrame, n: int, @@ -335,7 +335,7 @@ def sample_subspaces( *, max_rejections: int = 10, ) -> list[tuple[npt.NDArray[np.bool_], frozenset[str]]]: - """Sample unique combined subspace configurations. + """Sample unique combined partition configurations. Zips two independent with-replacement iterators from the discrete and continuous sides, producing random pairs from the Cartesian product. @@ -344,19 +344,19 @@ def sample_subspaces( Args: candidates_exp: The experimental representation of discrete candidates. n: Number of unique configurations to sample. - min_discrete_candidates: If provided, discrete subspaces with fewer + min_discrete_candidates: If provided, discrete partitions with fewer matching candidates are excluded. max_rejections: Maximum number of times a duplicate combination can be drawn before raising ``InfeasibilityError``. Raises: - InfeasibilityError: If not enough unique subspace configurations + InfeasibilityError: If not enough unique partition configurations are available. 
Returns: A list of ``(discrete_mask, continuous_inactive_params)`` tuples. """ - d_iter = self.discrete.subspace_masks( + d_iter = self.discrete.partition_masks( candidates_exp, min_candidates=min_discrete_candidates, shuffle=True, @@ -376,7 +376,7 @@ def sample_subspaces( rejections += 1 if rejections > max_rejections: raise InfeasibilityError( - f"Not enough unique subspace configurations available. " + f"Not enough unique partition configurations available. " f"Requested {n} but only {len(results)} could be found." ) continue diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index a41bed31ed..2f30dad10b 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -587,8 +587,8 @@ def constraints_batch( ) @property - def n_theoretical_subspaces(self) -> int: - """The theoretical number of possible subspace configurations. + def n_theoretical_partitions(self) -> int: + """The theoretical number of possible partition configurations. Returns 0 if no batch constraints exist, indicating that no decomposition is needed. @@ -600,7 +600,7 @@ def n_theoretical_subspaces(self) -> int: for c in self.constraints_batch ) - def subspace_masks( # noqa: DOC404 + def partition_masks( # noqa: DOC404 self, candidates_exp: pd.DataFrame, min_candidates: int | None = None, @@ -608,7 +608,7 @@ def subspace_masks( # noqa: DOC404 shuffle: bool = False, replace: bool = False, ) -> Iterator[npt.NDArray[np.bool_]]: - r"""Get an iterator over all possible subspace masks. + r"""Get an iterator over all possible partition masks. Collects masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible @@ -625,7 +625,7 @@ def subspace_masks( # noqa: DOC404 indices are permanently excluded from the sampling pool. Yields: - A boolean mask selecting the subspace's rows. + A boolean mask selecting the partition's rows. """ constraints = self.constraints_batch if not constraints: @@ -633,13 +633,13 @@ def subspace_masks( # noqa: DOC404 [np.ones(len(candidates_exp), dtype=bool)] ] else: - per_constraint = [c.subspace_masks(candidates_exp) for c in constraints] + per_constraint = [c.partition_masks(candidates_exp) for c in constraints] total = prod(len(masks) for masks in per_constraint) def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: # Decompose flat index into per-constraint indices. - # Example with 3 constraints of subspace lengths [3, 2, 4]: + # Example with 3 constraints of partition lengths [3, 2, 4]: # flat_idx=11 -> divmod(11,3)=(3,2) -> A[2] # divmod(3,2)=(1,1) -> B[1] # divmod(1,4)=(0,1) -> C[1] @@ -672,18 +672,18 @@ def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: continue yield combined - def sample_subspace_masks( + def sample_partition_masks( self, candidates_exp: pd.DataFrame, n: int, min_candidates: int | None = None, ) -> list[npt.NDArray[np.bool_]]: - """Sample subspace masks. + """Sample partition masks. Args: candidates_exp: The experimental representation of candidate points. n: Number of masks to sample. - min_candidates: If provided, subspaces with fewer matching + min_candidates: If provided, partitions with fewer matching candidates are skipped. 
Returns: @@ -691,7 +691,7 @@ def sample_subspace_masks( """ return list( islice( - self.subspace_masks(candidates_exp, min_candidates, shuffle=True), + self.partition_masks(candidates_exp, min_candidates, shuffle=True), n, ) ) diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py index e2a552c70d..7cc14f59ec 100644 --- a/tests/constraints/test_batch_constraint.py +++ b/tests/constraints/test_batch_constraint.py @@ -107,22 +107,22 @@ def test_batch_constraint_validation_duplicate(): ), ], ) -def test_batch_constraint_n_theoretical_subspaces(constraints, expected): - """The n_theoretical_subspaces property returns the correct count.""" +def test_batch_constraint_n_theoretical_partitions(constraints, expected): + """The n_theoretical_partitions property returns the correct count.""" assert ( - SearchSpace.from_product(_params, constraints).discrete.n_theoretical_subspaces + SearchSpace.from_product(_params, constraints).discrete.n_theoretical_partitions == expected ) -def test_batch_constraint_all_subspaces_too_small(): - """All subspaces infeasible raises InfeasibilityError.""" +def test_batch_constraint_all_partitions_too_small(): + """All partitions infeasible raises InfeasibilityError.""" searchspace = SearchSpace.from_product( _params, [DiscreteBatchConstraint(parameters=["d0"])] ) measurements = create_fake_input(_params, [TARGET], n_rows=2) - # Each d0 subspace has 3 candidates, batch_size=4 exceeds all + # Each d0 partition has 3 candidates, batch_size=4 exceeds all with pytest.raises(InfeasibilityError): BotorchRecommender().recommend( 4, searchspace, TARGET.to_objective(), measurements @@ -137,13 +137,13 @@ def test_batch_constraint_all_subspaces_too_small(): param(3, 3, id="all_retained"), ], ) -def test_subspace_masks_min_candidates(min_candidates, expected_count): - """Subspace mask filtering by min_candidates.""" +def test_partition_masks_min_candidates(min_candidates, expected_count): + """Partition mask filtering by min_candidates.""" searchspace = SearchSpace.from_product( _params, [DiscreteBatchConstraint(parameters=["d0"])] ) masks = list( - searchspace.discrete.subspace_masks( + searchspace.discrete.partition_masks( searchspace.discrete.exp_rep, min_candidates=min_candidates ) ) diff --git a/tests/constraints/test_subspace_constraints_hybrid.py b/tests/constraints/test_partition_constraints_hybrid.py similarity index 94% rename from tests/constraints/test_subspace_constraints_hybrid.py rename to tests/constraints/test_partition_constraints_hybrid.py index 420111950a..1bb1d3d57a 100644 --- a/tests/constraints/test_subspace_constraints_hybrid.py +++ b/tests/constraints/test_partition_constraints_hybrid.py @@ -1,4 +1,4 @@ -"""Tests for subspace-generating constraints in hybrid search spaces.""" +"""Tests for partitioning constraints in hybrid search spaces.""" import pytest from pytest import param @@ -85,8 +85,8 @@ ), ], ) -def test_subspace_constraints_hybrid(constraints): - """Subspace-generating constraints are respected in hybrid search spaces.""" +def test_partition_constraints_hybrid(constraints): + """Partitioning constraints are respected in hybrid search spaces.""" searchspace = SearchSpace.from_product(_all_params, constraints) measurements = create_fake_input(_all_params, [TARGET], n_rows=3) From 7fdfb5c9979d6adf1815294e0f9af43dd531f347 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 19:28:37 +0100 Subject: [PATCH 18/39] Update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e669f82e0..9b3be7e82c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Coding convention instructions for agentic developers (`AGENTS.md`, `CLAUDE.md`) - `has_polars_implementation` property on `DiscreteConstraint` - `allow_missing` flag on `DiscreteConstraint.get_invalid` and `get_valid` +- `DiscreteBatchConstraint` for ensuring all recommendations in a batch share + the same value for a specified discrete parameter ### Breaking Changes - `parameter_cartesian_prod_pandas` and `parameter_cartesian_prod_polars` moved @@ -26,6 +28,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Broken cache validation for certain `Campaign.recommend` cases +- `ContinuousCardinalityConstraint` now works in hybrid search spaces +- Typo in `_FixedNumericalContinuousParameter` where `is_numeric` was used + instead of `is_numerical` - `SHAPInsight` breaking with `numpy>=2.4` due to no longer accepted implicit array to scalar conversion - Using `np.isclose` for assessing equality of `Interval` bounds instead of hard @@ -46,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 can now be conveniently controlled via the new `Settings` mechanism ### Deprecations +- `BotorchRecommender.max_n_subspaces` has been renamed to `max_n_partitions` - `set_random_seed` and `temporary_seed` utility functions - The environment variables `BAYBE_NUMPY_USE_SINGLE_PRECISION`/`BAYBE_TORCH_USE_SINGLE_PRECISION` have been From 5ab2a6d03911dab72a914b5d020dd4a4b9d065f6 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 9 Apr 2026 17:51:06 +0200 Subject: [PATCH 19/39] Improve docstring Co-authored-by: Alexander V. Hopp --- baybe/constraints/discrete.py | 4 ++-- baybe/searchspace/discrete.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 590ae8c0ab..e9a141fd18 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -431,8 +431,8 @@ class DiscreteBatchConstraint(DiscreteConstraint): """Constraint ensuring all batch recommendations share the same parameter value. When this constraint is active, the recommender internally partitions the - candidate set into partitions — one for each unique value of the constrained - parameter — obtains a full batch recommendation from each partition, and + candidate set into partitions (one for each unique value of the constrained + parameter), obtains a full batch recommendation from each partition, and returns the batch with the highest joint acquisition value. This constraint is not supported by all recommenders. It is not applied during diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 2f30dad10b..4611f7fdd0 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -608,7 +608,7 @@ def partition_masks( # noqa: DOC404 shuffle: bool = False, replace: bool = False, ) -> Iterator[npt.NDArray[np.bool_]]: - r"""Get an iterator over all possible partition masks. + """Get an iterator over all possible partition masks. 
Collects masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible From 231f82ac122459d0157fcda0c4b300605d671121 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 9 Apr 2026 18:18:23 +0200 Subject: [PATCH 20/39] Use consistent mask type hint in discrete recommender --- baybe/recommenders/pure/bayesian/botorch/discrete.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py index 21085195b0..61280092f1 100644 --- a/baybe/recommenders/pure/bayesian/botorch/discrete.py +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -6,6 +6,7 @@ from typing import TYPE_CHECKING import numpy as np +import numpy.typing as npt import pandas as pd from baybe.searchspace import SubspaceDiscrete @@ -42,7 +43,7 @@ def recommend_discrete_with_partitions( """ import torch - masks: Iterable[np.ndarray] + masks: Iterable[npt.NDArray[np.bool_]] if subspace_discrete.n_theoretical_partitions <= recommender.max_n_partitions: masks = subspace_discrete.partition_masks( candidates_exp, min_candidates=batch_size From f27cbca92cf631df2885e61f7e6661325df24369 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 10 Apr 2026 20:27:41 +0200 Subject: [PATCH 21/39] Mention replacement in docstring --- baybe/searchspace/discrete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 4611f7fdd0..b597f6a273 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -610,7 +610,7 @@ def partition_masks( # noqa: DOC404 ) -> Iterator[npt.NDArray[np.bool_]]: """Get an iterator over all possible partition masks. - Collects masks from each batch constraint, iterates the + Collect masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible combined masks. @@ -678,7 +678,7 @@ def sample_partition_masks( n: int, min_candidates: int | None = None, ) -> list[npt.NDArray[np.bool_]]: - """Sample partition masks. + """Sample partition masks (without replacement). Args: candidates_exp: The experimental representation of candidate points. From ce9c5e3d217f340537757bbe4a6120e27685c3ec Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 10 Apr 2026 20:39:37 +0200 Subject: [PATCH 22/39] Mention computational expense --- baybe/constraints/continuous.py | 9 ++++++++- baybe/constraints/discrete.py | 6 ++++++ docs/userguide/constraints.md | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index 69be29f16a..c41498b182 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -203,7 +203,14 @@ def to_botorch( class ContinuousCardinalityConstraint( CardinalityConstraint, ContinuousNonlinearConstraint ): - """Class for continuous cardinality constraints.""" + """Class for continuous cardinality constraints. + + Notes: + This constraint can lead to overhead in the computation since optimization + results in individual optimizations over several partitions. If there are + multiple partition-generating constraints active, this can drastically increase + the computational cost due to the combinatorial explosion. 
+ """ relative_threshold: float = field( default=1e-3, converter=float, validator=[gt(0.0), lt(1.0)] diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index e9a141fd18..0e923a9534 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -442,6 +442,12 @@ class DiscreteBatchConstraint(DiscreteConstraint): If parameter ``Temperature`` has values ``[50, 100, 150]`` and a batch of 10 is requested, the recommender will generate three candidate batches (one all-50, one all-100, one all-150) and return the best one. + + Notes: + This constraint can lead to overhead in the computation since optimization + results in individual optimizations over several partitions. If there are + multiple partition-generating constraints active, this can drastically increase + the computational cost due to the combinatorial explosion. """ # Class variables diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index d14e3dd831..7a8195aaaa 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -570,6 +570,13 @@ In this case, each recommended batch will share both the same temperature and th solvent. The optimizer evaluates the Cartesian product of possible value combinations and selects the best one. +```{admonition} Computational Expense +:class: warning +This constraint can lead to overhead in the computation. If there are multiple +partition-generating constraints active, this can drastically increase the +computational cost due to the combinatorial explosion. +``` + ```{admonition} Recommender Compatibility :class: warning The `DiscreteBatchConstraint` is only effective with recommenders that can compare From f8c1905f90106513e9e7c3789ac41c00cdcfc598 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 5 May 2026 11:08:47 +0200 Subject: [PATCH 23/39] Simplify infinite iterator in inactive_parameter_combinations --- baybe/searchspace/continuous.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index cafbaad909..a055befbe6 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -192,10 +192,8 @@ def _resolve_flat_idx(flat_idx: int) -> frozenset[str]: return frozenset(chain(*combo)) if replace: - candidates = list(range(total)) - while candidates: - idx_pos = random.randint(0, len(candidates) - 1) - yield _resolve_flat_idx(candidates[idx_pos]) + while True: + yield _resolve_flat_idx(random.randint(0, total - 1)) else: order = list(range(total)) if shuffle: From 32a6edf06e22076690bbe69ed95abf7a78d269af Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 5 May 2026 11:33:31 +0200 Subject: [PATCH 24/39] Disable pydoclint yield type checking (DOC404) globally Consistent with the existing check-return-types=False setting. --- baybe/_optional/info.py | 2 +- baybe/searchspace/continuous.py | 2 +- baybe/searchspace/core.py | 2 +- baybe/searchspace/discrete.py | 2 +- baybe/utils/random.py | 2 +- pydoclint.toml | 3 ++- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/baybe/_optional/info.py b/baybe/_optional/info.py index 40add2f32d..7608db4ff4 100644 --- a/baybe/_optional/info.py +++ b/baybe/_optional/info.py @@ -7,7 +7,7 @@ @contextmanager -def exclude_sys_path(path: str, /): # noqa: DOC402, DOC404 +def exclude_sys_path(path: str, /): # noqa: DOC402 """Temporarily remove a specified path from `sys.path`. 
Args: diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index a055befbe6..e169184353 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -159,7 +159,7 @@ def n_theoretical_partitions(self) -> int: c.n_inactive_parameter_combinations for c in self.constraints_cardinality ) - def inactive_parameter_combinations( # noqa: DOC404 + def inactive_parameter_combinations( self, *, shuffle: bool = False, diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 319f6dc712..9936726af2 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -302,7 +302,7 @@ def n_theoretical_partitions(self) -> int: return 0 return max(d, 1) * max(c, 1) - def partitions( # noqa: DOC404 + def partitions( self, candidates_exp: pd.DataFrame, min_discrete_candidates: int | None = None, diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index b597f6a273..14b60d6e04 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -600,7 +600,7 @@ def n_theoretical_partitions(self) -> int: for c in self.constraints_batch ) - def partition_masks( # noqa: DOC404 + def partition_masks( self, candidates_exp: pd.DataFrame, min_candidates: int | None = None, diff --git a/baybe/utils/random.py b/baybe/utils/random.py index b912b148b3..349aae4893 100644 --- a/baybe/utils/random.py +++ b/baybe/utils/random.py @@ -42,7 +42,7 @@ def set_random_seed(seed: int): "https://emdgroup.github.io/baybe/stable/userguide/settings.html", ) @contextlib.contextmanager -def temporary_seed(seed: int): # noqa: DOC402, DOC404 +def temporary_seed(seed: int): # noqa: DOC402 """Context manager for setting a temporary random seed. Args: diff --git a/pydoclint.toml b/pydoclint.toml index a9447a670a..196d599be9 100644 --- a/pydoclint.toml +++ b/pydoclint.toml @@ -9,8 +9,9 @@ style=google # We do not repeat parameter type hints in docstrings arg-type-hints-in-docstring=False -# We do not repeat return types in the docstrings +# We do not repeat return/yield types in the docstrings check-return-types=False +check-yield-types=False # As of 0.5.2, pydoclint breaks with attrs classes # https://github.com/jsh9/pydoclint/issues/140 From 86c87b27b90075e58edc6680bf584c38a17dfcb2 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 17:34:53 +0200 Subject: [PATCH 25/39] Update docstring Co-authored-by: AdrianSosic --- baybe/constraints/discrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 0e923a9534..ec41c2507a 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -428,7 +428,7 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: @define class DiscreteBatchConstraint(DiscreteConstraint): - """Constraint ensuring all batch recommendations share the same parameter value. + """Constraint ensuring recommendations in a batch share certain parameter values. 
When this constraint is active, the recommender internally partitions the candidate set into partitions (one for each unique value of the constrained From d2120607558f5a158a42f3543d5fcdc459b6928f Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 17:35:47 +0200 Subject: [PATCH 26/39] Update signature Co-authored-by: AdrianSosic --- baybe/constraints/discrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index ec41c2507a..aaecde8f6f 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -483,7 +483,7 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: return pd.Index([]) def partition_masks( - self, candidates_exp: pd.DataFrame + self, candidates_exp: pd.DataFrame, / ) -> list[npt.NDArray[np.bool_]]: """Return boolean masks defining the partitions for this constraint. From a835cd4d69633958235b5be09b78905d81d0d858 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 17:49:59 +0200 Subject: [PATCH 27/39] Update docstring Co-authored-by: AdrianSosic --- baybe/constraints/discrete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index aaecde8f6f..19bde55ea4 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -485,7 +485,7 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: def partition_masks( self, candidates_exp: pd.DataFrame, / ) -> list[npt.NDArray[np.bool_]]: - """Return boolean masks defining the partitions for this constraint. + """Return Boolean masks defining the partitions for this constraint. Each mask selects the rows in ``candidates_exp`` that belong to one partition, i.e. share the same value for the constrained parameter. @@ -494,7 +494,7 @@ def partition_masks( candidates_exp: The experimental representation of candidate points. Returns: - A list of boolean masks, one per unique value of the constrained + A list of Boolean masks, one per unique value of the constrained parameter. 
""" param = self.parameters[0] From 41902c3a3f61e82d39ea2c019b134bb1a2548343 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 17:51:06 +0200 Subject: [PATCH 28/39] Fix formatting Co-authored-by: AdrianSosic --- baybe/searchspace/continuous.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index e169184353..c25f1a52db 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -110,9 +110,7 @@ def __str__(self) -> str: return to_string(self.__class__.__name__, *fields) @property - def constraints_cardinality( - self, - ) -> tuple[ContinuousCardinalityConstraint, ...]: + def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...]: """The cardinality constraints of the subspace.""" return tuple( c From d76a6a3a4a62404af472932810c26b329672ce89 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 17:59:57 +0200 Subject: [PATCH 29/39] Improve language Co-authored-by: AdrianSosic --- baybe/constraints/discrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 19bde55ea4..af974f5d92 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -431,7 +431,7 @@ class DiscreteBatchConstraint(DiscreteConstraint): """Constraint ensuring recommendations in a batch share certain parameter values. When this constraint is active, the recommender internally partitions the - candidate set into partitions (one for each unique value of the constrained + candidate set (one partition for each unique value of the constrained parameter), obtains a full batch recommendation from each partition, and returns the batch with the highest joint acquisition value. From 2f14ea668e20acffcb202da200059d5b28a247c7 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 18:03:12 +0200 Subject: [PATCH 30/39] Improve language Co-authored-by: AdrianSosic --- docs/userguide/constraints.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index 7a8195aaaa..7fee4d5996 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -579,7 +579,7 @@ computational cost due to the combinatorial explosion. ```{admonition} Recommender Compatibility :class: warning -The `DiscreteBatchConstraint` is only effective with recommenders that can compare +The `DiscreteBatchConstraint` is only compatible with recommenders that can compare batch-level outcomes, such as {class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender` and {class}`~baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender`. From 5ebccade09778573dc49638314dc2e3ba4e1fc70 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 18:19:45 +0200 Subject: [PATCH 31/39] Make formatting consistent --- baybe/constraints/discrete.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index af974f5d92..1aef6c611d 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -453,9 +453,7 @@ class DiscreteBatchConstraint(DiscreteConstraint): # Class variables eval_during_creation: ClassVar[bool] = False eval_during_modeling: ClassVar[bool] = True - numerical_only: ClassVar[bool] = False - # See base class. 
def __attrs_post_init__(self): """Validate that exactly one parameter is specified.""" From bc19835dedc1404010368b45683bfc70f4527d69 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 19:23:18 +0200 Subject: [PATCH 32/39] Turn docstring into comment --- baybe/constraints/discrete.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 1aef6c611d..f300e0a9de 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -465,19 +465,10 @@ def __attrs_post_init__(self): @override def get_invalid(self, data: pd.DataFrame) -> pd.Index: - """Get the indices of invalid rows. - - Always returns an empty index because this constraint operates at the - batch level, not the row level. Individual rows are never invalid; the - constraint is enforced at recommendation time by partitioning candidates - into partitions. - - Args: - data: A dataframe where each row represents a parameter configuration. - - Returns: - An empty index. - """ + # Always returns an empty index because this constraint operates at the + # batch level, not the row level. Individual rows are never invalid; the + # constraint is enforced at recommendation time by partitioning candidates + # into partitions. return pd.Index([]) def partition_masks( From a980be90a52acb6db61fbd5623710d696094fcfb Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 19:23:23 +0200 Subject: [PATCH 33/39] Improve tests --- tests/constraints/test_batch_constraint.py | 89 +++++++++++++++------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py index 7cc14f59ec..f2389195bc 100644 --- a/tests/constraints/test_batch_constraint.py +++ b/tests/constraints/test_batch_constraint.py @@ -1,12 +1,20 @@ """Tests for the discrete batch constraint.""" +from contextlib import nullcontext + import pytest from pytest import param +from baybe.constraints import DiscreteExcludeConstraint, SubSelectionCondition from baybe.constraints.discrete import DiscreteBatchConstraint -from baybe.exceptions import IncompatibilityError, InfeasibilityError +from baybe.exceptions import ( + IncompatibilityError, + InfeasibilityError, + UnusedObjectWarning, +) from baybe.parameters.numerical import NumericalDiscreteParameter from baybe.recommenders import BotorchRecommender +from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender from baybe.recommenders.pure.nonpredictive.sampling import ( FPSRecommender, RandomRecommender, @@ -25,13 +33,31 @@ @pytest.mark.parametrize( - ("constraints", "constrained_params", "batch_size"), + ("constraints", "constrained_params", "batch_size", "recommender"), [ param( [DiscreteBatchConstraint(parameters=["d0"])], ["d0"], BATCH_SIZE, - id="single", + BotorchRecommender(), + id="botorch-single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + ["d0", "d1"], + 1, + BotorchRecommender(), + id="botorch-multiple", + ), + param( + [DiscreteBatchConstraint(parameters=["d0"])], + ["d0"], + BATCH_SIZE, + RandomRecommender(), + id="random-single", ), param( [ @@ -40,33 +66,29 @@ ], ["d0", "d1"], 1, - id="multiple", + RandomRecommender(), + id="random-multiple", ), ], ) -def test_batch_constraint_bayesian(constraints, constrained_params, batch_size): - """BotorchRecommender respects batch constraints.""" +def 
test_batch_constraint(constraints, constrained_params, batch_size, recommender): + """Recommenders respecting batch constraints return uniform batches.""" searchspace = SearchSpace.from_product(_params, constraints) measurements = create_fake_input(_params, [TARGET], n_rows=3) - - rec = BotorchRecommender().recommend( - batch_size, searchspace, TARGET.to_objective(), measurements + ctx = ( + pytest.warns(UnusedObjectWarning) + if isinstance(recommender, NonPredictiveRecommender) + else nullcontext() ) + with ctx: + rec = recommender.recommend( + batch_size, searchspace, TARGET.to_objective(), measurements + ) assert rec.shape[0] == batch_size for p in constrained_params: assert rec[p].nunique() == 1 -def test_batch_constraint_random_recommender(): - """RandomRecommender respects the batch constraint.""" - searchspace = SearchSpace.from_product( - _params, [DiscreteBatchConstraint(parameters=["d0"])] - ) - rec = RandomRecommender().recommend(BATCH_SIZE, searchspace) - assert rec["d0"].nunique() == 1 - assert rec.shape[0] == BATCH_SIZE - - def test_batch_constraint_unsupported_recommender(): """Unsupported recommenders raise IncompatibilityError.""" searchspace = SearchSpace.from_product( @@ -130,18 +152,31 @@ def test_batch_constraint_all_partitions_too_small(): @pytest.mark.parametrize( - ("min_candidates", "expected_count"), + ("min_candidates", "expected_count", "constraint"), [ - param(None, 3, id="no_filter"), - param(4, 0, id="all_skipped"), - param(3, 3, id="all_retained"), + param(None, 3, None, id="no_filter"), + param(4, 0, None, id="all_skipped"), + param(3, 3, None, id="all_retained"), + param( + 2, + 2, + DiscreteExcludeConstraint( + parameters=["d0", "d1"], + conditions=[ + SubSelectionCondition(selection=[0.0]), + SubSelectionCondition(selection=[0.0, 0.5]), + ], + ), + id="partial", + ), ], ) -def test_partition_masks_min_candidates(min_candidates, expected_count): +def test_partition_masks_min_candidates(min_candidates, expected_count, constraint): """Partition mask filtering by min_candidates.""" - searchspace = SearchSpace.from_product( - _params, [DiscreteBatchConstraint(parameters=["d0"])] - ) + constraints = [DiscreteBatchConstraint(parameters=["d0"])] + if constraint is not None: + constraints.append(constraint) + searchspace = SearchSpace.from_product(_params, constraints) masks = list( searchspace.discrete.partition_masks( searchspace.discrete.exp_rep, min_candidates=min_candidates From 615b58108ba2b5dd03e169acb6f723e08a478bfb Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 7 May 2026 19:23:30 +0200 Subject: [PATCH 34/39] Fix post-rebase issues - Rename DiscreteBatchConstraint.get_invalid to _get_invalid to align with the abstract method rename introduced on main - Add DiscreteBatchConstraint to DISCRETE_CONSTRAINTS_FILTERING_ORDER so build_constrained_product (introduced on main) can sort it - Ignore BadInitialCandidatesWarning in pytest.ini; the warning fires non-deterministically in heavily constrained spaces regardless of data volume --- baybe/constraints/discrete.py | 3 ++- pytest.ini | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index f300e0a9de..48a166946c 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -464,7 +464,7 @@ def __attrs_post_init__(self): ) @override - def get_invalid(self, data: pd.DataFrame) -> pd.Index: + def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: # Always returns an empty index because this constraint 
operates at the # batch level, not the row level. Individual rows are never invalid; the # constraint is enforced at recommendation time by partitioning candidates @@ -534,6 +534,7 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: DiscreteCustomConstraint, DiscretePermutationInvarianceConstraint, DiscreteDependenciesConstraint, + DiscreteBatchConstraint, ) # Prevent (de-)serialization of custom constraints diff --git a/pytest.ini b/pytest.ini index 350de2c1d5..a544c244d7 100644 --- a/pytest.ini +++ b/pytest.ini @@ -43,4 +43,7 @@ filterwarnings = ignore:`scipy_minimize` terminated with status .*:botorch.exceptions.warnings.OptimizationWarning:.*botorch/fit ; BoTorch warning recommending to switch to log-versions of acquisition functions ignore:.*has known numerical issues that lead to suboptimal optimization performance.*:botorch.exceptions.warnings.NumericsWarning:botorch.acquisition + ; BoTorch warning when all raw sample acquisition values are identical; fires + ; non-deterministically in heavily constrained spaces regardless of data volume + ignore:All acquisition values for raw samples points are the same.*:botorch.exceptions.warnings.BadInitialCandidatesWarning From e6b60fc5787f03e4bd0f42922119c48fb63d867e Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 8 May 2026 07:33:53 +0200 Subject: [PATCH 35/39] Refactor constraint parametrization --- tests/constraints/test_batch_constraint.py | 48 ++++++---------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py index f2389195bc..cdaa89adfc 100644 --- a/tests/constraints/test_batch_constraint.py +++ b/tests/constraints/test_batch_constraint.py @@ -33,46 +33,22 @@ @pytest.mark.parametrize( - ("constraints", "constrained_params", "batch_size", "recommender"), + "recommender", [ - param( - [DiscreteBatchConstraint(parameters=["d0"])], - ["d0"], - BATCH_SIZE, - BotorchRecommender(), - id="botorch-single", - ), - param( - [ - DiscreteBatchConstraint(parameters=["d0"]), - DiscreteBatchConstraint(parameters=["d1"]), - ], - ["d0", "d1"], - 1, - BotorchRecommender(), - id="botorch-multiple", - ), - param( - [DiscreteBatchConstraint(parameters=["d0"])], - ["d0"], - BATCH_SIZE, - RandomRecommender(), - id="random-single", - ), - param( - [ - DiscreteBatchConstraint(parameters=["d0"]), - DiscreteBatchConstraint(parameters=["d1"]), - ], - ["d0", "d1"], - 1, - RandomRecommender(), - id="random-multiple", - ), + param(BotorchRecommender(), id="botorch"), + param(RandomRecommender(), id="random"), + ], +) +@pytest.mark.parametrize( + ("constrained_params", "batch_size"), + [ + param(["d0"], BATCH_SIZE, id="single"), + param(["d0", "d1"], 1, id="multiple"), ], ) -def test_batch_constraint(constraints, constrained_params, batch_size, recommender): +def test_batch_constraint(constrained_params, batch_size, recommender): """Recommenders respecting batch constraints return uniform batches.""" + constraints = [DiscreteBatchConstraint(parameters=[p]) for p in constrained_params] searchspace = SearchSpace.from_product(_params, constraints) measurements = create_fake_input(_params, [TARGET], n_rows=3) ctx = ( From 7a5d7252763cbcf396c88268702c959e64420fde Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 8 May 2026 17:10:15 +0200 Subject: [PATCH 36/39] Extract select_via_flat_index as shared utility --- baybe/searchspace/continuous.py | 19 +++++++++---------- baybe/searchspace/discrete.py | 24 +++++++----------------- 
baybe/searchspace/utils.py | 32 +++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index c25f1a52db..5aa604fb54 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -32,6 +32,7 @@ get_parameters_from_dataframe, sort_parameters, ) +from baybe.searchspace.utils import select_via_flat_index from baybe.searchspace.validation import ( validate_parameter_names, ) @@ -181,23 +182,21 @@ def inactive_parameter_combinations( total = math.prod(len(v) for v in per_constraint) - def _resolve_flat_idx(flat_idx: int) -> frozenset[str]: - combo = [] - remaining = flat_idx - for values in per_constraint: - remaining, idx = divmod(remaining, len(values)) - combo.append(values[idx]) - return frozenset(chain(*combo)) - if replace: while True: - yield _resolve_flat_idx(random.randint(0, total - 1)) + yield frozenset( + chain( + *select_via_flat_index( + random.randint(0, total - 1), per_constraint + ) + ) + ) else: order = list(range(total)) if shuffle: random.shuffle(order) for flat_idx in order: - yield _resolve_flat_idx(flat_idx) + yield frozenset(chain(*select_via_flat_index(flat_idx, per_constraint))) @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 14b60d6e04..745017eafa 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -28,7 +28,7 @@ ) from baybe.parameters.base import DiscreteParameter from baybe.parameters.utils import get_parameters_from_dataframe, sort_parameters -from baybe.searchspace.utils import build_constrained_product +from baybe.searchspace.utils import build_constrained_product, select_via_flat_index from baybe.searchspace.validation import validate_parameter_names, validate_parameters from baybe.serialization import SerialMixin, converter, select_constructor_hook from baybe.settings import active_settings @@ -637,26 +637,14 @@ def partition_masks( total = prod(len(masks) for masks in per_constraint) - def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: - # Decompose flat index into per-constraint indices. 
- # Example with 3 constraints of partition lengths [3, 2, 4]: - # flat_idx=11 -> divmod(11,3)=(3,2) -> A[2] - # divmod(3,2)=(1,1) -> B[1] - # divmod(1,4)=(0,1) -> C[1] - # Result: masks A[2] AND B[1] AND C[1] - masks = [] - remaining = flat_idx - for constraint_masks in per_constraint: - remaining, idx = divmod(remaining, len(constraint_masks)) - masks.append(constraint_masks[idx]) - return np.logical_and.reduce(masks) - if replace: candidates = list(range(total)) while candidates: idx_pos = random.randint(0, len(candidates) - 1) flat_idx = candidates[idx_pos] - combined = _resolve_flat_idx(flat_idx) + combined = np.logical_and.reduce( + select_via_flat_index(flat_idx, per_constraint) + ) if min_candidates is not None and combined.sum() < min_candidates: candidates[idx_pos] = candidates[-1] candidates.pop() @@ -667,7 +655,9 @@ def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: if shuffle: random.shuffle(order) for flat_idx in order: - combined = _resolve_flat_idx(flat_idx) + combined = np.logical_and.reduce( + select_via_flat_index(flat_idx, per_constraint) + ) if min_candidates is not None and combined.sum() < min_candidates: continue yield combined diff --git a/baybe/searchspace/utils.py b/baybe/searchspace/utils.py index 48cbf45524..2c4e23ce17 100644 --- a/baybe/searchspace/utils.py +++ b/baybe/searchspace/utils.py @@ -3,7 +3,7 @@ from __future__ import annotations from collections.abc import Collection, Sequence -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypeVar import pandas as pd @@ -14,6 +14,36 @@ if TYPE_CHECKING: import polars as pl +_T = TypeVar("_T") + + +def select_via_flat_index(flat_idx: int, groups: Sequence[Sequence[_T]]) -> list[_T]: + """Select one element per group using a flat Cartesian-product index. + + Maps a single integer index over the Cartesian product of ``groups`` to the + corresponding element from each group, using repeated ``divmod`` to unpack + the mixed-radix representation. + + Note: + Given groups of sizes ``[3, 2, 4]`` and ``flat_idx=11``, + ``divmod(11, 3)`` yields index ``2`` from group 0, + ``divmod(3, 2)`` yields index ``1`` from group 1, and + ``divmod(1, 4)`` yields index ``1`` from group 2. + + Args: + flat_idx: The flat index into the Cartesian product of all groups. + groups: The groups to select from, one element selected per group. + + Returns: + A list of selected elements, one per group. 
+ """ + selected = [] + remaining = flat_idx + for group in groups: + remaining, idx = divmod(remaining, len(group)) + selected.append(group[idx]) + return selected + def optimize_parameter_order( parameters: Sequence[DiscreteParameter], From 1ee36c2c996e6c9f72cbf10a229891930b9d2973 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 8 May 2026 17:37:48 +0200 Subject: [PATCH 37/39] Rename partition to subset --- baybe/constraints/continuous.py | 4 +- baybe/constraints/discrete.py | 20 +++---- .../pure/bayesian/botorch/continuous.py | 16 ++--- .../pure/bayesian/botorch/core.py | 59 +++++++++---------- .../pure/bayesian/botorch/discrete.py | 22 +++---- .../pure/bayesian/botorch/hybrid.py | 26 ++++---- .../pure/nonpredictive/sampling.py | 8 +-- baybe/searchspace/continuous.py | 4 +- baybe/searchspace/core.py | 30 +++++----- baybe/searchspace/discrete.py | 20 +++---- tests/constraints/test_batch_constraint.py | 18 +++--- ...d.py => test_subset_constraints_hybrid.py} | 6 +- 12 files changed, 116 insertions(+), 117 deletions(-) rename tests/constraints/{test_partition_constraints_hybrid.py => test_subset_constraints_hybrid.py} (94%) diff --git a/baybe/constraints/continuous.py b/baybe/constraints/continuous.py index c41498b182..a16210cca1 100644 --- a/baybe/constraints/continuous.py +++ b/baybe/constraints/continuous.py @@ -207,8 +207,8 @@ class ContinuousCardinalityConstraint( Notes: This constraint can lead to overhead in the computation since optimization - results in individual optimizations over several partitions. If there are - multiple partition-generating constraints active, this can drastically increase + results in individual optimizations over several subsets. If there are + multiple subset-generating constraints active, this can drastically increase the computational cost due to the combinatorial explosion. """ diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 48a166946c..84f364e52f 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -430,9 +430,9 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: class DiscreteBatchConstraint(DiscreteConstraint): """Constraint ensuring recommendations in a batch share certain parameter values. - When this constraint is active, the recommender internally partitions the - candidate set (one partition for each unique value of the constrained - parameter), obtains a full batch recommendation from each partition, and + When this constraint is active, the recommender internally subsets the + candidate set (one subset for each unique value of the constrained + parameter), obtains a full batch recommendation from each subset, and returns the batch with the highest joint acquisition value. This constraint is not supported by all recommenders. It is not applied during @@ -445,8 +445,8 @@ class DiscreteBatchConstraint(DiscreteConstraint): Notes: This constraint can lead to overhead in the computation since optimization - results in individual optimizations over several partitions. If there are - multiple partition-generating constraints active, this can drastically increase + results in individual optimizations over several subsets. If there are + multiple subset-generating constraints active, this can drastically increase the computational cost due to the combinatorial explosion. 
""" @@ -467,17 +467,17 @@ def __attrs_post_init__(self): def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: # Always returns an empty index because this constraint operates at the # batch level, not the row level. Individual rows are never invalid; the - # constraint is enforced at recommendation time by partitioning candidates - # into partitions. + # constraint is enforced at recommendation time by subsetting candidates + # into subsets. return pd.Index([]) - def partition_masks( + def subset_masks( self, candidates_exp: pd.DataFrame, / ) -> list[npt.NDArray[np.bool_]]: - """Return Boolean masks defining the partitions for this constraint. + """Return Boolean masks defining the subsets for this constraint. Each mask selects the rows in ``candidates_exp`` that belong to one - partition, i.e. share the same value for the constrained parameter. + subset, i.e. share the same value for the constrained parameter. Args: candidates_exp: The experimental representation of candidate points. diff --git a/baybe/recommenders/pure/bayesian/botorch/continuous.py b/baybe/recommenders/pure/bayesian/botorch/continuous.py index 252bdfde6f..e91243528f 100644 --- a/baybe/recommenders/pure/bayesian/botorch/continuous.py +++ b/baybe/recommenders/pure/bayesian/botorch/continuous.py @@ -47,15 +47,15 @@ def recommend_continuous_with_cardinality_constraints( ) -> tuple[Tensor, Tensor]: """Recommend from a continuous space with cardinality constraints. - Optimizes the acquisition function across partitions defined by cardinality + Optimizes the acquisition function across subsets defined by cardinality constraints and returns the best result. - The specific collection of partitions considered by the recommender is obtained + The specific collection of subsets considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random selection thereof, depending on the upper bound specified by the corresponding recommender attribute. - In each partition, the constraint-imposed configuration is fixed, so that the + In each subset, the constraint-imposed configuration is fixed, so that the constraints can be removed and a regular optimization can be performed. The recommendation is then constructed from the combined optimization results of the unconstrained spaces. @@ -79,16 +79,16 @@ def recommend_continuous_with_cardinality_constraints( f"expects a subspace with cardinality constraints." 
) - # Determine search scope based on number of partition configurations + # Determine search scope based on number of subset configurations configs: Iterable[frozenset[str]] - if subspace_continuous.n_theoretical_partitions <= recommender.max_n_partitions: + if subspace_continuous.n_theoretical_subsets <= recommender.max_n_subsets: configs = subspace_continuous.inactive_parameter_combinations() else: configs = subspace_continuous._sample_inactive_parameters( - recommender.max_n_partitions + recommender.max_n_subsets ) - # Create closures for each partition configuration + # Create closures for each subset configuration def make_callable( inactive_params: Collection[str], ) -> Callable[[], tuple[Tensor, Tensor]]: @@ -108,7 +108,7 @@ def optimize() -> tuple[Tensor, Tensor]: return optimize callables = (make_callable(ip) for ip in configs) - points, acqf_value = recommender._optimize_over_partitions(callables) + points, acqf_value = recommender._optimize_over_subsets(callables) # Check if any minimum cardinality constraints are violated if not is_cardinality_fulfilled( diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py index 2fcc5e759d..4548435a63 100644 --- a/baybe/recommenders/pure/bayesian/botorch/core.py +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -23,12 +23,12 @@ recommend_continuous_torch, ) from baybe.recommenders.pure.bayesian.botorch.discrete import ( - recommend_discrete_with_partitions, - recommend_discrete_without_partitions, + recommend_discrete_with_subsets, + recommend_discrete_without_subsets, ) from baybe.recommenders.pure.bayesian.botorch.hybrid import ( - recommend_hybrid_with_partitions, - recommend_hybrid_without_partitions, + recommend_hybrid_with_subsets, + recommend_hybrid_without_subsets, ) from baybe.searchspace import ( SearchSpace, @@ -92,34 +92,33 @@ class BotorchRecommender(BayesianRecommender): optimization. **Does not affect purely discrete optimization**. """ - max_n_partitions: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Maximum number of partitions to evaluate when partitioning constraints are + max_n_subsets: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Maximum number of subsets to evaluate when subset constraints are present (e.g., continuous cardinality constraints). If the total number of - partitions - exceeds this limit, a random subset of that size is sampled for optimization instead - of performing an exhaustive search.""" + subsets exceeds this limit, a random subset of that size is sampled for + optimization instead of performing an exhaustive search.""" @property def max_n_subspaces(self) -> int: - """Deprecated! Use ``max_n_partitions`` instead.""" + """Deprecated! Use ``max_n_subsets`` instead.""" warnings.warn( - "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "'max_n_subspaces' has been renamed to 'max_n_subsets' and will " "be removed in a future version.", DeprecationWarning, stacklevel=2, ) - return self.max_n_partitions + return self.max_n_subsets @max_n_subspaces.setter def max_n_subspaces(self, value: int) -> None: - """Deprecated! Use ``max_n_partitions`` instead.""" # noqa: D401 + """Deprecated! 
Use ``max_n_subsets`` instead."""  # noqa: D401
         warnings.warn(
-            "'max_n_subspaces' has been renamed to 'max_n_partitions' and will "
+            "'max_n_subspaces' has been renamed to 'max_n_subsets' and will "
             "be removed in a future version.",
             DeprecationWarning,
             stacklevel=2,
         )
-        self.max_n_partitions = value
+        self.max_n_subsets = value

     @sampling_percentage.validator
     def _validate_percentage(  # noqa: DOC101, DOC103
@@ -177,10 +176,10 @@ def _recommend_discrete(
             experimental representation.
         """
         if subspace_discrete.constraints_batch:
-            return recommend_discrete_with_partitions(
+            return recommend_discrete_with_subsets(
                 self, subspace_discrete, candidates_exp, batch_size
             )
-        return recommend_discrete_without_partitions(
+        return recommend_discrete_without_subsets(
             self, subspace_discrete, candidates_exp, batch_size
         )

@@ -228,7 +227,7 @@ def _recommend_hybrid(
         """Generate recommendations from a hybrid search space.

         Dispatches to the appropriate optimization routine depending on whether
-        partitioning constraints are present.
+        subset constraints are present.

         Args:
             searchspace: The search space in which the recommendations should be made.
@@ -243,41 +242,41 @@ def _recommend_hybrid(
             searchspace.discrete.constraints_batch
             or searchspace.continuous.constraints_cardinality
         ):
-            return recommend_hybrid_with_partitions(
+            return recommend_hybrid_with_subsets(
                 self, searchspace, candidates_exp, batch_size
             )
-        return recommend_hybrid_without_partitions(
+        return recommend_hybrid_without_subsets(
             self, searchspace, candidates_exp, batch_size
         )

-    def _optimize_over_partitions(
+    def _optimize_over_subsets(
         self,
-        partition_callables: Iterable[Callable[[], tuple[Any, Tensor]]],
+        subset_callables: Iterable[Callable[[], tuple[Any, Tensor]]],
     ) -> tuple[Any, Tensor]:
-        """Optimize across partitions and return the result with the best acqf value.
+        """Optimize across subsets and return the result with the best acqf value.

-        Each callable performs optimization for one partition configuration and returns
-        a ``(result, acquisition_value)`` tuple. Partitions that raise
+        Each callable performs optimization for one subset configuration and returns
+        a ``(result, acquisition_value)`` tuple. Subsets that raise
         ``InfeasibilityError`` are silently skipped.

         Args:
-            partition_callables: An iterable of zero-argument callables. Each callable
-                runs the optimization for one partition and returns
+            subset_callables: An iterable of zero-argument callables. Each callable
+                runs the optimization for one subset and returns
                 ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the
-                partition is infeasible.
+                subset is infeasible.

         Raises:
-            InfeasibilityError: If none of the partitions has a feasible solution.
+            InfeasibilityError: If none of the subsets has a feasible solution.

         Returns:
-            The result and acquisition value of the best partition.
+            The result and acquisition value of the best subset.
""" from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError results_all: list = [] acqf_values_all: list[Tensor] = [] - for optimize_fn in partition_callables: + for optimize_fn in subset_callables: try: result, acqf_value = optimize_fn() results_all.append(result) diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py index 61280092f1..f5f89abb0b 100644 --- a/baybe/recommenders/pure/bayesian/botorch/discrete.py +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -18,7 +18,7 @@ from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender -def recommend_discrete_with_partitions( +def recommend_discrete_with_subsets( recommender: BotorchRecommender, subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, @@ -26,9 +26,9 @@ def recommend_discrete_with_partitions( ) -> pd.Index: """Recommend from a discrete space with batch constraints. - Partitions the candidate set according to batch constraints, - runs optimization on each feasible partition, and returns the batch with - the highest joint acquisition value. Partitions with fewer candidates + Splits the candidate set into subsets according to batch constraints, + runs optimization on each feasible subset, and returns the batch with + the highest joint acquisition value. Subsets with fewer candidates than ``batch_size`` are skipped. Args: @@ -44,13 +44,13 @@ def recommend_discrete_with_partitions( import torch masks: Iterable[npt.NDArray[np.bool_]] - if subspace_discrete.n_theoretical_partitions <= recommender.max_n_partitions: - masks = subspace_discrete.partition_masks( + if subspace_discrete.n_theoretical_subsets <= recommender.max_n_subsets: + masks = subspace_discrete.subset_masks( candidates_exp, min_candidates=batch_size ) else: - masks = subspace_discrete.sample_partition_masks( - candidates_exp, recommender.max_n_partitions, min_candidates=batch_size + masks = subspace_discrete.sample_subset_masks( + candidates_exp, recommender.max_n_subsets, min_candidates=batch_size ) def make_callable( @@ -59,7 +59,7 @@ def make_callable( def optimize() -> tuple[pd.Index, Tensor]: subset = candidates_exp.loc[mask] - idxs = recommend_discrete_without_partitions( + idxs = recommend_discrete_without_subsets( recommender, subspace_discrete, subset, batch_size ) @@ -71,11 +71,11 @@ def optimize() -> tuple[pd.Index, Tensor]: return optimize callables = (make_callable(m) for m in masks) - best_idxs, _ = recommender._optimize_over_partitions(callables) + best_idxs, _ = recommender._optimize_over_subsets(callables) return best_idxs -def recommend_discrete_without_partitions( +def recommend_discrete_without_subsets( recommender: BotorchRecommender, subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py index d3eddd31f0..9b504fa149 100644 --- a/baybe/recommenders/pure/bayesian/botorch/hybrid.py +++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py @@ -27,7 +27,7 @@ from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender -def recommend_hybrid_without_partitions( +def recommend_hybrid_without_subsets( recommender: BotorchRecommender, searchspace: SearchSpace, candidates_exp: pd.DataFrame, @@ -161,17 +161,17 @@ def recommend_hybrid_without_partitions( return rec_exp -def recommend_hybrid_with_partitions( +def recommend_hybrid_with_subsets( recommender: BotorchRecommender, searchspace: 
SearchSpace, candidates_exp: pd.DataFrame, batch_size: int, ) -> pd.DataFrame: - """Recommend from a hybrid space with partitioning constraints. + """Recommend from a hybrid space with subset constraints. - Uses ``SearchSpace.partitions()`` to enumerate the Cartesian - product of discrete and continuous partition configurations, capped at - ``max_n_partitions`` total. In purely discrete search spaces, partitions + Uses ``SearchSpace.subsets()`` to enumerate the Cartesian + product of discrete and continuous subset configurations, capped at + ``max_n_subsets`` total. In purely discrete search spaces, subsets with fewer candidates than ``batch_size`` are pre-filtered. Args: @@ -188,16 +188,16 @@ def recommend_hybrid_with_partitions( subspace_c = searchspace.continuous - # Get combined configurations, capped at max_n_partitions + # Get combined configurations, capped at max_n_subsets # NOTE: No min_discrete_candidates filtering in hybrid spaces because # optimize_acqf_mixed can produce multiple recommendations from a single # discrete candidate by varying continuous parameters. combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] - if searchspace.n_theoretical_partitions <= recommender.max_n_partitions: - combined_masks = searchspace.partitions(candidates_exp) + if searchspace.n_theoretical_subsets <= recommender.max_n_subsets: + combined_masks = searchspace.subsets(candidates_exp) else: - combined_masks = searchspace.sample_partitions( - candidates_exp, recommender.max_n_partitions + combined_masks = searchspace.sample_subsets( + candidates_exp, recommender.max_n_subsets ) def make_callable( @@ -217,7 +217,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: mod_cont = subspace_c mod_searchspace = evolve(searchspace, continuous=mod_cont) - rec = recommend_hybrid_without_partitions( + rec = recommend_hybrid_without_subsets( recommender, mod_searchspace, subset, batch_size ) @@ -231,7 +231,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: return optimize callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) - best_rec, _ = recommender._optimize_over_partitions(callables) + best_rec, _ = recommender._optimize_over_subsets(callables) # Post-check minimum cardinality on continuous columns if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index d8e5156c72..3fa933c79f 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -42,17 +42,17 @@ def _recommend_hybrid( if searchspace.type is SearchSpaceType.CONTINUOUS: return cont_random - # Restrict to a random partition if batch constraints are present + # Restrict to a random subset if batch constraints are present if searchspace.discrete.constraints_batch: - masks = searchspace.discrete.sample_partition_masks( + masks = searchspace.discrete.sample_subset_masks( candidates_exp, n=1, min_candidates=None if is_hybrid else batch_size, ) if not masks: raise InfeasibilityError( - "No feasible partition found for the given " - "batch constraints. All partitions have fewer " + "No feasible subset found for the given " + "batch constraints. All subsets have fewer " f"candidates than the requested {batch_size=}." 
)
             candidates_exp = candidates_exp.loc[masks[0]]

diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py
index 5aa604fb54..49b491cbeb 100644
--- a/baybe/searchspace/continuous.py
+++ b/baybe/searchspace/continuous.py
@@ -146,8 +146,8 @@ def _validate_constraints_lin_ineq(
         )

     @property
-    def n_theoretical_partitions(self) -> int:
-        """The theoretical number of possible partition configurations.
+    def n_theoretical_subsets(self) -> int:
+        """The theoretical number of possible subset configurations.

         Returns 0 if no cardinality constraints exist, indicating that no
         decomposition is needed.
diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py
index 9936726af2..3b3ca52da5 100644
--- a/baybe/searchspace/core.py
+++ b/baybe/searchspace/core.py
@@ -289,45 +289,45 @@ def n_tasks(self) -> int:
         return 1

     @property
-    def n_theoretical_partitions(self) -> int:
-        """Total theoretical number of partition configurations.
+    def n_theoretical_subsets(self) -> int:
+        """Total theoretical number of subset configurations.

-        Returns 0 if no partitioning constraints exist on either side.
+        Returns 0 if no subset constraints exist on either side.
         When only one side has constraints, the other does not contribute
         to the count.
         """
-        d = self.discrete.n_theoretical_partitions
-        c = self.continuous.n_theoretical_partitions
+        d = self.discrete.n_theoretical_subsets
+        c = self.continuous.n_theoretical_subsets
         if d == 0 == c:
             return 0
         return max(d, 1) * max(c, 1)

-    def partitions(
+    def subsets(
         self,
         candidates_exp: pd.DataFrame,
         min_discrete_candidates: int | None = None,
     ) -> Iterator[tuple[npt.NDArray[np.bool_], frozenset[str]]]:
-        r"""Get an iterator over all combined partition configurations.
+        r"""Get an iterator over all combined subset configurations.

         Yields the Cartesian product of discrete masks and continuous
         configurations.

         Args:
             candidates_exp: The experimental representation of discrete candidates.
-            min_discrete_candidates: If provided, discrete partitions with fewer
+            min_discrete_candidates: If provided, discrete subsets with fewer
                 matching candidates are skipped.

         Yields:
             A discrete mask and continuous inactive parameters pair.
         """
         yield from product(
-            self.discrete.partition_masks(
+            self.discrete.subset_masks(
                 candidates_exp, min_candidates=min_discrete_candidates
             ),
             self.continuous.inactive_parameter_combinations(),
         )

-    def sample_partitions(
+    def sample_subsets(
         self,
         candidates_exp: pd.DataFrame,
         n: int,
@@ -335,7 +335,7 @@ def sample_partitions(
         *,
         max_rejections: int = 10,
     ) -> list[tuple[npt.NDArray[np.bool_], frozenset[str]]]:
-        """Sample unique combined partition configurations.
+        """Sample unique combined subset configurations.

         Zips two independent with-replacement iterators from the discrete and
         continuous sides, producing random pairs from the Cartesian product.
@@ -344,19 +344,19 @@ def sample_partitions(
         Args:
             candidates_exp: The experimental representation of discrete candidates.
             n: Number of unique configurations to sample.
-            min_discrete_candidates: If provided, discrete partitions with fewer
+            min_discrete_candidates: If provided, discrete subsets with fewer
                 matching candidates are excluded.
             max_rejections: Maximum number of times a duplicate combination can
                 be drawn before raising ``InfeasibilityError``.

         Raises:
-            InfeasibilityError: If not enough unique partition configurations
+            InfeasibilityError: If not enough unique subset configurations
                 are available.

         Returns:
             A list of ``(discrete_mask, continuous_inactive_params)`` tuples.
""" - d_iter = self.discrete.partition_masks( + d_iter = self.discrete.subset_masks( candidates_exp, min_candidates=min_discrete_candidates, shuffle=True, @@ -376,7 +376,7 @@ def sample_partitions( rejections += 1 if rejections > max_rejections: raise InfeasibilityError( - f"Not enough unique partition configurations available. " + f"Not enough unique subset configurations available. " f"Requested {n} but only {len(results)} could be found." ) continue diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 745017eafa..a95e21c266 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -587,8 +587,8 @@ def constraints_batch( ) @property - def n_theoretical_partitions(self) -> int: - """The theoretical number of possible partition configurations. + def n_theoretical_subsets(self) -> int: + """The theoretical number of possible subset configurations. Returns 0 if no batch constraints exist, indicating that no decomposition is needed. @@ -600,7 +600,7 @@ def n_theoretical_partitions(self) -> int: for c in self.constraints_batch ) - def partition_masks( + def subset_masks( self, candidates_exp: pd.DataFrame, min_candidates: int | None = None, @@ -608,7 +608,7 @@ def partition_masks( shuffle: bool = False, replace: bool = False, ) -> Iterator[npt.NDArray[np.bool_]]: - """Get an iterator over all possible partition masks. + """Get an iterator over all possible subset masks. Collect masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible @@ -625,7 +625,7 @@ def partition_masks( indices are permanently excluded from the sampling pool. Yields: - A boolean mask selecting the partition's rows. + A boolean mask selecting the subset's rows. """ constraints = self.constraints_batch if not constraints: @@ -633,7 +633,7 @@ def partition_masks( [np.ones(len(candidates_exp), dtype=bool)] ] else: - per_constraint = [c.partition_masks(candidates_exp) for c in constraints] + per_constraint = [c.subset_masks(candidates_exp) for c in constraints] total = prod(len(masks) for masks in per_constraint) @@ -662,18 +662,18 @@ def partition_masks( continue yield combined - def sample_partition_masks( + def sample_subset_masks( self, candidates_exp: pd.DataFrame, n: int, min_candidates: int | None = None, ) -> list[npt.NDArray[np.bool_]]: - """Sample partition masks (without replacement). + """Sample subset masks (without replacement). Args: candidates_exp: The experimental representation of candidate points. n: Number of masks to sample. - min_candidates: If provided, partitions with fewer matching + min_candidates: If provided, Subsets with fewer matching candidates are skipped. 
Returns: @@ -681,7 +681,7 @@ def sample_partition_masks( """ return list( islice( - self.partition_masks(candidates_exp, min_candidates, shuffle=True), + self.subset_masks(candidates_exp, min_candidates, shuffle=True), n, ) ) diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py index cdaa89adfc..6324cf10a0 100644 --- a/tests/constraints/test_batch_constraint.py +++ b/tests/constraints/test_batch_constraint.py @@ -105,22 +105,22 @@ def test_batch_constraint_validation_duplicate(): ), ], ) -def test_batch_constraint_n_theoretical_partitions(constraints, expected): - """The n_theoretical_partitions property returns the correct count.""" +def test_batch_constraint_n_theoretical_subsets(constraints, expected): + """The n_theoretical_subsets property returns the correct count.""" assert ( - SearchSpace.from_product(_params, constraints).discrete.n_theoretical_partitions + SearchSpace.from_product(_params, constraints).discrete.n_theoretical_subsets == expected ) -def test_batch_constraint_all_partitions_too_small(): - """All partitions infeasible raises InfeasibilityError.""" +def test_batch_constraint_all_subsets_too_small(): + """All subsets infeasible raises InfeasibilityError.""" searchspace = SearchSpace.from_product( _params, [DiscreteBatchConstraint(parameters=["d0"])] ) measurements = create_fake_input(_params, [TARGET], n_rows=2) - # Each d0 partition has 3 candidates, batch_size=4 exceeds all + # Each d0 subset has 3 candidates, batch_size=4 exceeds all with pytest.raises(InfeasibilityError): BotorchRecommender().recommend( 4, searchspace, TARGET.to_objective(), measurements @@ -147,14 +147,14 @@ def test_batch_constraint_all_partitions_too_small(): ), ], ) -def test_partition_masks_min_candidates(min_candidates, expected_count, constraint): - """Partition mask filtering by min_candidates.""" +def test_subset_masks_min_candidates(min_candidates, expected_count, constraint): + """Subset mask filtering by min_candidates.""" constraints = [DiscreteBatchConstraint(parameters=["d0"])] if constraint is not None: constraints.append(constraint) searchspace = SearchSpace.from_product(_params, constraints) masks = list( - searchspace.discrete.partition_masks( + searchspace.discrete.subset_masks( searchspace.discrete.exp_rep, min_candidates=min_candidates ) ) diff --git a/tests/constraints/test_partition_constraints_hybrid.py b/tests/constraints/test_subset_constraints_hybrid.py similarity index 94% rename from tests/constraints/test_partition_constraints_hybrid.py rename to tests/constraints/test_subset_constraints_hybrid.py index 1bb1d3d57a..c96cd18cc5 100644 --- a/tests/constraints/test_partition_constraints_hybrid.py +++ b/tests/constraints/test_subset_constraints_hybrid.py @@ -1,4 +1,4 @@ -"""Tests for partitioning constraints in hybrid search spaces.""" +"""Tests for subset constraints in hybrid search spaces.""" import pytest from pytest import param @@ -85,8 +85,8 @@ ), ], ) -def test_partition_constraints_hybrid(constraints): - """Partitioning constraints are respected in hybrid search spaces.""" +def test_subset_constraints_hybrid(constraints): + """Subset constraints are respected in hybrid search spaces.""" searchspace = SearchSpace.from_product(_all_params, constraints) measurements = create_fake_input(_all_params, [TARGET], n_rows=3) From 86aaaf93229d1dc9d8fc721101f995af61e2dbc2 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 8 May 2026 18:22:15 +0200 Subject: [PATCH 38/39] Fix docstring --- 
baybe/surrogates/gaussian_process/presets/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index 8b06b313d7..ea4aba81cd 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -17,7 +17,7 @@ class GaussianProcessPreset(Enum): def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: - """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 + """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset`.""" # noqa: E501 from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate if preset is GaussianProcessPreset.BAYBE: From c1b33ac51091dc3e70acf9778a17388655bb8cf6 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 8 May 2026 21:11:43 +0200 Subject: [PATCH 39/39] Add formatting rule to AGENTS.md --- AGENTS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index d44c1cc9ad..edf183f45e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -131,6 +131,8 @@ Custom `@classproperty` from `baybe.utils.basic` for class-level computed proper - No private field names (`_attr`) in user-facing messages — use `fields(type(self)).attr.alias`. - Method names start with verbs. Comments capitalize first word. +- Always capitalize words that correspond to names of inventors, e.g. `Bayesian`, + `Boolean` or `Gaussian` ## 5. Type Annotations - **Full coverage**: All signatures including returns. Every field annotated. @@ -236,7 +238,7 @@ Three tiers: - Cache invalidation: `on_setattr` hooks on mutable fields. ## 12. Optional Dependencies -1. Detection (`baybe/_optional/info.py`): `importlib.util.find_spec()` sets boolean +1. Detection (`baybe/_optional/info.py`): `importlib.util.find_spec()` sets Boolean flags (`CHEM_INSTALLED`, `ONNX_INSTALLED`, etc.) without importing. 2. Guarded imports (`baybe/_optional/.py`): Import or raise `OptionalImportError` with pip install instructions.
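---

Editor's note (illustrative only, not part of the patch series): the mixed-radix decoding behind `select_via_flat_index`, introduced in `baybe/searchspace/utils.py` earlier in this series, can be exercised with a minimal sketch. The group contents below are hypothetical; the snippet assumes the patched module is importable.

# Illustrative sketch only; assumes the patched baybe.searchspace.utils is importable.
from itertools import product

from baybe.searchspace.utils import select_via_flat_index

groups = [["a0", "a1", "a2"], ["b0", "b1"], ["c0", "c1", "c2", "c3"]]

# Mixed-radix decoding of flat_idx=11 over group sizes [3, 2, 4]:
#   divmod(11, 3) -> (3, 2): pick "a2"
#   divmod(3, 2)  -> (1, 1): pick "b1"
#   divmod(1, 4)  -> (0, 1): pick "c1"
assert select_via_flat_index(11, groups) == ["a2", "b1", "c1"]

# Sweeping all flat indices enumerates the full Cartesian product exactly once,
# mirroring how subset_masks() iterates combinations of per-constraint masks.
n_total = 3 * 2 * 4  # 24
decoded = {tuple(select_via_flat_index(i, groups)) for i in range(n_total)}
assert decoded == set(product(*groups))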