diff --git a/bofire/benchmarks/hyperopt.py b/bofire/benchmarks/hyperopt.py index c49f2d7b9..61e1f8531 100644 --- a/bofire/benchmarks/hyperopt.py +++ b/bofire/benchmarks/hyperopt.py @@ -19,7 +19,7 @@ def __init__( show_progress_bar: bool = False, ) -> None: super().__init__() - if surrogate_data.hyperconfig is None: + if surrogate_data.hyperconfig_access is None: raise ValueError("No hyperoptimization configuration found.") self.surrogate_data = surrogate_data self.training_data = training_data @@ -30,11 +30,11 @@ def __init__( @property def domain(self) -> Domain: - return self.surrogate_data.hyperconfig.domain # type: ignore + return self.surrogate_data.hyperconfig_access.domain # type: ignore @property def target_metric(self): - return self.surrogate_data.hyperconfig.target_metric # type: ignore + return self.surrogate_data.hyperconfig_access.target_metric # type: ignore def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: for i, candidate in tqdm( diff --git a/bofire/data_models/surrogates/bnn.py b/bofire/data_models/surrogates/bnn.py index 3b73259ba..c540999d5 100644 --- a/bofire/data_models/surrogates/bnn.py +++ b/bofire/data_models/surrogates/bnn.py @@ -1,11 +1,31 @@ -from typing import Literal, Optional +from typing import Literal, Optional, Type +from pydantic import Field + +from bofire.data_models.features.api import AnyOutput +from bofire.data_models.features.continuous import ContinuousOutput from bofire.data_models.kernels.api import InfiniteWidthBNNKernel -from bofire.data_models.surrogates.single_task_gp import SingleTaskGPSurrogate +from bofire.data_models.priors.api import HVARFNER_NOISE_PRIOR, AnyPrior +from bofire.data_models.surrogates.single_task_gp import TrainableBotorchSurrogate from bofire.data_models.surrogates.trainable import Hyperconfig -class SingleTaskIBNNSurrogate(SingleTaskGPSurrogate): +class SingleTaskIBNNSurrogate(TrainableBotorchSurrogate): type: Literal["SingleTaskIBNNSurrogate"] = "SingleTaskIBNNSurrogate" kernel: 
InfiniteWidthBNNKernel = InfiniteWidthBNNKernel() hyperconfig: Optional[Hyperconfig] = None + noise_prior: AnyPrior = Field(default_factory=lambda: HVARFNER_NOISE_PRIOR()) + + @classmethod + def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: + """Abstract method to check output type for surrogate models + Args: + my_type: continuous or categorical output + Returns: + bool: True if the output type is valid for the surrogate chosen, False otherwise + """ + return isinstance(my_type, type(ContinuousOutput)) + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig diff --git a/bofire/data_models/surrogates/fully_bayesian.py b/bofire/data_models/surrogates/fully_bayesian.py index 6096cef2c..789d60b03 100644 --- a/bofire/data_models/surrogates/fully_bayesian.py +++ b/bofire/data_models/surrogates/fully_bayesian.py @@ -1,8 +1,9 @@ -from typing import Annotated, List, Literal, Type +from typing import Annotated, List, Literal, Optional, Type from pydantic import AfterValidator, Field, field_validator, model_validator from bofire.data_models.features.api import AnyOutput, ContinuousOutput +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate from bofire.data_models.types import make_unique_validator @@ -19,6 +20,12 @@ class FullyBayesianSingleTaskGPSurrogate(TrainableBotorchSurrogate): List[str], AfterValidator(make_unique_validator("Features")) ] = [] + hyperconfig: Optional[Hyperconfig] = None + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + @model_validator(mode="after") def validate_features_to_warp(self): input_keys = self.inputs.get_keys() diff --git a/bofire/data_models/surrogates/linear.py b/bofire/data_models/surrogates/linear.py index c20863ba4..baa05526d 100644 --- a/bofire/data_models/surrogates/linear.py +++ b/bofire/data_models/surrogates/linear.py @@ -1,4 +1,4 
@@ -from typing import Literal, Type +from typing import Literal, Optional, Type from pydantic import Field @@ -7,12 +7,19 @@ from bofire.data_models.kernels.api import LinearKernel from bofire.data_models.priors.api import THREESIX_NOISE_PRIOR, AnyPrior from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate class LinearSurrogate(TrainableBotorchSurrogate): type: Literal["LinearSurrogate"] = "LinearSurrogate" + hyperconfig: Optional[Hyperconfig] = None + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + kernel: LinearKernel = Field(default_factory=lambda: LinearKernel()) noise_prior: AnyPrior = Field(default_factory=lambda: THREESIX_NOISE_PRIOR()) scaler: ScalerEnum = ScalerEnum.NORMALIZE diff --git a/bofire/data_models/surrogates/map_saas.py b/bofire/data_models/surrogates/map_saas.py index 56ffae5a8..55a1f10f7 100644 --- a/bofire/data_models/surrogates/map_saas.py +++ b/bofire/data_models/surrogates/map_saas.py @@ -1,8 +1,9 @@ -from typing import Literal, Type +from typing import Literal, Optional, Type -from pydantic import PositiveInt +from pydantic import Field, PositiveInt from bofire.data_models.features.api import AnyOutput, ContinuousOutput +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate @@ -25,6 +26,12 @@ class AdditiveMapSaasSingleTaskGPSurrogate(TrainableBotorchSurrogate): ) n_taus: PositiveInt = 4 + hyperconfig: Optional[Hyperconfig] = Field(default=None) + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + @classmethod def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: """Abstract method to check output type for surrogate models diff --git 
a/bofire/data_models/surrogates/mixed_single_task_gp.py b/bofire/data_models/surrogates/mixed_single_task_gp.py index c19030e40..df87b9390 100644 --- a/bofire/data_models/surrogates/mixed_single_task_gp.py +++ b/bofire/data_models/surrogates/mixed_single_task_gp.py @@ -54,53 +54,6 @@ class MixedSingleTaskGPHyperconfig(Hyperconfig): "FractionalFactorialStrategy", "SoboStrategy", "RandomStrategy" ] = "FractionalFactorialStrategy" - @staticmethod - def _update_hyperparameters( - surrogate_data: "MixedSingleTaskGPSurrogate", - hyperparameters: pd.Series, - ): - if hyperparameters.prior == "mbo": - noise_prior, lengthscale_prior, _ = ( - MBO_NOISE_PRIOR(), - MBO_LENGTHSCALE_PRIOR(), - MBO_OUTPUTSCALE_PRIOR(), - ) - elif hyperparameters.prior == "threesix": - noise_prior, lengthscale_prior, _ = ( - THREESIX_NOISE_PRIOR(), - THREESIX_LENGTHSCALE_PRIOR(), - THREESIX_SCALE_PRIOR(), - ) - else: - noise_prior, lengthscale_prior = ( - HVARFNER_NOISE_PRIOR(), - HVARFNER_LENGTHSCALE_PRIOR(), - ) - - surrogate_data.noise_prior = noise_prior - if hyperparameters.continuous_kernel == "rbf": - surrogate_data.continuous_kernel = RBFKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - ) - - elif hyperparameters.continuous_kernel == "matern_2.5": - surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - nu=2.5, - ) - - elif hyperparameters.continuous_kernel == "matern_1.5": - surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - nu=1.5, - ) - - else: - raise ValueError(f"Kernel {hyperparameters.kernel} not known.") - class MixedSingleTaskGPSurrogate(TrainableBotorchSurrogate): type: Literal["MixedSingleTaskGPSurrogate"] = "MixedSingleTaskGPSurrogate" @@ -121,6 +74,10 @@ class MixedSingleTaskGPSurrogate(TrainableBotorchSurrogate): default_factory=lambda: MixedSingleTaskGPHyperconfig(), ) + @property + def hyperconfig_access(self) -> 
Optional[Hyperconfig]: + return self.hyperconfig + @classmethod def _default_categorical_encodings( cls, @@ -190,3 +147,51 @@ def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: bool: True if the output type is valid for the surrogate chosen, False otherwise """ return isinstance(my_type, type(ContinuousOutput)) + + def update_hyperparameters( + self, + hyperparameters: pd.Series, + ): + super().update_hyperparameters(hyperparameters) + + if hyperparameters.prior == "mbo": + noise_prior, lengthscale_prior, _ = ( + MBO_NOISE_PRIOR(), + MBO_LENGTHSCALE_PRIOR(), + MBO_OUTPUTSCALE_PRIOR(), + ) + elif hyperparameters.prior == "threesix": + noise_prior, lengthscale_prior, _ = ( + THREESIX_NOISE_PRIOR(), + THREESIX_LENGTHSCALE_PRIOR(), + THREESIX_SCALE_PRIOR(), + ) + else: + noise_prior, lengthscale_prior = ( + HVARFNER_NOISE_PRIOR(), + HVARFNER_LENGTHSCALE_PRIOR(), + ) + + self.noise_prior = noise_prior + if hyperparameters.continuous_kernel == "rbf": + self.continuous_kernel = RBFKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + ) + + elif hyperparameters.continuous_kernel == "matern_2.5": + self.continuous_kernel = MaternKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=2.5, + ) + + elif hyperparameters.continuous_kernel == "matern_1.5": + self.continuous_kernel = MaternKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=1.5, + ) + + else: + raise ValueError(f"Kernel {hyperparameters.kernel} not known.") diff --git a/bofire/data_models/surrogates/mlp.py b/bofire/data_models/surrogates/mlp.py index a6c4d0f2d..80d7a1379 100644 --- a/bofire/data_models/surrogates/mlp.py +++ b/bofire/data_models/surrogates/mlp.py @@ -1,5 +1,5 @@ from collections.abc import Sequence -from typing import Annotated, Literal, Type +from typing import Annotated, Literal, Optional, Type from pydantic import Field @@ -9,11 +9,18 @@ ContinuousOutput, ) from bofire.data_models.surrogates.scaler import 
ScalerEnum +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate class MLPEnsemble(TrainableBotorchSurrogate): type: Literal["MLPEnsemble"] = "MLPEnsemble" + hyperconfig: Optional[Hyperconfig] = None + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + n_estimators: Annotated[int, Field(ge=1)] = 5 hidden_layer_sizes: Sequence = (100,) activation: Literal["relu", "logistic", "tanh"] = "relu" diff --git a/bofire/data_models/surrogates/multi_task_gp.py b/bofire/data_models/surrogates/multi_task_gp.py index 9544bd96e..649f23886 100644 --- a/bofire/data_models/surrogates/multi_task_gp.py +++ b/bofire/data_models/surrogates/multi_task_gp.py @@ -44,47 +44,6 @@ class MultiTaskGPHyperconfig(Hyperconfig): "FractionalFactorialStrategy", "SoboStrategy", "RandomStrategy" ] = "FractionalFactorialStrategy" - @staticmethod - def _update_hyperparameters( - surrogate_data: "MultiTaskGPSurrogate", - hyperparameters: pd.Series, - ): - def matern_25(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: - return MaternKernel(nu=2.5, lengthscale_prior=lengthscale_prior, ard=ard) - - def matern_15(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: - return MaternKernel(nu=1.5, lengthscale_prior=lengthscale_prior, ard=ard) - - if hyperparameters.prior == "mbo": - noise_prior, lengthscale_prior = ( - MBO_NOISE_PRIOR(), - MBO_LENGTHSCALE_PRIOR(), - ) - else: - noise_prior, lengthscale_prior = ( - THREESIX_NOISE_PRIOR(), - THREESIX_LENGTHSCALE_PRIOR(), - ) - - surrogate_data.noise_prior = noise_prior - if hyperparameters.kernel == "rbf": - surrogate_data.kernel = RBFKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - ) - elif hyperparameters.kernel == "matern_2.5": - surrogate_data.kernel = matern_25( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - ) - elif hyperparameters.kernel == "matern_1.5": - 
surrogate_data.kernel = matern_15( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - ) - else: - raise ValueError(f"Kernel {hyperparameters.kernel} not known.") - class MultiTaskGPSurrogate(TrainableBotorchSurrogate): type: Literal["MultiTaskGPSurrogate"] = "MultiTaskGPSurrogate" @@ -101,6 +60,10 @@ class MultiTaskGPSurrogate(TrainableBotorchSurrogate): default_factory=lambda: MultiTaskGPHyperconfig(), ) + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + @classmethod def _default_categorical_encodings( cls, @@ -155,3 +118,45 @@ def validate_encoding(cls, v, info): ) return v + + def update_hyperparameters( + self, + hyperparameters: pd.Series, + ): + super().update_hyperparameters(hyperparameters) + + def matern_25(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: + return MaternKernel(nu=2.5, lengthscale_prior=lengthscale_prior, ard=ard) + + def matern_15(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: + return MaternKernel(nu=1.5, lengthscale_prior=lengthscale_prior, ard=ard) + + if hyperparameters.prior == "mbo": + noise_prior, lengthscale_prior = ( + MBO_NOISE_PRIOR(), + MBO_LENGTHSCALE_PRIOR(), + ) + else: + noise_prior, lengthscale_prior = ( + THREESIX_NOISE_PRIOR(), + THREESIX_LENGTHSCALE_PRIOR(), + ) + + self.noise_prior = noise_prior + if hyperparameters.kernel == "rbf": + self.kernel = RBFKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + ) + elif hyperparameters.kernel == "matern_2.5": + self.kernel = matern_25( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + ) + elif hyperparameters.kernel == "matern_1.5": + self.kernel = matern_15( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + ) + else: + raise ValueError(f"Kernel {hyperparameters.kernel} not known.") diff --git a/bofire/data_models/surrogates/polynomial.py b/bofire/data_models/surrogates/polynomial.py index 7bebd963c..068087e28 100644 --- 
a/bofire/data_models/surrogates/polynomial.py +++ b/bofire/data_models/surrogates/polynomial.py @@ -1,4 +1,4 @@ -from typing import Literal, Type +from typing import Literal, Optional, Type from pydantic import Field @@ -6,11 +6,17 @@ from bofire.data_models.features.api import AnyOutput, ContinuousOutput from bofire.data_models.kernels.api import PolynomialKernel from bofire.data_models.priors.api import THREESIX_NOISE_PRIOR, AnyPrior +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate class PolynomialSurrogate(TrainableBotorchSurrogate): type: Literal["PolynomialSurrogate"] = "PolynomialSurrogate" + hyperconfig: Optional[Hyperconfig] = None + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig kernel: PolynomialKernel = Field(default_factory=lambda: PolynomialKernel(power=2)) noise_prior: AnyPrior = Field(default_factory=lambda: THREESIX_NOISE_PRIOR()) diff --git a/bofire/data_models/surrogates/random_forest.py b/bofire/data_models/surrogates/random_forest.py index a92f5f043..758201b00 100644 --- a/bofire/data_models/surrogates/random_forest.py +++ b/bofire/data_models/surrogates/random_forest.py @@ -3,12 +3,19 @@ from pydantic import Field from bofire.data_models.features.api import AnyOutput, ContinuousOutput +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate class RandomForestSurrogate(TrainableBotorchSurrogate): type: Literal["RandomForestSurrogate"] = "RandomForestSurrogate" + hyperconfig: Optional[Hyperconfig] = None + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + # hyperparams passed down to `RandomForestRegressor` n_estimators: int = 100 criterion: Literal[ diff --git a/bofire/data_models/surrogates/robust_single_task_gp.py 
b/bofire/data_models/surrogates/robust_single_task_gp.py index 249ebf30b..ceec6a946 100644 --- a/bofire/data_models/surrogates/robust_single_task_gp.py +++ b/bofire/data_models/surrogates/robust_single_task_gp.py @@ -12,6 +12,7 @@ AnyPrior, ) from bofire.data_models.surrogates.single_task_gp import SingleTaskGPHyperconfig +from bofire.data_models.surrogates.trainable import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate @@ -34,7 +35,7 @@ class RobustSingleTaskGPSurrogate(TrainableBotorchSurrogate): For this reason, it is necessary to bound the lengthscale of the GP kernel from below. """ - type: Literal["RobustSingleTaskGPSurrogate"] = "RobustSingleTaskGPSurrogate" + type: Literal["RobustSingleTaskGPSurrogate"] = "RobustSingleTaskGPSurrogate" # type: ignore kernel: Union[ScaleKernel, RBFKernel, MaternKernel] = Field( default_factory=lambda: RBFKernel( @@ -51,6 +52,10 @@ class RobustSingleTaskGPSurrogate(TrainableBotorchSurrogate): ), ) + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + prior_mean_of_support: Optional[int] = Field(default=None) convex_parametrization: bool = Field(default=True) cache_model_trace: bool = Field(default=False) diff --git a/bofire/data_models/surrogates/shape.py b/bofire/data_models/surrogates/shape.py index 36fc807d9..75e2e760a 100644 --- a/bofire/data_models/surrogates/shape.py +++ b/bofire/data_models/surrogates/shape.py @@ -45,49 +45,6 @@ class PiecewiseLinearGPSurrogateHyperconfig(Hyperconfig): "FractionalFactorialStrategy", "SoboStrategy", "RandomStrategy" ] = "FractionalFactorialStrategy" - @staticmethod - def _update_hyperparameters( - surrogate_data: "PiecewiseLinearGPSurrogate", - hyperparameters: pd.Series, - ): - if hyperparameters.prior == "mbo": - noise_prior, lengthscale_prior, outputscale_prior = ( - MBO_NOISE_PRIOR(), - MBO_LENGTHSCALE_PRIOR(), - MBO_OUTPUTSCALE_PRIOR(), - ) - else: - noise_prior, lengthscale_prior, 
outputscale_prior = ( - THREESIX_NOISE_PRIOR(), - THREESIX_LENGTHSCALE_PRIOR(), - THREESIX_SCALE_PRIOR(), - ) - surrogate_data.noise_prior = noise_prior - surrogate_data.outputscale_prior = outputscale_prior - - if hyperparameters.continuous_kernel == "rbf": - surrogate_data.continuous_kernel = RBFKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - ) - - elif hyperparameters.continuous_kernel == "matern_2.5": - surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - nu=2.5, - ) - - elif hyperparameters.continuous_kernel == "matern_1.5": - surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, - lengthscale_prior=lengthscale_prior, - nu=1.5, - ) - - else: - raise ValueError(f"Kernel {hyperparameters.kernel} not known.") - class PiecewiseLinearGPSurrogate(TrainableBotorchSurrogate): """GP surrogate that is based on a `WassersteinKernel` for modeling functions @@ -131,6 +88,10 @@ class PiecewiseLinearGPSurrogate(TrainableBotorchSurrogate): default_factory=lambda: PiecewiseLinearGPSurrogateHyperconfig(), ) + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + shape_kernel: WassersteinKernel = Field( default_factory=lambda: WassersteinKernel( squared=False, @@ -181,3 +142,44 @@ def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: bool: True if the output type is valid for the surrogate chosen, False otherwise """ return isinstance(my_type, type(ContinuousOutput)) + + def update_hyperparameters(self, hyperparameters: pd.Series): + super().update_hyperparameters(hyperparameters) + + if hyperparameters.prior == "mbo": + noise_prior, lengthscale_prior, outputscale_prior = ( + MBO_NOISE_PRIOR(), + MBO_LENGTHSCALE_PRIOR(), + MBO_OUTPUTSCALE_PRIOR(), + ) + else: + noise_prior, lengthscale_prior, outputscale_prior = ( + THREESIX_NOISE_PRIOR(), + THREESIX_LENGTHSCALE_PRIOR(), + THREESIX_SCALE_PRIOR(), + ) + 
self.noise_prior = noise_prior + self.outputscale_prior = outputscale_prior + + if hyperparameters.continuous_kernel == "rbf": + self.continuous_kernel = RBFKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + ) + + elif hyperparameters.continuous_kernel == "matern_2.5": + self.continuous_kernel = MaternKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=2.5, + ) + + elif hyperparameters.continuous_kernel == "matern_1.5": + self.continuous_kernel = MaternKernel( + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=1.5, + ) + + else: + raise ValueError(f"Kernel {hyperparameters.kernel} not known.") diff --git a/bofire/data_models/surrogates/single_task_gp.py b/bofire/data_models/surrogates/single_task_gp.py index ed0077b33..7e4c2c5c0 100644 --- a/bofire/data_models/surrogates/single_task_gp.py +++ b/bofire/data_models/surrogates/single_task_gp.py @@ -52,13 +52,42 @@ class SingleTaskGPHyperconfig(Hyperconfig): "FractionalFactorialStrategy", "SoboStrategy", "RandomStrategy" ] = "FractionalFactorialStrategy" - @staticmethod - def _update_hyperparameters( - surrogate_data: "SingleTaskGPSurrogate", - hyperparameters: pd.Series, - outputscale_constraint: Optional[AnyPriorConstraint] = None, - lengthscale_constraint: Optional[AnyPriorConstraint] = None, - ): + +class BaseSingleTaskGPSurrogate(TrainableBotorchSurrogate): + noise_prior: AnyPrior = Field(default_factory=lambda: HVARFNER_NOISE_PRIOR()) + + @classmethod + def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: + """Abstract method to check output type for surrogate models + Args: + my_type: continuous or categorical output + Returns: + bool: True if the output type is valid for the surrogate chosen, False otherwise + """ + return isinstance(my_type, type(ContinuousOutput)) + + +class SingleTaskGPSurrogate(BaseSingleTaskGPSurrogate): + type: Literal["SingleTaskGPSurrogate"] = "SingleTaskGPSurrogate" + kernel: AnyKernel = Field( + 
default_factory=lambda: RBFKernel( + ard=True, + lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(), + ) + ) + hyperconfig: Optional[SingleTaskGPHyperconfig] = Field( + default_factory=lambda: SingleTaskGPHyperconfig() + ) + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + + def update_hyperparameters(self, hyperparameters: pd.Series): + super().update_hyperparameters(hyperparameters) + # update_hyperparameters throws when hyperconfig is None + assert self.hyperconfig is not None + def matern_25( ard: bool, lengthscale_prior: AnyPrior, @@ -101,59 +130,34 @@ def matern_15( HVARFNER_LENGTHSCALE_PRIOR(), THREESIX_SCALE_PRIOR(), ) - surrogate_data.noise_prior = noise_prior + self.noise_prior = noise_prior if hyperparameters.kernel == "rbf": base_kernel = RBFKernel( ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, - lengthscale_constraint=lengthscale_constraint, + lengthscale_constraint=self.hyperconfig.lengthscale_constraint, ) elif hyperparameters.kernel == "matern_2.5": base_kernel = matern_25( ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, - lengthscale_constraint=lengthscale_constraint, + lengthscale_constraint=self.hyperconfig.lengthscale_constraint, ) elif hyperparameters.kernel == "matern_1.5": base_kernel = matern_15( ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, - lengthscale_constraint=lengthscale_constraint, + lengthscale_constraint=self.hyperconfig.lengthscale_constraint, ) else: raise ValueError(f"Kernel {hyperparameters.kernel} not known.") if hyperparameters.scalekernel == "True": - surrogate_data.kernel = ScaleKernel( + self.kernel = ScaleKernel( base_kernel=base_kernel, outputscale_prior=outputscale_prior, - outputscale_constraint=outputscale_constraint, + outputscale_constraint=self.hyperconfig.outputscale_constraint, ) else: - surrogate_data.kernel = base_kernel - - -class SingleTaskGPSurrogate(TrainableBotorchSurrogate): - type: Literal["SingleTaskGPSurrogate"] = 
"SingleTaskGPSurrogate" - - kernel: AnyKernel = Field( - default_factory=lambda: RBFKernel( - ard=True, - lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(), - ) - ) - noise_prior: AnyPrior = Field(default_factory=lambda: HVARFNER_NOISE_PRIOR()) - hyperconfig: Optional[SingleTaskGPHyperconfig] = Field( - default_factory=lambda: SingleTaskGPHyperconfig(), - ) - - @classmethod - def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: - """Abstract method to check output type for surrogate models - Args: - my_type: continuous or categorical output - Returns: - bool: True if the output type is valid for the surrogate chosen, False otherwise - """ - return isinstance(my_type, type(ContinuousOutput)) + self.kernel = base_kernel diff --git a/bofire/data_models/surrogates/tanimoto_gp.py b/bofire/data_models/surrogates/tanimoto_gp.py index f5962eb01..ccce3670b 100644 --- a/bofire/data_models/surrogates/tanimoto_gp.py +++ b/bofire/data_models/surrogates/tanimoto_gp.py @@ -1,4 +1,4 @@ -from typing import Literal, Type +from typing import Literal, Optional, Type from pydantic import Field, model_validator @@ -16,12 +16,19 @@ AnyPrior, ) from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.single_task_gp import Hyperconfig from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate class TanimotoGPSurrogate(TrainableBotorchSurrogate): type: Literal["TanimotoGPSurrogate"] = "TanimotoGPSurrogate" + hyperconfig: Optional[Hyperconfig] = None + + @property + def hyperconfig_access(self) -> Optional[Hyperconfig]: + return self.hyperconfig + kernel: AnyKernel = Field( default_factory=lambda: ScaleKernel( base_kernel=TanimotoKernel( diff --git a/bofire/data_models/surrogates/trainable.py b/bofire/data_models/surrogates/trainable.py index 4ffc5c4d2..1e7393aed 100644 --- a/bofire/data_models/surrogates/trainable.py +++ b/bofire/data_models/surrogates/trainable.py @@ -1,4 +1,5 @@ import warnings +from abc import 
ABC, abstractmethod from typing import Annotated, List, Literal, Optional, Union import pandas as pd @@ -85,15 +86,20 @@ def domain(self) -> Domain: ), ) - @staticmethod - def _update_hyperparameters(surrogate_data, hyperparameters: pd.Series): - raise NotImplementedError( - "Ideally this would be an abstract method, but this causes problems in pydantic.", + def update_hyperparameters(self, hyperparameters: pd.Series): + self.domain.validate_candidates( + pd.DataFrame(hyperparameters).T, + only_inputs=True, + raise_validation_error=True, ) -class TrainableSurrogate(BaseModel): - hyperconfig: Optional[Hyperconfig] = None +class TrainableSurrogate(BaseModel, ABC): + @property + @abstractmethod + def hyperconfig_access(self) -> Optional[Hyperconfig]: + pass + aggregations: Optional[Annotated[List[AnyAggregation], Field(min_length=1)]] = None @model_validator(mode="after") @@ -116,15 +122,7 @@ def validate_aggregations(self): return self def update_hyperparameters(self, hyperparameters: pd.Series): - if self.hyperconfig is not None: - self.hyperconfig.domain.validate_candidates( - pd.DataFrame(hyperparameters).T, - only_inputs=True, - raise_validation_error=True, - ) - self.hyperconfig._update_hyperparameters( - self, - hyperparameters=hyperparameters, - ) + if self.hyperconfig_access is not None: + self.hyperconfig_access.update_hyperparameters(hyperparameters) else: raise ValueError("No hyperconfig available.") diff --git a/bofire/runners/hyperoptimize.py b/bofire/runners/hyperoptimize.py index 241a7b242..592d0a2d3 100644 --- a/bofire/runners/hyperoptimize.py +++ b/bofire/runners/hyperoptimize.py @@ -29,7 +29,7 @@ def hyperoptimize( folds: int, random_state: Optional[int] = None, ) -> Tuple[AnyTrainableSurrogate, pd.DataFrame]: - if surrogate_data.hyperconfig is None: + if surrogate_data.hyperconfig_access is None: warnings.warn( "No hyperopt is possible as no hyperopt config is available. 
Returning initial config.", ) @@ -54,11 +54,12 @@ def sample(domain): folds=folds, random_state=random_state, show_progress_bar=True - if surrogate_data.hyperconfig.hyperstrategy == "FractionalFactorialStrategy" + if surrogate_data.hyperconfig_access.hyperstrategy + == "FractionalFactorialStrategy" else False, ) - if surrogate_data.hyperconfig.hyperstrategy == "FractionalFactorialStrategy": + if surrogate_data.hyperconfig_access.hyperstrategy == "FractionalFactorialStrategy": strategy = strategies.map(FractionalFactorialStrategy(domain=benchmark.domain)) experiments = benchmark.f( strategy.ask(candidate_count=None), @@ -67,7 +68,7 @@ def sample(domain): else: strategy_data = ( RandomStrategy - if surrogate_data.hyperconfig.hyperstrategy == "RandomStrategy" + if surrogate_data.hyperconfig_access.hyperstrategy == "RandomStrategy" else SoboStrategy ) experiments = run( @@ -77,7 +78,7 @@ def sample(domain): ), metric=best, n_runs=1, - n_iterations=surrogate_data.hyperconfig.n_iterations # type: ignore + n_iterations=surrogate_data.hyperconfig_access.n_iterations # type: ignore - len(benchmark.domain.inputs) - 1, initial_sampler=sample, @@ -99,7 +100,7 @@ def sample(domain): return ( surrogate_data, experiments[ - surrogate_data.hyperconfig.domain.inputs.get_keys() + surrogate_data.hyperconfig_access.domain.inputs.get_keys() + [e.name for e in RegressionMetricsEnum] ], ) diff --git a/bofire/surrogates/deterministic.py b/bofire/surrogates/deterministic.py index 1d99f22a4..2c02bf870 100644 --- a/bofire/surrogates/deterministic.py +++ b/bofire/surrogates/deterministic.py @@ -1,6 +1,10 @@ +from typing import Dict, Optional, cast + import torch from botorch.models.deterministic import AffineDeterministicModel +from typing_extensions import Self +from bofire.data_models.domain.features import Inputs, Outputs from bofire.data_models.enum import CategoricalEncodingEnum from bofire.data_models.surrogates.api import ( CategoricalDeterministicSurrogate as 
CategoricalDeterministicSurrogateDataModel, @@ -8,7 +12,9 @@ from bofire.data_models.surrogates.api import ( LinearDeterministicSurrogate as LinearDeterministicSurrogateDataModel, ) +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import BotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate from bofire.utils.torch_tools import get_NumericToCategorical_input_transform, tkwargs @@ -30,6 +36,28 @@ def __init__( .unsqueeze(-1), ) + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + coefficients: Dict[str, float], + intercept: float, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: str | None = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + ) -> Self: + """ + Factory method to create an EmpiricalSurrogate from a data model. + Args: + # document parameters + Returns: + LinearDeterministicSurrogate: A new instance. + """ + return cast( + Self, make_surrogate(cls, LinearDeterministicSurrogateDataModel, locals()) + ) + class CategoricalDeterministicSurrogate(BotorchSurrogate): def __init__( @@ -53,3 +81,25 @@ def __init__( inputs=self.inputs, transform_specs={self.inputs[0].key: CategoricalEncodingEnum.ONE_HOT}, ) + + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + mapping: Dict[str, float], + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: str | None = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + ) -> Self: + """ + Factory method to create an EmpiricalSurrogate from a data model. + Args: + # document parameters + Returns: + CategoricalDeterministicSurrogate: A new instance. 
+ """ + return cast( + Self, + make_surrogate(cls, CategoricalDeterministicSurrogateDataModel, locals()), + ) diff --git a/bofire/surrogates/empirical.py b/bofire/surrogates/empirical.py index 861ba3a9e..6f4def8a0 100644 --- a/bofire/surrogates/empirical.py +++ b/bofire/surrogates/empirical.py @@ -1,13 +1,17 @@ import base64 import io import warnings -from typing import Optional +from typing import Optional, cast import torch from botorch.models.deterministic import DeterministicModel +from typing_extensions import Self +from bofire.data_models.domain.features import Inputs, Outputs from bofire.data_models.surrogates.api import EmpiricalSurrogate as DataModel +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import BotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate class EmpiricalSurrogate(BotorchSurrogate): @@ -28,6 +32,24 @@ def __init__( model: Optional[DeterministicModel] = None + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: str | None = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + ) -> Self: + """ + Factory method to create an EmpiricalSurrogate from a data model. + Args: + # document parameters + Returns: + EmpiricalSurrogate: A new instance. 
+ """ + return cast(Self, make_surrogate(cls, DataModel, locals())) + def _dumps(self) -> str: """Dumps the actual model to a string via pickle as this is not directly json serializable.""" with warnings.catch_warnings(record=True) as w: diff --git a/bofire/surrogates/fully_bayesian.py b/bofire/surrogates/fully_bayesian.py index 350de1cf9..88c24c9ee 100644 --- a/bofire/surrogates/fully_bayesian.py +++ b/bofire/surrogates/fully_bayesian.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict, List, Literal, Optional, cast import numpy as np import pandas as pd @@ -11,12 +11,18 @@ ) from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform +from typing_extensions import Self +from bofire.data_models.domain.features import Inputs, Outputs from bofire.data_models.enum import OutputFilteringEnum from bofire.data_models.surrogates.api import ( FullyBayesianSingleTaskGPSurrogate as DataModel, ) +from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.trainable import AnyAggregation, Hyperconfig +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import TrainableBotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate from bofire.utils.torch_tools import tkwargs @@ -84,3 +90,44 @@ def _predict(self, transformed_X: pd.DataFrame): preds = posterior.mixture_mean.detach().numpy() stds = np.sqrt(posterior.mixture_variance.detach().numpy()) return preds, stds + + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + hyperconfig: Optional[Hyperconfig] = None, + aggregations: Optional[List[AnyAggregation]] = None, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: Optional[str] = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + scaler: ScalerEnum = ScalerEnum.NORMALIZE, + output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE, + model_type: 
Literal["linear", "saas", "hvarfner"] = "saas", + warmup_steps: int = 256, + num_samples: int = 128, + thinning: int = 16, + features_to_warp: Optional[List[str]] = None, + ) -> Self: + """ + Factory method to create a FullyBayesianSingleTaskGPSurrogate from a data model. + Args: + inputs: Inputs + outputs: Outputs + hyperconfig: Hyperconfig | None + aggregations: List[AnyAggregation] | None + (the data model's `type` discriminator is set automatically and is not a parameter) + input_preprocessing_specs: InputTransformSpecs + dump: str | None + categorical_encodings: InputTransformSpecs + scaler: ScalerEnum + output_scaler: ScalerEnum + model_type: Literal['linear', 'saas', 'hvarfner'] + warmup_steps: int + num_samples: int + thinning: int + features_to_warp: List[str] + Returns: + FullyBayesianSingleTaskGPSurrogate: A new instance. + """ + return cast(Self, make_surrogate(cls, DataModel, locals())) diff --git a/bofire/surrogates/map_saas.py b/bofire/surrogates/map_saas.py index ec4e6281f..54407f4d8 100644 --- a/bofire/surrogates/map_saas.py +++ b/bofire/surrogates/map_saas.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict, List, Optional, cast import torch from botorch.fit import fit_gpytorch_mll @@ -6,12 +6,18 @@ from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform from gpytorch.mlls import ExactMarginalLogLikelihood +from typing_extensions import Self +from bofire.data_models.domain.features import Inputs, Outputs from bofire.data_models.enum import OutputFilteringEnum from bofire.data_models.surrogates.api import ( AdditiveMapSaasSingleTaskGPSurrogate as DataModel, ) +from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.trainable import AnyAggregation, Hyperconfig +from bofire.data_models.types import InputTransformSpecs, PositiveInt from bofire.surrogates.botorch import TrainableBotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate class 
AdditiveMapSaasSingleTaskGPSurrogate(TrainableBotorchSurrogate): @@ -46,3 +52,35 @@ def _fit_botorch( ) mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model) fit_gpytorch_mll(mll, options=self.training_specs, max_attempts=50) + + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + hyperconfig: Optional[Hyperconfig] = None, + aggregations: Optional[List[AnyAggregation]] = None, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: Optional[str] = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + scaler: ScalerEnum = ScalerEnum.NORMALIZE, + output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE, + n_taus: PositiveInt = 4, + ) -> Self: + """ + Factory method to create an AdditiveMapSaasSingleTaskGPSurrogate from a data model. + Args: + inputs: Inputs + outputs: Outputs + hyperconfig: Hyperconfig | None + aggregations: List[AnyAggregation] | None + input_preprocessing_specs: InputTransformSpecs + dump: str | None + categorical_encodings: InputTransformSpecs + scaler: ScalerEnum + output_scaler: ScalerEnum + n_taus: PositiveInt + Returns: + AdditiveMapSaasSingleTaskGPSurrogate: A new instance. + """ + return cast(Self, make_surrogate(cls, DataModel, locals())) diff --git a/bofire/surrogates/model_utils.py b/bofire/surrogates/model_utils.py new file mode 100644 index 000000000..fb1367935 --- /dev/null +++ b/bofire/surrogates/model_utils.py @@ -0,0 +1,15 @@ +def make_surrogate(surrogate_type, data_model_type, locals_of_make: dict): + """Factory function to create a surrogate of type surrogate_type from a data model of type data_model_type. + This function is a helper for the `make`-`@classmethod`s of the surrogates. All locals that are not None are passed to the + surrogate constructor. The ones that are None are not passed and hence their default values are used. + Args: + surrogate_type: The class of the surrogate to be created. + data_model_type: The data model class. 
+ locals_of_make: The local variables of the make-function that called this function. + Returns: + Surrogate: The surrogate object. + """ + locals_of_make = {k: v for k, v in locals_of_make.items() if v is not None} + # since we get all locals from the `make`-`@classmethod`s we need to remove the `cls` variable. + locals_of_make.pop("cls", None) + return surrogate_type.from_spec(data_model_type(**locals_of_make)) diff --git a/bofire/surrogates/multi_task_gp.py b/bofire/surrogates/multi_task_gp.py index 7276cf9a9..bf06bc00d 100644 --- a/bofire/surrogates/multi_task_gp.py +++ b/bofire/surrogates/multi_task_gp.py @@ -1,5 +1,5 @@ import warnings -from typing import Dict, Optional +from typing import Dict, List, Optional, cast import botorch import numpy as np @@ -9,16 +9,22 @@ from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform from gpytorch.mlls import ExactMarginalLogLikelihood +from typing_extensions import Self import bofire.kernels.api as kernels import bofire.priors.api as priors +from bofire.data_models.domain.api import Inputs, Outputs from bofire.data_models.enum import OutputFilteringEnum from bofire.data_models.features.api import TaskInput -from bofire.data_models.priors.api import LKJPrior - -# from bofire.data_models.molfeatures.api import MolFeatures +from bofire.data_models.kernels.api import AnyKernel +from bofire.data_models.priors.api import AnyPrior, LKJPrior +from bofire.data_models.surrogates.api import MultiTaskGPHyperconfig from bofire.data_models.surrogates.api import MultiTaskGPSurrogate as DataModel +from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.trainable import AnyAggregation +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import TrainableBotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate from bofire.utils.torch_tools import tkwargs @@ -106,6 +112,42 @@ def 
_predict(self, transformed_X: pd.DataFrame): return preds, stds + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + hyperconfig: Optional[MultiTaskGPHyperconfig] = None, + aggregations: Optional[List[AnyAggregation]] = None, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: Optional[str] = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + scaler: ScalerEnum = ScalerEnum.NORMALIZE, + output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE, + kernel: Optional[AnyKernel] = None, + noise_prior: Optional[AnyPrior] = None, + task_prior: Optional[LKJPrior] = None, + ) -> Self: + """ + Factory method to create a MultiTaskGPSurrogate from a data model. + Args: + inputs: Inputs + outputs: Outputs + hyperconfig: MultiTaskGPHyperconfig | None + aggregations: List[AnyAggregation] | None + input_preprocessing_specs: dict + dump: str | None + categorical_encodings: dict + scaler: ScalerEnum + output_scaler: ScalerEnum + kernel: AnyKernel + noise_prior: AnyPrior + task_prior: LKJPrior | None + Returns: + MultiTaskGPSurrogate: A new instance. 
+ """ + return cast(Self, make_surrogate(cls, DataModel, locals())) + def _index_kernel_prior_closure(m): return m._eval_covar_matrix() diff --git a/bofire/surrogates/random_forest.py b/bofire/surrogates/random_forest.py index d1d4663e4..a566c5532 100644 --- a/bofire/surrogates/random_forest.py +++ b/bofire/surrogates/random_forest.py @@ -1,6 +1,6 @@ import base64 import io -from typing import Optional +from typing import List, Literal, Optional, Union, cast import numpy as np import torch @@ -10,10 +10,16 @@ from sklearn.ensemble import RandomForestRegressor from sklearn.utils.validation import check_is_fitted from torch import Tensor +from typing_extensions import Self +from bofire.data_models.domain.api import Inputs, Outputs from bofire.data_models.enum import OutputFilteringEnum from bofire.data_models.surrogates.api import RandomForestSurrogate as DataModel +from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.trainable import AnyAggregation, Hyperconfig +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import TrainableBotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate from bofire.utils.torch_tools import tkwargs @@ -163,3 +169,63 @@ def loads(self, data: str): """Loads the actual random forest from a base64 encoded pickle bytes object and writes it to the `model` attribute.""" buffer = io.BytesIO(base64.b64decode(data.encode())) self.model = torch.load(buffer, weights_only=False) + + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + hyperconfig: Optional[Hyperconfig] = None, + aggregations: Optional[List[AnyAggregation]] = None, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: Optional[str] = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + scaler: ScalerEnum = ScalerEnum.NORMALIZE, + output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE, + n_estimators: int = 100, + criterion: Literal[ + 
"squared_error", "absolute_error", "friedman_mse", "poisson" + ] = "squared_error", + max_depth: Optional[int] = None, + min_samples_split: Union[int, float] = 2, + min_samples_leaf: Union[int, float] = 1, + min_weight_fraction_leaf: float = 0.0, + max_features: Union[int, float, Literal["auto", "sqrt", "log2"]] = 1.0, + max_leaf_nodes: Optional[int] = None, + min_impurity_decrease: float = 0.0, + bootstrap: bool = True, + oob_score: bool = False, + random_state: Optional[int] = None, + ccp_alpha: float = 0.0, + max_samples: Optional[Union[int, float]] = None, + ) -> Self: + """ + Factory method to create a RandomForestSurrogate from a data model. + Args: + inputs: Inputs + outputs: Outputs + hyperconfig: Hyperconfig | None + aggregations: List[AnyAggregation] | None + input_preprocessing_specs: dict + dump: str | None + categorical_encodings: dict + scaler: ScalerEnum + output_scaler: ScalerEnum + n_estimators: int + criterion: Literal['squared_error', 'absolute_error', 'friedman_mse', 'poisson'] + max_depth: int | None + min_samples_split: int | float + min_samples_leaf: int | float + min_weight_fraction_leaf: float + max_features: int | float | Literal['auto', 'sqrt', 'log2'] + max_leaf_nodes: int | None + min_impurity_decrease: float + bootstrap: bool + oob_score: bool + random_state: int | None + ccp_alpha: float + max_samples: int | float | None + Returns: + RandomForestSurrogate: A new instance. 
+ """ + return cast(Self, make_surrogate(cls, DataModel, locals())) diff --git a/bofire/surrogates/robust_single_task_gp.py b/bofire/surrogates/robust_single_task_gp.py index 381f71a36..74152ad22 100644 --- a/bofire/surrogates/robust_single_task_gp.py +++ b/bofire/surrogates/robust_single_task_gp.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict, List, Optional, Union, cast import pandas as pd import torch @@ -15,14 +15,23 @@ from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform from gpytorch.mlls import ExactMarginalLogLikelihood +from typing_extensions import Self import bofire.kernels.api as kernels import bofire.priors.api as priors +from bofire.data_models.domain.features import Inputs, Outputs from bofire.data_models.enum import OutputFilteringEnum +from bofire.data_models.kernels.api import MaternKernel, RBFKernel, ScaleKernel +from bofire.data_models.priors.api import AnyPrior # from bofire.data_models.surrogates.api import SingleTaskGPSurrogate as DataModel from bofire.data_models.surrogates.api import RobustSingleTaskGPSurrogate as DataModel +from bofire.data_models.surrogates.scaler import ScalerEnum +from bofire.data_models.surrogates.single_task_gp import SingleTaskGPHyperconfig +from bofire.data_models.surrogates.trainable import AnyAggregation +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import TrainableBotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate class RobustSingleTaskGPSurrogate(TrainableBotorchSurrogate): @@ -128,3 +137,43 @@ def predict_outliers( ) return pd.concat([predictions, rho_df], axis=1) + + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + hyperconfig: Optional[SingleTaskGPHyperconfig] = None, + aggregations: Optional[List[AnyAggregation]] = None, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: Optional[str] = None, + 
categorical_encodings: Optional[InputTransformSpecs] = None, + scaler: ScalerEnum = ScalerEnum.NORMALIZE, + output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE, + kernel: Optional[Union[ScaleKernel, RBFKernel, MaternKernel]] = None, + noise_prior: Optional[AnyPrior] = None, + prior_mean_of_support: Optional[int] = None, + convex_parametrization: bool = True, + cache_model_trace: bool = False, + ) -> Self: + """ + Factory method to create a RobustSingleTaskGPSurrogate from a data model. + Args: + inputs: Inputs + outputs: Outputs + hyperconfig: SingleTaskGPHyperconfig | None + aggregations: List[AnyAggregation] | None + input_preprocessing_specs: dict + dump: str | None + categorical_encodings: dict + scaler: ScalerEnum + output_scaler: ScalerEnum + kernel: ScaleKernel | RBFKernel | MaternKernel + noise_prior: AnyPrior + prior_mean_of_support: int | None + convex_parametrization: bool + cache_model_trace: bool + Returns: + RobustSingleTaskGPSurrogate: A new instance. + """ + return cast(Self, make_surrogate(cls, DataModel, locals())) diff --git a/bofire/surrogates/single_task_gp.py b/bofire/surrogates/single_task_gp.py index f8d0de2eb..757ca8436 100644 --- a/bofire/surrogates/single_task_gp.py +++ b/bofire/surrogates/single_task_gp.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict, List, Optional, cast import botorch import torch @@ -6,14 +6,23 @@ from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform from gpytorch.mlls import ExactMarginalLogLikelihood +from typing_extensions import Self import bofire.kernels.api as kernels import bofire.priors.api as priors +from bofire.data_models.domain.features import Inputs, Outputs from bofire.data_models.enum import OutputFilteringEnum +from bofire.data_models.kernels.api import AnyKernel +from bofire.data_models.priors.api import AnyPrior +from bofire.data_models.surrogates.api import ScalerEnum # from 
bofire.data_models.molfeatures.api import MolFeatures from bofire.data_models.surrogates.api import SingleTaskGPSurrogate as DataModel +from bofire.data_models.surrogates.single_task_gp import SingleTaskGPHyperconfig +from bofire.data_models.surrogates.trainable import AnyAggregation +from bofire.data_models.types import InputTransformSpecs from bofire.surrogates.botorch import TrainableBotorchSurrogate +from bofire.surrogates.model_utils import make_surrogate class SingleTaskGPSurrogate(TrainableBotorchSurrogate): @@ -24,6 +33,7 @@ def __init__( ): self.kernel = data_model.kernel self.noise_prior = data_model.noise_prior + self.hyperconfig = data_model.hyperconfig super().__init__(data_model=data_model, **kwargs) model: Optional[botorch.models.SingleTaskGP] = None @@ -61,3 +71,37 @@ def _fit_botorch( self.model.likelihood.noise_covar.noise_prior = priors.map(self.noise_prior) mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model) fit_gpytorch_mll(mll, options=self.training_specs, max_attempts=50) + + @classmethod + def make( + cls, + inputs: Inputs, + outputs: Outputs, + hyperconfig: Optional[SingleTaskGPHyperconfig] = None, + aggregations: Optional[List[AnyAggregation]] = None, + input_preprocessing_specs: Optional[InputTransformSpecs] = None, + dump: Optional[str] = None, + categorical_encodings: Optional[InputTransformSpecs] = None, + scaler: ScalerEnum = ScalerEnum.NORMALIZE, + output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE, + kernel: Optional[AnyKernel] = None, + noise_prior: Optional[AnyPrior] = None, + ) -> Self: + """ + Factory method to create a SingleTaskGPSurrogate from a data model. 
+ Args: + hyperconfig: SingleTaskGPHyperconfig or None + aggregations: List[AnyAggregation] or None + inputs: Inputs + outputs: Outputs + input_preprocessing_specs: InputTransformSpecs + dump: str or None + categorical_encodings: InputTransformSpecs + scaler: ScalerEnum + output_scaler: ScalerEnum + kernel: AnyKernel + noise_prior: AnyPrior + Returns: + SingleTaskGPSurrogate: A new instance. + """ + return cast(Self, make_surrogate(cls, DataModel, locals())) diff --git a/bofire/surrogates/surrogate.py b/bofire/surrogates/surrogate.py index 76d3ed1a5..102256b6b 100644 --- a/bofire/surrogates/surrogate.py +++ b/bofire/surrogates/surrogate.py @@ -120,3 +120,8 @@ def _prepare_for_dump(self): @abstractmethod def loads(self, data: str): """Loads the actual model from a string and writes it to the `model` attribute.""" + + @classmethod + def from_spec(cls, data_model: DataModel) -> "Surrogate": + """Used by the mapper to map from data model to functional surrogate.""" + return cls(data_model=data_model) diff --git a/tests/bofire/data_models/specs/surrogates.py b/tests/bofire/data_models/specs/surrogates.py index e627269c1..5ead772bc 100644 --- a/tests/bofire/data_models/specs/surrogates.py +++ b/tests/bofire/data_models/specs/surrogates.py @@ -58,7 +58,8 @@ ).model_dump(), "kernel": ScaleKernel( base_kernel=MaternKernel( - ard=True, nu=2.5, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() + ard=True, + nu=2.5, # lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() ), outputscale_prior=THREESIX_SCALE_PRIOR(), ).model_dump(), @@ -98,8 +99,8 @@ base_kernel=MaternKernel( ard=True, nu=2.5, - lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR(), - lengthscale_constraint=ROBUSTGP_LENGTHSCALE_CONSTRAINT(), + # lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR(), + # lengthscale_constraint=ROBUSTGP_LENGTHSCALE_CONSTRAINT(), ), outputscale_prior=THREESIX_SCALE_PRIOR(), outputscale_constraint=ROBUSTGP_OUTPUTSCALE_CONSTRAINT(), @@ -330,7 +331,8 @@ ).model_dump(), "kernel": ScaleKernel( 
base_kernel=MaternKernel( - ard=True, nu=2.5, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() + ard=True, + nu=2.5, # lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() ), outputscale_prior=THREESIX_SCALE_PRIOR(), ).model_dump(), diff --git a/tests/bofire/surrogates/test_make_surrogate.py b/tests/bofire/surrogates/test_make_surrogate.py new file mode 100644 index 000000000..bdac52393 --- /dev/null +++ b/tests/bofire/surrogates/test_make_surrogate.py @@ -0,0 +1,122 @@ +import inspect +import types +import typing +from typing import Protocol, get_origin, get_type_hints + +import typing_extensions + +import bofire.surrogates.api as surrogates +from bofire.data_models.surrogates.bnn import SingleTaskIBNNSurrogate +from bofire.data_models.surrogates.mixed_single_task_gp import ( + MixedSingleTaskGPSurrogate, +) +from bofire.data_models.surrogates.multi_task_gp import MultiTaskGPSurrogate +from bofire.data_models.surrogates.random_forest import RandomForestSurrogate +from bofire.data_models.surrogates.tanimoto_gp import TanimotoGPSurrogate +from bofire.surrogates.mlp import ClassificationMLPEnsemble, RegressionMLPEnsemble +from bofire.surrogates.shape import PiecewiseLinearGPSurrogate +from bofire.surrogates.surrogate import Surrogate +from tests.bofire.data_models.specs.api import surrogates as surrogate_specs + + +surrogates_skip_annotations = [ + SingleTaskIBNNSurrogate, + RandomForestSurrogate, + MultiTaskGPSurrogate, + TanimotoGPSurrogate, +] + +surrogates_skip_all = [ + MixedSingleTaskGPSurrogate, + RegressionMLPEnsemble, + ClassificationMLPEnsemble, + PiecewiseLinearGPSurrogate, +] + + +def remove_optional(anno): + origin = get_origin(anno) + if origin == typing.Union or origin is types.UnionType: + return sorted((a for a in anno.__args__ if a is not type(None)), key=hash) + else: + return [anno] + + +def test_make(): + class SurrogateWithMake(Protocol): + @classmethod + def make(cls, *args, **kwargs) -> Surrogate: ... 
+ + def test(surrogate_type: type[SurrogateWithMake], data_model): + if ( + type(data_model) in surrogates_skip_all + or surrogate_type in surrogates_skip_all + ): + return + + data_model_dump = data_model.model_dump() + data_model_dump.pop("type") + + sig = inspect.signature(surrogate_type.make) + param_names_make = list(sig.parameters.keys()) + + # are all make parameters in the data model? + for arg_name in param_names_make: + assert ( + arg_name in data_model_dump + ), f"Missing {arg_name} in {type(data_model)}'s data model" + + # are all data model parameters in the make function? + data_model_field_names = [k for k in data_model_dump.keys() if k != "type"] + for k in data_model_field_names: + assert ( + k in param_names_make + ), f"{k} not in {surrogate_type.__name__}'s make parameters" + + if type(data_model) not in surrogates_skip_annotations: + # do the non-optional annotation-parts match? + for name, p_annotation in get_type_hints(surrogate_type.make).items(): + if name == "return": + assert p_annotation is typing_extensions.Self + else: + dm_anno = type(data_model).model_fields[name].annotation + p_anno = p_annotation + + dm_anno = remove_optional(dm_anno) + p_anno = remove_optional(p_anno) + assert ( + len(dm_anno) == len(p_anno) + ), f"{surrogate_type.__name__}. Annotations do not match for {name}: {dm_anno} !=\n {p_anno}" + for da, pa in zip(dm_anno, p_anno): + if get_origin(da) == typing.Annotated: + da_ = da.__origin__ + else: + da_ = da + + if get_origin(pa) == typing.Annotated: + pa_ = pa.__origin__ + else: + pa_ = pa + assert ( + da_ == pa_ + ), f"{surrogate_type.__name__}. 
Annotations do not match for {name}: {da} !=\n {pa}" + + made_surrogate = surrogate_type.make(**data_model_dump) + for k in data_model_dump.keys(): + field_info = getattr(type(data_model), "model_fields", {}).get(k, None) + is_optional = False + if field_info is not None: + anno = field_info.annotation + origin = get_origin(anno) + if origin is typing.Union or origin is types.UnionType: + if type(None) in anno.__args__: + is_optional = True + if not is_optional: + assert hasattr( + made_surrogate, k + ), f"{type(surrogate_type).__name__}. {k} missing in made_surrogate" + + for spec in surrogate_specs.valids: + data_model = spec.obj() + surrogate_type = surrogates.map(data_model) + test(surrogate_type, data_model)