# Patch summary and review notes.
# (Free text placed before the first `diff --git` header; it belongs to no hunk.)
#
# Purpose: add a `CHEN` Gaussian process preset next to the existing BAYBE / EDBO /
# EDBO_SMOOTHED presets and move the literature citations into a shared bibliography.
#
# Code changes
#   * presets/chen.py (new file): `CHENKernelFactory._make` returns
#     ScaleKernel(MaternKernel(nu=2.5, ...)). One value,
#     0.4 * sqrt(train_x.shape[-1]) + 4.0, parameterizes both Gamma priors
#     (GammaPrior(2.0 * v, 2.0) for the lengthscale, GammaPrior(1.0 * v, 1.0) for the
#     outputscale) and is also used as both initial values. The module ends with the
#     aliases PresetKernelFactory / PresetMeanFactory / PresetLikelihoodFactory.
#   * components/likelihood.py: new `LazyGaussianLikelihoodFactory`; its `__call__`
#     imports `GaussianLikelihood` inside the method body and returns
#     `GaussianLikelihood()` without reading any of its three arguments.
#   * presets/core.py: new enum member `CHEN = "CHEN"`; EDBO docstrings gain citations.
#   * presets/__init__.py: imports `CHENKernelFactory` and lists it in the export list.
#   * presets/edbo.py, presets/edbo_smoothed.py: docstring-only changes (citation role,
#     commit-pinned link to bro.py, `# noqa: E501` on the over-long docstrings).
#
# Documentation changes
#   * CHANGELOG.md / CONTRIBUTORS.md: changelog entry mentions `CHEN`; three
#     contributors added; CONTRIBUTORS.md now ends with a newline.
#   * docs/misc/references.md (new) holds the `{bibliography}` directive that is
#     removed from docs/userguide/transfer_learning.md; docs/index.md lists the page.
#   * docs/references.bib: entries `Chen2026` and `Shields2021` added.
#   * docs/conf.py: `linkcheck_ignore` becomes a two-entry list.
#
# NOTE(review): in chen.py the local named `lengthscale` also feeds the outputscale
#   prior and the outputscale initial value. Confirm that sharing one value is what
#   the cited work prescribes; if so, a neutral name would read better.
# NOTE(review): the value is derived from `train_x.shape[-1]`, while the default
#   `parameter_selector` excludes `TaskParameter`. Whether `train_x` still carries
#   task columns at this point is not visible in this patch -- TODO confirm.
# NOTE(review): likelihood.py imports `SearchSpace` at module level although it is
#   used only in an annotation here, while `Tensor` is imported under TYPE_CHECKING
#   (chen.py imports both under TYPE_CHECKING). Lines 1-3 of likelihood.py are not
#   shown, so it is unclear whether annotations are lazy -- verify before moving it.
# NOTE(review): the `linkcheck_ignore` entries look like globs/literals
#   (`.../blob*`, unescaped dots in the DOI). If Sphinx matches them as regular
#   expressions, `blob*` means "blo" plus any number of "b" and only works as a
#   prefix match -- consider `blob/.*` and escaped dots.
# NOTE(review): the docs/index.md toctree entries (including the new `References`)
#   carry no explicit target in this copy, while the new page is
#   docs/misc/references.md. Possibly the `<...>` targets were lost in extraction --
#   verify against the repository before applying.
# NOTE(review): the comment "checking inks for viability" in docs/conf.py (typo for
#   "links") is an unchanged context line; fixing it needs a separate change.
#
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7715778a3..de3165f616 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support for GPyTorch objects (kernels, means, likelihood) as Gaussian process
   components, enabling full low-level customization
 - Factories for all Gaussian process components
-- `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate`
+- `CHEN`, `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate`
 - `TypeSelector` and `NameSelector` classes for parameter selection in kernel factories
 - `parameter_names` attribute to basic kernels for controlling the considered parameters
 - `ParameterKind` flag enum for classifying parameters by their role and automatic
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 6270ff77c0..ef31b177b8 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -39,4 +39,10 @@
 - Kathrin Skubch (Merck KGaA, Darmstadt, Germany):\
   Transfer learning regression benchmarks infrastructure
 - Myra Zmarsly (Merck Life Science KGaA, Darmstadt, Germany):\
-  Identification of non-dominated parameter configurations
\ No newline at end of file
+  Identification of non-dominated parameter configurations
+- Thijs Stuyver (PSL University, Paris, France):\
+  Adaptive hyper-prior tailored for reaction yield optimization tasks
+- Maximilian Fleck (PSL University, Paris, France):\
+  Adaptive hyper-prior tailored for reaction yield optimization tasks
+- Guanming Chen (PSL University, Paris, France):\
+  Adaptive hyper-prior tailored for reaction yield optimization tasks
diff --git a/baybe/surrogates/gaussian_process/components/likelihood.py b/baybe/surrogates/gaussian_process/components/likelihood.py
index 2f8007ed59..c94982a469 100644
--- a/baybe/surrogates/gaussian_process/components/likelihood.py
+++ b/baybe/surrogates/gaussian_process/components/likelihood.py
@@ -4,6 +4,10 @@
 
 from typing import TYPE_CHECKING, Any
 
+from attrs import define
+from typing_extensions import override
+
+from baybe.searchspace.core import SearchSpace
 from baybe.surrogates.gaussian_process.components.generic import (
     GPComponentFactoryProtocol,
     PlainGPComponentFactory,
@@ -11,6 +15,7 @@
 
 if TYPE_CHECKING:
     from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood
+    from torch import Tensor
 
     LikelihoodFactoryProtocol = GPComponentFactoryProtocol[GPyTorchLikelihood]
     PlainLikelihoodFactory = PlainGPComponentFactory[GPyTorchLikelihood]
@@ -18,3 +23,16 @@
     # At runtime, we avoid loading GPyTorch eagerly for performance reasons
     LikelihoodFactoryProtocol = GPComponentFactoryProtocol[Any]
     PlainLikelihoodFactory = PlainGPComponentFactory[Any]
+
+
+@define
+class LazyGaussianLikelihoodFactory(LikelihoodFactoryProtocol):
+    """A factory providing Gaussian likelihoods using lazy loading."""
+
+    @override
+    def __call__(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> GPyTorchLikelihood:
+        from gpytorch.likelihoods import GaussianLikelihood
+
+        return GaussianLikelihood()
diff --git a/baybe/surrogates/gaussian_process/presets/__init__.py b/baybe/surrogates/gaussian_process/presets/__init__.py
index deb7de9e64..434fbf560f 100644
--- a/baybe/surrogates/gaussian_process/presets/__init__.py
+++ b/baybe/surrogates/gaussian_process/presets/__init__.py
@@ -7,6 +7,9 @@
     BayBEMeanFactory,
 )
 
+# Chen preset
+from baybe.surrogates.gaussian_process.presets.chen import CHENKernelFactory
+
 # Core
 from baybe.surrogates.gaussian_process.presets.core import GaussianProcessPreset
 
@@ -31,6 +34,8 @@
     "BayBEKernelFactory",
     "BayBELikelihoodFactory",
     "BayBEMeanFactory",
+    # Chen preset
+    "CHENKernelFactory",
     # EDBO preset
     "EDBOKernelFactory",
     "EDBOLikelihoodFactory",
diff --git a/baybe/surrogates/gaussian_process/presets/chen.py b/baybe/surrogates/gaussian_process/presets/chen.py
new file mode 100644
index 0000000000..5e90c4aa5c
--- /dev/null
+++ b/baybe/surrogates/gaussian_process/presets/chen.py
@@ -0,0 +1,77 @@
+"""Preset for adaptive kernel hyperpriors proposed by :cite:p:`Chen2026`."""
+
+from __future__ import annotations
+
+import gc
+import math
+from typing import TYPE_CHECKING, ClassVar
+
+from attrs import define, field
+from typing_extensions import override
+
+from baybe.kernels.basic import MaternKernel
+from baybe.kernels.composite import ScaleKernel
+from baybe.parameters.categorical import TaskParameter
+from baybe.parameters.selectors import (
+    ParameterSelectorProtocol,
+    TypeSelector,
+    to_parameter_selector,
+)
+from baybe.priors.basic import GammaPrior
+from baybe.surrogates.gaussian_process.components.kernel import (
+    _PureKernelFactory,
+)
+from baybe.surrogates.gaussian_process.components.likelihood import (
+    LazyGaussianLikelihoodFactory,
+)
+from baybe.surrogates.gaussian_process.components.mean import LazyConstantMeanFactory
+
+if TYPE_CHECKING:
+    from torch import Tensor
+
+    from baybe.kernels.base import Kernel
+    from baybe.searchspace.core import SearchSpace
+
+
+@define
+class CHENKernelFactory(_PureKernelFactory):
+    """A factory providing adaptive hyperprior kernels as proposed by :cite:p:`Chen2026`."""  # noqa: E501
+
+    _uses_parameter_names: ClassVar[bool] = True
+    # See base class.
+
+    parameter_selector: ParameterSelectorProtocol | None = field(
+        factory=lambda: TypeSelector([TaskParameter], exclude=True),
+        converter=to_parameter_selector,
+    )
+    # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429)
+
+    @override
+    def _make(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> Kernel:
+        lengthscale = 0.4 * math.sqrt(train_x.shape[-1]) + 4.0
+        lengthscale_prior = GammaPrior(2.0 * lengthscale, 2.0)
+        lengthscale_initial_value = lengthscale
+        outputscale_prior = GammaPrior(1.0 * lengthscale, 1.0)
+        outputscale_initial_value = lengthscale
+
+        return ScaleKernel(
+            MaternKernel(
+                nu=2.5,
+                lengthscale_prior=lengthscale_prior,
+                lengthscale_initial_value=lengthscale_initial_value,
+                parameter_names=self.get_parameter_names(searchspace),
+            ),
+            outputscale_prior=outputscale_prior,
+            outputscale_initial_value=outputscale_initial_value,
+        )
+
+
+# Collect leftover original slotted classes processed by `attrs.define`
+gc.collect()
+
+# Aliases for generic preset imports
+PresetKernelFactory = CHENKernelFactory
+PresetMeanFactory = LazyConstantMeanFactory
+PresetLikelihoodFactory = LazyGaussianLikelihoodFactory
diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py
index ad77df0b4d..5347cf85e5 100644
--- a/baybe/surrogates/gaussian_process/presets/core.py
+++ b/baybe/surrogates/gaussian_process/presets/core.py
@@ -11,8 +11,11 @@ class GaussianProcessPreset(Enum):
     BAYBE = "BAYBE"
     """The default BayBE settings of the Gaussian process surrogate class."""
 
+    CHEN = "CHEN"
+    """The adaptive kernel hyperprior settings proposed by :cite:p:`Chen2026`."""
+
     EDBO = "EDBO"
-    """The EDBO settings."""
+    """The EDBO settings proposed by :cite:p:`Shields2021`."""
 
     EDBO_SMOOTHED = "EDBO_SMOOTHED"
-    """A smoothed version of the EDBO settings."""
+    """A smoothed version of the EDBO settings (adapted from :cite:p:`Shields2021`)."""
diff --git a/baybe/surrogates/gaussian_process/presets/edbo.py b/baybe/surrogates/gaussian_process/presets/edbo.py
index 6db0af8a30..539e2ef72c 100644
--- a/baybe/surrogates/gaussian_process/presets/edbo.py
+++ b/baybe/surrogates/gaussian_process/presets/edbo.py
@@ -1,4 +1,4 @@
-"""EDBO preset for Gaussian process surrogates."""
+"""EDBO preset :cite:p:`Shields2021`."""
 
 from __future__ import annotations
 
@@ -58,11 +58,10 @@ def _contains_encoding(
 
 @define
 class EDBOKernelFactory(_PureKernelFactory):
-    """A factory providing EDBO kernels.
+    """A factory providing EDBO kernels, as proposed by :cite:p:`Shields2021`.
 
-    References:
-        * https://github.com/b-shields/edbo/blob/master/edbo/bro.py#L664
-        * https://doi.org/10.1038/s41586-021-03213-y
+    GitHub repository: https://github.com/b-shields/edbo
+    Prior settings: https://github.com/b-shields/edbo/blob/9b41eac3f6d9e520547702fd5b0c7ef6441625a4/edbo/bro.py#L658
     """
 
     _uses_parameter_names: ClassVar[bool] = True
@@ -130,11 +129,10 @@ def _make(
 
 @define
 class EDBOLikelihoodFactory(LikelihoodFactoryProtocol):
-    """A factory providing EDBO likelihoods.
+    """A factory providing EDBO likelihoods, as proposed by :cite:p:`Shields2021`.
 
-    References:
-        * https://github.com/b-shields/edbo/blob/master/edbo/bro.py#L664
-        * https://doi.org/10.1038/s41586-021-03213-y
+    GitHub repository: https://github.com/b-shields/edbo
+    Prior settings: https://github.com/b-shields/edbo/blob/9b41eac3f6d9e520547702fd5b0c7ef6441625a4/edbo/bro.py#L658
     """
 
     @override
diff --git a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py
index 1c2718adb6..904f711f31 100644
--- a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py
+++ b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py
@@ -1,4 +1,4 @@
-"""Smoothed EDBO preset for Gaussian process surrogates."""
+"""Smoothed EDBO preset (adapted from :cite:p:`Shields2021`)."""
 
 from __future__ import annotations
 
@@ -40,12 +40,12 @@
 
 @define
 class SmoothedEDBOKernelFactory(_PureKernelFactory):
-    """A factory providing smoothed versions of EDBO kernels.
+    """A factory providing smoothed versions of EDBO kernels (adapted from :cite:p:`Shields2021`).
 
     Takes the low and high dimensional limits of
     :class:`baybe.surrogates.gaussian_process.presets.edbo.EDBOKernelFactory`
     and interpolates the prior moments linearly in between.
-    """
+    """  # noqa: E501
 
     _uses_parameter_names: ClassVar[bool] = True
     # See base class.
@@ -94,12 +94,12 @@ def _make(
 
 @define
 class SmoothedEDBOLikelihoodFactory(LikelihoodFactoryProtocol):
-    """A factory providing smoothed versions of EDBO likelihoods.
+    """A factory providing smoothed versions of EDBO likelihoods (adapted from :cite:p:`Shields2021`).
 
     Takes the low and high dimensional limits of
     :class:`baybe.surrogates.gaussian_process.presets.edbo.EDBOLikelihoodFactory`
     and interpolates the prior moments linearly in between.
-    """
+    """  # noqa: E501
 
     @override
     def __call__(
diff --git a/docs/conf.py b/docs/conf.py
index 11ff636a77..54d318e7ce 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -154,7 +154,10 @@
 ]
 
 # Ignore the following links when checking inks for viability
-linkcheck_ignore = [r"https://github.com/b-shields/edbo/blob/master/edbo/bro.py*"]
+linkcheck_ignore = [
+    r"https://github.com/b-shields/edbo/blob*",
+    r"https://doi.org/10.26434/chemrxiv.10001986/v2",
+]
 
 
 # Ignore the warnings that are given by autosectionlabel
diff --git a/docs/index.md b/docs/index.md
index 3f8ba25cd9..31d138a369 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -40,6 +40,7 @@ Contribute
 Contributors
 Known Issues
 Changelog
+References
 Github
 License
 ```
diff --git a/docs/misc/references.md b/docs/misc/references.md
new file mode 100644
index 0000000000..d977a9bd5b
--- /dev/null
+++ b/docs/misc/references.md
@@ -0,0 +1,4 @@
+# References
+
+```{bibliography}
+```
diff --git a/docs/references.bib b/docs/references.bib
index 307529d299..4d159cb686 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -8,3 +8,22 @@ @inproceedings{NIPS2007_66368270
  volume = {20},
  year = {2007}
 }
+
+@article{Chen2026,
+  author = {Chen, Guanming and Fleck, Maximilian and Stuyver, Thijs},
+  title = {Leveraging Hidden-Space Representations Effectively in Bayesian Optimization for Experiment Design through Dimension-Aware Hyperpriors},
+  journal = {ChemRxiv},
+  year = {2026},
+  doi = {10.26434/chemrxiv.10001986/v2}
+}
+
+@article{Shields2021,
+  author = {Shields, Benjamin J. and Stevens, Jason and Li, Jun and Parasram, Marvin and Damani, Farhan and Alvarado, Jesus I. Martinez and Janey, Jacob M. and Adams, Ryan P. and Doyle, Abigail G.},
+  title = {Bayesian reaction optimization as a tool for chemical synthesis},
+  journal = {Nature},
+  volume = {590},
+  number = {7844},
+  pages = {89--96},
+  year = {2021},
+  doi = {10.1038/s41586-021-03213-y}
+}
diff --git a/docs/userguide/transfer_learning.md b/docs/userguide/transfer_learning.md
index 6f509df31b..3273e20853 100644
--- a/docs/userguide/transfer_learning.md
+++ b/docs/userguide/transfer_learning.md
@@ -185,7 +185,4 @@ on the optimization:
 :class: only-dark
 ```
 
-```{bibliography}
-```
-
 [`TaskParameter`]: baybe.parameters.categorical.TaskParameter
\ No newline at end of file