Skip to content
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `pretty_print_df` function for printing shortened versions of dataframes
- Basic Transfer Learning example
- Repo now has reminders (https://github.com/marketplace/actions/issue-reminder) enabled
- `mypy` for recommenders

### Changed
- `Recommender`s now share their core logic via their base class
Expand Down
2 changes: 1 addition & 1 deletion baybe/recommenders/deprecation.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ def structure_recommender_protocol(val: dict, _) -> RecommenderProtocol:
f"a future version.",
DeprecationWarning,
)
fun = make_dict_structure_fn(cls, converter)
fun = make_dict_structure_fn(cls, converter) # type: ignore

return fun(val, cls)
13 changes: 8 additions & 5 deletions baybe/recommenders/meta/sequential.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"""Meta recommenders that switch recommenders based on the experimentation progress."""
# TODO: Once Bayesian recommenders support empty training data, refactoring this file
# will resolve the type errors that are currently suppressed.
# mypy: disable-error-code="arg-type"

from typing import Iterable, Iterator, List, Literal, Optional

Expand All @@ -21,7 +24,7 @@
converter,
)

# TODO: Make predictive recommenders handle empty training data
# TODO: Make bayesian recommenders handle empty training data
_unsupported_recommender_error = ValueError(
f"For cases where no training is available, the selected recommender "
f"must be a subclass of '{NonPredictiveRecommender.__name__}'."
Expand Down Expand Up @@ -62,7 +65,7 @@ def select_recommender( # noqa: D102
) -> PureRecommender:
# See base class.

# FIXME: enable predictive recommenders for empty training data
# TODO: enable bayesian recommenders for empty training data
if (train_x is None or len(train_x) == 0) and not isinstance(
self.initial_recommender, NonPredictiveRecommender
):
Expand Down Expand Up @@ -167,7 +170,7 @@ def select_recommender( # noqa: D102
# Remember the training dataset size for the next call
self._n_last_measurements = len(train_x)

# FIXME: enable predictive recommenders for empty training data
# TODO: enable bayesian recommenders for empty training data
if (train_x is None or len(train_x) == 0) and not isinstance(
recommender, NonPredictiveRecommender
):
Expand Down Expand Up @@ -249,13 +252,13 @@ def select_recommender( # noqa: D102
# Remember the training dataset size for the next call
self._n_last_measurements = len(train_x)

# FIXME: enable predictive recommenders for empty training data
# TODO: enable bayesian recommenders for empty training data
if (train_x is None or len(train_x) == 0) and not isinstance(
self._last_recommender, NonPredictiveRecommender
):
raise _unsupported_recommender_error

return self._last_recommender
return self._last_recommender # type: ignore[return-value]
Comment thread
AVHopp marked this conversation as resolved.


# The recommender iterable cannot be serialized
Expand Down
21 changes: 10 additions & 11 deletions baybe/recommenders/naive.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Naive recommender for hybrid spaces."""

import warnings
from typing import ClassVar, Optional
from typing import ClassVar, Optional, cast

import pandas as pd
from attrs import define, evolve, field, fields
from torch import Tensor

from baybe.acquisition import PartialAcquisitionFunction
from baybe.recommenders.pure.base import PureRecommender
Expand Down Expand Up @@ -85,11 +86,9 @@ def recommend( # noqa: D102
) -> pd.DataFrame:
# See base class.

# First check whether the disc_recommender is either bayesian or non-predictive
is_bayesian_recommender = isinstance(self.disc_recommender, BayesianRecommender)
is_np_recommender = isinstance(self.disc_recommender, NonPredictiveRecommender)

if (not is_bayesian_recommender) and (not is_np_recommender):
if (not isinstance(self.disc_recommender, BayesianRecommender)) and (
not isinstance(self.disc_recommender, NonPredictiveRecommender)
):
raise NotImplementedError(
"""The discrete recommender should be either a Bayesian or a
NonPredictiveRecommender."""
Expand Down Expand Up @@ -117,7 +116,7 @@ def recommend( # noqa: D102
# will then be attached to every discrete point when the acquisition function
# is evaluated.
cont_part = searchspace.continuous.samples_random(1)
cont_part = to_tensor(cont_part).unsqueeze(-2)
cont_part_tensor = cast(Tensor, to_tensor(cont_part)).unsqueeze(-2)
Comment thread
AVHopp marked this conversation as resolved.

# Get discrete candidates. The metadata flags are ignored since the search space
# is hybrid
Expand All @@ -128,7 +127,7 @@ def recommend( # noqa: D102
)

# We now check whether the discrete recommender is bayesian.
if is_bayesian_recommender:
if isinstance(self.disc_recommender, BayesianRecommender):
# Get access to the recommender's acquisition function
self.disc_recommender.setup_acquisition_function(
searchspace, train_x, train_y
Expand All @@ -138,7 +137,7 @@ def recommend( # noqa: D102
# whenever evaluating the acquisition function
disc_acqf_part = PartialAcquisitionFunction(
acqf=self.disc_recommender._acquisition_function,
pinned_part=cont_part,
pinned_part=cont_part_tensor,
pin_discrete=False,
)

Expand All @@ -154,15 +153,15 @@ def recommend( # noqa: D102
# Get one random discrete point that will be attached when evaluating the
# acquisition function in the discrete space.
disc_part = searchspace.discrete.comp_rep.loc[disc_rec_idx].sample(1)
disc_part = to_tensor(disc_part).unsqueeze(-2)
disc_part_tensor = cast(Tensor, to_tensor(disc_part)).unsqueeze(-2)

# Setup a fresh acquisition function for the continuous recommender
self.cont_recommender.setup_acquisition_function(searchspace, train_x, train_y)

# Construct the continuous space as a standalone space
cont_acqf_part = PartialAcquisitionFunction(
acqf=self.cont_recommender._acquisition_function,
pinned_part=disc_part,
pinned_part=disc_part_tensor,
pin_discrete=True,
)
self.cont_recommender._acquisition_function = cont_acqf_part
Expand Down
13 changes: 12 additions & 1 deletion baybe/recommenders/pure/bayesian/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ def _get_acquisition_function_cls(
return fun

def setup_acquisition_function(
self, searchspace: SearchSpace, train_x: pd.DataFrame, train_y: pd.DataFrame
self,
searchspace: SearchSpace,
train_x: Optional[pd.DataFrame] = None,
Comment thread
AVHopp marked this conversation as resolved.
train_y: Optional[pd.DataFrame] = None,
) -> None:
"""Create the current acquisition function from provided training data.

Expand All @@ -79,7 +82,15 @@ def setup_acquisition_function(
searchspace: The search space in which the experiments are to be conducted.
train_x: The features of the conducted experiments.
train_y: The corresponding response values.

Raises:
NotImplementedError: If the setup is attempted without training data.
"""
if train_x is None or train_y is None:
raise NotImplementedError(
"Bayesian recommenders do not support empty training data yet."
)

best_f = train_y.max()
surrogate_model = self._fit(searchspace, train_x, train_y)
acquisition_function_cls = self._get_acquisition_function_cls()
Expand Down
2 changes: 1 addition & 1 deletion baybe/recommenders/pure/bayesian/sequential_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def _recommend_hybrid(
# TODO: Currently assumes that discrete parameters are first and continuous
# second. Once parameter redesign [11611] is completed, we might adjust
# this.
candidates_comp.columns = list(range(len(candidates_comp.columns)))
candidates_comp.columns = list(range(len(candidates_comp.columns))) # type: ignore[assignment]
Comment thread
AVHopp marked this conversation as resolved.
fixed_features_list = candidates_comp.to_dict("records")

else:
Expand Down
33 changes: 16 additions & 17 deletions baybe/recommenders/pure/nonpredictive/clustering.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Recommendation strategies based on clustering."""

from abc import ABC
from typing import ClassVar, List, Type, TypeVar
from typing import ClassVar, List, Type, Union

import numpy as np
import pandas as pd
from attrs import define, field
from scipy.stats import multivariate_normal
from sklearn.base import ClusterMixin
Comment thread
AVHopp marked this conversation as resolved.
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from sklearn.mixture import GaussianMixture
Expand All @@ -16,8 +17,6 @@
from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender
from baybe.searchspace import SearchSpaceType, SubspaceDiscrete

_ScikitLearnModel = TypeVar("_ScikitLearnModel")


@define
class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC):
Expand All @@ -41,7 +40,7 @@ class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC):
# that checks if a custom mechanism is implemented and uses default otherwise
# (similar to what is done in the recommenders)

model_class: ClassVar[Type[_ScikitLearnModel]]
model_class: ClassVar[Type[ClusterMixin]]
"""Class variable describing the model class."""

model_cluster_num_parameter_name: ClassVar[str]
Expand All @@ -57,8 +56,8 @@ class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC):

def _make_selection_default(
self,
model: _ScikitLearnModel,
candidates_scaled: pd.DataFrame,
model: ClusterMixin,
candidates_scaled: Union[pd.DataFrame, np.ndarray],
) -> List[int]:
"""Select one candidate from each cluster uniformly at random.

Expand All @@ -80,8 +79,8 @@ def _make_selection_default(

def _make_selection_custom(
self,
model: _ScikitLearnModel,
candidates_scaled: pd.DataFrame,
model: ClusterMixin,
candidates_scaled: Union[pd.DataFrame, np.ndarray],
) -> List[int]:
"""Select candidates from the computed clustering.

Expand Down Expand Up @@ -136,7 +135,7 @@ def _recommend_discrete(
class PAMClusteringRecommender(SKLearnClusteringRecommender):
"""Partitioning Around Medoids (PAM) clustering recommender."""

model_class: ClassVar[Type[_ScikitLearnModel]] = KMedoids
model_class: ClassVar[Type[ClusterMixin]] = KMedoids
# See base class.

model_cluster_num_parameter_name: ClassVar[str] = "n_clusters"
Expand All @@ -156,8 +155,8 @@ def _default_model_params(self) -> dict:

def _make_selection_custom(
self,
model: _ScikitLearnModel,
candidates_scaled: pd.DataFrame,
model: ClusterMixin,
candidates_scaled: Union[pd.DataFrame, np.ndarray],
) -> List[int]:
"""Select candidates from the computed clustering.

Expand All @@ -180,7 +179,7 @@ class KMeansClusteringRecommender(SKLearnClusteringRecommender):
"""K-means clustering recommender."""

# Class variables
model_class: ClassVar[Type[_ScikitLearnModel]] = KMeans
model_class: ClassVar[Type[ClusterMixin]] = KMeans
# See base class.

model_cluster_num_parameter_name: ClassVar[str] = "n_clusters"
Expand All @@ -200,8 +199,8 @@ def _default_model_params(self) -> dict:

def _make_selection_custom(
self,
model: _ScikitLearnModel,
candidates_scaled: pd.DataFrame,
model: ClusterMixin,
candidates_scaled: Union[pd.DataFrame, np.ndarray],
) -> List[int]:
"""Select candidates from the computed clustering.

Expand Down Expand Up @@ -232,16 +231,16 @@ class GaussianMixtureClusteringRecommender(SKLearnClusteringRecommender):
"""Gaussian mixture model (GMM) clustering recommender."""

# Class variables
model_class: ClassVar[Type[_ScikitLearnModel]] = GaussianMixture
model_class: ClassVar[Type[ClusterMixin]] = GaussianMixture
# See base class.

model_cluster_num_parameter_name: ClassVar[str] = "n_components"
# See base class.

def _make_selection_custom(
self,
model: _ScikitLearnModel,
candidates_scaled: pd.DataFrame,
model: ClusterMixin,
candidates_scaled: Union[pd.DataFrame, np.ndarray],
) -> List[int]:
"""Select candidates from the computed clustering.

Expand Down
27 changes: 25 additions & 2 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ packages = baybe

; at some point, these excludes should all be gone ...
exclude = (?x)(
baybe/recommenders
| baybe/searchspace
baybe/searchspace
| baybe/serialization
| baybe/strategies
| baybe/surrogates
Expand All @@ -18,6 +17,12 @@ exclude = (?x)(
| baybe/surrogate.py
)

[mypy-botorch.acquisition]
ignore_missing_imports = True

[mypy-botorch.optim]
ignore_missing_imports = True

[mypy-botorch.test_functions]
ignore_missing_imports = True

Expand All @@ -27,9 +32,27 @@ ignore_missing_imports = True
[mypy-scipy.spatial.distance]
ignore_missing_imports = True

[mypy-scipy.stats]
ignore_missing_imports = True

[mypy-sklearn.base]
ignore_missing_imports = True

[mypy-sklearn.cluster]
ignore_missing_imports = True

[mypy-sklearn.metrics]
ignore_missing_imports = True

[mypy-sklearn.mixture]
ignore_missing_imports = True

[mypy-sklearn.preprocessing]
ignore_missing_imports = True

[mypy-sklearn_extra.cluster]
ignore_missing_imports = True

[mypy-joblib]
ignore_missing_imports = True

Expand Down