Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions bofire/benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,31 +135,28 @@ def __init__(
):
super().__init__(**kwargs)
self._benchmark = benchmark
benchmark_inputs = self._benchmark.domain.inputs.get()
assert n_filler_features >= 1, "n_filler_features must be >= 1."
assert len(benchmark.domain.constraints) == 0, "Constraints not supported yet."
assert len(benchmark.domain.inputs.get(ContinuousInput)) == len(
benchmark.domain.inputs
), "Only continuous inputs supported yet."
if not Inputs.is_continuous(benchmark_inputs):
raise ValueError("Only continuous inputs supported yet.")
self.n_filler_features = n_filler_features
self.n_features_per_original_feature = n_features_per_original_feature

features = []
constraints = []
for j, feat in enumerate(self._benchmark.domain.inputs.get()):
for j, feat in enumerate(benchmark_inputs):
features += [
ContinuousInput(
key=f"{feat.key}_{i}",
bounds=(0, 1 / len(self._benchmark.domain.inputs)),
bounds=(0, 1 / len(benchmark_inputs)),
)
if self.n_features_per_original_feature == 1
else ContinuousDescriptorInput(
key=f"{feat.key}_{i}",
bounds=(0, 1 / len(self._benchmark.domain.inputs)),
descriptors=self._benchmark.domain.inputs.get_keys(),
values=[
1 if k == j else 0
for k in range(len(self._benchmark.domain.inputs))
],
values=[1 if k == j else 0 for k in range(len(benchmark_inputs))],
)
for i in range(self.n_features_per_original_feature)
]
Expand All @@ -171,7 +168,7 @@ def __init__(
for i in range(self.n_features_per_original_feature)
],
coefficients=[1.0] * self.n_features_per_original_feature,
rhs=1 / len(self._benchmark.domain.inputs),
rhs=1 / len(benchmark_inputs),
)
)

Expand Down Expand Up @@ -207,18 +204,13 @@ def __init__(
constraints=Constraints(constraints=constraints),
outputs=self._benchmark.domain.outputs,
)
self._mins = np.array(
[feat.bounds[0] for feat in self._benchmark.domain.inputs.get()]
)

self._mins = np.array([feat.bounds[0] for feat in benchmark_inputs])
self._scales = np.array(
[
feat.bounds[1] - feat.bounds[0]
for feat in self._benchmark.domain.inputs.get()
]
[feat.bounds[1] - feat.bounds[0] for feat in benchmark_inputs]
)
self._scales_new = np.array(
[1 / len(self._benchmark.domain.inputs.get_keys())]
* len(self._benchmark.domain.inputs.get_keys())
[1 / len(benchmark_inputs.get_keys())] * len(benchmark_inputs.get_keys())
)

def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
Expand Down
63 changes: 48 additions & 15 deletions bofire/data_models/domain/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
overload,
)

import numpy as np
import pandas as pd
from pydantic import Field, field_validator, validate_call
from scipy.stats.qmc import LatinHypercube, Sobol
from typing_extensions import Self
from typing_extensions import Self, TypeGuard, TypeVar

from bofire.data_models.base import BaseModel
from bofire.data_models.enum import CategoricalEncodingEnum, SamplingMethodEnum
Expand Down Expand Up @@ -56,11 +56,16 @@
from bofire.data_models.types import InputTransformSpecs


F = TypeVar("F", bound=AnyFeature)
FeatureSequence = Sequence[F]
FeatureT = TypeVar("FeatureT", bound=AnyFeature, default=AnyFeature)
InputT = TypeVar("InputT", bound=AnyInput, default=AnyInput)
EngineeredFeatureT = TypeVar(
"EngineeredFeatureT", bound=AnyEngineeredFeature, default=AnyEngineeredFeature
)
OutputT = TypeVar("OutputT", bound=AnyOutput, default=AnyOutput)
OutputGetT = TypeVar("OutputGetT", bound=AnyOutput, default=AnyOutput)


class _BaseFeatures(BaseModel, Generic[F]):
class _BaseFeatures(BaseModel, Generic[FeatureT]):
"""Container of features, both input and output features are allowed.

Attributes:
Expand All @@ -69,26 +74,26 @@ class _BaseFeatures(BaseModel, Generic[F]):
"""

type: Literal["Features"] = "Features"
features: FeatureSequence = Field(default_factory=list)
features: Sequence[FeatureT] = Field(default_factory=list)

@field_validator("features")
@classmethod
def validate_unique_feature_keys(
cls: type[_BaseFeatures],
features: FeatureSequence,
) -> FeatureSequence:
features: Sequence[FeatureT],
) -> Sequence[FeatureT]:
keys = [feat.key for feat in features]
if len(keys) != len(set(keys)):
raise ValueError("Feature keys are not unique.")
return features

def __iter__(self) -> Iterator[F]:
def __iter__(self) -> Iterator[FeatureT]:
return iter(self.features)

def __len__(self) -> int:
    """Return the number of features held by this container."""
    return len(self.features)

def __getitem__(self, i):
def __getitem__(self, i) -> FeatureT:
return self.features[i]

def __add__(self, other: Union[Sequence[AnyFeature], _BaseFeatures]):
Expand Down Expand Up @@ -126,7 +131,7 @@ def is_engineeredfeats(feats):
)
return Features(features=new_feature_seq)

def get_by_key(self, key: str, use_regex: bool = False) -> F:
def get_by_key(self, key: str, use_regex: bool = False) -> FeatureT:
"""Get a feature by its key.

First, the method tries to find the feature by its key. If no feature is
Expand Down Expand Up @@ -255,7 +260,7 @@ class Features(_BaseFeatures[AnyFeature]):
pass


class EngineeredFeatures(_BaseFeatures[AnyEngineeredFeature]):
class EngineeredFeatures(_BaseFeatures[EngineeredFeatureT]):
"""Container of engineered (input) features, only engineered features
are allowed.

Expand Down Expand Up @@ -331,7 +336,7 @@ def n_transformed_inputs(self) -> int:
return sum(feat.n_transformed_inputs for feat in self.get())


class Inputs(_BaseFeatures[AnyInput]):
class Inputs(_BaseFeatures[InputT]):
"""Container of input features, only input features are allowed.

Attributes:
Expand All @@ -343,7 +348,7 @@ class Inputs(_BaseFeatures[AnyInput]):

@field_validator("features")
@classmethod
def validate_only_one_task_input(cls, features: Sequence[AnyInput]):
def validate_only_one_task_input(cls, features: Sequence[InputT]):
filtered = filter_by_class(
features,
includes=TaskInput,
Expand Down Expand Up @@ -911,8 +916,12 @@ def is_fulfilled(self, experiments: pd.DataFrame) -> pd.Series:
.all(axis=1)
)

@staticmethod
def is_continuous(inputs: Inputs) -> TypeGuard[Inputs[ContinuousInput]]:
    """Type guard: report whether every feature in `inputs` is continuous.

    Returns True iff the number of `ContinuousInput` features equals the
    total number of features, i.e. no feature of any other type is present.
    A True result lets static type checkers narrow `inputs` to
    `Inputs[ContinuousInput]` (PEP 647 TypeGuard semantics).
    """
    return len(inputs.get(ContinuousInput)) == len(inputs)


class Outputs(_BaseFeatures[AnyOutput]):
class Outputs(_BaseFeatures[OutputT]):
"""Container of output features, only output features are allowed.

Attributes:
Expand Down Expand Up @@ -1215,3 +1224,27 @@ def preprocess_experiments_any_valid_output(
),
)
return clean_exp

@overload
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need the overload two times?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately that's part of the specification for overloads (link). I think the idea is that the actual implementation of get should have no type hints, and all of the type hints go into the different overloads.

So the second overload acts as a fallback when the argument types don't match the first overload — which happens whenever `excludes` is provided. Python's type system can't (yet) express `Intersection[GetIncludesT, ~GetExcludesT]`, so the fallback has to return the more generic type.

# First overload (decorated with @overload above): when a single concrete
# output type is requested via `includes` and nothing is excluded, the
# return type narrows to a container of exactly that type.
def get(
    self, includes: Type[OutputGetT], excludes: None = None, exact: bool = False
) -> Outputs[OutputGetT]: ...

# Second overload: generic fallback for every other argument combination
# (notably when `excludes` is given). Python's type system cannot express
# Intersection[Included, ~Excluded], so the result stays `Self`.
@overload
def get(
    self,
    includes: Type | List[Type] | None,
    excludes: Type | List[Type] | None,
    exact: bool,
) -> Self: ...

def get(
    self,
    includes: Union[
        Type, List[Type], None
    ] = AnyFeature,  # ty: ignore[invalid-parameter-default]
    excludes: Union[Type, List[Type], None] = None,
    exact: bool = False,
) -> Self:
    # repeat the function here as implementation must be below overloads
    # (the implementation merely delegates; all filtering lives in the base class)
    return super().get(includes, excludes, exact)
11 changes: 4 additions & 7 deletions bofire/utils/naming_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,10 @@ def get_column_names(outputs: Outputs) -> Tuple[List[str], List[str]]:

"""
pred_cols, sd_cols = [], []
for featkey in outputs.get_keys(CategoricalOutput):
pred_cols = pred_cols + [
f"{featkey}_{cat}_prob" for cat in outputs.get_by_key(featkey).categories
]
sd_cols = sd_cols + [
f"{featkey}_{cat}_sd" for cat in outputs.get_by_key(featkey).categories
]

for feat in outputs.get(CategoricalOutput):
pred_cols = pred_cols + [f"{feat.key}_{cat}_prob" for cat in feat.categories]
sd_cols = sd_cols + [f"{feat.key}_{cat}_sd" for cat in feat.categories]
for featkey in outputs.get_keys(ContinuousOutput):
pred_cols = pred_cols + [f"{featkey}_pred"]
sd_cols = sd_cols + [f"{featkey}_sd"]
Expand Down
Loading