Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions bofire/data_models/domain/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,19 +366,22 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
def validate_experiments(
    self,
    experiments: pd.DataFrame,
    strict: bool = False,
    check_nan: bool = True,
    check_missing_cols: bool = True,
) -> pd.DataFrame:
    """Validate the input-feature columns of ``experiments``.

    Each feature in ``self`` validates its own column through
    ``feature.validate_experimental`` and the returned column is written
    back into ``experiments`` (the frame is modified in place).

    Args:
        experiments: Data frame holding one column per input feature.
        strict: Forwarded unchanged to ``feature.validate_experimental``.
        check_nan: If True, raise when any validated input column still
            contains a missing value.
        check_missing_cols: If True, raise when the column of a feature is
            absent. If False, absent columns are skipped.

    Returns:
        The same ``experiments`` frame with validated input columns.

    Raises:
        ValueError: If a feature column is absent and ``check_missing_cols``
            is True, or if a missing value is found and ``check_nan`` is True.
    """
    present_keys = []
    for feature in self:
        if feature.key not in experiments:
            if check_missing_cols:
                raise ValueError(f"no col for input feature `{feature.key}`")
            # Tolerated absence: there is nothing to validate, and indexing
            # the frame with this key would raise a KeyError.
            continue
        experiments[feature.key] = feature.validate_experimental(
            experiments[feature.key],
            strict=strict,
        )
        present_keys.append(feature.key)

    # `isnull` is an alias of `isna`, so one check covers both. Only columns
    # that exist are inspected so tolerated absences do not raise KeyError.
    if check_nan and experiments[present_keys].isna().to_numpy().any():
        raise ValueError("there are null values")
    return experiments

def get_categorical_combinations(
Expand Down
19 changes: 12 additions & 7 deletions bofire/strategies/doe/utils_categorical_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from bofire.data_models.features.categorical import CategoricalInput
from bofire.data_models.features.continuous import ContinuousInput
from bofire.data_models.features.discrete import DiscreteInput
from bofire.data_models.features.feature import Feature, Output
from bofire.data_models.features.feature import Feature, Output, get_encoded_name
from bofire.data_models.types import DiscreteVals


Expand Down Expand Up @@ -57,8 +57,8 @@ def discrete_to_relaxable_domain_mapper(
new_constraints = []
categorical_groups: List[List[ContinuousInput]] = []
for c_input in categorical_inputs:
current_group_keys = list(c_input.categories) # type: ignore
pick_1_constraint, group_vars = generate_mixture_constraints(current_group_keys)
assert isinstance(c_input, CategoricalInput)
pick_1_constraint, group_vars = generate_mixture_constraints(c_input)
categorical_groups.append(group_vars)
relaxable_categorical_inputs.extend(group_vars)
new_constraints.append(pick_1_constraint)
Expand Down Expand Up @@ -487,13 +487,18 @@ def NChooseKGroup(


def generate_mixture_constraints(
keys: List[str],
feature: CategoricalInput,
) -> Tuple[LinearEqualityConstraint, List[ContinuousInput]]:
binary_vars = (ContinuousInput(key=x, bounds=[0, 1]) for x in keys)
binary_vars = (
ContinuousInput(key=get_encoded_name(feature.key, category), bounds=[0, 1])
for category in feature.categories
)

mixture_constraint = LinearEqualityConstraint(
features=keys,
coefficients=[1 for x in range(len(keys))],
features=[
get_encoded_name(feature.key, category) for category in feature.categories
],
coefficients=[1 for _ in range(len(feature.categories))],
rhs=1,
)

Expand Down
89 changes: 45 additions & 44 deletions bofire/strategies/doe_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from pydantic.types import PositiveInt

import bofire.data_models.strategies.api as data_models
from bofire.data_models.features.api import CategoricalInput, Input
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput
from bofire.data_models.strategies.doe import (
AnyDoEOptimalityCriterion,
DoEOptimalityCriterion,
Expand Down Expand Up @@ -37,8 +38,7 @@ def __init__(
):
super().__init__(data_model=data_model, **kwargs)
self.data_model = data_model
self._partially_fixed_candidates = None
self._fixed_candidates = None
self._allow_partially_filled_candidates = True

@property
def formula(self):
Expand All @@ -48,28 +48,6 @@ def formula(self):
)
return None

def set_candidates(self, candidates: pd.DataFrame):
    """Store ``candidates`` after checking its columns against the domain.

    Args:
        candidates: Data frame whose columns must be exactly the keys
            returned by ``self.domain.inputs.get_keys(includes=Input)``.

    Raises:
        AttributeError: If ``candidates`` has a column that is not one of
            those keys, or if one of those keys has no column.
    """
    domain_keys = self.domain.inputs.get_keys(includes=Input)

    # Columns supplied by the caller that the domain does not know.
    unknown = [name for name in candidates.columns if name not in domain_keys]
    if unknown:
        raise AttributeError(
            f"provided candidates have columns: {*unknown,}, which do not exist in original domain",
        )

    # Domain keys for which the caller supplied no column.
    absent = [key for key in domain_keys if key not in candidates.columns]
    if absent:
        raise AttributeError(
            f"provided candidates are missing columns: {*absent,} which exist in original domain",
        )

    self._candidates = candidates

def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore
all_new_categories = []

Expand All @@ -88,16 +66,29 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore
all_new_categories.extend(new_categories)

# here we adapt the (partially) fixed experiments to the new domain
fixed_experiments_count = 0
_candidate_count = candidate_count
adapted_partially_fixed_candidates = self._transform_candidates_to_new_domain(
new_domain,
self.candidates,
)

# NOTE: not working yet.
# The goal is to also condition on self.experiments.
if self.candidates is not None:
fixed_experiments_count = self.candidates.notnull().all(axis=1).sum()
_candidate_count = candidate_count + fixed_experiments_count
adapted_partially_fixed_candidates = (
self._transform_candidates_to_new_domain(
new_domain,
self.candidates,
)
)

# TODO: the experiments also have to be adapted; commented out for now to satisfy ruff
# if self.experiments is not None:
# adapted_fixed_experiments = self._transform_candidates_to_new_domain(
# new_domain,
# self.experiments,
# )

num_binary_vars = len([var for group in new_categories for var in group])
num_discrete_vars = len(new_discretes)
Expand Down Expand Up @@ -183,7 +174,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore
ignore_index=True,
)
print(
f"Status: {i+1} of {_candidate_count} experiments determined \n"
f"Status: {i + 1} of {_candidate_count} experiments determined \n"
f"Current experimental plan:\n {design_from_new_to_original_domain(self.domain, design)}",
)

Expand Down Expand Up @@ -230,24 +221,34 @@ def _transform_candidates_to_new_domain(self, new_domain, candidates):
for col in missing_columns:
intermediate_candidates.insert(0, col, None)

cat_columns = self.domain.inputs.get(includes=CategoricalInput)
for cat in cat_columns:
for row_index, c in enumerate(intermediate_candidates[cat.key].values):
if pd.isnull(c):
continue
if c not in cat.categories: # type: ignore
raise AttributeError(
f"provided value {c} for categorical variable {cat.key} "
f"does not exist in the corresponding categories {cat.categories}", # type: ignore
)
intermediate_candidates.loc[row_index, cat.categories] = 0 # type: ignore
intermediate_candidates.loc[row_index, c] = 1

intermediate_candidates = intermediate_candidates.drop(
[cat.key for cat in cat_columns],
axis=1,
# this is doing the one-hot encoding in a well tested way
intermediate_candidates = self.domain.inputs.transform(
intermediate_candidates,
{
key: CategoricalEncodingEnum.ONE_HOT
for key in self.domain.inputs.get_keys(CategoricalInput)
},
)

# cat_columns = self.domain.inputs.get(includes=CategoricalInput)
# for cat in cat_columns:
# for row_index, c in enumerate(intermediate_candidates[cat.key].values):
# if pd.isnull(c):
# continue
# if c not in cat.categories: # type: ignore
# raise AttributeError(
# f"provided value {c} for categorical variable {cat.key} "
# f"does not exist in the corresponding categories {cat.categories}", # type: ignore
# )
# intermediate_candidates.loc[row_index, cat.categories] = 0 # type: ignore
# intermediate_candidates.loc[row_index, c] = 1

# intermediate_candidates = intermediate_candidates.drop(
# [cat.key for cat in cat_columns],
# axis=1,
# )

# What is this doing?
adapted_partially_fixed_candidates = pd.concat(
[
intermediate_candidates[candidates.notnull().all(axis=1)],
Expand Down
3 changes: 3 additions & 0 deletions bofire/strategies/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
self.domain = data_model.domain
self.seed = data_model.seed or np.random.default_rng().integers(1000)
self.rng = np.random.default_rng(self.seed)
self._allow_partially_filled_candidates = False
self._experiments = None
self._candidates = None

Expand Down Expand Up @@ -195,6 +196,8 @@ def set_candidates(self, candidates: pd.DataFrame):
candidates = self.domain.inputs.validate_experiments(
candidates[self.domain.inputs.get_keys()],
strict=False,
check_nan=self._allow_partially_filled_candidates is False,
check_missing_cols=self._allow_partially_filled_candidates is False,
)
self._candidates = candidates[self.domain.inputs.get_keys()]

Expand Down