Skip to content
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ and this project adheres to [Pragmatic Versioning](https://github.com/experiment
- `WeightedMeanFeature` and `MolecularWeightedMeanFeature` engineered features for weighted-mean behavior.
- `plot_gp_slice_plotly` now supports fixed input features that can be a mix of `ContinuousInput` and `CategoricalInput` (with string categorical fixed values).
- Configurable `noise_constraint` support for GP-based surrogates (`SingleTaskGP`, `MixedSingleTaskGP`, `TanimotoGP`, and `MultiTaskGP`) and corresponding linear/polynomial wrappers.
- Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`. When `min_count=0`, the all-zero (fully inactive) pattern is now included naturally. `none_also_valid=True` with `min_count > 0` explicitly adds the all-zero pattern.
- Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, overlapping NChooseK constraints (shared features) are handled via incremental pairwise merge with consistency filtering, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`.

### Changed

Expand Down
281 changes: 189 additions & 92 deletions bofire/strategies/doe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,125 +556,189 @@ def hessian(self, x: np.ndarray, *args):
return hessian


def check_nchoosek_constraints_as_bounds(domain: Domain) -> None:
"""Checks if NChooseK constraints of domain can be formulated as bounds.

For every feature referenced by an NChooseKConstraint the lower bound must
be >= 0 (so that "inactive" variables can be pinned to 0) and the upper
bound must be > 0. The feature sets of different NChooseKConstraints must
be pairwise disjoint.
def _iter_nchoosek_combined_patterns(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since this function returns a list, call it _get_nchoosek... instead of _iter_....

domain: Domain,
) -> list[tuple[int, ...]]:
"""Generate combined deactivation patterns across all NChooseK constraints.

Note: a non-zero lower bound (e.g. 0.1) is fine because the bounds-based
approach overrides the bounds of inactive variables to [0, 0] per
experiment while keeping the original [lb, ub] for active variables.
For each constraint, per-feature active/inactive patterns are generated
for every allowed activity level. When multiple constraints share
features, patterns are merged incrementally (pairwise) rather than via
a single giant Cartesian product, pruning inconsistent combinations
early.

Args:
domain (Domain): Domain whose NChooseK constraints should be checked
Consistency: If a feature is shared by multiple constraints, it must be active in all or
inactive in all to yield a valid combined pattern.

Returns:
Tuples containing the domain-level indices of features to deactivate
(pin to 0) for one experiment slot. Duplicates are possible when
constraints are disjoint; the caller should deduplicate if needed.
"""
# collect NChooseK constraints
if len(domain.constraints) == 0:
return
nchoosek_constraints = [
c for c in domain.constraints if isinstance(c, NChooseKConstraint)
]

if not nchoosek_constraints:
return []

all_keys = domain.inputs.get_keys()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move all_keys into _constraint_patterns, since you only use it there.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure why you didn't do this. maybe you had a good reason that I overlooked.


def _constraint_patterns(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd move _constraint_pattern and _merge_two out of _iter_nchoosek_combined_patterns and directly into the module. I find the function _iter_nchoosek_combined_patterns really long and confusing to read. It looks like _merge_two is not even currying anything.

constraint: NChooseKConstraint,
) -> list[dict[int, bool]]:
"""Generate dicts for one constraint."""
patterns_of_constraint = []
n_features = len(constraint.features)
ind = [i for i, key in enumerate(all_keys) if key in constraint.features]
for k in range(max(constraint.min_count, 1), constraint.max_count + 1):
n_inactive = n_features - k
if n_inactive > 0:
for combo in combinations(ind, r=n_inactive):
inactive_set = set(combo)
patterns_of_constraint.append(
{idx: (idx not in inactive_set) for idx in ind}
)
else:
patterns_of_constraint.append({idx: True for idx in ind})
return patterns_of_constraint

nchoosek_constraints = []
for c in domain.constraints:
if isinstance(c, NChooseKConstraint):
nchoosek_constraints.append(c)

if len(nchoosek_constraints) == 0:
return

# check if the domains of all NChooseK constraints are compatible to linearization
for c in nchoosek_constraints:
for name in np.unique(c.features):
input = domain.inputs.get_by_key(name)
if input.bounds[0] < 0:
raise ValueError(
f"Constraint {c} cannot be formulated as bounds. "
f"Feature '{name}' has lower bound {input.bounds[0]} < 0.",
def _merge_two(
patterns_of_constraint_a: list[dict[int, bool]],
patterns_of_constraint_b: list[dict[int, bool]],
) -> list[dict[int, bool]]:
"""Merge two pattern lists, filtering conflicts.

Uses a hash-join on shared feature indices: patterns from A are
bucketed by their assignment to shared keys, so each pattern from B
only needs to merge with the bucket that agrees on those keys.

pattern_of_constraint_a: {feature_index: is_active} dict from constraint A
pattern_of_constraint_b: {feature_index: is_active} dict from constraint B

returns: merged {feature_index: is_active} dict if no conflicts, else None

"""
# Identify shared feature indices between the two sides.
shared = set(patterns_of_constraint_a[0].keys()).intersection(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it intentional that this crashes in case patterns_... is empty? can this ever happen?

set(patterns_of_constraint_b[0].keys())
)

patterns_of_merged_constraints = []
if shared:
# Bucket patterns by their assignment on shared indices.
# if activation_per_feature= {0: True, 1:True , 2: False} is a pattern and shared=[0,2], the key is (True, False)
sorted_shared = sorted(shared)
map_pattern_of_shared_to_global_pattern = {
tuple(activation_per_feature[idx] for idx in sorted_shared): []
for activation_per_feature in patterns_of_constraint_a
}
for activation_per_feature in patterns_of_constraint_a:
pattern_of_shared = tuple(
activation_per_feature[idx] for idx in sorted_shared
)
map_pattern_of_shared_to_global_pattern[pattern_of_shared].append(
activation_per_feature
)
if input.bounds[1] < 0:
raise ValueError(
f"Constraint {c} cannot be formulated as bounds. "
f"Feature '{name}' has upper bound {input.bounds[1]} < 0.",

for activation_per_feature_b in patterns_of_constraint_b:
pattern_of_shared = tuple(
activation_per_feature_b[idx] for idx in sorted_shared
)
for (
activation_per_feature_a
) in map_pattern_of_shared_to_global_pattern.get(pattern_of_shared, ()):
merged = dict(activation_per_feature_a)
merged.update(activation_per_feature_b)
patterns_of_merged_constraints.append(merged)
else:
# No shared features — full cross-product.
for activation_per_feature_b in patterns_of_constraint_b:
for activation_per_feature_a in patterns_of_constraint_a:
merged = dict(activation_per_feature_a)
merged.update(activation_per_feature_b)
patterns_of_merged_constraints.append(merged)

return patterns_of_merged_constraints

# Incremental pairwise merge — each step only materialises a generator
merged_iter = _constraint_patterns(nchoosek_constraints[0])
for constraint in nchoosek_constraints[1:]:
merged_iter = _merge_two(merged_iter, _constraint_patterns(constraint))

allowed_constraint_patterns = []
for merged in merged_iter:
allowed_constraint_patterns.append(
tuple(sorted(idx for idx, active in merged.items() if not active))
)
return allowed_constraint_patterns


def _build_nchoosek_combined_patterns(
domain: Domain,
) -> list[tuple[int, ...]]:
"""Collect combined deactivation patterns.

Args:
domain: Domain whose NChooseK constraints should be combined.

Returns:
A deduplicated list of inactive-index tuples.

Raises:
ValueError: When no valid combined patterns exist (contradictory
constraints).
"""
result = list(set(_iter_nchoosek_combined_patterns(domain)))

if not result and any(
isinstance(c, NChooseKConstraint) for c in domain.constraints
):
raise ValueError(
"No valid combined NChooseK patterns exist. "
"The overlapping constraints are contradictory."
)

# check if the parameter names of two nchoose overlap
for c in nchoosek_constraints:
for _c in nchoosek_constraints:
if c != _c:
for name in c.features:
if name in _c.features:
raise ValueError(
f"Domain {domain} cannot be used for formulation as bounds. \
names attribute of NChooseK constraints must be pairwise disjoint.",
)
return result


def nchoosek_constraints_as_bounds(
domain: Domain,
n_experiments: int,
) -> List:
"""Determines the box bounds for the decision variables
) -> list:
"""Determines the box bounds for the decision variables.

When multiple NChooseK constraints share features, their patterns are
combined via Cartesian product and filtered for consistency so that
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment above says

When multiple constraints share
features, patterns are merged incrementally (pairwise) rather than via
a single giant Cartesian product, pruning inconsistent combinations
early.

which sounds as if it is contradicting this comment.

each shared feature has a single active/inactive status per experiment.

Args:
domain (Domain): Domain to find the bounds for.
n_experiments (int): number of experiments for the design to be determined.

Returns:
A list of tuples containing bounds that respect NChooseK constraint imposed
onto the decision variables.
A list of tuples containing bounds that respect NChooseK constraints
imposed onto the decision variables.

"""
check_nchoosek_constraints_as_bounds(domain)

# bounds without NChooseK constraints
bounds = np.array(
[p.bounds for p in domain.inputs.get(ContinuousInput)] * n_experiments,
)

if len(domain.constraints) > 0:
for constraint in domain.constraints:
if isinstance(constraint, NChooseKConstraint):
n_features = len(constraint.features)
# find indices of constraint features in the domain input keys
ind = [
i
for i, p in enumerate(domain.inputs.get_keys())
if p in constraint.features
]

# Build all valid deactivation patterns for every allowed
# activity level k in [min_count, max_count]. For each k,
# exactly (n_features - k) variables are set to zero.
all_inactive_patterns = []
for k in range(max(constraint.min_count, 1), constraint.max_count + 1):
n_inactive = n_features - k
if n_inactive > 0:
all_inactive_patterns.extend(
list(combinations(ind, r=n_inactive))
)

if len(all_inactive_patterns) > 0:
# patterns may have different lengths (different activity
# levels), so we keep them as a plain list of tuples
# instead of converting to a numpy array.
np.random.shuffle(
all_inactive_patterns
) # shuffle patterns to avoid biasing the optimization towards certain patterns

# set bounds to zero for the chosen inactive variables per experiment
D = len(domain.inputs)
for i in range(n_experiments):
pattern = all_inactive_patterns[i % len(all_inactive_patterns)]
for idx in pattern:
bounds[idx + i * D, :] = [0, 0]
if i % len(all_inactive_patterns) == (
len(all_inactive_patterns) - 1
):
np.random.shuffle(all_inactive_patterns)
else:
pass
combined_patterns = _build_nchoosek_combined_patterns(domain)

if combined_patterns:
np.random.shuffle(combined_patterns)

D = len(domain.inputs)
for i in range(n_experiments):
pattern = combined_patterns[i % len(combined_patterns)]
for idx in pattern:
bounds[idx + i * D, :] = [0, 0]
if i % len(combined_patterns) == (len(combined_patterns) - 1):
np.random.shuffle(combined_patterns)

# convert bounds to list of tuples
bounds = [(b[0], b[1]) for b in bounds]
Expand Down Expand Up @@ -743,3 +807,36 @@ def _minimize(
hess=objective_function.evaluate_hessian if use_hessian else None,
)
return result.x


if __name__ == "__main__":
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a test? make proper test out of it? is it just you playing aroung? then delete it?

# testing overlapping NChooseK constraints
from bofire.data_models.constraints.api import NChooseKConstraint
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput

inputs = Inputs(
features=[
ContinuousInput(key="x1", bounds=(0, 1)),
ContinuousInput(key="x2", bounds=(0, 1)),
ContinuousInput(key="x3", bounds=(0, 1)),
ContinuousInput(key="x4", bounds=(0, 1)),
]
)
constraints = [
NChooseKConstraint(
features=["x1", "x2", "x3"],
min_count=1,
max_count=2,
none_also_valid=True,
),
NChooseKConstraint(
features=["x2", "x3", "x4"],
min_count=1,
max_count=2,
none_also_valid=True,
),
]
domain = Domain(inputs=inputs, constraints=constraints)
patterns = _build_nchoosek_combined_patterns(domain)
print(patterns)
Loading
Loading