Changes from 15 commits
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -16,7 +16,7 @@ and this project adheres to [Pragmatic Versioning](https://github.com/experiment
- `WeightedMeanFeature` and `MolecularWeightedMeanFeature` engineered features for weighted-mean behavior.
- `plot_gp_slice_plotly` now supports fixed input features that can be a mix of `ContinuousInput` and `CategoricalInput` (with string categorical fixed values).
- Configurable `noise_constraint` support for GP-based surrogates (`SingleTaskGP`, `MixedSingleTaskGP`, `TanimotoGP`, and `MultiTaskGP`) and corresponding linear/polynomial wrappers.
- Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`. When `min_count=0`, the all-zero (fully inactive) pattern is now included naturally. `none_also_valid=True` with `min_count > 0` explicitly adds the all-zero pattern.
- Generalized NChooseK constraint support in DoE: `min_count > 0` is now supported, non-zero lower bounds (`lb > 0`) are allowed for NChooseK features, overlapping NChooseK constraints (shared features) are handled via incremental pairwise merge with consistency filtering, and `nchoosek_constraints_as_bounds` generates deactivation patterns for all activity levels `k ∈ [min_count, max_count]`.

### Changed

231 changes: 140 additions & 91 deletions bofire/strategies/doe/utils.py
@@ -1,9 +1,10 @@
import importlib.util
import re
import sys
from collections import defaultdict
from copy import copy
from itertools import combinations
from typing import List, Optional, Tuple, Union, cast
from typing import Iterator, List, Optional, Tuple, Union, cast

import numpy as np
import pandas as pd
@@ -556,125 +557,173 @@ def hessian(self, x: np.ndarray, *args):
return hessian


def check_nchoosek_constraints_as_bounds(domain: Domain) -> None:
"""Checks if NChooseK constraints of domain can be formulated as bounds.
def _iter_nchoosek_combined_patterns(
> **Contributor:** since this function returns a list, call it `_get_nchoosek...` instead of `_iter_....`

domain: Domain,
) -> "Iterator[Tuple[int, ...]]":
> **Contributor:** Why is the return type a string? Why `Tuple` and not `tuple`?

"""Yield combined deactivation patterns across all NChooseK constraints.

For each constraint, per-feature active/inactive patterns are generated
for every allowed activity level. When multiple constraints share
features, patterns are merged incrementally (pairwise) rather than via
a single giant Cartesian product, pruning inconsistent combinations
early.

Consistency: If a feature is shared by multiple constraints, it must be active in all or
inactive in all to yield a valid combined pattern.

Yields:
Tuples containing the domain-level indices of features to deactivate
(pin to 0) for one experiment slot. Duplicates are possible when
constraints are disjoint; the caller should deduplicate if needed.
"""
nchoosek_constraints = [
c for c in domain.constraints if isinstance(c, NChooseKConstraint)
]

if not nchoosek_constraints:
return

all_keys = domain.inputs.get_keys()
> **Contributor:** move `all_keys` into `_constraint_patterns`, since you only use it there.

> **Contributor:** not sure why you didn't do this. Maybe you had a good reason that I overlooked.


def _constraint_patterns(constraint):
> **Contributor:** type hint missing

"""Yield {feature_index: is_active} dicts for one constraint."""
n_features = len(constraint.features)
ind = [i for i, key in enumerate(all_keys) if key in constraint.features]
for k in range(max(constraint.min_count, 1), constraint.max_count + 1):
n_inactive = n_features - k
if n_inactive > 0:
for combo in combinations(ind, r=n_inactive):
inactive_set = set(combo)
yield {idx: (idx not in inactive_set) for idx in ind}
else:
yield {idx: True for idx in ind}

def _merge_two(patterns_a, patterns_b):
> **Contributor:** type hint missing

"""Yield merged dicts from two pattern iterables, filtering conflicts.
> **Contributor:** There is a lot of zoological description, but I cannot immediately derive what the purpose of this function is. There is not even a description of what a pattern actually is. What is a pattern?


Uses a hash-join on shared feature indices: patterns from A are
bucketed by their assignment to shared keys, so each pattern from B
only needs to merge with the bucket that agrees on those keys.

Analogy: imagine two bags of puzzle pieces where each piece says
which animals are awake or asleep. If both bags mention the same
animal, the pieces must agree (both "awake" or both "asleep").
Instead of trying every piece from bag B against every piece from
bag A, we sort bag A into labeled bins by what the shared animals
do. Then each bag-B piece goes straight to the matching bin —
skipping all bins that would always conflict.
"""
list_a = list(patterns_a)
list_b = list(patterns_b)

# Identify shared feature indices between the two sides.
keys_a = set().union(*(pa.keys() for pa in list_a)) if list_a else set()
> **Contributor:** That looks strange. Either I don't get what this does or it's a very confusing way of creating a set comprehension.

keys_b = set().union(*(pb.keys() for pb in list_b)) if list_b else set()
shared = keys_a & keys_b

if shared:
# Bucket A patterns by their assignment on shared indices.
sorted_shared = sorted(shared)
buckets: dict = defaultdict(list)
for pa in list_a:
key = tuple(pa[idx] for idx in sorted_shared)
buckets[key].append(pa)

for pb in list_b:
key = tuple(pb[idx] for idx in sorted_shared)
for pa in buckets.get(key, ()):
merged = dict(pa)
merged.update(pb)
yield merged
else:
# No shared features — full cross-product, no conflicts possible.
> **Contributor:** Full product sounds exhaustive. Is this what happened before the change without shared NChooseK?

for pb in list_b:
for pa in list_a:
merged = dict(pa)
merged.update(pb)
yield merged

# Incremental pairwise merge — each step only materialises a generator
# over consistent merges so far, keeping early pruning.
merged_iter = _constraint_patterns(nchoosek_constraints[0])
for constraint in nchoosek_constraints[1:]:
merged_iter = _merge_two(merged_iter, _constraint_patterns(constraint))

For every feature referenced by an NChooseKConstraint the lower bound must
be >= 0 (so that "inactive" variables can be pinned to 0) and the upper
bound must be > 0. The feature sets of different NChooseKConstraints must
be pairwise disjoint.
for merged in merged_iter:
yield tuple(sorted(idx for idx, active in merged.items() if not active))

Note: a non-zero lower bound (e.g. 0.1) is fine because the bounds-based
approach overrides the bounds of inactive variables to [0, 0] per
experiment while keeping the original [lb, ub] for active variables.

def _build_nchoosek_combined_patterns(
domain: Domain,
) -> List[Tuple[int, ...]]:
> **Contributor:** lower case

"""Collect combined deactivation patterns.

Args:
domain (Domain): Domain whose NChooseK constraints should be checked
domain: Domain whose NChooseK constraints should be combined.

"""
# collect NChooseK constraints
if len(domain.constraints) == 0:
return
Returns:
A deduplicated list of inactive-index tuples.

nchoosek_constraints = []
for c in domain.constraints:
if isinstance(c, NChooseKConstraint):
nchoosek_constraints.append(c)
Raises:
ValueError: When no valid combined patterns exist (contradictory
constraints).
"""
seen: set = set()
result: List[Tuple[int, ...]] = []

if len(nchoosek_constraints) == 0:
return
for pattern in _iter_nchoosek_combined_patterns(domain):
if pattern not in seen:
seen.add(pattern)
result.append(pattern)

# check if the domains of all NChooseK constraints are compatible to linearization
for c in nchoosek_constraints:
for name in np.unique(c.features):
input = domain.inputs.get_by_key(name)
if input.bounds[0] < 0:
raise ValueError(
f"Constraint {c} cannot be formulated as bounds. "
f"Feature '{name}' has lower bound {input.bounds[0]} < 0.",
)
if input.bounds[1] < 0:
raise ValueError(
f"Constraint {c} cannot be formulated as bounds. "
f"Feature '{name}' has upper bound {input.bounds[1]} < 0.",
)
if not result and any(
isinstance(c, NChooseKConstraint) for c in domain.constraints
):
raise ValueError(
"No valid combined NChooseK patterns exist. "
"The overlapping constraints are contradictory."
)

# check if the parameter names of two nchoose overlap
for c in nchoosek_constraints:
for _c in nchoosek_constraints:
if c != _c:
for name in c.features:
if name in _c.features:
raise ValueError(
f"Domain {domain} cannot be used for formulation as bounds. \
names attribute of NChooseK constraints must be pairwise disjoint.",
)
return result
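The `ValueError` path covers contradictory overlapping constraints. A minimal sketch of that case, with the per-constraint patterns written out by hand (the bucketed merge mirrors the diff's approach; this is not bofire code):

```python
from collections import defaultdict

# Two constraints over the same features {0, 1}: one wants exactly 1
# active, the other exactly 2 active -- jointly unsatisfiable.
patterns_a = [{0: False, 1: True}, {0: True, 1: False}]  # exactly 1 active
patterns_b = [{0: True, 1: True}]                        # exactly 2 active

shared = sorted({0, 1})
buckets = defaultdict(list)
for pa in patterns_a:
    buckets[tuple(pa[i] for i in shared)].append(pa)

merged = [
    {**pa, **pb}
    for pb in patterns_b
    for pa in buckets.get(tuple(pb[i] for i in shared), ())
]
# merged == [] -- no consistent combination exists, so the builder
# would raise ValueError instead of silently returning no patterns.
```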


def nchoosek_constraints_as_bounds(
domain: Domain,
n_experiments: int,
) -> List:
"""Determines the box bounds for the decision variables
"""Determines the box bounds for the decision variables.

When multiple NChooseK constraints share features, their patterns are
combined via Cartesian product and filtered for consistency so that
> **Contributor:** The comment above says "When multiple constraints share features, patterns are merged incrementally (pairwise) rather than via a single giant Cartesian product, pruning inconsistent combinations early," which sounds as if it is contradicting this comment.

each shared feature has a single active/inactive status per experiment.

Args:
domain (Domain): Domain to find the bounds for.
n_experiments (int): number of experiments for the design to be determined.

Returns:
A list of tuples containing bounds that respect NChooseK constraint imposed
onto the decision variables.
A list of tuples containing bounds that respect NChooseK constraints
imposed onto the decision variables.

"""
check_nchoosek_constraints_as_bounds(domain)

# bounds without NChooseK constraints
bounds = np.array(
[p.bounds for p in domain.inputs.get(ContinuousInput)] * n_experiments,
)

if len(domain.constraints) > 0:
for constraint in domain.constraints:
if isinstance(constraint, NChooseKConstraint):
n_features = len(constraint.features)
# find indices of constraint features in the domain input keys
ind = [
i
for i, p in enumerate(domain.inputs.get_keys())
if p in constraint.features
]

# Build all valid deactivation patterns for every allowed
# activity level k in [min_count, max_count]. For each k,
# exactly (n_features - k) variables are set to zero.
all_inactive_patterns = []
for k in range(max(constraint.min_count, 1), constraint.max_count + 1):
n_inactive = n_features - k
if n_inactive > 0:
all_inactive_patterns.extend(
list(combinations(ind, r=n_inactive))
)

if len(all_inactive_patterns) > 0:
# patterns may have different lengths (different activity
# levels), so we keep them as a plain list of tuples
# instead of converting to a numpy array.
np.random.shuffle(
all_inactive_patterns
) # shuffle patterns to avoid biasing the optimization towards certain patterns

# set bounds to zero for the chosen inactive variables per experiment
D = len(domain.inputs)
for i in range(n_experiments):
pattern = all_inactive_patterns[i % len(all_inactive_patterns)]
for idx in pattern:
bounds[idx + i * D, :] = [0, 0]
if i % len(all_inactive_patterns) == (
len(all_inactive_patterns) - 1
):
np.random.shuffle(all_inactive_patterns)
else:
pass
combined_patterns = _build_nchoosek_combined_patterns(domain)

if combined_patterns:
np.random.shuffle(combined_patterns)

D = len(domain.inputs)
for i in range(n_experiments):
pattern = combined_patterns[i % len(combined_patterns)]
for idx in pattern:
bounds[idx + i * D, :] = [0, 0]
if i % len(combined_patterns) == (len(combined_patterns) - 1):
np.random.shuffle(combined_patterns)

# convert bounds to list of tuples
bounds = [(b[0], b[1]) for b in bounds]
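The cycle-and-reshuffle assignment of patterns to experiments can be isolated into a small helper. The name `apply_patterns_to_bounds` is hypothetical; the sketch assumes a flat `(n_experiments * n_inputs, 2)` bounds array as in the loop above:

```python
import numpy as np


def apply_patterns_to_bounds(bounds: np.ndarray, patterns, n_experiments: int, n_inputs: int) -> np.ndarray:
    """Cycle through the deactivation patterns, pinning each experiment's
    inactive features to [0, 0], and reshuffle after every full pass so
    the design is not biased towards the first patterns."""
    patterns = list(patterns)
    np.random.shuffle(patterns)
    for i in range(n_experiments):
        for idx in patterns[i % len(patterns)]:
            bounds[idx + i * n_inputs, :] = [0, 0]
        if i % len(patterns) == len(patterns) - 1:
            np.random.shuffle(patterns)
    return bounds


# 3 continuous inputs, 2 experiments, patterns deactivating one feature each:
bounds = np.array([[0.0, 1.0]] * 6)
bounds = apply_patterns_to_bounds(bounds, [(0,), (2,)], n_experiments=2, n_inputs=3)
# Each experiment's 3-row slice now has exactly one row pinned to [0, 0];
# which row is pinned depends on the shuffle.
```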