Skip to content
4 changes: 4 additions & 0 deletions bofire/data_models/constraints/nchoosek.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ def validate_inputs(self, inputs: Inputs):
assert isinstance(
feature_, ContinuousInput
), f"Feature {f} is not a ContinuousInput."
if feature_.bounds[0] < 0:
raise ValueError(
f"Feature {f} must have a lower bound of >=0, but has {feature_.bounds[0]}",
)
Comment thread
jduerholt marked this conversation as resolved.
Outdated

@model_validator(mode="after")
def validate_counts(self):
Expand Down
72 changes: 72 additions & 0 deletions bofire/data_models/domain/domain.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import collections.abc
import itertools
import math
import random
import warnings
from collections.abc import Sequence
from typing import Any, Dict, Literal, Optional, Tuple, Union
Expand Down Expand Up @@ -236,6 +238,76 @@ def get_nchoosek_combinations(self, exhaustive: bool = False):

return used_features_list_final, unused_features_list

def sample_valid_nchoosek_features(
    self,
    rng: random.Random,
    n: int = 1,
    max_iters: int = 1000,
) -> list[Tuple[str, ...]]:
    """Sample sets of active feature keys uniformly from all valid subsets.

    Includes (a) one group per ``NChooseKConstraint`` (respecting
    ``min_count``, ``max_count``, and ``none_also_valid``) and (b) one
    singleton group per ``ContinuousInput`` with ``allow_zero=True`` that
    is not already part of any ``NChooseKConstraint``.

    Within each group the subset size ``k`` is drawn with probability
    proportional to ``C(n, k)`` and ``k`` features are then chosen
    uniformly, so the per-group distribution is uniform over all valid
    subsets. When ``NChooseKConstraint``s share features, the per-group
    union may violate one of the constraints; in that case rejection
    sampling is used (up to ``max_iters`` attempts per drawn combination).

    Args:
        rng: Random number generator used for sampling.
        n: Number of combinations to draw. Defaults to 1.
        max_iters: Maximum number of rejection-sampling attempts per
            drawn combination. Defaults to 1000.

    Returns:
        A list of ``n`` sorted tuples of active feature keys.

    Raises:
        ValueError: If a valid combination is not found within
            ``max_iters`` attempts.
    """
    # One group per constraint: (feature keys, candidate subset sizes,
    # weights proportional to the number of subsets of each size).
    groups: list[Tuple[list[str], list[int], list[int]]] = []
    nchoosek_keys: set[str] = set()
    nchoosek_cons = list(self.constraints.get(NChooseKConstraint))
    for con in nchoosek_cons:
        assert isinstance(con, NChooseKConstraint)
        ks = list(range(con.min_count, con.max_count + 1))
        if con.none_also_valid and 0 not in ks:
            ks.insert(0, 0)
        # Drawing k with probability ~ C(len(features), k) and then k
        # features uniformly makes the group's distribution uniform over
        # all of its valid subsets.
        weights = [math.comb(len(con.features), k) for k in ks]
        groups.append((list(con.features), ks, weights))
        nchoosek_keys.update(con.features)
    for feat in self.inputs.get(ContinuousInput):
        assert isinstance(feat, ContinuousInput)
        if feat.allow_zero and feat.key not in nchoosek_keys:
            # Singleton group: the feature is either off (k=0) or on (k=1).
            groups.append(([feat.key], [0, 1], [1, 1]))

    # Hoisted out of the rejection loop: each constraint's feature set is
    # needed on every validation attempt; building it once avoids
    # reconstructing it up to n * max_iters times per constraint.
    con_feature_sets = [set(con.features) for con in nchoosek_cons]

    results: list[Tuple[str, ...]] = []
    for _ in range(n):
        for _ in range(max_iters):
            active: set[str] = set()
            for features, ks, weights in groups:
                k = rng.choices(ks, weights=weights, k=1)[0]
                active.update(rng.sample(features, k))
            # Overlapping constraints may make the union of per-group
            # draws invalid; accept only if every constraint holds.
            if all(
                (con.none_also_valid and len(active & feats) == 0)
                or con.min_count <= len(active & feats) <= con.max_count
                for con, feats in zip(nchoosek_cons, con_feature_sets)
            ):
                results.append(tuple(sorted(active)))
                break
        else:
            raise ValueError(
                f"Failed to sample a valid NChooseK combination after "
                f"{max_iters} attempts.",
            )
    return results

def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame:
"""Coerces all invalid output measurements to np.nan

Expand Down
1 change: 1 addition & 0 deletions bofire/data_models/strategies/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class RandomStrategy(Strategy):
n_thinning: Annotated[int, Field(ge=1)] = 32
num_base_samples: Optional[Annotated[int, Field(gt=0)]] = None
max_iters: Annotated[int, Field(gt=0)] = 1000
max_combinations: Annotated[int, Field(gt=0)] = 64
sampler_kwargs: Optional[dict] = None

def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
Expand Down
58 changes: 36 additions & 22 deletions bofire/strategies/random.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import math
import random
import warnings
from copy import deepcopy
from typing import Dict, Optional, cast
Expand Down Expand Up @@ -59,6 +60,7 @@ def __init__(
self.fallback_sampling_method = data_model.fallback_sampling_method
self.n_burnin = data_model.n_burnin
self.n_thinning = data_model.n_thinning
self.max_combinations = data_model.max_combinations
self.sampler_kwargs = data_model.sampler_kwargs

def has_sufficient_experiments(self) -> bool:
Expand Down Expand Up @@ -124,32 +126,38 @@ def _sample_with_nchooseks(
pd.DataFrame: A DataFrame containing the sampled data.

"""
if len(self.domain.constraints.get(NChooseKConstraint)) > 0:
_, unused = self.domain.get_nchoosek_combinations()

if candidate_count <= len(unused):
sampled_combinations = [
unused[i]
for i in np.random.default_rng(self._get_seed()).choice(
len(unused),
size=candidate_count,
replace=False,
)
]
num_samples_per_it = 1
else:
sampled_combinations = unused
num_samples_per_it = math.ceil(candidate_count / len(unused))
nchoosek_feature_keys: set[str] = set()
for constraint in self.domain.constraints.get(NChooseKConstraint):
assert isinstance(constraint, NChooseKConstraint)
nchoosek_feature_keys.update(constraint.features)
allow_zero_feature_keys = {
feat.key
for feat in self.domain.inputs.get(ContinuousInput)
if isinstance(feat, ContinuousInput) and feat.allow_zero
}
zeroable_keys = nchoosek_feature_keys | allow_zero_feature_keys

if zeroable_keys:
Comment thread
jduerholt marked this conversation as resolved.
rng = random.Random(self._get_seed())
n_combos = min(self.max_combinations, candidate_count)
drawn = self.domain.sample_valid_nchoosek_features(rng, n=n_combos)
combinations: Dict[tuple, int] = {}
for combo in drawn:
Comment thread
jduerholt marked this conversation as resolved.
Outdated
combinations[combo] = combinations.get(combo, 0) + 1

sampling_multiplier = math.ceil(candidate_count / n_combos)

samples = []
for u in sampled_combinations:
for combo, count in combinations.items():
# create new domain without the nchoosekconstraints
domain = deepcopy(self.domain)
domain.constraints = domain.constraints.get(excludes=NChooseKConstraint)
# fix the unused features
for key in u:
# fix the unselected zeroable features
for key in zeroable_keys - set(combo):
feat = domain.inputs.get_by_key(key=key)
assert isinstance(feat, ContinuousInput)
if feat.allow_zero:
Comment thread
jduerholt marked this conversation as resolved.
Outdated
feat.allow_zero = False
feat.bounds = [0.0, 0.0]
# setup then sampler for this situation
samples.append(
Expand All @@ -159,7 +167,7 @@ def _sample_with_nchooseks(
n_burnin=self.n_burnin,
n_thinning=self.n_thinning,
seed=self._get_seed(),
n=num_samples_per_it,
n=count * sampling_multiplier,
sampler_kwargs=self.sampler_kwargs,
),
)
Expand Down Expand Up @@ -277,7 +285,7 @@ def _sample_from_polytope(
samples = pd.DataFrame(
data=np.nan,
index=range(n),
columns=domain.inputs.get_keys(),
columns=domain.inputs.get_keys(ContinuousInput),
)
else:
bounds = torch.tensor([lower, upper]).to(**tkwargs)
Expand Down Expand Up @@ -334,10 +342,13 @@ def _sample_from_polytope(
)

# setup the categoricals and discrete ones as uniform sampled vals
# we have to make sure here that no fixed ones occur here
samples = pd.concat(
[
samples,
domain.inputs.get([CategoricalInput, DiscreteInput]).sample(
domain.inputs.get([CategoricalInput, DiscreteInput])
.get_free()
.sample(
n,
method=fallback_sampling_method,
seed=seed,
Expand All @@ -350,6 +361,9 @@ def _sample_from_polytope(
# setup the fixed continuous ones
for key, value in fixed_features.items():
samples[key] = value
# setup the fixed discrete/categorical ones
for feat in domain.inputs.get([CategoricalInput, DiscreteInput]).get_fixed():
samples[feat.key] = feat.fixed_value()[0]

return samples[domain.inputs.get_keys()]

Expand Down
Comment thread
jduerholt marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,102 @@ def test_nchoosek_combinations_nonexhaustive():
c = unittest.TestCase()
c.assertCountEqual(used, expected_used)
c.assertCountEqual(unused, expected_unused)


def test_sample_valid_nchoosek_features_uniform_over_subsets():
    """With one NChooseK on n=5 features and k in [1, 3], there are
    C(5,1)+C(5,2)+C(5,3) = 25 valid subsets. With uniform sampling each
    should appear with frequency ~1/25.
    """
    import random
    from collections import Counter

    inputs = [ContinuousInput(key=f"x{i}", bounds=(0, 1)) for i in range(5)]
    constraint = NChooseKConstraint(
        features=[f"x{i}" for i in range(5)],
        min_count=1,
        max_count=3,
        none_also_valid=False,
    )
    domain = Domain(
        inputs=Inputs(features=inputs),
        constraints=Constraints(constraints=[constraint]),
    )
    n_samples = 25_000
    samples = domain.sample_valid_nchoosek_features(random.Random(0), n=n_samples)
    # Counter replaces the hand-rolled dict-based tally.
    counts = Counter(samples)
    assert len(counts) == 25, f"Expected 25 unique subsets, got {len(counts)}"
    expected = n_samples / 25
    for subset, count in counts.items():
        rel = abs(count - expected) / expected
        assert (
            rel < 0.20
        ), f"Subset {subset} count {count} too far from expected {expected:.0f}"


def test_sample_valid_nchoosek_features_none_also_valid():
    """When none_also_valid=True, the empty subset is in the support."""
    import random

    keys = ["x0", "x1", "x2"]
    feats = [ContinuousInput(key=k, bounds=(0, 1)) for k in keys]
    con = NChooseKConstraint(
        features=keys,
        min_count=2,
        max_count=3,
        none_also_valid=True,
    )
    dom = Domain(
        inputs=Inputs(features=feats),
        constraints=Constraints(constraints=[con]),
    )
    observed = set(dom.sample_valid_nchoosek_features(random.Random(1), n=2000))
    # Support: the empty subset plus C(3,2) + C(3,3) = 1 + 3 + 1 = 5 subsets.
    assert () in observed
    assert len(observed) == 5


def test_sample_valid_nchoosek_features_allow_zero_singletons():
    """Without any NChooseK, allow_zero=True features form singleton groups."""
    import random

    dom = Domain(
        inputs=Inputs(
            features=[
                ContinuousInput(key="a", bounds=(0.1, 1), allow_zero=True),
                ContinuousInput(key="b", bounds=(0.1, 1), allow_zero=True),
                ContinuousInput(key="c", bounds=(0.1, 1)),
            ]
        )
    )
    drawn = dom.sample_valid_nchoosek_features(random.Random(2), n=2000)
    # "a" and "b" toggle independently (4 subsets); "c" never appears.
    assert set(drawn) == {(), ("a",), ("b",), ("a", "b")}


def test_sample_valid_nchoosek_features_empty_returns_empty_tuple():
    """Domain without NChooseK and without allow_zero features yields ()."""
    import random

    dom = Domain(inputs=Inputs(features=[ContinuousInput(key="x", bounds=(0, 1))]))
    drawn = dom.sample_valid_nchoosek_features(random.Random(3), n=4)
    # No groups exist, so every draw is the empty combination.
    assert drawn == [()] * 4


def test_sample_valid_nchoosek_features_default_n_is_one():
    """Default returns a list of length 1."""
    import random

    feats = [ContinuousInput(key=f"x{i}", bounds=(0, 1)) for i in range(3)]
    con = NChooseKConstraint(
        features=["x0", "x1", "x2"],
        min_count=1,
        max_count=2,
        none_also_valid=False,
    )
    dom = Domain(
        inputs=Inputs(features=feats),
        constraints=Constraints(constraints=[con]),
    )
    drawn = dom.sample_valid_nchoosek_features(random.Random(0))
    assert len(drawn) == 1
Loading
Loading