-
Notifications
You must be signed in to change notification settings - Fork 46
Combine nchoosek to support variables being shared across nchoosek #753
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 15 commits
0008c56
a2d914c
8941dad
35a83c1
a327194
8b7e6c0
9db4ff6
bb28c7c
5a38769
3185f5c
3123935
debad63
562f749
f9dc56c
f2fe3f4
a7a16ab
2887177
16451c2
b373234
d48ddd4
1ec5ce5
5b1534e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,10 @@ | ||
| import importlib.util | ||
| import re | ||
| import sys | ||
| from collections import defaultdict | ||
| from copy import copy | ||
| from itertools import combinations | ||
| from typing import List, Optional, Tuple, Union, cast | ||
| from typing import Iterator, List, Optional, Tuple, Union, cast | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
|
|
@@ -556,125 +557,173 @@ def hessian(self, x: np.ndarray, *args): | |
| return hessian | ||
|
|
||
|
|
||
| def check_nchoosek_constraints_as_bounds(domain: Domain) -> None: | ||
| """Checks if NChooseK constraints of domain can be formulated as bounds. | ||
| def _iter_nchoosek_combined_patterns( | ||
| domain: Domain, | ||
| ) -> "Iterator[Tuple[int, ...]]": | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the return type a string? Why |
||
| """Yield combined deactivation patterns across all NChooseK constraints. | ||
|
|
||
| For each constraint, per-feature active/inactive patterns are generated | ||
| for every allowed activity level. When multiple constraints share | ||
| features, patterns are merged incrementally (pairwise) rather than via | ||
| a single giant Cartesian product, pruning inconsistent combinations | ||
| early. | ||
|
|
||
| Consistency: If a feature is shared by multiple constraints, it must be active in all or | ||
| inactive in all to yield a valid combined pattern. | ||
|
|
||
| Yields: | ||
| Tuples containing the domain-level indices of features to deactivate | ||
| (pin to 0) for one experiment slot. Duplicates are possible when | ||
| constraints are disjoint; the caller should deduplicate if needed. | ||
| """ | ||
| nchoosek_constraints = [ | ||
| c for c in domain.constraints if isinstance(c, NChooseKConstraint) | ||
| ] | ||
|
|
||
| if not nchoosek_constraints: | ||
| return | ||
|
|
||
| all_keys = domain.inputs.get_keys() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure why you didn't do this. maybe you had a good reason that I overlooked. |
||
|
|
||
| def _constraint_patterns(constraint): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. type hint missing |
||
| """Yield {feature_index: is_active} dicts for one constraint.""" | ||
| n_features = len(constraint.features) | ||
| ind = [i for i, key in enumerate(all_keys) if key in constraint.features] | ||
| for k in range(max(constraint.min_count, 1), constraint.max_count + 1): | ||
| n_inactive = n_features - k | ||
| if n_inactive > 0: | ||
| for combo in combinations(ind, r=n_inactive): | ||
| inactive_set = set(combo) | ||
| yield {idx: (idx not in inactive_set) for idx in ind} | ||
| else: | ||
| yield {idx: True for idx in ind} | ||
|
|
||
| def _merge_two(patterns_a, patterns_b): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. type hint missing |
||
| """Yield merged dicts from two pattern iterables, filtering conflicts. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a lot of zoologic description. but i cannot immediately derive what the purpose of this function is. There is not even a description of what a patterna actually is. What is a pattern? |
||
|
|
||
| Uses a hash-join on shared feature indices: patterns from A are | ||
| bucketed by their assignment to shared keys, so each pattern from B | ||
| only needs to merge with the bucket that agrees on those keys. | ||
|
|
||
| Analogy: imagine two bags of puzzle pieces where each piece says | ||
| which animals are awake or asleep. If both bags mention the same | ||
| animal, the pieces must agree (both "awake" or both "asleep"). | ||
| Instead of trying every piece from bag B against every piece from | ||
| bag A, we sort bag A into labeled bins by what the shared animals | ||
| do. Then each bag-B piece goes straight to the matching bin — | ||
| skipping all bins that would always conflict. | ||
| """ | ||
| list_a = list(patterns_a) | ||
| list_b = list(patterns_b) | ||
|
|
||
| # Identify shared feature indices between the two sides. | ||
| keys_a = set().union(*(pa.keys() for pa in list_a)) if list_a else set() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That looks strange. Either i don't get what this does or it a very confusing way of creating a set comprehension. |
||
| keys_b = set().union(*(pb.keys() for pb in list_b)) if list_b else set() | ||
| shared = keys_a & keys_b | ||
|
|
||
| if shared: | ||
| # Bucket A patterns by their assignment on shared indices. | ||
| sorted_shared = sorted(shared) | ||
| buckets: dict = defaultdict(list) | ||
| for pa in list_a: | ||
| key = tuple(pa[idx] for idx in sorted_shared) | ||
| buckets[key].append(pa) | ||
|
|
||
| for pb in list_b: | ||
| key = tuple(pb[idx] for idx in sorted_shared) | ||
| for pa in buckets.get(key, ()): | ||
| merged = dict(pa) | ||
| merged.update(pb) | ||
| yield merged | ||
| else: | ||
| # No shared features — full cross-product, no conflicts possible. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. full product sounds exhaustive. is this what happened before the change without shared nchoosek? |
||
| for pb in list_b: | ||
| for pa in list_a: | ||
| merged = dict(pa) | ||
| merged.update(pb) | ||
| yield merged | ||
|
|
||
| # Incremental pairwise merge — each step only materialises a generator | ||
| # over consistent merges so far, keeping early pruning. | ||
| merged_iter = _constraint_patterns(nchoosek_constraints[0]) | ||
| for constraint in nchoosek_constraints[1:]: | ||
| merged_iter = _merge_two(merged_iter, _constraint_patterns(constraint)) | ||
|
|
||
| For every feature referenced by an NChooseKConstraint the lower bound must | ||
| be >= 0 (so that "inactive" variables can be pinned to 0) and the upper | ||
| bound must be > 0. The feature sets of different NChooseKConstraints must | ||
| be pairwise disjoint. | ||
| for merged in merged_iter: | ||
| yield tuple(sorted(idx for idx, active in merged.items() if not active)) | ||
|
|
||
| Note: a non-zero lower bound (e.g. 0.1) is fine because the bounds-based | ||
| approach overrides the bounds of inactive variables to [0, 0] per | ||
| experiment while keeping the original [lb, ub] for active variables. | ||
|
|
||
| def _build_nchoosek_combined_patterns( | ||
| domain: Domain, | ||
| ) -> List[Tuple[int, ...]]: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lower case |
||
| """Collect combined deactivation patterns. | ||
|
|
||
| Args: | ||
| domain (Domain): Domain whose NChooseK constraints should be checked | ||
| domain: Domain whose NChooseK constraints should be combined. | ||
|
|
||
| """ | ||
| # collect NChooseK constraints | ||
| if len(domain.constraints) == 0: | ||
| return | ||
| Returns: | ||
| A deduplicated list of inactive-index tuples. | ||
|
|
||
| nchoosek_constraints = [] | ||
| for c in domain.constraints: | ||
| if isinstance(c, NChooseKConstraint): | ||
| nchoosek_constraints.append(c) | ||
| Raises: | ||
| ValueError: When no valid combined patterns exist (contradictory | ||
| constraints). | ||
| """ | ||
| seen: set = set() | ||
| result: List[Tuple[int, ...]] = [] | ||
|
|
||
| if len(nchoosek_constraints) == 0: | ||
| return | ||
| for pattern in _iter_nchoosek_combined_patterns(domain): | ||
| if pattern not in seen: | ||
| seen.add(pattern) | ||
| result.append(pattern) | ||
|
|
||
| # check if the domains of all NChooseK constraints are compatible to linearization | ||
| for c in nchoosek_constraints: | ||
| for name in np.unique(c.features): | ||
| input = domain.inputs.get_by_key(name) | ||
| if input.bounds[0] < 0: | ||
| raise ValueError( | ||
| f"Constraint {c} cannot be formulated as bounds. " | ||
| f"Feature '{name}' has lower bound {input.bounds[0]} < 0.", | ||
| ) | ||
| if input.bounds[1] < 0: | ||
| raise ValueError( | ||
| f"Constraint {c} cannot be formulated as bounds. " | ||
| f"Feature '{name}' has upper bound {input.bounds[1]} < 0.", | ||
| ) | ||
| if not result and any( | ||
| isinstance(c, NChooseKConstraint) for c in domain.constraints | ||
| ): | ||
| raise ValueError( | ||
| "No valid combined NChooseK patterns exist. " | ||
| "The overlapping constraints are contradictory." | ||
| ) | ||
|
|
||
| # check if the parameter names of two nchoose overlap | ||
| for c in nchoosek_constraints: | ||
| for _c in nchoosek_constraints: | ||
| if c != _c: | ||
| for name in c.features: | ||
| if name in _c.features: | ||
| raise ValueError( | ||
| f"Domain {domain} cannot be used for formulation as bounds. \ | ||
| names attribute of NChooseK constraints must be pairwise disjoint.", | ||
| ) | ||
| return result | ||
|
|
||
|
|
||
| def nchoosek_constraints_as_bounds( | ||
| domain: Domain, | ||
| n_experiments: int, | ||
| ) -> List: | ||
| """Determines the box bounds for the decision variables | ||
| """Determines the box bounds for the decision variables. | ||
|
|
||
| When multiple NChooseK constraints share features, their patterns are | ||
| combined via Cartesian product and filtered for consistency so that | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment above says
which sounds as if it is contradicting this comment. |
||
| each shared feature has a single active/inactive status per experiment. | ||
|
|
||
| Args: | ||
| domain (Domain): Domain to find the bounds for. | ||
| n_experiments (int): number of experiments for the design to be determined. | ||
|
|
||
| Returns: | ||
| A list of tuples containing bounds that respect NChooseK constraint imposed | ||
| onto the decision variables. | ||
| A list of tuples containing bounds that respect NChooseK constraints | ||
| imposed onto the decision variables. | ||
|
|
||
| """ | ||
| check_nchoosek_constraints_as_bounds(domain) | ||
|
|
||
| # bounds without NChooseK constraints | ||
| bounds = np.array( | ||
| [p.bounds for p in domain.inputs.get(ContinuousInput)] * n_experiments, | ||
| ) | ||
|
|
||
| if len(domain.constraints) > 0: | ||
| for constraint in domain.constraints: | ||
| if isinstance(constraint, NChooseKConstraint): | ||
| n_features = len(constraint.features) | ||
| # find indices of constraint features in the domain input keys | ||
| ind = [ | ||
| i | ||
| for i, p in enumerate(domain.inputs.get_keys()) | ||
| if p in constraint.features | ||
| ] | ||
|
|
||
| # Build all valid deactivation patterns for every allowed | ||
| # activity level k in [min_count, max_count]. For each k, | ||
| # exactly (n_features - k) variables are set to zero. | ||
| all_inactive_patterns = [] | ||
| for k in range(max(constraint.min_count, 1), constraint.max_count + 1): | ||
| n_inactive = n_features - k | ||
| if n_inactive > 0: | ||
| all_inactive_patterns.extend( | ||
| list(combinations(ind, r=n_inactive)) | ||
| ) | ||
|
|
||
| if len(all_inactive_patterns) > 0: | ||
| # patterns may have different lengths (different activity | ||
| # levels), so we keep them as a plain list of tuples | ||
| # instead of converting to a numpy array. | ||
| np.random.shuffle( | ||
| all_inactive_patterns | ||
| ) # shuffle patterns to avoid biasing the optimization towards certain patterns | ||
|
|
||
| # set bounds to zero for the chosen inactive variables per experiment | ||
| D = len(domain.inputs) | ||
| for i in range(n_experiments): | ||
| pattern = all_inactive_patterns[i % len(all_inactive_patterns)] | ||
| for idx in pattern: | ||
| bounds[idx + i * D, :] = [0, 0] | ||
| if i % len(all_inactive_patterns) == ( | ||
| len(all_inactive_patterns) - 1 | ||
| ): | ||
| np.random.shuffle(all_inactive_patterns) | ||
| else: | ||
| pass | ||
| combined_patterns = _build_nchoosek_combined_patterns(domain) | ||
|
|
||
| if combined_patterns: | ||
| np.random.shuffle(combined_patterns) | ||
|
|
||
| D = len(domain.inputs) | ||
| for i in range(n_experiments): | ||
| pattern = combined_patterns[i % len(combined_patterns)] | ||
| for idx in pattern: | ||
| bounds[idx + i * D, :] = [0, 0] | ||
| if i % len(combined_patterns) == (len(combined_patterns) - 1): | ||
| np.random.shuffle(combined_patterns) | ||
|
|
||
| # convert bounds to list of tuples | ||
| bounds = [(b[0], b[1]) for b in bounds] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since this function returns a list, call it
_get_nchoosek...instead of_iter_....