diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 846f4f829..ec0d4a1aa 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -79,7 +79,7 @@ jobs: run: | conda install -c conda-forge cyipopt pip install torch --index-url https://download.pytorch.org/whl/cpu - pip install ".[optimization,tests,cheminfo,entmoot]" + pip install ".[optimization,tests,cheminfo,entmoot,llm]" - name: Run tests shell: bash -l {0} @@ -113,7 +113,7 @@ jobs: pip install --upgrade git+https://github.com/cornellius-gp/gpytorch.git export ALLOW_LATEST_GPYTORCH_LINOP=true pip install --upgrade git+https://github.com/pytorch/botorch.git - pip install ".[optimization,tests,cheminfo,entmoot]" + pip install ".[optimization,tests,cheminfo,entmoot,llm]" - name: Run tests shell: bash -l {0} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..4f73f6a42 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,294 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +BoFire (**B**ayesian **O**ptimization **F**ramework **I**ntended for **R**eal **E**xperiments) is a Python framework for experimental design, combining Design of Experiments (DoE) and Bayesian Optimization (BO). It supports mixed continuous/discrete/categorical parameter spaces, single and multi-objective optimization, and chemical encodings for molecular optimization. + +## Build & Test Commands + +```bash +# Install for development (full) +pip install -e ".[all]" + +# Install core optimization only +pip install -e ".[optimization]" + +# Run all tests +pytest tests/ + +# Run data model tests only (faster, no torch required) +pytest tests/bofire/data_models + +# Run with coverage +pytest -ra --cov=bofire --cov-report term-missing tests + +# Run a single test file +pytest tests/bofire/strategies/test_sobo.py + +# Run a single test +pytest tests/bofire/strategies/test_sobo.py::test_function_name +``` + +## Linting & Type Checking + +```bash +# Install pre-commit hooks (recommended) +pip install pre-commit +pre-commit install + +# Run all linting/formatting +pre-commit run --all-files + +# Or run ruff directly +ruff check . +ruff format . + +# Type checking +ty check bofire +``` + +## Architecture + +### Data Models vs Functional Separation + +BoFire separates serializable data models (Pydantic) from functional implementations. This enables REST API integration. + +- `bofire/data_models/` - Pydantic models for serialization +- `bofire/strategies/`, `bofire/surrogates/`, etc. - Functional implementations + +### Key Modules + +- **data_models/** - Pydantic schemas for all BoFire objects + - `features/` - Input/output feature definitions (ContinuousInput, CategoricalOutput, etc.) + - `domain/` - Domain composed of Inputs + Outputs + Constraints + - `objectives/` - MinimizeObjective, MaximizeObjective, CloseToTargetObjective + - `constraints/` - Linear, nonlinear, and black-box constraints + - `surrogates/` - Surrogate model specifications + - `strategies/` - Strategy configurations + - `acquisition_functions/` - BoTorch acquisition functions (qLogEI, etc.) 
+ +- **strategies/** - Optimization strategy implementations + - Uses ask/tell interface: `strategy.ask(n)` returns candidates, `strategy.tell(experiments)` updates model + - `botorch/` - BoTorch-based strategies (SoboStrategy, MoboStrategy) + - `doe/` - Design of Experiments strategies + - `samplers/` - Sampling methods for constrained spaces + +- **surrogates/** - Surrogate model implementations + - `single_task_gp.py`, `multi_task_gp.py` - Gaussian Processes + - `random_forest.py`, `mlp.py` - Alternative surrogates + +- **kernels/** - GP kernel implementations including molecular kernels + +### API Pattern + +Each module typically has an `api.py` file that re-exports public interfaces: +```python +from bofire.data_models.features.api import ContinuousInput, CategoricalOutput +from bofire.strategies.api import SoboStrategy +from bofire.data_models.acquisition_functions.api import qLogEI +``` + +## Data Model Testing + +Data models use a spec-based parametrized testing system. The infrastructure lives in `tests/bofire/data_models/specs/`. + +### Spec System + +The core classes in `tests/bofire/data_models/specs/specs.py`: + +- **`Spec(cls, spec_callable)`** — wraps a data model class and a lambda that returns a valid spec dict +- **`InvalidSpec(cls, spec_callable, error, message)`** — wraps an invalid spec with expected error +- **`Specs(invalidators)`** — collection that holds valid/invalid specs; use `add_valid()` and `add_invalid()` + +### Serialization Roundtrip Contract + +The key invariant enforced by `tests/bofire/data_models/serialization/test_serialization.py`: + +```python +spec = some_spec.typed_spec() # spec dict + {"type": ClassName} +obj = SomeClass(**spec) # instantiate from spec +assert obj.model_dump() == spec # EXACT match required +``` + +**This means:** every field that appears in `model_dump()` output must be present in the spec dict. When adding a new field with a default value to a base class (e.g., `context: Optional[str] = None` on `Feature`), you must add that field with its default to **every `add_valid()` spec** for all subclasses. + +### Spec File Patterns + +**Leaf specs** (single objects) — spec dicts contain plain values: +```python +# tests/bofire/data_models/specs/features.py +specs.add_valid( + features.ContinuousInput, + lambda: { + "key": str(uuid.uuid4()), + "bounds": [3, 5.3], + "unit": None, + "context": None, # all fields with defaults must be explicit + }, +) +``` + +**Container specs** (nested objects) — use `.model_dump()` for children: +```python +# tests/bofire/data_models/specs/domain.py +specs.add_valid( + Domain, + lambda: { + "inputs": Inputs(features=[...]).model_dump(), # nested via model_dump() + "outputs": Outputs(features=[...]).model_dump(), + "constraints": Constraints().model_dump(), + "context": None, + }, +) +``` + +Container specs (inputs.py, outputs.py, constraints_container.py, engineered_features.py) don't need manual updates when a field is added to a leaf class, because `.model_dump()` on the nested objects already includes all fields. + +### Invalid Specs + +Invalid specs test that construction raises the expected error. They do **not** test serialization, so they don't need every default field: + +```python +specs.add_invalid( + features.ContinuousInput, + lambda: {"key": "a", "bounds": [5, 3]}, # no need for "context" etc. 
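    # InvalidSpec never reaches model_dump(), so the round-trip contract does not apply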
+ error=ValueError, + message="Sequence is not monotonically increasing.", +) +``` + +### Deserialization Tests + +`tests/bofire/data_models/serialization/test_deserialization.py` tests round-trip through `TypeAdapter`: +```python +obj = spec.obj() +deserialized = TypeAdapter(AnyFeature).validate_python(obj.model_dump()) +assert obj == deserialized +``` + +### Fixtures + +Specs are wired to pytest via `tests/bofire/conftest.py`: +```python +@fixture(params=specs.features.valids) +def feature_spec(request) -> Spec: + return request.param +``` + +### Checklist: Adding a Field to a Base Data Model Class + +1. Add the field to the class (e.g., `Feature`, `Constraint`, `Domain`) +2. Add `"field_name": default_value` to every `add_valid()` spec for that class and all subclasses +3. Invalid specs and container specs using `.model_dump()` don't need changes +4. Run `pytest tests/bofire/data_models` to verify + +### Checklist: Adding a New Data Model Class + +BoFire uses a two-layer architecture: data models (Pydantic) and functional implementations, connected by type unions, registration functions, and mappers. + +#### 1. Create the data model class + +Create a new file or add to an existing one in `bofire/data_models/{domain}/`. Every data model needs a `type` literal discriminator: + +```python +# bofire/data_models/kernels/my_kernel.py +from typing import Literal +from bofire.data_models.kernels.kernel import ContinuousKernel + +class MyCustomKernel(ContinuousKernel): + type: Literal["MyCustomKernel"] = "MyCustomKernel" + my_param: float +``` + +#### 2. Register in the type union + +Each domain has an `api.py` that defines type unions (e.g., `AnyFeature`, `AnyConstraint`, `AnyKernel`). These unions use `Union[tuple(type_list)]` where the list is mutable. + +**Option A — Static registration:** Add the import and class to the union list in `bofire/data_models/{domain}/api.py`. + +**Option B — Dynamic registration:** Use the `register_*()` functions in `bofire/data_models/{domain}/_register.py`. These handle cascading Pydantic model rebuilds automatically: + +```python +from bofire.data_models.kernels._register import register_kernel +register_kernel(MyCustomKernel) +``` + +Available registration functions: +- `bofire/data_models/features/_register.py` → `register_engineered_feature()` +- `bofire/data_models/strategies/_register.py` → `register_strategy()` +- `bofire/data_models/kernels/_register.py` → `register_kernel()` +- `bofire/data_models/priors/_register.py` → `register_prior()`, `register_prior_constraint()` +- `bofire/data_models/surrogates/botorch_surrogates.py` → `register_botorch_surrogate()` + +These functions are idempotent (check if already registered) and cascade rebuilds to dependent models (e.g., registering a kernel rebuilds surrogate models that reference kernels). + +#### 3. Create the functional implementation and register the mapper + +Each domain has a `mapper.py` that maps data model classes to implementations: + +```python +# bofire/strategies/mapper.py or bofire/surrogates/mapper.py +from bofire.strategies.mapper import register + +@register(data_model_cls=MyStrategyDataModel) +class MyStrategy(Strategy): + ... +``` + +Or function-based (kernels, priors): +```python +# bofire/kernels/mapper.py +KERNEL_MAP[MyCustomKernel] = map_my_custom_kernel +``` + +The `register()` decorators in `bofire/strategies/api.py` and `bofire/surrogates/api.py` handle both the mapper registration and the data model registration in one step. + +#### 4. 
Add test specs + +Add entries to the appropriate file in `tests/bofire/data_models/specs/`: + +```python +# tests/bofire/data_models/specs/kernels.py +specs.add_valid( + MyCustomKernel, + lambda: {"my_param": 1.0}, +) +``` + +The spec is automatically picked up by `tests/bofire/conftest.py` which imports all specs from `tests/bofire/data_models/specs/api.py` and parametrizes fixtures over `specs.{domain}.valids` / `specs.{domain}.invalids`. No conftest changes needed unless adding an entirely new domain. + +#### 5. Key files reference + +| Domain | Data Model | Type Union / Registration | Mapper | Test Specs | +|--------|-----------|--------------------------|--------|------------| +| Features | `bofire/data_models/features/` | `api.py`, `_register.py` | N/A | `tests/.../specs/features.py` | +| Constraints | `bofire/data_models/constraints/` | `api.py` | N/A | `tests/.../specs/constraints.py` | +| Strategies | `bofire/data_models/strategies/` | `api.py`, `_register.py` | `bofire/strategies/mapper.py` | `tests/.../specs/strategies.py` | +| Surrogates | `bofire/data_models/surrogates/` | `api.py`, `botorch_surrogates.py` | `bofire/surrogates/mapper.py` | `tests/.../specs/surrogates.py` | +| Kernels | `bofire/data_models/kernels/` | `api.py`, `_register.py` | `bofire/kernels/mapper.py` | `tests/.../specs/kernels.py` | +| Priors | `bofire/data_models/priors/` | `api.py`, `_register.py` | `bofire/priors/mapper.py` | `tests/.../specs/priors.py` | + +## Code Style + +- **Linter/Formatter**: Ruff (line length 88) +- **Docstrings**: Google-style +- **Type Checking**: Pydantic for runtime, ty for static analysis +- **Python**: 3.10+ + +## Documentation + +```bash +# Build API docs +quartodoc build + +# Render full docs +quarto render + +# Preview with live reload +quarto preview + +# Fast smoke test build +SMOKE_TEST=1 quarto render +``` diff --git a/README.md b/README.md index 87f100999..a7b02cb62 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,8 @@ Why BoFire? BoFire ... - supports built-in chemical encodings and kernels to boost surrogate performance for optimization problems including molecules, - can provide flexible DoEs that fulfill constraints, - provides sampling methods for constrained mixed variable spaces, -- provides seamless integration into RESTful APIs, by builtin serialization capabilities for problems, optimization strategies and surrogates. +- provides seamless integration into RESTful APIs, by builtin serialization capabilities for problems, optimization strategies and surrogates, +- includes an agentic `LLMStrategy` that proposes candidates by prompting a large language model with the problem description and prior experiments — useful for cold starts and domains where written context informs proposals. 
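+
+A minimal sketch of using the strategy (assuming a single-objective `domain` and the provider's API key exported in the environment):
+
+```python
+import bofire.strategies.api as strategies
+from bofire.data_models.llm.api import AnthropicLLMProvider
+from bofire.data_models.strategies.api import LLMStrategy
+
+data_model = LLMStrategy(domain=domain, llm=AnthropicLLMProvider())
+llm_strategy = strategies.map(data_model)
+candidates = llm_strategy.ask(3)  # LLM-proposed, domain-validated candidates
+```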
## Getting started diff --git a/_quarto.yml b/_quarto.yml index d3ed68092..dc32366eb 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -66,6 +66,7 @@ quartodoc: - MultiobjectiveStrategy - AdditiveSoboStrategy - CustomSoboStrategy + - LLMStrategy - title: Data Models - Kernels desc: Kernel configuration data models contents: @@ -119,6 +120,7 @@ quartodoc: - ActiveLearningStrategy - ShortestPathStrategy - StepwiseStrategy + - LLMStrategy - title: Surrogates desc: Surrogate model implementations contents: @@ -219,6 +221,7 @@ website: - docs/tutorials/advanced_examples/random_forest_in_bofire.qmd - docs/tutorials/advanced_examples/transfer_learning_bo.qmd - docs/tutorials/advanced_examples/octane_number.qmd + - docs/tutorials/advanced_examples/llm_molecular.qmd - section: "Benchmarks" contents: - docs/tutorials/benchmarks/index.qmd diff --git a/bofire/data_models/api.py b/bofire/data_models/api.py index a1c44d827..c9ad23c22 100644 --- a/bofire/data_models/api.py +++ b/bofire/data_models/api.py @@ -22,6 +22,7 @@ Output, ) from bofire.data_models.kernels.api import AnyKernel, Kernel +from bofire.data_models.llm.api import AnyLLMProvider, LLMProvider from bofire.data_models.molfeatures.api import AnyMolFeatures, MolFeatures from bofire.data_models.objectives.api import AnyObjective, Objective from bofire.data_models.outlier_detection.api import ( diff --git a/bofire/data_models/constraints/categorical.py b/bofire/data_models/constraints/categorical.py index 9605898db..4aa655f26 100644 --- a/bofire/data_models/constraints/categorical.py +++ b/bofire/data_models/constraints/categorical.py @@ -39,6 +39,31 @@ class CategoricalExcludeConstraint(Constraint): ] logical_op: Literal["AND", "OR", "XOR"] = "AND" + def to_description(self) -> str: + """Render as ``"Exclude where solvent in ['A', 'B'] AND temp > 50"``. + + Example:: + + >>> c = CategoricalExcludeConstraint( + ... features=["solvent", "temp"], + ... conditions=[SelectionCondition(selection=["A"]), ThresholdCondition(threshold=50, operator=">")], + ... ) + >>> c.to_description() + "Exclude where solvent in ['A'] AND temp > 50" + """ + conds = [] + for feat, cond in zip(self.features, self.conditions): + if isinstance(cond, SelectionCondition): + conds.append(f"{feat} in {cond.selection}") + elif isinstance(cond, ThresholdCondition): + conds.append(f"{feat} {cond.operator} {cond.threshold}") + else: + conds.append(f"{feat}: {type(cond).__name__}") + desc = f"Exclude where {f' {self.logical_op} '.join(conds)}" + if self.context: + desc += f" — {self.context}" + return desc + def validate_inputs(self, inputs: Inputs): """Validates that the features stored in Inputs are compatible with the constraint. 
diff --git a/bofire/data_models/constraints/constraint.py b/bofire/data_models/constraints/constraint.py index 957b2215b..cade4abdc 100644 --- a/bofire/data_models/constraints/constraint.py +++ b/bofire/data_models/constraints/constraint.py @@ -14,6 +14,11 @@ class Constraint(BaseModel): type: Any features: FeatureKeys + context: Optional[str] = None + + @abstractmethod + def to_description(self) -> str: + """Return a human-readable description of this constraint.""" @abstractmethod def is_fulfilled( diff --git a/bofire/data_models/constraints/interpoint.py b/bofire/data_models/constraints/interpoint.py index d6846f7d8..d8280807e 100644 --- a/bofire/data_models/constraints/interpoint.py +++ b/bofire/data_models/constraints/interpoint.py @@ -33,6 +33,9 @@ class InterpointEqualityConstraint(InterpointConstraint): features: Annotated[List[str], Field(min_length=1), Field(max_length=1)] multiplicity: Optional[Annotated[int, Field(ge=2)]] = None + def to_description(self) -> str: + raise NotImplementedError + @property def feature(self) -> str: """Feature to be constrained.""" diff --git a/bofire/data_models/constraints/linear.py b/bofire/data_models/constraints/linear.py index 134e3437a..9dbb60055 100644 --- a/bofire/data_models/constraints/linear.py +++ b/bofire/data_models/constraints/linear.py @@ -78,6 +78,21 @@ class LinearEqualityConstraint(LinearConstraint, EqualityConstraint): type: Literal["LinearEqualityConstraint"] = "LinearEqualityConstraint" + def to_description(self) -> str: + """Render as ``"1.0*x1 + 2.0*x2 = 5.0"``. + + Example:: + + >>> c = LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1.0, 2.0], rhs=5.0) + >>> c.to_description() + '1.0*x1 + 2.0*x2 = 5.0' + """ + terms = " + ".join(f"{c}*{f}" for c, f in zip(self.coefficients, self.features)) + desc = f"{terms} = {self.rhs}" + if self.context: + desc += f" — {self.context}" + return desc + class LinearInequalityConstraint(LinearConstraint, InequalityConstraint): """Linear inequality constraint of the form `coefficients * x <= rhs`. @@ -94,6 +109,21 @@ class LinearInequalityConstraint(LinearConstraint, InequalityConstraint): type: Literal["LinearInequalityConstraint"] = "LinearInequalityConstraint" + def to_description(self) -> str: + """Render as ``"1.0*x1 + 2.0*x2 <= 5.0"``. + + Example:: + + >>> c = LinearInequalityConstraint(features=["x1", "x2"], coefficients=[1.0, 2.0], rhs=5.0) + >>> c.to_description() + '1.0*x1 + 2.0*x2 <= 5.0' + """ + terms = " + ".join(f"{c}*{f}" for c, f in zip(self.coefficients, self.features)) + desc = f"{terms} <= {self.rhs}" + if self.context: + desc += f" — {self.context}" + return desc + def as_smaller_equal(self) -> Tuple[List[str], List[float], float]: """Return attributes in the smaller equal convention diff --git a/bofire/data_models/constraints/nchoosek.py b/bofire/data_models/constraints/nchoosek.py index aa41d47ce..f76b1d5b4 100644 --- a/bofire/data_models/constraints/nchoosek.py +++ b/bofire/data_models/constraints/nchoosek.py @@ -30,6 +30,22 @@ class NChooseKConstraint(IntrapointConstraint): max_count: int none_also_valid: bool + def to_description(self) -> str: + """Render as ``"Choose 1-3 active features from ['x1', 'x2', 'x3']"``. 
+ + Example:: + + >>> c = NChooseKConstraint(features=["x1", "x2", "x3"], min_count=1, max_count=3, none_also_valid=False) + >>> c.to_description() + "Choose 1-3 active features from ['x1', 'x2', 'x3']" + """ + desc = f"Choose {self.min_count}-{self.max_count} active features from {self.features}" + if self.none_also_valid: + desc += ", or none" + if self.context: + desc += f" — {self.context}" + return desc + def validate_inputs(self, inputs: Inputs): keys = inputs.get_keys([ContinuousInput, DiscreteInput]) for f in self.features: diff --git a/bofire/data_models/constraints/nonlinear.py b/bofire/data_models/constraints/nonlinear.py index 2d4b0e249..ddc9a4a36 100644 --- a/bofire/data_models/constraints/nonlinear.py +++ b/bofire/data_models/constraints/nonlinear.py @@ -298,6 +298,9 @@ class NonlinearEqualityConstraint(NonlinearConstraint, EqualityConstraint): type: Literal["NonlinearEqualityConstraint"] = "NonlinearEqualityConstraint" + def to_description(self) -> str: + raise NotImplementedError + class NonlinearInequalityConstraint(NonlinearConstraint, InequalityConstraint): """Nonlinear inequality constraint of the form 'expression <= 0'. @@ -308,3 +311,6 @@ class NonlinearInequalityConstraint(NonlinearConstraint, InequalityConstraint): """ type: Literal["NonlinearInequalityConstraint"] = "NonlinearInequalityConstraint" + + def to_description(self) -> str: + raise NotImplementedError diff --git a/bofire/data_models/constraints/product.py b/bofire/data_models/constraints/product.py index 58a35a6c1..a581b01e2 100644 --- a/bofire/data_models/constraints/product.py +++ b/bofire/data_models/constraints/product.py @@ -102,6 +102,22 @@ class ProductEqualityConstraint(ProductConstraint, EqualityConstraint): type: Literal["ProductEqualityConstraint"] = "ProductEqualityConstraint" + def to_description(self) -> str: + """Render as ``"x1^2 * x2^3 = 1.0"``. + + Example:: + + >>> c = ProductEqualityConstraint(features=["x1", "x2"], exponents=[2, 3], rhs=1.0, sign=1) + >>> c.to_description() + 'x1^2 * x2^3 = 1.0' + """ + sign = "" if self.sign == 1 else "-" + terms = " * ".join(f"{f}^{e}" for f, e in zip(self.features, self.exponents)) + desc = f"{sign}{terms} = {self.rhs}" + if self.context: + desc += f" — {self.context}" + return desc + class ProductInequalityConstraint(ProductConstraint, InequalityConstraint): """Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en <= rhs`. @@ -117,3 +133,19 @@ class ProductInequalityConstraint(ProductConstraint, InequalityConstraint): """ type: Literal["ProductInequalityConstraint"] = "ProductInequalityConstraint" + + def to_description(self) -> str: + """Render as ``"x1^2 * x2^3 <= 1.0"``. 
+ + Example:: + + >>> c = ProductInequalityConstraint(features=["x1", "x2"], exponents=[2, 3], rhs=1.0, sign=1) + >>> c.to_description() + 'x1^2 * x2^3 <= 1.0' + """ + sign = "" if self.sign == 1 else "-" + terms = " * ".join(f"{f}^{e}" for f, e in zip(self.features, self.exponents)) + desc = f"{sign}{terms} <= {self.rhs}" + if self.context: + desc += f" — {self.context}" + return desc diff --git a/bofire/data_models/domain/domain.py b/bofire/data_models/domain/domain.py index bef8a9230..3903b7cf9 100644 --- a/bofire/data_models/domain/domain.py +++ b/bofire/data_models/domain/domain.py @@ -36,20 +36,48 @@ def is_numeric(s: Union[pd.Series, pd.DataFrame]) -> bool: class Domain(BaseModel): - type: Literal["Domain"] = "Domain" - - inputs: Inputs = Field(default_factory=lambda: Inputs()) - outputs: Outputs = Field(default_factory=lambda: Outputs()) - constraints: Constraints = Field(default_factory=lambda: Constraints()) - """Representation of the optimization problem/domain Attributes: inputs (List[Input], optional): List of input features. Defaults to []. outputs (List[Output], optional): List of output features. Defaults to []. constraints (List[Constraint], optional): List of constraints. Defaults to []. + context (str, optional): Free-text context providing additional information + about the optimization problem. Useful for agentic optimization where an + LLM agent can leverage this description to better understand the overall + problem, its goals, and any domain-specific knowledge. """ + type: Literal["Domain"] = "Domain" + + inputs: Inputs = Field(default_factory=lambda: Inputs()) + outputs: Outputs = Field(default_factory=lambda: Outputs()) + constraints: Constraints = Field(default_factory=lambda: Constraints()) + context: Optional[str] = None + + def to_description(self) -> str: + """Render a human-readable description of the optimization problem. + + Covers problem context, objectives, and constraints. Feature details + are handled separately by ``Inputs.to_pydantic_model()`` which embeds + bounds, types, and context into the dynamic output schema. + """ + lines = [] + + if self.context: + lines.append(f"## Problem Context\n{self.context}") + + lines.append("\n## Objectives") + for feat in self.outputs: + lines.append(f"- {feat.to_description()}") + + if len(self.constraints) > 0: + lines.append("\n## Constraints (candidates MUST satisfy all of these)") + for c in self.constraints: + lines.append(f"- {c.to_description()}") + + return "\n".join(lines) + @classmethod def from_lists( cls, diff --git a/bofire/data_models/domain/features.py b/bofire/data_models/domain/features.py index e609d972a..0b0d2c269 100644 --- a/bofire/data_models/domain/features.py +++ b/bofire/data_models/domain/features.py @@ -22,7 +22,7 @@ import numpy as np import pandas as pd -from pydantic import Field, field_validator, validate_call +from pydantic import Field, create_model, field_validator, validate_call from scipy.stats.qmc import LatinHypercube, Sobol from typing_extensions import Self @@ -915,6 +915,21 @@ def is_fulfilled(self, experiments: pd.DataFrame) -> pd.Series: .all(axis=1) ) + def to_pydantic_model(self, name: str = "CandidatePoint"): + """Build a dynamic Pydantic model with one field per input feature. + + Each feature's ``to_pydantic_field()`` determines the field type and + constraints (e.g., ge/le for continuous, Literal for categorical). + + Returns: + A Pydantic BaseModel subclass with typed fields matching the inputs. 
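+
+        Example::
+
+            >>> # hypothetical inputs: a continuous "temp" and a categorical "solvent"
+            >>> CandidatePoint = inputs.to_pydantic_model()
+            >>> CandidatePoint(temp=25.0, solvent="water")  # bounds and categories are enforced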
+ """ + fields = {} + for feature in self: + field_type, field_info = feature.to_pydantic_field() + fields[feature.key] = (field_type, field_info) + return create_model(name, **fields) + class Outputs(_BaseFeatures[AnyOutput]): """Container of output features, only output features are allowed. diff --git a/bofire/data_models/features/categorical.py b/bofire/data_models/features/categorical.py index 6b7b59af6..c5f6cbc31 100644 --- a/bofire/data_models/features/categorical.py +++ b/bofire/data_models/features/categorical.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd from pydantic import Field, field_validator, model_validator +from pydantic.fields import FieldInfo from bofire.data_models.enum import CategoricalEncodingEnum from bofire.data_models.features.feature import ( @@ -15,6 +16,24 @@ from bofire.data_models.types import CategoryVals +# Max number of allowed categories still encoded as ``Literal[...]`` by +# ``to_pydantic_field``. Above this we emit ``str`` with the allowed values +# kept in the field description; the ``Domain.validate_candidates`` pass then +# catches invalid values at ask-time. +# +# Why: providers that offer constrained-decoding for structured output (OpenAI, +# Anthropic) compile each JSON Schema enum into a token-level mask. The +# compile cost scales with the total byte-length of all enum values, not just +# their count. For hundreds of long strings (e.g. SMILES categories) this +# blows the provider's compiled-schema budget and the request is rejected. +# Observed failure: Anthropic returns 400 "Schema is too complex for +# compilation." with ~390 SMILES. OpenAI documents a hard cap at 500 enum +# values and an additional 7500-char combined-length cap above 250 values. +# 32 is well below any documented limit and leaves headroom for very long +# category strings. +LLM_ENUM_SCHEMA_THRESHOLD = 32 + + class CategoricalInput(Input): """Base class for all categorical input features. @@ -50,6 +69,45 @@ def validate_categories_fitting_allowed(self): raise ValueError("no category is allowed") return self + def _description_prefix(self) -> str: + """Leading description string identifying this feature kind.""" + return f"Categorical, allowed: {self.get_allowed_categories()}" + + def _extra_description_parts(self) -> List[str]: + """Optional extras appended after the prefix, before context.""" + return [] + + def to_pydantic_field(self) -> Tuple[type, FieldInfo]: + """Return ``(Literal[...], Field(description=...))`` with allowed categories. + + When the number of allowed categories exceeds + ``LLM_ENUM_SCHEMA_THRESHOLD`` the type falls back to ``str`` (the + allowed values stay in the description). See the module-level comment + on the constant for the reason. + + Subclasses customize the output by overriding ``_description_prefix`` + and/or ``_extra_description_parts``. 
+
+        Example::
+
+            >>> feat = CategoricalInput(key="solvent", categories=["water", "ethanol", "toluene"])
+            >>> field_type, _ = feat.to_pydantic_field()
+            >>> # field_type = Literal['water', 'ethanol', 'toluene']
+        """
+        allowed = self.get_allowed_categories()
+        desc_parts = [self._description_prefix(), *self._extra_description_parts()]
+        if self.context:
+            desc_parts.append(self.context)
+        field_type: type = (
+            str
+            if len(allowed) > LLM_ENUM_SCHEMA_THRESHOLD
+            else Literal[tuple(allowed)]  # ty: ignore[invalid-assignment]
+        )
+        return (
+            field_type,
+            Field(description=" — ".join(desc_parts)),
+        )
+
     @staticmethod
     def valid_transform_types() -> List[CategoricalEncodingEnum]:
         return [
@@ -374,6 +432,9 @@ class CategoricalOutput(Output):
     categories: CategoryVals
     objective: AnyCategoricalObjective
 
+    def to_description(self) -> str:
+        raise NotImplementedError
+
     @model_validator(mode="after")
     def validate_objective_categories(self):
         """Validates that objective categories match the output categories
diff --git a/bofire/data_models/features/continuous.py b/bofire/data_models/features/continuous.py
index 3bedb1fc4..b621d6e68 100644
--- a/bofire/data_models/features/continuous.py
+++ b/bofire/data_models/features/continuous.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pandas as pd
 from pydantic import Field, PositiveFloat, model_validator
+from pydantic.fields import FieldInfo
 
 from bofire.data_models.features.feature import Output, TTransform
 from bofire.data_models.features.numerical import NumericalInput
@@ -44,6 +45,40 @@ def lower_bound(self) -> float:
     def upper_bound(self) -> float:
         return self.bounds[1]
 
+    def _description_prefix(self) -> str:
+        """Leading description string identifying this feature kind."""
+        return f"Continuous, bounds [{self.bounds[0]}, {self.bounds[1]}]"
+
+    def _extra_description_parts(self) -> List[str]:
+        """Optional extras appended after the prefix, before context."""
+        return []
+
+    def to_pydantic_field(self) -> Tuple[type, FieldInfo]:
+        """Return ``(float, Field(ge=..., le=..., description=...))``.
+
+        Subclasses customize the output by overriding ``_description_prefix``
+        and/or ``_extra_description_parts``.
+
+        Example::
+
+            >>> feat = ContinuousInput(key="temp", bounds=(20.0, 200.0), context="Temperature in C")
+            >>> field_type, field_info = feat.to_pydantic_field()
+            >>> # field_type = float
+            >>> # field_info has ge=20.0, le=200.0
+            >>> # description = "Continuous, bounds [20.0, 200.0] — Temperature in C"
+        """
+        desc_parts = [self._description_prefix(), *self._extra_description_parts()]
+        lower = self.bounds[0]
+        if self.allow_zero:
+            lower = min(0.0, lower)
+            desc_parts.append("can also be 0 (inactive)")
+        if self.context:
+            desc_parts.append(self.context)
+        return (
+            float,
+            Field(ge=lower, le=self.bounds[1], description=" — ".join(desc_parts)),
+        )
+
     @model_validator(mode="after")
     def validate_step_size(self):
         if self.stepsize is None:
@@ -242,6 +277,22 @@ class ContinuousOutput(Output):
         default_factory=lambda: MaximizeObjective(w=1.0),
     )
 
+    def to_description(self) -> str:
+        """Return a human-readable description combining objective and context.
+
+        Example::
+
+            >>> feat = ContinuousOutput(key="yield", objective=MaximizeObjective(w=1.0), context="Target >90%")
+            >>> feat.to_description()
+            'yield: Maximize — Target >90%'
+        """
+        desc = self.key
+        if self.objective is not None:
+            desc += f": {self.objective.to_description()}"
+        if self.context:
+            desc += f" — {self.context}"
+        return desc
+
     def __call__(self, values: pd.Series, values_adapt: pd.Series) -> pd.Series:
         if self.objective is None:
             return pd.Series(
diff --git a/bofire/data_models/features/descriptor.py b/bofire/data_models/features/descriptor.py
index 588c4ab33..51aee7b72 100644
--- a/bofire/data_models/features/descriptor.py
+++ b/bofire/data_models/features/descriptor.py
@@ -29,6 +29,9 @@ class ContinuousDescriptorInput(ContinuousInput):
     descriptors: Descriptors
     values: DiscreteVals
 
+    def _extra_description_parts(self) -> List[str]:
+        return [f"descriptors: {dict(zip(self.descriptors, self.values))}"]
+
     @model_validator(mode="after")
     def validate_list_lengths(self):
         """Compares the length of the defined descriptors list with the provided values
@@ -83,6 +86,16 @@ class CategoricalDescriptorInput(CategoricalInput):
         Field(min_length=1),
     ]
 
+    def _description_prefix(self) -> str:
+        return f"Categorical with descriptors, allowed: {self.get_allowed_categories()}"
+
+    def _extra_description_parts(self) -> List[str]:
+        mapping = {
+            cat: dict(zip(self.descriptors, vals))
+            for cat, vals in zip(self.categories, self.values)
+        }
+        return [f"descriptors per category: {mapping}"]
+
     @field_validator("values")
     @classmethod
     def validate_values(cls, v, info):
diff --git a/bofire/data_models/features/discrete.py b/bofire/data_models/features/discrete.py
index 4bd1e10df..016522e8b 100644
--- a/bofire/data_models/features/discrete.py
+++ b/bofire/data_models/features/discrete.py
@@ -2,7 +2,8 @@
 
 import numpy as np
 import pandas as pd
-from pydantic import field_validator
+from pydantic import Field, field_validator
+from pydantic.fields import FieldInfo
 
 from bofire.data_models.features.feature import TTransform
 from bofire.data_models.features.numerical import NumericalInput
@@ -24,6 +25,23 @@ class DiscreteInput(NumericalInput):
     values: DiscreteVals
     rtol: float = 1e-7
 
+    def to_pydantic_field(self) -> Tuple[type, FieldInfo]:
+        """Return ``(Literal[...], Field(description=...))`` with allowed values.
+ + Example:: + + >>> feat = DiscreteInput(key="n_steps", values=[1.0, 2.0, 5.0]) + >>> field_type, _ = feat.to_pydantic_field() + >>> # field_type = Literal[1.0, 2.0, 5.0] + """ + desc_parts = [f"Discrete, allowed values: {self.values}"] + if self.context: + desc_parts.append(self.context) + return ( + Literal[tuple(self.values)], + Field(description=" — ".join(desc_parts)), + ) + @field_validator("values") @classmethod def validate_values_unique(cls, values): diff --git a/bofire/data_models/features/feature.py b/bofire/data_models/features/feature.py index f66d0c533..6c0321b32 100644 --- a/bofire/data_models/features/feature.py +++ b/bofire/data_models/features/feature.py @@ -2,6 +2,7 @@ from typing import Any, ClassVar, List, Optional, Tuple, Union import pandas as pd +from pydantic.fields import FieldInfo from bofire.data_models.base import BaseModel from bofire.data_models.enum import CategoricalEncodingEnum @@ -18,6 +19,7 @@ class Feature(BaseModel): type: Any key: str order_id: ClassVar[int] = -1 + context: Optional[str] = None def __lt__(self, other) -> bool: """Method to compare two models to get them in the desired order. @@ -40,6 +42,14 @@ def __lt__(self, other) -> bool: class Input(Feature): """Base class for all input features.""" + @abstractmethod + def to_pydantic_field(self) -> Tuple[type, FieldInfo]: + """Return ``(type, FieldInfo)`` for use in a dynamically created Pydantic model. + + Each subclass must implement this to provide appropriate type constraints + (e.g., ``ge``/``le`` bounds for continuous, ``Literal`` for categorical). + """ + @staticmethod @abstractmethod def valid_transform_types() -> List[Union[CategoricalEncodingEnum, AnyMolFeatures]]: @@ -152,6 +162,10 @@ class Output(Feature): """ + @abstractmethod + def to_description(self) -> str: + """Return a human-readable description of this output feature.""" + @abstractmethod def __call__(self, values: pd.Series) -> pd.Series: pass diff --git a/bofire/data_models/features/molecular.py b/bofire/data_models/features/molecular.py index 497d15cfe..2d9b6901b 100644 --- a/bofire/data_models/features/molecular.py +++ b/bofire/data_models/features/molecular.py @@ -25,6 +25,12 @@ class ContinuousMolecularInput(ContinuousInput): order_id: ClassVar[int] = 4 molecule: str + def _description_prefix(self) -> str: + return ( + f"Continuous molecular (SMILES: {self.molecule}), " + f"bounds [{self.bounds[0]}, {self.bounds[1]}]" + ) + @field_validator("molecule") @classmethod def validate_smiles(cls, v: str) -> str: @@ -49,6 +55,11 @@ class CategoricalMolecularInput(CategoricalInput): # order_id: ClassVar[int] = 7 order_id: ClassVar[int] = 5 + def _description_prefix(self) -> str: + return ( + f"Categorical molecular (SMILES), allowed: {self.get_allowed_categories()}" + ) + @field_validator("categories") @classmethod def validate_smiles(cls, categories: Sequence[str]): diff --git a/bofire/data_models/llm/__init__.py b/bofire/data_models/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bofire/data_models/llm/_register.py b/bofire/data_models/llm/_register.py new file mode 100644 index 000000000..617fa78f2 --- /dev/null +++ b/bofire/data_models/llm/_register.py @@ -0,0 +1,30 @@ +"""Registration utilities for custom LLM provider types.""" + +from bofire.data_models.unions import extract_union_args, tagged_union + + +def register_llm_provider(data_model_cls: type) -> None: + """Register a custom LLM provider so it is accepted in AnyLLMProvider fields. 
+ + Rebuilds the ``AnyLLMProvider`` union with the new type appended, and + calls ``model_rebuild`` on dependent Pydantic models (``LLMStrategy``) + so that the new type is accepted. + + Users must separately register a mapper function via + ``bofire.llm.mapper.register`` so that the data model can be turned + into a pydantic-ai ``Model`` at runtime. + + Args: + data_model_cls: A concrete subclass of ``LLMProvider``. + """ + import bofire.data_models.llm.api as llm_api + from bofire.data_models._register_utils import patch_field + from bofire.data_models.strategies.llm import LLMStrategy + + existing_types, _ = extract_union_args(llm_api.AnyLLMProvider) + if data_model_cls in existing_types: + return + llm_api.AnyLLMProvider = tagged_union(*existing_types, data_model_cls) + + patch_field(LLMStrategy, "llm", llm_api.AnyLLMProvider) + LLMStrategy.model_rebuild(force=True) diff --git a/bofire/data_models/llm/api.py b/bofire/data_models/llm/api.py new file mode 100644 index 000000000..fad6ef665 --- /dev/null +++ b/bofire/data_models/llm/api.py @@ -0,0 +1,17 @@ +from bofire.data_models.llm._register import register_llm_provider # noqa: F401 +from bofire.data_models.llm.provider import ( + AnthropicFoundryLLMProvider, + AnthropicLLMProvider, + LLMProvider, # noqa: F401 + OpenAICompatibleLLMProvider, + OpenAILLMProvider, +) +from bofire.data_models.unions import tagged_union + + +AnyLLMProvider = tagged_union( + AnthropicLLMProvider, + AnthropicFoundryLLMProvider, + OpenAILLMProvider, + OpenAICompatibleLLMProvider, +) diff --git a/bofire/data_models/llm/provider.py b/bofire/data_models/llm/provider.py new file mode 100644 index 000000000..feda1f3dd --- /dev/null +++ b/bofire/data_models/llm/provider.py @@ -0,0 +1,97 @@ +from typing import Any, Literal, Optional + +from pydantic import Field + +from bofire.data_models.base import BaseModel + + +class LLMProvider(BaseModel): + """Base class for all LLM provider configurations. + + LLM providers are standalone data models that describe how to connect to an + LLM service. They can be passed to any BoFire component that needs LLM + capabilities (strategies, agents, etc.). API keys are referenced via + environment variable names and resolved at runtime. + + Usage-specific settings (temperature, max_tokens, system_prompt) belong on + the component that uses the provider, not on the provider itself. + + Attributes: + model: The model identifier (e.g., "claude-sonnet-4-20250514", "gpt-4o"). + api_key_env_var: Name of the environment variable holding the API key. + """ + + type: Any + model: str + api_key_env_var: str + + +class AnthropicLLMProvider(LLMProvider): + """LLM provider for the direct Anthropic API. + + Attributes: + model: Anthropic model identifier. + api_key_env_var: Environment variable name for the Anthropic API key. + base_url: Optional custom base URL for the API. + """ + + type: Literal["AnthropicLLMProvider"] = "AnthropicLLMProvider" + model: str = "claude-sonnet-4-20250514" + api_key_env_var: str = "ANTHROPIC_API_KEY" + base_url: Optional[str] = Field( + default=None, + description="Custom base URL for the Anthropic API.", + ) + + +class AnthropicFoundryLLMProvider(LLMProvider): + """LLM provider for Anthropic models hosted on Azure AI Foundry. + + Attributes: + model: Anthropic model identifier on Azure. + api_key_env_var: Environment variable name for the Foundry API key. + resource_env_var: Environment variable name for the Azure resource name. 
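+
+    Example::
+
+        >>> provider = AnthropicFoundryLLMProvider()
+        >>> # defaults used; env vars are resolved later at mapping time, not here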
+ """ + + type: Literal["AnthropicFoundryLLMProvider"] = "AnthropicFoundryLLMProvider" + model: str = "claude-sonnet-4-20250514" + api_key_env_var: str = "ANTHROPIC_FOUNDRY_API_KEY" + resource_env_var: str = "ANTHROPIC_FOUNDRY_RESOURCE" + + +class OpenAILLMProvider(LLMProvider): + """LLM provider for the OpenAI API (including Azure OpenAI via base_url). + + Attributes: + model: OpenAI model identifier. + api_key_env_var: Environment variable name for the OpenAI API key. + base_url: Optional custom base URL (e.g., for Azure OpenAI). + organization: Optional OpenAI organization ID. + """ + + type: Literal["OpenAILLMProvider"] = "OpenAILLMProvider" + model: str = "gpt-4o" + api_key_env_var: str = "OPENAI_API_KEY" + base_url: Optional[str] = Field( + default=None, + description="Custom base URL for the API (e.g., Azure OpenAI endpoint).", + ) + organization: Optional[str] = Field( + default=None, + description="OpenAI organization ID.", + ) + + +class OpenAICompatibleLLMProvider(LLMProvider): + """LLM provider for any OpenAI-compatible API (vLLM, Ollama, etc.). + + Attributes: + model: Model identifier at the endpoint. + api_key_env_var: Environment variable name for the API key. + base_url: The endpoint URL (required). + """ + + type: Literal["OpenAICompatibleLLMProvider"] = "OpenAICompatibleLLMProvider" + model: str + api_key_env_var: str = "OPENAI_API_KEY" + base_url: str diff --git a/bofire/data_models/objectives/categorical.py b/bofire/data_models/objectives/categorical.py index 21752e0e5..24c146e49 100644 --- a/bofire/data_models/objectives/categorical.py +++ b/bofire/data_models/objectives/categorical.py @@ -29,6 +29,9 @@ class ConstrainedCategoricalObjective(ConstrainedObjective, Objective): desirability: List[bool] type: Literal["ConstrainedCategoricalObjective"] = "ConstrainedCategoricalObjective" + def to_description(self) -> str: + raise NotImplementedError + @model_validator(mode="after") def validate_desireability(self): """Validates that categories have unique names diff --git a/bofire/data_models/objectives/desirabilities.py b/bofire/data_models/objectives/desirabilities.py index ff438ca29..d0ef26f81 100644 --- a/bofire/data_models/objectives/desirabilities.py +++ b/bofire/data_models/objectives/desirabilities.py @@ -76,6 +76,9 @@ class IncreasingDesirabilityObjective(DesirabilityObjective): type: Literal["IncreasingDesirabilityObjective"] = "IncreasingDesirabilityObjective" log_shape_factor: float = 0.0 + def to_description(self) -> str: + raise NotImplementedError + def call_numpy( self, x: np.ndarray, @@ -125,6 +128,9 @@ class DecreasingDesirabilityObjective(DesirabilityObjective): type: Literal["DecreasingDesirabilityObjective"] = "DecreasingDesirabilityObjective" log_shape_factor: float = 0.0 + def to_description(self) -> str: + raise NotImplementedError + def call_numpy( self, x: np.ndarray, @@ -174,6 +180,9 @@ class PeakDesirabilityObjective(DesirabilityObjective): log_shape_factor_decreasing: float = 0.0 # often named log_t peak_position: float = 0.5 # often named T + def to_description(self) -> str: + raise NotImplementedError + def call_numpy( self, x: np.ndarray, @@ -216,6 +225,9 @@ def validate_peak_position(self): class InRangeDesirability(DesirabilityObjective): type: Literal["InRangeDesirability"] = "InRangeDesirability" + def to_description(self) -> str: + raise NotImplementedError + def call_numpy( self, x: np.ndarray, diff --git a/bofire/data_models/objectives/identity.py b/bofire/data_models/objectives/identity.py index e81df0b82..b6f9dc5d3 100644 --- 
a/bofire/data_models/objectives/identity.py +++ b/bofire/data_models/objectives/identity.py @@ -81,6 +81,9 @@ class MaximizeObjective(IdentityObjective): type: Literal["MaximizeObjective"] = "MaximizeObjective" + def to_description(self) -> str: + return "Maximize" + class MinimizeObjective(IdentityObjective): """Class returning the negative identity as reward. @@ -93,6 +96,9 @@ class MinimizeObjective(IdentityObjective): type: Literal["MinimizeObjective"] = "MinimizeObjective" + def to_description(self) -> str: + return "Minimize" + def __call__( self, x: Union[pd.Series, np.ndarray], diff --git a/bofire/data_models/objectives/objective.py b/bofire/data_models/objectives/objective.py index 814e3aabf..f4453b638 100644 --- a/bofire/data_models/objectives/objective.py +++ b/bofire/data_models/objectives/objective.py @@ -13,6 +13,10 @@ class Objective(BaseModel): type: Any + @abstractmethod + def to_description(self) -> str: + """Return a human-readable description of this objective.""" + @abstractmethod def __call__( self, diff --git a/bofire/data_models/objectives/sigmoid.py b/bofire/data_models/objectives/sigmoid.py index b2526917a..5cd2cd779 100644 --- a/bofire/data_models/objectives/sigmoid.py +++ b/bofire/data_models/objectives/sigmoid.py @@ -38,6 +38,9 @@ class MaximizeSigmoidObjective(SigmoidObjective): type: Literal["MaximizeSigmoidObjective"] = "MaximizeSigmoidObjective" + def to_description(self) -> str: + raise NotImplementedError + def __call__( self, x: Union[pd.Series, np.ndarray], @@ -69,6 +72,9 @@ class MovingMaximizeSigmoidObjective(SigmoidObjective): type: Literal["MovingMaximizeSigmoidObjective"] = "MovingMaximizeSigmoidObjective" + def to_description(self) -> str: + raise NotImplementedError + def get_adjusted_tp(self, x: Union[pd.Series, np.ndarray]) -> float: """Get the adjusted turning point for the sigmoid function. 
@@ -113,6 +119,9 @@ class MinimizeSigmoidObjective(SigmoidObjective): type: Literal["MinimizeSigmoidObjective"] = "MinimizeSigmoidObjective" + def to_description(self) -> str: + raise NotImplementedError + def __call__( self, x: Union[pd.Series, np.ndarray], diff --git a/bofire/data_models/objectives/target.py b/bofire/data_models/objectives/target.py index 15f38477d..313c5c95e 100644 --- a/bofire/data_models/objectives/target.py +++ b/bofire/data_models/objectives/target.py @@ -28,6 +28,9 @@ class CloseToTargetObjective(Objective): target_value: float exponent: float + def to_description(self) -> str: + raise NotImplementedError + def __call__( self, x: Union[pd.Series, np.ndarray], @@ -53,6 +56,9 @@ class TargetObjective(Objective, ConstrainedObjective): tolerance: TGe0 steepness: TGt0 + def to_description(self) -> str: + raise NotImplementedError + def __call__( self, x: Union[pd.Series, np.ndarray], diff --git a/bofire/data_models/strategies/actual_strategy_type.py b/bofire/data_models/strategies/actual_strategy_type.py index 50bcf68cc..99d9de0f6 100644 --- a/bofire/data_models/strategies/actual_strategy_type.py +++ b/bofire/data_models/strategies/actual_strategy_type.py @@ -3,6 +3,7 @@ from bofire.data_models.strategies.fractional_factorial import ( FractionalFactorialStrategy, ) +from bofire.data_models.strategies.llm import LLMStrategy from bofire.data_models.strategies.predictives.active_learning import ( ActiveLearningStrategy, ) @@ -41,6 +42,7 @@ MoboStrategy, ShortestPathStrategy, FractionalFactorialStrategy, + LLMStrategy, ] ActualStrategy = tagged_union(*_ACTUAL_STRATEGY_TYPES) diff --git a/bofire/data_models/strategies/api.py b/bofire/data_models/strategies/api.py index 00924f4a8..37c4d3a10 100644 --- a/bofire/data_models/strategies/api.py +++ b/bofire/data_models/strategies/api.py @@ -18,6 +18,7 @@ from bofire.data_models.strategies.fractional_factorial import ( FractionalFactorialStrategy, ) +from bofire.data_models.strategies.llm import LLMStrategy from bofire.data_models.strategies.meta_strategy_type import MetaStrategy from bofire.data_models.strategies.predictives.acqf_optimization import ( LSRBO, diff --git a/bofire/data_models/strategies/llm.py b/bofire/data_models/strategies/llm.py new file mode 100644 index 000000000..a4031af87 --- /dev/null +++ b/bofire/data_models/strategies/llm.py @@ -0,0 +1,119 @@ +from typing import Annotated, Any, Dict, Literal, Optional, Type + +from pydantic import Field, PositiveInt, model_validator + +from bofire.data_models.constraints.api import ( + Constraint, + LinearEqualityConstraint, + LinearInequalityConstraint, + NChooseKConstraint, +) +from bofire.data_models.features.api import Feature +from bofire.data_models.llm.api import AnyLLMProvider +from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective +from bofire.data_models.strategies.strategy import Strategy + + +class LLMStrategy(Strategy): + """Strategy that uses a large language model to propose optimization candidates. + + Instead of fitting a surrogate and optimizing an acquisition function, + this strategy lets an LLM read the optimization problem — feature bounds, + constraints, objectives, contextual descriptions, and prior experiments — + and directly propose candidate points. It is useful for cold-start + designs, mixed / categorical / molecular spaces where domain knowledge + helps, and exploration informed by written context (``Feature.context`` + and ``Domain.context``). 
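+
+    Context is plain free text attached when features or the domain are
+    constructed, e.g. ``ContinuousInput(key="temp", bounds=(20.0, 200.0),
+    context="Temperature in C")``, and it is surfaced to the model verbatim
+    via the problem description and the output schema's field descriptions.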
+ + It is not a replacement for a Bayesian optimizer on well-understood + numerical problems: there is no calibrated uncertainty model and no + acquisition function. Treat candidates as informed heuristics, not + optima. + + On each ``ask()``, a pydantic output schema is generated from the + domain's input features and the LLM is prompted with a textual problem + description plus, optionally, a selection of prior experiments. Returned + candidates are validated against the domain; bound or constraint + violations are sent back to the LLM as retry messages via pydantic-ai's + ``output_retries``. + + Currently supports single-objective optimization with ``MaximizeObjective`` + or ``MinimizeObjective``, and ``LinearEquality``, ``LinearInequality``, + and ``NChooseK`` constraints. All feature types are supported. + + Example: + Basic usage:: + + strategy = LLMStrategy.make( + domain=domain, + llm=AnthropicLLMProvider(model="claude-sonnet-4-5"), + ) + + Enable extended reasoning for harder problems (many constraints, + rich context). ``thinking`` is pydantic-ai's cross-provider + capability key — it maps to Anthropic's extended thinking, OpenAI's + ``reasoning_effort``, and similar mechanisms on other providers. + Reasoning increases cost and latency considerably, so it is not + enabled by default:: + + strategy = LLMStrategy.make( + domain=domain, + llm=AnthropicLLMProvider(model="claude-sonnet-4-5"), + model_settings={"thinking": "high"}, + ) + + Attributes: + llm: LLM provider configuration. + model_settings: Optional dict forwarded directly to pydantic-ai's + ``model_settings``. Useful keys include ``temperature``, + ``max_tokens``, ``top_p``, ``seed``, ``timeout``, and the + cross-provider capability ``thinking`` (``"low"`` / ``"medium"`` + / ``"high"``). Provider-prefixed keys such as + ``anthropic_thinking`` or ``openai_reasoning_effort`` are also + accepted as escape hatches for finer control. Keys are not + validated by BoFire — pydantic-ai and the underlying provider + SDK are the source of truth. + output_retries: Number of retries when output validation fails + (constraint or bound violations). Each retry sends the LLM the + invalid candidates and the error so it can correct. + n_recent_experiments: If set, only the most recent N experiments + are shown to the LLM. Keeps prompt size bounded on long + campaigns. + n_top_experiments: If set, the top N experiments by objective + value are shown to the LLM. Combine with + ``n_recent_experiments`` to mix recency and quality. + system_prompt: Optional override for the default system prompt. + """ + + type: Literal["LLMStrategy"] = "LLMStrategy" + + llm: AnyLLMProvider + model_settings: Optional[Dict[str, Any]] = None + output_retries: PositiveInt = 3 + n_recent_experiments: Optional[Annotated[int, Field(gt=0)]] = None + n_top_experiments: Optional[Annotated[int, Field(gt=0)]] = None + system_prompt: Optional[str] = None + + @model_validator(mode="after") + def validate_single_objective(self): + """Validate that the domain has exactly one output with a supported objective.""" + outputs_with_obj = self.domain.outputs.get_by_objective( + includes=[MaximizeObjective, MinimizeObjective], + ) + if len(outputs_with_obj) != 1: + raise ValueError( + f"LLMStrategy requires exactly one output with a Maximize or " + f"Minimize objective, got {len(outputs_with_obj)}." 
+ ) + return self + + def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool: + return my_type in [ + LinearEqualityConstraint, + LinearInequalityConstraint, + NChooseKConstraint, + ] + + @classmethod + def is_feature_implemented(cls, my_type: Type[Feature]) -> bool: + return True diff --git a/bofire/llm/__init__.py b/bofire/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bofire/llm/api.py b/bofire/llm/api.py new file mode 100644 index 000000000..f1ca21cb4 --- /dev/null +++ b/bofire/llm/api.py @@ -0,0 +1,6 @@ +"""Public API for mapping LLM provider data models to pydantic-ai Models.""" + +from bofire.llm.mapper import map + + +__all__ = ["map"] diff --git a/bofire/llm/mapper.py b/bofire/llm/mapper.py new file mode 100644 index 000000000..7fc452de9 --- /dev/null +++ b/bofire/llm/mapper.py @@ -0,0 +1,152 @@ +"""Map LLM provider data models to pydantic-ai Model instances. + +Each provider data model (e.g., ``AnthropicLLMProvider``) is mapped to the +corresponding pydantic-ai ``Model`` object via a registered factory function. +API keys are resolved from environment variables at mapping time. +""" + +import os +from typing import Callable, Optional, Type + +import bofire.data_models.llm.api as data_models +from bofire.data_models.llm.provider import ( + AnthropicFoundryLLMProvider, + AnthropicLLMProvider, + LLMProvider, + OpenAICompatibleLLMProvider, + OpenAILLMProvider, +) + + +LLM_MAP: dict[Type[LLMProvider], Callable] = {} + + +def register( + data_model_cls: Type[LLMProvider], + map_fn: Optional[Callable] = None, +): + """Register a custom LLM provider mapping from data model to factory function. + + Can be used as a decorator or as a direct function call:: + + # Decorator form + @register(MyLLMProvider) + def map_my_provider(data_model): + return MyPydanticAIModel(...) + + # Direct call form + register(MyLLMProvider, map_my_provider) + + Args: + data_model_cls: The Pydantic data model class. + map_fn: A callable that takes the data model instance and returns a + pydantic-ai ``Model``. If not provided, returns a decorator. + + Returns: + The mapping function (unchanged) when used as a decorator, None otherwise. 
+ """ + + def _register(fn: Callable) -> Callable: + LLM_MAP[data_model_cls] = fn + + # Also register with the data model union so Pydantic accepts the type + data_models.register_llm_provider(data_model_cls) + + return fn + + if map_fn is not None: + _register(map_fn) + return None + + return _register + + +def _resolve_env_var(env_var_name: str) -> str: + """Resolve an environment variable, raising if not set.""" + value = os.environ.get(env_var_name) + if value is None: + raise EnvironmentError(f"Environment variable '{env_var_name}' is not set.") + return value + + +@register(AnthropicLLMProvider) +def map_anthropic(data_model: AnthropicLLMProvider): + from anthropic import AsyncAnthropic + from pydantic_ai.models.anthropic import AnthropicModel + from pydantic_ai.providers.anthropic import AnthropicProvider + + kwargs = {"api_key": _resolve_env_var(data_model.api_key_env_var)} + if data_model.base_url is not None: + kwargs["base_url"] = data_model.base_url + + client = AsyncAnthropic(**kwargs) + provider = AnthropicProvider(anthropic_client=client) + return AnthropicModel(data_model.model, provider=provider) + + +@register(AnthropicFoundryLLMProvider) +def map_anthropic_foundry(data_model: AnthropicFoundryLLMProvider): + from anthropic import AsyncAnthropicFoundry + from pydantic_ai.models.anthropic import AnthropicModel + from pydantic_ai.providers.anthropic import AnthropicProvider + + client = AsyncAnthropicFoundry( + api_key=_resolve_env_var(data_model.api_key_env_var), + resource=_resolve_env_var(data_model.resource_env_var), + ) + provider = AnthropicProvider(anthropic_client=client) + return AnthropicModel(data_model.model, provider=provider) + + +@register(OpenAILLMProvider) +def map_openai(data_model: OpenAILLMProvider): + from openai import AsyncOpenAI + from pydantic_ai.models.openai import OpenAIModel + from pydantic_ai.providers.openai import OpenAIProvider + + kwargs = {"api_key": _resolve_env_var(data_model.api_key_env_var)} + if data_model.base_url is not None: + kwargs["base_url"] = data_model.base_url + if data_model.organization is not None: + kwargs["organization"] = data_model.organization + + client = AsyncOpenAI(**kwargs) + provider = OpenAIProvider(openai_client=client) + return OpenAIModel(data_model.model, provider=provider) + + +@register(OpenAICompatibleLLMProvider) +def map_openai_compatible(data_model: OpenAICompatibleLLMProvider): + from openai import AsyncOpenAI + from pydantic_ai.models.openai import OpenAIModel + from pydantic_ai.providers.openai import OpenAIProvider + + client = AsyncOpenAI( + api_key=_resolve_env_var(data_model.api_key_env_var), + base_url=data_model.base_url, + ) + provider = OpenAIProvider(openai_client=client) + return OpenAIModel(data_model.model, provider=provider) + + +def map(data_model: LLMProvider): + """Map an LLM provider data model to a pydantic-ai Model instance. + + Args: + data_model: An LLM provider data model instance. + + Returns: + A pydantic-ai Model ready for use with pydantic-ai Agent. + + Raises: + EnvironmentError: If required environment variables are not set. + ValueError: If the provider type is not supported. + """ + mapper_fn = LLM_MAP.get(type(data_model)) + if mapper_fn is None: + supported = ", ".join(c.__name__ for c in LLM_MAP) + raise ValueError( + f"Unsupported LLM provider type: {type(data_model).__name__}. 
" + f"Supported: {supported}" + ) + return mapper_fn(data_model) diff --git a/bofire/strategies/api.py b/bofire/strategies/api.py index a5785b15a..9524aef1f 100644 --- a/bofire/strategies/api.py +++ b/bofire/strategies/api.py @@ -1,5 +1,6 @@ from bofire.strategies.doe_strategy import DoEStrategy from bofire.strategies.fractional_factorial import FractionalFactorialStrategy +from bofire.strategies.llm import LLMStrategy from bofire.strategies.mapper import map, register from bofire.strategies.predictives.acqf_optimization import ( AcquisitionOptimizer, diff --git a/bofire/strategies/llm.py b/bofire/strategies/llm.py new file mode 100644 index 000000000..3c4d9712e --- /dev/null +++ b/bofire/strategies/llm.py @@ -0,0 +1,300 @@ +"""LLM-based strategy for candidate proposal using pydantic-ai.""" + +import asyncio +import json +from dataclasses import dataclass +from typing import Any, Dict, Optional, cast + +import pandas as pd +from pydantic import BaseModel as PydanticBaseModel +from pydantic import Field, create_model +from pydantic.types import PositiveInt +from typing_extensions import Self + +import bofire.data_models.strategies.api as data_models +from bofire.data_models.domain.api import Domain +from bofire.data_models.features.api import ContinuousOutput +from bofire.data_models.llm.api import AnyLLMProvider +from bofire.data_models.objectives.api import MinimizeObjective +from bofire.strategies.strategy import Strategy, make_strategy + + +# --- Dependencies dataclass for pydantic-ai agent --- +@dataclass +class _LLMDeps: + domain: Domain + experiments_text: str + + +# --- Default system prompt --- +_DEFAULT_SYSTEM_PROMPT = """\ +You are an expert experimental design assistant for Bayesian optimization. + +You will receive a description of an optimization problem including: +- Objectives (what to maximize/minimize) +- Constraints (mathematical relationships candidates MUST satisfy) +- Problem context and domain knowledge + +The output schema already encodes the input features with their types, +bounds, and allowed values. Use the field descriptions to understand each feature. + +Consider: +- Domain knowledge from the context descriptions +- Feature bounds and constraints (candidates MUST be feasible) +- Diversity of proposed candidates (explore the space) +- If previous experiments are provided, use them to inform proposals +For each candidate, briefly explain your reasoning. +""" + + +# --- Experiment selection --- +def _select_experiments( + experiments: pd.DataFrame, + domain: Domain, + n_recent: Optional[int], + n_top: Optional[int], +) -> tuple[pd.DataFrame, str]: + """Select a subset of experiments for LLM presentation. + + Returns a tuple of (selected_df, description_text) where description_text + explains to the LLM what kind of experiments are being shown. 
+ """ + if n_recent is None and n_top is None: + return experiments, f"All {len(experiments)} experiments" + + parts = [] + desc_parts = [] + + if n_recent is not None: + recent = experiments.tail(n_recent) + parts.append(recent) + desc_parts.append(f"last {len(recent)} most recent") + + if n_top is not None: + # Use the single objective output (validated at data model level) + for feat in domain.outputs: + if isinstance(feat, ContinuousOutput) and feat.objective is not None: + metric_key = feat.key + ascending = isinstance(feat.objective, MinimizeObjective) + break + + sorted_exps = experiments.sort_values(by=metric_key, ascending=ascending) + top = sorted_exps.head(n_top) + parts.append(top) + direction = "lowest" if ascending else "highest" + desc_parts.append(f"top {len(top)} by {direction} {metric_key}") + + selected = pd.concat(parts).drop_duplicates() + description = ( + " + ".join(desc_parts) + + f" ({len(selected)} unique shown out of {len(experiments)} total)" + ) + return selected, description + + +def _reasoning_column_name(domain: Domain) -> str: + """Pick a column name for the LLM reasoning that does not collide with any + feature key in the domain. Appends trailing underscores until unique. + """ + taken = set(domain.inputs.get_keys()) | set(domain.outputs.get_keys()) + name = "reasoning" + while name in taken: + name += "_" + return name + + +def _build_proposal_model(domain: Domain) -> type[PydanticBaseModel]: + """Build the pydantic-ai output model from the Domain.""" + CandidatePoint = domain.inputs.to_pydantic_model() + + class Candidate(PydanticBaseModel): + """A candidate with its feature values and reasoning.""" + + values: CandidatePoint # type: ignore[valid-type] + reasoning: str = Field( + description="Brief explanation of why this candidate was chosen." + ) + + return create_model( + "CandidateProposal", + __doc__="A set of proposed candidates with overall reasoning.", + candidates=(list[Candidate], ...), + strategy_summary=( + str, + Field(description="Overall strategy for the proposed candidates."), + ), + ) + + +# --- Strategy --- +class LLMStrategy(Strategy): + """Strategy that uses an LLM to propose optimization candidates. + + Uses pydantic-ai structured output with a dynamically generated schema + that matches the Domain's input features. Domain validation catches + constraint violations, and pydantic-ai retries automatically. + """ + + def __init__(self, data_model: data_models.LLMStrategy, **kwargs): + super().__init__(data_model=data_model, **kwargs) + self._llm_provider = data_model.llm + self._output_retries = data_model.output_retries + self._n_recent_experiments = data_model.n_recent_experiments + self._n_top_experiments = data_model.n_top_experiments + self._system_prompt = data_model.system_prompt or _DEFAULT_SYSTEM_PROMPT + self._pydantic_ai_model = None + self._agent = None + + @property + def pydantic_ai_model(self): + """Lazily constructed pydantic-ai model. Built once on first access. + + Kept out of ``__init__`` so that instantiating an ``LLMStrategy`` does + not resolve provider environment variables (e.g. API keys). + """ + if self._pydantic_ai_model is None: + import bofire.llm.mapper as llm_mapper + + self._pydantic_ai_model = llm_mapper.map(self._llm_provider) + return self._pydantic_ai_model + + @property + def agent(self): + """Lazily constructed pydantic-ai Agent. Built once on first access. 
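+
+        First access builds the structured-output model from the domain via
+        ``_build_proposal_model`` and attaches the dynamic system prompt and
+        the domain-validating output validator defined below.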
+ + Per BoFire's "build once, execute many" philosophy: the Agent + captures the domain, output schema, system prompt, and provider + model at first access. Per-call inputs (current experiments, + candidate count) are passed in via ``_LLMDeps`` on each + ``agent.run()``. + """ + if self._agent is None: + from pydantic_ai import Agent, ModelRetry + + proposal_model = _build_proposal_model(self.domain) + + agent = Agent( + self.pydantic_ai_model, + system_prompt=self._system_prompt, + output_type=proposal_model, + output_retries=self._output_retries, + name="LLMStrategy", + ) + + @agent.system_prompt + async def add_domain_description(ctx) -> str: + deps: _LLMDeps = ctx.deps + parts = [deps.domain.to_description()] + if deps.experiments_text: + parts.append(deps.experiments_text) + return "\n".join(parts) + + @agent.output_validator + async def validate_against_domain(ctx, proposal): + deps: _LLMDeps = ctx.deps + rows = [c.values.model_dump() for c in proposal.candidates] + candidates_df = pd.DataFrame(rows) + try: + deps.domain.validate_candidates(candidates_df, only_inputs=True) + except Exception as e: + candidates_json = json.dumps(rows, indent=2) + # ModelRetry (not ValueError) is the only exception + # pydantic-ai catches in output validators to trigger a + # retry within ``output_retries``. See + # pydantic_ai/_result.py. + raise ModelRetry( + f"Candidate validation failed: {e}\n\n" + f"The proposed candidates were:\n{candidates_json}\n\n" + f"Please fix the candidates to satisfy all constraints " + f"and feature bounds, then try again." + ) from e + return proposal + + self._agent = agent + return self._agent + + def has_sufficient_experiments(self) -> bool: + """LLM can propose candidates with zero experiments (cold start).""" + return True + + def _ask(self, candidate_count: Optional[PositiveInt] = None) -> pd.DataFrame: + """Generate candidates by calling the LLM. + + Bridges async pydantic-ai into sync BoFire via asyncio.run(). 
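+
+        Note that ``asyncio.run()`` raises ``RuntimeError`` when an event
+        loop is already running (e.g. inside a Jupyter notebook); async
+        callers can await ``_ask_async`` directly instead.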
+ """ + if candidate_count is None: + candidate_count = 1 + return asyncio.run(self._ask_async(candidate_count)) + + def _build_experiments_text(self) -> str: + """Render the currently held experiments as a JSON block for the LLM.""" + if self.experiments is None or len(self.experiments) == 0: + return "" + selected, description = _select_experiments( + self.domain.outputs.preprocess_experiments_all_valid_outputs( + self.experiments + ), + self.domain, + self._n_recent_experiments, + self._n_top_experiments, + ) + display_cols = self.domain.inputs.get_keys() + self.domain.outputs.get_keys() + experiments_json = json.dumps( + selected[display_cols].to_dict(orient="records"), indent=2 + ) + return ( + f"\n## Previous Experiments ({description})\n" + f"```json\n{experiments_json}\n```" + ) + + async def _ask_async(self, candidate_count: int) -> pd.DataFrame: + """Async implementation of candidate generation.""" + deps = _LLMDeps( + domain=self.domain, + experiments_text=self._build_experiments_text(), + ) + + result = await self.agent.run( + f"Propose {candidate_count} diverse candidate points for this optimization problem.", + deps=deps, + model_settings=self._data_model.model_settings, + ) + + proposal = result.output + reasoning_col = _reasoning_column_name(self.domain) + rows = [ + {**c.values.model_dump(), reasoning_col: c.reasoning} + for c in proposal.candidates + ] + return pd.DataFrame(rows) + + @classmethod + def make( + cls, + domain: Domain, + llm: AnyLLMProvider, + model_settings: Optional[Dict[str, Any]] = None, + output_retries: Optional[int] = None, + n_recent_experiments: Optional[int] = None, + n_top_experiments: Optional[int] = None, + system_prompt: Optional[str] = None, + seed: Optional[int] = None, + ) -> Self: + """Create a new LLMStrategy instance. + + Args: + domain: The optimization domain. + llm: LLM provider configuration. + model_settings: Optional dict forwarded to pydantic-ai's + ``model_settings`` (e.g. ``{"temperature": 0.2, + "max_tokens": 4096, "thinking": "high"}``). + output_retries: Number of retries for output validation. + n_recent_experiments: Number of recent experiments to show. + n_top_experiments: Number of top experiments to show. + system_prompt: Custom system prompt override. + seed: Random seed. + + Returns: + A new LLMStrategy instance. 
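+
+        Example (illustrative; assumes a single-objective ``domain`` and an
+        ``ANTHROPIC_API_KEY`` in the environment)::
+
+            from bofire.data_models.llm.api import AnthropicLLMProvider
+
+            strategy = LLMStrategy.make(
+                domain=domain,
+                llm=AnthropicLLMProvider(model="claude-sonnet-4-20250514"),
+                n_recent_experiments=10,
+                n_top_experiments=10,
+            )
+            candidates = strategy.ask(5)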
+ """ + return cast(Self, make_strategy(cls, data_models.LLMStrategy, locals())) diff --git a/bofire/strategies/mapper.py b/bofire/strategies/mapper.py index 97890ab88..667de569a 100644 --- a/bofire/strategies/mapper.py +++ b/bofire/strategies/mapper.py @@ -52,5 +52,4 @@ def map(data_model: data_models.Strategy) -> Strategy: cls = META_MAP[data_cls] else: cls = ACTUAL_MAP[data_cls] - return cls.from_spec(data_model=data_model) diff --git a/bofire/strategies/mapper_actual.py b/bofire/strategies/mapper_actual.py index e38469775..3aa4f1806 100644 --- a/bofire/strategies/mapper_actual.py +++ b/bofire/strategies/mapper_actual.py @@ -4,6 +4,7 @@ from bofire.strategies.doe_strategy import DoEStrategy from bofire.strategies.factorial import FactorialStrategy from bofire.strategies.fractional_factorial import FractionalFactorialStrategy +from bofire.strategies.llm import LLMStrategy from bofire.strategies.predictives.active_learning import ActiveLearningStrategy from bofire.strategies.predictives.enting import EntingStrategy from bofire.strategies.predictives.mobo import MoboStrategy @@ -37,6 +38,7 @@ data_models.MoboStrategy: MoboStrategy, data_models.ShortestPathStrategy: ShortestPathStrategy, data_models.FractionalFactorialStrategy: FractionalFactorialStrategy, + data_models.LLMStrategy: LLMStrategy, } diff --git a/docs/tutorials/advanced_examples/index.qmd b/docs/tutorials/advanced_examples/index.qmd index 19ddf56d9..47ae0eb23 100644 --- a/docs/tutorials/advanced_examples/index.qmd +++ b/docs/tutorials/advanced_examples/index.qmd @@ -34,3 +34,6 @@ Applying transfer learning techniques to Bayesian optimization. ### [Conditional Features BO](conditional_features_bo.qmd) Define input features that are conditionally active. + +### [LLM-driven Molecular Optimization](llm_molecular.qmd) +Propose candidates with an `LLMStrategy` that prompts a large language model with the optimization problem and prior experiments. diff --git a/docs/tutorials/advanced_examples/llm_molecular.qmd b/docs/tutorials/advanced_examples/llm_molecular.qmd new file mode 100644 index 000000000..2c9c1b0e9 --- /dev/null +++ b/docs/tutorials/advanced_examples/llm_molecular.qmd @@ -0,0 +1,122 @@ +--- +title: "LLM-driven Molecular Optimization" +format: + html: + code-fold: false + toc: true +jupyter: python3 +execute: + eval: false + warning: false +--- + +# LLM-driven Molecular Optimization + +This tutorial shows how to use `LLMStrategy` to propose photoswitch candidates that maximize the E-isomer pi-pi* wavelength. The strategy reads the optimization problem — feature bounds, objectives, contextual descriptions, and prior experiments — and prompts a large language model directly for new candidates. + +::: {.callout-note} +This example needs the optional `llm` extra: + +```bash +pip install "bofire[llm]" +``` + +and an Anthropic API key in the environment (`ANTHROPIC_API_KEY`). The code is shown for illustration; it is not executed during the documentation build because real LLM calls require credentials and incur cost. +::: + +## Define the domain + +Use the photoswitch dataset shipped with BoFire as the candidate pool, and wrap it in a `LookupTableBenchmark` so we can score proposals. 
+ +```{python} +import pandas as pd +from io import StringIO + +from bofire.benchmarks.data.photoswitches import EXPERIMENTS +from bofire.benchmarks.LookupTableBenchmark import LookupTableBenchmark +from bofire.data_models.domain.api import Domain +from bofire.data_models.features.api import CategoricalMolecularInput, ContinuousOutput +from bofire.data_models.objectives.api import MaximizeObjective + +INPUT_KEY = "Molecule" +OUTPUT_KEY = "E isomer pi-pi* wavelength in nm" + +all_experiments = pd.read_json(StringIO(EXPERIMENTS)).rename( + columns={"SMILES": INPUT_KEY}, +) +all_experiments = all_experiments.loc[all_experiments[OUTPUT_KEY].notnull()] + +domain = Domain.from_lists( + inputs=[ + CategoricalMolecularInput( + key=INPUT_KEY, + categories=all_experiments[INPUT_KEY].to_list(), + ), + ], + outputs=[ContinuousOutput(key=OUTPUT_KEY, objective=MaximizeObjective(w=1.0))], +) +domain.context = "Find molecules with high E isomer pi-pi* wavelength." + +benchmark = LookupTableBenchmark( + domain=domain, + lookup_table=all_experiments[[INPUT_KEY, OUTPUT_KEY]] + .copy() + .reset_index(drop=True), +) +``` + +## Build the strategy + +`LLMStrategy` needs an LLM provider and optional `model_settings`. Setting `thinking="medium"` enables pydantic-ai's cross-provider extended-reasoning capability — useful for harder design problems, at higher cost and latency. + +```{python} +import bofire.strategies.api as strategies +from bofire.data_models.llm.api import AnthropicLLMProvider +from bofire.data_models.strategies.api import LLMStrategy as LLMStrategyDataModel + +strategy_dm = LLMStrategyDataModel( + domain=domain, + llm=AnthropicLLMProvider(model="claude-sonnet-4-5"), + model_settings={"thinking": "medium"}, + n_recent_experiments=10, + n_top_experiments=10, +) + +strategy = strategies.map(strategy_dm) +``` + +## Cold start: propose candidates without prior experiments + +`LLMStrategy.has_sufficient_experiments()` returns `True` even before any experiments are recorded — the LLM can propose from the domain alone. + +```{python} +candidates = strategy.ask(10) +candidates[[INPUT_KEY, "reasoning"]] +``` + +The returned dataframe contains the candidate molecules plus a `reasoning` column with short explanations. Score them with the benchmark: + +```{python} +benchmark.f(candidates) +``` + +## Iterate with prior experiments + +Use `tell()` to feed observed measurements back to the strategy. The next `ask()` call includes them in the prompt so the LLM can build on what worked. + +```{python} +initial = benchmark.domain.inputs.sample(10, seed=42) +initial_observed = benchmark.f(initial, return_complete=True) +strategy.tell(initial_observed) + +next_candidates = strategy.ask(10) +benchmark.f(next_candidates) +``` + +The prompt is capped at `n_recent_experiments` most-recent + `n_top_experiments` best-performing experiments (deduplicated), keeping prompt size bounded as the campaign grows. + +## Caveats + +- **No calibrated uncertainty.** Treat candidates as informed heuristics, not optima. Where a Bayesian optimizer is applicable, it is usually preferable. +- **Cost and latency.** Reasoning models with `thinking="high"` can be 5–10x slower and more expensive than non-reasoning calls. +- **Constraint handling.** Returned candidates are validated against the domain. Failures are sent back to the LLM via pydantic-ai's `output_retries` for self-correction. 
diff --git a/docs/userguides/strategies.qmd b/docs/userguides/strategies.qmd index 7d864b1c3..f6c373121 100644 --- a/docs/userguides/strategies.qmd +++ b/docs/userguides/strategies.qmd @@ -24,6 +24,10 @@ The following predictive strategies are available: - `MultifidelityStrategy:` Single objective multi-fidelity BO as described [here](https://www.sciencedirect.com/science/article/pii/S0098135423000637) - `EntingStrategy`: Strategy based on the `Entmoot` [package](https://github.com/cog-imperial/entmoot) that uses tree-based surrogate models to perform both single-objective and multiobjective optimization. +## LLM-based Strategies + +- `LLMStrategy`: Uses a large language model to propose candidates from the problem description, feature bounds, constraints, and prior experiments. Supports multiple providers via [pydantic-ai](https://ai.pydantic.dev/). Useful for cold-start designs and domains where written context (e.g. molecular optimization) informs proposals. Requires the optional `[llm]` extra and provider credentials. There is no calibrated uncertainty model — treat candidates as informed heuristics, not optima. See the [LLM molecular optimization tutorial](../tutorials/advanced_examples/llm_molecular.qmd) for an end-to-end example. + ## Combining Strategies In BoFire, the `StepwiseStrategy` operates on a sequence of strategies and determines when to switch between them based on customizable logical operators. diff --git a/pyproject.toml b/pyproject.toml index 16ebc8b7a..9c051bcde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ entmoot = [ "pyomo<=6.9.4", "entmoot>=2.1.1", ] # we pin the pyomo version here due to compatibility issues +llm = ["pydantic-ai>=0.1"] cheminfo = ["rdkit>=2023.3.2", "scikit-learn>=1.0.0", "mordredcommunity>=2.0.1"] tests = ["pytest", "pytest-cov", "papermill"] docs = ["jupyter", "jupyter-cache", "matplotlib", "seaborn"] @@ -83,6 +84,7 @@ all = [ "seaborn", "pymoo>=0.6.0", "shap>=0.48.0", + "pydantic-ai>=0.1", ] [tool.setuptools.packages] diff --git a/tests/bofire/conftest.py b/tests/bofire/conftest.py index 546926542..c2cefe3ef 100644 --- a/tests/bofire/conftest.py +++ b/tests/bofire/conftest.py @@ -203,3 +203,8 @@ def molfeatures_spec(request) -> specs.Spec: @fixture(params=specs.local_search_configs.valids) def local_search_config_spec(request) -> specs.Spec: return request.param + + +@fixture(params=specs.llm.valids) +def llm_spec(request) -> specs.Spec: + return request.param diff --git a/tests/bofire/data_models/constraints/test_linear.py b/tests/bofire/data_models/constraints/test_linear.py index fb977caa2..0888fe185 100644 --- a/tests/bofire/data_models/constraints/test_linear.py +++ b/tests/bofire/data_models/constraints/test_linear.py @@ -2,7 +2,10 @@ import pandas as pd import tests.bofire.data_models.specs.api as specs -from bofire.data_models.constraints.api import LinearInequalityConstraint +from bofire.data_models.constraints.api import ( + LinearEqualityConstraint, + LinearInequalityConstraint, +) def test_from_greater_equal(): @@ -94,3 +97,27 @@ def test_jacobian(): np.array(spec["coefficients"]) / np.linalg.norm(np.array(spec["coefficients"])), ) + + +def test_linear_equality_to_description(): + c = LinearEqualityConstraint( + features=["x1", "x2"], coefficients=[1.0, 2.0], rhs=5.0 + ) + assert c.to_description() == "1.0*x1 + 2.0*x2 = 5.0" + + +def test_linear_inequality_to_description(): + c = LinearInequalityConstraint( + features=["x1", "x2"], coefficients=[1.0, 2.0], rhs=5.0 + ) + assert c.to_description() == "1.0*x1 + 
2.0*x2 <= 5.0" + + +def test_linear_constraint_to_description_with_context(): + c = LinearInequalityConstraint( + features=["x1", "x2"], + coefficients=[1.0, 2.0], + rhs=5.0, + context="Safety limit", + ) + assert c.to_description() == "1.0*x1 + 2.0*x2 <= 5.0 — Safety limit" diff --git a/tests/bofire/data_models/constraints/test_nchoosek.py b/tests/bofire/data_models/constraints/test_nchoosek.py new file mode 100644 index 000000000..f7928acb5 --- /dev/null +++ b/tests/bofire/data_models/constraints/test_nchoosek.py @@ -0,0 +1,21 @@ +from bofire.data_models.constraints.api import NChooseKConstraint + + +def test_nchoosek_to_description(): + c = NChooseKConstraint( + features=["x1", "x2", "x3"], + min_count=1, + max_count=2, + none_also_valid=False, + ) + assert c.to_description() == "Choose 1-2 active features from ['x1', 'x2', 'x3']" + + +def test_nchoosek_to_description_none_valid(): + c = NChooseKConstraint( + features=["x1", "x2"], + min_count=1, + max_count=2, + none_also_valid=True, + ) + assert c.to_description() == "Choose 1-2 active features from ['x1', 'x2'], or none" diff --git a/tests/bofire/data_models/constraints/test_product.py b/tests/bofire/data_models/constraints/test_product.py new file mode 100644 index 000000000..5676eea84 --- /dev/null +++ b/tests/bofire/data_models/constraints/test_product.py @@ -0,0 +1,18 @@ +from bofire.data_models.constraints.api import ( + ProductEqualityConstraint, + ProductInequalityConstraint, +) + + +def test_product_equality_to_description(): + c = ProductEqualityConstraint( + features=["x1", "x2"], exponents=[2, 3], rhs=1.0, sign=1 + ) + assert c.to_description() == "x1^2.0 * x2^3.0 = 1.0" + + +def test_product_inequality_to_description(): + c = ProductInequalityConstraint( + features=["x1", "x2"], exponents=[2, 3], rhs=1.0, sign=1 + ) + assert c.to_description() == "x1^2.0 * x2^3.0 <= 1.0" diff --git a/tests/bofire/data_models/domain/test_domain.py b/tests/bofire/data_models/domain/test_domain.py index f87560819..d5d6d4d4a 100644 --- a/tests/bofire/data_models/domain/test_domain.py +++ b/tests/bofire/data_models/domain/test_domain.py @@ -23,7 +23,7 @@ ContinuousOutput, Feature, ) -from bofire.data_models.objectives.api import TargetObjective +from bofire.data_models.objectives.api import MaximizeObjective, TargetObjective from bofire.utils.subdomain import get_subdomain @@ -507,3 +507,45 @@ def test_is_fulfilled(): domain.is_fulfilled(experiments), pd.Series([True, False, False], index=experiments.index), ) + + +def test_domain_to_description(): + domain = Domain.from_lists( + inputs=[ContinuousInput(key="x", bounds=(0, 1))], + outputs=[ContinuousOutput(key="y", objective=MaximizeObjective(w=1.0))], + ) + assert domain.to_description() == "\n## Objectives\n- y: Maximize" + + +def test_domain_to_description_with_context(): + domain = Domain.from_lists( + inputs=[ContinuousInput(key="x", bounds=(0, 1))], + outputs=[ContinuousOutput(key="y", objective=MaximizeObjective(w=1.0))], + ) + domain.context = "Optimizing a reaction" + assert domain.to_description() == ( + "## Problem Context\nOptimizing a reaction\n\n## Objectives\n- y: Maximize" + ) + + +def test_domain_to_description_with_constraints(): + domain = Domain.from_lists( + inputs=[ + ContinuousInput(key="x1", bounds=(0, 1)), + ContinuousInput(key="x2", bounds=(0, 1)), + ], + outputs=[ContinuousOutput(key="y", objective=MaximizeObjective(w=1.0))], + constraints=[ + LinearInequalityConstraint( + features=["x1", "x2"], + coefficients=[1.0, 1.0], + rhs=1.5, + context="Budget constraint", + ) 
+ ], + ) + assert domain.to_description() == ( + "\n## Objectives\n- y: Maximize\n" + "\n## Constraints (candidates MUST satisfy all of these)\n" + "- 1.0*x1 + 1.0*x2 <= 1.5 — Budget constraint" + ) diff --git a/tests/bofire/data_models/domain/test_inputs.py b/tests/bofire/data_models/domain/test_inputs.py index 9ccc2da31..b864ecf6d 100644 --- a/tests/bofire/data_models/domain/test_inputs.py +++ b/tests/bofire/data_models/domain/test_inputs.py @@ -1222,3 +1222,43 @@ def test_inputs_get_feature_indices( assert mol_dims == expected_molecular_indices assert ord_dims == expected_continuous_indices assert cat_dims == expected_categorical_indices + + +def test_inputs_to_pydantic_model(): + inputs = Inputs( + features=[ + ContinuousInput(key="x1", bounds=(0, 1)), + CategoricalInput(key="x2", categories=["a", "b"]), + ] + ) + Model = inputs.to_pydantic_model() + schema = Model.model_json_schema() + assert "x1" in schema["properties"] + assert "x2" in schema["properties"] + assert schema["properties"]["x1"]["type"] == "number" + + +def test_inputs_to_pydantic_model_validates_bounds(): + from pydantic import ValidationError + + inputs = Inputs(features=[ContinuousInput(key="x", bounds=(0, 1))]) + Model = inputs.to_pydantic_model() + obj = Model(x=0.5) + assert obj.x == 0.5 + with pytest.raises(ValidationError): + Model(x=5.0) + + +def test_inputs_to_pydantic_model_validates_categories(): + from pydantic import ValidationError + + inputs = Inputs( + features=[ + CategoricalInput(key="c", categories=["a", "b"], allowed=[True, False]) + ] + ) + Model = inputs.to_pydantic_model() + obj = Model(c="a") + assert obj.c == "a" + with pytest.raises(ValidationError): + Model(c="b") diff --git a/tests/bofire/data_models/features/test_categorical.py b/tests/bofire/data_models/features/test_categorical.py index 1eee787d8..bb3a8f025 100644 --- a/tests/bofire/data_models/features/test_categorical.py +++ b/tests/bofire/data_models/features/test_categorical.py @@ -497,3 +497,52 @@ def test_categorical_output_call(): ) output = categorical_output(test_df, test_df) assert output.tolist() == test_df["c1"].tolist() + + +def test_categorical_input_to_pydantic_field(): + from typing import Literal + + feat = CategoricalInput(key="sol", categories=["water", "ethanol", "toluene"]) + field_type, field_info = feat.to_pydantic_field() + assert field_type == Literal["water", "ethanol", "toluene"] + assert ( + field_info.description + == "Categorical, allowed: ['water', 'ethanol', 'toluene']" + ) + + +def test_categorical_input_to_pydantic_field_respects_allowed(): + from typing import Literal + + feat = CategoricalInput( + key="sol", + categories=["water", "ethanol", "toluene"], + allowed=[True, True, False], + ) + field_type, field_info = feat.to_pydantic_field() + assert field_type == Literal["water", "ethanol"] + assert field_info.description == "Categorical, allowed: ['water', 'ethanol']" + + +def test_categorical_input_to_pydantic_field_falls_back_to_str_above_threshold(): + from bofire.data_models.features.categorical import LLM_ENUM_SCHEMA_THRESHOLD + + categories = [f"c{i}" for i in range(LLM_ENUM_SCHEMA_THRESHOLD + 1)] + feat = CategoricalInput(key="big", categories=categories) + field_type, field_info = feat.to_pydantic_field() + assert field_type is str + # description still lists the categories so the LLM has guidance + assert "c0" in field_info.description + assert f"c{LLM_ENUM_SCHEMA_THRESHOLD}" in field_info.description + + +def test_categorical_input_to_pydantic_field_at_threshold_stays_literal(): + from typing 
import Literal, get_args, get_origin + + from bofire.data_models.features.categorical import LLM_ENUM_SCHEMA_THRESHOLD + + categories = [f"c{i}" for i in range(LLM_ENUM_SCHEMA_THRESHOLD)] + feat = CategoricalInput(key="edge", categories=categories) + field_type, _ = feat.to_pydantic_field() + assert get_origin(field_type) is get_origin(Literal["x"]) + assert list(get_args(field_type)) == categories diff --git a/tests/bofire/data_models/features/test_continuous.py b/tests/bofire/data_models/features/test_continuous.py index 5fb93f38a..2dabf3500 100644 --- a/tests/bofire/data_models/features/test_continuous.py +++ b/tests/bofire/data_models/features/test_continuous.py @@ -324,3 +324,50 @@ def test_continuous_input_feature_to_unit_range(feature, x, expected, real): def test_continuous_input_feature_is_fixed(input_feature, expected, expected_value): assert input_feature.is_fixed() == expected assert input_feature.fixed_value() == expected_value + + +def test_continuous_input_to_pydantic_field(): + feat = ContinuousInput(key="temp", bounds=(20.0, 200.0)) + field_type, field_info = feat.to_pydantic_field() + assert field_type is float + assert field_info.metadata[0].ge == 20.0 + assert field_info.metadata[1].le == 200.0 + assert field_info.description == "Continuous, bounds [20.0, 200.0]" + + +def test_continuous_input_to_pydantic_field_with_context(): + feat = ContinuousInput(key="temp", bounds=(20.0, 200.0), context="Temperature in C") + _, field_info = feat.to_pydantic_field() + assert ( + field_info.description == "Continuous, bounds [20.0, 200.0] — Temperature in C" + ) + + +def test_continuous_input_to_pydantic_field_allow_zero(): + feat = ContinuousInput(key="x", bounds=(0.01, 0.5), allow_zero=True) + _, field_info = feat.to_pydantic_field() + assert field_info.metadata[0].ge == 0.0 + assert ( + field_info.description + == "Continuous, bounds [0.01, 0.5] — can also be 0 (inactive)" + ) + + +def test_continuous_output_to_description(): + from bofire.data_models.features.api import ContinuousOutput + from bofire.data_models.objectives.api import MaximizeObjective + + feat = ContinuousOutput( + key="yield", + objective=MaximizeObjective(w=1.0), + context="Target >90%", + ) + assert feat.to_description() == "yield: Maximize — Target >90%" + + +def test_continuous_output_to_description_no_context(): + from bofire.data_models.features.api import ContinuousOutput + from bofire.data_models.objectives.api import MinimizeObjective + + feat = ContinuousOutput(key="yield", objective=MinimizeObjective(w=1.0)) + assert feat.to_description() == "yield: Minimize" diff --git a/tests/bofire/data_models/features/test_descriptor.py b/tests/bofire/data_models/features/test_descriptor.py index f073f6095..b06d29793 100644 --- a/tests/bofire/data_models/features/test_descriptor.py +++ b/tests/bofire/data_models/features/test_descriptor.py @@ -375,3 +375,54 @@ def test_categorical_descriptor_input_feature_from_dataframe( assert f.categories == categories assert f.descriptors == descriptors assert f.values == values + + +def test_categorical_descriptor_input_to_pydantic_field(): + feat = CategoricalDescriptorInput( + key="cat", + categories=["a", "b"], + descriptors=["d1", "d2"], + values=[[1.0, 2.0], [3.0, 4.0]], + ) + _, field_info = feat.to_pydantic_field() + assert field_info.description == ( + "Categorical with descriptors, allowed: ['a', 'b'] — " + "descriptors per category: {'a': {'d1': 1.0, 'd2': 2.0}, 'b': {'d1': 3.0, 'd2': 4.0}}" + ) + + +def test_continuous_descriptor_input_to_pydantic_field(): + from 
bofire.data_models.features.api import ContinuousDescriptorInput + + feat = ContinuousDescriptorInput( + key="x", + bounds=(0, 1), + descriptors=["d1"], + values=[0.5], + ) + field_type, field_info = feat.to_pydantic_field() + assert field_type is float + assert field_info.description == ( + "Continuous, bounds [0.0, 1.0] — descriptors: {'d1': 0.5}" + ) + + +def test_categorical_descriptor_input_to_pydantic_field_falls_back_above_threshold(): + from bofire.data_models.features.categorical import LLM_ENUM_SCHEMA_THRESHOLD + + n = LLM_ENUM_SCHEMA_THRESHOLD + 1 + categories = [f"c{i}" for i in range(n)] + # distinct values per category so the per-descriptor variance validator passes + values = [[float(i)] for i in range(n)] + feat = CategoricalDescriptorInput( + key="big", + categories=categories, + descriptors=["d1"], + values=values, + ) + field_type, field_info = feat.to_pydantic_field() + assert field_type is str + # description still lists the categories (via the prefix) and the mapping + assert "c0" in field_info.description + assert f"c{n - 1}" in field_info.description + assert "descriptors per category" in field_info.description diff --git a/tests/bofire/data_models/features/test_discrete.py b/tests/bofire/data_models/features/test_discrete.py index a55f77e3c..a8b958e3b 100644 --- a/tests/bofire/data_models/features/test_discrete.py +++ b/tests/bofire/data_models/features/test_discrete.py @@ -111,3 +111,14 @@ def test_from_continuous(): ) samples = d.from_continuous(continuous_values) assert np.all(samples == pd.Series([2, 2, 3, 2])) + + +def test_discrete_input_to_pydantic_field(): + from typing import Literal + + from bofire.data_models.features.api import DiscreteInput + + feat = DiscreteInput(key="n", values=[1.0, 2.0, 5.0]) + field_type, field_info = feat.to_pydantic_field() + assert field_type == Literal[1.0, 2.0, 5.0] + assert field_info.description == "Discrete, allowed values: [1.0, 2.0, 5.0]" diff --git a/tests/bofire/data_models/features/test_molecular.py b/tests/bofire/data_models/features/test_molecular.py index 2759fb452..f5b4311cf 100644 --- a/tests/bofire/data_models/features/test_molecular.py +++ b/tests/bofire/data_models/features/test_molecular.py @@ -313,3 +313,37 @@ def test_categorical_molecular_input_select_mordred_descriptors(): assert ( final_descriptor_count > 0 ), "All descriptors were removed, expected at least some to remain" + + +def test_categorical_molecular_input_to_pydantic_field(): + from typing import Literal + + feat = CategoricalMolecularInput(key="mol", categories=["CCO", "CC"]) + field_type, field_info = feat.to_pydantic_field() + assert field_type == Literal["CCO", "CC"] + assert ( + field_info.description + == "Categorical molecular (SMILES), allowed: ['CCO', 'CC']" + ) + + +def test_categorical_molecular_input_to_pydantic_field_falls_back_above_threshold(): + from bofire.data_models.features.categorical import LLM_ENUM_SCHEMA_THRESHOLD + + # Generate enough distinct SMILES by varying alkane chain length + smiles = ["C" * (i + 1) for i in range(LLM_ENUM_SCHEMA_THRESHOLD + 1)] + feat = CategoricalMolecularInput(key="mol", categories=smiles) + field_type, field_info = feat.to_pydantic_field() + assert field_type is str + # description still lists the SMILES so the LLM has guidance + assert smiles[0] in field_info.description + assert smiles[-1] in field_info.description + + +def test_continuous_molecular_input_to_pydantic_field(): + feat = ContinuousMolecularInput(key="conc", molecule="CCO", bounds=(0.0, 1.0)) + _, field_info = 
feat.to_pydantic_field() + assert ( + field_info.description + == "Continuous molecular (SMILES: CCO), bounds [0.0, 1.0]" + ) diff --git a/tests/bofire/data_models/serialization/test_deserialization.py b/tests/bofire/data_models/serialization/test_deserialization.py index d45d2a39d..f6f4fdd8e 100644 --- a/tests/bofire/data_models/serialization/test_deserialization.py +++ b/tests/bofire/data_models/serialization/test_deserialization.py @@ -7,6 +7,7 @@ AnyDataFrame, AnyFeature, AnyKernel, + AnyLLMProvider, AnyLocalSearchConfig, AnyMolFeatures, AnyObjective, @@ -143,3 +144,9 @@ def test_local_search_config_should_be_deserializable(local_search_config_spec: obj = local_search_config_spec.obj() deserialized = TypeAdapter(AnyLocalSearchConfig).validate_python(obj.model_dump()) assert obj == deserialized + + +def test_llm_should_be_deserializable(llm_spec: Spec): + obj = llm_spec.obj() + deserialized = TypeAdapter(AnyLLMProvider).validate_python(obj.model_dump()) + assert obj == deserialized diff --git a/tests/bofire/data_models/serialization/test_serialization.py b/tests/bofire/data_models/serialization/test_serialization.py index 60e7581cc..18310c895 100644 --- a/tests/bofire/data_models/serialization/test_serialization.py +++ b/tests/bofire/data_models/serialization/test_serialization.py @@ -127,3 +127,9 @@ def test_local_search_config_should_be_serializable(local_search_config_spec: Sp spec = local_search_config_spec.typed_spec() obj = local_search_config_spec.cls(**spec) assert obj.model_dump() == spec + + +def test_llm_should_be_serializable(llm_spec: Spec): + spec = llm_spec.typed_spec() + obj = llm_spec.cls(**spec) + assert obj.model_dump() == spec diff --git a/tests/bofire/data_models/specs/api.py b/tests/bofire/data_models/specs/api.py index dc8e493ec..8c88eabf0 100644 --- a/tests/bofire/data_models/specs/api.py +++ b/tests/bofire/data_models/specs/api.py @@ -14,6 +14,7 @@ from tests.bofire.data_models.specs.features import specs as features from tests.bofire.data_models.specs.inputs import specs as inputs from tests.bofire.data_models.specs.kernels import specs as kernels +from tests.bofire.data_models.specs.llm import specs as llm from tests.bofire.data_models.specs.local_search_config import ( specs as local_search_configs, ) diff --git a/tests/bofire/data_models/specs/constraints.py b/tests/bofire/data_models/specs/constraints.py index 8ceb159c5..bb1c3e744 100644 --- a/tests/bofire/data_models/specs/constraints.py +++ b/tests/bofire/data_models/specs/constraints.py @@ -13,6 +13,7 @@ "exponents": [random.randint(1, 10) for _ in range(3)], "rhs": random.random(), "sign": 1, + "context": None, }, ) @@ -23,6 +24,7 @@ "exponents": [random.randint(1, 10) for _ in range(3)], "rhs": random.random(), "sign": 1, + "context": None, }, ) @@ -32,6 +34,7 @@ "features": ["f1", "f2", "f3"], "coefficients": [random.randint(1, 10) for _ in range(3)], "rhs": random.random(), + "context": None, }, ) specs.add_valid( @@ -40,6 +43,7 @@ "features": ["f1", "f2", "f3"], "coefficients": [random.randint(1, 10) for _ in range(3)], "rhs": random.random(), + "context": None, }, ) specs.add_valid( @@ -49,6 +53,7 @@ "jacobian_expression": "[f2,f1,0]", "hessian_expression": "[[0,1,0],[1,0,0],[0,0,0]]", "features": ["f1", "f2", "f3"], + "context": None, }, ) specs.add_valid( @@ -58,6 +63,7 @@ "jacobian_expression": "[f2,f1,0]", "hessian_expression": "[[0,1,0],[1,0,0],[0,0,0]]", "features": ["f1", "f2", "f3"], + "context": None, }, ) specs.add_valid( @@ -67,6 +73,7 @@ "min_count": 1, "max_count": 1, 
"none_also_valid": False, + "context": None, }, ) @@ -75,6 +82,7 @@ lambda: { "features": ["f1"], "multiplicity": 3, + "context": None, }, ) @@ -100,6 +108,7 @@ selection=["alpha", "beta"], ).model_dump(), ], + "context": None, }, ) @@ -117,5 +126,6 @@ operator=">", ).model_dump(), ], + "context": None, }, ) diff --git a/tests/bofire/data_models/specs/domain.py b/tests/bofire/data_models/specs/domain.py index 47bf3b036..b7d6b5ae4 100644 --- a/tests/bofire/data_models/specs/domain.py +++ b/tests/bofire/data_models/specs/domain.py @@ -29,6 +29,7 @@ ], ).model_dump(), "constraints": Constraints().model_dump(), + "context": None, }, ) diff --git a/tests/bofire/data_models/specs/features.py b/tests/bofire/data_models/specs/features.py index 82bd4a4bf..00095b13a 100644 --- a/tests/bofire/data_models/specs/features.py +++ b/tests/bofire/data_models/specs/features.py @@ -22,6 +22,7 @@ "key": str(uuid.uuid4()), "features": ["a", "b", "c"], "keep_features": True, + "context": None, }, ) @@ -32,6 +33,7 @@ "key": str(uuid.uuid4()), "features": ["a", "b", "c"], "keep_features": True, + "context": None, }, ) @@ -41,6 +43,7 @@ "key": str(uuid.uuid4()), "features": ["a", "a"], "keep_features": True, + "context": None, }, ) @@ -50,6 +53,7 @@ "key": str(uuid.uuid4()), "features": ["a", "b", "c"], "keep_features": False, + "context": None, }, ) @@ -60,6 +64,7 @@ "features": ["a", "b", "c"], "descriptors": ["alpha", "beta"], "keep_features": True, + "context": None, }, ) @@ -70,6 +75,7 @@ "features": ["a", "b", "c"], "descriptors": ["alpha", "beta"], "keep_features": True, + "context": None, }, ) @@ -83,6 +89,7 @@ ignore_3D=True, ).model_dump(), "keep_features": True, + "context": None, }, ) @@ -102,6 +109,7 @@ "append_y": [], "normalize_y": 1.0, "normalize_x": False, + "context": None, }, ) @@ -115,6 +123,7 @@ ignore_3D=True, ).model_dump(), "keep_features": True, + "context": None, }, ) @@ -134,6 +143,7 @@ "append_y": [], "normalize_y": 2.0, "normalize_x": True, + "context": None, }, ) @@ -197,12 +207,22 @@ specs.add_valid( features.CloneFeature, - lambda: {"key": str(uuid.uuid4()), "features": ["a", "b"], "keep_features": True}, + lambda: { + "key": str(uuid.uuid4()), + "features": ["a", "b"], + "keep_features": True, + "context": None, + }, ) specs.add_valid( features.CloneFeature, - lambda: {"key": str(uuid.uuid4()), "features": ["a"], "keep_features": True}, + lambda: { + "key": str(uuid.uuid4()), + "features": ["a"], + "keep_features": True, + "context": None, + }, ) specs.add_valid( @@ -212,6 +232,7 @@ "values": [random.random(), random.random() + 3], "unit": random.choice(["°C", "mg", "mmol/l", None]), "rtol": 1e-7, + "context": None, }, ) @@ -237,6 +258,7 @@ "local_relative_bounds": None, "stepsize": None, "allow_zero": False, + "context": None, }, ) @@ -265,6 +287,7 @@ "local_relative_bounds": None, "stepsize": None, "allow_zero": False, + "context": None, }, ) specs.add_valid( @@ -273,6 +296,7 @@ "key": str(uuid.uuid4()), "categories": ["c1", "c2", "c3"], "allowed": [True, True, False], + "context": None, }, ) @@ -322,6 +346,7 @@ [3.0, 7.0], [5.0, 1.0], ], + "context": None, }, ) specs.add_valid( @@ -330,6 +355,7 @@ "key": str(uuid.uuid4()), "objective": objectives.valid(MaximizeObjective).typed_spec(), "unit": random.choice(["%", "area %", None]), + "context": None, }, ) @@ -342,6 +368,7 @@ categories=["a", "b", "c"], desirability=[True, True, False], ).model_dump(), + "context": None, }, ) @@ -357,6 +384,7 @@ "N[C@](C)(F)C(=O)O", ], "allowed": [True, True, True, True], + "context": None, }, ) @@ 
-371,6 +399,7 @@ "unit": random.choice(["°C", "mg", "mmol/l", None]), "local_relative_bounds": None, "stepsize": None, + "context": None, }, ) @@ -386,6 +415,7 @@ ], "allowed": [True, True, True], "fidelities": [0, 1, 2], + "context": None, }, ) diff --git a/tests/bofire/data_models/specs/llm.py b/tests/bofire/data_models/specs/llm.py new file mode 100644 index 000000000..26cd63096 --- /dev/null +++ b/tests/bofire/data_models/specs/llm.py @@ -0,0 +1,47 @@ +from bofire.data_models.llm.provider import ( + AnthropicFoundryLLMProvider, + AnthropicLLMProvider, + OpenAICompatibleLLMProvider, + OpenAILLMProvider, +) +from tests.bofire.data_models.specs.specs import Specs + + +specs = Specs([]) + +specs.add_valid( + AnthropicLLMProvider, + lambda: { + "model": "claude-sonnet-4-20250514", + "api_key_env_var": "ANTHROPIC_API_KEY", + "base_url": None, + }, +) + +specs.add_valid( + AnthropicFoundryLLMProvider, + lambda: { + "model": "claude-sonnet-4-20250514", + "api_key_env_var": "ANTHROPIC_FOUNDRY_API_KEY", + "resource_env_var": "ANTHROPIC_FOUNDRY_RESOURCE", + }, +) + +specs.add_valid( + OpenAILLMProvider, + lambda: { + "model": "gpt-4o", + "api_key_env_var": "OPENAI_API_KEY", + "base_url": None, + "organization": None, + }, +) + +specs.add_valid( + OpenAICompatibleLLMProvider, + lambda: { + "model": "my-model", + "api_key_env_var": "CUSTOM_API_KEY", + "base_url": "http://localhost:8000/v1", + }, +) diff --git a/tests/bofire/data_models/specs/strategies.py b/tests/bofire/data_models/specs/strategies.py index 2f4aca49f..b789d9aca 100644 --- a/tests/bofire/data_models/specs/strategies.py +++ b/tests/bofire/data_models/specs/strategies.py @@ -24,6 +24,7 @@ DiscreteInput, TaskInput, ) +from bofire.data_models.llm.provider import AnthropicLLMProvider from bofire.data_models.objectives.api import ( MaximizeObjective, MaximizeSigmoidObjective, @@ -1004,3 +1005,55 @@ error=ValueError, message="Only one task can be the target fidelity", ) + +# LLMStrategy specs — uses a single-objective domain +_llm_domain = Domain( + inputs=Inputs( + features=[ + ContinuousInput(key="x1", bounds=(0, 1)), + ContinuousInput(key="x2", bounds=(0, 1)), + CategoricalInput(key="x3", categories=["a", "b", "c"]), + ], + ), + outputs=Outputs( + features=[ContinuousOutput(key="y", objective=MaximizeObjective(w=1.0))] + ), +) + +specs.add_valid( + strategies.LLMStrategy, + lambda: { + "domain": _llm_domain.model_dump(), + "llm": AnthropicLLMProvider( + model="claude-sonnet-4-20250514", + api_key_env_var="ANTHROPIC_API_KEY", + ).model_dump(), + "seed": 42, + "model_settings": None, + "output_retries": 3, + "n_recent_experiments": None, + "n_top_experiments": None, + "system_prompt": None, + }, +) + +specs.add_valid( + strategies.LLMStrategy, + lambda: { + "domain": _llm_domain.model_dump(), + "llm": AnthropicLLMProvider( + model="claude-sonnet-4-20250514", + api_key_env_var="ANTHROPIC_API_KEY", + ).model_dump(), + "seed": 42, + "model_settings": { + "temperature": 0.7, + "max_tokens": 4096, + "thinking": "medium", + }, + "output_retries": 5, + "n_recent_experiments": 10, + "n_top_experiments": 5, + "system_prompt": "You are a helpful assistant.", + }, +) diff --git a/tests/bofire/llm/__init__.py b/tests/bofire/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/bofire/llm/test_mapper.py b/tests/bofire/llm/test_mapper.py new file mode 100644 index 000000000..a3502d1b4 --- /dev/null +++ b/tests/bofire/llm/test_mapper.py @@ -0,0 +1,20 @@ +"""Tests for bofire.llm.mapper.""" + +import os + +import pytest + 
+from bofire.llm.mapper import _resolve_env_var + + +def test_resolve_env_var_success(): + os.environ["_BOFIRE_TEST_VAR"] = "test_value" + try: + assert _resolve_env_var("_BOFIRE_TEST_VAR") == "test_value" + finally: + del os.environ["_BOFIRE_TEST_VAR"] + + +def test_resolve_env_var_missing(): + with pytest.raises(EnvironmentError, match="NONEXISTENT_VAR_12345"): + _resolve_env_var("NONEXISTENT_VAR_12345") diff --git a/tests/bofire/strategies/test_llm.py b/tests/bofire/strategies/test_llm.py new file mode 100644 index 000000000..fd4e01bd1 --- /dev/null +++ b/tests/bofire/strategies/test_llm.py @@ -0,0 +1,187 @@ +"""Tests for LLMStrategy utility functions and data model integration.""" + +import importlib.util + +import pandas as pd +import pytest + +from bofire.data_models.domain.api import Domain +from bofire.data_models.features.api import ( + CategoricalInput, + ContinuousInput, + ContinuousOutput, +) +from bofire.data_models.llm.provider import AnthropicLLMProvider +from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective +from bofire.data_models.strategies.api import LLMStrategy as LLMStrategyDataModel +from bofire.strategies.api import LLMStrategy +from bofire.strategies.llm import _build_proposal_model, _select_experiments + + +PYDANTIC_AI_AVAILABLE = importlib.util.find_spec("pydantic_ai") is not None + +pytestmark = pytest.mark.skipif( + not PYDANTIC_AI_AVAILABLE, + reason="requires pydantic-ai (install with [llm] extra)", +) + + +# --- Fixtures --- + + +@pytest.fixture() +def simple_domain(): + return Domain.from_lists( + inputs=[ + ContinuousInput(key="x1", bounds=(0, 1)), + ContinuousInput(key="x2", bounds=(0, 1)), + CategoricalInput(key="x3", categories=["a", "b", "c"]), + ], + outputs=[ContinuousOutput(key="y", objective=MaximizeObjective(w=1.0))], + ) + + +@pytest.fixture() +def experiments(): + return pd.DataFrame( + { + "x1": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], + "x2": [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0], + "x3": ["a", "b", "c", "a", "b", "c", "a", "b", "c", "a"], + "y": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], + "valid_y": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + } + ) + + +# --- _select_experiments --- + + +def test_select_experiments_all(simple_domain, experiments): + selected, desc = _select_experiments(experiments, simple_domain, None, None) + assert len(selected) == 10 + assert "All" in desc + + +def test_select_experiments_recent(simple_domain, experiments): + selected, desc = _select_experiments(experiments, simple_domain, 3, None) + assert len(selected) == 3 + assert selected["y"].tolist() == [80, 90, 100] + assert "recent" in desc + + +def test_select_experiments_top(simple_domain, experiments): + selected, desc = _select_experiments(experiments, simple_domain, None, 3) + assert len(selected) == 3 + assert set(selected["y"].tolist()) == {80, 90, 100} + assert "top" in desc + assert "highest" in desc + + +def test_select_experiments_top_minimize(experiments): + domain = Domain.from_lists( + inputs=[ + ContinuousInput(key="x1", bounds=(0, 1)), + ContinuousInput(key="x2", bounds=(0, 1)), + CategoricalInput(key="x3", categories=["a", "b", "c"]), + ], + outputs=[ContinuousOutput(key="y", objective=MinimizeObjective(w=1.0))], + ) + selected, desc = _select_experiments(experiments, domain, None, 3) + assert set(selected["y"].tolist()) == {10, 20, 30} + assert "lowest" in desc + + +def test_select_experiments_both_deduplicates(simple_domain, experiments): + selected, desc = _select_experiments(experiments, 
simple_domain, 5, 5) + # Last 5: y=[60,70,80,90,100], Top 5: y=[60,70,80,90,100] — same set + assert len(selected) == 5 + assert "unique" in desc + + +def test_select_experiments_both_union(simple_domain, experiments): + # Recent 2: y=[90,100], Top 2: y=[90,100] — overlap + selected, _ = _select_experiments(experiments, simple_domain, 2, 2) + assert len(selected) == 2 + + # Recent 3: y=[80,90,100], Top 3: y=[80,90,100] + selected, _ = _select_experiments(experiments, simple_domain, 3, 3) + assert len(selected) == 3 + + +# --- _build_proposal_model --- + + +def test_build_proposal_model_schema(simple_domain): + Model = _build_proposal_model(simple_domain) + schema = Model.model_json_schema() + assert "candidates" in schema["properties"] + assert "strategy_summary" in schema["properties"] + # Check nested CandidatePoint has our features + candidate_schema = schema["$defs"]["CandidatePoint"] + assert "x1" in candidate_schema["properties"] + assert "x2" in candidate_schema["properties"] + assert "x3" in candidate_schema["properties"] + + +def test_build_proposal_model_validates(simple_domain): + Model = _build_proposal_model(simple_domain) + + proposal = Model( + candidates=[ + { + "values": {"x1": 0.5, "x2": 0.5, "x3": "a"}, + "reasoning": "test", + } + ], + strategy_summary="test", + ) + assert len(proposal.candidates) == 1 + + +def test_llm_strategy_rejects_multi_objective(): + domain = Domain.from_lists( + inputs=[ContinuousInput(key="x", bounds=(0, 1))], + outputs=[ + ContinuousOutput(key="y1", objective=MaximizeObjective(w=1.0)), + ContinuousOutput(key="y2", objective=MinimizeObjective(w=1.0)), + ], + ) + with pytest.raises(ValueError, match="exactly one output"): + LLMStrategyDataModel( + domain=domain, + llm=AnthropicLLMProvider(api_key_env_var="KEY"), + ) + + +# --- End-to-end smoke test with pydantic-ai TestModel --- + + +def test_llm_strategy_ask_with_test_model(): + """Smoke test the full ask() pipeline using pydantic-ai's TestModel. + + TestModel auto-generates structured output matching the agent's schema. + We pick continuous bounds starting at 0 so the generated defaults satisfy + the domain validator, and avoid constraints for the same reason. + """ + from pydantic_ai.models.test import TestModel + + domain = Domain.from_lists( + inputs=[ + ContinuousInput(key="x1", bounds=(0, 10)), + ContinuousInput(key="x2", bounds=(0, 10)), + ], + outputs=[ContinuousOutput(key="y", objective=MaximizeObjective(w=1.0))], + ) + data_model = LLMStrategyDataModel( + domain=domain, + llm=AnthropicLLMProvider(api_key_env_var="UNUSED_KEY"), + ) + strategy = LLMStrategy(data_model=data_model) + # Inject TestModel directly to bypass provider/env-var resolution. + strategy._pydantic_ai_model = TestModel() + + # TestModel generates a single array item by default, so ask(1). 
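+    # (TestModel is pydantic-ai's offline stub: it synthesizes output that
+    # satisfies the agent's output schema without making any network call.)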
+ candidates = strategy.ask(1) + assert len(candidates) == 1 + assert "reasoning" in candidates.columns diff --git a/tests/bofire/test_register.py b/tests/bofire/test_register.py index 71dc86ebe..01203732c 100644 --- a/tests/bofire/test_register.py +++ b/tests/bofire/test_register.py @@ -16,6 +16,7 @@ ) from bofire.data_models.kernels.continuous import ContinuousKernel as _ContinuousBase from bofire.data_models.kernels.kernel import Kernel as KernelDataModel +from bofire.data_models.llm.provider import LLMProvider from bofire.data_models.priors.prior import Prior as PriorDataModel from bofire.data_models.strategies.strategy import Strategy as StrategyDataModel from bofire.data_models.surrogates.botorch import BotorchSurrogate @@ -743,6 +744,88 @@ class _MapperPrior(PriorDataModel): assert isinstance(k.lengthscale_prior, _MapperPrior) +# --------------------------------------------------------------------------- +# LLM provider registration tests +# --------------------------------------------------------------------------- + + +class _IntegrationLLMProvider(LLMProvider): + type: Literal["_IntegrationLLMProvider"] = "_IntegrationLLMProvider" + model: str = "fake-model" + api_key_env_var: str = "FAKE_KEY" + + +class TestLLMProviderPydanticIntegration: + """After register_llm_provider, custom LLM provider types should pass + Pydantic validation on LLMStrategy.llm and round-trip through + AnyLLMProvider.""" + + def test_custom_provider_in_llm_strategy(self): + from pydantic import TypeAdapter + + import bofire.data_models.llm.api as llm_api + from bofire.data_models.llm.api import register_llm_provider + from bofire.data_models.objectives.api import MaximizeObjective + from bofire.data_models.strategies.api import ( + LLMStrategy as LLMStrategyDataModel, + ) + + register_llm_provider(_IntegrationLLMProvider) + + obj = _IntegrationLLMProvider() + deserialized = TypeAdapter(llm_api.AnyLLMProvider).validate_python( + obj.model_dump() + ) + assert deserialized == obj + + domain = Domain( + inputs=Inputs(features=[ContinuousInput(key="x", bounds=(0, 1))]), + outputs=Outputs( + features=[ + ContinuousOutput( + key="y", + objective=MaximizeObjective(w=1.0), + ) + ] + ), + ) + data_model = LLMStrategyDataModel(domain=domain, llm=obj) + assert isinstance(data_model.llm, _IntegrationLLMProvider) + + def test_mapper_register_also_updates_pydantic(self): + """The mapper-level register() should trigger data model registration.""" + import bofire.llm.mapper as llm_mapper + + class _MapperLLMProvider(LLMProvider): + type: Literal["_MapperLLMProvider"] = "_MapperLLMProvider" + model: str = "fake" + api_key_env_var: str = "FAKE" + + sentinel = MagicMock(name="pydantic_ai_model") + llm_mapper.register(_MapperLLMProvider, lambda dm: sentinel) + + assert llm_mapper.LLM_MAP[_MapperLLMProvider] is not None + + from bofire.data_models.objectives.api import MaximizeObjective + from bofire.data_models.strategies.api import ( + LLMStrategy as LLMStrategyDataModel, + ) + + domain = Domain( + inputs=Inputs(features=[ContinuousInput(key="x", bounds=(0, 1))]), + outputs=Outputs( + features=[ + ContinuousOutput( + key="y", + objective=MaximizeObjective(w=1.0), + ) + ] + ), + ) + data_model = LLMStrategyDataModel(domain=domain, llm=_MapperLLMProvider()) + assert isinstance(data_model.llm, _MapperLLMProvider) + + # --------------------------------------------------------------------------- # Engineered feature registration tests # ---------------------------------------------------------------------------