diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 7bfeb7e278..a596f567a9 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -19,9 +19,9 @@ on: - "Non Transfer Learning" env: - TRANSFER_LEARNING_BENCHMARKS: '["aryl_halide_CT_IM_tl","aryl_halide_IP_CP_tl","aryl_halide_CT_I_BM_tl","direct_arylation_tl_temperature","easom_tl_47_negate_noise5","hartmann_tl_3_20_15","michalewicz_tl_continuous"]' + TRANSFER_LEARNING_BENCHMARKS: '["aryl_halide_CT_IM_tl","aryl_halide_IP_CP_tl","aryl_halide_CT_I_BM_tl","direct_arylation_tl_temperature","easom_tl_47_negate_noise5","hartmann_tl_3_20_15","hartmann_tl_inv_3_20_15","hartmann_tl_shift_3_20_15","michalewicz_tl_continuous"]' SYNTHETIC_BENCHMARKS: '["synthetic_2C1D_1C","hartmann_3d_discretized","hartmann_6d","hartmann_3d"]' - ALL_BENCHMARKS: '["direct_arylation_multi_batch","direct_arylation_single_batch","aryl_halide_CT_IM_tl","aryl_halide_IP_CP_tl","aryl_halide_CT_I_BM_tl","direct_arylation_tl_temperature","easom_tl_47_negate_noise5","hartmann_tl_3_20_15","michalewicz_tl_continuous","synthetic_2C1D_1C","hartmann_3d_discretized","hartmann_6d","hartmann_3d"]' + ALL_BENCHMARKS: '["direct_arylation_multi_batch","direct_arylation_single_batch","aryl_halide_CT_IM_tl","aryl_halide_IP_CP_tl","aryl_halide_CT_I_BM_tl","direct_arylation_tl_temperature","easom_tl_47_negate_noise5","hartmann_tl_3_20_15","hartmann_tl_inv_3_20_15","hartmann_tl_shift_3_20_15","michalewicz_tl_continuous","synthetic_2C1D_1C","hartmann_3d_discretized","hartmann_6d","hartmann_3d"]' NON_TL_BENCHMARKS: '["direct_arylation_multi_batch","direct_arylation_single_batch","synthetic_2C1D_1C","hartmann_3d_discretized","hartmann_6d","hartmann_3d"]' permissions: diff --git a/CHANGELOG.md b/CHANGELOG.md index 37509c1169..27d66d923d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - 
`identify_non_dominated_configurations` method to `Campaign` and `Objective` for determining the Pareto front - Interpoint constraints for continuous search spaces +- Transfer learning benchmarks for shifted and inverted Hartmann functions ### Breaking Changes - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples diff --git a/benchmarks/domains/__init__.py b/benchmarks/domains/__init__.py index d5ee6a09a1..039869837f 100644 --- a/benchmarks/domains/__init__.py +++ b/benchmarks/domains/__init__.py @@ -31,6 +31,8 @@ ) from benchmarks.domains.hartmann.convergence_tl import ( hartmann_tl_3_20_15_benchmark, + hartmann_tl_inv_3_20_15_benchmark, + hartmann_tl_shift_3_20_15_benchmark, ) from benchmarks.domains.michalewicz.convergence_tl import ( michalewicz_tl_continuous_benchmark, @@ -52,6 +54,8 @@ direct_arylation_tl_temperature_benchmark, easom_tl_47_negate_noise5_benchmark, hartmann_tl_3_20_15_benchmark, + hartmann_tl_inv_3_20_15_benchmark, + hartmann_tl_shift_3_20_15_benchmark, michalewicz_tl_continuous_benchmark, # Transfer-Learning Regression Benchmarks direct_arylation_temperature_tl_regr_benchmark, diff --git a/benchmarks/domains/hartmann/convergence_tl.py b/benchmarks/domains/hartmann/convergence_tl.py old mode 100644 new mode 100755 index d9f1b78a10..99912bef87 --- a/benchmarks/domains/hartmann/convergence_tl.py +++ b/benchmarks/domains/hartmann/convergence_tl.py @@ -2,6 +2,8 @@ from __future__ import annotations +from collections.abc import Callable + import numpy as np import pandas as pd import torch @@ -21,129 +23,169 @@ ConvergenceBenchmarkSettings, ) from benchmarks.definition.base import RunMode +from benchmarks.domains.hartmann.utils import ShiftedHartmann -def hartmann_tl_3_20_15(settings: ConvergenceBenchmarkSettings) -> pd.DataFrame: - """Benchmark function for transfer learning with the Hartmann function in 3D. 
+def _make_hartmann_tl_benchmark( + name: str, + *, + source_noise_std: float, + source_shift: tuple[float, float, float] | None, + source_negate: bool, +) -> Callable[[ConvergenceBenchmarkSettings], pd.DataFrame]: + """Return a named Hartmann transfer-learning benchmark callable. - Key characteristics: - • Compares two versions of Hartmann function: + The benchmark operates on the Hartmann function in 3D. + It compares two discretized versions of the Hartmann function: - Target: standard Hartmann - - Source: Hartmann with added noise (noise_std=0.15) - • Uses 20 points per dimension - • Tests transfer learning with different source data percentages: + - Source: Hartmann with optional changes (noise, shifting, or negation) + - Uses 20 points per dimension + - Tests transfer learning with different source data percentages: - 1% of source data - 10% of source data - 20% of source data - Args: + The callable requires one argument: settings: Configuration settings for the convergence benchmark. + The callable returns: + DataFrame containing benchmark results. + + Args: + name: Benchmark name. + source_noise_std: Standard deviation of noise added to the source function. + source_shift: Shift added to the source Hartmann function. + source_negate: Whether to negate the source Hartmann function. Returns: - DataFrame containing benchmark results. + The callable returning the benchmark results. + + Raises: + ValueError: If ``source_shift`` is provided but does not have length 3. 
""" - target_function = Hartmann(dim=3) - source_function = Hartmann(dim=3, noise_std=0.15) - - points_per_dim = 20 - percentages = [0.01, 0.05, 0.1] - - # Create grid locations for the parameters - bounds = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]) - grid_locations = { - f"x{d}": np.linspace(lower, upper, points_per_dim) - for d, (lower, upper) in enumerate(bounds.T) - } - - params: list[DiscreteParameter] = [ - NumericalDiscreteParameter( - name=name, - values=points, + if source_shift is not None and len(source_shift) != 3: + raise ValueError("Shift list must have length 3 for 3D Hartmann function.") + + def benchmark_fn(settings: ConvergenceBenchmarkSettings) -> pd.DataFrame: + """Execute a Hartmann transfer-learning benchmark variant.""" + # Define base bounds + bounds = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]).T + + # Create source function with specified parameters + source_function = ShiftedHartmann( + bounds=bounds, + shift=list(source_shift) if source_shift is not None else None, + dim=3, + noise_std=source_noise_std, + negate=source_negate, ) - for name, points in grid_locations.items() - ] - task_param = TaskParameter( - name="Function", - values=["Target_Function", "Source_Function"], - active_values=["Target_Function"], - ) - params_tl = params + [task_param] - - searchspace_nontl = SearchSpace.from_product(parameters=params) - searchspace_tl = SearchSpace.from_product(parameters=params_tl) - - objective = SingleTargetObjective( - target=NumericalTarget(name="Target", minimize=True) - ) - tl_campaign = Campaign( - searchspace=searchspace_tl, - objective=objective, - ) - nontl_campaign = Campaign( - searchspace=searchspace_nontl, - objective=objective, - ) - - meshgrid = np.meshgrid(*[points for points in grid_locations.values()]) - - # Create a DataFrame for the initial data coordinates - coord_columns = [p.name for p in params] - initial_data = pd.DataFrame( - {f"x{d}": grid_d.ravel() for d, grid_d in enumerate(meshgrid)}, - 
columns=coord_columns, # Ensure correct column order - ) - - # Convert coordinates to a PyTorch tensor - initial_data_tensor = torch.tensor(initial_data[coord_columns].values) - - with Settings(random_seed=settings.random_seed): - target_values_tensor = source_function( - initial_data_tensor - ) # Randomness from source function - - # Assign the results back to a new DataFrame for initial_data - initial_data["Target"] = target_values_tensor.detach().numpy() - initial_data["Function"] = "Source_Function" - - lookup = arrays_to_dataframes([p.name for p in params], ["Target"], use_torch=True)( - target_function - ) - - initial_data_samples = {} - with Settings(random_seed=settings.random_seed): - for p in percentages: - initial_data_samples[p] = [ - initial_data.sample(frac=p) for _ in range(settings.n_mc_iterations) - ] - results = [] - for p in percentages: + # Create target function (standard Hartmann with adjusted bounds from source) + target_function = Hartmann( + dim=source_function.dim, bounds=source_function._bounds + ) + + points_per_dim = 20 + percentages = [0.01, 0.05, 0.1] + + # Create grid locations for the parameters + grid_locations = { + f"x{d}": np.linspace(lower, upper, points_per_dim) + for d, (lower, upper) in enumerate(bounds) + } + + params: list[DiscreteParameter] = [ + NumericalDiscreteParameter( + name=name, + values=tuple(points), + ) + for name, points in grid_locations.items() + ] + task_param = TaskParameter( + name="Function", + values=("Target_Function", "Source_Function"), + active_values=("Target_Function",), + ) + params_tl = params + [task_param] + + searchspace_nontl = SearchSpace.from_product(parameters=params) + searchspace_tl = SearchSpace.from_product(parameters=params_tl) + + objective = SingleTargetObjective( + target=NumericalTarget(name="Target", minimize=True) + ) + tl_campaign = Campaign( + searchspace=searchspace_tl, + objective=objective, + ) + nontl_campaign = Campaign( + searchspace=searchspace_nontl, + 
objective=objective, + ) + + meshgrid = np.meshgrid(*[points for points in grid_locations.values()]) + + # Create a DataFrame for the initial data coordinates + coord_columns = [p.name for p in params] + initial_data = pd.DataFrame( + {f"x{d}": grid_d.ravel() for d, grid_d in enumerate(meshgrid)}, + columns=coord_columns, # Ensure correct column order + ) + + # Convert coordinates to a PyTorch tensor + initial_data_tensor = torch.tensor(initial_data[coord_columns].values) + + with Settings(random_seed=settings.random_seed): + target_values_tensor = source_function( + initial_data_tensor + ) # Randomness from source function + + # Assign the results back to a new DataFrame for initial_data + initial_data["Target"] = target_values_tensor.detach().numpy() + initial_data["Function"] = "Source_Function" + + lookup = arrays_to_dataframes( + [p.name for p in params], ["Target"], use_torch=True + )(target_function) + + initial_data_samples = {} + with Settings(random_seed=settings.random_seed): + for p in percentages: + initial_data_samples[p] = [ + initial_data.sample(frac=p) for _ in range(settings.n_mc_iterations) + ] + + results = [] + for p in percentages: + results.append( + simulate_scenarios( + { + f"{int(100 * p)}": tl_campaign, + f"{int(100 * p)}_naive": nontl_campaign, + }, + lookup, + initial_data=initial_data_samples[p], + batch_size=settings.batch_size, + n_doe_iterations=settings.n_doe_iterations, + impute_mode="error", + random_seed=settings.random_seed, + ) + ) results.append( simulate_scenarios( - { - f"{int(100 * p)}": tl_campaign, - f"{int(100 * p)}_naive": nontl_campaign, - }, + {"0": tl_campaign, "0_naive": nontl_campaign}, lookup, - initial_data=initial_data_samples[p], batch_size=settings.batch_size, n_doe_iterations=settings.n_doe_iterations, + n_mc_iterations=settings.n_mc_iterations, impute_mode="error", random_seed=settings.random_seed, ) ) - results.append( - simulate_scenarios( - {"0": tl_campaign, "0_naive": nontl_campaign}, - lookup, - 
batch_size=settings.batch_size, - n_doe_iterations=settings.n_doe_iterations, - n_mc_iterations=settings.n_mc_iterations, - impute_mode="error", - random_seed=settings.random_seed, - ) - ) - return pd.concat(results) + return pd.concat(results) + + benchmark_fn.__name__ = name + benchmark_fn.__qualname__ = name + return benchmark_fn benchmark_config = ConvergenceBenchmarkSettings( @@ -162,7 +204,34 @@ def hartmann_tl_3_20_15(settings: ConvergenceBenchmarkSettings) -> pd.DataFrame: ) hartmann_tl_3_20_15_benchmark = ConvergenceBenchmark( - function=hartmann_tl_3_20_15, - optimal_target_values={"Target": -3.851831124860353}, + function=_make_hartmann_tl_benchmark( + name="hartmann_tl_3_20_15", + source_noise_std=0.15, + source_shift=None, + source_negate=False, + ), + optimal_target_values={"Target": -3.8324342572721695}, + settings=benchmark_config, +) + +hartmann_tl_inv_3_20_15_benchmark = ConvergenceBenchmark( + function=_make_hartmann_tl_benchmark( + name="hartmann_tl_inv_3_20_15", + source_noise_std=0.15, + source_shift=None, + source_negate=True, + ), + optimal_target_values={"Target": -3.8324342572721695}, + settings=benchmark_config, +) + +hartmann_tl_shift_3_20_15_benchmark = ConvergenceBenchmark( + function=_make_hartmann_tl_benchmark( + "hartmann_tl_shift_3_20_15", + source_noise_std=0.15, + source_shift=(0.2, 0, 0), + source_negate=False, + ), + optimal_target_values={"Target": -3.8324342572721695}, settings=benchmark_config, ) diff --git a/benchmarks/domains/hartmann/utils.py b/benchmarks/domains/hartmann/utils.py new file mode 100644 index 0000000000..b354fdd3dd --- /dev/null +++ b/benchmarks/domains/hartmann/utils.py @@ -0,0 +1,93 @@ +"""Utilities for the Hartmann domain.""" + +import numpy as np +import torch +from botorch.test_functions.synthetic import Hartmann +from typing_extensions import override + + +class ShiftedHartmann(Hartmann): + """Hartmann function with support for shifting input dimensions. 
+ + Extends the Hartmann test function to support shifting input dimensions + (and adjusting bounds accordingly). Other parameters like dim, noise_std, + and negate are passed directly to the parent Hartmann class. + + Args: + shift: Amount to shift individual dimension coordinates by. + E.g. [0.2, 0, 0] would shift dimension 0 by 0.2. + If None, no shifting is applied. + **kwargs: Keyword arguments passed to parent Hartmann class. + + Raises: + ValueError: If bounds shape is invalid (i.e., not list of (min,max) tuples). + ValueError: If shift shape does not match the used dimensions. + """ + + def __init__( + self, + shift: list[float] | None = None, + **kwargs, + ) -> None: + # Get botorch defaults if not specified + bounds = np.array(kwargs.get("bounds", Hartmann(**kwargs).bounds.T)) + + # Process the shifts + if shift is not None and len(shift) != bounds.shape[0]: + raise ValueError("Shift shape does not match used dimensions.") + self.shift = shift if shift is not None else [0.0] * bounds.shape[0] + + # Extend the bounds + # The original Hartmann function throws an error if it is called outside of its + # bounds ([0,1] by default). However, it is technically feasible to evaluate it + # outside of the default unit interval. To enable passing of the validation for + # our shifted variant, we simply expand the bounds by an appropriate amount + # that depends on ``shift``. The bounds used in the benchmark search space + # remain unchanged. + # Note: We cannot simply shift the upper and lower bounds, as that leads to the + # optimal point (``_optimizers``) being excluded from the bounds. As this is + # hard-coded in the Hartmann class init, we cannot override it before it is + # evaluated. 
+ shifted_bounds = bounds + np.array(self.shift)[:, None] + bounds_extended = [ + ( + min([bounds[row, 0], shifted_bounds[row, 0]]), + max([bounds[row, 1], shifted_bounds[row, 1]]), + ) + for row in range(bounds.shape[0]) + ] + + kwargs["bounds"] = bounds_extended + + super().__init__(**kwargs) + # Recompute optimizer coordinates for the shifted input space (x_opt - shift). + # Both _optimizers (list) and the "optimizers" buffer must be set + # independently since .optimizers reads from the buffer. + if self._optimizers is not None: + self._optimizers = [ + tuple(x - s for x, s in zip(opt, self.shift)) + for opt in self._optimizers + ] + if "optimizers" in self._buffers: + self.register_buffer( + "optimizers", + torch.tensor(self._optimizers, dtype=torch.double) + if self._optimizers is not None + else None, + ) + + @override + def __call__(self, x: torch.Tensor) -> torch.Tensor: + """Evaluate the shifted Hartmann function. + + Args: + x: Input tensor of shape (n_samples, n_dimensions). + + Returns: + Output tensor of shape (n_samples,). + """ + # Create a copy to avoid modifying input + x_shifted = x.clone() + for dim, shift in enumerate(self.shift): + x_shifted[:, dim] = x_shifted[:, dim] + shift + return super().__call__(x_shifted)