-
Notifications
You must be signed in to change notification settings - Fork 75
Expand file tree
/
Copy path_base_optuna_adapter.py
More file actions
299 lines (247 loc) · 9.11 KB
/
_base_optuna_adapter.py
File metadata and controls
299 lines (247 loc) · 9.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
"""Base adapter for Optuna optimizers."""
# copyright: hyperactive developers, MIT License (see LICENSE file)
from hyperactive.base import BaseOptimizer
__all__ = ["_BaseOptunaAdapter"]
class _BaseOptunaAdapter(BaseOptimizer):
    """Base adapter for Optuna optimizers.

    Bridges hyperactive's optimizer interface to Optuna samplers: translates
    the unified/param search-space formats into Optuna suggest calls, runs an
    Optuna study, and exposes the best result via ``best_params_`` and
    ``best_score_``. Subclasses implement ``_get_optimizer`` to return the
    concrete Optuna sampler.
    """

    _tags = {
        "python_dependencies": ["optuna"],
        "info:name": "Optuna-based optimizer",
        # Search space capabilities
        "capability:discrete": True,
        "capability:continuous": True,
        "capability:categorical": True,
        "capability:log_scale": True,
    }

    def __init__(
        self,
        unified_space=None,
        param_space=None,
        n_trials=100,
        initialize=None,
        random_state=None,
        early_stopping=None,
        max_score=None,
        experiment=None,
        **optimizer_kwargs,
    ):
        # Plain attribute assignment only — parameter resolution/validation
        # happens later in get_search_config, keeping __init__ side-effect free.
        self.unified_space = unified_space
        self.param_space = param_space
        self.n_trials = n_trials
        self.initialize = initialize
        self.random_state = random_state
        self.early_stopping = early_stopping
        self.max_score = max_score
        self.experiment = experiment
        self.optimizer_kwargs = optimizer_kwargs
        super().__init__()

    def get_search_config(self):
        """Get the search configuration.

        Resolves the ``unified_space``/``param_space`` pair: exactly one may
        be provided, and ``unified_space`` is folded into ``param_space``.

        Returns
        -------
        dict with str keys
            The search configuration dictionary.

        Raises
        ------
        ValueError
            If both ``unified_space`` and ``param_space`` are set.
        """
        search_config = super().get_search_config()
        # Resolve: unified_space is converted to param_space
        unified_space = search_config.pop("unified_space", None)
        param_space = search_config.get("param_space")

        # Validate: only one should be set
        if unified_space is not None and param_space is not None:
            raise ValueError(
                "Provide either 'unified_space' or 'param_space', not both. "
                "Use 'unified_space' for simple dict[str, list] format, "
                "or 'param_space' for native Optuna format with ranges/distributions."
            )

        # Use unified_space if param_space is not set
        if unified_space is not None:
            search_config["param_space"] = unified_space

        return search_config

    def _get_optimizer(self):
        """Get the Optuna optimizer to use.

        This method should be implemented by subclasses to return
        the specific optimizer class and its initialization parameters.

        Returns
        -------
        optimizer
            The Optuna optimizer instance

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement _get_optimizer")

    def _convert_param_space(self, param_space):
        """Convert parameter space to Optuna format.

        Identity transform in the base class; subclasses may override to
        rewrite the space for their sampler.

        Parameters
        ----------
        param_space : dict
            The parameter space to convert

        Returns
        -------
        dict
            The converted parameter space
        """
        return param_space

    def _suggest_params(self, trial, param_space):
        """Suggest parameters using Optuna trial.

        Parameters
        ----------
        trial : optuna.Trial
            The Optuna trial object
        param_space : dict
            The parameter space; values may be Optuna distribution objects,
            tuples (continuous ranges), or lists (categorical choices)

        Returns
        -------
        dict
            The suggested parameters

        Raises
        ------
        ValueError
            If a space value is none of the supported kinds.
        """
        params = {}
        for key, space in param_space.items():
            # NOTE(review): duck-type probe for an Optuna distribution —
            # confirm BaseDistribution actually exposes a "suggest" attribute.
            if hasattr(space, "suggest"):
                # BUGFIX: Trial._suggest takes (name, distribution) —
                # the arguments were previously passed in reverse order.
                params[key] = trial._suggest(key, space)
            elif isinstance(space, tuple):
                # Tuples are continuous ranges in unified format
                params[key] = self._suggest_continuous(trial, key, space)
            elif isinstance(space, list):
                # Lists are treated as categorical choices
                params[key] = trial.suggest_categorical(key, space)
            else:
                raise ValueError(f"Invalid parameter space for key '{key}': {space}")
        return params

    def _suggest_continuous(self, trial, key, space):
        """Suggest a continuous parameter from a tuple specification.

        Handles unified tuple formats:

        - (low, high) - linear scale
        - (low, high, "log") - log scale
        - (low, high, n_points) - linear scale (n_points ignored for Optuna)
        - (low, high, n_points, "log") - log scale (n_points ignored for Optuna)

        Parameters
        ----------
        trial : optuna.Trial
            The Optuna trial object
        key : str
            The parameter name
        space : tuple
            The continuous range specification

        Returns
        -------
        float or int
            The suggested value

        Raises
        ------
        ValueError
            If the tuple has fewer than two elements.
        """
        if len(space) < 2:
            raise ValueError(
                f"Parameter '{key}': continuous range needs at least 2 values "
                f"(low, high), got {len(space)}."
            )

        low, high = space[0], space[1]
        log_scale = False

        # Parse optional arguments
        if len(space) == 3:
            third = space[2]
            if isinstance(third, str) and third.lower() == "log":
                log_scale = True
            # If third is int/float, it's n_points - ignore for Optuna
        elif len(space) == 4:
            # (low, high, n_points, "log")
            fourth = space[3]
            if isinstance(fourth, str) and fourth.lower() == "log":
                log_scale = True

        # Integer bounds -> integer parameter; anything else -> float
        if isinstance(low, int) and isinstance(high, int):
            return trial.suggest_int(key, low, high, log=log_scale)
        else:
            return trial.suggest_float(key, low, high, log=log_scale)

    def _objective(self, trial):
        """Objective function for Optuna optimization.

        Parameters
        ----------
        trial : optuna.Trial
            The Optuna trial object

        Returns
        -------
        float
            The objective value
        """
        params = self._suggest_params(trial, self._resolved_param_space)
        score = self.experiment(params)

        # Handle early stopping based on max_score: stop the whole study
        # once any trial reaches the target score.
        if self.max_score is not None and score >= self.max_score:
            trial.study.stop()

        return score

    def _setup_initial_positions(self, study):
        """Set up initial starting positions if provided.

        Parameters
        ----------
        study : optuna.Study
            The Optuna study object
        """
        if self.initialize is not None:
            if isinstance(self.initialize, dict) and "warm_start" in self.initialize:
                warm_start_points = self.initialize["warm_start"]
                if isinstance(warm_start_points, list):
                    # For warm start, we manually add trials to the study history
                    # instead of using suggest methods to avoid distribution conflicts
                    for point in warm_start_points:
                        # NOTE(review): this evaluation's result is discarded and
                        # the enqueued trial re-evaluates the same point in
                        # _objective — confirm the extra call is intentional.
                        self.experiment(point)
                        study.enqueue_trial(point)

    def _solve(self, experiment, param_space, n_trials, **kwargs):
        """Run the Optuna optimization.

        Parameters
        ----------
        experiment : callable
            The experiment to optimize
        param_space : dict
            The parameter space
        n_trials : int
            Number of trials
        **kwargs
            Additional parameters

        Returns
        -------
        dict
            The best parameters found
        """
        import optuna

        # Store resolved param_space for use in _objective
        self._resolved_param_space = param_space

        # Create optimizer with random state if provided
        optimizer = self._get_optimizer()

        # Create study
        study = optuna.create_study(
            direction="maximize",  # Assuming we want to maximize scores
            sampler=optimizer,
        )

        # Setup initial positions
        self._setup_initial_positions(study)

        # Setup early stopping callback: stop after `early_stopping` trials.
        callbacks = []
        if self.early_stopping is not None:

            def early_stopping_callback(study, trial):
                if len(study.trials) >= self.early_stopping:
                    study.stop()

            callbacks.append(early_stopping_callback)

        # Run optimization
        study.optimize(
            self._objective,
            n_trials=n_trials,
            callbacks=callbacks if callbacks else None,
        )

        self.best_score_ = study.best_value
        self.best_params_ = study.best_params

        return study.best_params

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the optimizer."""
        from sklearn.datasets import load_iris
        from sklearn.svm import SVC

        from hyperactive.experiment.integrations import SklearnCvExperiment

        X, y = load_iris(return_X_y=True)
        sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
        param_space = {
            "C": (0.01, 10),
            "gamma": (0.0001, 10),
        }
        return [
            {
                "param_space": param_space,
                "n_trials": 10,
                "experiment": sklearn_exp,
            }
        ]