diff --git a/bofire/data_models/strategies/api.py b/bofire/data_models/strategies/api.py
index 1ae75bd2b..a3f7d2ce6 100644
--- a/bofire/data_models/strategies/api.py
+++ b/bofire/data_models/strategies/api.py
@@ -17,10 +17,15 @@
     FractionalFactorialStrategy,
 )
 from bofire.data_models.strategies.meta_strategy_type import MetaStrategy
+from bofire.data_models.strategies.predictives.acqf_optimization import (
+    LSRBO,
+    AcquisitionOptimizer,
+    BotorchOptimizer,
+)
 from bofire.data_models.strategies.predictives.active_learning import (
     ActiveLearningStrategy,
 )
-from bofire.data_models.strategies.predictives.botorch import LSRBO, BotorchStrategy
+from bofire.data_models.strategies.predictives.botorch import BotorchStrategy
 from bofire.data_models.strategies.predictives.enting import EntingStrategy
 from bofire.data_models.strategies.predictives.mobo import MoboStrategy
 from bofire.data_models.strategies.predictives.multi_fidelity import (
@@ -76,5 +81,9 @@
     MoboStrategy,
 ]
 
-AnyLocalSearchConfig = LSRBO
+
+AnyAcqfOptimizer = Union[
+    AcquisitionOptimizer,
+    BotorchOptimizer,
+]
diff --git a/bofire/data_models/strategies/doe.py b/bofire/data_models/strategies/doe.py
index f61fc2d59..464f0e32a 100644
--- a/bofire/data_models/strategies/doe.py
+++ b/bofire/data_models/strategies/doe.py
@@ -113,8 +113,7 @@ class DoEStrategy(Strategy):
     verbose: bool = False  # get rid of this at a later stage
     ipopt_options: Optional[Dict] = None
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return True
 
     @classmethod
diff --git a/bofire/data_models/strategies/factorial.py b/bofire/data_models/strategies/factorial.py
index 537981e1c..6ed80bfc6 100644
--- a/bofire/data_models/strategies/factorial.py
+++ b/bofire/data_models/strategies/factorial.py
@@ -22,8 +22,7 @@ class FactorialStrategy(Strategy):
 
     type: Literal["FactorialStrategy"] = "FactorialStrategy"  # type: ignore
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return False
 
     @classmethod
diff --git a/bofire/data_models/strategies/fractional_factorial.py b/bofire/data_models/strategies/fractional_factorial.py
index 32b9f5b4d..9727bb3fb 100644
--- a/bofire/data_models/strategies/fractional_factorial.py
+++ b/bofire/data_models/strategies/fractional_factorial.py
@@ -51,8 +51,7 @@ class FractionalFactorialStrategy(Strategy):
         description="If true, the run order is randomized, else it is deterministic.",
     )
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return False
 
     @classmethod
diff --git a/bofire/data_models/strategies/predictives/acqf_optimization.py b/bofire/data_models/strategies/predictives/acqf_optimization.py
new file mode 100644
index 000000000..46d3d5b22
--- /dev/null
+++ b/bofire/data_models/strategies/predictives/acqf_optimization.py
@@ -0,0 +1,108 @@
+from abc import abstractmethod
+from typing import Annotated, Literal, Optional, Type
+
+from pydantic import Field, PositiveInt, field_validator
+
+from bofire.data_models.base import BaseModel
+from bofire.data_models.constraints import api as constraints
+from bofire.data_models.enum import CategoricalMethodEnum
+from bofire.data_models.types import IntPowerOfTwo
+
+
+class AcquisitionOptimizer(BaseModel):
+    prefer_exhaustive_search_for_purely_categorical_domains: bool = True
+
+    def is_constraint_implemented(self, my_type: Type[constraints.Constraint]) -> bool:
+        """Checks if a constraint type is implemented for this optimizer.
+        The base optimizer accepts all constraint types; subclasses restrict this.
+
+        Args:
+            my_type (Type[Constraint]): The type of the constraint.
+
+        Returns:
+            bool: True if the constraint is implemented, False otherwise.
+
+        """
+        return True
+
+
+class LocalSearchConfig(BaseModel):
+    """LocalSearchConfigs provide a way to define how to switch between global
+    acqf optimization in the global bounds and local acqf optimization in the local
+    reference bounds.
+    """
+
+    type: str
+
+    @abstractmethod
+    def is_local_step(self, acqf_local: float, acqf_global: float) -> bool:
+        """Abstract switching function between local and global acqf optimum.
+
+        Args:
+            acqf_local (float): Local acqf value.
+            acqf_global (float): Global acqf value.
+
+        Returns:
+            bool: If True, do a local step, else a step towards the global acqf maximum.
+
+        """
+
+
+class LSRBO(LocalSearchConfig):
+    """LSRBO implements the local search region method published in
+    https://www.merl.com/publications/docs/TR2023-057.pdf.
+
+    Attributes:
+        gamma (float): The switching parameter between local and global optimization.
+            Defaults to 0.1.
+
+    """
+
+    type: Literal["LSRBO"] = "LSRBO"
+    gamma: Annotated[float, Field(ge=0)] = 0.1
+
+    def is_local_step(self, acqf_local: float, acqf_global: float) -> bool:
+        return acqf_local >= self.gamma
+
+
+AnyLocalSearchConfig = LSRBO
+
+
+class BotorchOptimizer(AcquisitionOptimizer):
+    n_restarts: PositiveInt = 8
+    n_raw_samples: IntPowerOfTwo = 1024
+    maxiter: PositiveInt = 2000
+    batch_limit: Optional[PositiveInt] = Field(default=None, validate_default=True)
+
+    # encoding params
+    descriptor_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
+    categorical_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
+    discrete_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
+
+    # local search region params
+    local_search_config: Optional[AnyLocalSearchConfig] = None
+
+    @field_validator("batch_limit")
+    @classmethod
+    def validate_batch_limit(cls, batch_limit: int, info):
+        batch_limit = min(
+            batch_limit or info.data["n_restarts"],
+            info.data["n_restarts"],
+        )
+        return batch_limit
+
+    def is_constraint_implemented(self, my_type: Type[constraints.Constraint]) -> bool:
+        """Method to check if a specific constraint type is implemented for the optimizer.
+
+        Args:
+            my_type (Type[Constraint]): Constraint class
+
+        Returns:
+            bool: True if the constraint type is valid for the optimizer chosen, False otherwise
+
+        """
+        if my_type in [
+            constraints.NonlinearInequalityConstraint,
+            constraints.NonlinearEqualityConstraint,
+        ]:
+            return False
+        return True
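# Illustrative sketch (not part of the patch): how the new optimizer data model
# is intended to be configured, based on the definitions above. `batch_limit`
# is clamped to `n_restarts` by `validate_batch_limit`, and LSRBO takes a local
# step whenever the local acqf value reaches `gamma`. The concrete numbers here
# are assumptions for demonstration purposes.
from bofire.data_models.strategies.api import LSRBO, BotorchOptimizer

optimizer = BotorchOptimizer(
    n_restarts=4,
    n_raw_samples=512,  # must be a power of two (IntPowerOfTwo)
    batch_limit=50,  # clamped down to n_restarts=4 by the validator
    local_search_config=LSRBO(gamma=0.1),
)
assert optimizer.batch_limit == 4
assert optimizer.local_search_config.is_local_step(acqf_local=0.3, acqf_global=0.4)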
diff --git a/bofire/data_models/strategies/predictives/botorch.py b/bofire/data_models/strategies/predictives/botorch.py
index 91adee516..a4abb6aab 100644
--- a/bofire/data_models/strategies/predictives/botorch.py
+++ b/bofire/data_models/strategies/predictives/botorch.py
@@ -1,16 +1,12 @@
 import warnings
-from abc import abstractmethod
-from typing import Annotated, Literal, Optional, Type
+from typing import Annotated, Optional, Type
 
-from pydantic import Field, PositiveInt, field_validator, model_validator
+from pydantic import Field, PositiveInt, model_validator
 
-from bofire.data_models.base import BaseModel
 from bofire.data_models.constraints.api import (
     Constraint,
     InterpointConstraint,
     LinearConstraint,
-    NonlinearEqualityConstraint,
-    NonlinearInequalityConstraint,
 )
 from bofire.data_models.domain.api import Domain, Outputs
 from bofire.data_models.enum import CategoricalEncodingEnum, CategoricalMethodEnum
@@ -21,6 +17,10 @@
     TaskInput,
 )
 from bofire.data_models.outlier_detection.api import OutlierDetections
+from bofire.data_models.strategies.predictives.acqf_optimization import (
+    AcquisitionOptimizer,
+    BotorchOptimizer,
+)
 from bofire.data_models.strategies.predictives.predictive import PredictiveStrategy
 from bofire.data_models.strategies.shortest_path import has_local_search_region
 from bofire.data_models.surrogates.api import (
@@ -29,61 +29,14 @@
     MultiTaskGPSurrogate,
     SingleTaskGPSurrogate,
 )
-from bofire.data_models.types import IntPowerOfTwo
-
-
-class LocalSearchConfig(BaseModel):
-    """LocalSearchConfigs provide a way to define how to switch between global
-    acqf optimization in the global bounds and local acqf optimization in the local
-    reference bounds.
-    """
-
-    type: str
-
-    @abstractmethod
-    def is_local_step(self, acqf_local: float, acqf_global: float) -> bool:
-        """Abstract switching function between local and global acqf optimum.
-
-        Args:
-            acqf_local (float): Local acqf value.
-            acqf_global (float): Global acqf value.
-
-        Returns:
-            bool: If true, do local step, else a step towards the global acqf maximum.
-
-        """
-
-
-class LSRBO(LocalSearchConfig):
-    """LSRBO implements the local search region method published in.
-    https://www.merl.com/publications/docs/TR2023-057.pdf
-
-    Attributes:
-        gamma (float): The switsching parameter between local and global optimization.
-            Defaults to 0.1.
-
-    """
-
-    type: Literal["LSRBO"] = "LSRBO"
-    gamma: Annotated[float, Field(ge=0)] = 0.1
-
-    def is_local_step(self, acqf_local: float, acqf_global: float) -> bool:
-        return acqf_local >= self.gamma
-
-
-AnyLocalSearchConfig = LSRBO
 
 
 class BotorchStrategy(PredictiveStrategy):
-    # acqf optimizer params
-    num_restarts: PositiveInt = 8
-    num_raw_samples: IntPowerOfTwo = 1024
-    maxiter: PositiveInt = 2000
-    batch_limit: Optional[PositiveInt] = Field(default=None, validate_default=True)
-    # encoding params
-    descriptor_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
-    categorical_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
-    discrete_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
+    # acquisition optimizer
+    acquisition_optimizer: AcquisitionOptimizer = Field(
+        default_factory=lambda: BotorchOptimizer()
+    )
+
     surrogate_specs: BotorchSurrogates = Field(
         default_factory=lambda: BotorchSurrogates(surrogates=[]),
         validate_default=True,
@@ -95,21 +48,13 @@ class BotorchStrategy(PredictiveStrategy):
     # hyperopt params
     frequency_hyperopt: Annotated[int, Field(ge=0)] = 0  # 0 indicates no hyperopt
     folds: int = 5
-    # local search region params
-    local_search_config: Optional[AnyLocalSearchConfig] = None
-
-    @field_validator("batch_limit")
-    @classmethod
-    def validate_batch_limit(cls, batch_limit: int, info):
-        batch_limit = min(
-            batch_limit or info.data["num_restarts"],
-            info.data["num_restarts"],
-        )
-        return batch_limit
 
     @model_validator(mode="after")
     def validate_local_search_config(self):
-        if self.local_search_config is not None:
+        if not isinstance(self.acquisition_optimizer, BotorchOptimizer):
+            return self
+
+        if self.acquisition_optimizer.local_search_config is not None:
             if has_local_search_region(self.domain) is False:
                 warnings.warn(
                     "`local_search_region` config is specified, but no local search region is defined in `domain`",
@@ -122,9 +67,9 @@ def validate_local_search_config(self):
                 raise ValueError("LSR-BO only supported for linear constraints.")
         return self
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
-        """Method to check if a specific constraint type is implemented for the strategy
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
+        """Method to check if a specific constraint type is implemented for the strategy.
+        For strategies with an acquisition optimizer, the check is delegated to the optimizer.
 
         Args:
             my_type (Type[Constraint]): Constraint class
@@ -133,9 +78,7 @@ def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
             bool: True if the constraint type is valid for the strategy chosen, False otherwise
 
         """
-        if my_type in [NonlinearInequalityConstraint, NonlinearEqualityConstraint]:
-            return False
-        return True
+        return self.acquisition_optimizer.is_constraint_implemented(my_type)
 
     @model_validator(mode="after")
     def validate_interpoint_constraints(self):
@@ -158,27 +101,31 @@ def validate_surrogate_specs(self):
         # categorical_method = (
         #     values["categorical_method"] if "categorical_method" in values else None
         # )
-        if self.categorical_method == CategoricalMethodEnum.FREE:
-            for m in self.surrogate_specs.surrogates:
-                if isinstance(m, MixedSingleTaskGPSurrogate):
-                    raise ValueError(
-                        "Categorical method FREE not compatible with a a MixedSingleTaskGPModel.",
-                    )
-        # we also check that if a categorical with descriptor method is used as one hot encoded the same method is
-        # used for the descriptor as for the categoricals
-        for m in self.surrogate_specs.surrogates:
-            keys = m.inputs.get_keys(CategoricalDescriptorInput)
-            for k in keys:
-                input_proc_specs = (
-                    m.input_preprocessing_specs[k]
-                    if k in m.input_preprocessing_specs
-                    else None
-                )
-                if input_proc_specs == CategoricalEncodingEnum.ONE_HOT:
-                    if self.categorical_method != self.descriptor_method:
+        if isinstance(self.acquisition_optimizer, BotorchOptimizer):
+            if (
+                self.acquisition_optimizer.categorical_method
+                == CategoricalMethodEnum.FREE
+            ):
+                for m in self.surrogate_specs.surrogates:
+                    if isinstance(m, MixedSingleTaskGPSurrogate):
                         raise ValueError(
-                            "One-hot encoded CategoricalDescriptorInput features has to be treated with the same method as categoricals.",
+                            "Categorical method FREE not compatible with a MixedSingleTaskGPModel.",
                         )
+            # we also check that if a categorical with descriptor method is used as one hot
+            # encoded, the same method is used for the descriptor as for the categoricals
+            for m in self.surrogate_specs.surrogates:
+                keys = m.inputs.get_keys(CategoricalDescriptorInput)
+                for k in keys:
+                    input_proc_specs = (
+                        m.input_preprocessing_specs[k]
+                        if k in m.input_preprocessing_specs
+                        else None
+                    )
+                    if input_proc_specs == CategoricalEncodingEnum.ONE_HOT:
+                        if (
+                            self.acquisition_optimizer.categorical_method
+                            != self.acquisition_optimizer.descriptor_method
+                        ):
+                            raise ValueError(
+                                "One-hot encoded CategoricalDescriptorInput features have to be treated with the same method as categoricals.",
+                            )
         return self
 
     @model_validator(mode="after")
diff --git a/bofire/data_models/strategies/predictives/enting.py b/bofire/data_models/strategies/predictives/enting.py
index fe1817060..7e94f24be 100644
--- a/bofire/data_models/strategies/predictives/enting.py
+++ b/bofire/data_models/strategies/predictives/enting.py
@@ -55,8 +55,7 @@ class EntingStrategy(PredictiveStrategy):
     # a value of zero implies future observations will be exactly the mean
     kappa_fantasy: float = 1.96
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return my_type in [
             LinearEqualityConstraint,
             LinearInequalityConstraint,
diff --git a/bofire/data_models/strategies/predictives/qparego.py b/bofire/data_models/strategies/predictives/qparego.py
index 261c8d87b..0d07064a8 100644
--- a/bofire/data_models/strategies/predictives/qparego.py
+++ b/bofire/data_models/strategies/predictives/qparego.py
@@ -47,8 +47,7 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool:
             return False
         return True
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         """Method to check if a specific constraint type is implemented for the strategy
 
         Args:
diff --git a/bofire/data_models/strategies/random.py b/bofire/data_models/strategies/random.py
index 0cea999c7..af6bce87a 100644
--- a/bofire/data_models/strategies/random.py
+++ b/bofire/data_models/strategies/random.py
@@ -25,8 +25,7 @@ class RandomStrategy(Strategy):
     num_base_samples: Optional[Annotated[int, Field(gt=0)]] = None
     max_iters: Annotated[int, Field(gt=0)] = 1000
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return my_type in [
             LinearInequalityConstraint,
             LinearEqualityConstraint,
diff --git a/bofire/data_models/strategies/shortest_path.py b/bofire/data_models/strategies/shortest_path.py
index a51ea3a7e..d8ea26407 100644
--- a/bofire/data_models/strategies/shortest_path.py
+++ b/bofire/data_models/strategies/shortest_path.py
@@ -93,8 +93,7 @@ def validate_lsr(cls, domain):
             raise ValueError("Domain has no local search region.")
         return domain
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         """Checks if a constraint is implemented. Currently only linear constraints are supported.
 
         Args:
diff --git a/bofire/data_models/strategies/stepwise/stepwise.py b/bofire/data_models/strategies/stepwise/stepwise.py
index ae8433fdf..5b1c9fb19 100644
--- a/bofire/data_models/strategies/stepwise/stepwise.py
+++ b/bofire/data_models/strategies/stepwise/stepwise.py
@@ -90,6 +90,5 @@ def validate_steps(self):
     def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:
         return True
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return True
diff --git a/bofire/data_models/strategies/strategy.py b/bofire/data_models/strategies/strategy.py
index 612731ab1..996ca8f1f 100644
--- a/bofire/data_models/strategies/strategy.py
+++ b/bofire/data_models/strategies/strategy.py
@@ -1,7 +1,7 @@
 from abc import abstractmethod
 from typing import Annotated, Optional, Type
 
-from pydantic import Field, field_validator
+from pydantic import Field, field_validator, model_validator
 
 from bofire.data_models.base import BaseModel
 from bofire.data_models.constraints.api import Constraint
@@ -14,14 +14,10 @@ class Strategy(BaseModel):
     domain: Domain
     seed: Optional[Annotated[int, Field(ge=0)]] = None
 
-    @field_validator("domain")
-    @classmethod
-    def validate_constraints(cls, domain: Domain):
+    @model_validator(mode="after")
+    def validate_constraints(self):
         """Validator to ensure that all constraints defined in the domain are valid for the chosen strategy
 
-        Args:
-            domain (Domain): The domain to be used in the strategy
-
         Raises:
             ValueError: if a constraint is defined in the domain but is invalid for the strategy chosen
 
@@ -29,12 +25,12 @@ def validate_constraints(cls, domain: Domain):
             Domain: the domain
 
         """
-        for constraint in domain.constraints:
-            if not cls.is_constraint_implemented(type(constraint)):
+        for constraint in self.domain.constraints:
+            if not self.is_constraint_implemented(type(constraint)):
                 raise ValueError(
-                    f"constraint `{type(constraint)}` is not implemented for strategy `{cls.__name__}`",
+                    f"constraint `{type(constraint)}` is not implemented for strategy `{type(self).__name__}`",
                 )
-        return domain
+        return self
 
     @field_validator("domain")
     @classmethod
@@ -77,9 +73,7 @@ def validate_input_feature_count(cls, domain: Domain):
             raise ValueError("no input feature specified")
         return domain
 
-    @classmethod
-    @abstractmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         """Abstract method to check if a specific constraint type is implemented for the strategy
 
         Args:
@@ -89,6 +83,7 @@ def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
             bool: True if the constraint type is valid for the strategy chosen, False otherwise
 
         """
+        return True
 
     @classmethod
     @abstractmethod
diff --git a/bofire/strategies/api.py b/bofire/strategies/api.py
index 852e7c336..136ce6227 100644
--- a/bofire/strategies/api.py
+++ b/bofire/strategies/api.py
@@ -1,6 +1,10 @@
 from bofire.strategies.doe_strategy import DoEStrategy
 from bofire.strategies.fractional_factorial import FractionalFactorialStrategy
 from bofire.strategies.mapper import map
+from bofire.strategies.predictives.acqf_optimization import (
+    AcquisitionOptimizer,
+    get_optimizer,
+)
 from bofire.strategies.predictives.active_learning import ActiveLearningStrategy
 from bofire.strategies.predictives.botorch import BotorchStrategy
 from bofire.strategies.predictives.enting import EntingStrategy
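# Illustrative sketch (not part of the patch): since `is_constraint_implemented`
# is now an instance method and constraint validation runs in a model_validator,
# a custom strategy data model can make constraint support depend on instance
# state. `MyStrategyDataModel` is hypothetical, modeled on tests/bofire/strategies/dummy.py.
from typing import Literal, Type

import bofire.data_models.strategies.api as data_models
from bofire.data_models.constraints.api import Constraint


class MyStrategyDataModel(data_models.BotorchStrategy):
    type: Literal["MyStrategyDataModel"] = "MyStrategyDataModel"  # type: ignore

    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
        # defer to the configured acquisition optimizer, as BotorchStrategy does
        return self.acquisition_optimizer.is_constraint_implemented(my_type)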
diff --git a/bofire/strategies/predictives/acqf_optimization.py b/bofire/strategies/predictives/acqf_optimization.py
new file mode 100644
index 000000000..5b0be6e76
--- /dev/null
+++ b/bofire/strategies/predictives/acqf_optimization.py
@@ -0,0 +1,671 @@
+import copy
+from abc import ABC, abstractmethod
+from typing import Callable, Dict, List, Optional, Tuple, Type
+
+import pandas as pd
+import torch
+from botorch.acquisition.acquisition import AcquisitionFunction
+from botorch.optim.initializers import gen_batch_initial_conditions
+from botorch.optim.optimize import (
+    optimize_acqf,
+    optimize_acqf_discrete,
+    optimize_acqf_list,
+    optimize_acqf_mixed,
+)
+from torch import Tensor
+
+from bofire.data_models.constraints.api import (
+    LinearEqualityConstraint,
+    LinearInequalityConstraint,
+    NChooseKConstraint,
+    ProductConstraint,
+)
+from bofire.data_models.domain.api import Domain
+from bofire.data_models.enum import CategoricalEncodingEnum, CategoricalMethodEnum
+from bofire.data_models.features.api import (
+    CategoricalDescriptorInput,
+    CategoricalInput,
+    CategoricalMolecularInput,
+    DiscreteInput,
+    Input,
+)
+from bofire.data_models.molfeatures.api import MolFeatures
+from bofire.data_models.strategies.api import (
+    AcquisitionOptimizer as AcquisitionOptimizerDataModel,
+)
+from bofire.data_models.strategies.api import (
+    BotorchOptimizer as BotorchOptimizerDataModel,
+)
+from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
+from bofire.data_models.strategies.api import (
+    ShortestPathStrategy as ShortestPathStrategyDataModel,
+)
+from bofire.data_models.strategies.shortest_path import has_local_search_region
+from bofire.data_models.types import InputTransformSpecs
+from bofire.strategies.random import RandomStrategy
+from bofire.strategies.shortest_path import ShortestPathStrategy
+from bofire.utils.torch_tools import (
+    get_initial_conditions_generator,
+    get_interpoint_constraints,
+    get_linear_constraints,
+    get_nonlinear_constraints,
+    tkwargs,
+)
+
+
+class AcquisitionOptimizer(ABC):
+    def __init__(self, data_model: AcquisitionOptimizerDataModel):
+        self.prefer_exhaustive_search_for_purely_categorical_domains = (
+            data_model.prefer_exhaustive_search_for_purely_categorical_domains
+        )
+
+    def optimize(
+        self,
+        candidate_count: int,
+        acqfs: List[AcquisitionFunction],  # botorch objects
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        experiments: Optional[pd.DataFrame] = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Optimizes the acquisition function(s) for the given domain and input preprocessing specs.
+
+        Args:
+            candidate_count (int): Number of candidates that should be returned.
+            acqfs (List[AcquisitionFunction]): List of acquisition functions that should be optimized.
+            domain (Domain): The domain of the optimization problem.
+            input_preprocessing_specs (InputTransformSpecs): The input preprocessing specs.
+            experiments (Optional[pd.DataFrame]): Experiments conducted so far.
+
+        Returns:
+            A two-element tuple containing
+
+            - a `q x d`-dim tensor of generated candidates.
+            - an associated acquisition value.
+
+        """
+        # we check here if we have a fully combinatorial search space
+        # and use _optimize_acqf_discrete in this case
+        if self.prefer_exhaustive_search_for_purely_categorical_domains:
+            if len(
+                domain.inputs.get(includes=[DiscreteInput, CategoricalInput]),
+            ) == len(domain.inputs):
+                if len(acqfs) > 1:
+                    raise NotImplementedError(
+                        "Multiple Acqfs are currently not supported for purely combinatorial search spaces.",
+                    )
+                return self._optimize_acqf_discrete(
+                    candidate_count=candidate_count,
+                    acqf=acqfs[0],
+                    domain=domain,
+                    input_preprocessing_specs=input_preprocessing_specs,
+                    experiments=experiments,
+                )
+
+        return self._optimize(
+            candidate_count=candidate_count,
+            acqfs=acqfs,
+            domain=domain,
+            input_preprocessing_specs=input_preprocessing_specs,
+            experiments=experiments,
+        )
+
+    @abstractmethod
+    def _optimize(
+        self,
+        candidate_count: int,
+        acqfs: List[AcquisitionFunction],
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        experiments: Optional[pd.DataFrame] = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Optimizes the acquisition function(s) for the given domain and input preprocessing specs.
+
+        Args:
+            candidate_count (int): Number of candidates that should be returned.
+            acqfs (List[AcquisitionFunction]): List of acquisition functions that should be optimized.
+            domain (Domain): The domain of the optimization problem.
+            input_preprocessing_specs (InputTransformSpecs): The input preprocessing specs.
+            experiments (Optional[pd.DataFrame]): Experiments conducted so far.
+
+        Returns:
+            A two-element tuple containing
+
+            - a `q x d`-dim tensor of generated candidates.
+            - an associated acquisition value.
+
+        """
+        pass
+
+    def _features2idx(
+        self, domain: Domain, input_preprocessing_specs: InputTransformSpecs
+    ) -> Dict[str, Tuple[int]]:
+        features2idx, _ = domain.inputs._get_transform_info(
+            input_preprocessing_specs,
+        )
+        return features2idx
+
+    def get_bounds(
+        self, domain: Domain, input_preprocessing_specs: InputTransformSpecs
+    ) -> torch.Tensor:
+        lower, upper = domain.inputs.get_bounds(
+            specs=input_preprocessing_specs,
+        )
+        return torch.tensor([lower, upper]).to(**tkwargs)
+
+    def get_fixed_features(
+        self,
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        categorical_method: Optional[CategoricalMethodEnum] = None,
+        descriptor_method: Optional[CategoricalMethodEnum] = None,
+    ) -> Dict[int, float]:
+        """Provides the values of all fixed features
+
+        Returns:
+            fixed_features (dict): Dictionary of fixed features, keys are the feature indices, values the transformed feature values
+
+        """
+        # note: this is kept optimizer agnostic, since categorical_method and
+        # descriptor_method are passed in explicitly
+        fixed_features = {}
+        features2idx = self._features2idx(domain, input_preprocessing_specs)
+
+        for _, feat in enumerate(domain.inputs.get(Input)):
+            assert isinstance(feat, Input)
+            if feat.fixed_value() is not None:
+                fixed_values = feat.fixed_value(
+                    transform_type=input_preprocessing_specs.get(feat.key),  # type: ignore
+                )
+                for j, idx in enumerate(features2idx[feat.key]):
+                    fixed_features[idx] = fixed_values[j]  # type: ignore
+
+        # in case the optimization method is FREE and not-allowed categories are present,
+        # one has to fix them as well; this is a bit of double work, as it should also be
+        # reflected in the bounds, but it helps to make it safer
+
+        # this could be removed if we drop support for FREE
+        if categorical_method is not None:
+            if (
+                categorical_method == CategoricalMethodEnum.FREE
+                and CategoricalEncodingEnum.ONE_HOT
+                in list(input_preprocessing_specs.values())
+            ):
+                for feat in [
+                    domain.inputs.get_by_key(featkey)
+                    for featkey in domain.inputs.get_keys(CategoricalInput)
+                    if input_preprocessing_specs[featkey]
+                    == CategoricalEncodingEnum.ONE_HOT
+                ]:
+                    assert isinstance(feat, CategoricalInput)
+                    if feat.is_fixed() is False:
+                        for cat in feat.get_forbidden_categories():
+                            transformed = feat.to_onehot_encoding(pd.Series([cat]))
+                            # we fix those indices to zero where one gets a 1 as response from the transformer
+                            for j, idx in enumerate(features2idx[feat.key]):
+                                if transformed.values[0, j] == 1.0:
+                                    fixed_features[idx] = 0
+
+        # for the descriptor ones
+        if descriptor_method is not None:
+            if (
+                descriptor_method == CategoricalMethodEnum.FREE
+                and CategoricalEncodingEnum.DESCRIPTOR
+                in list(input_preprocessing_specs.values())
+            ):
+                for feat in [
+                    domain.inputs.get_by_key(featkey)
+                    for featkey in domain.inputs.get_keys(CategoricalDescriptorInput)
+                    if input_preprocessing_specs[featkey]
+                    == CategoricalEncodingEnum.DESCRIPTOR
+                ]:
+                    assert isinstance(feat, CategoricalDescriptorInput)
+                    if feat.is_fixed() is False:
+                        lower, upper = feat.get_bounds(
+                            CategoricalEncodingEnum.DESCRIPTOR
+                        )
+                        for j, idx in enumerate(features2idx[feat.key]):
+                            if lower[j] == upper[j]:
+                                fixed_features[idx] = lower[j]
+        return fixed_features
+
+    def _optimize_acqf_discrete(
+        self,
+        candidate_count: int,
+        acqf: AcquisitionFunction,
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        experiments: Optional[pd.DataFrame] = None,
+    ) -> Tuple[Tensor, Tensor]:
+        """Optimizes the acquisition function for a purely discrete search space.
+
+        Args:
+            candidate_count: Number of candidates that should be returned.
+            acqf: Acquisition function that should be optimized.
+            domain: The domain of the optimization problem.
+            input_preprocessing_specs: The input preprocessing specs.
+            experiments: Experiments conducted so far.
+
+        Returns:
+            A two-element tuple containing
+
+            - a `q x d`-dim tensor of generated candidates.
+            - an associated acquisition value.
+
+        """
+        assert experiments is not None
+        choices = pd.DataFrame.from_dict(
+            [  # type: ignore
+                {e[0]: e[1] for e in combi}
+                for combi in domain.inputs.get_categorical_combinations()
+            ],
+        )
+        # adding categorical features that are fixed
+        for feat in domain.inputs.get_fixed():
+            choices[feat.key] = feat.fixed_value()[0]  # type: ignore
+        # compare the choices with the training data and remove all that are also part
+        # of the training data
+        merged = choices.merge(
+            experiments[domain.inputs.get_keys()],
+            on=list(choices.columns),
+            how="left",
+            indicator=True,
+        )
+        filtered_choices = merged[merged["_merge"] == "left_only"].copy()
+        filtered_choices.drop(columns=["_merge"], inplace=True)
+
+        # translate the filtered choices to torch
+        t_choices = torch.from_numpy(
+            domain.inputs.transform(
+                filtered_choices,
+                specs=input_preprocessing_specs,
+            ).values,
+        ).to(**tkwargs)
+        return optimize_acqf_discrete(
+            acq_function=acqf,
+            q=candidate_count,
+            unique=True,
+            choices=t_choices,
+        )
+
+
+class BotorchOptimizer(AcquisitionOptimizer):
+    def __init__(self, data_model: BotorchOptimizerDataModel):
+        self.n_restarts = data_model.n_restarts
+        self.n_raw_samples = data_model.n_raw_samples
+        self.maxiter = data_model.maxiter
+        self.batch_limit = data_model.batch_limit
+
+        # just for completeness here; we should drop the support for FREE and only go
+        # over the allowed combinations. For speedier optimization we can use other
+        # solvers, so this can be removed at some point.
+        self.categorical_method = data_model.categorical_method
+        self.discrete_method = data_model.discrete_method
+        self.descriptor_method = data_model.descriptor_method
+
+        self.local_search_config = data_model.local_search_config
+
+        super().__init__(data_model)
+
+    def _setup(self):
+        pass
+
+    def _optimize(
+        self,
+        candidate_count: int,
+        acqfs: List[AcquisitionFunction],
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        experiments: Optional[pd.DataFrame] = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        # implementation for continuous and mixed search spaces; different botorch
+        # optimizers are dispatched in _optimize_acqf_continuous below, so the setup
+        # stays abstracted here
+        (
+            bounds,
+            local_bounds,
+            ic_generator,
+            ic_gen_kwargs,
+            nonlinears,
+            fixed_features,
+            fixed_features_list,
+        ) = self._setup_ask(domain, input_preprocessing_specs, experiments)
+
+        # do the global opt
+        candidates, global_acqf_val = self._optimize_acqf_continuous(
+            domain=domain,
+            input_preprocessing_specs=input_preprocessing_specs,
+            candidate_count=candidate_count,
+            acqfs=acqfs,
+            bounds=bounds,
+            ic_generator=ic_generator,  # type: ignore
+            ic_gen_kwargs=ic_gen_kwargs,
+            nonlinear_constraints=nonlinears,  # type: ignore
+            fixed_features=fixed_features,
+            fixed_features_list=fixed_features_list,
+        )
+
+        if (
+            self.local_search_config is not None
+            and has_local_search_region(domain)
+            and candidate_count == 1
+        ):
+            local_candidates, local_acqf_val = self._optimize_acqf_continuous(
+                domain=domain,
+                input_preprocessing_specs=input_preprocessing_specs,
+                candidate_count=candidate_count,
+                acqfs=acqfs,
+                bounds=local_bounds,
+                ic_generator=ic_generator,  # type: ignore
+                ic_gen_kwargs=ic_gen_kwargs,
+                nonlinear_constraints=nonlinears,  # type: ignore
+                fixed_features=fixed_features,
+                fixed_features_list=fixed_features_list,
+            )
+            if self.local_search_config.is_local_step(
+                local_acqf_val.item(),
+                global_acqf_val.item(),
+            ):
+                return local_candidates, local_acqf_val
+
+            raise NotImplementedError("Johannes to have a look at this")
+            sp = ShortestPathStrategy(
+                data_model=ShortestPathStrategyDataModel(
+                    domain=self.domain,
+                    start=self.experiments.iloc[-1].to_dict(),
+                    end=self._postprocess_candidates(candidates).iloc[-1].to_dict(),
+                ),
+            )
+
+            step = pd.DataFrame(sp.step(sp.start)).T
+            return pd.concat((step, self.predict(step)), axis=1)
+
+        return candidates, global_acqf_val
+
+    def _optimize_acqf_continuous(
+        self,
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        candidate_count: int,
+        acqfs: List[AcquisitionFunction],
+        bounds: Tensor,
+        ic_generator: Callable,
+        ic_gen_kwargs: Dict,
+        nonlinear_constraints: List[Callable[[Tensor], float]],
+        fixed_features: Optional[Dict[int, float]],
+        fixed_features_list: Optional[List[Dict[int, float]]],
+    ) -> Tuple[Tensor, Tensor]:
+        if len(acqfs) > 1:
+            candidates, acqf_vals = optimize_acqf_list(
+                acq_function_list=acqfs,
+                bounds=bounds,
+                num_restarts=self.n_restarts,
+                raw_samples=self.n_raw_samples,
+                equality_constraints=get_linear_constraints(
+                    domain=domain,
+                    constraint=LinearEqualityConstraint,
+                ),
+                inequality_constraints=get_linear_constraints(
+                    domain=domain,
+                    constraint=LinearInequalityConstraint,
+                ),
+                nonlinear_inequality_constraints=nonlinear_constraints,  # type: ignore
+                fixed_features=fixed_features,
+                fixed_features_list=fixed_features_list,
+                ic_gen_kwargs=ic_gen_kwargs,
+                ic_generator=ic_generator,
+                options=self._get_optimizer_options(domain),  # type: ignore
+            )
+        elif fixed_features_list:
+            candidates, acqf_vals = optimize_acqf_mixed(
+                acq_function=acqfs[0],
+                bounds=bounds,
+                q=candidate_count,
+                num_restarts=self.n_restarts,
+                raw_samples=self.n_raw_samples,
+                equality_constraints=get_linear_constraints(
+                    domain=domain,
+                    constraint=LinearEqualityConstraint,
+                ),
+                inequality_constraints=get_linear_constraints(
+                    domain=domain,
+                    constraint=LinearInequalityConstraint,
+                ),
+                nonlinear_inequality_constraints=nonlinear_constraints,  # type: ignore
+                fixed_features_list=fixed_features_list,
+                ic_generator=ic_generator,
+                ic_gen_kwargs=ic_gen_kwargs,
+                options=self._get_optimizer_options(domain),  # type: ignore
+            )
+        else:
+            interpoints = get_interpoint_constraints(
+                domain=domain,
+                n_candidates=candidate_count,
+            )
+            candidates, acqf_vals = optimize_acqf(
+                acq_function=acqfs[0],
+                bounds=bounds,
+                q=candidate_count,
+                num_restarts=self.n_restarts,
+                raw_samples=self.n_raw_samples,
+                equality_constraints=get_linear_constraints(
+                    domain=domain,
+                    constraint=LinearEqualityConstraint,
+                )
+                + interpoints,
+                inequality_constraints=get_linear_constraints(
+                    domain=domain,
+                    constraint=LinearInequalityConstraint,
+                ),
+                fixed_features=fixed_features,
+                nonlinear_inequality_constraints=nonlinear_constraints,  # type: ignore
+                return_best_only=True,
+                options=self._get_optimizer_options(domain),  # type: ignore
+                ic_generator=ic_generator,
+                **ic_gen_kwargs,
+            )
+        return candidates, acqf_vals
+
+    def _get_optimizer_options(self, domain: Domain) -> Dict[str, int]:
+        """Returns a dictionary of settings passed to `optimize_acqf` controlling
+        the behavior of the optimizer.
+
+        Args:
+            domain (Domain): The domain of the optimization problem.
+
+        Returns:
+            Dict[str, int]: The dictionary with the settings.
+
+        """
+        return {
+            "batch_limit": (  # type: ignore
+                self.batch_limit
+                if len(
+                    domain.constraints.get([NChooseKConstraint, ProductConstraint]),
+                )
+                == 0
+                else 1
+            ),
+            "maxiter": self.maxiter,
+        }
+
+    def _setup_ask(
+        self,
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+        experiments: Optional[pd.DataFrame] = None,
+    ):
+        """Generates arguments that can be passed to one of botorch's `optimize_acqf` methods."""
+        num_categorical_features = len(
+            domain.inputs.get([CategoricalInput, DiscreteInput]),
+        )
+        num_categorical_combinations = len(
+            domain.inputs.get_categorical_combinations(),
+        )
+        bounds = self.get_bounds(domain, input_preprocessing_specs)
+
+        # setup local bounds
+        assert experiments is not None
+        local_lower, local_upper = domain.inputs.get_bounds(
+            specs=input_preprocessing_specs,
+            reference_experiment=experiments.iloc[-1],
+        )
+        local_bounds = torch.tensor([local_lower, local_upper]).to(**tkwargs)
+
+        # setup nonlinears
+        if len(domain.constraints.get([NChooseKConstraint, ProductConstraint])) == 0:
+            ic_generator = None
+            ic_gen_kwargs = {}
+            nonlinear_constraints = None
+        else:
+            # TODO: implement LSR-BO also for constraints --> use local bounds
+            ic_generator = gen_batch_initial_conditions
+            ic_gen_kwargs = {
+                "generator": get_initial_conditions_generator(
+                    strategy=RandomStrategy(
+                        data_model=RandomStrategyDataModel(domain=domain),
+                    ),
+                    transform_specs=input_preprocessing_specs,
+                ),
+            }
+            nonlinear_constraints = get_nonlinear_constraints(domain)
+        # setup fixed features
+        if (
+            (num_categorical_features == 0)
+            or (num_categorical_combinations == 1)
+            or (
+                all(
+                    enc == CategoricalMethodEnum.FREE
+                    for enc in [
+                        self.categorical_method,
+                        self.descriptor_method,
+                        self.discrete_method,
+                    ]
+                )
+            )
+        ):
+            fixed_features = self.get_fixed_features(
+                domain,
+                input_preprocessing_specs,
+                self.categorical_method,
+                self.descriptor_method,
+            )
+            fixed_features_list = None
+        else:
+            fixed_features = None
+            fixed_features_list = self.get_categorical_combinations(
+                domain, input_preprocessing_specs
+            )
+        return (
+            bounds,
+            local_bounds,
+            ic_generator,
+            ic_gen_kwargs,
+            nonlinear_constraints,
+            fixed_features,
+            fixed_features_list,
+        )
+
+    def get_categorical_combinations(
+        self,
+        domain: Domain,
+        input_preprocessing_specs: InputTransformSpecs,
+    ) -> List[Dict[int, float]]:
+        """Provides all possible combinations of fixed values
+
+        Returns:
+            list_of_fixed_features List[dict]: Each dict contains a combination of fixed values
+
+        """
+        fixed_basis = self.get_fixed_features(
+            domain,
+            input_preprocessing_specs,
+            self.categorical_method,
+            self.descriptor_method,
+        )
+
+        methods = [
+            self.descriptor_method,
+            self.discrete_method,
+            self.categorical_method,
+        ]
+
+        if all(m == CategoricalMethodEnum.FREE for m in methods):
+            return [{}]
+
+        include = []
+        exclude = None
+
+        if self.discrete_method == CategoricalMethodEnum.EXHAUSTIVE:
+            include.append(DiscreteInput)
+
+        if self.categorical_method == CategoricalMethodEnum.EXHAUSTIVE:
+            include.append(CategoricalInput)
+            exclude = CategoricalDescriptorInput
+
+        if self.descriptor_method == CategoricalMethodEnum.EXHAUSTIVE:
+            include.append(CategoricalDescriptorInput)
+            exclude = None
+
+        if not include:
+            include = None
+
+        combos = domain.inputs.get_categorical_combinations(
+            include=(include if include else Input),
+            exclude=exclude,  # type: ignore
+        )
+        # now build up the fixed feature list
+        if len(combos) == 1:
+            return [fixed_basis]
+
+        features2idx = self._features2idx(domain, input_preprocessing_specs)
+        list_of_fixed_features = []
+
+        for combo in combos:
+            fixed_features = copy.deepcopy(fixed_basis)
+
+            for pair in combo:
+                feat, val = pair
+                feature = domain.inputs.get_by_key(feat)
+                if (
+                    isinstance(feature, CategoricalDescriptorInput)
+                    and input_preprocessing_specs[feat]
+                    == CategoricalEncodingEnum.DESCRIPTOR
+                ):
+                    index = feature.categories.index(val)
+
+                    for j, idx in enumerate(features2idx[feat]):
+                        fixed_features[idx] = feature.values[index][j]
+
+                elif isinstance(feature, CategoricalMolecularInput):
+                    preproc = input_preprocessing_specs[feat]
+                    if not isinstance(preproc, MolFeatures):
+                        raise ValueError(
+                            f"preprocessing for {feat} must be of type AnyMolFeatures"
+                        )
+                    transformed = feature.to_descriptor_encoding(
+                        preproc, pd.Series([val])
+                    )
+                    for j, idx in enumerate(features2idx[feat]):
+                        fixed_features[idx] = transformed.values[0, j]
+
+                elif isinstance(feature, CategoricalInput):
+                    # it has to be one-hot encoded in this case
+                    transformed = feature.to_onehot_encoding(pd.Series([val]))
+                    for j, idx in enumerate(features2idx[feat]):
+                        fixed_features[idx] = transformed.values[0, j]
+
+                elif isinstance(feature, DiscreteInput):
+                    fixed_features[features2idx[feat][0]] = val  # type: ignore
+
+            list_of_fixed_features.append(fixed_features)
+        return list_of_fixed_features
+
+
+OPTIMIZER_MAP: Dict[Type[AcquisitionOptimizerDataModel], Type[AcquisitionOptimizer]] = {
+    BotorchOptimizerDataModel: BotorchOptimizer,
+}
+
+
+def get_optimizer(data_model: AcquisitionOptimizerDataModel) -> AcquisitionOptimizer:
+    return OPTIMIZER_MAP[type(data_model)](data_model)
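# Illustrative sketch (not part of the patch): mapping a serializable optimizer
# data model to its functional counterpart via `get_optimizer`, mirroring how
# BoFire's strategy mapper works for strategies.
from bofire.data_models.strategies.api import (
    BotorchOptimizer as BotorchOptimizerDataModel,
)
from bofire.strategies.api import get_optimizer

optimizer = get_optimizer(BotorchOptimizerDataModel(n_restarts=4))
# `optimizer` is a functional BotorchOptimizer; strategies call
# optimizer.optimize(candidate_count, acqfs, domain, specs, experiments)
# to obtain a `q x d` candidate tensor and its acquisition value.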
InputTransformSpecs from bofire.outlier_detection.outlier_detections import OutlierDetections +from bofire.strategies.predictives.acqf_optimization import ( + AcquisitionOptimizer, + get_optimizer, +) from bofire.strategies.predictives.predictive import PredictiveStrategy from bofire.strategies.random import RandomStrategy -from bofire.strategies.shortest_path import ShortestPathStrategy from bofire.surrogates.botorch_surrogates import BotorchSurrogates -from bofire.utils.torch_tools import ( - get_initial_conditions_generator, - get_interpoint_constraints, - get_linear_constraints, - get_nonlinear_constraints, - tkwargs, -) +from bofire.utils.torch_tools import tkwargs class BotorchStrategy(PredictiveStrategy): @@ -61,11 +32,11 @@ def __init__( **kwargs, ): super().__init__(data_model=data_model, **kwargs) - self.num_restarts = data_model.num_restarts - self.num_raw_samples = data_model.num_raw_samples - self.descriptor_method = data_model.descriptor_method - self.categorical_method = data_model.categorical_method - self.discrete_method = data_model.discrete_method + + self.acqf_optimizer: AcquisitionOptimizer = get_optimizer( + data_model.acquisition_optimizer + ) + self.surrogate_specs = data_model.surrogate_specs if data_model.outlier_detection_specs is not None: self.outlier_detection_specs = OutlierDetections( @@ -80,9 +51,7 @@ def __init__( self.frequency_hyperopt = data_model.frequency_hyperopt self.folds = data_model.folds self.surrogates = None - self.local_search_config = data_model.local_search_config - self.maxiter = data_model.maxiter - self.batch_limit = data_model.batch_limit + torch.manual_seed(self.seed) model: Optional[GPyTorchModel] = None @@ -91,13 +60,6 @@ def __init__( def input_preprocessing_specs(self) -> InputTransformSpecs: return self.surrogate_specs.input_preprocessing_specs - @property - def _features2idx(self) -> Dict[str, Tuple[int]]: - features2idx, _ = self.domain.inputs._get_transform_info( - self.input_preprocessing_specs, - ) - return features2idx - @property def _features2names(self) -> Dict[str, Tuple[str]]: _, features2names = self.domain.inputs._get_transform_info( @@ -105,28 +67,6 @@ def _features2names(self) -> Dict[str, Tuple[str]]: ) return features2names - def _get_optimizer_options(self) -> Dict[str, int]: - """Returns a dictionary of settings passed to `optimize_acqf` controlling - the behavior of the optimizer. - - Returns: - Dict[str, int]: The dictionary with the settings. 
- - """ - return { - "batch_limit": ( # type: ignore - self.batch_limit - if len( - self.domain.constraints.get( - [NChooseKConstraint, ProductConstraint] - ), - ) - == 0 - else 1 - ), - "maxiter": self.maxiter, - } - def _fit(self, experiments: pd.DataFrame): """[summary] @@ -224,76 +164,6 @@ def calc_acquisition( return vals - def _setup_ask(self): - """Generates argument that can by passed to one of botorch's `optimize_acqf` method.""" - num_categorical_features = len( - self.domain.inputs.get([CategoricalInput, DiscreteInput]), - ) - num_categorical_combinations = len( - self.domain.inputs.get_categorical_combinations(), - ) - lower, upper = self.domain.inputs.get_bounds( - specs=self.input_preprocessing_specs, - ) - bounds = torch.tensor([lower, upper]).to(**tkwargs) - # setup local bounds - assert self.experiments is not None - local_lower, local_upper = self.domain.inputs.get_bounds( - specs=self.input_preprocessing_specs, - reference_experiment=self.experiments.iloc[-1], - ) - local_bounds = torch.tensor([local_lower, local_upper]).to(**tkwargs) - - # setup nonlinears - if ( - len(self.domain.constraints.get([NChooseKConstraint, ProductConstraint])) - == 0 - ): - ic_generator = None - ic_gen_kwargs = {} - nonlinear_constraints = None - else: - # TODO: implement LSR-BO also for constraints --> use local bounds - ic_generator = gen_batch_initial_conditions - ic_gen_kwargs = { - "generator": get_initial_conditions_generator( - strategy=RandomStrategy( - data_model=RandomStrategyDataModel(domain=self.domain), - ), - transform_specs=self.input_preprocessing_specs, - ), - } - nonlinear_constraints = get_nonlinear_constraints(self.domain) - # setup fixed features - if ( - (num_categorical_features == 0) - or (num_categorical_combinations == 1) - or ( - all( - enc == CategoricalMethodEnum.FREE - for enc in [ - self.categorical_method, - self.descriptor_method, - self.discrete_method, - ] - ) - ) - ): - fixed_features = self.get_fixed_features() - fixed_features_list = None - else: - fixed_features = None - fixed_features_list = self.get_categorical_combinations() - return ( - bounds, - local_bounds, - ic_generator, - ic_gen_kwargs, - nonlinear_constraints, - fixed_features, - fixed_features_list, - ) - def _postprocess_candidates(self, candidates: Tensor) -> pd.DataFrame: """Converts a tensor of candidates to a pandas Dataframe. 
@@ -323,88 +193,6 @@ def _postprocess_candidates(self, candidates: Tensor) -> pd.DataFrame: preds = self.predict(df_candidates) return pd.concat((df_candidates, preds), axis=1) - def _optimize_acqf_continuous( - self, - candidate_count: int, - acqfs: List[AcquisitionFunction], - bounds: Tensor, - ic_generator: Callable, - ic_gen_kwargs: Dict, - nonlinear_constraints: List[Callable[[Tensor], float]], - fixed_features: Optional[Dict[int, float]], - fixed_features_list: Optional[List[Dict[int, float]]], - ) -> Tuple[Tensor, Tensor]: - if len(acqfs) > 1: - candidates, acqf_vals = optimize_acqf_list( - acq_function_list=acqfs, - bounds=bounds, - num_restarts=self.num_restarts, - raw_samples=self.num_raw_samples, - equality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearEqualityConstraint, - ), - inequality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearInequalityConstraint, - ), - nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore - fixed_features=fixed_features, - fixed_features_list=fixed_features_list, - ic_gen_kwargs=ic_gen_kwargs, - ic_generator=ic_generator, - options=self._get_optimizer_options(), # type: ignore - ) - elif fixed_features_list: - candidates, acqf_vals = optimize_acqf_mixed( - acq_function=acqfs[0], - bounds=bounds, - q=candidate_count, - num_restarts=self.num_restarts, - raw_samples=self.num_raw_samples, - equality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearEqualityConstraint, - ), - inequality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearInequalityConstraint, - ), - nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore - fixed_features_list=fixed_features_list, - ic_generator=ic_generator, - ic_gen_kwargs=ic_gen_kwargs, - options=self._get_optimizer_options(), # type: ignore - ) - else: - interpoints = get_interpoint_constraints( - domain=self.domain, - n_candidates=candidate_count, - ) - candidates, acqf_vals = optimize_acqf( - acq_function=acqfs[0], - bounds=bounds, - q=candidate_count, - num_restarts=self.num_restarts, - raw_samples=self.num_raw_samples, - equality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearEqualityConstraint, - ) - + interpoints, - inequality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearInequalityConstraint, - ), - fixed_features=fixed_features, - nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore - return_best_only=True, - options=self._get_optimizer_options(), # type: ignore - ic_generator=ic_generator, - **ic_gen_kwargs, - ) - return candidates, acqf_vals - def _ask(self, candidate_count: int) -> pd.DataFrame: # type: ignore """[summary] @@ -421,103 +209,14 @@ def _ask(self, candidate_count: int) -> pd.DataFrame: # type: ignore acqfs = self._get_acqfs(candidate_count) - # we check here if we have a fully combinatorial search space - if len( - self.domain.inputs.get(includes=[DiscreteInput, CategoricalInput]), - ) == len(self.domain.inputs): - if len(acqfs) > 1: - raise NotImplementedError( - "Multiple Acqfs are currently not supported for purely combinatorial search spaces.", - ) - # generate the choices as pandas dataframe - choices = pd.DataFrame.from_dict( - [ # type: ignore - {e[0]: e[1] for e in combi} - for combi in self.domain.inputs.get_categorical_combinations() - ], - ) - # adding categorical features that are fixed - for feat in self.domain.inputs.get_fixed(): - choices[feat.key] = 
feat.fixed_value()[0] # type: ignore - # compare the choices with the training data and remove all that are also part - # of the training data - merged = choices.merge( - self.experiments[self.domain.inputs.get_keys()], - on=list(choices.columns), - how="left", - indicator=True, - ) - filtered_choices = merged[merged["_merge"] == "left_only"].copy() - filtered_choices.drop(columns=["_merge"], inplace=True) - - # translate the filtered choice to torch - t_choices = torch.from_numpy( - self.domain.inputs.transform( - filtered_choices, - specs=self.input_preprocessing_specs, - ).values, - ).to(**tkwargs) - - candidates, _ = optimize_acqf_discrete( - acq_function=acqfs[0], - q=candidate_count, - unique=True, - choices=t_choices, - ) - return self._postprocess_candidates(candidates=candidates) - - ( - bounds, - local_bounds, - ic_generator, - ic_gen_kwargs, - nonlinears, - fixed_features, - fixed_features_list, - ) = self._setup_ask() - - # do the global opt - candidates, global_acqf_val = self._optimize_acqf_continuous( - candidate_count=candidate_count, - acqfs=acqfs, - bounds=bounds, - ic_generator=ic_generator, # type: ignore - ic_gen_kwargs=ic_gen_kwargs, - nonlinear_constraints=nonlinears, # type: ignore - fixed_features=fixed_features, - fixed_features_list=fixed_features_list, + candidates, _ = self.acqf_optimizer.optimize( + candidate_count, + acqfs, + self.domain, + self.input_preprocessing_specs, + self.experiments, ) - if ( - self.local_search_config is not None - and has_local_search_region(self.domain) - and candidate_count == 1 - ): - local_candidates, local_acqf_val = self._optimize_acqf_continuous( - candidate_count=candidate_count, - acqfs=acqfs, - bounds=local_bounds, - ic_generator=ic_generator, # type: ignore - ic_gen_kwargs=ic_gen_kwargs, - nonlinear_constraints=nonlinears, # type: ignore - fixed_features=fixed_features, - fixed_features_list=fixed_features_list, - ) - if self.local_search_config.is_local_step( - local_acqf_val.item(), - global_acqf_val.item(), - ): - return self._postprocess_candidates(candidates=local_candidates) - sp = ShortestPathStrategy( - data_model=ShortestPathStrategyDataModel( - domain=self.domain, - start=self.experiments.iloc[-1].to_dict(), - end=self._postprocess_candidates(candidates).iloc[-1].to_dict(), - ), - ) - step = pd.DataFrame(sp.step(sp.start)).T - return pd.concat((step, self.predict(step)), axis=1) - return self._postprocess_candidates(candidates=candidates) def _tell(self) -> None: @@ -527,156 +226,6 @@ def _tell(self) -> None: def _get_acqfs(self, n: int) -> List[AcquisitionFunction]: pass - def get_fixed_features(self) -> Dict[int, float]: - """Provides the values of all fixed features - - Raises: - NotImplementedError: [description] - - Returns: - fixed_features (dict): Dictionary of fixed features, keys are the feature indices, values the transformed feature values - - """ - fixed_features = {} - features2idx = self._features2idx - - for _, feat in enumerate(self.domain.inputs.get(Input)): - assert isinstance(feat, Input) - if feat.fixed_value() is not None: - fixed_values = feat.fixed_value( - transform_type=self.input_preprocessing_specs.get(feat.key), # type: ignore - ) - for j, idx in enumerate(features2idx[feat.key]): - fixed_features[idx] = fixed_values[j] # type: ignore - - # in case the optimization method is free and not allowed categories are present - # one has to fix also them, this is abit of double work as it should be also reflected - # in the bounds but helps to make it safer - - if ( - self.categorical_method 
== CategoricalMethodEnum.FREE - and CategoricalEncodingEnum.ONE_HOT - in list(self.input_preprocessing_specs.values()) - ): - # for feat in self.get_true_categorical_features(): - for feat in [ - self.domain.inputs.get_by_key(featkey) - for featkey in self.domain.inputs.get_keys(CategoricalInput) - if self.input_preprocessing_specs[featkey] - == CategoricalEncodingEnum.ONE_HOT - ]: - assert isinstance(feat, CategoricalInput) - if feat.is_fixed() is False: - for cat in feat.get_forbidden_categories(): - transformed = feat.to_onehot_encoding(pd.Series([cat])) - # we fix those indices to zero where one has a 1 as response from the transformer - for j, idx in enumerate(features2idx[feat.key]): - if transformed.values[0, j] == 1.0: - fixed_features[idx] = 0 - # for the descriptor ones - if ( - self.descriptor_method == CategoricalMethodEnum.FREE - and CategoricalEncodingEnum.DESCRIPTOR - in list(self.input_preprocessing_specs.values()) - ): - # for feat in self.get_true_categorical_features(): - for feat in [ - self.domain.inputs.get_by_key(featkey) - for featkey in self.domain.inputs.get_keys(CategoricalDescriptorInput) - if self.input_preprocessing_specs[featkey] - == CategoricalEncodingEnum.DESCRIPTOR - ]: - assert isinstance(feat, CategoricalDescriptorInput) - if feat.is_fixed() is False: - lower, upper = feat.get_bounds(CategoricalEncodingEnum.DESCRIPTOR) - for j, idx in enumerate(features2idx[feat.key]): - if lower[j] == upper[j]: - fixed_features[idx] = lower[j] - return fixed_features - - def get_categorical_combinations(self) -> List[Dict[int, float]]: - """Provides all possible combinations of fixed values - - Returns: - list_of_fixed_features List[dict]: Each dict contains a combination of fixed values - - """ - fixed_basis = self.get_fixed_features() - - methods = [ - self.descriptor_method, - self.discrete_method, - self.categorical_method, - ] - - if all(m == CategoricalMethodEnum.FREE for m in methods): - return [{}] - include = [] - exclude = None - - if self.discrete_method == CategoricalMethodEnum.EXHAUSTIVE: - include.append(DiscreteInput) - - if self.categorical_method == CategoricalMethodEnum.EXHAUSTIVE: - include.append(CategoricalInput) - exclude = CategoricalDescriptorInput - - if self.descriptor_method == CategoricalMethodEnum.EXHAUSTIVE: - include.append(CategoricalDescriptorInput) - exclude = None - - if not include: - include = None - - combos = self.domain.inputs.get_categorical_combinations( - include=(include if include else Input), - exclude=exclude, # type: ignore - ) - # now build up the fixed feature list - if len(combos) == 1: - return [fixed_basis] - features2idx = self._features2idx - list_of_fixed_features = [] - - for combo in combos: - fixed_features = copy.deepcopy(fixed_basis) - - for pair in combo: - feat, val = pair - feature = self.domain.inputs.get_by_key(feat) - if ( - isinstance(feature, CategoricalDescriptorInput) - and self.input_preprocessing_specs[feat] - == CategoricalEncodingEnum.DESCRIPTOR - ): - index = feature.categories.index(val) - - for j, idx in enumerate(features2idx[feat]): - fixed_features[idx] = feature.values[index][j] - - elif isinstance(feature, CategoricalMolecularInput): - preproc = self.input_preprocessing_specs[feat] - if not isinstance(preproc, MolFeatures): - raise ValueError( - f"preprocessing for {feat} must be of type AnyMolFeatures" - ) - transformed = feature.to_descriptor_encoding( - preproc, pd.Series([val]) - ) - for j, idx in enumerate(features2idx[feat]): - fixed_features[idx] = transformed.values[0, j] - elif 
-    def get_categorical_combinations(self) -> List[Dict[int, float]]:
-        """Provides all possible combinations of fixed values
-
-        Returns:
-            list_of_fixed_features List[dict]: Each dict contains a combination of fixed values
-
-        """
-        fixed_basis = self.get_fixed_features()
-
-        methods = [
-            self.descriptor_method,
-            self.discrete_method,
-            self.categorical_method,
-        ]
-
-        if all(m == CategoricalMethodEnum.FREE for m in methods):
-            return [{}]
-        include = []
-        exclude = None
-
-        if self.discrete_method == CategoricalMethodEnum.EXHAUSTIVE:
-            include.append(DiscreteInput)
-
-        if self.categorical_method == CategoricalMethodEnum.EXHAUSTIVE:
-            include.append(CategoricalInput)
-            exclude = CategoricalDescriptorInput
-
-        if self.descriptor_method == CategoricalMethodEnum.EXHAUSTIVE:
-            include.append(CategoricalDescriptorInput)
-            exclude = None
-
-        if not include:
-            include = None
-
-        combos = self.domain.inputs.get_categorical_combinations(
-            include=(include if include else Input),
-            exclude=exclude,  # type: ignore
-        )
-        # now build up the fixed feature list
-        if len(combos) == 1:
-            return [fixed_basis]
-        features2idx = self._features2idx
-        list_of_fixed_features = []
-
-        for combo in combos:
-            fixed_features = copy.deepcopy(fixed_basis)
-
-            for pair in combo:
-                feat, val = pair
-                feature = self.domain.inputs.get_by_key(feat)
-                if (
-                    isinstance(feature, CategoricalDescriptorInput)
-                    and self.input_preprocessing_specs[feat]
-                    == CategoricalEncodingEnum.DESCRIPTOR
-                ):
-                    index = feature.categories.index(val)
-
-                    for j, idx in enumerate(features2idx[feat]):
-                        fixed_features[idx] = feature.values[index][j]
-
-                elif isinstance(feature, CategoricalMolecularInput):
-                    preproc = self.input_preprocessing_specs[feat]
-                    if not isinstance(preproc, MolFeatures):
-                        raise ValueError(
-                            f"preprocessing for {feat} must be of type AnyMolFeatures"
-                        )
-                    transformed = feature.to_descriptor_encoding(
-                        preproc, pd.Series([val])
-                    )
-                    for j, idx in enumerate(features2idx[feat]):
-                        fixed_features[idx] = transformed.values[0, j]
-                elif isinstance(feature, CategoricalInput):
-                    # it has to be onehot in this case
-                    transformed = feature.to_onehot_encoding(pd.Series([val]))
-                    for j, idx in enumerate(features2idx[feat]):
-                        fixed_features[idx] = transformed.values[0, j]
-
-                elif isinstance(feature, DiscreteInput):
-                    fixed_features[features2idx[feat][0]] = val  # type: ignore
-
-            list_of_fixed_features.append(fixed_features)
-        return list_of_fixed_features
-
     def has_sufficient_experiments(
         self,
     ) -> bool:
diff --git a/tests/bofire/data_models/specs/local_search_config.py b/tests/bofire/data_models/specs/local_search_config.py
index 4af67e848..e63aa7f3b 100644
--- a/tests/bofire/data_models/specs/local_search_config.py
+++ b/tests/bofire/data_models/specs/local_search_config.py
@@ -1,4 +1,4 @@
-from bofire.data_models.strategies.api import LSRBO
+from bofire.data_models.strategies.predictives.acqf_optimization import LSRBO
 
 from tests.bofire.data_models.specs.specs import Specs
diff --git a/tests/bofire/data_models/specs/strategies.py b/tests/bofire/data_models/specs/strategies.py
index 2f8064f71..6685eee98 100644
--- a/tests/bofire/data_models/specs/strategies.py
+++ b/tests/bofire/data_models/specs/strategies.py
@@ -1,4 +1,5 @@
 import bofire.data_models.strategies.api as strategies
+import bofire.data_models.strategies.predictives.acqf_optimization
 from bofire.data_models.acquisition_functions.api import (
     qEI,
     qLogNEHVI,
@@ -29,8 +30,8 @@
 strategy_commons = {
-    "num_raw_samples": 1024,
-    "num_restarts": 8,
+    "n_raw_samples": 1024,
+    "n_restarts": 8,
     "descriptor_method": CategoricalMethodEnum.EXHAUSTIVE,
     "categorical_method": CategoricalMethodEnum.EXHAUSTIVE,
     "discrete_method": CategoricalMethodEnum.EXHAUSTIVE,
@@ -537,7 +538,7 @@
             ],
         ),
     ).model_dump(),
-    "local_search_config": strategies.LSRBO(),
+    "local_search_config": bofire.data_models.strategies.predictives.acqf_optimization.LSRBO(),
 },
 error=ValueError,
 message="LSR-BO only supported for linear constraints.",
diff --git a/tests/bofire/data_models/strategies/predictives/test_acqf_optimization.py b/tests/bofire/data_models/strategies/predictives/test_acqf_optimization.py
new file mode 100644
index 000000000..3d64012ed
--- /dev/null
+++ b/tests/bofire/data_models/strategies/predictives/test_acqf_optimization.py
@@ -0,0 +1,25 @@
+import pytest
+
+from bofire.data_models.strategies.api import LSRBO, BotorchOptimizer
+
+
+@pytest.mark.parametrize(
+    "gamma, acqf_local, acqf_global, expected",
+    [(0.1, 0.3, 0.4, True), (0.4, 0.1, 0.5, False)],
+)
+def test_LSRBO(gamma, acqf_local, acqf_global, expected):
+    assert (
+        LSRBO(gamma=gamma).is_local_step(acqf_local=acqf_local, acqf_global=acqf_global)
+        == expected
+    )
+
+
+def test_validate_batch_limit():
+    acquisition_optimizer = BotorchOptimizer()
+    assert acquisition_optimizer.batch_limit == acquisition_optimizer.n_restarts
+
+    acquisition_optimizer = BotorchOptimizer(batch_limit=50)
+    assert acquisition_optimizer.batch_limit == acquisition_optimizer.n_restarts
+    acquisition_optimizer = BotorchOptimizer(batch_limit=2, n_restarts=4)
+    assert acquisition_optimizer.batch_limit == 2
+    assert acquisition_optimizer.n_restarts == 4
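
The new test module above pins down the two behaviors introduced by this patch: `LSRBO.is_local_step` simply compares the local acqf value against `gamma`, and the `batch_limit` validator clamps the value to `n_restarts`. A short usage sketch built only on the data models added in this diff:

from bofire.data_models.strategies.api import LSRBO, BotorchOptimizer

# LSRBO takes a local step whenever the local acqf value reaches gamma
lsr = LSRBO(gamma=0.1)
assert lsr.is_local_step(acqf_local=0.3, acqf_global=0.4)       # 0.3 >= 0.1 -> local step
assert not lsr.is_local_step(acqf_local=0.05, acqf_global=0.4)  # 0.05 < 0.1 -> global step

# batch_limit is clamped to n_restarts by the field validator
opt = BotorchOptimizer(batch_limit=50, n_restarts=8)
assert opt.batch_limit == 8
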
diff --git a/tests/bofire/data_models/strategies/predictives/test_botorch.py b/tests/bofire/data_models/strategies/predictives/test_botorch.py
index c48e430dc..575ab3bd1 100644
--- a/tests/bofire/data_models/strategies/predictives/test_botorch.py
+++ b/tests/bofire/data_models/strategies/predictives/test_botorch.py
@@ -1,30 +1,15 @@
-import pytest
-
 from bofire.data_models.domain.api import Domain
 from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
-from bofire.data_models.strategies.api import LSRBO, SoboStrategy
-
+from bofire.data_models.strategies.api import BotorchOptimizer, SoboStrategy
 
-@pytest.mark.parametrize(
-    "gamma, acqf_local, acqf_global, expected",
-    [(0.1, 0.3, 0.4, True), (0.4, 0.1, 0.5, False)],
-)
-def test_LSRBO(gamma, acqf_local, acqf_global, expected):
-    assert (
-        LSRBO(gamma=gamma).is_local_step(acqf_local=acqf_local, acqf_global=acqf_global)
-        == expected
-    )
-
-
-def test_validate_batch_limit():
+def test_botorch_strategy():
     domain = Domain(
-        inputs=[ContinuousInput(key="a", bounds=(0, 1))],
-        outputs=[ContinuousOutput(key="b")],
+        inputs=[ContinuousInput(key="x", bounds=(0, 1))],
+        outputs=[ContinuousOutput(key="y")],
+    )
+    sobo = SoboStrategy(
+        domain=domain,
+        acquisition_optimizer=BotorchOptimizer(),
     )
-    strategy_data = SoboStrategy(domain=domain)
-    assert strategy_data.batch_limit == strategy_data.num_restarts
-    strategy_data = SoboStrategy(domain=domain, batch_limit=50)
-    assert strategy_data.batch_limit == strategy_data.num_restarts
-    strategy_data = SoboStrategy(domain=domain, batch_limit=2, num_restarts=4)
-    assert strategy_data.batch_limit == 2
-    assert strategy_data.num_restarts == 4
+    assert isinstance(sobo.acquisition_optimizer, BotorchOptimizer)
diff --git a/tests/bofire/strategies/dummy.py b/tests/bofire/strategies/dummy.py
index 20edc390f..e26ef111d 100644
--- a/tests/bofire/strategies/dummy.py
+++ b/tests/bofire/strategies/dummy.py
@@ -24,8 +24,7 @@ class DummyStrategyDataModel(data_models.BotorchStrategy):
     type: Literal["DummyStrategyDataModel"] = "DummyStrategyDataModel"  # type: ignore
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return my_type in [
             LinearEqualityConstraint,
             LinearInequalityConstraint,
@@ -92,8 +91,7 @@ class DummyPredictiveStrategyDataModel(data_models.PredictiveStrategy):
         "DummyPredictiveStrategyDataModel"
     )
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return my_type in [
             LinearEqualityConstraint,
             LinearInequalityConstraint,
diff --git a/tests/bofire/strategies/test_base.py b/tests/bofire/strategies/test_base.py
index 54095cb9b..733717040 100644
--- a/tests/bofire/strategies/test_base.py
+++ b/tests/bofire/strategies/test_base.py
@@ -71,8 +71,7 @@ def fixed_value(self):
 class DummyStrategyDataModel(data_models.BotorchStrategy):
     type: Literal["DummyStrategyDataModel"] = "DummyStrategyDataModel"
 
-    @classmethod
-    def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
+    def is_constraint_implemented(self, my_type: Type[Constraint]) -> bool:
         return my_type in [
             LinearEqualityConstraint,
             LinearInequalityConstraint,
@@ -300,7 +299,7 @@ def test_base_create(domain: Domain):
         ValueError,
         match="Argument is not power of two.",
     ):
-        DummyStrategyDataModel(domain=domain, num_raw_samples=5)
+        DummyStrategyDataModel(domain=domain, n_raw_samples=5)
 
 
 def test_base_invalid_descriptor_method():
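
For downstream users, the test changes above imply a small migration: optimizer settings move off the strategy onto a `BotorchOptimizer`, and the `num_*` kwargs become `n_*`. A before/after sketch using the same toy domain as `test_botorch_strategy`; the commented-out "before" line reflects the pre-patch API:

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.strategies.api import BotorchOptimizer, SoboStrategy

domain = Domain(
    inputs=[ContinuousInput(key="x", bounds=(0, 1))],
    outputs=[ContinuousOutput(key="y")],
)

# before this patch (kwargs lived on the strategy):
# strategy_data = SoboStrategy(domain=domain, num_raw_samples=1024, num_restarts=8)

# after this patch (kwargs live on the acquisition optimizer):
strategy_data = SoboStrategy(
    domain=domain,
    acquisition_optimizer=BotorchOptimizer(n_raw_samples=1024, n_restarts=8),
)
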
diff --git a/tests/bofire/strategies/test_optimizer.py b/tests/bofire/strategies/test_optimizer.py
new file mode 100644
index 000000000..8be04b743
--- /dev/null
+++ b/tests/bofire/strategies/test_optimizer.py
@@ -0,0 +1,78 @@
+from typing import Tuple
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from bofire.benchmarks import api as benchmarks
+from bofire.data_models import api as domain
+from bofire.data_models.features.api import ContinuousInput, DiscreteInput
+from bofire.data_models.strategies import api as data_models_strategies
+from bofire.strategies import api as strategies
+from bofire.strategies.predictives.acqf_optimization import get_optimizer
+
+
+@pytest.fixture(
+    params=[  # (benchmark, params, strategy, map_conti_inputs_to_discrete)
+        ("Himmelblau", {}, "SoboStrategy", False),
+        ("DTLZ2", {"dim": 2, "num_objectives": 2}, "AdditiveSoboStrategy", False),
+        (
+            "Ackley",
+            {"num_categories": 3, "categorical": True, "dim": 4},
+            "SoboStrategy",
+            False,
+        ),
+        ("Detergent", {}, "SoboStrategy", False),
+        (
+            "Ackley",
+            {"num_categories": 3, "categorical": True, "dim": 3},
+            "SoboStrategy",
+            True,
+        ),  # this is for testing the "all-categorical" use case
+    ]
+)
+def benchmark(request) -> Tuple[benchmarks.Benchmark, strategies.PredictiveStrategy]:
+    benchmark_name, params, strategy, map_conti_inputs_to_discrete = request.param
+    bm = getattr(benchmarks, benchmark_name)(**params)
+
+    if map_conti_inputs_to_discrete:
+        # replace each continuous input with a discrete input of the same name, but only 5 possible values
+        for idx, ft in enumerate(bm.domain.inputs.features):
+            if isinstance(ft, ContinuousInput):
+                bm.domain.inputs.features[idx] = DiscreteInput(
+                    key=ft.key, values=np.linspace(ft.bounds[0], ft.bounds[1], 5)
+                )
+
+    strategy = getattr(data_models_strategies, strategy)(domain=bm.domain)
+    return bm, strategy
+
+
+@pytest.fixture()
+def optimization_scope(benchmark) -> Tuple[domain.Domain, dict, pd.DataFrame, list]:
+    """Prepare the domain, preprocessing specs, experiments, and acqfs for the optimizer."""
+    benchmark, strategy_data = benchmark
+    domain = benchmark.domain
+
+    strategy = strategies.map(strategy_data)
+
+    experiments = benchmark.f(domain.inputs.sample(10), return_complete=True)
+    strategy.tell(experiments=experiments)
+    input_preprocessing_specs = strategy.input_preprocessing_specs
+    acqfs = strategy._get_acqfs(2)
+
+    return domain, input_preprocessing_specs, experiments, acqfs
+
+
+def test_optimizer(optimization_scope):
+    domain, input_preprocessing_specs, experiments, acqfs = optimization_scope
+
+    optimizer_data_model = data_models_strategies.BotorchOptimizer()
+    optimizer = get_optimizer(optimizer_data_model)
+
+    candidates, acqf_vals = optimizer.optimize(
+        candidate_count=2,
+        acqfs=acqfs,
+        domain=domain,
+        input_preprocessing_specs=input_preprocessing_specs,
+        experiments=experiments,
+    )
diff --git a/tests/bofire/strategies/test_qparego.py b/tests/bofire/strategies/test_qparego.py
index bcca646f5..5ee865484 100644
--- a/tests/bofire/strategies/test_qparego.py
+++ b/tests/bofire/strategies/test_qparego.py
@@ -87,7 +87,7 @@ def test(benchmark_factory):
     # init strategy
     data_model = data_models.QparegoStrategy(
         domain=benchmark.domain,
-        num_restarts=1,
+        n_restarts=1,
     )
     my_strategy = QparegoStrategy(data_model=data_model)
     my_strategy.tell(experiments)
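
The new `test_optimizer.py` above exercises the decoupled optimization path: map a strategy, `tell` it data, then hand its acqfs to a standalone optimizer obtained via `get_optimizer`. A condensed sketch of that flow; all calls mirror the test, with `Himmelblau` as the benchmark:

from bofire.benchmarks.api import Himmelblau
from bofire.data_models.strategies.api import BotorchOptimizer, SoboStrategy
from bofire.strategies.api import map as map_strategy
from bofire.strategies.predictives.acqf_optimization import get_optimizer

bench = Himmelblau()
strategy = map_strategy(SoboStrategy(domain=bench.domain))

# fit the strategy on a few random evaluations
experiments = bench.f(bench.domain.inputs.sample(10), return_complete=True)
strategy.tell(experiments=experiments)

# optimize the strategy's acqfs with a standalone optimizer
optimizer = get_optimizer(BotorchOptimizer())
candidates, acqf_vals = optimizer.optimize(
    candidate_count=2,
    acqfs=strategy._get_acqfs(2),  # private hook, used the same way in the test
    domain=bench.domain,
    input_preprocessing_specs=strategy.input_preprocessing_specs,
    experiments=experiments,
)
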
diff --git a/tests/bofire/strategies/test_sobo.py b/tests/bofire/strategies/test_sobo.py
index 3c0b3050e..a852dbddc 100644
--- a/tests/bofire/strategies/test_sobo.py
+++ b/tests/bofire/strategies/test_sobo.py
@@ -41,8 +41,9 @@
     MaximizeObjective,
     MaximizeSigmoidObjective,
 )
-from bofire.data_models.strategies.api import LSRBO
+from bofire.data_models.strategies.api import BotorchOptimizer
 from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
+from bofire.data_models.strategies.predictives.acqf_optimization import LSRBO
 from bofire.data_models.unions import to_list
 from bofire.strategies.api import CustomSoboStrategy, RandomStrategy, SoboStrategy
 from tests.bofire.strategies.test_base import domains
@@ -398,7 +399,7 @@ def test_sobo_lsrbo():
     strategy_data = data_models.SoboStrategy(
         domain=bench.domain,
         seed=42,
-        local_search_config=LSRBO(gamma=0),
+        acquisition_optimizer=BotorchOptimizer(local_search_config=LSRBO(gamma=0)),
     )
     strategy = SoboStrategy(data_model=strategy_data)
     strategy.tell(experiments)
@@ -408,7 +409,7 @@
     strategy_data = data_models.SoboStrategy(
         domain=bench.domain,
         seed=42,
-        local_search_config=LSRBO(gamma=500000),
+        acquisition_optimizer=BotorchOptimizer(local_search_config=LSRBO(gamma=500000)),
     )
     strategy = SoboStrategy(data_model=strategy_data)
     strategy.tell(experiments)
@@ -424,9 +425,15 @@ def test_sobo_get_optimizer_options():
         ],
         outputs=[ContinuousOutput(key="c")],  # type: ignore
     )
-    strategy_data = data_models.SoboStrategy(domain=domain, maxiter=500, batch_limit=4)
+    strategy_data = data_models.SoboStrategy(
+        domain=domain,
+        acquisition_optimizer=BotorchOptimizer(maxiter=500, batch_limit=4),
+    )
     strategy = SoboStrategy(data_model=strategy_data)
-    assert strategy._get_optimizer_options() == {"maxiter": 500, "batch_limit": 4}
+    assert strategy.acqf_optimizer._get_optimizer_options(strategy.domain) == {
+        "maxiter": 500,
+        "batch_limit": 4,
+    }
     domain = Domain(
         inputs=[  # type: ignore
             ContinuousInput(key="a", bounds=(0, 1)),
@@ -442,9 +449,15 @@
             ),
         ],
     )
-    strategy_data = data_models.SoboStrategy(domain=domain, maxiter=500, batch_limit=4)
+    strategy_data = data_models.SoboStrategy(
+        domain=domain,
+        acquisition_optimizer=BotorchOptimizer(maxiter=500, batch_limit=4),
+    )
     strategy = SoboStrategy(data_model=strategy_data)
-    assert strategy._get_optimizer_options() == {"maxiter": 500, "batch_limit": 1}
+    assert strategy.acqf_optimizer._get_optimizer_options(strategy.domain) == {
+        "maxiter": 500,
+        "batch_limit": 1,
+    }
 
 
 def test_sobo_interpoint():
diff --git a/tutorials/benchmarks/005-Hartmann_with_nchoosek.ipynb b/tutorials/benchmarks/005-Hartmann_with_nchoosek.ipynb
index 8693a85af..41a02af85 100644
--- a/tutorials/benchmarks/005-Hartmann_with_nchoosek.ipynb
+++ b/tutorials/benchmarks/005-Hartmann_with_nchoosek.ipynb
@@ -143,8 +143,8 @@
     "    data_model = SoboStrategy(\n",
     "        domain=domain,\n",
     "        acquisition_function=qLogEI(),\n",
-    "        num_raw_samples=512,\n",
-    "        num_restarts=24,\n",
+    "        n_raw_samples=512,\n",
+    "        n_restarts=24,\n",
     "    )\n",
     "    return strategies.map(data_model)\n",
     "\n",
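
The notebook change above is only the kwarg rename. For reference, a runnable equivalent of the updated cell, with a stand-in continuous domain in place of the tutorial's Hartmann-with-NChooseK domain; this assumes, as the notebook does after this patch, that `SoboStrategy` still accepts `n_raw_samples`/`n_restarts` directly:

import bofire.strategies.api as strategies
from bofire.data_models.acquisition_functions.api import qLogEI
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.strategies.api import SoboStrategy

# stand-in domain; the tutorial uses the Hartmann benchmark domain with an NChooseK constraint
domain = Domain(
    inputs=[ContinuousInput(key=f"x_{i}", bounds=(0, 1)) for i in range(6)],
    outputs=[ContinuousOutput(key="y")],
)

data_model = SoboStrategy(
    domain=domain,
    acquisition_function=qLogEI(),
    n_raw_samples=512,
    n_restarts=24,
)
strategy = strategies.map(data_model)
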