Move variance of fixed features check to predictive strategy #441

Merged: 8 commits, Oct 8, 2024
bofire/strategies/predictives/predictive.py: 20 additions & 0 deletions
@@ -5,6 +5,7 @@
 import pandas as pd
 from pydantic import PositiveInt

+from bofire.data_models.features.task import TaskInput
 from bofire.data_models.strategies.api import Strategy as DataModel
 from bofire.data_models.types import InputTransformSpecs
 from bofire.strategies.data_models.candidate import Candidate
@@ -83,6 +84,25 @@ def tell(
             self.set_experiments(experiments)
         else:
             self.add_experiments(experiments)
+        # we check here that the experiments do not have completely fixed columns
+        cleaned_experiments = (
+            self.domain.outputs.preprocess_experiments_all_valid_outputs(
+                experiments=experiments
+            )
+        )
+
+        fixed_nontasks = (
+            feat
+            for feat in self.domain.inputs.get_fixed()
+            if not isinstance(feat, TaskInput)
+        )
+        for feature in fixed_nontasks:
+            fixed_value = feature.fixed_value()
+            assert fixed_value is not None
+            if (cleaned_experiments[feature.key] == fixed_value[0]).all():
+                raise ValueError(
+                    f"No variance in experiments for fixed feature {feature.key}"
+                )
         if retrain and self.has_sufficient_experiments():
            self.fit()
         # we have a separate _tell here for things that are relevant when setting up the strategy but unrelated

Review thread on the `fixed_nontasks` generator:

Contributor: Just one question: what is the purpose of a multitask optimization when your historical data is only for one task and you only want candidates in the future for the same task?

@bertiqwerty (Contributor, Author), Oct 8, 2024: Sorry. I wasn't clear in my formulation above. I have historical data of task 1 and I want to optimize task 2. The data I measure during optimization is also for task 2.
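To make the thread's point concrete, here is a minimal sketch of what the relocated check accepts and rejects (hypothetical feature keys; assumes the bofire feature APIs exercised in the tests below):

    import pandas as pd

    from bofire.data_models.features.api import ContinuousInput, TaskInput

    # a continuous input fixed to a single value via degenerate bounds ...
    fixed_x = ContinuousInput(key="x_fixed", bounds=(1.0, 1.0))
    # ... and a task input that is "fixed" because only task_2 is allowed
    task = TaskInput(
        key="task", categories=["task_1", "task_2"], allowed=[False, True]
    )

    experiments = pd.DataFrame(
        {"x_fixed": [1.0, 1.0, 1.0], "task": ["task_2"] * 3}
    )

    # a fixed non-task feature whose column has no variance is rejected,
    # since a surrogate learns nothing from a constant column:
    assert (experiments["x_fixed"] == fixed_x.fixed_value()[0]).all()  # raises in tell()

    # a constant task column is normal in transfer-learning setups
    # (historical data for task_1, candidates only for task_2), so
    # TaskInputs are skipped by the new check:
    assert (experiments["task"] == task.fixed_value()[0]).all()  # accepted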
bofire/strategies/strategy.py: 0 additions & 11 deletions
@@ -75,17 +75,6 @@ def tell(
             self.set_experiments(experiments=experiments)
         else:
             self.add_experiments(experiments=experiments)
-        # we check here that the experiments do not have completely fixed columns
-        cleaned_experiments = (
-            self.domain.outputs.preprocess_experiments_all_valid_outputs(
-                experiments=experiments
-            )
-        )
-        for feature in self.domain.inputs.get_fixed():
-            if (cleaned_experiments[feature.key] == feature.fixed_value()[0]).all():  # type: ignore
-                raise ValueError(
-                    f"No variance in experiments for fixed feature {feature.key}"
-                )
         self._tell()

     def _tell(self) -> None:
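The deleted block reappears above in `PredictiveStrategy.tell`, extended with the `TaskInput` exclusion and an explicit `assert fixed_value is not None` in place of the `# type: ignore`.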
setup.py: 0 additions & 1 deletion
@@ -55,7 +55,6 @@
         "xgb": ["xgboost>=1.7.5"],
         "cheminfo": ["rdkit>=2023.3.2", sklearn_dependency, "mordred"],
         "tests": [
-            "mock",
             "mopti",
             "pyright==1.1.305",
             "pytest",
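The standalone `mock` package is dropped from the test extras in favor of the standard library's `unittest.mock`; the matching import change is in tests/bofire/strategies/test_strategy.py below.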
tests/bofire/strategies/dummy.py: 4 additions & 3 deletions
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pandas as pd
from botorch.acquisition.acquisition import AcquisitionFunction
from pydantic.types import NonNegativeInt

import bofire.data_models.strategies.api as data_models
Expand Down Expand Up @@ -86,7 +87,7 @@ def has_sufficient_experiments(
return len(self.experiments) >= 3


class DummyPredictiveStrategyDataModel(data_models.Strategy):
class DummyPredictiveStrategyDataModel(data_models.PredictiveStrategy):
type: Literal[
"DummyPredictiveStrategyDataModel"
] = "DummyPredictiveStrategyDataModel"
Expand Down Expand Up @@ -194,8 +195,8 @@ def _ask(
f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}"
)

def _get_acqfs(self, n: int):
pass
def _get_acqfs(self, n: int) -> List[AcquisitionFunction]:
return []

def _choose_from_pool(
self,
Expand Down
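With its data model now derived from `data_models.PredictiveStrategy` and `_get_acqfs` returning a typed empty list instead of `None`, the dummy predictive strategy serves as a minimal stand-in for a real predictive strategy in the no-variance test below.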
tests/bofire/strategies/test_multitask.py: 72 additions & 24 deletions
@@ -7,13 +7,39 @@
 from bofire.data_models.domain.api import Domain, Inputs, Outputs
 from bofire.data_models.features.api import ContinuousInput, ContinuousOutput, TaskInput
 from bofire.data_models.objectives.api import MaximizeObjective
-from bofire.data_models.strategies.api import SoboStrategy
+from bofire.data_models.strategies.api import (
+    RandomStrategy,
+    SoboStrategy,
+)
 from bofire.data_models.surrogates.api import (
     BotorchSurrogates,
     MultiTaskGPSurrogate,
 )


+def _task_1_f(x):
+    return np.sin(x * 2 * np.pi)
+
+
+def _task_2_f(x):
+    return 0.9 * np.sin(x * 2 * np.pi) - 0.2 + 0.2 * np.cos(x * 3 * np.pi)
+
+
+def _domain(task_input):
+    input_features = [
+        ContinuousInput(key="x", bounds=(0, 1)),
+        task_input,
+    ]
+
+    objective = MaximizeObjective(w=1)
+
+    inputs = Inputs(features=input_features)
+
+    output_features = [ContinuousOutput(key="y", objective=objective)]
+    outputs = Outputs(features=output_features)
+    return Domain(inputs=inputs, outputs=outputs)
+
+
 @pytest.mark.parametrize(
     "task_input",
     [
@@ -23,17 +49,12 @@
 )
 def test_sobo_with_multitask(task_input):
     # set the data
-    def task_1_f(x):
-        return np.sin(x * 2 * np.pi)
-
-    def task_2_f(x):
-        return 0.9 * np.sin(x * 2 * np.pi) - 0.2 + 0.2 * np.cos(x * 3 * np.pi)
-
     task_1_x = np.linspace(0.6, 1, 4)
-    task_1_y = task_1_f(task_1_x)
+    task_1_y = _task_1_f(task_1_x)

     task_2_x = np.linspace(0, 1, 15)
-    task_2_y = task_2_f(task_2_x)
+    task_2_y = _task_2_f(task_2_x)

     experiments = pd.DataFrame(
         {
@@ -43,27 +64,15 @@ def task_2_f(x):
         }
     )

-    input_features = [
-        ContinuousInput(key="x", bounds=(0, 1)),
-        task_input,
-    ]
-
-    objective = MaximizeObjective(w=1)
-
-    inputs = Inputs(features=input_features)
-
-    output_features = [ContinuousOutput(key="y", objective=objective)]
-    outputs = Outputs(features=output_features)
-
-    surrogate_data = [MultiTaskGPSurrogate(inputs=inputs, outputs=outputs)]
+    domain = _domain(task_input)
+    surrogate_data = [
+        MultiTaskGPSurrogate(inputs=domain.inputs, outputs=domain.outputs)
+    ]

     surrogate_specs = BotorchSurrogates(surrogates=surrogate_data)

     strategy_data_model = SoboStrategy(
-        domain=Domain(
-            inputs=inputs,
-            outputs=outputs,
-        ),
+        domain=domain,
         surrogate_specs=surrogate_specs,
         acquisition_function=qLogEI(),
     )
@@ -77,3 +86,42 @@ def task_2_f(x):
         candidate["task"].item()
         == task_input.categories[task_input.allowed.index(True)]
     )
+
+
+def test_nosurrogate_multitask():
+    def test(strat_data_model, **kwargs):
+        task_input = TaskInput(
+            key="task", categories=["task_1", "task_2"], allowed=[False, True]
+        )
+        task_1_x = np.linspace(0.6, 1, 4)
+        task_1_y = _task_1_f(task_1_x)
+        experiments = pd.DataFrame(
+            {
+                "x": task_1_x,
+                "y": task_1_y,
+                "task": ["task_1"] * len(task_1_x),
+            }
+        )
+        domain = _domain(task_input)
+        dm = strat_data_model(domain=domain, **kwargs)
+
+        strat = strategies.map(dm)
+        strat.tell(experiments)
+        candidate = strat.ask(1)
+        assert len(candidate) == 1
+
+        task_2_x = np.linspace(0, 1, 15)
+        task_2_y = _task_2_f(task_2_x)
+        experiments = pd.DataFrame(
+            {
+                "x": np.concatenate([task_1_x, task_2_x]),
+                "y": np.concatenate([task_1_y, task_2_y]),
+                "task": ["task_1"] * len(task_1_x) + ["task_2"] * len(task_2_x),
+            }
+        )
+        strat.tell(experiments)
+        candidate = strat.ask(1)
+        assert len(candidate) == 1
+
+    test(RandomStrategy)
+    # test(DoEStrategy, formula="linear")
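This new test exercises the non-predictive path: `RandomStrategy` does not derive from `PredictiveStrategy`, so after the move its `tell` must accept experiments even when a fixed feature's column shows no variance, first with task_1 history only and then with data for both tasks. The `DoEStrategy` variant is left commented out.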
tests/bofire/strategies/test_strategy.py: 5 additions & 1 deletion

@@ -1,6 +1,6 @@
 from typing import List
+from unittest import mock

-import mock
 import numpy as np
 import pandas as pd
 import pytest
@@ -246,6 +246,10 @@ def test_strategy_no_variance():
     strategy = dummy.DummyStrategy(
         data_model=dummy.DummyStrategyDataModel(domain=domain)
     )
+    strategy.tell(experiments)
+    strategy = dummy.DummyPredictiveStrategy(
+        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain)
+    )
     with pytest.raises(ValueError):
         strategy.tell(experiments)
     # introduce variance but in an invalid experiment
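Both halves of the move are asserted here: `DummyStrategy.tell` now accepts the no-variance experiments, while `DummyPredictiveStrategy.tell` on the same data still raises `ValueError`.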