NP Regression Model w/ LIG Acquisition #2683

Open · wants to merge 18 commits into main
119 changes: 119 additions & 0 deletions botorch_community/acquisition/latent_information_gain.py
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Latent Information Gain Acquisition Function for Neural Process Models.

References:

.. [Wu2023arxiv]
Wu, D., Niu, R., Chinazzi, M., Vespignani, A., Ma, Y.-A., & Yu, R. (2023).
Deep Bayesian Active Learning for Accelerating Stochastic Simulation.
arXiv preprint arXiv:2106.02770. Retrieved from https://arxiv.org/abs/2106.02770

Contributor: eibarolle
"""

from __future__ import annotations

import torch
from botorch.acquisition import AcquisitionFunction
from botorch.models.model import Model
from botorch_community.models.np_regression import NeuralProcessModel
from torch import Tensor
# reference: https://arxiv.org/abs/2106.02770
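# A brief sketch of the quantity estimated below (hedged summary of [Wu2023arxiv]):
# for candidates x, LIG is the expected KL divergence between the latent posterior
# after adding (x, y_hat) and the latent prior conditioned on the current context,
#     LIG(x) ≈ (1 / S) * sum_s KL( q(z | D ∪ {x, y_hat_s}) || q(z | D) ),
# where y_hat_s are decoder predictions under S samples of the latent variable z.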

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class LatentInformationGain(AcquisitionFunction):
def __init__(
self,
        model: Model,
num_samples: int = 10,
min_std: float = 0.01,
scaler: float = 0.5,
) -> None:
"""
Latent Information Gain (LIG) Acquisition Function.
Uses the model's built-in posterior function to generalize KL computation.

Args:
model: The model class to be used, defaults to NeuralProcessModel.
num_samples (int): Number of samples for calculation, defaults to 10.
min_std: Float representing the minimum possible standardized std,
defaults to 0.01.
scaler: Float scaling the std, defaults to 0.5.
"""
super().__init__(model)
self.model = model
self.num_samples = num_samples
self.min_std = min_std
self.scaler = scaler

def forward(self, candidate_x: Tensor) -> Tensor:
"""
        Compute the Latent Information Gain acquisition value using the model's
        posterior.

        Args:
            candidate_x: Candidate input points, as a Tensor. Ideally in the shape
                (N, q, D).

        Returns:
            torch.Tensor: The LIG scores (Monte Carlo averaged KL divergences),
                with shape (N, q).
"""
candidate_x = candidate_x.to(device)
if candidate_x.dim() == 2:
candidate_x = candidate_x.unsqueeze(0) # Ensure (N, q, D) format
N, q, D = candidate_x.shape

kl = torch.zeros(N, q, device=device)
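        # KL estimates are accumulated over self.num_samples iterations and
        # averaged in the return statement below.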

        if isinstance(self.model, NeuralProcessModel):
            x_c, y_c, _, _ = self.model.random_split_context_target(
                self.model.train_X[:, 0], self.model.train_Y
            )
            z_mu_context, z_logvar_context = self.model.data_to_z_params(
                x_c, y_c, xy_dim=-1
            )
for _ in range(self.num_samples):
# Taking Samples/Predictions
samples = self.model.sample_z(z_mu_context, z_logvar_context)
y_pred = self.model.decoder(candidate_x.view(-1, D), samples)
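                # y_pred: decoder predictions for the flattened candidates,
                # conditioned on a single latent sample.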
# Combining the data
combined_x = torch.cat(
[x_c, candidate_x.view(-1, D)], dim=0
).to(device)
                combined_y = torch.cat([y_c, y_pred], dim=0).to(device)
# Computing posterior variables
z_mu_posterior, z_logvar_posterior = self.model.data_to_z_params(
combined_x, combined_y
)
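                # Map the log-variances to bounded positive stds via a sigmoid;
                # the min_std/scaler parameterization is assumed to mirror the
                # one used inside NeuralProcessModel when sampling z.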
std_prior = self.min_std + self.scaler * torch.sigmoid(z_logvar_context)
std_posterior = self.min_std + self.scaler * torch.sigmoid(
z_logvar_posterior
)
p = torch.distributions.Normal(z_mu_posterior, std_posterior)
q = torch.distributions.Normal(z_mu_context, std_prior)
kl_divergence = torch.distributions.kl_divergence(p, q).sum(dim=-1)
kl += kl_divergence
else:
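            # Fallback for models without an explicit latent encoding: use the
            # KL between the posterior at the candidate points and the posterior
            # at the training inputs as a proxy for information gain.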
for _ in range(self.num_samples):
posterior_prior = self.model.posterior(self.model.train_X)
posterior_candidate = self.model.posterior(candidate_x.view(-1, D))

kl_divergence = torch.distributions.kl_divergence(
posterior_candidate.mvn, posterior_prior.mvn
).sum(dim=-1)
kl += kl_divergence

return kl / self.num_samples
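
A minimal usage sketch, assuming the NeuralProcessModel from this PR accepts train_X/train_Y tensors in its constructor; the constructor arguments, toy data, and shapes below are illustrative assumptions rather than the PR's confirmed API:

import torch
from botorch_community.acquisition.latent_information_gain import LatentInformationGain
from botorch_community.models.np_regression import NeuralProcessModel

# Hypothetical toy data: (n, d) inputs and (n, m) targets.
train_X = torch.rand(20, 1)
train_Y = torch.sin(6.0 * train_X)

# Constructor signature is an assumption for illustration; see np_regression.py.
model = NeuralProcessModel(train_X=train_X, train_Y=train_Y)

acq = LatentInformationGain(model=model, num_samples=10)

# Candidates in the (N, q, D) batch format expected by forward().
candidates = torch.rand(5, 1, 1)
scores = acq(candidates)  # shape (N, q)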