Optimization with GP

Author

Nam-Anh Tran

Published

July 7, 2024

if(!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(tidyverse, plgp, laGP, lhs, mvtnorm, plotly, knitr, kableExtra, ggpubr)

1 Motivations

We aim to optimize the function \(f(x)\), i.e.

\[ x_* = \arg\max_{x \in \mathcal X}f(x) \]

The traditional solution proceeds by evaluating the function \(f\) and taking its derivative. Many variants of this framework have been proposed to overcome the challenges of differentiation, since that step is often far from trivial. The impasse can also arise at the evaluation step itself: when \(f\) is a black box, taking its derivative is impossible. Bayesian optimization offers a solution to this problem.

2 Basic examples

Let us consider the following function, which we treat as a black box. The examples in this section follow Gramacy.1

1. Gramacy RB. Surrogates: Gaussian process modeling, design, and optimization for the applied sciences. Chapman & Hall/CRC; 2020.
obj_func <- function(X)
{
  # Accept a single input vector or a matrix of inputs in the unit square [0, 1]^2
  if(is.null(nrow(X))) X <- matrix(X, nrow=1)
  # Standardization constants for the log of the Goldstein-Price function
  m <- 8.6928
  s <- 2.4269
  # Map the unit square onto the native domain [-2, 2]^2
  x1 <- 4*X[,1] - 2
  x2 <- 4*X[,2] - 2

  a <- 1 + (x1 + x2 + 1)^2 *
    (19 - 14*x1 + 3*x1^2 - 14*x2 + 6*x1*x2 + 3*x2^2)
  b <- 30 + (2*x1 - 3*x2)^2 *
    (18 - 32*x1 + 12*x1^2 + 48*x2 - 36*x1*x2 + 27*x2^2)
  # Log transform, then standardize
  f <- log(a*b)
  f <- (f - m)/s
  return(f)
}
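This is a standardized log transform of the Goldstein-Price function, whose global minimum sits at \((0, -1)\) in the native domain, i.e. at \((0.5, 0.25)\) after rescaling to the unit square. As a quick sanity check (a small addition, not part of the original analysis), we can evaluate the function there:

# The known minimizer of Goldstein-Price, mapped to the unit square
obj_func(c(0.5, 0.25))
# approximately -3.13, matching the smallest value reported in Table 1 below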
#--------------------

xx = seq(0, 1, 0.01)
xx2d = expand.grid(xx, xx)
yy = obj_func(xx2d)
# expand.grid varies its first column fastest, so byrow = TRUE puts the first
# grid variable along columns (the plotly x-axis) and the second along rows
yymat = matrix(yy, nrow = length(xx), byrow = TRUE)

fig1<-
  plot_ly(x = xx,y = xx, z = yymat, type = "surface", colors = c("gray50"),
        showscale = F, opacity = .7)

fig1
Figure 1: The surface of the objective function.
fig2<-
  plot_ly(x = xx,y = xx, z = yymat, type = "contour",
        showscale = F, opacity = .7, colors = "gray", contours = list(showlabels = TRUE))
fig2
Figure 2: The contour of the objective function.
set.seed(4444)
ninit <- 20
X <- randomLHS(ninit, 2) # space-filling initial design via Latin hypercube sampling
y <- obj_func(X)
####
fig2|> add_trace(x = X[,1], y = X[,2], type = "scatter", mode = "markers", marker = list(color = "blue"), showlegend = F)
Figure 3: Contour and the initial design.
# Set search ranges/priors for the lengthscales, fit a separable GP surrogate,
# and maximize the likelihood over the lengthscales
da <- darg(list(mle=TRUE, max=0.5), randomLHS(1000, 2))
gpi <- newGPsep(X, y, d=da$start, g=1e-6, dK=TRUE)
mleGPsep(gpi, param="d", tmin=da$min, tmax=da$max, ab=da$ab)$msg
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
####
# Surrogate objective: the GP predictive mean at a single input x
obj.mean <- function(x, gpi) predGPsep(gpi, matrix(x, nrow=1), lite=TRUE)$mean
####

opt_ls <- c()

repeat {
  # Locally optimize the surrogate mean, starting from the current best point
  m <- which.min(y)
  opt <- optim(X[m,], obj.mean, lower=0, upper=1,
               method="L-BFGS-B", gpi=gpi)
  # Evaluate the true objective at the candidate and record it
  ynew <- obj_func(opt$par)
  opt_ls <- rbind(opt_ls, c(opt$par, ynew))
  # Stop when the new value barely differs from the previous evaluation
  if(abs(ynew - y[length(y)]) < 1e-4) break
  # Otherwise fold the new observation into the GP and the design
  updateGPsep(gpi, matrix(opt$par, nrow=1), ynew)
  #mle <- mleGPsep(gpi, param="d", tmin=da$min, tmax=da$max, ab=da$ab)
  X <- rbind(X, opt$par)
  y <- c(y, ynew)
}
deleteGPsep(gpi)
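Note that this loop is purely exploitative: it always descends the surrogate mean from the incumbent, so it can stall in a local minimum. The expected improvement policy in the next section balances exploitation against exploration. A quick way to report the incumbent solution (a small addition):

# Report the best design point found and its objective value
best <- which.min(y)
c(X[best,], y = y[best])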

plot_ly(x = xx, y = xx, z = yymat, type = "contour",
        showscale = F, opacity = .7, contours = list(showlabels = TRUE), colors = "gray")|>
  add_trace(x = X[,1], y = X[,2], type = "scatter", mode = "markers", marker = list(color = "blue"), showlegend = F)|>
  add_trace(x = opt_ls[,1], y = opt_ls[,2], type = "scatter", mode = "markers+lines",
            marker = list(color = "red"), line = list(color = "red"), showlegend = F)
Figure 4: The red dots trace the candidate optima visited across iterations of the search.
opt_ls|>
  data.frame()|>
  as_tibble()|>
  rename(Y = "X3")|>
  round(5)|>
  arrange(Y)|>
  knitr::kable()|>
  kable_styling(bootstrap_options = c("striped", "hover"))|>
  row_spec(0, background = "deeppink")|>
  scroll_box(height = "300px")
Table 1: Candidate points visited during the search, sorted in increasing order of the objective value Y.
X1 X2 Y
0.49910 0.24898 -3.12816
0.50229 0.25512 -3.10768
0.50269 0.25584 -3.10134
0.50272 0.25590 -3.10084
0.50273 0.25591 -3.10069
0.50273 0.25592 -3.10064
0.49405 0.24024 -3.04842
0.48062 0.23143 -2.84145
0.45863 0.21618 -2.45282
0.45283 0.24301 -2.43200
0.48977 0.30641 -2.13294
0.55978 0.24888 -2.07283
0.40076 0.29195 -1.73130
0.42763 0.14561 -0.88299
0.53679 0.14484 -0.85175
0.72623 0.24220 -0.05238

3 The expected improvement policy

Suppose that we aim to minimize the objective function \(f\), i.e. \[ x_* = \arg\min_{x \in \mathcal{X}}f(x). \]

We need a measure of improvement, and the following function is commonly used: \[ I(x) = \max\{0, f^{min}_n - y(x)\}, \] where \(f^{min}_n\) is the smallest value observed among the first \(n\) evaluations. The next design point is then chosen to maximize the expected improvement (EI), computed under the posterior predictive distribution. The expectation can be approximated by Monte Carlo: \[ EI(x) \approx \frac{1}{R}\sum_{r=1}^R \max\{0, f^{min}_n - y^{(r)}\}, \] where the \(y^{(r)}\) are draws from \(y(x) \mid \mathcal D_n\). Fortunately, under a Gaussian predictive distribution \(EI\) has the closed form \[ EI(x) = (f^{min}_n - \mu_n(x))\,\Phi\bigg(\frac{f^{min}_n - \mu_n(x)}{\sigma_n(x)}\bigg) + \sigma_n(x)\,\phi\bigg(\frac{f^{min}_n - \mu_n(x)}{\sigma_n(x)}\bigg). \]
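A minimal sketch of this closed form in R, reusing laGP's predGPsep for the posterior mean and variance. The function names EI and EI.mc are illustrative additions, not code from the original analysis, and the Monte Carlo check treats the predictive as Gaussian, as the closed form above does.

EI <- function(gpi, x, fmin)
{
  if(is.null(nrow(x))) x <- matrix(x, nrow=1)
  p <- predGPsep(gpi, x, lite=TRUE)  # posterior predictive mean and variance
  sigma <- sqrt(p$s2)
  d <- fmin - p$mean                 # predicted improvement over the incumbent
  dn <- d/sigma
  d*pnorm(dn) + sigma*dnorm(dn)      # closed-form expected improvement
}

# Monte Carlo approximation of the same quantity at a single point x
EI.mc <- function(gpi, x, fmin, R=10000)
{
  p <- predGPsep(gpi, matrix(x, nrow=1), lite=TRUE)
  yr <- rnorm(R, mean=p$mean, sd=sqrt(p$s2))  # draws from y(x) | D_n
  mean(pmax(0, fmin - yr))
}

To use this policy in the search loop above, one would replace obj.mean with the negative of EI (since optim minimizes), passing fmin = min(y) and refreshing it as new observations arrive.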