Optimization with GP

if(!library(pacman, logical.return = TRUE)) install.packages("pacman")
pacman::p_load(tidyverse, plgp, laGP, lhs, mvtnorm, plotly, knitr, kableExtra, ggpubr)
1 Motivations
We aim to optimize the function \(f(x)\), i.e.
\[ x_* = \arg\max_{x \in \mathcal X}f(x) \]
The traditional approach evaluates the function \(f\) and its derivatives. Many variants of this framework have been proposed to overcome the challenges of computing derivatives, since that step is often far from trivial. The difficulty can also arise at the function-evaluation step itself: when \(f\) is a black box, its structure is unknown and taking derivatives is impossible. Bayesian optimization emerged as a solution to this problem.
2 Basic examples
Let us consider the following function, a rescaled and log-transformed Goldstein-Price function, which we treat as a black box. Note that the examples in this section can be found in Gramacy et al.1
obj_func <- function(X)
{
  if(is.null(nrow(X))) X <- matrix(X, nrow=1)  # accept a single input vector
  m <- 8.6928   # centering constant
  s <- 2.4269   # scaling constant
  x1 <- 4*X[,1] - 2   # map inputs from [0,1] to [-2,2]
  x2 <- 4*X[,2] - 2
  a <- 1 + (x1 + x2 + 1)^2 *
    (19 - 14*x1 + 3*x1^2 - 14*x2 + 6*x1*x2 + 3*x2^2)
  b <- 30 + (2*x1 - 3*x2)^2 *
    (18 - 32*x1 + 12*x1^2 + 48*x2 - 36*x1*x2 + 27*x2^2)
  f <- log(a*b)     # log-transform the Goldstein-Price value
  f <- (f - m)/s    # center and scale
  return(f)
}
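As a quick sanity check (the evaluation points below are illustrative), the function accepts either a matrix of design points or a single input vector, and its value near \((0.5, 0.25)\) is close to the global minimum:

obj_func(c(0.5, 0.25))   # single vector input, approximately -3.13
obj_func(rbind(c(0.5, 0.25), c(0, 0)))   # matrix input, one value per row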
#--------------------
xx <- seq(0, 1, 0.01)                 # one-dimensional grid
xx2d <- expand.grid(xx, xx)           # 101 x 101 plotting grid on [0,1]^2
yy <- obj_func(xx2d)
yymat <- matrix(yy, nrow = length(xx), byrow = TRUE)
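Before fitting any surrogate, the dense grid already reveals roughly where the optimum lies; a minimal check using only the objects defined above:

xx2d[which.min(yy), ]   # grid point with the smallest value, near (0.5, 0.25)
min(yy)                 # approximately -3.13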
fig1 <-
  plot_ly(x = xx, y = xx, z = yymat, type = "surface", colors = c("gray50"),
          showscale = F, opacity = .7)
fig1

fig2 <-
  plot_ly(x = xx, y = xx, z = yymat, type = "contour",
          showscale = F, opacity = .7, colors = "gray", contours = list(showlabels = TRUE))
fig2

set.seed(4444)
ninit <- 20
X <- randomLHS(ninit, 2)
y <- obj_func(X)
####
fig2 |>
  add_trace(x = X[,1], y = X[,2], type = "scatter", mode = "markers",
            marker = list(color = "blue"), showlegend = F)

da <- darg(list(mle=TRUE, max=0.5), randomLHS(1000, 2))
gpi <- newGPsep(X, y, d=da$start, g=1e-6, dK=TRUE)
mleGPsep(gpi, param="d", tmin=da$min, tmax=da$max, ab=da$ab)$msg

[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
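With the lengthscales estimated, the fitted surrogate can be queried anywhere in the input space. As a minimal sketch (reusing the plotting grid from above), the predictive mean surface can be visualized in the same way as the true objective:

p <- predGPsep(gpi, as.matrix(xx2d), lite = TRUE)   # predictive mean and variance
pmat <- matrix(p$mean, nrow = length(xx), byrow = TRUE)
plot_ly(x = xx, y = xx, z = pmat, type = "contour",
        showscale = F, contours = list(showlabels = TRUE))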
####
obj.mean <- function(x, gpi) predGPsep(gpi, matrix(x, nrow=1), lite=TRUE)$mean  # surrogate objective: the GP predictive mean
####
opt_ls <- c()
repeat {
  m <- which.min(y)                        # best design point so far
  opt <- optim(X[m,], obj.mean, lower=0, upper=1,
               method="L-BFGS-B", gpi=gpi) # minimize the surrogate mean
  ynew <- obj_func(opt$par)                # evaluate the true objective
  opt_ls <- rbind(opt_ls, c(opt$par, ynew))
  if(abs(ynew - y[length(y)]) < 1e-4) break  # stop when progress stalls
  updateGPsep(gpi, matrix(opt$par, nrow=1), ynew)  # cheap sequential GP update
  #mle <- mleGPsep(gpi, param="d", tmin=da$min, tmax=da$max, ab=da$ab)
  X <- rbind(X, opt$par)
  y <- c(y, ynew)
}
deleteGPsep(gpi)
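After the loop terminates, the best point visited can be read off the stored trajectory (a small check on the opt_ls object built above):

opt_ls[which.min(opt_ls[,3]), ]   # best visited point, approximately (0.499, 0.249) with value -3.128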
plot_ly(x = xx, y = xx, z = yymat, type = "contour",
        showscale = F, opacity = .7, contours = list(showlabels = TRUE), colors = "gray") |>
  add_trace(x = X[,1], y = X[,2], type = "scatter", mode = "markers",
            marker = list(color = "blue"), showlegend = F) |>
  add_trace(x = opt_ls[,1], y = opt_ls[,2], type = "scatter", mode = "lines",
            line = list(color = "red"), showlegend = F)

compose(as_tibble, data.frame)(opt_ls) |>
  rename(Y = "X3") |>
  round(5) |>
  arrange(Y) |>
  knitr::kable() |>
  kable_styling(bootstrap_options = c("striped", "hover")) |>
  row_spec(0, background = "deeppink") |>
  scroll_box(height = "300px")

| X1 | X2 | Y |
|---|---|---|
| 0.49910 | 0.24898 | -3.12816 |
| 0.50229 | 0.25512 | -3.10768 |
| 0.50269 | 0.25584 | -3.10134 |
| 0.50272 | 0.25590 | -3.10084 |
| 0.50273 | 0.25591 | -3.10069 |
| 0.50273 | 0.25592 | -3.10064 |
| 0.49405 | 0.24024 | -3.04842 |
| 0.48062 | 0.23143 | -2.84145 |
| 0.45863 | 0.21618 | -2.45282 |
| 0.45283 | 0.24301 | -2.43200 |
| 0.48977 | 0.30641 | -2.13294 |
| 0.55978 | 0.24888 | -2.07283 |
| 0.40076 | 0.29195 | -1.73130 |
| 0.42763 | 0.14561 | -0.88299 |
| 0.53679 | 0.14484 | -0.85175 |
| 0.72623 | 0.24220 | -0.05238 |
3 The expected improvement policy
Suppose that we aim to minimize the objective function \(f\), i.e. \[ x_* = \arg\min_{x \in \mathcal{X}}f(x). \]
We need a measure of improvement, and the following function is commonly used: \[ I(x) = \max\{0, f^{\min}_m - y(x)\}, \] where \(f^{\min}_m\) is the smallest objective value observed after \(m\) evaluations. The next design point is then selected to maximize the expected improvement (EI), computed under the posterior predictive distribution. This expectation can be approximated by Monte Carlo: \[ EI(x) \approx \frac{1}{R}\sum_{r=1}^R \max\{0, f^{\min}_m - y^{(r)}\}, \] where the \(y^{(r)}\) are draws from \(y(x) \mid \mathcal D\). Fortunately, \(EI\) has a closed form: \[ EI(x) = (f^{\min}_m - \mu_n(x))\Phi\bigg(\frac{f^{\min}_m - \mu_n(x)}{\sigma_n(x)}\bigg) + \sigma_n(x)\phi\bigg(\frac{f^{\min}_m - \mu_n(x)}{\sigma_n(x)}\bigg), \] where \(\mu_n(x)\) and \(\sigma_n(x)\) are the posterior predictive mean and standard deviation, and \(\Phi\) and \(\phi\) denote the standard normal CDF and PDF.
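A minimal sketch of this closed-form EI in R, assuming a separable GP fitted with laGP as in the previous section (the function EI and its arguments below are illustrative, not part of laGP):

EI <- function(gpi, x, fmin) {
  if(is.null(nrow(x))) x <- matrix(x, nrow = 1)
  p <- predGPsep(gpi, x, lite = TRUE)    # posterior predictive mean and variance
  d <- fmin - p$mean                     # predicted improvement over the incumbent
  sigma <- sqrt(p$s2)
  dn <- d/sigma
  d*pnorm(dn) + sigma*dnorm(dn)          # closed-form expected improvement
}

Since optim performs minimization, one would maximize EI by passing its negation, e.g. function(x, gpi, fmin) -EI(gpi, x, fmin), in place of obj.mean in the search loop of the previous section.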