Preliminaries

First we specify the directory where mogp is installed so that the python is correctly imported. Your directory will be different from mine.

mogp_dir <- "~/Dropbox/BayesExeter/mogp_emulator"
setwd('..')
source("BuildEmulator/BuildEmulator.R")
source("HistoryMatching/HistoryMatching.R")
source("HistoryMatching/impLayoutplot.R")

We load the same data looked at in the tutorial on building emulators.

load("ConvectionModelExample.Rdata")
head(tData)
##          A_U  A_EPSILON         A_T       Noise
## 1 -0.7112332  0.5198038  0.56237588 -0.03849208
## 2 -0.2465941 -0.4163290  0.88555390 -0.02695154
## 3  0.3333848 -0.1316253  0.30027685 -0.04939970
## 4  0.4462722  0.8817862  0.78243180  0.01830642
## 5 -0.9065298 -0.3119412 -0.07690085 -0.04815524
## 6 -0.4732543  0.3305641 -0.98112145 -0.02420066
##   WAVE1_AYOTTE_24SC_zav.400.600.theta_5_6 WAVE1_AYOTTE_24SC_Ay.theta_5_6
## 1                                306.1641                     -0.3697345
## 2                                306.7235                     -0.5671676
## 3                                306.6203                     -0.4999037
## 4                                306.3918                     -0.3919627
## 5                                306.1552                     -0.3685847
## 6                                305.6225                     -0.3485485
##   WAVE1_AYOTTE_24SC_zav.400.600.WND_5_6
## 1                              13.82073
## 2                              12.25888
## 3                              11.82482
## 4                              11.94844
## 5                              14.73021
## 6                              13.17205

Build an emulator.

TestEm <- BuildNewEmulators(tData, HowManyEmulators = 3, meanFun = "fitted") 
## [1] "Max reduction is 0.141510566268957 using A_EPSILON"
## [1] "Max reduction is 0.0719279431890675 using A_U"
## [1] "Max reduction is 0.0593934956606317 using A_T"
## [1] "Max reduction is 0.0841015596079704 using A_EPSILON"
## [1] "Max reduction is 0.0215815680504046 using A_EPSILON"
## [1] "Max reduction is 0.0221974794878812 using A_EPSILON"
## [1] "Noise fitted, stopping algorithm"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_zav.400.600.theta_5_6 ~ A_EPSILON + 
##     I(A_EPSILON^2) + I(A_EPSILON^3) + I(A_EPSILON^4) + A_U + 
##     A_T + I(A_U * A_EPSILON) + I(A_T * A_EPSILON) + I(A_T * A_U), 
##     data = tData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15059 -0.06624  0.01615  0.05931  0.15216 
## 
## Coefficients:
##                     Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)        306.35982    0.03461 8852.437  < 2e-16 ***
## A_EPSILON           -0.31275    0.08180   -3.824  0.00106 ** 
## I(A_EPSILON^2)      -0.19500    0.21788   -0.895  0.38145    
## I(A_EPSILON^3)      -0.43915    0.12490   -3.516  0.00217 ** 
## I(A_EPSILON^4)       0.83401    0.24582    3.393  0.00289 ** 
## A_U                  0.30423    0.03234    9.409 8.73e-09 ***
## A_T                  0.33990    0.03172   10.715 9.78e-10 ***
## I(A_U * A_EPSILON)  -0.08042    0.05404   -1.488  0.15233    
## I(A_T * A_EPSILON)  -0.08069    0.06159   -1.310  0.20500    
## I(A_T * A_U)        -0.03824    0.06560   -0.583  0.56648    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09867 on 20 degrees of freedom
## Multiple R-squared:  0.9731, Adjusted R-squared:  0.961 
## F-statistic: 80.31 on 9 and 20 DF,  p-value: 1.004e-13
## 
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_zav.400.600.theta_5_6 ~ A_EPSILON + 
##     I(A_EPSILON^2) + I(A_EPSILON^3) + I(A_EPSILON^4) + A_U + 
##     A_T + I(A_U * A_EPSILON) + I(A_T * A_EPSILON) + I(A_T * A_U), 
##     data = tData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15059 -0.06624  0.01615  0.05931  0.15216 
## 
## Coefficients:
##                     Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)        306.35982    0.03461 8852.437  < 2e-16 ***
## A_EPSILON           -0.31275    0.08180   -3.824  0.00106 ** 
## I(A_EPSILON^2)      -0.19500    0.21788   -0.895  0.38145    
## I(A_EPSILON^3)      -0.43915    0.12490   -3.516  0.00217 ** 
## I(A_EPSILON^4)       0.83401    0.24582    3.393  0.00289 ** 
## A_U                  0.30423    0.03234    9.409 8.73e-09 ***
## A_T                  0.33990    0.03172   10.715 9.78e-10 ***
## I(A_U * A_EPSILON)  -0.08042    0.05404   -1.488  0.15233    
## I(A_T * A_EPSILON)  -0.08069    0.06159   -1.310  0.20500    
## I(A_T * A_U)        -0.03824    0.06560   -0.583  0.56648    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09867 on 20 degrees of freedom
## Multiple R-squared:  0.9731, Adjusted R-squared:  0.961 
## F-statistic: 80.31 on 9 and 20 DF,  p-value: 1.004e-13
## 
## [1] "1 I(A_T * A_U)"
## [1] "2 I(A_T * A_EPSILON)"
## [1] "3 I(A_U * A_EPSILON)"
## [1] "4 I(A_EPSILON^4)"
## [1] "5 I(A_EPSILON^3)"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_zav.400.600.theta_5_6 ~ A_EPSILON + 
##     I(A_EPSILON^2) + A_U + A_T, data = tData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31141 -0.07230  0.00132  0.06857  0.49418 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)    306.28286    0.04193 7304.864  < 2e-16 ***
## A_EPSILON       -0.57626    0.04796  -12.015 7.00e-12 ***
## I(A_EPSILON^2)   0.53877    0.09315    5.784 4.98e-06 ***
## A_U              0.32558    0.04794    6.792 4.06e-07 ***
## A_T              0.34208    0.04790    7.142 1.74e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.152 on 25 degrees of freedom
## Multiple R-squared:  0.9202, Adjusted R-squared:  0.9074 
## F-statistic: 72.06 on 4 and 25 DF,  p-value: 2.36e-13
## 
## [1] "Max reduction is 0.0516838339369975 using A_EPSILON"
## [1] "Max reduction is 0.0181512292708917 using A_T"
## [1] "Max reduction is 0.00908627733663028 using A_U"
## [1] "Noise fitted, stopping algorithm"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_Ay.theta_5_6 ~ A_EPSILON + A_T + 
##     A_U + I(A_T * A_EPSILON) + I(A_U * A_EPSILON) + I(A_U * A_T), 
##     data = tData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.122972 -0.017118  0.003162  0.026879  0.108563 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.466705   0.009108 -51.243  < 2e-16 ***
## A_EPSILON           0.173565   0.015924  10.900 1.47e-10 ***
## A_T                -0.078502   0.015793  -4.971 5.01e-05 ***
## A_U                -0.050460   0.015915  -3.171  0.00427 ** 
## I(A_T * A_EPSILON)  0.066547   0.030606   2.174  0.04022 *  
## I(A_U * A_EPSILON) -0.025424   0.025766  -0.987  0.33405    
## I(A_U * A_T)       -0.035683   0.031953  -1.117  0.27564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0496 on 23 degrees of freedom
## Multiple R-squared:  0.8819, Adjusted R-squared:  0.8511 
## F-statistic: 28.62 on 6 and 23 DF,  p-value: 1.439e-09
## 
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_Ay.theta_5_6 ~ A_EPSILON + A_T + 
##     A_U + I(A_T * A_EPSILON) + I(A_U * A_EPSILON) + I(A_U * A_T), 
##     data = tData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.122972 -0.017118  0.003162  0.026879  0.108563 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.466705   0.009108 -51.243  < 2e-16 ***
## A_EPSILON           0.173565   0.015924  10.900 1.47e-10 ***
## A_T                -0.078502   0.015793  -4.971 5.01e-05 ***
## A_U                -0.050460   0.015915  -3.171  0.00427 ** 
## I(A_T * A_EPSILON)  0.066547   0.030606   2.174  0.04022 *  
## I(A_U * A_EPSILON) -0.025424   0.025766  -0.987  0.33405    
## I(A_U * A_T)       -0.035683   0.031953  -1.117  0.27564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0496 on 23 degrees of freedom
## Multiple R-squared:  0.8819, Adjusted R-squared:  0.8511 
## F-statistic: 28.62 on 6 and 23 DF,  p-value: 1.439e-09
## 
## [1] "1 I(A_U * A_EPSILON)"
## [1] "2 I(A_U * A_T)"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_Ay.theta_5_6 ~ A_EPSILON + A_T + 
##     A_U + I(A_T * A_EPSILON), data = tData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.125503 -0.019057  0.002386  0.026174  0.097011 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.466250   0.009099 -51.240  < 2e-16 ***
## A_EPSILON           0.175425   0.015790  11.110 3.68e-11 ***
## A_T                -0.076189   0.015710  -4.850 5.50e-05 ***
## A_U                -0.052910   0.015894  -3.329   0.0027 ** 
## I(A_T * A_EPSILON)  0.064706   0.030065   2.152   0.0412 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04981 on 25 degrees of freedom
## Multiple R-squared:  0.8705, Adjusted R-squared:  0.8498 
## F-statistic: 42.01 on 4 and 25 DF,  p-value: 9.526e-11
## 
## [1] "Max reduction is 0.482408369617749 using A_U"
## [1] "Max reduction is 0.218319375740367 using A_U"
## [1] "Max reduction is 0.125730158685294 using A_EPSILON"
## [1] "Max reduction is 0.0997702118329568 using A_U"
## [1] "Max reduction is 0.0271958533098509 using A_U"
## [1] "Max reduction is 0.0170111124043587 using A_U"
## [1] "Max reduction is 0.00102285247477041 using A_EPSILON"
## [1] "Max reduction is 0.0269160398536438 using Three Way Interactions with A_EPSILON"
## [1] "Max reduction is 0.017685867543131 using A_T"
## [1] "No further terms permitted with the given degrees of freedom"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_zav.400.600.WND_5_6 ~ A_U + I(A_U^2) + 
##     I(A_U^3) + I(A_U^4) + I(A_U^5) + A_EPSILON + I(A_EPSILON^2) + 
##     A_T + I(A_EPSILON * A_U) + I(A_T * A_U) + I(A_T * A_EPSILON) + 
##     I(A_EPSILON * A_EPSILON * A_U) + I(A_EPSILON * A_T * A_U) + 
##     I(A_EPSILON * A_T * A_EPSILON), data = tData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.096819 -0.014154  0.003559  0.029044  0.061739 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    12.14491    0.02127 570.984  < 2e-16 ***
## A_U                            -1.19660    0.07737 -15.466 1.26e-10 ***
## I(A_U^2)                        0.49853    0.11485   4.341 0.000582 ***
## I(A_U^3)                        0.04079    0.29706   0.137 0.892609    
## I(A_U^4)                        1.02544    0.12295   8.340 5.14e-07 ***
## I(A_U^5)                       -1.08677    0.24586  -4.420 0.000496 ***
## A_EPSILON                       0.31898    0.01865  17.100 3.01e-11 ***
## I(A_EPSILON^2)                 -0.13457    0.03630  -3.707 0.002109 ** 
## A_T                            -0.02568    0.02879  -0.892 0.386454    
## I(A_EPSILON * A_U)             -0.38773    0.03026 -12.814 1.75e-09 ***
## I(A_T * A_U)                    0.11796    0.03987   2.959 0.009760 ** 
## I(A_T * A_EPSILON)             -0.04880    0.03605  -1.354 0.195903    
## I(A_EPSILON * A_EPSILON * A_U)  0.38672    0.06230   6.208 1.68e-05 ***
## I(A_EPSILON * A_T * A_U)        0.11912    0.09194   1.296 0.214675    
## I(A_EPSILON * A_T * A_EPSILON)  0.06014    0.08302   0.724 0.479946    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05397 on 15 degrees of freedom
## Multiple R-squared:  0.9987, Adjusted R-squared:  0.9975 
## F-statistic:   813 on 14 and 15 DF,  p-value: < 2.2e-16
## 
## [1] "1 I(A_EPSILON * A_T * A_EPSILON)"
## [1] "2 I(A_T * A_EPSILON)"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_zav.400.600.WND_5_6 ~ A_U + I(A_U^2) + 
##     I(A_U^3) + I(A_U^4) + I(A_U^5) + A_EPSILON + I(A_EPSILON^2) + 
##     A_T + I(A_EPSILON * A_U) + I(A_T * A_U) + I(A_EPSILON * A_EPSILON * 
##     A_U) + I(A_EPSILON * A_T * A_U), data = tData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.100346 -0.022345  0.004853  0.026981  0.085676 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    12.14806    0.02138 568.288  < 2e-16 ***
## A_U                            -1.20582    0.07347 -16.413 7.36e-12 ***
## I(A_U^2)                        0.48268    0.11484   4.203 0.000598 ***
## I(A_U^3)                        0.09328    0.28155   0.331 0.744448    
## I(A_U^4)                        1.03694    0.12313   8.422 1.80e-07 ***
## I(A_U^5)                       -1.13282    0.23941  -4.732 0.000193 ***
## A_EPSILON                       0.32289    0.01861  17.351 3.01e-12 ***
## I(A_EPSILON^2)                 -0.14090    0.03560  -3.958 0.001015 ** 
## A_T                            -0.01237    0.01781  -0.694 0.496884    
## I(A_EPSILON * A_U)             -0.39676    0.02984 -13.295 2.07e-10 ***
## I(A_T * A_U)                    0.13192    0.03907   3.376 0.003589 ** 
## I(A_EPSILON * A_EPSILON * A_U)  0.39776    0.05808   6.848 2.83e-06 ***
## I(A_EPSILON * A_T * A_U)        0.10485    0.08882   1.180 0.254096    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05457 on 17 degrees of freedom
## Multiple R-squared:  0.9985, Adjusted R-squared:  0.9974 
## F-statistic: 927.6 on 12 and 17 DF,  p-value: < 2.2e-16
## 
## [1] "1 I(A_EPSILON * A_T * A_U)"
## [1] "2 I(A_EPSILON^2)"
## [1] "3 I(A_T * A_U)"

## Warning in all(lapply(ThreeWayInters, function(e) (all(e[variableNum, ] < :
## coercing argument of type 'list' to logical

## [1] "4 A_T"
## [1] "5 I(A_EPSILON * A_EPSILON * A_U)"
## [1] "6 I(A_U^5)"
## [1] "7 I(A_U^4)"
## [1] "8 I(A_EPSILON * A_U)"
## 
## Call:
## lm(formula = WAVE1_AYOTTE_24SC_zav.400.600.WND_5_6 ~ A_U + I(A_U^2) + 
##     I(A_U^3) + A_EPSILON, data = tData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70366 -0.10215  0.00988  0.08980  0.42528 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.00348    0.06088 197.177  < 2e-16 ***
## A_U         -0.75201    0.16969  -4.432 0.000163 ***
## I(A_U^2)     1.44290    0.13363  10.798 6.66e-11 ***
## I(A_U^3)    -1.24241    0.25153  -4.939 4.36e-05 ***
## A_EPSILON    0.32197    0.07089   4.542 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2234 on 25 degrees of freedom
## Multiple R-squared:  0.9624, Adjusted R-squared:  0.9564 
## F-statistic:   160 on 4 and 25 DF,  p-value: < 2.2e-16

History matching example

The history matching codes we have included target multiple implausibility, plotting, and analyses over many waves. They are also generalised for basis emulation which will be added to this repository soon. We do not yet have a simple function historyMatching() that implements the steps below with an sequence of waves of mogp emulators, so we take you through a step by step use of our history matching codes for a single wave below. Something more general that wraps the steps below for multiple waves will be written in a future update.

The discrepancy variances for each output, the observations and the error on the observations are required for history matching.

tDisc <- c(0.15,0.05,0.1)
tObs <- c(306, 0.45, 12.4) 
tObsErr <- c(0.15,0.05,0.1)

We generate a random sample in the parameter space and calculate the implausibility across all of the outputs simultaneously. We have included a time stamp to show how fast this is for 1 million samples thanks to mogp.

sample_size <- 1000000
nparam <- length(names(TestEm$fitting.elements$Design))
Xp <- as.data.frame(2*randomLHS(sample_size, nparam)-1)
names(Xp) <- names(TestEm$fitting.elements$Design)
system.time(Timps <- ImplausibilityMOGP(NewData=Xp, Emulator=TestEm, Discrepancy=tDisc, 
                Obs=tObs, ObsErr=tObsErr))
##    user  system elapsed 
##   8.862   2.860   3.560

We collect the inputs and the implausibilities for each output into a matrix

ImpData_wave1 = cbind(Xp, Timps)

and define some of the elements required for history matching. We need the number of outputs that can exceed the implausibility cutoff, the cutoff itself (often 3 by default in the literature), and the standard parameters of the model mapped to [-1,1].

VarNames <- names(Xp)
valmax = 0 #how many outputs can be above the implausibility cut off?
cutoff_vec <- 3 #the implausibility cut off
param.defaults.norm=rep(0,nparam) #the default parameters of the model (on [-1,1])

The sampled implausibilities are then used to find NROY space and to create matrices for plotting 2D projections of parameter space.

ImpListM1 = CreateImpList(whichVars = 1:nparam, VarNames=VarNames, ImpData=ImpData_wave1, 
                            nEms=TestEm$mogp$n_emulators, whichMax=valmax+1)
NROY1 <- which(rowSums(Timps <= cutoff_vec[1]) >= TestEm$mogp$n_emulators -valmax)
length(NROY1)/dim(Xp)[1]
## [1] 0.523111

NROY density plots

imp.layoutm11(ImpListM1, VarNames, VariableDensity=FALSE, newPDF=FALSE, 
                the.title=paste("InputSpace_wave",WAVEN,".pdf",sep=""), 
                newPNG=FALSE, newJPEG=FALSE, newEPS=FALSE, 
                Points=matrix(param.defaults.norm,ncol=nparam))
mtext(paste("Remaining space:",length(NROY1)/dim(Xp)[1],sep=""), side=1)