# Problem 3 (c)

x1 <- c(28.5, 30, 31.5, 32, 33.5, 35.9, 39, 40.5, 42.5, 45, 54.6, 62.3, 70, 80)
y <- c(94, 93.5, 99.5, 105, 110, 116, 125, 130.6, 129.9, 140, 170, 171, 185, 177)

X <- as.matrix( cbind(rep(1, length(x1)), x1) )

###### Setting up prior specification:

# 1 predictor variable, so we need exactly 1+1=2 hypothetical "prior observations".
# Suppose that based on "expert opinion" we have the following guesses:

xtilde.obs.1 <- c(40)
ytilde.obs.1 <- 115

xtilde.obs.2 <- c(60)
ytilde.obs.2 <- 150

# Making the matrix X~ :
prior.obs.stacked <- rbind(xtilde.obs.1, xtilde.obs.2)
xtilde <- cbind(rep(1, times=nrow(prior.obs.stacked)), prior.obs.stacked)

# Making the vector Y~ :
ytilde <- c(ytilde.obs.1, ytilde.obs.2)

# Diagonal matrix D contains weights that indicate how much worth we place on
# our hypothetical prior observations (note the weights could vary if we are more
# confident about some hypothetical prior observations than about others):

D <- diag(c(2, 2))
# This D yields a D^{-1} that is diag(c(1/2, 1/2)), which
# assumes that our two hypothetical prior observations TOGETHER are
# "worth" about one actual sample observation.

# The prior mean on the beta vector that we are inducing is:
prior.mean.beta <- as.vector(solve(xtilde) %*% ytilde)
print("Prior mean for beta vector was:"); print(round(prior.mean.beta, 3))

### Choosing prior parameters a and b for gamma prior on tau:

# The parameter "a" reflects our confidence in our prior:
# Let's choose "a" to be 0.1, which implies that our
# prior knowledge about tau is "worth" about 0.2 sample observations.

a <- 0.1

# Set the prior guess for sigma to be (195 - 115)/1.645 = 48.63222,
# so the prior guess for sigma^2 is (48.63222^2) = 2365.093,
# and the prior guess for tau is 1/2365.093 = 0.0004228164.
# Since the gamma prior mean is a/b, let
# b = a / tau.guess:

tau.guess <- 0.0004228164
b <- a / tau.guess   # Here b is around 236.5
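# (Optional sanity check, not part of the original solution: the induced prior
# mean for beta should correspond to the line through the two hypothetical
# points (40, 115) and (60, 150), i.e. slope (150-115)/(60-40) = 1.75 and
# intercept 115 - 1.75*40 = 45, and the gamma prior mean a/b should
# reproduce tau.guess.)
slope.check <- (ytilde.obs.2 - ytilde.obs.1) / (xtilde.obs.2 - xtilde.obs.1)
intercept.check <- ytilde.obs.1 - slope.check * xtilde.obs.1
print(c(intercept.check, slope.check))   # should match prior.mean.beta
print(a / b)                             # should match tau.guess = 0.0004228164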
##### Posterior information for tau and beta:

# Calculate beta-hat:
betahat <- as.vector(solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde) %*%
                     (t(X) %*% y + t(xtilde) %*% solve(D) %*% ytilde))

# Calculate s*:
n <- length(y)
sstar <- as.numeric( (t(y - X %*% betahat) %*% (y - X %*% betahat) +
                      t(ytilde - xtilde %*% betahat) %*% solve(D) %*% (ytilde - xtilde %*% betahat) +
                      2*b) / (n + 2*a) )

### Point estimates for tau (and thus for sigma^2):

p.mean.tau <- 1 / sstar
p.mean.sig.sq <- 1 / p.mean.tau

p.median.tau <- qgamma(0.50, shape=(n + 2*a)/2, rate=((n + 2*a)/2)*sstar)
p.median.sig.sq <- 1 / p.median.tau

print(paste("posterior.mean for sigma^2=", round(p.mean.sig.sq, 3),
            "posterior.median for sigma^2=", round(p.median.sig.sq, 3)))

# 3(d)

### Point estimates for beta, given the conditional draws for tau:
### "Honest" approach:

# Randomly generate many tau values from its posterior distribution:
how.many <- 30000
tau.values <- rgamma(n=how.many, shape=(n + 2*a)/2, rate=((n + 2*a)/2)*sstar)

library(mvtnorm)

beta.values <- matrix(0, nr=how.many, nc=length(betahat))
for (j in 1:how.many){
  beta.values[j,] <- rmvnorm(n=1, mean=betahat,
                             sigma=(1/tau.values[j]) * solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde))
}

# Posterior median for each regression coefficient:
post.medians <- apply(beta.values, 2, median)

# 95% posterior interval for each regression coefficient:
post.lower <- apply(beta.values, 2, quantile, probs=0.025)
post.upper <- apply(beta.values, 2, quantile, probs=0.975)

## Summarizing:
names.predictors <- c("x1")
beta.post.summary <- data.frame(cbind(post.lower, post.medians, post.upper),
                                row.names=c("intercept", names.predictors))
names(beta.post.summary) <- c('0.025 Quantile', '0.5 Quantile', '0.975 Quantile')
print(beta.post.summary)
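# (Optional cross-check, not part of the original solution: with the fairly
# weak prior above, the posterior medians should land close to the ordinary
# least-squares estimates from lm().)
print(coef(lm(y ~ x1)))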
########## Problem 4 ##############

#### Gill's function:

t.ci.table <- function(coefs, cov.mat, level=0.95, degrees=Inf,
                       quantiles=c(0.025, 0.50, 0.975)) {
  quantile.mat <- cbind(coefs, sqrt(diag(cov.mat)),
                        t(qt(quantiles, degrees) %o% sqrt(diag(cov.mat))) +
                          matrix(rep(coefs, length(quantiles)), ncol=length(quantiles)))
  quantile.names <- c("mean", "Std. Error")
  for (i in 1:length(quantiles)) {
    quantile.names <- c(quantile.names, paste(quantiles[i], "Quantile"))
  }
  dimnames(quantile.mat)[2] <- list(quantile.names)
  return(round(quantile.mat, 4))
}

ener.data <- read.table("http://people.stat.sc.edu/hitchcock/energybardata.txt",
                        header=F, col.names=c("price", "calories", "protein", "fat"))
attach(ener.data)

y <- price; x1 <- calories; x2 <- protein; x3 <- fat

X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3) )

### Noninformative Prior Analysis:

bhat <- solve(t(X) %*% X) %*% t(X) %*% y
sig2hat <- t(y - X %*% bhat) %*% (y - X %*% bhat) / (nrow(X) - ncol(X))

my.cov.mat <- solve(t(X) %*% X) * ((nrow(X) - ncol(X))*sig2hat / (nrow(X) - ncol(X) - 2))[1,1]

# Getting posterior information about the betas:
table.beta.info <- t.ci.table(bhat, my.cov.mat, degrees = nrow(X) - ncol(X))
table.beta.info

my.alpha <- (nrow(X) - ncol(X) - 1)/2
my.beta <- 0.5 * sig2hat * (nrow(X) - ncol(X))

library(TeachingDemos)
library(pscl)

hpd.sig.sq <- hpd(qigamma, alpha=my.alpha, beta=my.beta)
round(hpd.sig.sq, 3)

## Problem 4(c)

# Function to calculate a marginal probability for y:
lpy.X <- function(y, X, g=length(y), nu0=1,
                  s20=try(summary(lm(y ~ -1 + X))$sigma^2, silent=TRUE)) {
  n <- dim(X)[1]; p <- dim(X)[2]
  if(p==0) {Hg <- 0; s20 <- mean(y^2)}
  if(p>0)  {Hg <- (g/(g+1)) * X %*% solve(t(X) %*% X) %*% t(X)}
  SSRg <- t(y) %*% (diag(1, nrow=n) - Hg) %*% y
  -.5*(n*log(pi) + p*log(1+g) + (nu0+n)*log(nu0*s20 + SSRg) - nu0*log(nu0*s20)) +
    lgamma((nu0+n)/2) - lgamma(nu0/2)
}

### Starting values for Gibbs Sampler:

z <- rep(1, dim(X)[2])   # starting with z = all 1's (all terms in model)
lpy.c <- lpy.X(y, X[, z==1, drop=FALSE])
S <- 10000   # number of Monte Carlo iterations
Z <- matrix(NA, S, dim(X)[2])

### The Gibbs Sampler:

for(s in 1:S) {
  for(j in sample(1:dim(X)[2])) {
    zp <- z; zp[j] <- 1 - zp[j]
    lpy.p <- lpy.X(y, X[, zp==1, drop=FALSE])
    r <- (lpy.p - lpy.c) * (-1)^(zp[j]==0)
    z[j] <- rbinom(1, 1, 1/(1+exp(-r)))
    if(z[j]==zp[j]) {lpy.c <- lpy.p}
  }
  Z[s,] <- z
}

#########
# Considering all possible subsets:

poss.z.vectors <- unique(Z, MARGIN=1)
z.probs <- rep(0, times=nrow(poss.z.vectors))
for(i in 1:nrow(poss.z.vectors)) {
  z.probs[i] <- sum(apply(Z, 1, identical, y=poss.z.vectors[i,]))
}
z.probs <- z.probs/sum(z.probs)
cbind(poss.z.vectors, z.probs)[rev(order(z.probs)),]

# 4(d)

x4 <- x1*x2; x5 <- x1*x3; x6 <- x2*x3
X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3, x4, x5, x6) )

### Starting values for Gibbs Sampler:

z <- rep(1, dim(X)[2])   # starting with z = all 1's (all terms in model)
lpy.c <- lpy.X(y, X[, z==1, drop=FALSE])
S <- 10000   # number of Monte Carlo iterations
Z <- matrix(NA, S, dim(X)[2])

### The Gibbs Sampler:

for(s in 1:S) {
  for(j in sample(1:dim(X)[2])) {
    zp <- z; zp[j] <- 1 - zp[j]
    lpy.p <- lpy.X(y, X[, zp==1, drop=FALSE])
    r <- (lpy.p - lpy.c) * (-1)^(zp[j]==0)
    z[j] <- rbinom(1, 1, 1/(1+exp(-r)))
    if(z[j]==zp[j]) {lpy.c <- lpy.p}
  }
  Z[s,] <- z
}

#########
# Considering only certain z vectors (interaction only appearing when both first-order terms appear):

poss.z.vectors <- matrix( c(
  1,0,0,0,0,0,0,
  1,1,0,0,0,0,0,
  1,0,1,0,0,0,0,
  1,0,0,1,0,0,0,
  1,1,1,0,0,0,0,
  1,1,0,1,0,0,0,
  1,0,1,1,0,0,0,
  1,1,1,0,1,0,0,
  1,1,0,1,0,1,0,
  1,0,1,1,0,0,1,
  1,1,1,1,0,0,0,
  1,1,1,1,1,0,0,
  1,1,1,1,0,1,0,
  1,1,1,1,0,0,1,
  1,1,1,1,1,1,0,
  1,1,1,1,0,1,1,
  1,1,1,1,1,0,1,
  1,1,1,1,1,1,1 ), ncol=7, byrow=T)

z.probs <- rep(0, times=nrow(poss.z.vectors))
for(i in 1:nrow(poss.z.vectors)) {
  z.probs[i] <- sum(apply(Z, 1, identical, y=poss.z.vectors[i,]))
}
z.probs <- z.probs/sum(z.probs)
cbind(poss.z.vectors, z.probs)[rev(order(z.probs)),]
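# (Optional summary, not in the original solution: the column means of Z give
# the estimated marginal posterior inclusion probability for each term, a quick
# way to see which predictors and interactions the sampler favors. Here
# x1 = calories, x2 = protein, x3 = fat.)
inclusion.probs <- apply(Z, 2, mean)
names(inclusion.probs) <- c("intercept", "x1", "x2", "x3", "x1*x2", "x1*x3", "x2*x3")
print(round(inclusion.probs, 3))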
##### Problem 5, noninformative prior analysis:

# 5(a)

cer.data <- read.table("http://people.stat.sc.edu/hitchcock/cerealdatabayes.txt", header=T)
# This creates a data frame cer.data with columns named
# Sugar, Sodium, Fiber, Carbohydrates, Potassium

# Alternatively you could create generically named response and predictor variables:
y <- cer.data$Sugar
x1 <- cer.data$Sodium
x2 <- cer.data$Fiber
x3 <- cer.data$Carbohydrates
x4 <- cer.data$Potassium

cer.data.generic <- data.frame(y, x1, x2, x3, x4)  # creating a data frame with these variables

X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3, x4) )

library(mvtnorm)  # to get "rmvt" function to sample from multivariate t distribution

bhat <- solve(t(X) %*% X) %*% t(X) %*% y
sig2hat <- t(y - X %*% bhat) %*% (y - X %*% bhat) / (nrow(X) - ncol(X))

# Letting X* just equal the observed X:
Xstar <- X

my.Sigma <- as.numeric( sig2hat*(nrow(X) - ncol(X)) / (nrow(X) - ncol(X) - 2) ) *
  (diag(rep(1, times=nrow(Xstar))) + Xstar %*% solve(t(X) %*% X) %*% t(Xstar))

# Sampling from multivariate t distribution:
pred.post.err <- rmvt(n=500, sigma=my.Sigma, df=(nrow(X) - ncol(X)))

# Adding multivariate t samples to fitted Y-values to get posterior predictive distribution of response values:
pred.post.samp <- matrix(Xstar %*% bhat, nrow=nrow(pred.post.err), ncol=ncol(pred.post.err), byrow=T) + pred.post.err

matrix.of.ys <- matrix(y, nrow=nrow(pred.post.samp), ncol=ncol(pred.post.samp), byrow=T)

# Plotting original Y-values against samples from posterior predictive distribution of response values:
matplot(matrix.of.ys, pred.post.samp, pch='o', cex=0.3)
abline(0,1)

# 5(b)

# Examining model lack-of-fit:
cbind(cer.data[,c(1,5)], (y - Xstar %*% bhat))[order(y),]

# 5(c)

### Predictions for a cereal having sodium=140, fiber=3.5, carbohydrates=14, and potassium=90.

Xstar <- cbind(1, 140, 3.5, 14, 90)

#my.Sigma <- as.numeric( sig2hat*(nrow(X) - ncol(X)) / ((nrow(X) - ncol(X) - 2)) ) *
#  solve(diag(rep(1, times=nrow(X))) - Xstar %*% solve( (t(X) %*% X) + (t(Xstar) %*% Xstar) ) %*% t(Xstar))

my.Sigma <- as.numeric( sig2hat*(nrow(X) - ncol(X)) / (nrow(X) - ncol(X) - 2) ) *
  (diag(rep(1, times=nrow(Xstar))) + Xstar %*% solve(t(X) %*% X) %*% t(Xstar))

# Sampling from multivariate t distribution:
pred.post.err <- rmvt(n=500, sigma=my.Sigma, df=(nrow(X) - ncol(X)))

# Adding multivariate t samples to fitted Y-values to get posterior predictive distribution of response values:
pred.post.samp <- matrix(Xstar %*% bhat, nrow=nrow(pred.post.err), ncol=ncol(pred.post.err), byrow=T) + pred.post.err

# Getting 90% prediction interval, and point prediction (via median of posterior predictive distribution):
quantile(pred.post.samp, probs=c(0.05, 0.5, 0.95))
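# (Optional cross-check, not part of the original solution: under a
# noninformative prior, the 90% posterior predictive interval above should be
# similar to the classical 90% frequentist prediction interval from lm().
# 'class.fit' is just a name used here for the least-squares fit.)
class.fit <- lm(y ~ x1 + x2 + x3 + x4, data = cer.data.generic)
predict(class.fit, newdata = data.frame(x1=140, x2=3.5, x3=14, x4=90),
        interval = "prediction", level = 0.90)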
##### Problem 5, subjective prior analysis using 'rstanarm' package:

library(rstanarm)
library(bayesrules)
library(tidyverse)
library(bayesplot)
library(broom)
library(broom.mixed)
library(tidybayes)

## Note the first part in the prior_intercept argument (15 here)
## is the prior expected response (Sugar here) for a "typical" observation.

cer_model <- stan_glm(Sugar ~ Sodium + Fiber + Carbohydrates + Potassium,
                      data = cer.data, family = gaussian,
                      prior_intercept = normal(15, 20),
                      prior = normal(c(-1, -1, 1, 1), 40, autoscale=TRUE),
                      prior_aux = exponential(1, autoscale=TRUE),
                      chains = 4, iter = 5000*2)

# MCMC diagnostics
mcmc_trace(cer_model, size = 0.1)
mcmc_acf(cer_model)

# Try using the 'tidy' function in the 'broom.mixed' package:
library(broom.mixed)
broom.mixed::tidy(cer_model, effects = c("fixed", "aux"),
                  conf.int = TRUE, conf.level = 0.90)

# Alternatively, could use 'summary' function (change amount of rounding with the 'digits' argument):
#summary(cer_model, digits=4, probs = c(0.05, 0.5, 0.95))

# Posterior prediction:

# Simulate a set of predictions
#set.seed(84735)
shortcut_prediction <- posterior_predict(cer_model,
                                         newdata = data.frame(Sodium=140, Fiber=3.5, Carbohydrates=14, Potassium=90))

# Point prediction (median of predictive distribution)
median(shortcut_prediction)

# Construct a 90% posterior credible interval
posterior_interval(shortcut_prediction, prob = 0.90)

# Plot the approximate predictive model
mcmc_dens(shortcut_prediction) +
  xlab("predicted Sugar for a cereal with sodium=140, fiber=3.5, carb=14, potassium=90")

# Checking overall model fit:
predictions <- posterior_predict(cer_model, newdata = cer.data)

ppc_intervals(cer.data$Sugar, yrep = predictions, x = cer.data$Sugar,
              prob = 0.5, prob_outer = 0.95)
# Do the observed y-values in the data set (dark blue circles)
# mostly fall within the prediction intervals?

prediction_summary(cer_model, data = cer.data)
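# (Optional additional fit check, not in the original solution: 'pp_check' from
# 'rstanarm' overlays densities of replicated data sets on the observed Sugar
# distribution; systematic mismatch would suggest lack of fit.)
pp_check(cer_model)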
## #6 (model selection in the cereal data)

# Using the ELPD approach, just first-order terms:
# Just running 1 chain of each of these to save time:

cer_model_1234 <- stan_glm(Sugar ~ Sodium+Fiber+Carbohydrates+Potassium, data = cer.data, family = gaussian,
                           prior_intercept = normal(15, 20),
                           prior = normal(c(-1, -1, 1, 1), 40, autoscale=TRUE),
                           prior_aux = exponential(1, autoscale=TRUE),
                           chains = 1, iter = 5000*2)

cer_model_1 <- stan_glm(Sugar ~ Sodium, data = cer.data, family = gaussian,
                        prior_intercept = normal(15, 20),
                        prior = normal(c(-1), 40, autoscale=TRUE),
                        prior_aux = exponential(1, autoscale=TRUE),
                        chains = 1, iter = 5000*2)

cer_model_2 <- stan_glm(Sugar ~ Fiber, data = cer.data, family = gaussian,
                        prior_intercept = normal(15, 20),
                        prior = normal(c(-1), 40, autoscale=TRUE),
                        prior_aux = exponential(1, autoscale=TRUE),
                        chains = 1, iter = 5000*2)

cer_model_3 <- stan_glm(Sugar ~ Carbohydrates, data = cer.data, family = gaussian,
                        prior_intercept = normal(15, 20),
                        prior = normal(c(1), 40, autoscale=TRUE),
                        prior_aux = exponential(1, autoscale=TRUE),
                        chains = 1, iter = 5000*2)

cer_model_4 <- stan_glm(Sugar ~ Potassium, data = cer.data, family = gaussian,
                        prior_intercept = normal(15, 20),
                        prior = normal(c(1), 40, autoscale=TRUE),
                        prior_aux = exponential(1, autoscale=TRUE),
                        chains = 1, iter = 5000*2)

cer_model_12 <- stan_glm(Sugar ~ Sodium+Fiber, data = cer.data, family = gaussian,
                         prior_intercept = normal(15, 20),
                         prior = normal(c(-1, -1), 40, autoscale=TRUE),
                         prior_aux = exponential(1, autoscale=TRUE),
                         chains = 1, iter = 5000*2)

cer_model_13 <- stan_glm(Sugar ~ Sodium+Carbohydrates, data = cer.data, family = gaussian,
                         prior_intercept = normal(15, 20),
                         prior = normal(c(-1, 1), 40, autoscale=TRUE),
                         prior_aux = exponential(1, autoscale=TRUE),
                         chains = 1, iter = 5000*2)

cer_model_14 <- stan_glm(Sugar ~ Sodium+Potassium, data = cer.data, family = gaussian,
                         prior_intercept = normal(15, 20),
                         prior = normal(c(-1, 1), 40, autoscale=TRUE),
                         prior_aux = exponential(1, autoscale=TRUE),
                         chains = 1, iter = 5000*2)

cer_model_23 <- stan_glm(Sugar ~ Fiber+Carbohydrates, data = cer.data, family = gaussian,
                         prior_intercept = normal(15, 20),
                         prior = normal(c(-1, 1), 40, autoscale=TRUE),
                         prior_aux = exponential(1, autoscale=TRUE),
                         chains = 1, iter = 5000*2)

cer_model_24 <- stan_glm(Sugar ~ Fiber+Potassium, data = cer.data, family = gaussian,
                         prior_intercept = normal(15, 20),
                         prior = normal(c(-1, 1), 40, autoscale=TRUE),
                         prior_aux = exponential(1, autoscale=TRUE),
                         chains = 1, iter = 5000*2)

cer_model_34 <- stan_glm(Sugar ~ Carbohydrates+Potassium, data = cer.data, family = gaussian,
                         prior_intercept = normal(15, 20),
                         prior = normal(c(1, 1), 40, autoscale=TRUE),
                         prior_aux = exponential(1, autoscale=TRUE),
                         chains = 1, iter = 5000*2)

cer_model_123 <- stan_glm(Sugar ~ Sodium+Fiber+Carbohydrates, data = cer.data, family = gaussian,
                          prior_intercept = normal(15, 20),
                          prior = normal(c(-1, -1, 1), 40, autoscale=TRUE),
                          prior_aux = exponential(1, autoscale=TRUE),
                          chains = 1, iter = 5000*2)

cer_model_124 <- stan_glm(Sugar ~ Sodium+Fiber+Potassium, data = cer.data, family = gaussian,
                          prior_intercept = normal(15, 20),
                          prior = normal(c(-1, -1, 1), 40, autoscale=TRUE),
                          prior_aux = exponential(1, autoscale=TRUE),
                          chains = 1, iter = 5000*2)

cer_model_134 <- stan_glm(Sugar ~ Sodium+Carbohydrates+Potassium, data = cer.data, family = gaussian,
                          prior_intercept = normal(15, 20),
                          prior = normal(c(-1, 1, 1), 40, autoscale=TRUE),
                          prior_aux = exponential(1, autoscale=TRUE),
                          chains = 1, iter = 5000*2)

cer_model_234 <- stan_glm(Sugar ~ Fiber+Carbohydrates+Potassium, data = cer.data, family = gaussian,
                          prior_intercept = normal(15, 20),
                          prior = normal(c(-1, 1, 1), 40, autoscale=TRUE),
                          prior_aux = exponential(1, autoscale=TRUE),
                          chains = 1, iter = 5000*2)

loo(cer_model_1)$estimates
loo(cer_model_2)$estimates
loo(cer_model_3)$estimates
loo(cer_model_4)$estimates
loo(cer_model_12)$estimates
loo(cer_model_13)$estimates
loo(cer_model_14)$estimates
loo(cer_model_23)$estimates
loo(cer_model_24)$estimates
loo(cer_model_34)$estimates
loo(cer_model_123)$estimates
loo(cer_model_124)$estimates
loo(cer_model_134)$estimates
loo(cer_model_234)$estimates
loo(cer_model_1234)$estimates
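# (Optional summary, not in the original solution: 'loo_compare' from the 'loo'
# package, loaded with rstanarm, ranks the fitted models by estimated ELPD,
# which is easier to scan than the fifteen separate tables above.)
loo_compare(loo(cer_model_1), loo(cer_model_2), loo(cer_model_3), loo(cer_model_4),
            loo(cer_model_12), loo(cer_model_13), loo(cer_model_14),
            loo(cer_model_23), loo(cer_model_24), loo(cer_model_34),
            loo(cer_model_123), loo(cer_model_124), loo(cer_model_134),
            loo(cer_model_234), loo(cer_model_1234))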