###############################
# Bayesian Linear Regression  #
###############################

###################################################
### Example 1, Chapter 4 notes (Oxygen uptake data)
###################################################

# Data from Hoff (2009):

y <- c(-0.87,-10.74,-3.27,-1.97,7.50,-7.25,17.05,4.96,10.40,11.05,0.26,2.51)
# y = change in maximal oxygen uptake (positive number = improvement)

x1 <- c(0,0,0,0,0,0,1,1,1,1,1,1)               # 0 = running group, 1 = aerobics group
x2 <- c(23,22,22,25,27,20,31,23,27,28,22,24)   # ages of subjects
x3 <- x1*x2   # cross-product term (interaction b/w group and age)

X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3) )

# Gill's function to produce posterior medians and quantile-based
# credible intervals for the regression parameters
# (based on the multivariate-t posterior):

t.ci.table <- function(coefs, cov.mat, level=0.95, degrees=Inf,
                       quantiles=c(0.025, 0.50, 0.975)) {
  quantile.mat <- cbind(coefs, sqrt(diag(cov.mat)),
                        t(qt(quantiles, degrees) %o% sqrt(diag(cov.mat))) +
                          matrix(rep(coefs, length(quantiles)), ncol=length(quantiles)))
  quantile.names <- c("mean", "Std. Error")
  for (i in 1:length(quantiles)) {
    quantile.names <- c(quantile.names, paste(quantiles[i], "Quantile"))
  }
  dimnames(quantile.mat)[2] <- list(quantile.names)
  return(round(quantile.mat, 4))
}

### Noninformative Prior Analysis:

bhat <- solve(t(X) %*% X) %*% t(X) %*% y
sig2hat <- t(y - X %*% bhat) %*% (y - X %*% bhat) / (nrow(X) - ncol(X))
my.cov.mat <- solve(t(X) %*% X) *
              ((nrow(X)-ncol(X))*sig2hat / (nrow(X) - ncol(X) - 2))[1,1]

# Getting posterior information about the betas:

table.beta.info <- t.ci.table(bhat, my.cov.mat, degrees = nrow(X) - ncol(X))
table.beta.info

# Getting posterior information about sigma^2:

library(TeachingDemos)  # loading TeachingDemos package, to use the hpd function
library(pscl)           # loading pscl package, to use the inverse-gamma distribution

# The marginal posterior of sigma^2 is inverse-gamma with
# shape (n-p)/2 and scale (n-p)*sig2hat/2:
my.alpha <- (nrow(X) - ncol(X))/2
my.beta <- 0.5 * sig2hat * (nrow(X) - ncol(X))

# Point estimate (posterior median here) for sigma^2:
sig.sq.poster.med <- qigamma(0.50, alpha=my.alpha, beta=my.beta)
round(sig.sq.poster.med, 3)

# 95% HPD interval for sigma^2:
hpd.sig.sq <- hpd(qigamma, alpha=my.alpha, beta=my.beta)
round(hpd.sig.sq, 3)
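
# (Added check, not in the original script.)  Under this noninformative
# prior, the posterior is centered at the least-squares solution, so the
# "mean" column of table.beta.info should match an ordinary lm() fit, and
# sig2hat should match the classical residual variance estimate.
# A minimal sketch, assuming y, x1, x2, x3 as defined above:

ols.fit <- lm(y ~ x1 + x2 + x3)
round(coef(ols.fit), 4)             # should agree with bhat
round(summary(ols.fit)$sigma^2, 3)  # should agree with sig2hat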

###################################################
### Example 2, Chapter 4 notes (Automobile data)
###################################################

autodata <- read.table("http://www.stat.sc.edu/~hitchcock/autoregresslarge.txt", header=T)

y <- autodata$mpg
x1 <- autodata$displacement
x2 <- autodata$horsepower
x3 <- autodata$weight

X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3) )

###### Setting up the prior specification:

# With 3 predictor variables, we need exactly 3+1 = 4 hypothetical "prior
# observations", so that the matrix X~ below is square and invertible and
# X~^{-1} Y~ defines the induced prior mean for beta.
# Suppose that based on "expert opinion" we have the following guesses:

xtilde.obs.1 <- c(150, 100, 2000); ytilde.obs.1 <- 25
# Prior belief: a car with displacement=150, horsepower=100, weight=2000
# should have mpg around 25

xtilde.obs.2 <- c(200, 160, 4500); ytilde.obs.2 <- 10
# Prior belief: a car with displacement=200, horsepower=160, weight=4500
# should have mpg around 10

xtilde.obs.3 <- c(250, 140, 3000); ytilde.obs.3 <- 20
# Prior belief: a car with displacement=250, horsepower=140, weight=3000
# should have mpg around 20

xtilde.obs.4 <- c(100, 80, 1800); ytilde.obs.4 <- 35
# Prior belief: a car with displacement=100, horsepower=80, weight=1800
# should have mpg around 35

# Making the matrix X~ :
prior.obs.stacked <- rbind(xtilde.obs.1, xtilde.obs.2, xtilde.obs.3, xtilde.obs.4)
xtilde <- cbind(rep(1, times=nrow(prior.obs.stacked)), prior.obs.stacked)

# Making the vector Y~ :
ytilde <- c(ytilde.obs.1, ytilde.obs.2, ytilde.obs.3, ytilde.obs.4)

# The diagonal matrix D contains weights that indicate how much worth we place
# on our hypothetical prior observations (the weights could vary if we are more
# confident about some hypothetical prior observations than about others):

D <- diag(c(4, 4, 4, 4))

# This D yields a D^{-1} that is diag(c(1/4, 1/4, 1/4, 1/4)), which
# assumes that our four hypothetical prior observations TOGETHER are
# "worth" about one actual sample observation.

# The prior mean on the beta vector that we are inducing is:
prior.mean.beta <- as.vector(solve(xtilde) %*% ytilde)
print("Prior mean for beta vector:"); print(round(prior.mean.beta, 3))

### Choosing prior parameters a and b for the gamma prior on tau:

# The parameter "a" reflects our confidence in our prior.
# Let's choose "a" to be 0.5, which implies that our
# prior knowledge about tau is "worth" about 1 sample observation.

a <- 0.5

# Consider prior observation 1:
# Prior belief: a car with displacement=150, horsepower=100, weight=2000
# should have mpg around 25.
# Suppose the expert believes the largest realistic mpg for this type of
# car is 35.  Then set the prior guess for sigma to be
# (35 - 25)/1.645 = 6.08, so the prior guess for sigma^2 is
# 6.08^2 = 36.97, and the prior guess for tau is 1/36.97 = 0.027.
# Since the gamma prior mean is a/b, let b = a / tau.guess:

tau.guess <- 0.027
b <- a / tau.guess   # here b is around 18.5

##### Posterior information for tau and beta:

# Calculate beta-hat:
betahat <- as.vector(solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde) %*%
                     (t(X) %*% y + t(xtilde) %*% solve(D) %*% ytilde))

# Calculate s*:
n <- length(y)
sstar <- as.numeric( (t(y - X %*% betahat) %*% (y - X %*% betahat) +
                      t(ytilde - xtilde %*% betahat) %*% solve(D) %*%
                      (ytilde - xtilde %*% betahat) + 2*b) / (n + 2*a) )

### Point estimates for tau (and thus for sigma^2):

p.mean.tau <- 1 / sstar            # posterior mean of tau: shape/rate = 1/sstar
p.mean.sig.sq <- 1 / p.mean.tau    # note: 1/E[tau], a plug-in point estimate
                                   # for sigma^2, not E[sigma^2] itself
p.median.tau <- qgamma(0.50, shape=(n+2*a)/2, rate=((n+2*a)/2)*sstar)
p.median.sig.sq <- 1 / p.median.tau
print(paste("point estimate (1/E[tau]) for sigma^2 =", round(p.mean.sig.sq, 3),
            "posterior median for sigma^2 =", round(p.median.sig.sq, 3)))

### Marginal interval estimate for tau (and sigma^2):

library(TeachingDemos)  # loading TeachingDemos package, to use the hpd function

hpd.95.tau <- hpd(qgamma, shape=(n+2*a)/2, rate=((n+2*a)/2)*sstar)
hpd.95.sig.sq <- 1 / hpd.95.tau   # inverting flips the endpoints; sort() below reorders
print("95% posterior interval for sigma^2:")
round(sort(hpd.95.sig.sq), 3)
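
# (Added note, not in the original script.)  The plug-in value 1/E[tau]
# above is not the posterior mean of sigma^2 itself.  Since tau is
# gamma-distributed a posteriori, sigma^2 = 1/tau is inverse-gamma with
# mean rate/(shape - 1), so the exact posterior mean of sigma^2 can be
# computed directly.  A minimal sketch (the names shape.post and
# rate.post are introduced here just for illustration):

shape.post <- (n + 2*a)/2
rate.post <- ((n + 2*a)/2) * sstar
post.mean.sig.sq <- rate.post / (shape.post - 1)   # valid since shape.post > 1
round(post.mean.sig.sq, 3)

# A quick Monte Carlo check of the same quantity:
mean(1 / rgamma(100000, shape=shape.post, rate=rate.post))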

### NOT QUITE THE BEST APPROACH:
### Point estimates for beta, given the point estimate for tau:

# Posterior mean, median, and 95% intervals for the betas:
table.beta.info <- t.ci.table(betahat,
                              (1/p.median.tau) * solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde))
table.beta.info

### A BETTER APPROACH TO GET POINT ESTIMATES FOR BETA:

# Randomly generate many tau values from its posterior distribution:
how.many <- 30000
tau.values <- rgamma(n=how.many, shape=(n+2*a)/2, rate=((n+2*a)/2)*sstar)

library(mvtnorm)   # for rmvnorm, to sample beta given each tau

beta.values <- matrix(0, nrow=how.many, ncol=length(betahat))
for (j in 1:how.many) {
  beta.values[j,] <- rmvnorm(n=1, mean=betahat,
                             sigma=(1/tau.values[j]) * solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde))
}

# Posterior median for each regression coefficient:
post.medians <- apply(beta.values, 2, median)

# 95% posterior interval for each regression coefficient:
post.lower <- apply(beta.values, 2, quantile, probs=0.025)
post.upper <- apply(beta.values, 2, quantile, probs=0.975)

## Summarizing:
names.predictors <- c("x1", "x2", "x3")
beta.post.summary <- data.frame(cbind(post.lower, post.medians, post.upper),
                                row.names=c("intercept", names.predictors))
names(beta.post.summary) <- c('0.025 Quantile', '0.5 Quantile', '0.975 Quantile')
print(beta.post.summary)

# On a similar but smaller (and older) data set that is built into R:

y <- mtcars$mpg
x1 <- mtcars$disp
x2 <- mtcars$hp
x3 <- mtcars$wt

# Redo the analysis with both types of prior ...

###################################################
### A Bayesian Approach to Model Selection
###################################################

# Function (from Hoff 2009) to calculate a marginal probability for y:

lpy.X <- function(y, X, g=length(y), nu0=1,
                  s20=try(summary(lm(y ~ -1 + X))$sigma^2, silent=TRUE)) {
  n <- dim(X)[1]; p <- dim(X)[2]
  if (p == 0) { Hg <- 0; s20 <- mean(y^2) }
  if (p > 0)  { Hg <- (g/(g+1)) * X %*% solve(t(X) %*% X) %*% t(X) }
  SSRg <- t(y) %*% (diag(1, nrow=n) - Hg) %*% y
  -.5*(n*log(pi) + p*log(1+g) + (nu0+n)*log(nu0*s20 + SSRg) - nu0*log(nu0*s20)) +
    lgamma((nu0+n)/2) - lgamma(nu0/2)
}

## The oxygen uptake data:

y <- c(-0.87,-10.74,-3.27,-1.97,7.50,-7.25,17.05,4.96,10.40,11.05,0.26,2.51)
# y = change in maximal oxygen uptake (positive number = improvement)
x1 <- c(0,0,0,0,0,0,1,1,1,1,1,1)               # 0 = running group, 1 = aerobics group
x2 <- c(23,22,22,25,27,20,31,23,27,28,22,24)   # ages of subjects
x3 <- x1*x2   # cross-product term (interaction b/w group and age)

X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3) )

### Starting values for the Gibbs sampler:

z <- rep(1, dim(X)[2])   # starting with z = all 1's (all terms in the model)
lpy.c <- lpy.X(y, X[, z==1, drop=FALSE])
S <- 10000   # number of Monte Carlo iterations
Z <- matrix(NA, S, dim(X)[2])

### The Gibbs sampler:

for (s in 1:S) {
  for (j in sample(1:dim(X)[2])) {
    zp <- z; zp[j] <- 1 - zp[j]
    lpy.p <- lpy.X(y, X[, zp==1, drop=FALSE])
    r <- (lpy.p - lpy.c) * (-1)^(zp[j]==0)
    z[j] <- rbinom(1, 1, 1/(1+exp(-r)))
    if (z[j] == zp[j]) { lpy.c <- lpy.p }
  }
  Z[s,] <- z
}

#########

# Considering all possible subsets:

poss.z.vectors <- unique(Z, MARGIN=1)
z.probs <- rep(0, times=nrow(poss.z.vectors))
for (i in 1:nrow(poss.z.vectors)) {
  z.probs[i] <- sum(apply(Z, 1, identical, y=poss.z.vectors[i,]))
}
z.probs <- z.probs/sum(z.probs)
cbind(poss.z.vectors, z.probs)[rev(order(z.probs)),]

# Considering only certain z vectors (the interaction appearing only
# when both first-order terms appear):

poss.z.vectors <- matrix(c(1,0,0,0,
                           1,1,0,0,
                           1,0,1,0,
                           1,1,1,0,
                           1,1,1,1), ncol=4, byrow=T)
z.probs <- rep(0, times=nrow(poss.z.vectors))
for (i in 1:nrow(poss.z.vectors)) {
  z.probs[i] <- sum(apply(Z, 1, identical, y=poss.z.vectors[i,]))
}
z.probs <- z.probs/sum(z.probs)
cbind(poss.z.vectors, z.probs)[rev(order(z.probs)),]
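
# (Added summary, not in Hoff's code.)  The saved Z matrix also gives the
# marginal posterior inclusion probability of each term: the proportion
# of sampled z vectors in which that column of X appears.
# A minimal sketch using the Gibbs output above:

inclusion.probs <- colMeans(Z)
names(inclusion.probs) <- c("intercept", "x1", "x2", "x3")
round(inclusion.probs, 3)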

## The small automobile data set:

y <- mtcars$mpg
x1 <- mtcars$disp
x2 <- mtcars$hp
x3 <- mtcars$wt

X <- as.matrix( cbind(rep(1, length(x1)), x1, x2, x3) )

# Rerun the analysis, considering all possible subsets ...
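
# (Added sketch, not in the original script.)  One way to carry out the
# rerun, assuming the lpy.X function defined above is still in the
# workspace; this simply repeats the model-search Gibbs sampler and the
# all-subsets summary on the mtcars design matrix:

z <- rep(1, dim(X)[2])
lpy.c <- lpy.X(y, X[, z==1, drop=FALSE])
S <- 10000
Z <- matrix(NA, S, dim(X)[2])
for (s in 1:S) {
  for (j in sample(1:dim(X)[2])) {
    zp <- z; zp[j] <- 1 - zp[j]
    lpy.p <- lpy.X(y, X[, zp==1, drop=FALSE])
    r <- (lpy.p - lpy.c) * (-1)^(zp[j]==0)
    z[j] <- rbinom(1, 1, 1/(1+exp(-r)))
    if (z[j] == zp[j]) { lpy.c <- lpy.p }
  }
  Z[s,] <- z
}
poss.z.vectors <- unique(Z, MARGIN=1)
z.probs <- rep(0, times=nrow(poss.z.vectors))
for (i in 1:nrow(poss.z.vectors)) {
  z.probs[i] <- sum(apply(Z, 1, identical, y=poss.z.vectors[i,]))
}
z.probs <- z.probs/sum(z.probs)
cbind(poss.z.vectors, z.probs)[rev(order(z.probs)),]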