######################
######################
#
# Binary response:
#
######################
######################
######################

## This code is adapted from the example at
## https://brunaw.com/phd/mcmc/report.pdf

library(mvtnorm)
# May need to install the mvtnorm package first?
# If so, type at the command line: install.packages("mvtnorm", dependencies=T)
# while plugged in to the internet.

# Define Y = 0 if individual has no senility
# Define Y = 1 if individual has senility present
# x = score on subset of Wechsler Adult Intelligence Scale (WAIS)

y <- c(rep(1, times=14), rep(0, times=40))
x <- c(9,13,6,8,10,4,14,8,11,7,9,7,5,14,13,16,10,12,11,14,15,18,7,16,9,9,11,
       13,15,13,10,11,6,17,14,19,9,11,14,10,16,10,16,14,13,13,9,15,10,11,12,4,14,20)

logreg.out <- glm(y ~ x, family=binomial(logit))
summary(logreg.out)

#X <- cbind(x)
x1 <- x
X <- cbind(rep(1, times=length(x1)), x1)

# This code just below includes some subjective prior information about beta_1:
#beta.prior.mean <- c(0,-0.35)          # reflects no prior belief about beta_0, but good prior belief about beta_1
#beta.prior.cov <- diag(c(100,0.175^2))

# This code here shows a VERY weakly informative, basically noninformative, prior:
beta.prior.mean <- c(0,0)
beta.prior.cov <- diag(c(100,100))

k <- 1  # Can adjust this k up or down if the acceptance rate is too high or low...
# Proposal covariance: a rough approximation to the inverse Fisher information,
# using the fitted probabilities from the ML fit as weights.
# (The exact Fisher information for logistic regression would use weights p*(1-p).)
proposal.cov.matrix <- k*solve(t(X)%*%diag(fitted(logreg.out))%*%X)
#proposal.cov.matrix <- k*diag(p)

MCMCBetasI <- beta.prior.mean
V <- proposal.cov.matrix
mu <- beta.prior.mean
Sigma_inv <- solve(beta.prior.cov)
j <- 0
BurnIn <- 1000
TotIter <- 50000  # Make this bigger, especially if you need to do more thinning...
SaveResults <- matrix(0, nrow=TotIter, ncol=length(beta.prior.mean))

for (i in 1:TotIter){
  # Proposal distribution for beta
  MCMCBetasC <- rmvnorm(1, MCMCBetasI, V)
  MCMCBetasC <- as.vector(MCMCBetasC)
  # Metropolis ratio (log scale): Bernoulli log-likelihood plus normal log-prior
  # at the candidate, minus the same at the current value.
  # (Note: the tuning constant k scales only the proposal covariance above;
  # it does not belong in the likelihood.)
  ratio <- ((-sum(log(1 + exp(X%*%MCMCBetasC))) + sum(y * X%*%MCMCBetasC)
             - (1/2) * t(MCMCBetasC - mu)%*%Sigma_inv%*%(MCMCBetasC - mu))
            - (-sum(log(1 + exp(X%*%MCMCBetasI))) + sum(y * X%*%MCMCBetasI)
               - (1/2) * t(MCMCBetasI - mu)%*%Sigma_inv%*%(MCMCBetasI - mu)))
  # Accept/reject step
  if(runif(1) < min(1, exp(ratio))) {
    MCMCBetasI <- MCMCBetasC
    j <- j + 1
  }
  # Saving results
  SaveResults[i,] <- MCMCBetasI
}

# Acceptance rate:
acs <- j/TotIter
acs

## Thinning (every 5th value):
S <- TotIter
beta.values.thin <- SaveResults[5*(1:(S/5)),]

## Burn-in (note: removing the first 1000 THINNED draws discards the first
## 5000 iterations of the original chain):
beta.values.thin.b <- beta.values.thin[-(1:BurnIn),]

par(mfrow=c(2,2))  # 2 by 2 plotting window
acf(beta.values.thin.b[,1])
acf(beta.values.thin.b[,2])
plot(beta.values.thin.b[,1], type='l')
plot(beta.values.thin.b[,2], type='l')
par(mfrow=c(1,1))

### Posterior summary:

post.medians <- apply(beta.values.thin.b, 2, median)  # Posterior medians for each regression coefficient
post.lower <- apply(beta.values.thin.b, 2, quantile, probs=0.025)
post.upper <- apply(beta.values.thin.b, 2, quantile, probs=0.975)
# approximate 0.025 and 0.975 quantiles for each regression coefficient
# to get approximate 95% quantile-based intervals

#library(TeachingDemos)
#t(apply(beta.values.thin.b, 2, emp.hpd, conf=0.95))  # approximate 95% HPD intervals

#names.predictors <- c("x1", "x2", "x3")
names.predictors <- c("x1")  # just one predictor in this example...
beta.post.summary <- data.frame(cbind(post.lower, post.medians, post.upper),
                                row.names=c("intercept", names.predictors))
names(beta.post.summary) <- c('0.025 Quantile', '0.5 Quantile', '0.975 Quantile')
print(beta.post.summary)
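## A small add-on sketch (not in the original example): using the saved
## posterior draws to estimate P(senility) for a person with a given WAIS score.
## The score of 10 below is a hypothetical value chosen for illustration;
## plogis() is the base-R inverse logit.

wais.score <- 10
# Inverse logit of beta_0 + beta_1*score for each post-burn-in draw:
p.draws <- plogis(beta.values.thin.b[,1] + beta.values.thin.b[,2]*wais.score)
# Posterior median and approximate 95% quantile-based interval for this probability:
quantile(p.draws, probs=c(0.025, 0.5, 0.975))
hist(p.draws, main="Posterior of P(senility | WAIS = 10)", xlab="probability")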
### Logistic Regression using rstanarm

# Load packages
library(bayesrules)
library(rstanarm)
library(bayesplot)
library(tidyverse)
library(tidybayes)
library(broom.mixed)

# Create a data frame:
wais_data <- data.frame(y, x)

# Simulate the posterior (set prior_PD = TRUE instead to simulate from the prior):
wais_model <- stan_glm(y ~ x,
                       data = wais_data, family = binomial,
                       prior_intercept = normal(-0.5, 0.45),
                       prior = normal(-0.35, 0.175),
                       chains = 4, iter = 5000*2, prior_PD = F)

# prior intercept: We believe that a "typical" subject might have probability 0.2 to 0.6 of senility.
# So the log odds of senility for such a person should be between log(0.2/0.8) = -1.4 and log(0.6/0.4) = 0.4.
# So set the prior mean for the CENTERED beta_0 (different from the beta_0 in the model)
# to be halfway between those, at -0.5.
# Set the prior sd to be half of the distance between -0.5 and 0.4, so prior sd = 0.45.

# prior slope: We believe that for a one-unit increase in WAIS score, the odds of senility
# might be anywhere from half as large to the same, i.e., an odds ratio between 0.5 and 1.
# So beta_1 might be between log(0.5) = -0.69 and log(1) = 0.
# So set the prior mean on beta_1 to be -0.35 and the prior sd around 0.175.

mcmc_trace(wais_model)
mcmc_dens_overlay(wais_model)
mcmc_acf(wais_model)
# A little autocorrelation, but otherwise OK.

## Plotting posterior plausible models:
wais_data %>%
  add_fitted_draws(wais_model, n = 100) %>%
  ggplot(aes(x = x, y = y)) +
  geom_line(aes(y = .value, group = .draw), alpha = 0.15) +
  labs(y = "probability of senility")

# Summary of point estimates and interval estimates for the betas:
tidy(wais_model, conf.int = TRUE, conf.level = 0.95)

# How do these compare to the frequentist MLEs?
logreg.out <- glm(y ~ x, family=binomial(logit))
summary(logreg.out)

# Posterior predictions of binary outcome (senility present or not?)
# for a person with WAIS score 10:
binary_prediction <- posterior_predict(wais_model, newdata = data.frame(x=10))

# Histogram to show posterior predictive distribution for this person:
mcmc_hist(binary_prediction) + labs(x = "Y")

# Summarize the posterior predictions of Y with a table:
table(binary_prediction)

# Posterior predictive mean (this is the posterior predictive probability
# that such a person would be senile):
colMeans(binary_prediction)

## Checking the posterior predictive distribution:
proportion_senile <- function(x){mean(x == 1)}
pp_check(wais_model, nreps = 100,
         plotfun = "stat", stat = "proportion_senile") +
  xlab("probability of senility")
# The observed value is near the middle of the predictive distribution -- that's good.

## Getting the confusion matrix to check predictive accuracy:
classification_summary(model = wais_model, data = wais_data, cutoff = 0.5)
# Could try with a different cutoff as well...

# Checking classification accuracy on "out-of-sample" data using cross-validation:
cv_accuracy_1 <- classification_summary_cv(
  model = wais_model, data = wais_data, cutoff = 0.5, k = 10)
cv_accuracy_1$cv
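## An optional sketch (not in the original script): an actual prior simulation,
## obtained by refitting with prior_PD = TRUE and reusing the plotting code from
## above. The model name wais_model_prior is made up for this illustration.
## This checks whether the priors generate reasonable senility-vs-WAIS curves
## before the data are brought in.

wais_model_prior <- stan_glm(y ~ x,
                             data = wais_data, family = binomial,
                             prior_intercept = normal(-0.5, 0.45),
                             prior = normal(-0.35, 0.175),
                             chains = 4, iter = 5000*2, prior_PD = TRUE)

# 100 prior plausible models:
wais_data %>%
  add_fitted_draws(wais_model_prior, n = 100) %>%
  ggplot(aes(x = x, y = y)) +
  geom_line(aes(y = .value, group = .draw), alpha = 0.15) +
  labs(y = "prior probability of senility")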
###
### Logistic Regression with Multiple Predictors
###

# Rain example from the book:

# Load and process the data
data(weather_perth)
weather <- weather_perth %>%
  select(day_of_year, raintomorrow, humidity9am, humidity3pm, raintoday)

# Prior belief:
# On a "typical" day, there is a 0.2 chance of rain.
# So the prior mean on the CENTERED beta_0 is log(0.2/(1-0.2)) = -1.4.
# The prior sd on the CENTERED beta_0 of 0.7 implies
# a 95% chance the log-odds of rain on a typical day is between -2.8 and 0.
# This converts to odds of between 0.06 and 1,
# or a probability of between 0.057 and 0.50.

rain_model_2 <- stan_glm(
  raintomorrow ~ humidity9am + humidity3pm + raintoday,
  data = weather, family = binomial,
  prior_intercept = normal(-1.4, 0.7),
  prior = normal(0, 2.5, autoscale = TRUE),
  chains = 4, iter = 5000*2)

tidy(rain_model_2, effects = "fixed", conf.int = TRUE, conf.level = 0.95)

### Model comparison:

# Let's compare this model to a model that only contains one predictor:
rain_model_1 <- stan_glm(raintomorrow ~ humidity9am,
                         data = weather, family = binomial,
                         prior_intercept = normal(-1.4, 0.7),
                         prior = normal(0.07, 0.035),
                         chains = 4, iter = 5000*2, prior_PD = FALSE)

### Comparing classification accuracy:
## The book notes that c = 0.2 is a reasonable cutoff value here,
## but you could try other cutoff values as desired...
cv_accuracy_1 <- classification_summary_cv(
  model = rain_model_1, data = weather, cutoff = 0.2, k = 10)
cv_accuracy_2 <- classification_summary_cv(
  model = rain_model_2, data = weather, cutoff = 0.2, k = 10)

# CV for the models
cv_accuracy_1$cv
cv_accuracy_2$cv

# One approach to model selection:
# Calculate ELPD for the models
loo_1 <- loo(rain_model_1)
loo_2 <- loo(rain_model_2)
loo_1$estimates
loo_2$estimates

# Could use BIC to compare models (not involving any prior information):
rain_model_1_glm <- glm(raintomorrow ~ humidity9am,
                        data = weather, family = binomial(logit))
rain_model_2_glm <- glm(raintomorrow ~ humidity9am + humidity3pm + raintoday,
                        data = weather, family = binomial(logit))
BIC(rain_model_1_glm)
BIC(rain_model_2_glm)

# Try a model with only humidity3pm & raintoday as predictors:
rain_model_simp <- stan_glm(
  raintomorrow ~ humidity3pm + raintoday,
  data = weather, family = binomial,
  prior_intercept = normal(-1.4, 0.7),
  prior = normal(0, 2.5, autoscale = TRUE),
  chains = 1, iter = 5000*2)

cv_accuracy_simp <- classification_summary_cv(
  model = rain_model_simp, data = weather, cutoff = 0.2, k = 10)
cv_accuracy_simp$cv

loo_simp <- loo(rain_model_simp)
loo_simp$estimates

rain_model_simp_glm <- glm(raintomorrow ~ humidity3pm + raintoday,
                           data = weather, family = binomial(logit))
BIC(rain_model_simp_glm)
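## A quick add-on sketch (not in the original script): loo_compare() from the
## loo package (available once rstanarm is loaded) ranks the fitted models by
## estimated ELPD, with the best model on top and standard errors for the
## pairwise differences.

loo_compare(loo_1, loo_2, loo_simp)
# An elpd difference that is small relative to its standard error suggests
# the models have similar expected out-of-sample predictive performance.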
### Coding the multiple logistic regression model using base R: ###

library(mvtnorm)
# May need to install the mvtnorm package first?
# If so, type at the command line: install.packages("mvtnorm", dependencies=T)
# while plugged in to the internet.

y  <- weather$raintomorrow
x1 <- weather$humidity9am
x2 <- weather$humidity3pm
x3 <- weather$raintoday

# make y and x3 numeric for the calculations below:
y <- as.numeric(y)        # converts the factor to 1's and 2's
y <- y - 1                # converts to 0's and 1's
x3 <- as.numeric(x3) - 1  # same 0/1 conversion (matching glm's dummy coding)

X <- cbind(rep(1, times=length(x1)), x1, x2, x3)

beta.prior.mean <- c(0, -0.35, -0.2, 0.5)
# reflects no prior belief about beta_0, but some prior belief about beta_1, beta_2, beta_3
beta.prior.cov <- diag(c(100, 40, 40, 40))

k <- 1  # Can adjust this k up or down if the acceptance rate is too high or low...

logreg.out <- glm(y ~ x1+x2+x3, family=binomial(logit))
summary(logreg.out)

# Proposal covariance: same rough approximation as in the single-predictor example.
proposal.cov.matrix <- k*solve(t(X)%*%diag(fitted(logreg.out))%*%X)
#proposal.cov.matrix <- k*diag(p)

MCMCBetasI <- beta.prior.mean
V <- proposal.cov.matrix
mu <- beta.prior.mean
Sigma_inv <- solve(beta.prior.cov)
j <- 0
BurnIn <- 1000
TotIter <- 50000  # Make this bigger, especially if you need to do more thinning...
SaveResults <- matrix(0, nrow=TotIter, ncol=length(beta.prior.mean))

for (i in 1:TotIter){
  # Proposal distribution for beta
  MCMCBetasC <- rmvnorm(1, MCMCBetasI, V)
  MCMCBetasC <- as.vector(MCMCBetasC)
  # Metropolis ratio (log scale), as in the single-predictor example:
  ratio <- ((-sum(log(1 + exp(X%*%MCMCBetasC))) + sum(y * X%*%MCMCBetasC)
             - (1/2) * t(MCMCBetasC - mu)%*%Sigma_inv%*%(MCMCBetasC - mu))
            - (-sum(log(1 + exp(X%*%MCMCBetasI))) + sum(y * X%*%MCMCBetasI)
               - (1/2) * t(MCMCBetasI - mu)%*%Sigma_inv%*%(MCMCBetasI - mu)))
  # Accept/reject step
  if(runif(1) < min(1, exp(ratio))) {
    MCMCBetasI <- MCMCBetasC
    j <- j + 1
  }
  # Saving results
  SaveResults[i,] <- MCMCBetasI
}

# Acceptance rate:
acs <- j/TotIter
acs

## Thinning (every 5th value):
S <- TotIter
beta.values.thin <- SaveResults[5*(1:(S/5)),]

## Burn-in (again, 1000 thinned draws = the first 5000 original iterations):
beta.values.thin.b <- beta.values.thin[-(1:BurnIn),]

par(mfrow=c(2,2))
acf(beta.values.thin.b[,1])
acf(beta.values.thin.b[,2])
acf(beta.values.thin.b[,3])
acf(beta.values.thin.b[,4])
par(mfrow=c(1,1))

dev.new()
par(mfrow=c(2,2))
plot(beta.values.thin.b[,1], type='l')
plot(beta.values.thin.b[,2], type='l')
plot(beta.values.thin.b[,3], type='l')
plot(beta.values.thin.b[,4], type='l')
par(mfrow=c(1,1))

### Posterior summary:

post.medians <- apply(beta.values.thin.b, 2, median)  # Posterior medians for each regression coefficient
post.lower <- apply(beta.values.thin.b, 2, quantile, probs=0.025)
post.upper <- apply(beta.values.thin.b, 2, quantile, probs=0.975)
# approximate 0.025 and 0.975 quantiles for each regression coefficient
# to get approximate 95% quantile-based intervals

#library(TeachingDemos)
#t(apply(beta.values.thin.b, 2, emp.hpd, conf=0.95))  # approximate 95% HPD intervals

names.predictors <- c("x1", "x2", "x3")
beta.post.summary <- data.frame(cbind(post.lower, post.medians, post.upper),
                                row.names=c("intercept", names.predictors))
names(beta.post.summary) <- c('0.025 Quantile', '0.5 Quantile', '0.975 Quantile')
print(beta.post.summary)
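## An optional diagnostic sketch (not in the original script): effective sample
## sizes for the thinned, post-burn-in draws, using the coda package (assumed
## installed: install.packages("coda")).

library(coda)
effectiveSize(as.mcmc(beta.values.thin.b))
# Effective sizes much smaller than the number of saved draws would suggest
# more thinning or a longer run (increase TotIter).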