## Chapter 5 R Examples

library(bayesrules)
library(tidyverse)

## Fraud risk phone call example from textbook:

# Possible priors for lambda:

# Plot the Gamma(5, 1) prior
plot_gamma(shape = 5, rate = 1)

# Plot the Gamma(10, 2) prior
plot_gamma(shape = 10, rate = 2)

# Plot the Gamma(15, 3) prior
plot_gamma(shape = 15, rate = 3)

# Showing the posterior distribution, along with the prior and the likelihood:
plot_gamma_poisson(shape = 10, rate = 2, sum_y = 11, n = 4)

# Summarizing the posterior (and the prior):
summarize_gamma_poisson(shape = 10, rate = 2, sum_y = 11, n = 4)

######################
## Credible Intervals
######################

### Fraud risk example:

# A 90% quantile-based credible interval for the daily mean number of fraud risk calls:
round(qgamma(c(0.05, 0.95), shape = 21, rate = 6), 3)

library(TeachingDemos)
# May need to install the TeachingDemos package first?
# If so, type at the command line:
# install.packages("TeachingDemos", dependencies = FALSE)
# while connected to the internet.

# Using the hpd function for a 90% HPD credible interval for the fraud risk call example:
hpd(qgamma, shape = 21, rate = 6, conf = 0.90)

### Coin tossing example, Chapter 5 notes:

y <- 2; n <- 10

# A 95% quantile-based credible interval for theta:
qbeta(c(.025, .975), y + 1, n - y + 1)

# A 95% HPD credible interval for theta:
y <- 2; n <- 10
hpd(qbeta, shape1 = y + 1, shape2 = n - y + 1, conf = 0.95)

#####################################
# Conjugate Priors with Normal Data #
#####################################

# Example 1, Chapter 3 notes (Midge data)

# Data (in mm):
y <- c(1.64, 1.70, 1.72, 1.74, 1.82, 1.82, 1.82, 1.90, 2.08)
ybar <- mean(y); n <- length(y)
my.seq <- seq(0.5, 3, length = 300)

#### Case when the variance is known:

# Suppose we knew sigma^2 = 0.01:

# Prior parameters for the normal prior on mu:
my.delta <- 1.9; my.tau <- 0.3
# "95% sure" a priori that mu is between 1.3 and 2.5, say.
# What if we adjust this?
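# Optional sanity check (a quick sketch using base R only): the endpoints 1.3 and 2.5
# sit two prior standard deviations below and above the prior mean, so the Normal(1.9, 0.3)
# prior should put roughly 95% of its mass between them:
round(pnorm(2.5, mean = my.delta, sd = my.tau) - pnorm(1.3, mean = my.delta, sd = my.tau), 3)
# Comes out to about 0.954, consistent with being "95% sure" a priori.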
prior.mu <- dnorm(my.seq, mean = my.delta, sd = my.tau)  # Defining the prior

posterior.mu <- dnorm(my.seq,
                      mean = (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01),
                      sd = sqrt(0.01*(my.tau^2) / (0.01 + n*(my.tau^2))))  # Defining the posterior

plot(my.seq, prior.mu, ylim = c(-0.7, 12), yaxt = 'n',
     xlab = "Values of mu", ylab = "Probability distribution",
     type = 'l', lty = 3)  # Plotting the prior
lines(my.seq, posterior.mu)  # Plotting the posterior

### Point estimates for mu:

posterior.mean <- (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01)
posterior.median <- qnorm(0.50,
                          mean = (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01),
                          sd = sqrt(0.01*(my.tau^2) / (0.01 + n*(my.tau^2))))
print(paste("posterior.mean=", round(posterior.mean, 3),
            "posterior.median=", round(posterior.median, 3)))

### Interval estimate for mu:

library(TeachingDemos)  # loading the package in order to use the hpd function

hpd.95 <- hpd(qnorm,
              mean = (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01),
              sd = sqrt(0.01*(my.tau^2) / (0.01 + n*(my.tau^2))),
              conf = 0.95)
round(hpd.95, 3)
segments(hpd.95[1], 0, hpd.95[2], 0, lwd = 4)  # Showing the 95% HPD interval on the graph

# Using the built-in functions from the 'bayesrules' package:
library(bayesrules)

# Showing the posterior distribution, along with the prior and the likelihood:
plot_normal_normal(mean = 1.9, sd = 0.3, sigma = sqrt(0.01), y_bar = 1.804444, n = 9)

# Summarizing the posterior (and the prior):
summarize_normal_normal(mean = 1.9, sd = 0.3, sigma = sqrt(0.01), y_bar = 1.804444, n = 9)

#### Case when the mean and variance are both unknown:

# Prior parameters for the inverse gamma prior on sigma^2:
my.alpha <- 18; my.beta <- .34  # Based on the guesses E[sig^2] = .02, var[sig^2] = .005^2

# Prior parameters for the normal prior on mu:
my.delta <- 1.9; s0 <- 1  # a low value of s0 indicates a lack of prior knowledge

library(pscl)  # loading the pscl package, to use the inverse gamma distribution
# May need to install the pscl package first?
# If so, type at the command line:
# install.packages("pscl", dependencies = FALSE)
# while connected to the internet.
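# Optional check (a quick sketch, base R arithmetic only) that the Inverse-Gamma(18, 0.34)
# prior matches the guesses above, using the standard inverse-gamma moment formulas
# E[sig^2] = beta/(alpha - 1) for alpha > 1 and
# Var[sig^2] = beta^2 / ((alpha - 1)^2 * (alpha - 2)) for alpha > 2.
# (prior.mean.sig.sq and prior.sd.sig.sq are helper objects for this check only.)
prior.mean.sig.sq <- my.beta / (my.alpha - 1)
prior.sd.sig.sq <- sqrt(my.beta^2 / ((my.alpha - 1)^2 * (my.alpha - 2)))
c(prior.mean.sig.sq, prior.sd.sig.sq)  # should be approximately 0.02 and 0.005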
### Point estimates for sigma^2:

p.mean.sig.sq <- (my.beta + 0.5*(sum(y^2) - n*(ybar^2))) / (my.alpha + n/2 - 0.5 - 1)
p.median.sig.sq <- qigamma(0.50, my.alpha + n/2 - 0.5,
                           my.beta + 0.5*(sum(y^2) - n*(ybar^2)))
print(paste("posterior.mean for sigma^2=", round(p.mean.sig.sq, 4),
            "posterior.median for sigma^2=", round(p.median.sig.sq, 4)))

### Marginal interval estimate for sigma^2:

hpd.95.sig.sq <- hpd(qigamma, alpha = my.alpha + n/2 - 0.5,
                     beta = my.beta + 0.5*(sum(y^2) - n*(ybar^2)), conf = 0.95)
hpd.95.sig.sq

#### AN APPROACH FOR INFERENCE ABOUT mu:

# Randomly sample many values from the posterior of sigma^2:
sig.sq.values <- rigamma(n = 1000000, alpha = my.alpha + n/2 - 0.5,
                         beta = my.beta + 0.5*(sum(y^2) - n*(ybar^2)))

# Randomly sample many values from the posterior of mu, GIVEN the sampled values of sigma^2 above:
mu.values <- rnorm(n = 1000000, mean = (sum(y) + my.delta*s0)/(n + s0),
                   sd = sqrt(sig.sq.values/(n + s0)))

# Point estimates for sigma^2 and mu:
round(median(sig.sq.values), 4)
round(median(mu.values), 4)

# 95% HPD interval estimates for sigma^2 and mu:
round(emp.hpd(sig.sq.values), 4)
round(emp.hpd(mu.values), 4)

## Concussion/brain example:

# Picking out the sample of concussed football players:
data(football)
concussion_subjects <- football %>%
  filter(group == "fb_concuss")

# Sample mean hippocampal volume:
concussion_subjects %>%
  summarize(mean(volume))

# Checking our assumed data model by examining a density estimate of the sample data:
# Does a Normal distribution with sigma = 0.5 seem reasonable?
ggplot(concussion_subjects, aes(x = volume)) +
  geom_density()

# Showing the posterior distribution, along with the prior and the likelihood:
# Note the 'mean' argument below is the PRIOR mean (what we call delta in class).
# The 'sd' argument below is the PRIOR standard deviation (what we call tau in class).
plot_normal_normal(mean = 6.5, sd = 0.4, sigma = 0.5, y_bar = 5.735, n = 25)

# Summarizing the posterior (and the prior):
summarize_normal_normal(mean = 6.5, sd = 0.4, sigma = 0.5, y_bar = 5.735, n = 25)

# A 95% HPD credible interval for mu, using the posterior mean and sd from the summary above:
hpd.95.mu <- hpd(qnorm, mean = 5.78, sd = 0.09701425, conf = 0.95)
hpd.95.mu
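# For comparison (optional sketch): a 95% quantile-based credible interval for mu,
# computed with base R's qnorm from the same posterior mean and sd used just above.
# Because this Normal posterior is symmetric, the quantile-based and HPD intervals
# should essentially agree:
round(qnorm(c(0.025, 0.975), mean = 5.78, sd = 0.09701425), 3)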