## Chapter 5 R Examples

library(bayesrules)
library(tidyverse)

## Fraud risk phone call example from textbook:

# Possible priors for lambda:

# Plot the Gamma(5, 1) prior
plot_gamma(shape = 5, rate = 1)

# Plot the Gamma(10, 2) prior
plot_gamma(shape = 10, rate = 2)

# Plot the Gamma(15, 3) prior
plot_gamma(shape = 15, rate = 3)

# Showing the posterior distribution, along with the prior and the likelihood:
plot_gamma_poisson(shape = 10, rate = 2, sum_y = 11, n = 4)

# Summarizing the posterior (and the prior):
summarize_gamma_poisson(shape = 10, rate = 2, sum_y = 11, n = 4)

######################
## Credible Intervals
######################

### Fraud risk example:

# A 90% quantile-based credible interval for the daily mean number of fraud risk calls:
round(qgamma(c(0.05, 0.95), shape = 21, rate = 6), 3)

library(TeachingDemos)
# May need to install the TeachingDemos package first?
# If so, type at the command line:
# install.packages("TeachingDemos", dependencies = FALSE)
# while connected to the internet.

# Using the hpd function for a 90% HPD credible interval for the fraud risk call example:
hpd(qgamma, shape = 21, rate = 6, conf = 0.90)

### Coin tossing example, Chapter 5 notes:

y <- 2; n <- 10

# A 95% quantile-based credible interval for theta:
qbeta(c(.025, .975), y + 1, n - y + 1)

# A 95% HPD credible interval for theta:
y <- 2; n <- 10
hpd(qbeta, shape1 = y + 1, shape2 = n - y + 1, conf = 0.95)

#####################################
# Conjugate Priors with Normal Data #
#####################################

# Example 1, Chapter 3 notes (Midge data)

# Data (in mm):
y <- c(1.64, 1.70, 1.72, 1.74, 1.82, 1.82, 1.82, 1.90, 2.08)
ybar <- mean(y); n <- length(y)
my.seq <- seq(0.5, 3, length = 300)

#### Case when the variance is known:

# Suppose we knew sigma^2 = 0.01:

# Prior parameters for the normal prior on mu:
my.delta <- 1.9; my.tau <- 0.3
# "95% sure" a priori that mu is between 1.3 and 2.5, say.
# What if we adjust this?
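# Optional sanity check (a quick sketch using base R only): the endpoints 1.3 and 2.5
# sit two prior standard deviations below and above the prior mean, so the Normal(1.9, 0.3)
# prior should put roughly 95% of its mass between them:
round(pnorm(2.5, mean = my.delta, sd = my.tau) - pnorm(1.3, mean = my.delta, sd = my.tau), 3)
# Comes out to about 0.954, consistent with being "95% sure" a priori.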
prior.mu <- dnorm(my.seq, mean = my.delta, sd = my.tau)  # Defining the prior

posterior.mu <- dnorm(my.seq,
                      mean = (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01),
                      sd = sqrt(0.01*(my.tau^2) / (0.01 + n*(my.tau^2))))  # Defining the posterior

plot(my.seq, prior.mu, ylim = c(-0.7, 12), yaxt = 'n',
     xlab = "Values of mu", ylab = "Probability distribution",
     type = 'l', lty = 3)  # Plotting the prior
lines(my.seq, posterior.mu)  # Plotting the posterior

### Point estimates for mu:

posterior.mean <- (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01)
posterior.median <- qnorm(0.50,
                          mean = (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01),
                          sd = sqrt(0.01*(my.tau^2) / (0.01 + n*(my.tau^2))))
print(paste("posterior.mean=", round(posterior.mean, 3),
            "posterior.median=", round(posterior.median, 3)))

### Interval estimate for mu:

library(TeachingDemos)  # loading the package in order to use the hpd function

hpd.95 <- hpd(qnorm,
              mean = (my.delta/(my.tau^2) + sum(y)/0.01) / (1/(my.tau^2) + n/0.01),
              sd = sqrt(0.01*(my.tau^2) / (0.01 + n*(my.tau^2))),
              conf = 0.95)
round(hpd.95, 3)
segments(hpd.95[1], 0, hpd.95[2], 0, lwd = 4)  # Showing the 95% HPD interval on the graph

# Using the built-in functions from the 'bayesrules' package:
library(bayesrules)

# Showing the posterior distribution, along with the prior and the likelihood:
plot_normal_normal(mean = 1.9, sd = 0.3, sigma = sqrt(0.01), y_bar = 1.804444, n = 9)

# Summarizing the posterior (and the prior):
summarize_normal_normal(mean = 1.9, sd = 0.3, sigma = sqrt(0.01), y_bar = 1.804444, n = 9)

#### Case when the mean and variance are both unknown:

# Prior parameters for the inverse gamma prior on sigma^2:
my.alpha <- 18; my.beta <- .34  # Based on the guesses E[sig^2] = .02, var[sig^2] = .005^2

# Prior parameters for the normal prior on mu:
my.delta <- 1.9; s0 <- 1  # a low value of s0 indicates a lack of prior knowledge

library(pscl)  # loading the pscl package, to use the inverse gamma distribution
# May need to install the pscl package first?
# If so, type at the command line:
# install.packages("pscl", dependencies = FALSE)
# while connected to the internet.
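# Optional check (a quick sketch, base R arithmetic only) that the Inverse-Gamma(18, 0.34)
# prior matches the guesses above, using the standard inverse-gamma moment formulas
# E[sig^2] = beta/(alpha - 1) for alpha > 1 and
# Var[sig^2] = beta^2 / ((alpha - 1)^2 * (alpha - 2)) for alpha > 2.
# (prior.mean.sig.sq and prior.sd.sig.sq are helper objects for this check only.)
prior.mean.sig.sq <- my.beta / (my.alpha - 1)
prior.sd.sig.sq <- sqrt(my.beta^2 / ((my.alpha - 1)^2 * (my.alpha - 2)))
c(prior.mean.sig.sq, prior.sd.sig.sq)  # should be approximately 0.02 and 0.005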
### Point estimates for sigma^2:

p.mean.sig.sq <- (my.beta + 0.5*(sum(y^2) - n*(ybar^2))) / (my.alpha + n/2 - 0.5 - 1)
p.median.sig.sq <- qigamma(0.50, my.alpha + n/2 - 0.5,
                           my.beta + 0.5*(sum(y^2) - n*(ybar^2)))
print(paste("posterior.mean for sigma^2=", round(p.mean.sig.sq, 4),
            "posterior.median for sigma^2=", round(p.median.sig.sq, 4)))

### Marginal interval estimate for sigma^2:

hpd.95.sig.sq <- hpd(qigamma, alpha = my.alpha + n/2 - 0.5,
                     beta = my.beta + 0.5*(sum(y^2) - n*(ybar^2)), conf = 0.95)
hpd.95.sig.sq

#### AN APPROACH FOR INFERENCE ABOUT mu:

# Randomly sample many values from the posterior of sigma^2:
sig.sq.values <- rigamma(n = 1000000, alpha = my.alpha + n/2 - 0.5,
                         beta = my.beta + 0.5*(sum(y^2) - n*(ybar^2)))

# Randomly sample many values from the posterior of mu, GIVEN the sampled values of sigma^2 above:
mu.values <- rnorm(n = 1000000, mean = (sum(y) + my.delta*s0)/(n + s0),
                   sd = sqrt(sig.sq.values/(n + s0)))

# Point estimates for sigma^2 and mu:
round(median(sig.sq.values), 4)
round(median(mu.values), 4)

# 95% HPD interval estimates for sigma^2 and mu:
round(emp.hpd(sig.sq.values), 4)
round(emp.hpd(mu.values), 4)

## Concussion/brain example:

# Picking out the sample of concussed football players:
data(football)
concussion_subjects <- football %>%
  filter(group == "fb_concuss")

# Sample mean hippocampal volume:
concussion_subjects %>%
  summarize(mean(volume))

# Checking our assumed data model by examining a density estimate of the sample data:
# Does a Normal distribution with sigma = 0.5 seem reasonable?
ggplot(concussion_subjects, aes(x = volume)) +
  geom_density()

# Showing the posterior distribution, along with the prior and the likelihood:
# Note the 'mean' argument below is the PRIOR mean (what we call delta in class).
# The 'sd' argument below is the PRIOR standard deviation (what we call tau in class).
plot_normal_normal(mean = 6.5, sd = 0.4, sigma = 0.5, y_bar = 5.735, n = 25)

# Summarizing the posterior (and the prior):
summarize_normal_normal(mean = 6.5, sd = 0.4, sigma = 0.5, y_bar = 5.735, n = 25)

# A 95% HPD credible interval for mu, using the posterior mean and sd from the summary above:
hpd.95.mu <- hpd(qnorm, mean = 5.78, sd = 0.09701425, conf = 0.95)
hpd.95.mu
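# For comparison (optional sketch): a 95% quantile-based credible interval for mu,
# computed with base R's qnorm from the same posterior mean and sd used just above.
# Because this Normal posterior is symmetric, the quantile-based and HPD intervals
# should essentially agree:
round(qnorm(c(0.025, 0.975), mean = 5.78, sd = 0.09701425), 3)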