###### Setting up prior specification ######
# 1 predictor variable, so we need exactly 1 + 1 = 2 hypothetical
# "prior observations" (one per regression coefficient).
# Suppose that based on "expert opinion" we have the following guesses
# (example elicitations -- replace with your own expert values):
xtilde.obs.1 <- c(25)
ytilde.obs.1 <- 500  # Prior belief: a cheeseburger with fat=25 has ~500 calories
xtilde.obs.2 <- c(35)
ytilde.obs.2 <- 700  # Prior belief: a cheeseburger with fat=35 has ~700 calories

# Making the matrix X~ : one row per hypothetical prior observation,
# with a leading column of 1s for the intercept.
prior.obs.stacked <- rbind(xtilde.obs.1, xtilde.obs.2)
xtilde <- cbind(rep(1, times = nrow(prior.obs.stacked)), prior.obs.stacked)

# Making the vector Y~ :
ytilde <- c(ytilde.obs.1, ytilde.obs.2)

## What is the calorie count of a "typical" cheeseburger?
## (midpoint of the two elicited prior beliefs)
calorie.typical <- 600

# Diagonal matrix D contains weights that indicate how much worth we place on
# our hypothetical prior observations (note the weights could vary if we are
# more confident about some hypothetical prior observations than about others):
D <- diag(c(2, 2))
# This D yields a D^{-1} that is diag(c(1/2, 1/2)), which
# assumes that our two hypothetical prior observations TOGETHER are
# "worth" about 1 actual sample observation.

# The prior mean on the beta vector that we are inducing is the exact
# solution of xtilde %*% beta = ytilde (xtilde is 2x2 and invertible here):
prior.mean.beta <- as.vector(solve(xtilde) %*% ytilde)
print("Prior mean for beta vector was:")
print(round(prior.mean.beta, 3))

### Choosing prior parameters a and b for the gamma prior on tau:
# The parameter "a" reflects our confidence in our prior:
# Let's choose "a" to be 0.5, which implies that our
# prior knowledge about tau is "worth" about 2a = 1 sample observation.
a <- 0.5

# Consider prior observation 1:
# Prior belief: a cheeseburger with fat=25 should have calories around 500.
# Suppose the expert believes the largest realistic calorie count for this
# type of cheeseburger is 700 (treated as roughly a 95th percentile).
# Then set the prior guess for sigma to be (700 - 500)/1.645 = 121.58,
# so the prior guess for sigma^2 is 121.58^2 = 14782.6,
# and the prior guess for tau is 1/14782.6 = 6.765e-05.
# Since the gamma prior mean is a/b and we want that mean to equal our
# prior guess for tau, let b = a / tau.guess.
# tau.guess comes from the elicitation above: sigma ~ (700 - 500)/1.645,
# so tau = 1/sigma^2.
tau.guess <- 1 / ((700 - 500) / 1.645)^2  # approx 6.765e-05
b <- a / tau.guess                        # here b is around 7391

# Data:
restaurant <- c('Wendys', 'Whataburger', 'In-n-Out', 'McDonalds', 'McDonalds',
                'Burger_King', 'Jack_in_the_Box', 'Steak-n-Shake')
sandwich <- c('Quarter_Pound_Single_with_cheese', 'Whataburger', 'Cheeseburger',
              'Big_Mac', 'Quarter_Pounder_with_cheese', 'Whopper_with_cheese',
              'Jumbo_Jack', 'Double_Steakburger_with_cheese')
fat_content <- c(20, 39, 27, 29, 26, 47, 35, 38)
calories <- c(430, 750, 480, 540, 510, 760, 690, 632)

y <- calories
x1 <- fat_content
X <- as.matrix(cbind(rep(1, length(x1)), x1))

##### Posterior information for tau and beta:

# Calculate beta-hat, the posterior mean for beta: a precision-weighted
# combination of the data (X, y) and the prior pseudo-data (xtilde, ytilde):
betahat <- as.vector(
  solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde) %*%
    (t(X) %*% y + t(xtilde) %*% solve(D) %*% ytilde)
)

# Calculate s*, the posterior scale for sigma^2: residual sums of squares
# from the real and hypothetical observations, plus 2b, over n + 2a:
n <- length(y)
sstar <- as.numeric(
  (t(y - X %*% betahat) %*% (y - X %*% betahat) +
     t(ytilde - xtilde %*% betahat) %*% solve(D) %*%
       (ytilde - xtilde %*% betahat) + 2 * b) / (n + 2 * a)
)

### Point estimates for tau (and thus for sigma^2):
p.mean.tau <- 1 / sstar
p.mean.sig.sq <- 1 / p.mean.tau
p.median.tau <- qgamma(0.50, shape = (n + 2 * a) / 2,
                       rate = ((n + 2 * a) / 2) * sstar)
p.median.sig.sq <- 1 / p.median.tau
print(paste("posterior.mean for sigma^2=", round(p.mean.sig.sq, 3),
            "posterior.median for sigma^2=", round(p.median.sig.sq, 3)))

### Marginal interval estimate for tau (and sigma^2):
library(TeachingDemos)  # for the hpd() function
hpd.95.tau <- hpd(qgamma, shape = (n + 2 * a) / 2,
                  rate = ((n + 2 * a) / 2) * sstar)
# Inverting the tau interval gives a (reversed) interval for sigma^2,
# hence the sort() below:
hpd.95.sig.sq <- 1 / hpd.95.tau
print("95% posterior interval for sigma^2:")
round(sort(hpd.95.sig.sq), 3)

### A BETTER APPROACH TO GET POINT ESTIMATES FOR BETA:
# Randomly generate many tau values from its posterior distribution,
# then draw beta | tau from its conditional multivariate normal:
how.many <- 30000
tau.values <- rgamma(n = how.many, shape = (n + 2 * a) / 2,
                     rate = ((n + 2 * a) / 2) * sstar)
library(mvtnorm)
beta.values <- matrix(0, nrow = how.many, ncol = length(betahat))
# Hoist the loop-invariant inverse-precision matrix out of the loop:
post.prec.inv <- solve(t(X) %*% X + t(xtilde) %*% solve(D) %*% xtilde)
for (j in seq_len(how.many)) {
  beta.values[j, ] <- rmvnorm(n = 1, mean = betahat,
                              sigma = (1 / tau.values[j]) * post.prec.inv)
}

# Posterior median for each regression coefficient:
post.medians <- apply(beta.values, 2, median)
# 95% posterior interval for each regression coefficient:
post.lower <- apply(beta.values, 2, quantile, probs = 0.025)
post.upper <- apply(beta.values, 2, quantile, probs = 0.975)

## Summarizing:
names.predictors <- c("x1")
beta.post.summary <- data.frame(cbind(post.lower, post.medians, post.upper),
                                row.names = c("intercept", names.predictors))
names(beta.post.summary) <- c('0.025 Quantile', '0.5 Quantile',
                              '0.975 Quantile')
print(beta.post.summary)

# For comparison, the frequentist fit:
summary(lm(y ~ x1))
anova(lm(y ~ x1))

library(rstanarm)
library(bayesrules)
library(tidyverse)
burger_data <- data.frame(calories, fat_content)
# Slope prior mean 20 matches the slope induced by the conjugate prior
# above (prior.mean.beta[2] = 20 calories per gram of fat):
cal_model <- stan_glm(calories ~ fat_content, data = burger_data,
                      family = gaussian,
                      prior_intercept = normal(calorie.typical, 200),
                      prior = normal(20, 40, autoscale = TRUE),
                      prior_aux = exponential(1, autoscale = TRUE),
                      chains = 1, iter = 5000 * 2)
# NOTE(review): tidy() for stanreg objects is provided by broom.mixed;
# confirm it is attached (bayesrules suggests it) before running this line.
tidy(cal_model, effects = c("fixed", "aux"),
     conf.int = TRUE, conf.level = 0.90)