# The needed packages and
# The Spotify data set:

# Load packages
library(bayesrules)
library(tidyverse)
library(rstanarm)
library(bayesplot)
library(tidybayes)
library(broom.mixed)
library(forcats)

# Load data
data(spotify)

# printing the first 30 rows:

 print(as.data.frame(spotify)[1:30,])

# For this chapter we just use a subset of the variables:

spotify <- spotify %>% 
  select(artist, title, popularity) %>% 
  mutate(artist = fct_reorder(artist, popularity, .fun = 'mean'))

# First few rows
head(spotify, 3)

# Number of songs
nrow(spotify)

# Number of artists
nlevels(spotify$artist)

# The 44 artists in this 'spotify' data set:
levels(spotify$artist)
# Which of these artists is Dr. Hitchcock's favorite / 2nd favorite in this set?

artist_means <- spotify %>% 
  group_by(artist) %>% 
  summarize(count = n(), popularity = mean(popularity))

artist_means %>%
  slice(1:2, 43:44)

# the number of songs per artist runs as low as 2 and as high as 40:

artist_means %>% 
  summarize(min(count), max(count))

# density plot of popularity as a whole:

ggplot(spotify, aes(x = popularity)) + 
  geom_density()

# Fitting an intercept-only regression model:

spotify_complete_pooled <- stan_glm(
  popularity ~ 1, 
  data = spotify, family = gaussian, 
  prior_intercept = normal(50, 2.5, autoscale = TRUE),
  prior_aux = exponential(1, autoscale = TRUE),
  chains = 4, iter = 5000*2)


complete_summary <- tidy(spotify_complete_pooled, 
                         effects = c("fixed", "aux"), 
                         conf.int = TRUE, conf.level = 0.90)
complete_summary

# Posterior predictive means by artist:

predictions_complete <- posterior_predict(spotify_complete_pooled,
                                          newdata = artist_means)

ppc_intervals(artist_means$popularity, yrep = predictions_complete,
              prob_outer = 0.80) +
  ggplot2::scale_x_continuous(labels = artist_means$artist,
                              breaks = 1:nrow(artist_means)) +
  xaxis_text(angle = 90, hjust = 1)

# No pooling:  Separate popularity densities for each artist:

ggplot(spotify, aes(x = popularity, group = artist)) + 
  geom_density()

# No pooling model estimation:

spotify_no_pooled <- stan_glm(
  popularity ~ artist - 1, 
  data = spotify, family = gaussian, 
  prior = normal(50, 2.5, autoscale = TRUE),
  prior_aux = exponential(1, autoscale = TRUE),
  chains = 4, iter = 5000*2)

# Posterior predictive distributions:

# Simulate the posterior predictive models

predictions_no <- posterior_predict(
  spotify_no_pooled, newdata = artist_means)
  
# Plot the posterior predictive intervals
ppc_intervals(artist_means$popularity, yrep = predictions_no, 
              prob_outer = 0.80) +
  ggplot2::scale_x_continuous(labels = artist_means$artist, 
                              breaks = 1:nrow(artist_means)) +
  xaxis_text(angle = 90, hjust = 1)
  
## Hierarchical Modeling:

# Checking the distribution of artist sample means:

ggplot(artist_means, aes(x = popularity)) + 
  geom_density()
  
# Fitting the model:

spotify_hierarchical <- stan_glmer(
  popularity ~ (1 | artist), 
  data = spotify, family = gaussian,
  prior_intercept = normal(50, 2.5, autoscale = TRUE),
  prior_aux = exponential(1, autoscale = TRUE),
  prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1),
  chains = 4, iter = 5000*2)
  
# MCMC diagnostics:

# There are lots of parameters here, so there are a bunch of plots here:
#mcmc_trace(spotify_hierarchical) # don't run, takes a lot of time to show 45+ trace plots ...
mcmc_dens_overlay(spotify_hierarchical)

# You can do these plots for one specific parameter at a time:

mcmc_trace(spotify_hierarchical, pars='(Intercept)')  # trace plot for overall mu

mcmc_dens_overlay(spotify_hierarchical, pars='sigma')  # posterior density for sigma_y
mcmc_dens_overlay(spotify_hierarchical, pars='Sigma[artist:(Intercept),(Intercept)]')  
# posterior density for sigma_mu^2  (NOTE this is for the SQUARE of sigma_mu)
mcmc_dens_overlay(spotify_hierarchical, pars='(Intercept)')  # posterior density for overall mu
mcmc_dens_overlay(spotify_hierarchical, regex_pars = "Vampire_Weekend") # posterior density for VW's (mu_j - mu)

# The ACF plots are kind of unreadable with so many parameters to estimate...
#mcmc_acf(spotify_hierarchical)
# But you can look at them individually using the 'pars' or 'regex_pars' arguments as above:
mcmc_acf(spotify_hierarchical, pars='(Intercept)')  # ACF plot for overall mu

# Posterior summary about mu:

tidy(spotify_hierarchical, effects = "fixed", 
     conf.int = TRUE, conf.level = 0.80)

# Posterior summary about sigma_y and sigma_mu:

tidy(spotify_hierarchical, effects = "ran_pars")

# Posterior summaries about the mu_j's:

# Get posterior summaries for mu_j

# Get MCMC chains for each mu_j
artist_chains <- spotify_hierarchical %>%
  spread_draws(`(Intercept)`, b[,artist]) %>% 
  mutate(mu_j = `(Intercept)` + b) 

artist_summary_scaled <- artist_chains %>% 
  select(-`(Intercept)`, -b) %>% 
  mean_qi(.width = 0.80) %>% 
  mutate(artist = fct_reorder(artist, mu_j))

# Check out the results
artist_summary_scaled %>% 
  select(artist, mu_j, .lower, .upper) %>% 
  head(4)
  
# Plotting the estimates and 80% credible intervals for the mu_j's:

ggplot(artist_summary_scaled, 
       aes(x = artist, y = mu_j, ymin = .lower, ymax = .upper)) +
  geom_pointrange() +
  xaxis_text(angle = 90, hjust = 1)
  
  
# The width of the intervals depends on the sample sizes for the artist:

artist_means %>% 
  filter(artist %in% c("Frank Ocean", "Lil Skies"))
  
# Frank Ocean's mu_j has a narrow interval; Lil Skies's mu_j has a wide interval.

### Posterior prediction:

# Store the simulation in a data frame
spotify_hierarchical_df <- as.data.frame(spotify_hierarchical)

# # Simulate Vampire Weekend's posterior predictive model

VW_chains <- spotify_hierarchical_df %>%
  rename(b = `b[(Intercept) artist:Vampire_Weekend]`) %>% 
  select(`(Intercept)`, b, sigma) %>% 
  mutate(mu_VW = `(Intercept)` + b,
         y_VW = rnorm(20000, mean = mu_VW, sd = sigma))

# First few simulated predicted song popularities
head(VW_chains, 5)

# Summary of posterior predictive distribution for Vampire Weekend:

# Predicting popularity of a (new) individual Vampire Weekend song:
# Posterior summary of Y_new,j:
VW_chains %>% 
  mean_qi(y_VW, .width = 0.80) 
     

# Posterior summary of mu_j
artist_summary_scaled %>% 
  filter(artist == "artist:Vampire_Weekend")
  
# We can predict Vampire Weekend's mean popularity with much more precision 
# than the popularity of an individual Vampire Weekend song.


# Posterior prediction for new artist:

taytay_chains <- spotify_hierarchical_df %>%
  mutate(sigma_mu = sqrt(`Sigma[artist:(Intercept),(Intercept)]`),
         mu_taytay = rnorm(20000, `(Intercept)`, sigma_mu),
         y_taytay = rnorm(20000, mu_taytay, sigma))

# Posterior predictive summaries
taytay_chains %>% 
  mean_qi(y_taytay, .width = 0.80)
  
  
  
 # Summarizing the posterior predictions for these two artists:
 
### posterior_predict function may be wonky in R:
 prediction_shortcut <- posterior_predict(
  spotify_hierarchical,
  newdata = data.frame(artist = c("Vampire Weekend", "Taylor Swift")))
# Posterior predictive model plots
mcmc_areas(prediction_shortcut, prob = 0.8) +
  ggplot2::scale_y_discrete(labels = c("Vampire Weekend", "Taylor Swift"))

## This code in base R should do something similar:

plot(density(VW_chains$y_VW),main="Song Popularity",sub="posterior predictive medians marked on axis",xlab="Song Popularity")
lines(density(taytay_chains$y_taytay),col='red')
rug(median(VW_chains$y_VW), col="black", ticksize=0.2, lwd=2); rug(median(taytay_chains$y_taytay), col="red", ticksize=0.2,lwd=2)
leg.txt <- c("Vampire Weekend", "Taylor Swift")
 legend("topright", legend = leg.txt, col = c("black","red"), pch=15:15)
# If you want 90% prediction intervals for either artist's "new" song's popularity:
#rug(quantile(VW_chains$y_VW,.05), col="black", ticksize=0.1, lwd=2); rug(quantile(VW_chains$y_VW,.95), col="black", ticksize=0.1, lwd=2); 
#rug(quantile(taytay_chains$y_taytay,.05), col="red", ticksize=0.1,lwd=2); rug(quantile(taytay_chains$y_taytay,.95), col="red", ticksize=0.1,lwd=2);

## Posterior predictive plots to show shrinkage:

### posterior_predict function may be wonky in R:
predictions_hierarchical <- posterior_predict(spotify_hierarchical, 
                                              newdata = artist_means)
ppc_intervals(artist_means$popularity, yrep = predictions_hierarchical, 
              prob_outer = 0.80) +
  ggplot2::scale_x_continuous(labels = artist_means$artist, 
                              breaks = 1:nrow(artist_means)) +
  xaxis_text(angle = 90, hjust = 1) + 
  geom_hline(yintercept = 58.4, linetype = "dashed")


## This code in base R should do something similar:

artist.mean.draws<-spotify_hierarchical_df[,2:45]+spotify_hierarchical_df[,1]
artist.popularity.draws<-matrix(NA,nrow=20000,ncol=44)
for (i in 1:44){
artist.popularity.draws[,i]<-rnorm(20000, mean = artist.mean.draws[,i], sd = spotify_hierarchical_df[,46])
}

ppc_intervals(artist_means$popularity, yrep = artist.popularity.draws, 
              prob_outer = 0.80) +
  ggplot2::scale_x_continuous(labels = artist_means$artist, 
                              breaks = 1:nrow(artist_means)) +
  xaxis_text(angle = 90, hjust = 1) + 
  geom_hline(yintercept = 58.4, linetype = "dashed")