# The needed packages and
# The Spotify data set:

# Load packages
library(bayesrules)
library(tidyverse)
library(rstanarm)
library(bayesplot)
library(tidybayes)
library(broom.mixed)
library(forcats)


# Import and wrangle the data
data(spotify)
spotify <- spotify %>% 
  select(artist, title, danceability, valence, genre)


ggplot(spotify, aes(y = danceability, x = genre)) + 
  geom_boxplot()
ggplot(spotify, aes(y = danceability, x = valence)) + 
  geom_point()
ggplot(spotify, aes(y = danceability, x = valence, group = artist)) + 
  geom_smooth(method = "lm", se = FALSE, size = 0.5)

spotify_model_1 <- stan_glmer(
  danceability ~ valence + genre + (1 | artist),
  data = spotify, family = gaussian,
  prior_intercept = normal(50, 2.5, autoscale = TRUE),
  prior = normal(0, 2.5, autoscale = TRUE), 
  prior_aux = exponential(1, autoscale = TRUE),
  prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1),
  chains = 1, iter = 5000*2, seed = 84735)

spotify_model_2 <- stan_glmer(
  danceability ~ valence + genre + (valence | artist), 
  data = spotify, family = gaussian,
  prior_intercept = normal(50, 2.5, autoscale = TRUE),
  prior = normal(0, 2.5, autoscale = TRUE), 
  prior_aux = exponential(1, autoscale = TRUE),
  prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1),
  chains = 1, iter = 5000*2, seed = 84735)

# Check out the prior specifications
prior_summary(spotify_model_1)
prior_summary(spotify_model_2)

pp_check(spotify_model_1) +
  xlab("danceability")
pp_check(spotify_model_2) +
  xlab("danceability")


# Calculate ELPD for the 2 models
elpd_spotify_1 <- loo(spotify_model_1)
elpd_spotify_2 <- loo(spotify_model_2)

# Compare the ELPD
loo_compare(elpd_spotify_1, elpd_spotify_2)

tidy(spotify_model_1, effects = "fixed",
     conf.int = TRUE, conf.level = 0.80)

# Plot the posterior models of the genre coefficients
mcmc_areas(spotify_model_1, pars = vars(starts_with("genre")), prob = 0.8) + 
  geom_vline(xintercept = 0)

tidy(spotify_model_1, effects = "ran_vals",
     conf.int = TRUE, conf.level = 0.80) %>% 
  filter(level %in% c("Camilo", "Missy_Elliott")) %>% 
  select(level, estimate, conf.low, conf.high)


### Posterior prediction:

# Store the simulation in a data frame
spotify_model_1_df <- as.data.frame(spotify_model_1)



# posterior_predict may be wonky in R:
set.seed(84735)
predict_next_song <- posterior_predict(
  spotify_model_1,
  newdata = data.frame(
    artist = c("Camilo", "Mohsen Beats", "Missy Elliott"), 
    valence = c(80, 60, 90), genre = c("latin", "rock", "rap")))

# Posterior predictive model plots
mcmc_areas(predict_next_song, prob = 0.8) +
 ggplot2::scale_y_discrete(
   labels = c("Camilo", "Mohsen Beats", "Missy Elliott"))

## This code in base R should do something similar (for Camilo and Missy Elliott songs):

Camilo.new.song.draws<-spotify_model_1_df[,'(Intercept)']+spotify_model_1_df[,'b[(Intercept) artist:Camilo]']+ spotify_model_1_df[,'valence']*80+spotify_model_1_df[,'genrelatin']+spotify_model_1_df[,'sigma']

Missy.new.song.draws<-spotify_model_1_df[,'(Intercept)']+spotify_model_1_df[,'b[(Intercept) artist:Missy_Elliott]']+ spotify_model_1_df[,'valence']*90+spotify_model_1_df[,'genrerap']+spotify_model_1_df[,'sigma']

par(mfrow=c(2,1))
plot(density(Camilo.new.song.draws),main="danceability of new Camilo Latin song with valence=80",xlim=c(0,120),xlab="danceability");abline(v=median(Camilo.new.song.draws))
plot(density(Missy.new.song.draws),main="danceability of new Missy Elliott rap song with valence=90",xlim=c(0,120),xlab="danceability");abline(v=median(Missy.new.song.draws))
par(mfrow=c(1,1))

quantile(Camilo.new.song.draws, c(0.1,0.5,0.9))

quantile(Missy.new.song.draws, c(0.1,0.5,0.9))


# Posterior prediction for new artist:

taytay_chains_paper_rings <- spotify_model_1_df %>%
  mutate(sigma_mu = sqrt(`Sigma[artist:(Intercept),(Intercept)]`),
         mu_taytay_paper_rings = rnorm(5000, `(Intercept)`+`genrepop`+`valence`*86.5, sigma_mu),
         y_taytay_paper_rings = rnorm(5000, mu_taytay_paper_rings, sigma))

# Posterior predictive summaries
taytay_chains_paper_rings %>% 
  mean_qi(y_taytay_paper_rings, .width = 0.80)

nirvana_chains_on_a_plain <- spotify_model_1_df %>%
  mutate(sigma_mu = sqrt(`Sigma[artist:(Intercept),(Intercept)]`),
         mu_nirvana_on_a_plain = rnorm(5000, `(Intercept)`+`genrerock`+`valence`*35.8, sigma_mu),
         y_nirvana_on_a_plain = rnorm(5000, mu_nirvana_on_a_plain, sigma))

# Posterior predictive summaries
nirvana_chains_on_a_plain %>% 
  mean_qi(y_nirvana_on_a_plain, .width = 0.80)

predict_next_song_new <- posterior_predict(
  spotify_model_1,
  newdata = data.frame(
    artist = c("Taylor Swift", "Nirvana"), 
    valence = c(86.5, 35.8), genre = c("pop", "rock")))

# The true danceability of "Paper Rings" is
.
.
.
.
# 81.1


# The true danceability of "On A Plain" is
.
.
.
.

# 42.8