# The needed packages and # The Spotify data set: # Load packages library(bayesrules) library(tidyverse) library(rstanarm) library(bayesplot) library(tidybayes) library(broom.mixed) library(forcats) # Import and wrangle the data data(spotify) spotify <- spotify %>% select(artist, title, danceability, valence, genre) ggplot(spotify, aes(y = danceability, x = genre)) + geom_boxplot() ggplot(spotify, aes(y = danceability, x = valence)) + geom_point() ggplot(spotify, aes(y = danceability, x = valence, group = artist)) + geom_smooth(method = "lm", se = FALSE, size = 0.5) spotify_model_1 <- stan_glmer( danceability ~ valence + genre + (1 | artist), data = spotify, family = gaussian, prior_intercept = normal(50, 2.5, autoscale = TRUE), prior = normal(0, 2.5, autoscale = TRUE), prior_aux = exponential(1, autoscale = TRUE), prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1), chains = 1, iter = 5000*2, seed = 84735) spotify_model_2 <- stan_glmer( danceability ~ valence + genre + (valence | artist), data = spotify, family = gaussian, prior_intercept = normal(50, 2.5, autoscale = TRUE), prior = normal(0, 2.5, autoscale = TRUE), prior_aux = exponential(1, autoscale = TRUE), prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1), chains = 1, iter = 5000*2, seed = 84735) # Check out the prior specifications prior_summary(spotify_model_1) prior_summary(spotify_model_2) pp_check(spotify_model_1) + xlab("danceability") pp_check(spotify_model_2) + xlab("danceability") # Calculate ELPD for the 2 models elpd_spotify_1 <- loo(spotify_model_1) elpd_spotify_2 <- loo(spotify_model_2) # Compare the ELPD loo_compare(elpd_spotify_1, elpd_spotify_2) tidy(spotify_model_1, effects = "fixed", conf.int = TRUE, conf.level = 0.80) # Plot the posterior models of the genre coefficients mcmc_areas(spotify_model_1, pars = vars(starts_with("genre")), prob = 0.8) + geom_vline(xintercept = 0) tidy(spotify_model_1, effects = "ran_vals", conf.int = TRUE, conf.level = 0.80) %>% filter(level %in% c("Camilo", "Missy_Elliott")) %>% select(level, estimate, conf.low, conf.high) ### Posterior prediction: # Store the simulation in a data frame spotify_model_1_df <- as.data.frame(spotify_model_1) # posterior_predict may be wonky in R: set.seed(84735) predict_next_song <- posterior_predict( spotify_model_1, newdata = data.frame( artist = c("Camilo", "Mohsen Beats", "Missy Elliott"), valence = c(80, 60, 90), genre = c("latin", "rock", "rap"))) # Posterior predictive model plots mcmc_areas(predict_next_song, prob = 0.8) + ggplot2::scale_y_discrete( labels = c("Camilo", "Mohsen Beats", "Missy Elliott")) ## This code in base R should do something similar (for Camilo and Missy Elliott songs): Camilo.new.song.draws<-spotify_model_1_df[,'(Intercept)']+spotify_model_1_df[,'b[(Intercept) artist:Camilo]']+ spotify_model_1_df[,'valence']*80+spotify_model_1_df[,'genrelatin']+spotify_model_1_df[,'sigma'] Missy.new.song.draws<-spotify_model_1_df[,'(Intercept)']+spotify_model_1_df[,'b[(Intercept) artist:Missy_Elliott]']+ spotify_model_1_df[,'valence']*90+spotify_model_1_df[,'genrerap']+spotify_model_1_df[,'sigma'] par(mfrow=c(2,1)) plot(density(Camilo.new.song.draws),main="danceability of new Camilo Latin song with valence=80",xlim=c(0,120),xlab="danceability");abline(v=median(Camilo.new.song.draws)) plot(density(Missy.new.song.draws),main="danceability of new Missy Elliott rap song with valence=90",xlim=c(0,120),xlab="danceability");abline(v=median(Missy.new.song.draws)) par(mfrow=c(1,1)) quantile(Camilo.new.song.draws, c(0.1,0.5,0.9)) quantile(Missy.new.song.draws, c(0.1,0.5,0.9)) # Posterior prediction for new artist: taytay_chains_paper_rings <- spotify_model_1_df %>% mutate(sigma_mu = sqrt(`Sigma[artist:(Intercept),(Intercept)]`), mu_taytay_paper_rings = rnorm(5000, `(Intercept)`+`genrepop`+`valence`*86.5, sigma_mu), y_taytay_paper_rings = rnorm(5000, mu_taytay_paper_rings, sigma)) # Posterior predictive summaries taytay_chains_paper_rings %>% mean_qi(y_taytay_paper_rings, .width = 0.80) nirvana_chains_on_a_plain <- spotify_model_1_df %>% mutate(sigma_mu = sqrt(`Sigma[artist:(Intercept),(Intercept)]`), mu_nirvana_on_a_plain = rnorm(5000, `(Intercept)`+`genrerock`+`valence`*35.8, sigma_mu), y_nirvana_on_a_plain = rnorm(5000, mu_nirvana_on_a_plain, sigma)) # Posterior predictive summaries nirvana_chains_on_a_plain %>% mean_qi(y_nirvana_on_a_plain, .width = 0.80) predict_next_song_new <- posterior_predict( spotify_model_1, newdata = data.frame( artist = c("Taylor Swift", "Nirvana"), valence = c(86.5, 35.8), genre = c("pop", "rock"))) # The true danceability of "Paper Rings" is . . . . # 81.1 # The true danceability of "On A Plain" is . . . . # 42.8