# Mixed-effects models of Spotify song popularity as a function of valence,
# with songs grouped by artist. Fits a random-intercept model and a
# random-intercept-and-slope model, compares them, and plots the observed
# data together with per-artist fitted lines.

library(nlme)          # lme()
library(dplyr)         # filter()
library(ggplot2)
library(RColorBrewer)  # brewer.pal()

# Data ----

spotify <- read.csv(
  file = "http://people.stat.sc.edu/hitchcock/spotify_bayesrules.csv",
  header = TRUE
)

# If you want to see the whole data set:
# print(as.data.frame(spotify))

# Creating a character variable that combines the artist and title:
spotify$artist_title <- paste(spotify$artist, ':', spotify$title)

# Picking out just some of the variables.
# NOTE(review): columns are selected by position, so this depends on the
# column order of the CSV. The code below needs at least `artist`,
# `popularity` and `valence` to be among them -- confirm with names(spotify).
spotify_small_1 <- spotify[, c(4, 5, 11, 13, 14, 22, 23, 24)]

# Restrict the analysis to a fixed set of 13 artists.
selected_artists <- c(
  "Beyoncé", "Camilo", "David Lee Roth", "Frank Ocean",
  "House Of Pain", "Kendrick Lamar", "Mia X", "Missy Elliott",
  "Sean Kingston", "Sufjan Stevens", "The xx", "TV Noise",
  "Vampire Weekend"
)
spotify_small <- filter(spotify_small_1, artist %in% selected_artists)

head(spotify_small)

# Models ----

# Random intercept model (fitted by maximum likelihood):
spotify.lme <- lme(
  popularity ~ valence,
  random = ~ 1 | artist,
  data = spotify_small,
  method = "ML"
)

# spotify.lme.full <- lme(
#   popularity ~ genre + danceability + energy + valence + tempo + duration_ms,
#   random = ~ 1 | artist,
#   data = spotify_small, method = "ML"
# )
# anova(spotify.lme, spotify.lme.full)

# Random intercept and slope model:
spotify.lme1 <- lme(
  popularity ~ valence,
  random = ~ valence | artist,
  data = spotify_small,
  method = "ML"
)

# Comparing the two models:
anova(spotify.lme, spotify.lme1)

summary(spotify.lme)
summary(spotify.lme1)

# Plots ----

# Shared scaffold for all three figures: popularity against valence, one
# colour and one point shape per artist. Returns a ggplot object to which a
# line layer is added by the caller.
#
# data: a data frame with columns `valence`, `popularity` and `artist`.
artist_scatter <- function(data) {
  ggplot(data, aes(x = valence, y = popularity, color = artist)) +
    geom_point(aes(shape = artist, color = artist)) +
    # 13 shapes and 13 colours, one per selected artist. The "Paired"
    # palette is requested with 12 colours, so a 13th is appended by hand.
    scale_shape_manual(values = 1:13) +
    scale_color_manual(values = c(brewer.pal(12, "Paired"), "#0000FF"))
}

# Plotting ordinary regression lines, estimated SEPARATELY for each artist:
artist_scatter(spotify_small) +
  geom_smooth(method = "lm", se = FALSE)

# Plotting the artist-specific regression lines based on the random
# intercept model. predict() is called without `newdata`, so it returns one
# fitted value per row of the data used to fit the model.
spotify_small$predicted.model <- predict(spotify.lme)

artist_scatter(spotify_small) +
  geom_line(aes(y = predicted.model))

# Plotting the artist-specific regression lines based on the random
# intercept and slope model:
spotify_small$predicted.model1 <- predict(spotify.lme1)

artist_scatter(spotify_small) +
  geom_line(aes(y = predicted.model1))