# Read the Spotify data set (a CSV file with a header row) from the URL below.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
spotify <- read.csv(
  file = "http://people.stat.sc.edu/hitchcock/spotify_bayesrules.csv",
  header = TRUE
)

# If you want to see the whole data set:
# print(as.data.frame(spotify))

# Creating a character variable that combines the artist and title.
# paste() uses its default separator (a single space), so the values look like
# "artist : title". The new column is appended after the existing columns.
spotify$artist_title <- paste(spotify$artist, ":", spotify$title)

# Picking out just some of the variables, selected by column position.
# NOTE(review): the positions presumably correspond to artist, popularity,
# genre, danceability, energy, valence, tempo and duration_ms (the variables
# used in the models later in this script) -- confirm against names(spotify).
spotify_small_1 <- spotify[, c(4, 5, 11, 13, 14, 22, 23, 24)]

library(dplyr)

# Keep only the rows whose artist is one of the thirteen artists listed here,
# then preview the first few rows of the reduced data set.
spotify_small <- spotify_small_1 %>%
  filter(
    artist %in% c(
      "Beyoncé", "Camilo", "David Lee Roth", "Frank Ocean",
      "House Of Pain", "Kendrick Lamar", "Mia X", "Missy Elliott",
      "Sean Kingston", "Sufjan Stevens", "The xx", "TV Noise",
      "Vampire Weekend"
    )
  )
head(spotify_small)


# lme() is provided by the nlme package, which must be attached before the
# models below can be fitted.
library(nlme)

# Random intercept model:
# popularity is regressed on valence, with a separate random intercept for
# each artist. Fitted by maximum likelihood (method = "ML").

spotify.lme <- lme(
  popularity ~ valence,
  random = ~ 1 | artist,
  data = spotify_small,
  method = "ML"
)

# Optional larger fixed-effects model (left disabled, as in the original):
# spotify.lme.full <- lme(
#   popularity ~ genre + danceability + energy + valence + tempo + duration_ms,
#   random = ~ 1 | artist, data = spotify_small, method = "ML"
# )

# anova(spotify.lme, spotify.lme.full)

# Random intercept and slope model:
# same fixed effects, but both the intercept and the valence slope are allowed
# to vary by artist.

spotify.lme1 <- lme(
  popularity ~ valence,
  random = ~ valence | artist,
  data = spotify_small,
  method = "ML"
)

# Comparing the two models:

anova(spotify.lme, spotify.lme1)

# Full summaries of each fitted model.

summary(spotify.lme)

summary(spotify.lme1)

# Plotting the observed data alongside the predicted values from the model:

library(ggplot2)
library(RColorBrewer)

# Plotting ordinary regression lines, estimated SEPARATELY for each artist:
# each artist gets its own point shape and colour (the 12 "Paired" Brewer
# colours plus pure blue as a 13th), and geom_smooth() draws one least-squares
# line per artist without a confidence band.

ggplot(
  data = spotify_small,
  mapping = aes(x = valence, y = popularity, color = artist)
) +
  geom_point(mapping = aes(shape = artist, color = artist)) +
  scale_shape_manual(values = 1:13) +
  scale_color_manual(values = c(brewer.pal(12, "Paired"), "#0000FF")) +
  geom_smooth(method = "lm", se = FALSE)

# Store the predictions from the random intercept model as a new column.
spotify_small[["predicted.model"]] <- predict(spotify.lme)

# Plotting the artist-specific regression lines based on the random intercept model:
# the points are the observed data; the lines connect the stored predictions
# for each artist.

ggplot(
  data = spotify_small,
  mapping = aes(x = valence, y = popularity, color = artist)
) +
  geom_point(mapping = aes(shape = artist, color = artist)) +
  scale_shape_manual(values = 1:13) +
  scale_color_manual(values = c(brewer.pal(12, "Paired"), "#0000FF")) +
  geom_line(mapping = aes(y = predicted.model))

# Plotting the artist-specific regression lines based on the random intercept and slope model:
# the predictions from that model are first stored as a new column, then drawn
# as one line per artist on top of the observed points.

spotify_small[["predicted.model1"]] <- predict(spotify.lme1)

ggplot(
  data = spotify_small,
  mapping = aes(x = valence, y = popularity, color = artist)
) +
  geom_point(mapping = aes(shape = artist, color = artist)) +
  scale_shape_manual(values = 1:13) +
  scale_color_manual(values = c(brewer.pal(12, "Paired"), "#0000FF")) +
  geom_line(mapping = aes(y = predicted.model1))