# Illustration of interaction between two predictors in a regression model

# Load the built-in mtcars data set and pull out the three columns used below:
# the response (mpg) and the two predictors (disp and hp).
data("mtcars")

mileage <- mtcars[["mpg"]]
displace <- mtcars[["disp"]]
horsepwr <- mtcars[["hp"]]

# Regression model with Y=mileage, X1=displace, X2=horsepwr

# Visual check of the correlation between X1 and X2 (scatterplot):

plot(x = displace, y = horsepwr)

# We see a correlation, but this is NOT related to whether X1 and X2 interact in the regression model.
# Correlation between predictors is a question of MULTICOLLINEARITY, which is a different concept from interaction.
# If two predictors are strongly collinear, then we may not need to include both of them in the model.


# Is there interaction between X1 and X2?
# The key is:
# Is the effect of displacement on mileage DIFFERENT at different levels of horsepower?  
# If so, there is interaction between X1 and X2.
# Similar question:
# Is the effect of horsepower on mileage DIFFERENT at different levels of displacement?  
# If so, there is interaction.

# Here's a plot to assess this:

# Split horsepower at its median into two groups, 'high' and 'low'
# (unnecessary when one of the predictors in question is already categorical).
horse.level <- ifelse(horsepwr <= median(horsepwr), "low", "high")
#horse.level <- factor(horse.level)

# Empty frame spanning the full range of both variables; points are added below.
plot(displace, mileage, type = "n", xlab = "displacement", ylab = "mileage")

# One set of points per horsepower group:
# open blue circles for high HP, filled red dots for low HP.
points(displace[horse.level == "high"], mileage[horse.level == "high"],
       col = "blue", pch = 1, cex = 1.2)
points(displace[horse.level == "low"], mileage[horse.level == "low"],
       col = "red", pch = 20, cex = 1.2)
legend("topright", legend = c("high HP", "low HP"),
       pch = c(1, 20), col = c("blue", "red"), cex = 1.5)

# Is the linear relationship between Y and X1 (i.e., the slope)
# different for the red points (low HP) than for the blue points (high HP)?
# If so, then we have possible interaction between X1 and X2.

# Least-squares fit (not Bayesian) containing both first-order terms
# plus the displace:horsepwr interaction term:

int.mod <- lm(formula = mileage ~ displace + horsepwr + displace:horsepwr)
summary(object = int.mod)

# Note that when you include an interaction effect in a model, 
# you should always include both first-order terms for the component variables 
# (even if the first-order terms do not appear "significant" according to tests or intervals).
# This will make interpretations more sensible.

# Note we should NOT interpret the estimates of beta_1 and beta_2 directly here!
# The effect of displacement on mileage is DIFFERENT depending on the value of horsepower:

# Compare the change in predicted mileage when displacement goes from 190 to 200
# with horsepower held at a low value (100):
predict(int.mod, data.frame(horsepwr = 100, displace = 200)) -
  predict(int.mod, data.frame(horsepwr = 100, displace = 190))

# against the same change in displacement
# with horsepower held at a high value (250):
predict(int.mod, data.frame(horsepwr = 250, displace = 200)) -
  predict(int.mod, data.frame(horsepwr = 250, displace = 190))

# There is NOT one unique effect of displacement on mileage in this model!