# This is an example of the ANOVA procedure in R

# This code analyzes data from a
# Completely Randomized Design (CRD) (Sec. 10.2)

#  The data given here are the golf ball distances 
# from the example we studied in class 

# I am calling the data set "golfdata". 
# The factor is brand.
# The response variable is dist. 

# Read the golf ball data from the course web site (requires network access).
# header = TRUE: the first line of the file holds the column names.
golfdata <- read.table(
  file = "http://people.stat.sc.edu/hitchcock/golfball.txt",
  header = TRUE
)

# The data frame called golfdata is now created,
# with two variables, dist and brand.
#
# NOTE: we deliberately do NOT attach() the data frame.  Every call below
# refers to the columns through golfdata (via data = golfdata or golfdata$...),
# so there is exactly one copy of each variable and nothing on the search
# path can be masked by accident.

############################################################################

# lm() and anova() will do a standard analysis of variance
# We specify that brand is a (qualitative) factor with the factor() function:

# Making "brand" a factor (stored back into the data frame so that every
# later step sees the same factor):

golfdata$brand <- factor(golfdata$brand)

# The lm statement specifies that dist is the response
# and brand is the factor
# The ANOVA table is produced by the anova() function

golf.fit <- lm(dist ~ brand, data = golfdata)
anova(golf.fit)


# Checking model assumptions

# Note the Side-by-side boxplots:

boxplot(dist ~ brand, data = golfdata)

# If the spreads of the boxplots are similar, then the equal-variances
# assumption may be correct.

# Normal Q-Q plots for each population (one panel per brand):

brand_levels <- levels(golfdata$brand)

# n2mfrow() picks a grid that fits all panels (2-by-2 for four brands).
# par() returns the previous settings, which we restore afterwards instead
# of assuming the layout was 1-by-1.
old_par <- par(mfrow = grDevices::n2mfrow(length(brand_levels)))
for (b in brand_levels) {
  qqnorm(
    golfdata$dist[golfdata$brand == b],
    main = paste("Normal QQ plot, brand", b)
  )
}
par(old_par) # restoring the previous plotting window layout


# Multiple Comparisons (Sec. 10.3)

# TukeyHSD() needs an aov fit; this is the same one-way model as golf.fit.
TukeyHSD(aov(dist ~ brand, data = golfdata), conf.level = 0.95)

# Result:  Every pair of brand means is significantly different, except for the means of brands A and D.