# Example of the one-way ANOVA procedure in R.
# This code analyzes data from a Completely Randomized Design (CRD) (sec 10.2).
# The data given here are the golf ball distances
# from the example we studied in class.
# The data set is called "golfdata".
# The factor is brand.
# The response variable is dist.

golfdata <- read.table(
  file = "http://people.stat.sc.edu/hitchcock/golfball.txt",
  header = TRUE
)

# The data frame called golfdata is now created,
# with two variables, dist and brand.
# The columns are referenced through the data frame (no attach()), so nothing
# is placed on the search path and no global copy can mask a column.

# Analysis of variance ----

# lm() and anova() will do a standard analysis of variance.
# We specify that brand is a (qualitative) factor with the factor() function.
# The conversion is stored back in the data frame so every later call sees it.
golfdata$brand <- factor(golfdata$brand)

# The lm statement specifies that dist is the response
# and brand is the factor.
golf.fit <- lm(dist ~ brand, data = golfdata)

# The ANOVA table is produced by the anova() function.
anova(golf.fit)

# Checking model assumptions ----

# Side-by-side boxplots:
# if the spreads of the boxplots are similar, then the equal-variances
# assumption may be correct.
boxplot(dist ~ brand, data = golfdata)

# Normal Q-Q plots for each population, one panel per brand.
# par() returns the previous settings, which are restored afterwards instead
# of forcing a 1-by-1 layout.
old_par <- par(mfrow = c(2, 2)) # creating 2-by-2 plotting window
for (b in levels(golfdata$brand)) {
  qqnorm(
    golfdata$dist[golfdata$brand == b],
    main = paste("Normal QQ plot, brand", b)
  )
}
par(old_par) # resetting plotting window

# Multiple Comparisons (Sec. 10.3) ----

# TukeyHSD() requires an aov fit; the same one-way model is fitted with aov().
TukeyHSD(aov(dist ~ brand, data = golfdata), conf.level = 0.95)

# Result: Every pair of brand means is significantly different,
# except for the means of brands A and D.