# HW 1 solution R code. # This is merely the R code; # Actual HWs should also include clearly stated answers # and insightful comments! ############### Problem 1 my.S1 <- matrix(c(2,-3,2,-3,6,4,2,4,3), byrow=T, nrow=3, ncol=3) # Part (a): # The data matrix has 3 columns (q=3) # It is unclear how many rows the data matrix has. # Part (b): my.S1.inv <- solve(my.S1) my.S1.inv # Part (c): # Easiest to use: cov2cor(my.S1) ################ Problem 2 my.S2 <- matrix(c(16,-2,4,-2,9,-1,4,-1,25), byrow=T, nrow=3, ncol=3) # Part (a): my.D.minus1.2 <- diag(c(1/4, 1/3, 1/5)) my.D.minus1.2 # Part (b): my.R <- my.D.minus1.2 %*% my.S2 %*% my.D.minus1.2 my.R # Or can use: cov2cor(my.S2) ################ Problem 3 airpol.full <- read.table("http://www.stat.sc.edu/~hitchcock/airpoll.txt", header=T) city.names <- as.character(airpol.full[1:16,1]) airpol.data.sub <- airpol.full[1:16,2:8] airpol.data.sub <- data.frame(airpol.data.sub,row.names=city.names) # Part (a) round(cov(airpol.data.sub),2) round(cor(airpol.data.sub),2) # Part (b) dist2full<-function(ds){ n<-attr(ds,"Size") full<-matrix(0,n,n) full[lower.tri(full)]<-ds full+t(full) } # Getting distance matrix (Euclidean distances) between SCALED observations std <- sapply(airpol.data.sub,sd) # finding standard deviations of variables airpol.sub.std <- sweep(airpol.data.sub,2,std,FUN="/") # dividing each variable by its standard deviation dis <- dist(airpol.sub.std) dis.matrix<-dist2full(dis) round(dis.matrix,digits=2) pick.smallest.largest(dis.matrix,4) # part (c): ############### ### chisplot <- function(x) { if (!is.matrix(x)) stop("x is not a matrix") ### determine dimensions n <- nrow(x) p <- ncol(x) # xbar <- apply(x, 2, mean) S <- var(x) S <- solve(S) index <- (1:n)/(n+1) # xcent <- t(t(x) - xbar) di <- apply(xcent, 1, function(x,S) x %*% S %*% x,S) # quant <- qchisq(index,p) plot(quant, sort(di), ylab = "Ordered distances", xlab = "Chi-square quantile", lwd=2,pch=1) } ### ############### # Use the chisplot function: chisplot(as.matrix(airpol.data.sub)) ################# Graduate Student Problem: # Answers will vary -- there is no one right transformation to use. # Your answer should include evidence of your exploration. # I did some individual normal Q-Q plots and found that the 1st, 3rd, 5th, and 6th variables # seemed most like to could use some transformation. # Trying some box-cox transformations on these: boxcox(lm(airpol.data.sub[,1]~1),lambda=seq(-10,10,1/5)) # lambda_1 around -2.5 ? boxcox(lm(airpol.data.sub[,3]~1),lambda=seq(-10,10,1/5)) # lambda_3 around 0.5 ? boxcox(lm(airpol.data.sub[,5]~1),lambda=seq(-10,10,1/5)) # lambda_5 around 0.1 ? boxcox(lm(airpol.data.sub[,6]~1),lambda=seq(-10,10,1/5)) # lambda_6 around 0.1 ? airpol.trans <- cbind( ((airpol.data.sub[,1])^(-2.5) - 1)/(-2.5), airpol.data.sub[,2], ((airpol.data.sub[,3])^0.5 - 1)/0.5, airpol.data.sub[,4], ((airpol.data.sub[,5])^0.1 - 1)/0.1, ((airpol.data.sub[,6])^0.1 - 1)/0.1, airpol.data.sub[,7] ) lambda <- c(-2.5,1,0.5,1,0,0,2.5) newmat <- NULL for (j in 1:length(lambda)){ if (lambda[j]!=0){ newcol<- (airpol.data.sub[,j]^lambda[j] - 1)/lambda[j] } else { newcol<-log(airpol.data.sub[,j]) } newmat <- cbind(newmat,newcol) } mymat3 <- newmat colnames(mymat3) <-colnames(airpol.data.sub) # mymat3 is the transformed data matrix #### #### chisplot(mymat3)