# problem 1 tests.mds.2 <- cmdscale(tests.diss, eig=T) par(pty="s") # Creates a square plotting region plot(tests.mds.2$points[,1], tests.mds.2$points[,2], type='n', xlab="Coordinate 1", ylab="Coordinate 2", xlim=c(-50,50), ylim=c(-50,50) ) text(tests.mds.2$points[,1], tests.mds.2$points[,2] , labels=row.names(tests.diss)) ## Problem 2: racq.data <- read.table("http://www.stat.sc.edu/~hitchcock/racquetsdata530.txt",header=T) racquet.names <- as.character(racq.data[,1]) racquet.numeric.data <- racq.data[,-1] racquet.numeric.data <- scale (racquet.numeric.data) racq.dist <- dist(racquet.numeric.data) # my.hier.clus <- hclust(racq.dist,method="average") # plot(my.hier.clus) # my.hier.clus <- hclust(racq.dist,method="single") # plot(my.hier.clus) my.hier.clus <- hclust(racq.dist,method="complete") plot(my.hier.clus, labels=racquet.names) racq.clusvec.3 <- cutree(my.hier.clus, k=3) racq.clust <- lapply(1:3, function(nc) racquet.names[racq.clusvec.3==nc]) racq.clust # printing the clusters in terms of the racquet names #'s racq.pc <- princomp(racquet.numeric.data,cor=T) # Reviewing the PCA: summary(racq.pc,loadings=T) # Setting up the colors for the 3 clusters on the plot: my.color.vector <- rep("blue", times=nrow(racquet.numeric.data)) my.color.vector[racq.clusvec.3==2] <- "red" my.color.vector[racq.clusvec.3==3] <- "green" # Plotting the PC scores: par(pty="s") plot(racq.pc$scores[,1], racq.pc$scores[,2], ylim=range(racq.pc$scores[,1]), xlab="PC 1", ylab="PC 2", type ='n', lwd=2) text(racq.pc$scores[,1], racq.pc$scores[,2], labels=racquet.names, cex=0.6, lwd=2, col=my.color.vector) #################################################### # A partitioning approach: library(cluster) # Choosing k based on the avg. silhouette width: my.k.choices <- 2:8 avg.sil.width <- rep(0, times=length(my.k.choices)) for (ii in (1:length(my.k.choices)) ){ avg.sil.width[ii] <- pam(racq.dist, diss=T, k=my.k.choices[ii])$silinfo$avg.width } print( cbind(my.k.choices, avg.sil.width) ) my.kmed <- pam(racq.dist, k=2, diss=T) racq.clust.kmed <- lapply(1:2, function(nc) racquet.names[my.kmed$clustering==nc]) racq.clust.kmed # printing the clusters in terms of the racquet names #'s # Setting up the colors for the 3 clusters on the plot: my.color.vector <- rep("blue", times=nrow(racquet.numeric.data)) my.color.vector[my.kmed$clustering==2] <- "red" # Plotting the PC scores: par(pty="s") plot(racq.pc$scores[,1], racq.pc$scores[,2], ylim=range(racq.pc$scores[,1]), xlab="PC 1", ylab="PC 2", type ='n', lwd=2) text(racq.pc$scores[,1], racq.pc$scores[,2], labels=racquet.names, cex=0.6, lwd=2, col=my.color.vector) ## problem 3: pottfull<-read.table("http://people.stat.sc.edu/hitchcock/potteryTable63.txt", header=T) attach(pottfull) pott<-pottfull[,-c(1,2)] library(mclust) pott.mbc <- Mclust(pott) # Plotting the BIC values: plot(pott.mbc, data=pott, what="BIC") # The clustering vector: clus.vec <- pott.mbc$classification clus.vec pott.mbc.clust <- lapply(1:3, function(nc) No[clus.vec==nc]) pott.mbc.clust # printing the clusters in terms of the ID #'s # This gives the probabilities of belonging to each cluster for every object: round(pott.mbc$z,2) # Visualizing the clusters: ## Via a scatterplot matrix: plot(pott.mbc, what="classification") # Hit ENTER to see a scatterplot matrix with the points separated by cluster. ### Via a plot of the scores on the first 2 principal components, ### with the clusters separated by color: pott.mbc.pc <- princomp(pott,cor=T) # Reviewing the PCA: summary(pott.mbc.pc,loadings=T) pott.mbc.pc <- princomp(pott,cor=T) # Setting up the colors for the 3 clusters on the plot: my.color.vector <- rep("blue", times=nrow(pott)) my.color.vector[pott.mbc$classification==2] <- "red" my.color.vector[pott.mbc$classification==3] <- "green" # Plotting the PC scores: par(pty="s") plot(pott.mbc.pc$scores[,1], pott.mbc.pc$scores[,2], ylim=range(pott.mbc.pc$scores[,1]), xlab="PC 1", ylab="PC 2", type ='n', lwd=2) text(pott.mbc.pc$scores[,1], pott.mbc.pc$scores[,2], labels=No, cex=0.7, lwd=2, col=my.color.vector)