# problem 1

tests.mds.2 <- cmdscale(tests.diss, eig=T)

par(pty="s")  # Creates a square plotting region
plot(tests.mds.2$points[,1], tests.mds.2$points[,2], type='n',
     xlab="Coordinate 1", ylab="Coordinate 2", xlim=c(-50,50), ylim=c(-50,50) )
text(tests.mds.2$points[,1], tests.mds.2$points[,2] , labels=row.names(tests.diss))


## Problem 2:

 racq.data <- read.table("http://www.stat.sc.edu/~hitchcock/racquetsdata530.txt",header=T)
 racquet.names <- as.character(racq.data[,1])
 racquet.numeric.data <- racq.data[,-1]
 racquet.numeric.data <- scale (racquet.numeric.data)
 racq.dist <- dist(racquet.numeric.data)
# my.hier.clus <- hclust(racq.dist,method="average")
# plot(my.hier.clus)
# my.hier.clus <- hclust(racq.dist,method="single")
# plot(my.hier.clus)
 my.hier.clus <- hclust(racq.dist,method="complete")
 plot(my.hier.clus, labels=racquet.names)

racq.clusvec.3 <- cutree(my.hier.clus, k=3)

racq.clust <- lapply(1:3, function(nc) racquet.names[racq.clusvec.3==nc])  
racq.clust   # printing the clusters in terms of the racquet names #'s

racq.pc <- princomp(racquet.numeric.data,cor=T)

# Reviewing the PCA:

summary(racq.pc,loadings=T)


# Setting up the colors for the 3 clusters on the plot:
my.color.vector <- rep("blue", times=nrow(racquet.numeric.data))
my.color.vector[racq.clusvec.3==2] <- "red"
my.color.vector[racq.clusvec.3==3] <- "green"

# Plotting the PC scores:

par(pty="s")
plot(racq.pc$scores[,1], racq.pc$scores[,2], ylim=range(racq.pc$scores[,1]), 
     xlab="PC 1", ylab="PC 2", type ='n', lwd=2)
text(racq.pc$scores[,1], racq.pc$scores[,2], labels=racquet.names, 
     cex=0.6, lwd=2, col=my.color.vector)


####################################################

# A partitioning approach:
library(cluster)

# Choosing k based on the avg. silhouette width:

my.k.choices <- 2:8
avg.sil.width <- rep(0, times=length(my.k.choices))
for (ii in (1:length(my.k.choices)) ){
avg.sil.width[ii] <- pam(racq.dist, diss=T, k=my.k.choices[ii])$silinfo$avg.width
}
print( cbind(my.k.choices, avg.sil.width) )


my.kmed <- pam(racq.dist, k=2, diss=T)


racq.clust.kmed <- lapply(1:2, function(nc) racquet.names[my.kmed$clustering==nc])  
racq.clust.kmed   # printing the clusters in terms of the racquet names #'s


# Setting up the colors for the 3 clusters on the plot:
my.color.vector <- rep("blue", times=nrow(racquet.numeric.data))
my.color.vector[my.kmed$clustering==2] <- "red"

# Plotting the PC scores:

par(pty="s")
plot(racq.pc$scores[,1], racq.pc$scores[,2], ylim=range(racq.pc$scores[,1]), 
     xlab="PC 1", ylab="PC 2", type ='n', lwd=2)
text(racq.pc$scores[,1], racq.pc$scores[,2], labels=racquet.names, 
     cex=0.6, lwd=2, col=my.color.vector)




## problem 3:

pottfull<-read.table("http://people.stat.sc.edu/hitchcock/potteryTable63.txt", header=T)
attach(pottfull)
pott<-pottfull[,-c(1,2)]

library(mclust)

pott.mbc <- Mclust(pott)

# Plotting the BIC values:

plot(pott.mbc, data=pott, what="BIC")


# The clustering vector:

clus.vec <- pott.mbc$classification
clus.vec

pott.mbc.clust <- lapply(1:3, function(nc) No[clus.vec==nc])  
pott.mbc.clust   # printing the clusters in terms of the ID #'s

# This gives the probabilities of belonging to each cluster for every object:

round(pott.mbc$z,2)


# Visualizing the clusters:

## Via a scatterplot matrix:

plot(pott.mbc, what="classification")

# Hit ENTER to see a scatterplot matrix with the points separated by cluster.

### Via a plot of the scores on the first 2 principal components, 
### with the clusters separated by color:

pott.mbc.pc <- princomp(pott,cor=T)



# Reviewing the PCA:

summary(pott.mbc.pc,loadings=T)


pott.mbc.pc <- princomp(pott,cor=T)

# Setting up the colors for the 3 clusters on the plot:
my.color.vector <- rep("blue", times=nrow(pott))
my.color.vector[pott.mbc$classification==2] <- "red"
my.color.vector[pott.mbc$classification==3] <- "green"

# Plotting the PC scores:

par(pty="s")
plot(pott.mbc.pc$scores[,1], pott.mbc.pc$scores[,2], ylim=range(pott.mbc.pc$scores[,1]), 
     xlab="PC 1", ylab="PC 2", type ='n', lwd=2)
text(pott.mbc.pc$scores[,1], pott.mbc.pc$scores[,2], labels=No, 
     cex=0.7, lwd=2, col=my.color.vector)