#############################
#                           #
# Probit Regression Example #
#                           #
#############################

# Original data found at: http://lib.stat.cmu.edu/aoas/107/data.txt

# reading in cleaner version of data:

# Read the cleaned data set; the first line of the file holds column names.
# (TRUE is spelled out because T is an ordinary variable that can be
# reassigned.)
educ.data <- read.table("http://www.stat.sc.edu/~hitchcock/educdata.txt",
                        header = TRUE)

# Ordinal response modelled by the probit regression below.
y <- educ.data$DEG

# Predictors: CHILD, PDEG, and their product (the interaction term).
x1 <- educ.data$CHILD
x2 <- educ.data$PDEG
x3 <- x1 * x2

# Design matrix.  Note we are not including an intercept term here.
X <- cbind(x1, x2, x3)

# Recode the response as category indices 1..K (its position among the
# sorted distinct values of y) and record the dimensions of the problem.
ranks <- match(y, sort(unique(y)))
uranks <- sort(unique(ranks))
n <- nrow(X)
p <- ncol(X)

library(mvtnorm)  # load package to sample from multivariate normal distribution

### starting values

# regression coefficients start at zero
beta <- rep(0, p)

# latent scores start at normal quantiles of the ranks of y (ties broken
# at random), scaled by n + 1 so every quantile is finite
z <- qnorm(rank(y, ties.method = "random") / (n + 1))

# K categories are separated by K - 1 thresholds g; mu and sigma are the
# mean and standard deviation of the normal distribution used when g is drawn
K <- length(uranks)
g <- rep(NA, K - 1)
mu <- rep(0, K - 1)
sigma <- rep(100, K - 1)

# number of sweeps of the sampler
S <- 10000

# storage for the draws, one row per sweep; ac is the acceptance counter of
# the Metropolis-Hastings step that is commented out inside the loop
BETA <- matrix(NA, S, p)
Z <- matrix(NA, S, n)
G <- matrix(NA, S, K - 1)
ac <- 0
# Gibbs sampler: each sweep draws the thresholds g, the coefficients beta and
# the latent scores z in turn, then stores the draws in row s of G, BETA, Z.

# These quantities do not change between sweeps, so they are computed once
# instead of inverting t(X) %*% X twice in every iteration.
XtX.inv <- solve(t(X) %*% X)
shrink <- n / (n + 1)
cov.mat.beta <- shrink * XtX.inv

for (s in seq_len(S)) {

  # update g using its full conditional: a N(mu[k], sigma[k]^2) draw
  # restricted (by inverse-CDF sampling) to lie between the largest z in
  # category k and the smallest z in category k + 1.  Categories are indexed
  # through `ranks`, which always runs 1..K, so the sampler does not rely on
  # y itself being coded 1..K.
  for (k in seq_len(K - 1)) {
    lower <- max(z[ranks == k])
    upper <- min(z[ranks == k + 1])
    u <- runif(1, pnorm((lower - mu[k]) / sigma[k]),
                  pnorm((upper - mu[k]) / sigma[k]))
    g[k] <- mu[k] + sigma[k] * qnorm(u)
  }

  # update beta using its full conditional (multivariate normal)
  mean.vec.beta <- shrink * (XtX.inv %*% (t(X) %*% z))
  beta <- rmvnorm(1, mean = mean.vec.beta, sigma = cov.mat.beta)

  # update z using its full conditional: N(ez, 1) restricted to the interval
  # between the thresholds on either side of each observation's category.
  # rmvnorm() returns a 1 x p matrix, hence the transpose.
  ez <- X %*% t(beta)
  lower <- c(-Inf, g)[ranks]
  upper <- c(g, Inf)[ranks]
  u <- runif(n, pnorm(lower - ez), pnorm(upper - ez))
  z <- ez + qnorm(u)

  # help mixing the Markov Chain...including a Metropolis-Hastings step
  # (currently disabled; it would shift z and g by a common random amount)
  # c<-rnorm(1,0,n^(-1/3))
  # zp<-z+c ; gp<-g+c
  # lhr<-  sum(dnorm(zp,ez,1,log=T) - dnorm(z,ez,1,log=T) ) +
  #        sum(dnorm(gp,mu,sigma,log=T) - dnorm(g,mu,sigma,log=T) )
  # if(log(runif(1))<lhr) { z<-zp ; g<-gp ; ac<-ac+1 }

  # Storing values of beta, z, and g:
  BETA[s, ] <- beta
  Z[s, ] <- z
  G[s, ] <- g
}

# Thinning: retain only sweeps 10, 20, 30, ... of the beta and z chains.
kept.sweeps <- seq(10, S, by = 10)
BETA <- BETA[kept.sweeps, ]
Z <- Z[kept.sweeps, ]

#####

# Posterior medians for beta_1, beta_2, beta_3
beta.pm <- apply(BETA, 2, median)
print(beta.pm)

# Posterior medians for g1, g2, g3, g4
g.pm <- apply(G, 2, median)

# Latent scores from the last retained sweep.  Indexing by nrow(Z) instead of
# a literal 1000 keeps this correct if S or the thinning interval changes.
z.last <- Z[nrow(Z), ]

# Scatter of z against number of children; points with PDEG = 1 are shifted
# right by 0.25 and drawn with a different symbol and colour.
plot(X[, 1] + 0.25 * X[, 2], z.last,
     pch = 15 + X[, 2], col = c("gray", "black")[X[, 2] + 1],
     xlab = "number of children", ylab = "z",
     ylim = range(c(-2.5, 4, z.last)), xlim = c(0, 9))

# Fitted lines at the posterior medians: PDEG = 0 (gray) and PDEG = 1 (black)
abline(0, beta.pm[1], lwd = 2, col = "gray")
abline(beta.pm[2], beta.pm[1] + beta.pm[3], col = "black", lwd = 2)

# Plotting posterior median threshold cutpoints
abline(h = g.pm, lty = 3)

legend(5, 4, legend = c("PDEG=0", "PDEG=1"), pch = c(15, 16),
       col = c("gray", "black"))

#####
#
# Estimated slopes: the effect of one more child is beta_1 when PDEG = 0 and
# beta_1 + beta_3 when PDEG = 1.

print(paste("Estimated marginal effect of # children when parent degree = NO", round(beta.pm[1], 4)))
print(paste("Estimated marginal effect of # children when parent degree = YES", round(beta.pm[1] + beta.pm[3], 4)))

# 95% CI for interaction effect.  The argument is spelled `probs` in full
# (not the partial match `prob`), and the result is printed explicitly so it
# is also shown when the script is run through source().
print(quantile(BETA[, 3], probs = c(0.025, 0.975)))

# Open a new plotting window.  dev.new() picks the appropriate device on any
# platform, whereas windows() only exists on Windows.
dev.new()

# Posterior density for interaction effect:

plot(density(BETA[, 3]), lwd = 2, main = "",
     xlab = expression(beta[3]), ylab = "density")

######################
######################
#
# Binary case:
#
######################
######################
#################################################################################################

# Define Y = 1 if individual has no senility
# Define Y = 2 if individual has senility present

# x = score on subset of Wechsler Adult Intelligence Scale (WAIS)

# Response: the first 14 individuals are coded 2, the remaining 40 are coded 1.
y <- rep(c(2, 1), times = c(14, 40))

# WAIS scores, listed in the same order as y.
x <- c(
  9, 13, 6, 8, 10, 4, 14, 8, 11, 7,
  9, 7, 5, 14, 13, 16, 10, 12, 11, 14,
  15, 18, 7, 16, 9, 9, 11, 13, 15, 13,
  10, 11, 6, 17, 14, 19, 9, 11, 14, 10,
  16, 10, 16, 14, 13, 13, 9, 15, 10, 11,
  12, 4, 14, 20
)

# One-column design matrix (no intercept column).
X <- cbind(x)


# Then repeat all the code starting with "ranks<-" ...

# Recode the response as category indices 1..K (its position among the
# sorted distinct values of y) and record the dimensions of the problem.
ranks <- match(y, sort(unique(y)))
uranks <- sort(unique(ranks))
n <- nrow(X)
p <- ncol(X)

library(mvtnorm)  # load package to sample from multivariate normal distribution

### starting values

# regression coefficients start at zero
beta <- rep(0, p)

# latent scores start at normal quantiles of the ranks of y (ties broken
# at random), scaled by n + 1 so every quantile is finite
z <- qnorm(rank(y, ties.method = "random") / (n + 1))

# K categories are separated by K - 1 thresholds g; mu and sigma are the
# mean and standard deviation of the normal distribution used when g is drawn
K <- length(uranks)
g <- rep(NA, K - 1)
mu <- rep(0, K - 1)
sigma <- rep(100, K - 1)

# number of sweeps of the sampler
S <- 10000

# storage for the draws, one row per sweep; ac is the acceptance counter of
# the Metropolis-Hastings step that is commented out inside the loop
BETA <- matrix(NA, S, p)
Z <- matrix(NA, S, n)
G <- matrix(NA, S, K - 1)
ac <- 0
# Gibbs sampler: each sweep draws the thresholds g, the coefficients beta and
# the latent scores z in turn, then stores the draws in row s of G, BETA, Z.

# These quantities do not change between sweeps, so they are computed once
# instead of inverting t(X) %*% X twice in every iteration.
XtX.inv <- solve(t(X) %*% X)
shrink <- n / (n + 1)
cov.mat.beta <- shrink * XtX.inv

for (s in seq_len(S)) {

  # update g using its full conditional: a N(mu[k], sigma[k]^2) draw
  # restricted (by inverse-CDF sampling) to lie between the largest z in
  # category k and the smallest z in category k + 1.  Categories are indexed
  # through `ranks`, which always runs 1..K, so the sampler does not rely on
  # y itself being coded 1..K.
  for (k in seq_len(K - 1)) {
    lower <- max(z[ranks == k])
    upper <- min(z[ranks == k + 1])
    u <- runif(1, pnorm((lower - mu[k]) / sigma[k]),
                  pnorm((upper - mu[k]) / sigma[k]))
    g[k] <- mu[k] + sigma[k] * qnorm(u)
  }

  # update beta using its full conditional (multivariate normal)
  mean.vec.beta <- shrink * (XtX.inv %*% (t(X) %*% z))
  beta <- rmvnorm(1, mean = mean.vec.beta, sigma = cov.mat.beta)

  # update z using its full conditional: N(ez, 1) restricted to the interval
  # between the thresholds on either side of each observation's category.
  # rmvnorm() returns a 1 x p matrix, hence the transpose.
  ez <- X %*% t(beta)
  lower <- c(-Inf, g)[ranks]
  upper <- c(g, Inf)[ranks]
  u <- runif(n, pnorm(lower - ez), pnorm(upper - ez))
  z <- ez + qnorm(u)

  # help mixing the Markov Chain...including a Metropolis-Hastings step
  # (currently disabled; it would shift z and g by a common random amount)
  # c<-rnorm(1,0,n^(-1/3))
  # zp<-z+c ; gp<-g+c
  # lhr<-  sum(dnorm(zp,ez,1,log=T) - dnorm(z,ez,1,log=T) ) +
  #        sum(dnorm(gp,mu,sigma,log=T) - dnorm(g,mu,sigma,log=T) )
  # if(log(runif(1))<lhr) { z<-zp ; g<-gp ; ac<-ac+1 }

  # Storing values of beta, z, and g:
  BETA[s, ] <- beta
  Z[s, ] <- z
  G[s, ] <- g
}

# Thinning: retain only sweeps 10, 20, 30, ... of the beta and z chains.
# drop = FALSE keeps BETA a one-column matrix when there is a single
# coefficient, so it never collapses to a plain vector.
kept.sweeps <- seq(10, S, by = 10)
BETA <- BETA[kept.sweeps, , drop = FALSE]
Z <- Z[kept.sweeps, , drop = FALSE]

#####

# Posterior median for beta_1
beta.pm <- apply(BETA, 2, median)
print(beta.pm)

# Posterior medians for g1
g.pm <- apply(G, 2, median)

# Latent scores from the last retained sweep.  Indexing by nrow(Z) instead of
# a literal 1000 keeps this correct if S or the thinning interval changes.
z.last <- Z[nrow(Z), ]

plot(X[, 1], z.last,
     pch = 15,
     xlab = "WAIS score", ylab = "z")

# Fitted line through the origin with the posterior median slope
abline(0, beta.pm[1], lwd = 2)

# Plotting posterior median threshold cutpoints
abline(h = g.pm, lty = 3)


#####
#
# Estimated slopes:

print(paste("Estimated marginal effect of WAIS score", round(beta.pm[1], 4)))

# 95% CI for marginal effect of WAIS score.  The argument is spelled `probs`
# in full (not the partial match `prob`), and the result is printed
# explicitly so it is also shown when the script is run through source().
print(quantile(BETA[, 1], probs = c(0.025, 0.975)))

# Open a new plotting window.  dev.new() picks the appropriate device on any
# platform, whereas windows() only exists on Windows.
dev.new()

# Posterior density for marginal effect of WAIS score:

plot(density(BETA[, 1]), lwd = 2, main = "",
     xlab = expression(beta[1]), ylab = "density")


## Predictive probability:

# For a new individual with WAIS score x = 10, the posterior predictive
# probability of senility (Y = 2) is P(z.new > g1), where the latent score is
# z.new ~ N(x.new * beta_1, 1).  Given one posterior draw (beta_1, g1) that
# probability is pnorm(x.new * beta_1 - g1); averaging it over the draws
# integrates out the parameters.  Comparing x.new * beta_1 with the median
# threshold alone would ignore the N(0, 1) latent noise and the uncertainty
# in g1.

x.new <- 10

# G may still hold every sweep while BETA holds only the thinned ones, so
# pick the rows of G that belong to the same sweeps as the rows of BETA.
thin.by <- nrow(G) %/% nrow(BETA)
g.draws <- G[thin.by * seq_len(nrow(BETA)), 1]

print(mean(pnorm(x.new * BETA[, 1] - g.draws)))