# Kentaro Fukumoto

rm(list=ls())

###################
# Multinomial Logit

log.likehood.MNL <- function(parameter,x,y){
  k <- ncol(x)
  z <- cbind(rep(1,length(y)),(exp(x%*%parameter[1:k])),(exp(x%*%parameter[(k+1):(2*k)])))
  sum(log(apply((z*diag(3)[y,]),1,sum)/apply(z,1,sum))) # a bit not so elegant coding... 
}

MLE.MNL <- function(formula){
  model <- model.frame(formula)
  Y <- as.matrix(model.extract(model, "response"))
  X <- as.matrix(model.matrix(formula))
  initial.parameter <- rep(0,(2*ncol(X)))
  optim.out <- optim(initial.parameter,log.likehood.MNL,
                     method="BFGS",control=list(fnscale=-1),hessian=T,
                     x=X,y=Y)
  names(optim.out$par) <- c(colnames(X),colnames(X))
  optim.out
}

summarize <- function(MLE.out,digit){
  point.estimate <- MLE.out$par
  standard.error <- sqrt(diag(solve(-MLE.out$hessian)))
  p.value <- pnorm(-abs(MLE.out$par)/standard.error)
  round(cbind(point.estimate,standard.error,p.value),digit)
}

# application
# pp. 98-100

library(faraway)
attach(nes96)
sPID <-  PID
levels(sPID) <- c("Democrat","Democrat","Independent","Independent","Independent","Republican","Republican")
summary(sPID)
inca <- c(1.5,4,6,8,9.5,10.5,11.5,12.5,13.5,14.5,16,18.5,21,23.5,27.5,32.5,37.5,42.5,47.5,55,67.5,82.5,97.5,115)
nincome <- inca[unclass(nes96$income)]
library(nnet)
mmod <- multinom(sPID ~ nincome)
summary(mmod)

nPID <- rep(0,length(sPID))
nPID[sPID=="Democrat"] <- 1
nPID[sPID=="Independent"] <- 2
nPID[sPID=="Republican"] <- 3
table(nPID)
MLE.MNL.out <- MLE.MNL(nPID ~nincome)
summarize(MLE.MNL.out,3)

######################
# Ordered Logit/Probit

logistic <- function(x){1/(1+exp(-x))}

log.likehood.ordered <- function(parameter,x,y,cdf){
  k <- ncol(x)
  J <- length(unique(y))
  xb <- x%*%parameter[1:k]
  theta <- NULL
  theta[1] <- parameter[(k+1)]
  linear.predictor <- theta[1] - xb
  for(j in 2:(J-1)){        
    theta[j] <- theta[(j-1)]+exp(parameter[(k+j)])
    linear.predictor <- cbind(linear.predictor,(theta[j] - xb))    
  }
  prob.lower <- cbind(0,cdf(linear.predictor))
  prob.upper <- cbind(prob.lower[,-1],1)
  prob <- prob.upper - prob.lower
  sum(log(apply((prob*diag(J)[y,]),1,sum)))
}

MLE.ordered <- function(formula,CDF){
  model <- model.frame(formula)
  Y <- as.matrix(model.extract(model, "response"))
  X <- as.matrix(as.matrix(model.matrix(formula))[,-1]) # no constant
  J <- length(unique(Y))
  initial.parameter <- rep(0,(J-1+ncol(X)))
  optim.out <- optim(initial.parameter,log.likehood.ordered,
                     method="BFGS",control=list(fnscale=-1),hessian=T,
                     x=X,y=Y,cdf=CDF)
  #names(optim.out$par) <- c(colnames(X),rep("cutoff",(J-1)))
  optim.out
}  

# application
# pp. 108-09

library(MASS)
pomod <- polr(sPID ~ nincome)
summary(pomod)

MLE.ordered.logit.out <- MLE.ordered(nPID ~ nincome,logistic)
summarize(MLE.ordered.logit.out,3)
MLE.ordered.logit.out$par[2]+exp(MLE.ordered.logit.out$par[3])

opmod <- polr(sPID ~ nincome, method="probit")
summary(opmod)

MLE.ordered.probit.out <- MLE.ordered(nPID ~ nincome,pnorm)
summarize(MLE.ordered.probit.out,3)
MLE.ordered.probit.out$par[2]+exp(MLE.ordered.probit.out$par[3])

##################
# Gamma regression

log.likehood.gamma <- function(parameter,x,y,z){
  kx <- ncol(x)
  kz <- ncol(z)
  mu <- x%*%parameter[1:kx]
  dispersion <- exp(z%*%parameter[(kx+1):(kx+kz)])
  v <- 1/dispersion
  lambda <- v/mu
  sum(dgamma(y,shape=v,rate=lambda,log=T)) 
}

MLE.gamma <- function(formula.mean,formula.dispersion){
  model.mean <- model.frame(formula.mean)
  Y <- as.matrix(model.extract(model.mean, "response"))
  X <- as.matrix(model.matrix(formula.mean))
  model.dispersion <- model.frame(formula.dispersion)
  Z <- as.matrix(model.matrix(formula.dispersion))
  lm.out <- lm(Y~X-1)
  initial.parameter <- c(lm.out$coef,rep(0,ncol(Z)))
  optim.out <- optim(initial.parameter,log.likehood.gamma,
                     method="BFGS",control=list(fnscale=-1),hessian=T,
                     x=X,y=Y,z=Z)
  names(optim.out$par) <- c(colnames(X),colnames(Z))
  optim.out
}

# application
# p. 146

detach(nes96)
attach(weldstrength)
lmod <- lm(Strength ~ Drying + Material + Preheating)
summary(lmod)
h <- influence(lmod)$hat
d <- residuals(lmod)^2/(1-h)
gmod <- glm(d ~ Material+Preheating,family=Gamma(link=log),weldstrength,weights=1-h)
w <- 1/fitted(gmod)
lmod <- lm(Strength ~ Drying + Material + Preheating, weldstrength, weights=w)
summary(lmod)
summary(gmod)

# The following model is similar to, but not the same as, the previous one.

MLE.gamma.out <- MLE.gamma(Strength ~ Drying + Material + Preheating,
                           ~ Material+Preheating)
summarize(MLE.gamma.out,3)

summary(gmod)$coef[1,1]
summarize(MLE.gamma.out,3)[5,1]
summary(gmod)$coef[1,1] - summarize(MLE.gamma.out,3)[5,1]
log(mean(Strength^2))