# Practical Multivariate Analysis, 5th Edition
# Chapter 12


#### Page 271 Figure 12.1

# posterior probability as a function of Z
pz <- function(z) 1 / (1 + exp(-1.515 - z))
z <- seq(from = -6, to = 4, by = .01)
plot(x = z, y = pz(z), type = "l", lwd = 3)
abline(h = .5, v = -1.515, lty = 2)

#### Page 273 Table 12.1
xtabs(~ SEX + CASES, data = depress)

#### Page 274 odds ratios
logit1 <- glm(CASES ~ SEX, data = depress, family = binomial)
## odds ratio (exponentiated coefficients)
exp(coef(logit1))
## coefficients (lower down)
coef(logit1)

#### Page 275 (top of the page)
coef(glm(CASES ~ AGE + INCOME + SEX, data = depress, family = binomial))

#### Page 275 (bottom of the page)
coef(summary(glm(CASES ~ AGE + INCOME, data = depress,
  family = binomial)))

#### Page 278 (top of the page)
# make a copy of the data
dat <- within(depress, {
  EMPLOY <- ifelse(EMPLOY %in% c("Part time", "Unemployed"), 1, 0)
  EMPLOY[depress$EMPLOY == "Other"] <- NA
  INCOME <- INCOME < 10
})

## fit the logistic model and store in the object, m
m <- glm(CASES ~ INCOME + EMPLOY, data = dat, family = binomial)

## summarize the model (m) and just print the coefficients
coef(summary(m))


#### Page 279, middle of the page

## update our old model to include the interaction of INCOME and EMPLOY
## the . ~ . means use all the variables in the old model
## and then add the interaction
m2 <- update(m, . ~ . + INCOME:EMPLOY)

coef(summary(m2))

#### Page 280, LRT test at the bottom of the page
## We can compare the two models
## the 'deviance' is the likelihood ratio test statistic
anova(m, m2)

#### Page 287
## cut age into 4 categories
CatAge <- cut(depress$AGE, breaks = c(0, 28, 43, 59, 90), right = FALSE)

## fit logistic model based on categorical age and store in m3
m3 <- glm(CASES ~ CatAge + INCOME + SEX, data = depress, family = binomial)
## print coefficients from summary table
coef(summary(m3))

#### Page 288 Figure 12.2
plot(x = c(22.5, 35, 50.5, 74), y = c(0, coef(m3)[2:4]), type = "b")


#### Pages 291 to 295 have been skipped

#### Page 296, Figure 12.5

## Uncertain what the model being used for the predicted probabilities
## here is how it could be done for a model
## fit model with age income and sex
m4 <- glm(CASES ~ AGE + INCOME + SEX, data = depress)

## predicted probabilities
phat <- predict(m4)

## function to find proportion correctly classified based on cutoff
f <- function(cutoff) diag(table(phat > cutoff, depress$CASES)/c(244, 50))

## create temp data frame of cutoff values
tmp <- data.frame(cutoffs = seq(from = 0, to = .36, by = .001))
tmp <- cbind(tmp, t(sapply(tmp$cutoffs, f)) * 100)
colnames(tmp) <- c("cutoffs", "Depressed", "Nondepressed")

## Create the actual plot
with(tmp, {
  plot(x = cutoffs, y = Depressed, ylim = c(0, 100), type = "l",
    ylab = "Percent Correct Prediction", xlab = "Cutoff Point")
  lines(x = cutoffs, y = Nondepressed, lty = 2)
  legend(.25, 60, legend = c("Depressed", "Nondepressed"), lty = c(1, 2))
})

#### Page 300 Table 12.4
## load mlogit package for multinomial logistic regression
require(mlogit)

## copy depress data over dat and create a nominal cases variable
## cutting CESD into three levels
dat <- depress[, c("SEX", "AGE", "INCOME")]
dat$nomCases <- cut(depress$CESD, breaks = c(0, 10, 16, 60),
  include.lowest = TRUE, right = FALSE,
  labels = c("Nondepressed", "Borderline", "Clinically"))

## convert to mlogit data
mdat <- mlogit.data(dat, choice = "nomCases", shape = "wide")

## fit the model
m5 <- mlogit(nomCases ~ 1 | SEX + AGE + INCOME,
  data = mdat, reflevel="Nondepressed")
## print output for Table 12.4
summary(m5)

#### Page 303 Table 12.5

## load rms package for ordinal regression
require(rms)

lrm(nomCases2 ~ SEX + AGE + INCOME, data = dat)
summary(m6)