# Practical Multivariate Analysis, 5th Edition # Chapter 12 #### Page 271 Figure 12.1 # posterior probability as a function of Z pz <- function(z) 1 / (1 + exp(-1.515 - z)) z <- seq(from = -6, to = 4, by = .01) plot(x = z, y = pz(z), type = "l", lwd = 3) abline(h = .5, v = -1.515, lty = 2) #### Page 273 Table 12.1 xtabs(~ SEX + CASES, data = depress) #### Page 274 odds ratios logit1 <- glm(CASES ~ SEX, data = depress, family = binomial) ## odds ratio (exponentiated coefficients) exp(coef(logit1)) ## coefficients (lower down) coef(logit1) #### Page 275 (top of the page) coef(glm(CASES ~ AGE + INCOME + SEX, data = depress, family = binomial)) #### Page 275 (bottom of the page) coef(summary(glm(CASES ~ AGE + INCOME, data = depress, family = binomial))) #### Page 278 (top of the page) # make a copy of the data dat <- within(depress, { EMPLOY <- ifelse(EMPLOY %in% c("Part time", "Unemployed"), 1, 0) EMPLOY[depress$EMPLOY == "Other"] <- NA INCOME <- INCOME < 10 }) ## fit the logistic model and store in the object, m m <- glm(CASES ~ INCOME + EMPLOY, data = dat, family = binomial) ## summarize the model (m) and just print the coefficients coef(summary(m)) #### Page 279, middle of the page ## update our old model to include the interaction of INCOME and EMPLOY ## the . ~ . means use all the variables in the old model ## and then add the interaction m2 <- update(m, . ~ . + INCOME:EMPLOY) coef(summary(m2)) #### Page 280, LRT test at the bottom of the page ## We can compare the two models ## the 'deviance' is the likelihood ratio test statistic anova(m, m2) #### Page 287 ## cut age into 4 categories CatAge <- cut(depress$AGE, breaks = c(0, 28, 43, 59, 90), right = FALSE) ## fit logistic model based on categorical age and store in m3 m3 <- glm(CASES ~ CatAge + INCOME + SEX, data = depress, family = binomial) ## print coefficients from summary table coef(summary(m3)) #### Page 288 Figure 12.2 plot(x = c(22.5, 35, 50.5, 74), y = c(0, coef(m3)[2:4]), type = "b") #### Pages 291 to 295 have been skipped #### Page 296, Figure 12.5 ## Uncertain what the model being used for the predicted probabilities ## here is how it could be done for a model ## fit model with age income and sex m4 <- glm(CASES ~ AGE + INCOME + SEX, data = depress) ## predicted probabilities phat <- predict(m4) ## function to find proportion correctly classified based on cutoff f <- function(cutoff) diag(table(phat > cutoff, depress$CASES)/c(244, 50)) ## create temp data frame of cutoff values tmp <- data.frame(cutoffs = seq(from = 0, to = .36, by = .001)) tmp <- cbind(tmp, t(sapply(tmp$cutoffs, f)) * 100) colnames(tmp) <- c("cutoffs", "Depressed", "Nondepressed") ## Create the actual plot with(tmp, { plot(x = cutoffs, y = Depressed, ylim = c(0, 100), type = "l", ylab = "Percent Correct Prediction", xlab = "Cutoff Point") lines(x = cutoffs, y = Nondepressed, lty = 2) legend(.25, 60, legend = c("Depressed", "Nondepressed"), lty = c(1, 2)) }) #### Page 300 Table 12.4 ## load mlogit package for multinomial logistic regression require(mlogit) ## copy depress data over dat and create a nominal cases variable ## cutting CESD into three levels dat <- depress[, c("SEX", "AGE", "INCOME")] dat$nomCases <- cut(depress$CESD, breaks = c(0, 10, 16, 60), include.lowest = TRUE, right = FALSE, labels = c("Nondepressed", "Borderline", "Clinically")) ## convert to mlogit data mdat <- mlogit.data(dat, choice = "nomCases", shape = "wide") ## fit the model m5 <- mlogit(nomCases ~ 1 | SEX + AGE + INCOME, data = mdat, reflevel="Nondepressed") ## print output for Table 12.4 summary(m5) #### Page 303 Table 12.5 ## load rms package for ordinal regression require(rms) lrm(nomCases2 ~ SEX + AGE + INCOME, data = dat) summary(m6)