* load data; data titanic; set "C:\path\titanic"; /* insert correct path to file here */ run; * initial model; proc logistic data=titanic descending; class pclass female; model survived = pclass age sibsp parch fare female / expb; run; *change to reference coding; proc logistic data=titanic descending; class pclass female / param=ref; model survived = pclass age sibsp parch fare female / expb; run; *******ASSESSING MODEL FIT****************; * deviance and pearson X2; proc logistic data=titanic descending; class pclass female; model survived = pclass age sibsp parch fare female / expb aggregate scale=none; run; * H&L more appropriate if using continuous covariates; proc logistic data=titanic descending; class female pclass / param=ref; model survived = pclass age sibsp parch fare female / expb lackfit; run; * deltad; proc logistic data=titanic descending; class female pclass / param=ref; model survived = pclass age sibsp parch fare female / expb lackfit; output out=diag difdev=deltad predicted=predp; run; * plot of deltad vs predicted probabilities; proc sgplot data=diag; scatter x=predp y=deltad; run; * finding the 20 obs with highest residuals; proc sort data=diag; by descending deltad; run; proc print data=diag(obs=20); run; * redoing deltad vs probability plot split by gender and colored by class; proc sgpanel data=diag; panelby female; scatter x=predp y=deltad / group=pclass; run; * exploring nonlinear age; * create format to split age into intervals; proc format; value agecat 0-<15 = '[0,15)' 15-<30 = '[15,30)' 30-<45 = '[30,45)' 45-<60 = '[45,60)' 60-high = '60+'; run; * estimate coefficients for age categories (by female) and then plot; proc logistic data=titanic descending; class female pclass age(ref=first) / param=glm; model survived = pclass age|female sibsp parch fare / expb; lsmeans age*female / plots=meanplot(join sliceby=female); format age agecat.; run; ******MODELING INTERACTIONS AND NONLINEAR EFFECTS*******; * adding interactions; proc logistic data=titanic descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb; run; * plotting ORs for pclass across female; * odds ratio statement; proc logistic data=titanic descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb; oddsratio pclass; run; * putting variables on odds ratio graphs; proc logistic data=titanic descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb; oddsratio female; oddsratio age; oddsratio sibsp; oddsratio parch; oddsratio fare; run; * options for odds ratio graphs; proc logistic data=titanic descending plots(only)=(oddsratio(logbase=e type=horizontalstat)) ; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb; oddsratio female / at(pclass='3' age=1 5 18 30 50 70); run; * cubic splines for age by gender; proc logistic data=titanic descending; effect agesp = spline(age / basis=tpf(noint) naturalcubic knotmethod=percentiles(5) details); class female pclass / param=ref; model survived = female|pclass female|agesp sibsp parch fare / expb; run; * plot for splines; proc logistic data=titanic descending; effect agesp = spline(age / basis=tpf(noint) naturalcubic knotmethod=percentiles(5) details); class female pclass / param=ref; model survived = female|pclass female|agesp sibsp parch fare / expb; effectplot slicefit (x=age sliceby=female); run; * reassessing model fit; proc logistic data=titanic descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb lackfit; run; * influence plots; proc logistic data=titanic descending plots(label)=(influence(unpack) dfbetas(unpack)); class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb lackfit; run; * identifying influential observations; data influence; set titanic; if _N_ in (3, 46, 161) then output; run; proc print data=influence; run; *removing them; data noninfluence; set titanic; if ^(_N_ in (3, 46, 161)) then output; run; *rerunning - nothing too different NOT RUN IN SEMINAR; proc logistic data=noninfluence descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb lackfit; run; ******************PREDICTIVE POWER*************; * rerunning model to get association table; proc logistic data=titanic descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb lackfit; run; * ROC curve for model; proc logistic data=titanic descending plots(only)=roc; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb lackfit; run; * comparing ROC curve from model with no interactions; proc logistic data=titanic descending; class female pclass / param=ref; model survived = female|pclass female|age sibsp parch fare / expb lackfit; roc 'no interactions' female pclass age sibsp parch fare; roccontrast 'no interactions'; run; **************ESTIMATING AND REPORTING PREDICTED PROBABILITIES*********************************; * lsmeans for predicted probabilities; proc logistic data=titanic descending; class female pclass / param=glm; model survived = female|pclass female|age sibsp parch fare / expb lackfit; lsmeans female*pclass / ilink; run; * lsmeans for predicted probabilities, fixing covariates values; proc logistic data=titanic descending; class female pclass / param=glm; model survived = female|pclass female|age sibsp parch fare / expb lackfit; lsmeans female*pclass / at(age sibsp parch fare)=(30 0 1 100) ilink; run; * plots of covariate effects against predicted probabilities; proc logistic data=titanic descending; class female pclass / param=glm; model survived = female|pclass female|age sibsp parch fare / expb lackfit; effectplot interaction (x=pclass sliceby=female) / clm connect noobs; effectplot slicefit (x=age sliceby=female) / clm; run; ***********************CODE END***********************;