OPTIONS NOCENTER NODATE NOTES nonumber nolabel LINESIZE=80 MISSING=. FORMCHAR = '|----|+|---+=|-/<>*'; libname in v8 'e:\'; /*********************************** Logistic Regression Part ************************************/ /************************************************* Creating the binary outcome variable hiwrite **************************************************/ data hsb2; set in.hsb2; hiwrite = write >=52; run; *Checking on hiwrite; proc means data = hsb2 mean std; run; /***************************************************** A simple model to illustrate the concept of odds and odds ratio. ******************************************************/ proc logistic data = hsb2 ; model hiwrite (event='1') = female ; ods output ParameterEstimates = model_female; run; *the intercept is the odds for males and the slope is the odds ratio for females vs. males.; data model_fem; set model_female; o = exp(estimate); run; proc print data = model_fem; var variable estimate o; run; *Checking manually, see the webpage for the calculation; proc freq data = hsb2; tables hiwrite*female /nocum nopercent; run; /******************************************************** Second model with both a categorical variable and a continuous variable as predictors *********************************************************/ proc logistic data = hsb2; model hiwrite (event='1') = female math; output out = m2 p = prob xbeta=logit; run; *plotting the linear predictions and the probabilities against math; proc sort data = m2; by math; run; symbol1 i = join v=star l=32 c = black; symbol2 i = join v=circle l = 1 c=black; proc gplot data = m2; plot logit*math = female; plot prob*math = female; run; quit; *What is the odds for every 5 unit change in math?; proc logistic data = hsb2 ; model hiwrite (event='1') = female math / clodds=wald; unit math = 5; run; /******************************************************* Third model including more predictors to explore features of proc logistic *******************************************************/ proc logistic data = hsb2 ; class prog (ref='1') /param = ref; model hiwrite (event='1') = female read math prog ; run; *Contrasting statement; proc logistic data = hsb2 ; class prog /param = glm ; model hiwrite (event='1') = female read math prog; contrast '1 vs 2 of prog' prog 1 -1 0 / estimate; run; *Testing statement; proc logistic data = hsb2 ; class prog(ref='1') /param = ref; model hiwrite (event='1') = prog female read math; test_read_math: test read, math; test_equal: test read = math; run; *Hosmer-Lemeshow goodness-of-fit test and R-squares; proc logistic data = hsb2; class prog(ref='1') /param = ref; model hiwrite(event='1') = female prog read math / rsq lackfit; run; *influence statistics; proc logistic data = hsb2 ; class prog(ref='1') /param = ref; model hiwrite(event='1') = female prog read math ; output out=dinf prob=p resdev=dr h=pii reschi=pr difchisq=difchi; run; *plot of difchisq against the predicted value to detect influential points; goptions reset = all; symbol1 pointlabel = ("#id" h=1 ) value=none; proc gplot data = dinf; plot difchi*p; run; quit; /*************************************************** Scoring a new data set ***************************************************/ *Generating a new data set to be scored; proc sql; create table gdata as select distinct female, (prog=2) as prog2,(prog=3) as prog3, mean(read) as read, mean(math) as math from hsb2; quit; proc print data = gdata; run; *creating data set containing parameter estimates; proc logistic data = hsb2 outest=mg; class prog(ref='1') /param = ref; model hiwrite(event='1') = female prog read math ; run; *Scoring the data set to get the linear predictions; proc score data=gdata score=mg out=gpred type=parms; var female prog2 prog3 read math; run; data gpred; set gpred; odds = exp(hiwrite); p_1 = odds /(1+odds); p_0 = 1 - p_1; run; proc print data = gpred; run; /*Syntax for SAS 9.0 and higher proc sql; create table gdata1 as select distinct female, ses, mean(read) as read, mean(math) as math from hsb2; quit; proc logistic data = hsb2 outmodel=mg1; class prog(ref='1') /param = ref; model hiwrite(event='1') = female prog read math ; run; proc logistic inmodel=mg1; score data = gdata1 out=gpred1; run; proc print data = gpred1; run;*/ /****************************************** Exact Logistic Regression *******************************************/ data dose; input dose deaths total; datalines; 0 0 3 1 0 3 2 0 3 3 0 3 4 1 3 5 2 3 ; run; proc logistic data = dose desc; model deaths/total = dose; exact dose /estimate = both; run; /************************************** Multinomial Logistic Regression **************************************/ data school; input school program style $ count; cards; 1 1 self 10 1 1 team 17 1 1 class 26 1 2 self 5 1 2 team 12 1 2 class 50 2 1 self 21 2 1 team 17 2 1 class 26 2 2 self 16 2 2 team 12 2 2 class 36 3 1 self 15 3 1 team 15 3 1 class 16 3 2 self 12 3 2 team 12 3 2 class 20 ; run; /************************************ A Simple Example *************************************/ *manually computing the odds for each level against the reference level; proc freq data = school; weight count; tables style /list ; ods output OneWayFreqs = test; run; data test1; set test; godds = frequency/85; run; proc print data = test1; var style frequency godds; run; * obtaining the same results from running proc logistic; proc logistic data = school order = internal; freq count; model style = /link = glogit ; ods output parameterestimates= odds; run; data odds1; set odds; odds = exp(estimate); run; proc print data = odds1; var response estimate odds; run; /********************************************** Second Example - A saturated model ***********************************************/ proc logistic data=school order = internal; freq count; class school program / order = data; model style = school program school*program /link = glogit scale = none aggregate; run; *main effect only model; proc logistic data=school order = internal; freq count; class school program /order = data; model style = school program /link = glogit scale = none aggregate; run; *output the predicted probabilities; proc logistic data=school order = internal; freq count; class school program ; model style = school program / link = glogit; output out = smodel p=prob; run; proc freq data = smodel; where school = 1 or school = 2; format prob 5.4; tables school*program*_level_*prob /list nopercent nocum; run; /*********************************** Ordinal Logistic Regression Part ************************************/ libname ord 'e:\seminar_logistic'; data ordwarm2; set ord.ordwarm2; run; proc contents data = ordwarm2; run; /************************************* A Simple Model Calculating the cumulative odds from the frequency table; See the same result from running proc logistic; *************************************/ proc freq data = ordwarm2; table warm ; ods output onewayfreqs = test (keep = warm frequency cumfrequency); run; data test1; set test; if _n_ <=3 then odds = cumfrequency /(2293-cumfrequency); run; proc print data= test1; run; proc logistic data = ordwarm2 ; model warm = /link = clogit; ods output ParameterEstimates = model_based; run; data test2; set model_based; odds = exp(estimate); run; proc print data = test2 noobs; var variable estimate odds; run; /************************************************ Second model with two categorical variables *************************************************/ proc logistic data = ordwarm2 ; model warm = white male /link = clogit; run; /************************************************* Third model with only one continuous predictor **************************************************/ *using the output statement to obtain the probabilities for each level of the outcome and each level of the predictors; proc logistic data = ordwarm2 ; model warm = age /link = clogit; output out = pred p=p xbeta=linp; run; proc sort data = pred; by age; run; * plotting the probabilities against age; goptions reset = all ; symbol i = join w=.4 ; proc gplot data = pred; plot p*age=_level_; plot linp*age=_level_; run; quit; *proportionality assumption test; proc logistic data = ordwarm2 ; model warm = white male /link = clogit; run; *deviance and Pearson goodness-of-fit tests; proc logistic data = ordwarm2 ; model warm = white age ed /link = clogit scale=none aggregate; run;