* ADA2_02.sas ;
* SAS code from Chapter 02 ;
* Stat 428/528 Advanced Data Analysis II ;
* ;
* Prof. Erik Erhardt ;
* University of New Mexico ;
* Spring 2012 ;

*******************************************************************************;
* Indian systolic blood pressure example;
* plot and simple linear regression;

options ls=79 nodate nocenter;

* folder that holds the chapter data files, change this one line to relocate them;
%let ada2_dir = F:\Dropbox\UNM\teach\ADA2_stat528\sas;

* read n and x1-x11 from the raw data file and derive the weight category x12;
data indian;
  infile "&ada2_dir.\ADA2_02_indian.dat";
  input n x1-x11;

  * x12 (low, med, high weight) for plotting by weight ;
  * NOTE(review): a missing x3 compares below 60 in SAS and so is coded l, confirm the data has no missing weights;
  if x3 lt 60 then x12 = 'l';
  else if x3 ge 70 then x12 = 'h';
  else x12 = 'm';

  label x1='age in years'
        x2='years since migration'
        x3='weight in kilos'
        x4='height in mm'
        x5='chin skin fold in mm'
        x6='forearm skin fold in mm'
        x7='calf skin fold in mm'
        x8='pulse rate-beats/min'
        x9='systolic bp'
        x10='diastolic bp'
        x11='years since migration div by age'
        x12='cat weight';

  * give descriptive variable names to the variables we will use;
  * (no apostrophes here: an unmatched quote is not allowed in this comment form);
  wt    = x3;
  sysbp = x9;
  yrage = x11;
  wtcat = x12;

  label wt='weight in kilos'
        sysbp='systolic bp'
        yrage='years since migration div by age'
        wtcat='cat weight';
run;

* define v=symbol, c=color, and i=interpolation;
* one symbol per wtcat level, the plotted letter matches the category code;
symbol1 v="h" c=blue  i=none;
symbol2 v="l" c=red   i=none;
symbol3 v="m" c=green i=none;

* scatterplot of systolic bp against yrage, one symbol per weight category;
proc gplot data=indian;
  plot sysbp*yrage=wtcat;
run;
quit; * gplot is interactive, quit ends it explicitly;

* simple linear regression of systolic bp on yrage;
proc reg data=indian;
  model sysbp = yrage;
run;
quit; * reg is interactive, quit ends it explicitly;

*******************************************************************************;
* multiple regression;

proc reg data=indian;
  model sysbp = yrage wt; * put both predictors to the right of "=" sign;
run;
quit;

*******************************************************************************;
* GCE example - plots, correlation, individual simple linear regressions;

data gce;
  infile "&ada2_dir.\ADA2_02_gce.dat";
  input y x1 x2;
  label y='gce score of 1000'
        x1='compulsory part of gce score of 200'
        x2='scel paper score';
run;

* define v=symbol, c=color, and i=interpolation;
* redefines symbol1 from the Indian example above;
symbol1 v=circle c=black i=none;

* response against each predictor, then the two predictors against each other;
proc gplot data=gce;
  plot y*(x1 x2);
  plot x1*x2;
run;
quit;

* pairwise correlations among the response and both predictors;
proc corr data=gce;
  var y x1 x2;
run;

*******************************************************************************;
* individual simple linear regression (SLR);

proc reg data=gce;
  model y=x1;
  model y=x2;
run;
quit;

*******************************************************************************;
* multiple regression with diagnostic plots;

proc reg data=gce;
  model y=x1 x2/p r; * p requests predicted values, r requests residual analysis;
  plot student.*predicted. nqq.*student. cookd.*obs.; * diagnostic plots;
run;
quit;

*******************************************************************************;
* partial residual plots;
* to check relationship of X_i with Y after accounting for all other Xs in model;
* note: uncomment ods lines to produce high-resolution plots in newer SAS versions;

*ods graphics on;
proc reg data=gce;
  model y=x1 x2/p r partial; * /partial gives partial resid plots for each predictor;
run;
quit;
*ods graphics off;

*******************************************************************************;
* run analysis excluding influential observation 10 ;

data gce2;
  set gce;
  * subsetting if on the automatic iteration counter _N_ ;
  if _N_ ne 10; * keep all observations that are not obs 10 (i.e., delete obs 10);
run;

proc reg data=gce2;
  model y=x1 x2/p r partial; * /partial gives partial resid plots for each predictor;
run;
quit; * without quit the last interactive procedure would be left running;

*******************************************************************************;