/* Model Diagnostics Example */ /* SAS code for influence & outlier detection */ /* Also Partial Regression Plots */ /* Also PRESS statistic for model validation */ /* Life Insurance Data, Table 10.1 in book */ DATA lifeins; INPUT income riskaver amount; CARDS; 45.010 6 91 57.204 4 162 26.852 5 11 66.290 7 240 40.964 5 73 72.996 10 311 79.380 1 316 52.766 8 154 55.916 6 164 38.122 4 54 35.840 6 53 75.796 9 326 37.408 5 55 54.376 2 130 46.186 7 112 46.130 4 91 30.366 3 14 39.060 5 63 ; run; /* Residual Plot against Income */ /* Partial Regression Plots also */ PROC REG DATA = lifeins; model amount = income riskaver / partial ; plot r.*income; run; /* Example using body fat data from Chapter 7 */ data body; input triceps thigh midarm bodyfat; cards; 19.5 43.1 29.1 11.9 24.7 49.8 28.2 22.8 30.7 51.9 37.0 18.7 29.8 54.3 31.1 20.1 19.1 42.2 30.9 12.9 25.6 53.9 23.7 21.7 31.4 58.5 27.6 27.1 27.9 52.1 30.6 25.4 22.1 49.9 23.2 21.3 25.5 53.5 24.8 19.3 31.1 56.6 30.0 25.4 30.4 56.7 28.3 27.2 18.7 46.5 23.0 11.7 19.7 44.2 28.6 17.8 14.6 42.7 21.3 12.8 29.5 54.4 30.1 23.9 27.7 55.3 25.7 22.6 30.2 58.6 24.6 25.4 22.7 48.2 27.1 14.8 25.2 51.0 27.5 21.1 ; run; /* Residual Plot against triceps */ /* Residual Plot against thigh */ /* Partial Regression Plots also */ PROC REG DATA = body; model bodyfat = triceps thigh / partial ; plot r.*triceps; plot r.*thigh; run; /* ******************************************************************* */ /* Example of outlier detection & influence statistics in SAS */ /* Example using body fat data from Chapter 7 */ /* The "influence" option will give several measures of influential points. */ /* The "R" option gives some details related to the residuals. */ PROC REG DATA = body; MODEL bodyfat = triceps thigh midarm / vif influence R; run; /* In the output, note that "Student Residual" lists the */ /* internally studentized residuals, while "RStudent" lists */ /* the externally studentized residuals. */ /******************************************************************/ /* MODEL VALIDATION: */ /* At the end of the output, note the PRESS statistic is given */ /* as 160.737. Compare this to the SSE = 98.40 given above that. */ /******************************************************************/ /* Example using Surgical Unit Data, Table 9.1 in book */ DATA surg; INPUT x1 x2 x3 x4 x5 x6 x7 x8 y lny; /* defining some interaction variables for use below: */ x1x2 = x1*x2; x1x3 = x1*x3; x1x8 = x1*x8; x2x3 = x2*x3; x2x8 = x2*x8; x3x8 = x3*x8; label x1 = 'blood-clotting' x2 = 'prognostic' x3 = 'enzyme' x4 = 'liver function' x5 = 'age' x6 = 'gender' x7 = 'moderate alcohol use' x8 = 'heavy alcohol use' y = 'survival' lny = 'log survival'; cards; 6.7 62 81 2.59 50 0 1 0 695 6.544 5.1 59 66 1.70 39 0 0 0 403 5.999 7.4 57 83 2.16 55 0 0 0 710 6.565 6.5 73 41 2.01 48 0 0 0 349 5.854 7.8 65 115 4.30 45 0 0 1 2343 7.759 5.8 38 72 1.42 65 1 1 0 348 5.852 5.7 46 63 1.91 49 1 0 1 518 6.250 3.7 68 81 2.57 69 1 1 0 749 6.619 6.0 67 93 2.50 58 0 1 0 1056 6.962 3.7 76 94 2.40 48 0 1 0 968 6.875 6.3 84 83 4.13 37 0 1 0 745 6.613 6.7 51 43 1.86 57 0 1 0 257 5.549 5.8 96 114 3.95 63 1 0 0 1573 7.361 5.8 83 88 3.95 52 1 0 0 858 6.754 7.7 62 67 3.40 58 0 0 1 702 6.554 7.4 74 68 2.40 64 1 1 0 809 6.695 6.0 85 28 2.98 36 1 1 0 682 6.526 3.7 51 41 1.55 39 0 0 0 205 5.321 7.3 68 74 3.56 59 1 0 0 550 6.309 5.6 57 87 3.02 63 0 0 1 838 6.731 5.2 52 76 2.85 39 0 0 0 359 5.883 3.4 83 53 1.12 67 1 1 0 353 5.866 6.7 26 68 2.10 30 0 0 1 599 6.395 5.8 67 86 3.40 49 1 1 0 562 6.332 6.3 59 100 2.95 36 1 1 0 651 6.478 5.8 61 73 3.50 62 1 1 0 751 6.621 5.2 52 86 2.45 70 0 1 0 545 6.302 11.2 76 90 5.59 58 1 0 1 1965 7.583 5.2 54 56 2.71 44 1 0 0 477 6.167 5.8 76 59 2.58 61 1 1 0 600 6.396 3.2 64 65 0.74 53 0 1 0 443 6.094 8.7 45 23 2.52 68 0 1 0 181 5.198 5.0 59 73 3.50 57 0 1 0 411 6.019 5.8 72 93 3.30 39 1 0 1 1037 6.944 5.4 58 70 2.64 31 1 1 0 482 6.179 5.3 51 99 2.60 48 0 1 0 634 6.453 2.6 74 86 2.05 45 0 0 0 678 6.519 4.3 8 119 2.85 65 1 0 0 362 5.893 4.8 61 76 2.45 51 1 1 0 637 6.457 5.4 52 88 1.81 40 1 0 0 705 6.558 5.2 49 72 1.84 46 0 0 0 536 6.283 3.6 28 99 1.30 55 0 0 1 582 6.366 8.8 86 88 6.40 30 1 1 0 1270 7.147 6.5 56 77 2.85 41 0 1 0 538 6.288 3.4 77 93 1.48 69 0 1 0 482 6.178 6.5 40 84 3.00 54 1 1 0 611 6.416 4.5 73 106 3.05 47 1 1 0 960 6.867 4.8 86 101 4.10 35 1 0 1 1300 7.170 5.1 67 77 2.86 66 1 0 0 581 6.365 3.9 82 103 4.55 50 0 1 0 1078 6.983 6.6 77 46 1.95 50 0 1 0 405 6.005 6.4 85 40 1.21 58 0 0 1 579 6.361 6.4 59 85 2.33 63 0 1 0 550 6.310 8.8 78 72 3.20 56 0 0 0 651 6.478 ; RUN; /* previously we had selected the model with x1, x2, x3, x8 as predictors: */ /* Let's formally test whether the interaction terms are not needed in the model: */ PROC REG DATA = surg; MODEL lny = x1 x2 x3 x8 x1x2 x1x3 x1x8 x2x3 x2x8 x3x8; TEST x1x2=0, x1x3=0, x1x8=0, x2x3=0, x2x8=0, x3x8=0; run; /* We fail to reject H_0 (p-value = 0.3465), so we may conclude */ /* the interaction terms are not needed in the model. */ /* We fit the model with only x1, x2, x3, x8 as predictors: */ PROC REG DATA = surg; MODEL lny = x1 x2 x3 x8 / vif influence R; RUN; /*****************************************************************/ /* MODEL VALIDATION: */ /* At the end of the output, note the PRESS statistic is given */ /* as 2.738. Compare this to the SSE = 2.179 given above that. */ /*****************************************************************/