/* Example and code from Rick Wicklin at: */ /* https://blogs.sas.com/content/iml/2014/11/21/resampling-in-sas.html */ data mosq; if _n_ le 18 then drink="water"; else drink="beer"; infile datalines delimiter=','; input bites @@; datalines; 21, 19, 13, 22, 15, 22, 15, 22, 20, 12, 24, 24, 21, 19, 18, 16, 23, 20 27, 19, 20, 20, 23, 17, 21, 24, 31, 26, 28, 20, 27, 19, 25, 31, 24 28, 24, 29, 21, 21, 18, 27, 20 ; run; proc univariate plot data=mosq; var bites; where drink="beer"; title "Summary: Bites for Beer Drinkers"; run; proc univariate plot data=mosq; var bites; where drink="water"; title "Summary: Bites for Water Drinkers"; run; title; proc iml; G1 = {27, 19, 20, 20, 23, 17, 21, 24, 31, 26, 28, 20, 27, 19, 25, 31, 24, 28, 24, 29, 21, 21, 18, 27, 20}; G2 = {21, 19, 13, 22, 15, 22, 15, 22, 20, 12, 24, 24, 21, 19, 18, 16, 23, 20}; obsdiff = mean(G1) - mean(G2); print obsdiff; *call randseed(12345); /* set random number seed if you want to replicate results */ alldata = G1 // G2; /* stack data in a single vector */ N1 = nrow(G1); N = N1 + nrow(G2); NRepl = 10000; /* number of permutations */ nulldist = j(NRepl,1); /* allocate vector to hold results */ do k = 1 to NRepl; x = sample(alldata, N, "WOR"); /* permute the data */ nulldist[k] = mean(x[1:N1]) - mean(x[(N1+1):N]); /* difference of means */ end; title "Histogram of Null Distribution"; refline = "refline " + char(obsdiff) + " / axis=x lineattrs=(color=red);"; call Histogram(nulldist) other=refline; title; /* P-value of upper-tailed test */ pval_upper = sum(nulldist >= obsdiff) / NRepl; print pval_upper; /* P-value of lower-tailed test */ pval_lower = sum(nulldist <= obsdiff) / NRepl; print pval_lower; /* P-value of two-tailed test */ pval_two = sum(abs(nulldist) >= abs(obsdiff)) / NRepl; print pval_two; quit; /***************************************************************************************************/ data delays; input HA AQ; diffs=HA-AQ; cards; 2.54 0.79 5.63 1.45 9.57 0.12 1.96 2.97 6.65 0.71 0.37 1.63 0.71 3.11 0.76 2.17 1.74 0.02 0.15 0.67 0.51 0.12 0.39 0.44 0.63 0.88 2.97 0.72 2.00 0.57 0.58 3.05 0.62 0.18 4.31 5.54 0.56 1.19 1.68 0.82 0.72 4.02 0.87 0.26 ; run; proc univariate plot data=delays; var diffs; run; proc iml; use delays; /* read data for mean delays */ read all var {HA AQ}; /* for the AQ and HA airlines */ close delays; x = HA - AQ; /* form vector of differences */ obsDiff = x[:]; /* observed mean of differences */ print obsDiff; /* permute HA/AQ values for random rows */ *call randseed(12345); /* initialize seed for random numbers */ y = HA || AQ; /* concatenate values into 365 x 2 matrix */ u = j(nrow(y),1); /* allocate vector to hold random numbers */ call randgen(u, "Uniform"); /* fill u with uniform variates in (0, 1) */ z = y; /* copy original data */ rows = loc(u>0.5); /* locate rows for which u[i] > 0.5 */ z[rows,] = z[rows, {2 1}]; /* swap values of these rows */ x = z[,1] - z[,2]; /* difference of permuted data */ s = x[:]; /* mean of difference */ *print s; /* bootstrap permutation test for matched pairs */ B = 1000; /* number of bootstrap resamples */ s = j(B,1); /* allocate vector to hold bootstrap dist */ do i = 1 to B; call randgen(u, "Uniform"); z = y; /* copy original data */ rows = loc(u>0.5); /* locate rows for which u[i] > 0.5 */ z[rows,] = z[rows,{2 1}]; /* swap values of these rows */ x = z[,1] - z[,2]; /* difference of permuted data */ s[i] = x[:]; /* mean of difference */ end; title "Histogram of Null Distribution"; refline = "refline " + char(obsDiff) + " / axis=x lineattrs=(color=red);"; call Histogram(s) other=refline; title; /* compute empirical upper-tailed p-value */ pval_upper = sum(s> abs(obsDiff)) / B ; print pval_upper; /* compute empirical lower-tailed p-value */ pval_lower = sum(s< obsDiff) / B; print pval_lower; /* compute empirical two-sided p-value */ pval_two = sum(s> abs(obsDiff)) / B + sum(s< -abs(obsDiff)) / B; print pval_two; quit; /***************************************************************************************************/ DATA emissions; INPUT co2 @@; /* The @@ tells SAS to read in more than one observation per line */ mu0=7; lines; 3.3 4.2 5.6 5.6 5.7 5.7 6.2 6.3 7.0 7.6 8.0 8.1 8.3 8.6 8.7 9.4 9.7 9.9 10.3 10.3 10.4 11.3 12.7 13.1 24.5 ; run; PROC UNIVARIATE DATA=emissions PLOT; VAR co2; TITLE 'Summary of CO2 emissions for 25 European Countries'; /* Adds a TITLE to the output */ RUN; title; proc iml; use emissions; /* read */ read all var {co2 mu0}; /* for the co2 */ close emissions; x = co2 - mu0; /* form vector of differences */ obsDiff = x[:]; /* observed mean of differences */ print obsDiff; *call randseed(12345); /* initialize seed for random numbers */ y = co2 || mu0; /* concatenate values into 365 x 2 matrix */ u = j(nrow(y),1); /* allocate vector to hold random numbers */ call randgen(u, "Uniform"); /* fill u with uniform variates in (0, 1) */ z = y; /* copy original data */ rows = loc(u>0.5); /* locate rows for which u[i] > 0.5 */ z[rows,] = z[rows, {2 1}]; /* swap values of these rows */ x = z[,1] - z[,2]; /* difference of permuted data */ s = x[:]; /* mean of difference */ *print s; /* bootstrap permutation test for matched pairs */ B = 1000; /* number of bootstrap resamples */ s = j(B,1); /* allocate vector to hold bootstrap dist */ do i = 1 to B; call randgen(u, "Uniform"); z = y; /* copy original data */ rows = loc(u>0.5); /* locate rows for which u[i] > 0.5 */ z[rows,] = z[rows,{2 1}]; /* swap values of these rows */ x = z[,1] - z[,2]; /* difference of permuted data */ s[i] = x[:]; /* mean of difference */ end; title "Histogram of Null Distribution"; refline = "refline " + char(obsDiff) + " / axis=x lineattrs=(color=red);"; call Histogram(s) other=refline; title; /* compute empirical upper-tailed p-value */ pval_upper = sum(s> abs(obsDiff)) / B ; print pval_upper; /* compute empirical lower-tailed p-value */ pval_lower = sum(s< obsDiff) / B; print pval_lower; /* compute empirical two-sided p-value */ pval_two = sum(s> abs(obsDiff)) / B + sum(s< -abs(obsDiff)) / B; print pval_two; quit;