*Purpose: import the JHU county-level COVID-19 death time series, build statewide cumulative and daily death series, plot a few states, and save per-state data sets;

*The date columns are referenced as name literals such as '1/22/20'n, which requires VALIDVARNAME=ANY;
options validvarname=any;

*Initial input of downloaded data;
proc import out=coviduscty
    datafile="/home/grego1/STAT 540/time_series_covid19_deaths_US.csv"
    replace;
run;

*Alternative that webscrapes the current data set;
filename covidts "/home/grego1/STAT 540/COVID_ts.csv";

*I tried filename with the url keyword, but it did not behave;
proc http
    url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
    out=covidts;
run;

*Province_State was truncated--rather than fix FORMAT and INFORMAT in code, use GUESSINGROWS;
proc import out=coviduscty
    datafile="/home/grego1/STAT 540/COVID_ts.csv"
    replace;
    guessingrows=max;
run;

proc freq data=coviduscty;
    table Province_State;
run;

*Documenting unassigned deaths;
proc print data=coviduscty;
    where Province_State="South Carolina";
    var fips admin2;
run;

*Remove unassigned deaths (rows are kept only when FIPS is below 80000);
data coviduscty;
    set coviduscty;
    if FIPS lt 80000;
run;

*Create four output data sets for eventual macro call;
data ga nc sc tn;
    set coviduscty;
    if Province_State="South Carolina" then output SC;
    else if province_state="Georgia" then output GA;
    else if province_state="Tennessee" then output TN;
    else if province_state="North Carolina" then output NC;
run;

*Compute statewide totals and rename--PROC SUMMARY is easier to use than PROC MEANS;
proc sort data=coviduscty;
    by Province_State;
run;

proc summary data=coviduscty;
    by Province_State;
    var '1/22/20'n--'11/29/20'n;
    output out=covidusstate (rename=(Province_State=State)) sum=;
run;

*Clean up file (same summary, now dropping the automatic _TYPE_ and _FREQ_ variables);
proc summary data=coviduscty;
    by Province_State;
    var '1/22/20'n--'11/29/20'n;
    output out=covidusstate (rename=(Province_State=State) drop=_TYPE_ _FREQ_) sum=;
run;

*Raw proc transpose;
proc transpose data=covidusstate out=covidstate;
    by State;
run;

*Clean up proc transpose (names the former column-name variable Date and the value Cum_Deaths);
proc transpose data=covidusstate
    out=covidstate (rename=(COL1=Cum_Deaths))
    name=Date;
    by State;
run;

proc sort data=covidstate;
    by State;
run;

*Fix date and add daily deaths;
*Use BY STATE so that the lag function behaves;
data covidout;
    set covidstate;
    by State;
    DateJulian=input(Date,mmddyy8.);
    format DateJulian mmddyy8.;
    /* LAG runs on every row; the first row of each state would otherwise
       be differenced against the previous state, so it is set to missing */
    DailyDeaths=Cum_Deaths-lag(Cum_Deaths);
    if FIRST.State=1 then DailyDeaths=.;
run;

*Error in Georgia data?;
proc sgplot data=covidout;
    where state in ("South Carolina" "North Carolina" "Georgia");
    series x=DateJulian y=DailyDeaths/group=state;
run;

*Seems real;
proc print data=covidout;
    where state="Georgia" and DateJulian gt '15Oct2020'd;
run;

proc sgplot data=covidout;
    where state in ("South Carolina" "North Carolina" "Georgia");
    series x=DateJulian y=DailyDeaths/group=state;
    yaxis max=120 label="Daily Deaths";
    xaxis label="Date";
run;

*Maybe smoothing helps;
proc sgplot data=covidout;
    where state in ("South Carolina" "North Carolina" "Georgia");
    loess x=DateJulian y=DailyDeaths/group=state;
    yaxis max=120 label="Daily Deaths";
    xaxis label="Date";
run;

*This macro transposes a subset of states whose data have been previously saved separately;
%macro stateclean(states=, firstdate=1/22/20, lastdate=11/23/20);
    /* Parameters:
         states    - blank-separated list of WORK data set names, one per
                     state (e.g. SC GA TN NC); each is processed in turn
         firstdate - name of the first date column to total
         lastdate  - name of the last date column to total
       For each name S in STATES this creates STotal (column sums over the
       date range) and SCOVID (one row per date with Date, Deaths,
       DateJulian, State and DailyDeaths).
       NOTE(review): the default LASTDATE of 11/23/20 is kept from the
       original code; the statewide summaries use 11/29/20 - confirm which
       end date is intended. */
    %local k state;
    %let k=1;
    %let state=%scan(&states,&k);
    %do %while(%length(&state) > 0);

        proc summary data=&state;
            /* double-quoted name literals so the macro variables resolve */
            var "&firstdate"n--"&lastdate"n;
            output out=&state.Total (drop=_TYPE_ _FREQ_) sum=;
        run;

        proc transpose data=&state.Total
            out=&state.COVID (rename=(Deaths1=Deaths))
            name=Date prefix=Deaths;
        run;

        data &state.COVID;
            set &state.COVID;
            /* Used this first form of julian date for a growth curve model:
               DateJulian=input(Date,mmddyy8.)-'21JAN20'd; */
            DateJulian=input(Date,mmddyy8.);
            format DateJulian mmddyy8.;
            State="&state";
            DailyDeaths=Deaths-lag(Deaths);
        run;

        %let k=%eval(&k+1);
        %let state=%scan(&states,&k);
    %end;
%mend stateclean;

%stateclean(states=SC)
%stateclean(states=SC GA TN NC)

*Commands that saved data sets used elsewhere in the course;
*State is dropped only in the exported file so that WORK.sccovid keeps it for later steps;
proc export data=sccovid (drop=state) dbms=csv
    outfile="/home/grego1/STAT 540/SC_COVID_Deaths.csv"
    replace;
run;

*Save a SAS permanent data set;
libname STAT540 "/home/grego1/STAT 540";
*Stack the four state data sets into one permanent data set and drop DateJulian;
*The IN= flag restores State for rows read from sccovid, which are blank if State was dropped from sccovid earlier in the program;
data STAT540.secovid;
    set gacovid nccovid sccovid (in=fromsc) tncovid;
    if fromsc then State="SC";
    drop DateJulian;
run;