* Use to load the smoking data.
Assumes a file called smoking.xls exists in the directory
named by the datalib macro variable and has 4 columns
with the names id, smoker, time, and FEV1 as described
in the label statement given later.
Last changed on 7/20/12.
;
* directory holding the smoking.xls workbook;
%let datalib=C:\projects\longmeths;

* send output to the listing destination only, with no html output
  and no ods graphics
;
ods html close;
ods listing;
ods graphics off;

* page layout for the listing output: no date stamp, page numbering
  restarted at 1, 53 lines per page, 76 columns per line
;
options nodate pageno=1 ps=53 ls=76;
title "Smoking Data";
* read the worksheet named smoking from the Excel workbook into the
  data set work.smoking, replacing any existing copy and taking the
  variable names from the first row of the worksheet
;
proc import
    datafile="&datalib\smoking.xls"
    out=work.smoking
    dbms=excel
    replace;
  range="smoking$";
  getnames=yes;
  mixed=no;
  scantext=yes;
  usedate=yes;
  scantime=yes;
run;
* display formats for the 0/1 coded variables:
  smkrfmt labels the smoking status codes and
  nyfmt labels generic no/yes indicators
;
proc format;
  value smkrfmt
    0="former"
    1="current"
  ;
  value nyfmt
    0="no"
    1="yes"
  ;
run;
* add derived variables, labels, and formats to the imported smoking data;
data smoking;
  set smoking;

  * second copy of time, stored under its own name and label;
  time2=time;

  * 0/1 indicators, one for each of the time values 0, 3, 6, 9, 12, 15, 19;
  time_0=(time=0);
  time_3=(time=3);
  time_6=(time=6);
  time_9=(time=9);
  time_12=(time=12);
  time_15=(time=15);
  time_19=(time=19);

  * placeholders initialized to missing here and filled in by the
    proc iml step that follows
  ;
  nmeas=.;
  nmiss=.;
  somemiss=.;

  * timesq was previously labelled but never assigned, which left it
    uninitialized, so compute it here to match its Time Squared label
    (assigned after somemiss, where the label statement is assumed to
    have first introduced it)
  ;
  timesq=time**2;

  label
    id="Subject ID"
    smoker="Smoking Status"
    time="Time in Years"
    FEV1="Forced Expiratory Volume in 1 Second"
    time2="Time"
    time_0="Time = 0 or Not"
    time_3="Time = 3 or Not"
    time_6="Time = 6 or Not"
    time_9="Time = 9 or Not"
    time_12="Time = 12 or Not"
    time_15="Time = 15 or Not"
    time_19="Time = 19 or Not"
    timesq="Time Squared"
    nmeas="Number of Measurements"
    nmiss="Number of Missing Measurements"
    somemiss="Some Missing Measurements or Not"
  ;

  * the indicators are listed explicitly rather than with a positional
    range so the format does not depend on variable order
  ;
  format
    smoker smkrfmt.
    time_0 time_3 time_6 time_9 time_12 time_15 time_19 somemiss nyfmt.
  ;
run;
* sort the smoking data by subject and then by time within subject,
  naming the data set explicitly rather than relying on the most
  recently created data set
;
proc sort data=smoking;
  by id time;
run;
* compute nmeas and related variables;
proc iml;
edit smoking var _all_;
read all var _all_;
* unique function returns a row vector so transpose it into a column vector with ` operator;
uniqids=unique(id)`;
* number of rows of uniqids equals the number of subjects;
nsubj=nrow(uniqids);
* create a column vector with nsubj rows and 1 column containing all
missing values
;
uniqmeas=repeat(.,nsubj,1);
do i=1 to nsubj;
* generates a column vector with ones when the observation corresponds
to a measurement for the current subject with id given by uniqids[i]
and zeros otherwise
;
measindx=(id=uniqids[i]);
* loads the sum of the values of measindx, which equals the number of
measurements for the current subject, into the locations of nmeas
corresponding to the observation indexes for that subject's measurements
;
nmeas[loc(measindx)]=measindx[+];
* also store the count in the associated uniqmeas location;
uniqmeas[i]=measindx[+];
end;
* nmeas[<>] means to compute the maximum value of the vector nmeas;
maxmeas=nmeas[<>];
* computes the total number of measurements assuming there are
no missing values for the outcome variable in the data set
;
totmeas=nrow(id);
* computes the average number of measurements per subject;
avemeas=uniqmeas[:];
* computes number of missing measurements;
nmiss=maxmeas-nmeas;
* computes indicator for having some missing measurements;
somemiss=(nmeas