* Load the smoking data.
  Assumes a file called smoking.xls exists in the directory named by the
  datalib macro variable, with a worksheet named smoking having 4 columns
  with the names id, smoker, time, and FEV1 as described in the label
  statement given later.
  last changed on 7/20/12
;

* listing page layout and the title printed on the output;
options nodate pageno=1 pagesize=53 linesize=76;
title1 "Smoking Data";

* directory that holds smoking.xls - edit this path to match the local setup;
%let datalib=C:\projects\longmeths;

* send output to the listing destination only, with ODS graphics turned off;
ods html close;
ods listing;
ods graphics off;

* read the smoking worksheet into WORK.smoking, replacing any existing copy
  and taking the variable names from the worksheet
;
PROC IMPORT OUT=WORK.smoking
    DATAFILE="&datalib\smoking.xls"
    DBMS=EXCEL REPLACE;
    RANGE="smoking$";
    GETNAMES=YES;
    MIXED=NO;
    SCANTEXT=YES;
    USEDATE=YES;
    SCANTIME=YES;
RUN;

* display formats for the smoking status code and for 0 or 1 indicators;
proc format;
    value smkrfmt 0="former" 1="current";
    value nyfmt 0="no" 1="yes";
run;

* add derived variables, labels, and formats to the imported data;
data smoking;
    set smoking;

    * second copy of time kept under its own name;
    time2=time;

    * indicators equal to 1 when time has the stated value and 0 otherwise;
    time_0=(time=0);
    time_3=(time=3);
    time_6=(time=6);
    time_9=(time=9);
    time_12=(time=12);
    time_15=(time=15);
    time_19=(time=19);

    * created as missing in this step so that the variables exist in the data set;
    nmeas=.;
    nmiss=.;
    somemiss=.;

    * timesq was named in the label statement but never assigned a value in
      this step, which left it missing on every row - compute it so that it
      matches its Time Squared label. It is assigned last so that the order
      of the variables in the data set does not change
    ;
    timesq=time**2;

    label
        id="Subject ID"
        smoker="Smoking Status"
        time="Time in Years"
        FEV1="Forced Expiratory Volume in 1 Second"
        time2="Time"
        time_0="Time = 0 or Not"
        time_3="Time = 3 or Not"
        time_6="Time = 6 or Not"
        time_9="Time = 9 or Not"
        time_12="Time = 12 or Not"
        time_15="Time = 15 or Not"
        time_19="Time = 19 or Not"
        timesq="Time Squared"
        nmeas="Number of Measurements"
        nmiss="Number of Missing Measurements"
        somemiss="Some Missing Measurements or Not"
    ;

    * the variable list of this format statement continues on the next line of the file;
    format smoker smkrfmt.
time_0--time_19 somemiss nyfmt.; run; proc sort; by id time; run; * compute nmeas and related variables; proc iml; edit smoking var _all_; read all var _all_; * uniq function generates a column vector so transpose it with ` operator; uniqids=unique(id)`; * number of rows of uniqids equals the number of subjects; nsubj=nrow(uniqids); * create a column vector with nsubj rows and 1 column containing all missing values ; uniqmeas=repeat(.,nsubj,1); do i=1 to nsubj; * generates a column vector with 1's when the observation corresponds to a measurement for the current subject with id given by uniqids[i] and 0 otherwise ; measindx=(id=uniqids[i]); * loads the sum of the values of measindx, which equals the number of measurements for the current subject, into the locations of nmeas correponding to the observation indexes for that subject's meaurements ; nmeas[loc(measindx)]=measindx[+]; * also store the count in the associated uniqmeas location; uniqmeas[i]=measindx[+]; end; * nmeas[<>] means to compute the maximum value of the vector nmeas; maxmeas=nmeas[<>]; * computes the total number of measurements assuming there are no missing values for the outcome variable in the data set ; totmeas=nrow(id); * computes the average number of measuremetns per subject; avemeas=uniqmeas[:]; * computes number of missing measurements; nmiss=maxmeas-nmeas; * computes indicator for having some missing measurements; somemiss=(nmeas