11-10-2006

SAS Syntax

 

/* Build the working data set HWK_REG: one observation per subject,   */
/* with two numeric variables read by list input (height, weight).    */
/* The trailing @@ holds the input line so that several height/weight */
/* pairs can be read from the same data line.                         */
data hwk_reg;
    input height weight @@;
    datalines;
69.0 112.5   56.5  84.0   65.3  98.0
62.8 102.5   63.5 102.5   57.3  83.0
59.8  84.5   62.5 112.5   62.5  84.0
59.0  99.5   51.3  50.5   64.3  90.0
56.3  77.0   66.5 112.0   72.0 150.0
64.8 128.0   67.0 133.0   57.5  85.0
;
run;

      proc means data=hwk_reg std mean;

            var height weight;

                  output out=means;

            run;

 

      data mean;

            set means;

                  id=_n_;

                  if id in(1,2,3,5) then delete;*DELETING SOME EXTRANEOUS INFORMATION;

                  height_mean=height;*RENAMING VARIABLES;

                  weight_mean=weight;

            keep height_mean weight_mean;*KEEPING SPECIFIC VARIABLES;

      run;

      data std;

            set means;

                  id=_n_;

                  if id in(1,2,3,4) then delete;

                  height_std=height;

                  weight_std=weight;

            keep height_std weight_std;

      run;

 

 

      data mean_std;*CREATING ONE OBSERVATION ON THE MEAN AND STANDARD DEVIATION VARIABLE FOR EACH OF THE 18 SUBJECTS;

            merge mean std;

                  do i=1 to 18;

                        output;

                  end;

            drop i;

      run;

      data merge_all;

            merge hwk_reg mean_std;*COMPUTING THE PRINCIPAL AXIS REGRESSION LINE;

                  A1=height_std/weight_std;

                  A0=height_mean-A1*weight_mean;

                  YHAT=A0+A1*weight;

      run;

 

      GOPTIONS;*PLOTTING;

            SYMBOL1 VALUE=DOT COLOR=STEEL I=REG;

            SYMBOL2 VALUE=CIRCLE COLOR=SALMON I=REG;

            PROC GPLOT DATA=MERGE_ALL;

                  PLOT  HEIGHT*WEIGHT YHAT*WEIGHT / OVERLAY;

            RUN;

 

/* Intercept-only model: no effects on the right-hand side of MODEL, so */
/* the only parameter reported by SOLUTION is the intercept.            */
PROC GLM DATA=hwk_reg;
      MODEL HEIGHT= / SOLUTION;
RUN;
/* PROC GLM is interactive and stays active after RUN; end it explicitly */
/* instead of relying on the next step to terminate it.                  */
QUIT;

 

 

/* HOW REGRESSION AND ANOVA WORK */

 

PROC IML;
      /* Read the response and the predictor from HWK_REG instead of      */
      /* retyping the values, so the matrix results always match the data */
      /* analysed by the PROC GLM steps.                                  */
      USE hwk_reg;
      READ ALL VAR {height} INTO HEIGHT;
      READ ALL VAR {weight} INTO WEIGHT;
      CLOSE hwk_reg;

      /* Design matrix for the intercept-only model: a column of ones     */
      /* with one row per observation.                                    */
      N = NROW(HEIGHT);
      X = J(N, 1, 1);
      PRINT X;

      INVX = INV(X`*X);          /* in this special case this becomes 1/n */
      SUM_HEIGHT = X`*HEIGHT;    /* in this special case this becomes the sum of the Y values (height) */
      PRINT INVX SUM_HEIGHT;

      /* Put it together and you see that the intercept is the mean of height. */
      INTERCEPT = INV(X`*X) * X`*HEIGHT;
      PRINT INTERCEPT;

      /* Here is the same computation, only now the design matrix also    */
      /* includes weight as a second column.                              */
      XTOT = X || WEIGHT;
      BETA_HAT = INV(XTOT`*XTOT) * XTOT`*HEIGHT;
      PRINT BETA_HAT;
QUIT;

 

/* Regression of height on weight; SOLUTION prints the parameter */
/* estimates for comparison with BETA_HAT from PROC IML.         */
PROC GLM DATA=hwk_reg;
      MODEL HEIGHT=WEIGHT / SOLUTION;
RUN;
/* PROC GLM is interactive and stays active after RUN; this is the last */
/* SAS step, so end it explicitly.                                      */
QUIT;

R Code

# Import the data -- note: you will need to change the path for your machine.
# Also note that rather than the C:\ Windows format, R uses forward slashes: C:/
data.frame <- read.table("f:/Psyc 281/reg/ta10_007.txt", as.is = TRUE,
                         header = TRUE, sep = "\t", row.names = 1)

# Name the two data columns. (The original line had no assignment and was a
# syntax error.) Assumes the file has exactly two columns after the
# row-name column -- confirm against the data file.
names(data.frame) <- c("sat", "act")
attach(data.frame)   # makes sat and act available by name below
data.frame           # Printing the data to make sure it looks right

summary(data.frame)      # Getting the means (was summary(data): `data` is a base R function)
sapply(data.frame, sd)   # Getting the SDs, one column at a time

# Means and SDs computed from the data instead of being pasted in by hand,
# so they stay correct (and unrounded) if the data file changes.
satbar <- mean(sat)
actbar <- mean(act)
satSD  <- sd(sat)
actSD  <- sd(act)

# Principal axis line as defined in this script: slope is the ratio of the
# two SDs (always positive), and the line passes through the point of means.
a1 <- actSD / satSD         # Computing principal axis a1
a0 <- actbar - a1 * satbar  # Computing principal axis a0

a0  # Printing a0
a1  # Printing a1

plot(sat, act, type = "n")          # Empty plotting region with the right axes
abline(a0, a1, col = 2)             # Fitting the principal axis line to the scatter
points(sat, act)                    # Including the scatter points
fit1 <- lm(act ~ sat)               # Estimating an OLS regression line
abline(coef(fit1), lty = 2, col = 4)  # Fitting the OLS regression line to the scatter
summary(fit1)                       # Printing out the results