# Week 3: VCV Matrix
# June 3, 2009
#
# Fits Y ~ X1 + X2 + X3 by OLS and reproduces, by hand, the coefficient
# variance-covariance matrix, R^2, and the X1/X2 sample correlation.

data <- read.csv("vcv_data.csv")

# Pull the model variables out explicitly rather than attach()-ing the data
# frame: attach() puts the columns on the search path, where a same-named
# global silently masks them.
Y <- data$Y
X1 <- data$X1
X2 <- data$X2
X3 <- data$X3

model <- lm(Y ~ X1 + X2 + X3, data = data)

# (1): Compute the VCV matrix and R^2
dm <- model.matrix(model)  # The design matrix (intercept + regressors)

# lm() drops rows with missing values; the hand-rolled formulas below use the
# full-length Y, so they are only valid when every row entered the fit.
if (nrow(dm) != nrow(data)) {
  stop("Rows were dropped when fitting the model (missing values?); ",
       "the manual calculations assume complete data.", call. = FALSE)
}

N <- nrow(dm)  # Number of observations, taken from the fit
K <- ncol(dm)  # Number of estimated coefficients, including the intercept

sigma2.hat <- sum(residuals(model)^2) / (N - K)  # sigma^2 estimate

# The variance-covariance matrix: sigma^2 * (X'X)^-1
VCV <- sigma2.hat * solve(crossprod(dm))

# Explained sum of squares: b'X'Y - N * mean(Y)^2 (a 1 x 1 matrix)
ESS <- crossprod(coef(model), crossprod(dm, Y)) - N * mean(Y)^2

# Total sum of squares: Y'Y - N * mean(Y)^2 (a 1 x 1 matrix)
TSS <- crossprod(Y) - N * mean(Y)^2

R.2 <- ESS / TSS  # R^2

# (2): The only coefficient that is close to statistical significance at the
# 0.05 level is X3, which has a t-value of 12.3.

# (3): Assumption violations
plot(X3, residuals(model))  # No obvious signs of heteroskedasticity
cor(X1, X2)                 # X1 and X2 are very collinear

# (4): Correlation between X1 and X2, computed by hand as the sample
# covariance divided by the product of the sample standard deviations
cov.X1.X2 <- sum((X1 - mean(X1)) * (X2 - mean(X2))) / (N - 1)
c.X1.X2 <- cov.X1.X2 / (sd(X1) * sd(X2))