######################################################## ### ### R functions for ### ### Sparse Principal Component Analysis via Regularized Low Rank Matrix Approximation ### ### by Haipeng Shen and Jianhua Z. Huang (2008) ### ### Journal of Multivariate Analysis, 99, 1015-1034 ### ### Copyright by Haipeng Shen ### ######################################################## ### function to perform sPCA-rSVD using # of nonzero loadings (i.e. complement of sparsity) as the parameter (1-component) rsvd.spca.varnum <- function(X, u, v, varnum, type=c("soft", "hard","SCAD"), niter=100, err=10^(-3), trace=FALSE){ ### X: data (predictor) matrix or the population covariance matrix ### u, v: initial values. For example, u=svd(X)$u, v=svd(X)$v*svd(X)$d ### varnum: tuning parameter, # of nonzero loadings (i.e. complement of sparsity) ### type: penalty type, for SCAD, a=3.7 ### niter: # of maximum iterations ### err: threshold to declare converge u.d <- v.d <- 1 iter <- 0 while(u.d>err | v.d>err){ iter <- iter+1 v1 <- t(X)%*%u if (varnum < length(v1)){ lambda <- sort(abs(v1))[length(v1)-varnum] } else { lambda <- min(abs(v1))/2 } v1 <- thresh(v1, type, lambda, 3.7) u1 <- X%*%v1 u1 <- u1/sqrt(sum(u1^2)) u.d <- sqrt(sum((u1-u)^2)) v.d <- sqrt(sum((v1-v)^2)) if(iter > niter){ print("Fail to converge! Increase the niter!") break } u <- u1 v <- v1 } if(trace){ print(paste("iter:", iter, "u.d:", u.d, "v.d:", v.d)) } return(list(u=as.vector(u1), v=as.vector(v1))) } ### function to implement the soft-, hard, SCAD thresholding rule thresh <- function(z, type=c("soft", "hard", "SCAD"), delta, a=3.7){ ### z: argument ### type: thresholding rule type ### delta: thresholding level ### a: default choice for SCAD penalty if(type=="soft"){ return(sign(z)*(abs(z)>=delta)*(abs(z)-delta)) } if(type=="hard"){ return(z*(abs(z)>delta)) } if(type=="SCAD"){ return(sign(z)*(abs(z)>=delta)*(abs(z)-delta)*(abs(z)<=2*delta)+((a-1)*z-sign(z)*a*delta)/(a-2)*(2*deltaa*delta)) } } ### function to generate a covariance matrix according to Gram-Schmidt ### orthogonalization gram <- function(v.ini, c.seq, seed=1){ ### v.ini: the leading eigenvectors, which could be sparse ### c.seq: the sequence of eigenvalues ### seed: the random seed, in order to reproduce the result set.seed(seed) n <- dim(v.ini)[1] m <- dim(v.ini)[2] v.mat <- matrix(0, nrow=n, ncol=n) v.mat[,1:m] <- v.ini flag <- FALSE while(!flag){ print(flag) v.mat[,-(1:m)] <- matrix(runif((n-m)*n), nrow=n) v.mat.qr <- qr(v.mat) flag <- v.mat.qr$rank==n v.mat.Q <- qr.Q(v.mat.qr) } s.mat <- v.mat.Q%*%diag(c.seq)%*%t(v.mat.Q) s.mat }