#### R functions for #### Interday Forecasting and Intraday Updating of Call Center Arrivals #### by Haipeng Shen and Jianhua Z. Huang #### forthcoming in Manufacturing & Service Operations Management #### updated: 3/30/08 for public distribution ########################################################################## #### Overall Description #### #### The following functions implement the SVD-based rolling ahead forecasting and within-day-updating for the simulation studies and the real data analysis reported in the paper #### #### They use varying-coefficients AR(1) models to forecast the interday feature vectors, which work well for the reported analyses. #### However, they may not be suitable for other data. Empirical analysis has to be performed first to identify appropriate time series models, before modifying the functions correspondingly. ########################################################################## ########################################################################## ### Individual Functions ### ### svd.fore.one function for one-day-ahead forecasting using day-of-week-based AR(1) model with count/rate prediction interval ### svd.fore.one.all function for one-day-ahead forecasting using day-of-week-based AR(1) model with count/rate prediction bootstrapped distribution ### svd.fore.pls function for PLS within-day updating using day-based AR(1) model with count/rate prediction interval ### svd.fore.pls.all function for PLS within-day updating using day-based AR(1) model with count/rate prediction bootstrapped distribution ########################################################################## ### function for one-day-ahead forecasting using day-based AR(1) model ### output count/rate confidence interval svd.fore.one <- function(mat, dind, mc=3, first=151, nroll=60, nhis=150, nh=1, B=1000, alpha=0.05){ ### mat: data matrix: row for different "day", column for different "period"; ### dind: day indicator; ### mc: number of eigen vector pairs to use ### first: index (or row number) for the first out-of-sample day ### nroll: # of rolling ### nhis: # of historical "day" used to generate forecast ### nh: # of forward-forecasting period, nh=1 for one-day-ahead... ### B: # of bootstrap iterations ### alpha: type I error, 1-alpha=confidence level options(contrasts=c("contr.treatment", "contr.treatment")) mat.fore <- matrix(0, nrow=nroll, ncol=dim(mat)[2]) rate.ci <- array(0, dim=c(nroll, 2, dim(mat)[2])) count.ci <- array(0, dim=c(nroll, 2, dim(mat)[2])) for(i in 1:nroll){ #cat("rolling:", i, "\n") mat.fore.boot <- matrix(0, nrow=B, ncol=dim(mat)[2]) temp.mat <- mat[(first-1-nhis+i):(first-2+i),] temp <- svd(temp.mat) for(j in 1:mc){ temp.mat <- temp.mat-temp$d[j]*temp$u[,j]%*%t(temp$v[,j]) ### approximation error matrix df.mat <- data.frame(cbind(y=temp$u[-1, j], x1=temp$u[-nhis,j], x2=factor(dind)[(first-1-nhis+i):(first-2+i-1)])) lm.df <- lm(y~x1+factor(x2), data=df.mat) df.out <- predict(lm.df, newdata=data.frame(cbind(x1=temp$u[nhis,j], x2=dind[first-2+i]))) mat.fore[i,] <- mat.fore[i,]+temp$d[j]*df.out*temp$v[,j] df.out <- df.out+sample(lm.df$residuals, B, replace=T) ### bootstrap time series forecasts mat.fore.boot <- mat.fore.boot+temp$d[j]*df.out%*%t(temp$v[,j]) } rate.ci[i,1,] <- apply(mat.fore.boot^2, 2, quantile, prob=alpha/2) rate.ci[i,2,] <- apply(mat.fore.boot^2, 2, quantile, prob=1-alpha/2) mat.fore.boot <- mat.fore.boot+temp.mat[sample(1:nhis, B, replace=T), ] count.ci[i,1,] <- apply(mat.fore.boot^2, 2, quantile, prob=alpha/2) count.ci[i,2,] <- apply(mat.fore.boot^2, 2, quantile, prob=1-alpha/2) } return(list(fore=mat.fore^2, rate=rate.ci, count=count.ci)) } ### function for one-day-ahead forecasting using day-based AR(1) model ### output count/rate bootstrapped samples ### better to use for one-day at a time svd.fore.one.all <- function(mat, dind, mc=3, first=151, nroll=1, nhis=150, nh=1, B=1000){ ### mat: data matrix: row for different "day", column for different "period"; ### dind: day indicator; ### mc: number of eigen vector pairs to use ### first: index (or row number) for the first out-of-sample day ### nroll: # of rolling ### nhis: # of historical "day" used to generate forecast ### nh: # of forward-forecasting period, nh=1 for one-day-ahead... ### B: # of bootstrap iterations options(contrasts=c("contr.treatment", "contr.treatment")) mat.fore <- matrix(0, nrow=nroll, ncol=dim(mat)[2]) rate.all <- array(0, dim=c(nroll, B, dim(mat)[2])) count.all <- array(0, dim=c(nroll, B, dim(mat)[2])) for(i in 1:nroll){ #cat("rolling:", i, "\n") mat.fore.boot <- matrix(0, nrow=B, ncol=dim(mat)[2]) temp.mat <- mat[(first-1-nhis+i):(first-2+i),] temp <- svd(temp.mat) for(j in 1:mc){ temp.mat <- temp.mat-temp$d[j]*temp$u[,j]%*%t(temp$v[,j]) ### approximation error matrix df.mat <- data.frame(cbind(y=temp$u[-1, j], x1=temp$u[-nhis,j], x2=factor(dind)[(first-1-nhis+i):(first-2+i-1)])) lm.df <- lm(y~x1+factor(x2), data=df.mat) df.out <- predict(lm.df, newdata=data.frame(cbind(x1=temp$u[nhis,j], x2=dind[first-2+i]))) mat.fore[i,] <- mat.fore[i,]+temp$d[j]*df.out*temp$v[,j] df.out <- df.out+sample(lm.df$residuals[dind[(first-1-nhis+i):(first-2+i-1)]==dind[first-2+i]], B, replace=T) ### bootstrap time series forecasts mat.fore.boot <- mat.fore.boot+temp$d[j]*df.out%*%t(temp$v[,j]) } rate.all[i,,] <- mat.fore.boot^2 mat.fore.boot <- mat.fore.boot+temp.mat[sample(1:nhis, B, replace=T), ] count.all[i,,] <- mat.fore.boot^2 } return(list(fore=mat.fore^2, rate=rate.all, count=count.all)) } ### function for PLS within-day updating using day-based AR(1) model ### output count/rate confidence interval svd.fore.pls <- function(mat, dind, mc=3, first=151, nroll=60, nhis=150, m0, lambda, B=1000, alpha=0.05){ ### mat: data matrix: row for different "day", column for different "period"; ### dind: day indicator; ### mc: number of eigen vector pairs to use ### first: index (or row number) for the first out-of-sample day ### nroll: # of rolling ### nhis: # of historical "day" used to generate forecast ### m0: # of available time periods for the current day ###lambda: penalty parameter for PLS. 0 means LS updating ### B: # of bootstrap iterations ### alpha: type I error, 1-alpha=confidence level options(contrasts=c("contr.treatment", "contr.treatment")) mat.fore <- matrix(0, nrow=nroll, ncol=dim(mat)[2]) rate.ci <- array(0, dim=c(nroll, 2, dim(mat)[2])) count.ci <- array(0, dim=c(nroll, 2, dim(mat)[2])) ts.out <- matrix(0, nrow=mc, ncol=1) ts.out.mat <- matrix(0, nrow=mc, ncol=B) ### matrix for bootstrapped TS forecast for(i in 1:nroll){ cat("rolling:", i, "\n") mat.fore.boot <- matrix(0, nrow=B, ncol=dim(mat)[2]) temp.mat <- mat[(first-1-nhis+i):(first-2+i),] temp <- svd(temp.mat) v.m0 <- NULL x.m0 <- mat[first-1+i,1:m0] for(j in 1:mc){ temp.mat <- temp.mat-temp$d[j]*temp$u[,j]%*%t(temp$v[,j]) ### approximation error matrix df.mat <- data.frame(cbind(y=temp$u[-1, j], x1=temp$u[-nhis,j], x2=factor(dind)[(first-1-nhis+i):(first-2+i-1)])) lm.df <- lm(y~x1+factor(x2), data=df.mat) ts.out[j,1] <- predict(lm.df, newdata=data.frame(cbind(x1=temp$u[nhis,j], x2=dind[first-2+i]))) ts.out.mat[j,] <- ts.out[j,1]+sample(lm.df$residuals, B, replace=T) ### bootstrap time series forecasts v.m0 <- cbind(v.m0, temp$d[j]*temp$v[1:m0,j]) } pls.out <- solve(t(v.m0)%*%v.m0+lambda*diag(mc))%*%(t(v.m0)%*%x.m0+lambda*diag(mc)%*%ts.out) pls.out.mat <- solve(t(v.m0)%*%v.m0+lambda*diag(mc))%*%(matrix(rep(t(v.m0)%*%x.m0, B), nrow=mc)+lambda*diag(mc)%*%ts.out.mat) ### PLS for(j in 1:mc){ mat.fore[i,] <- mat.fore[i,]+temp$d[j]*pls.out[j]*temp$v[,j] mat.fore.boot <- mat.fore.boot+temp$d[j]*pls.out.mat[j,]%*%t(temp$v[,j]) } rate.ci[i,1,] <- apply(mat.fore.boot^2, 2, quantile, prob=alpha/2) rate.ci[i,2,] <- apply(mat.fore.boot^2, 2, quantile, prob=1-alpha/2) mat.fore.boot <- mat.fore.boot+temp.mat[sample(1:nhis, B, replace=T), ] count.ci[i,1,] <- apply(mat.fore.boot^2, 2, quantile, prob=alpha/2) count.ci[i,2,] <- apply(mat.fore.boot^2, 2, quantile, prob=1-alpha/2) } return(list(fore=mat.fore[, -(1:m0)]^2, rate=rate.ci[,,-(1:m0)], count=count.ci[,,-(1:m0)])) } ### function for PLS within-day updating using day-based AR(1) model ### output count/rate bootstrapped samples svd.fore.pls.all <- function(mat, dind, mc=3, first=151, nroll=1, nhis=150, m0, lambda, B=1000){ ### mat: data matrix: row for different "day", column for different "period"; ### dind: day indicator; ### mc: number of eigen vector pairs to use ### first: index (or row number) for the first out-of-sample day ### nroll: # of rolling ### nhis: # of historical "day" used to generate forecast ### m0: # of available time periods for the current day ###lambda: penalty parameter for PLS. 0 means LS updating ### B: # of bootstrap iterations options(contrasts=c("contr.treatment", "contr.treatment")) mat.fore <- matrix(0, nrow=nroll, ncol=dim(mat)[2]) rate.all <- array(0, dim=c(nroll, B, dim(mat)[2])) count.all <- array(0, dim=c(nroll, B, dim(mat)[2])) ts.out <- matrix(0, nrow=mc, ncol=1) ts.out.mat <- matrix(0, nrow=mc, ncol=B) ### matrix for bootstrapped TS forecast for(i in 1:nroll){ cat("rolling:", i, "\n") mat.fore.boot <- matrix(0, nrow=B, ncol=dim(mat)[2]) temp.mat <- mat[(first-1-nhis+i):(first-2+i),] temp <- svd(temp.mat) v.m0 <- NULL x.m0 <- mat[first-1+i,1:m0] for(j in 1:mc){ temp.mat <- temp.mat-temp$d[j]*temp$u[,j]%*%t(temp$v[,j]) ### approximation error matrix df.mat <- data.frame(cbind(y=temp$u[-1, j], x1=temp$u[-nhis,j], x2=factor(dind)[(first-1-nhis+i):(first-2+i-1)])) lm.df <- lm(y~x1+factor(x2), data=df.mat) ts.out[j,1] <- predict(lm.df, newdata=data.frame(cbind(x1=temp$u[nhis,j], x2=dind[first-2+i]))) ts.out.mat[j,] <- ts.out[j,1]+sample(lm.df$residuals[dind[(first-1-nhis+i):(first-2+i-1)]==dind[first-2+i]], B, replace=T) ### bootstrap time series forecasts v.m0 <- cbind(v.m0, temp$d[j]*temp$v[1:m0,j]) } pls.out <- solve(t(v.m0)%*%v.m0+lambda*diag(mc))%*%(t(v.m0)%*%x.m0+lambda*diag(mc)%*%ts.out) pls.out.mat <- solve(t(v.m0)%*%v.m0+lambda*diag(mc))%*%(matrix(rep(t(v.m0)%*%x.m0, B), nrow=mc)+lambda*diag(mc)%*%ts.out.mat) ### PLS for(j in 1:mc){ mat.fore[i,] <- mat.fore[i,]+temp$d[j]*pls.out[j]*temp$v[,j] mat.fore.boot <- mat.fore.boot+temp$d[j]*pls.out.mat[j,]%*%t(temp$v[,j]) } rate.all[i,,] <- mat.fore.boot^2 mat.fore.boot <- mat.fore.boot+temp.mat[sample(1:nhis, B, replace=T), ] count.all[i,,] <- mat.fore.boot^2 } return(list(fore=mat.fore[, -(1:m0)]^2, rate=rate.all[,,-(1:m0)], count=count.all[,,-(1:m0)])) }