/* bcent.sas 2011/07/15 Alan R. Ellis, MSW Research Associate and Fellow Cecil G. Sheps Center for Health Services Research University of North Carolina at Chapel Hill CB 7590 Chapel Hill, NC 27599 are@unc.edu IML modules and supporting SAS macros to calculate betweenness centrality Uses algorithm by Ulrik Brandes Brandes, Ulrik. (2001). A faster algorithm for betweenness centrality. Journal of Mathematical Sociology, 25(2), 163-177. returns a vector of betweenness centrality values Usage: %include bcent.sas; bc=bcent(m,directed,normalize) m = matrix for which betweenness centrality is to be computed directed = 0 if relationships are undirected, 1 if directed normalize = 0 if raw centrality is desired, 1 if normalized value is desired This module creates the following macros: push, pop, enq, deq, isempty Input matrix should be square and, if relationships are undirected, should be symmetric. Citation: Ellis, A. R. (2009). Using SAS to calculate betweenness centrality. CONNECTIONS: Official Journal of the International Network for Social Network Analysis, 29(1), 26-32. Available at http://www.insna.org/pubs/connections/ 2006/04/11 original version 2006/05/04 changed code so input matrix would not be modified 2007/08/31 revised comments 2007/09/12 changed name of input matrix and disabled error checking code that modified input matrix 2011/07/15 added citation */ /* push values onto a stack - i.e., insert into column 1 of a row vector */ %macro push(s,val); if ncol(&s)=0 then /* if empty then start with value */ &s=&val; else &s=insert(&s,&val,0,1); /* otherwise insert value */ %mend; /* pop value off a stack - i.e., remove from column 1 of a row vector */ %macro pop(s,val); if ncol(&s)=0 then do; /* if empty then return undefined value */ free &val; end; else do; &val=&s[1]; &s=remove(&s,1); end; %mend; /* enqueue a value - i.e., insert it at the end of a row vector */ /* adapted from "push" macro - simply changed location of insertion */ %macro enq(s,val); if ncol(&s)=0 then /* if empty then start with value */ &s=&val; else &s=insert(&s,&val,0,1+ncol(&s)); /* otherwise insert value at end of queue */ %mend; /* dequeue a value - i.e., remove it from column 1 of a row vector */ %macro deq(q,val); %pop(&q,&val); %mend; /* function to determine whether a stack or queue (i.e., row vector) is empty */ %macro isempty(sq); %str((ncol(&sq)=0)) %mend; /* function to calculate Betweenness Centrality using Brandes' algorithm */ start bcent(m,directed,normalize); nvert=nrow(m); /* # vertices in network */ /* check input */ err=0; if ((directed^=0) & (directed^=1) & (normalize^=0) & (normalize^=1)) then err=1; /* The following lines could be enabled in order to check input further. Note that the input matrix is modified. */ /* else if (nvert^=ncol(m)) then err=1; else do; tm=m`; if ((directed=0) & any(m^=tm)) then err=1; end; */ if err=1 then do; file log; put 'ERROR: invalid parameter values for bcent(). Returning -1.'; put 'USAGE: bcent(m,directed,normalize);'; put ' : square matrix (symmetric if graph is undirected)'; put ' : 1 if graph is directed, 0 otherwise'; put ' : 1 if result should be normalized, 0 otherwise'; return(-1); end; cb=j(nvert,1,0); /* betweenness centrality of each vertex starts at zero */ do s=1 to nvert; free stack; /* start with empty stack - just being explicit */ p=j(nvert,nvert,0); /* create empty list of predecessors for each vertex */ sigma=j(nvert,1,0); /* count # geodesics each vertex is on: initially zero, */ sigma[s]=1; /* except one for current vertex */ d=j(nvert,1,-1); /* vector of -1 values, except zero for current vertex */ d[s]=0; /* d appears to measure depth */ free queue; /* start with empty queue - just being explicit */ %enq(queue,s); /* add current vertex to queue */ do while (%isempty(queue)^=1); /* while queue is not empty */ %deq(queue,v); /* de-queue a vertex number into v */ %push(stack,v); /* and also push it onto the stack */ /* loop through each neighbor w-sub-j of v */ w=loc(m[v,]); /* loop through vertices w where m(v,w) is nonzero */ /* i.e., there is a path from v to w */ do j=1 to ncol(w); /* w-sub-j found for the first time? */ if d[w[j]]<0 then do; %enq(queue,w[j]); d[w[j]]=d[v]+1; end; /* shortest path to w via v? */ if d[w[j]]=d[v]+1 then do; sigma[w[j]]=sigma[w[j]]+sigma[v]; p[w[j],v]=1; /* add v to w's list of vertices */ end; end; end; delta=j(nvert,1,0); /* initialize delta to zero for each vertex */ /* stack returns vertices in order of non-increasing distance from vertex s */ do while (%isempty(stack)^=1); /* while stack is not empty */ %pop(stack,ww); /* use double-w; this is distinct from */ /* the w used for neighbors-to-v above */ vv=loc(p[ww,]); /* indices of vertices on ww's list of predecessors */ /* use vv - this is a new v, too */ do k=1 to ncol(vv); delta[vv[k]]=delta[vv[k]]+(sigma[vv[k]]/sigma[ww])*(1+delta[ww]); end; if ww ^= s then cb[ww]=cb[ww]+delta[ww]; end; end; if (directed=0) then cb=cb/2; /* if undirected then divide by 2 */ if (normalize=1) then do; if (directed=0) then cb=2*cb/(nvert-1)/(nvert-2); /* normalize by maximum possible centrality */ else cb=cb/(nvert-1)/(nvert-2); end; return(cb); finish;