#! /usr/bin/env python

import os
import re  # no longer used by block(); kept so the module namespace is unchanged
import string  # no longer used by block(); kept so the module namespace is unchanged
import sys

import cp
import SeqIO


# NOTE: main()'s docstring is runtime data, not ordinary documentation.  It is
# %-formatted with `docstringdict` and handed to cp.optStringParser(), and the
# result is used as the usage/help text.  Do not add '%' characters or free
# text to it without checking the parser.
# NOTE(review): each {{...}} entry appears to carry an option name, its help
# text and an optional default value.  The exact whitespace separating those
# fields could not be recovered from the copy of the file under review --
# confirm the layout against what cp.optStringParser expects.
def main():
    """
    %(filename)s

    Converts fasta format sequence alignments to an interleaved
    format more suitable for viewing. Can also display differences
    from the consensus. Blocksize and line length can be specified.

    Use the command

    $ fastaview [alignment1] [alignment2] ... [alignmentn] | less

    to view the sequence one page at a time.

    {{w Line width 60}}
    {{blo Number of characters in each block; for example, \
    set -blo=3 to view codon structure. -blo=0 turns blocks off. 10}}
    {{nonum Suppress numbering of positions}}
    {{dif display positions in each sequence identical to the consensus \
    as a (.) by default or the specified character .}}
    {{con calculate consensus and display at the end of the alignment}}
    {{conseq specify a sequence from the alignment by name to be used as the \
    consensus.}}
    {{plu pluarality for calculating consensus 2}}
    {{hea Include header lines}}
    {{out name of output file. Writes to screen by defult.}}
    {{h Print documentation}}
    {{version print version info and exit}}

    %(version)s
    """
    debug = 0
    docstringdict = {
        'filename': os.path.split(sys.argv[0])[-1],
        'version': '$Id: fastaview.py,v 1.1 2004/10/10 02:53:10 nghoffma Exp $',
    }

    optlist, formattedDocString, formattedSummary = cp.optStringParser(
        main.__doc__ % docstringdict,
        optWidth=15, lineWidth=60, valOffset=4, putVals=1)

    opts = cp.commandparser(
        options=optlist, usage='Options:\n' + formattedSummary,
        debug=debug, exitWithUsage=1)

    if opts.status('version'):
        sys.exit('Version: %(version)s\n' % docstringdict)

    if opts.status('h'):
        # Parenthesised single argument: identical output under the Python 2
        # print statement and the Python 3 print function.
        print(formattedDocString)
        sys.exit(0)

    # Command line arguments.
    infile_list = opts.value('infile_list', 'in', 'file')
    width = opts.value('w')
    blocksize = opts.value('blo')  # 0 (falsy) turns blocking off
    number = not opts.status('nonum')
    hea = opts.status('hea')
    dif = opts.status('dif')
    difchar = opts.value('dif')
    makecon = opts.status('con')
    plu = opts.value('plu')
    conseqname = opts.value('conseq')
    blanklines = 1

    # A width below 1 would never advance through the alignment.
    if width < 1:
        sys.exit('Error: line width (-w) must be >= 1')

    # Load the fasta file(s) into a list of sequence objects.
    seqList = SeqIO.readFastaList(infile_list, degap=0, v=0, output='list')
    if not seqList:
        sys.exit('Error: no sequences were found in the input')

    # Write to stdout by default.
    writeToFile = bool(opts.status('out'))
    if writeToFile:
        outfile = opts.value('out', 'out', 'file')
    else:
        outfile = sys.stdout

    try:
        consSeq = _find_consensus(seqList, makecon or dif, conseqname, plu)

        # Find the longest sequence and the widest label; the label width is
        # measured before any sequence is replaced by its diff.
        longest = len(seqList[0])
        namelen = 0
        for i, seq in enumerate(seqList):
            namelen = max(namelen, len(_label(seq, hea)))
            longest = max(len(seq), longest)
            if dif:
                seqList[i] = SeqIO.diff(seq, consSeq, difchar)

        # Add the consensus to the end if requested.
        if makecon:
            seqList.append(consSeq)

        _write_interleaved(outfile, seqList, longest, namelen, width,
                           blocksize, number, hea, blanklines)
    finally:
        # Close the output file even when an error or sys.exit() interrupts
        # the run; stdout is left open.
        if writeToFile:
            outfile.close()


def _label(seq, hea):
    """Return the text printed to the left of a sequence: its name, plus a
    space and its header when `hea` is true."""
    if hea:
        return seq.getName() + ' ' + seq.getHea()
    return seq.getName()


def _find_consensus(seqList, needed, conseqname, plu):
    """Return the sequence to use as the consensus, or None if none is needed.

    If `conseqname` is given, the sequence in `seqList` whose name matches it
    (case-insensitively, after stripping `conseqname`) is returned; when
    several match, the last one wins.  The program exits with an error
    message if no sequence matches.  Otherwise, if `needed` is true, the
    consensus is calculated with SeqIO.consFromList using plurality `plu`.
    """
    if conseqname:
        wanted = conseqname.strip().lower()
        found = None
        for seq in seqList:
            if wanted == seq.getName().lower():
                found = seq
        if not found:
            sys.exit('Error: the sequence %s could not be found in the supplied alignment' % conseqname)
        return found
    if needed:
        return SeqIO.consFromList(seqList, plu=plu)
    return None


def _write_interleaved(outfile, seqList, longest, namelen, width, blocksize,
                       number, hea, blanklines):
    """Write `seqList` to `outfile` in interleaved chunks of `width` positions.

    Each chunk is optionally preceded by a '#' line giving its first and last
    position (when `number` is true), has one line per sequence with the label
    right-justified in `namelen` columns, and is followed by `blanklines`
    newlines.  When `blocksize` is non-zero every line is split into
    space-delimited blocks by block().
    """
    formattingString = "%" + str(namelen) + "s %s\n"

    # max(longest, 1) keeps the original behaviour of always writing at least
    # one chunk, while an alignment whose length is an exact multiple of
    # `width` no longer gets a spurious empty trailing chunk.
    for start in range(0, max(longest, 1), width):
        stop = start + width
        position = start + 1  # 1-based position of the chunk's first column

        if number:
            last = min(position + width - 1, longest)
            w = last - position
            if blocksize:
                # widen the field to allow for the spaces between blocks
                w = w + w // blocksize - 1
            fstr = '#' + ' ' * (namelen + 2) + '%s%' + str(w) + 's\n'
            outfile.write(fstr % (position, last))

        # one line per sequence for this chunk
        for seq in seqList:
            thisLine = seq[start:stop]
            if blocksize:
                thisLine = block(thisLine, blocksize)
            outfile.write(formattingString % (_label(seq, hea), thisLine))

        outfile.write(blanklines * '\n')


def block(str, blocksize):
    """Returns a string containing space-delimited substrings of str of
    length blocksize.

    The final substring is shorter when len(str) is not a multiple of
    blocksize.  If blocksize is greater than len(str), str is returned
    unchanged.  Exits the program if blocksize < 1.
    """
    if blocksize < 1:
        sys.exit('block(): blocksize must be >= 1')
    if blocksize > len(str):
        return str
    # Plain slicing keeps every character exactly once, whatever it is.
    return ' '.join([str[i:i + blocksize]
                     for i in range(0, len(str), blocksize)])


if __name__ == '__main__':
    main()