#! /usr/bin/env python import os, sys, string, cp, os.path import SeqIO, Seq def main(): """ %(filename)s Calculates the consensus for an alignment of nucleotide or protein sequences. {{cg include to count gaps in the consensus calculation}} {{plu plurality required for assignment of consensus to a character 2}} {{h Prints documentation.}} {{v verbosity 1}} {{out name of outfile. Writes to stdout if unspecified.}} {{version print version info and exit}} %(version)s """ debug = 0 docstringdict = {'filename':os.path.split(sys.argv[0])[-1], 'version':'$Id: consensus.py,v 1.1 2004/10/10 02:53:09 nghoffma Exp $'} optlist, formattedDocString, formattedSummary = cp.optStringParser(main.__doc__ % docstringdict, optWidth=15, lineWidth=60, valOffset = 4, putVals=1) dict = cp.commandparser(options=optlist, usage='Options:\n' + formattedSummary, debug = debug, exitWithUsage=1) if dict.status('version'): sys.exit( 'Version: %(version)s\n' % docstringdict ) if dict.status('h'): print formattedDocString sys.exit( 0 ) v = dict.value('v') countgaps = dict.status('cg') plurality = dict.value('plu') #read from a list of fasta files fasta_list = dict.value('infile_list', 'in','file') fasta = SeqIO.readFastaList(fasta_list, output='list') #write to stdout by default writeToFile = 0 if dict.status('out'): outfile = dict.value('out','out','file') else: outfile = sys.stdout v = 0 if len(dict.value('infile_list')) == 1 and dict.value('infile_list')[0] != '': name = os.path.splitext(dict.value('infile_list')[0])[0] + '_cons' else: name = 'consensus' # tabulate list of sequence objects # tabulate( seqList, v=debugLevel ) tab = SeqIO.tabulate( fasta ) # consensus( dict, countGaps=0, plu=2, v=debugLevel ) consStr = '' for pos in tab: consStr += SeqIO.consensus( pos, countGaps=countgaps, plu=plurality, v=0 ) outfile.write( SeqIO.seqToFasta(Seq.Seq(name=name, seq=consStr)) ) if __name__ == '__main__': main()