#! /usr/bin/env python import os, sys, string, cp, commands import SeqIO def main(): """ %(filename)s Creates an execution script for various analysis using PAUP. Nucleotide sequences are supplied either as one or more fasta format files or in a single nexus format file. {{nex nexus file}} {{m method nj nj tree jack jackknife analysis using nj boot bootstrap analysis using nj dist write square distance matrix tstv estimate transition/transversion ratio}} {{e execute paup on output file}} {{nocod a list of codons to exclude, eg 5-8,12}} {{h Prints documentation.}} {{v verbosity 1}} {{out name of outfile stdout}} {{version print version info and exit}} %(version)s """ distance_criteria = r""" begin paup; set errorstop; set criterion=distance; dset distance=gtr rates=gamma;""" bootstring = r""" bootstrap nreps=100 search=nj grpfreq=no treefile=%(basename)s_bootreps.pauptrees replace; treeinfo; cleartrees; execute %(basename)s_bootreps.pauptrees; treeinfo; contree 'all' / strict=no majrule=yes percent=50 treefile=%(basename)s_cons.pauptrees replace grpfreq=no; quit; endblock """ njstring = r""" nj brlens=yes; savetrees brlens=yes file=%(basename)s_nj.pauptrees replace=yes from=1 to=1; quit; endblock """ diststring = """ savedist file=%(basename)s.paupdist replace format=tabtext triangle=both diagonal=yes ndecimals=4; quit; endblock""" jackstring = """ jackknife nreps=100 search=nj conlevel=50; savetrees brlens=yes savebootp=nodelabels maxdecimals=0 file=%(basename)s_jack.pauptrees replace=yes from=1 to=1; quit; endblock""" tstvstring = """ log file = %(basename)s_tstv.pauplog replace=yes; nj brlens=yes; set criterion=likelihood; lset nst=2 basefreq=empirical rates=gamma ncat=4; lset tratio=estimate shape=estimate; lscore 1 / tratio=estimate; log stop; quit warntsave=no; endblock""" exsetString = """ begin assumptions; exset * toexclude = %s; end; """ #savetree SaveBootP=NodeLabels MaxDecimals=0; # NOTE: align2nexus: # java -cp /afs/isis.unc.edu/home/n/g/nghoffma/bin/readseq.jar run -a -f17 $* debug = 0 docstringdict = {'filename':os.path.split(sys.argv[0])[-1], 'version':'$Id: paupboot.py,v 1.1 2004/10/10 02:53:10 nghoffma Exp $'} optlist, formattedDocString, formattedSummary = cp.optStringParser(main.__doc__ % docstringdict, optWidth=15, lineWidth=60, valOffset = 4, putVals=1) dict = cp.commandparser(options=optlist, usage='Options:\n' + formattedSummary, debug = debug, exitWithUsage=1) if dict.status('version'): sys.exit( 'Version: %(version)s\n' % docstringdict ) if dict.status('h'): print formattedDocString sys.exit( 0 ) v = dict.value('v') if dict.value('m') == 'boot': paupstring = distance_criteria + bootstring suffix = 'boot' elif dict.value('m') == 'nj': paupstring = distance_criteria + njstring suffix = 'nj' elif dict.value('m') == 'dist': paupstring = distance_criteria + diststring suffix = 'dist' elif dict.value('m') == 'jack': paupstring = distance_criteria + jackstring suffix = 'jack' elif dict.value('m') == 'tstv': paupstring = distance_criteria + tstvstring suffix = 'tstv' if dict.status('nex'): nexfilename = dict.value('nex') basename = os.path.splitext(nexfilename)[0] else: #read from a list of fasta files fasta_list = dict.value('infile_list', 'in','file') fasta = SeqIO.readFastaList(fasta_list, output='list') basename = os.path.splitext(fasta_list[0].name)[0] #write to stdout by default writeToFile = 1 if dict.status('out') and dict.value('out') == 'stdout': outfile = sys.stdout writeToFile = 0 v = 0 elif dict.status('out'): outfilename = dict.value('out') basename = dict.value('out') else: outfilename = '%s.%sscript' % (basename, suffix) if writeToFile: sys.stderr.write('Writing %s \n' % outfilename) outfile = open(outfilename, 'w') if dict.status('nex'): outfile.write(open(nexfilename,'r').read()) else: SeqIO.seqToNexus(outfile, fasta) if dict.status('nocod'): excludeStringForPaup = excludeCodons(dict.value('nocod')) outfile.write(exsetString % excludeStringForPaup) params = {'basename':basename} outfile.write(paupstring % params) if writeToFile: outfile.close() if writeToFile and dict.status('e'): print 'Executing PAUP ...' tup = commands.getstatusoutput('paup %s' % outfilename) print tup[1] def excludeCodons(exp): """exp is from command line, eg 5-8,12. exp refers to codons, so this function converts to nucleotides and returns a string sutiable for paup""" #make sure cp hasn't evaluated to a string if type(exp) == type(1): exp = `exp` rangelist = exp.strip().split(',') nucRanges = [] for r in rangelist: rsplit = r.split('-') if len(rsplit) == 1: val = eval(rsplit[0]) nucRanges.append('%i-%i' % (val*3-2,val*3)) else: val1, val2 = eval(rsplit[0]), eval(rsplit[1]) nucRanges.append('%i-%i' % (val1*3-2,val2*3)) return string.join(nucRanges) if __name__ == '__main__': main()