#! /usr/bin/env python

###
### HISTORY
###
#010910 wr first reimplementation of sequence class as Seq using special methods
#011112 NH Overloaded getSeq1 method to take a single argument (returns a char at position specified)
#011203 wr made type optional so it can be phased out
#011204 NH added clone() method
#020103 NH added deprecation warning for type field (so don't specify type!)
#020222 NH removed getType(), setType(), and self.__type

###
### DebugLevel
###
debugLevel = 0

# NOTE: string, os and types are no longer referenced below; they are kept
# so that the module namespace stays unchanged for existing importers.
import copy
import os
import string
import sys
import traceback
import types

from Dictionaries import translationWithoutAmbiguity, translationWithAmbiguity, translationWithoutAmbiguity3, translationWithAmbiguity3


class Seq:
    """The Sequence class.

    Holds a sequence string together with a name, an accession number,
    a header string and a free-form data container.

    !!!!! ALL INDEXING IN THIS CLASS IS 0-BASED - EXCEPT getSeq1() !!!!!
    """

    def __init__(self, name, seq, type='', acc='', hea='', data=None):
        """Create a sequence object.

        name    name of sequence
        seq     sequence string
        [type]  deprecated and ignored; any value other than '' prints a
                deprecation warning plus a stack trace to stdout
        [acc]   accession number
        [hea]   header information
        [data]  arbitrary per-sequence data; a new empty dict is created
                for each instance when omitted

        !!!!! ALL INDEXING IN THIS CLASS IS 0-BASED - EXCEPT getSeq1() !!!!!

        $Id: Seq.py,v 1.1 2004/10/10 02:53:09 nghoffma Exp $
        """
        # A `data={}` default would be evaluated once and then shared by
        # every instance created without an explicit data argument.
        if data is None:
            data = {}

        # assign private class variables - accessible through methods below
        self.__name = name
        self.__seq = seq
        self.__acc = acc
        self.__hea = hea
        self.__data = data

        if type != '':
            # The stack trace shows which caller still passes `type`.
            print('-' * 60)
            traceback.print_stack(file=sys.stdout)
            print("Deprecation warning: do not include a type argument "
                  "when initializing sequence objects.")
            print('-' * 60)

    ###
    ### ACCESSORS I
    ###
    def getSeq(self):
        """Return a slice copy of the sequence."""
        return self.__seq[:]

    def getSeq1(self, start, end=None):
        """Return part of the sequence using 1-based indexes.

        With both |start| and |end|, return the slice from 1-based index
        |start| to 1-based index |end|, both inclusive.  With |start|
        only, return the single element at that 1-based index.

        Only positive indexes are allowed; IndexError is raised for a
        zero or negative index, and (single-index form only) for an index
        beyond the end of the sequence.
        """
        # `end` is tested against None explicitly: Python 2 would
        # otherwise evaluate `None <= 0` as true.
        if start <= 0 or (end is not None and end <= 0):
            raise IndexError("Zero or negative index")

        if end is None:
            return self.__seq[start - 1]
        # 1-based inclusive [start, end] equals 0-based [start - 1, end).
        return self.__seq[start - 1:end]

    def getName(self):
        """Return the sequence name."""
        return self.__name

    def getAcc(self):
        """Return the accession number."""
        return self.__acc

    def getHea(self):
        """Return the header information."""
        return self.__hea

    ###
    ### ACCESSORS II
    ###
    def setAcc(self, newAcc):
        """Replace the accession number."""
        self.__acc = newAcc
        return

    def setSeq(self, newSeq):
        """Replace the sequence."""
        self.__seq = newSeq
        return

    def setHea(self, newHea):
        """Replace the header information."""
        self.__hea = newHea
        return

    def setName(self, newName):
        """Replace the sequence name."""
        self.__name = newName
        return

    def setData(self, d):
        """Replace the data container."""
        self.__data = d
        return

    def getData(self):
        """Return the data container itself (not a copy)."""
        return self.__data

    def _derive(self, newSeq):
        """Return a new Seq holding |newSeq| plus this object's name,
        accession, header and a shallow copy of its data."""
        derived = Seq(name=self.__name, seq=newSeq,
                      acc=self.__acc, hea=self.__hea)
        # setData() is used rather than the constructor so that whatever
        # value is stored here is carried over as-is.
        derived.setData(copy.copy(self.__data))
        return derived

    # 011204 NH
    def clone(self, beg=0, end=0):
        """Return a new sequence object with the same name, accession,
        header and (shallow-copied) data as this one.

        The sequence range can be redefined with the 0-based slice
        indexes |beg| and |end|.  An |end| of 0 means "to the end of the
        sequence".
        """
        if end == 0:
            end = len(self.__seq)

        return self._derive(self.__seq[beg:end])

    ###
    ### SPECIAL METHODS
    ###
    def __repr__(self):
        """Special method to return string representation of the whole
        sequence object when repr(obj) is called"""
        linelength = 60
        rule = '_' * linelength

        lines = [
            rule,
            'name:'.ljust(10) + self.__name,
            'acc:'.ljust(10) + self.__acc,
            'hea:'.ljust(10) + self.__hea,
            'data:'.ljust(10) + repr(self.__data),
            'seq:',
        ]
        # Sequence body wrapped at |linelength| characters per line.
        for i in range(0, len(self.__seq), linelength):
            lines.append(self.__seq[i:i + linelength])
        lines.append(rule)

        return '\n'.join(lines)

    def __str__(self):
        """Special method to return sequence string when str(obj) is
        called"""
        return self.__seq

    def __eq__(self, other):
        """Special method executed when a sequence object is compared
        with ==.

        |other| may be any object providing getSeq() or a plain string.
        Returns True if the sequences are identical irrespective of case
        and False otherwise; name and other fields are not taken into
        account.  Returns NotImplemented for any other kind of operand.
        """
        if hasattr(other, 'getSeq'):
            otherSeq = other.getSeq()
        elif isinstance(other, str):
            otherSeq = other
        else:
            return NotImplemented

        return self.__seq.upper() == otherSeq.upper()

    def __ne__(self, other):
        """Special method executed for !=; the negation of __eq__.

        Defined explicitly because Python 2 does not derive != from
        __eq__.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __call__(self):
        """Special method executed when obj() is called; returns a slice
        copy of the sequence string"""
        return self.__seq[:]

    def __len__(self):
        """Special method executed when len( obj ) is called; returns
        length of sequence"""
        return len(self.__seq)

    def __getitem__(self, index):
        """Special method to obtain single elements from sequence string
        by calling obj[key];

        !!!!! INDEXES ARE 0-BASED !!!!!"""
        return self.__seq[index]

    def __getslice__(self, start, end):
        """Special method to obtain object slices as in obj[i:j]
        (only consulted by Python 2; Python 3 routes slices through
        __getitem__)

        !!!!! INDEXES ARE 0-BASED !!!!!
        """
        return self.__seq[start:end]

    def __contains__(self, substring):
        """Special method called when 'item in obj' is executed; returns
        True if |substring| occurs in the sequence irrespective of case,
        False otherwise"""
        return substring.upper() in self.__seq.upper()

    def __add__(self, seqObj):
        """Special method to concatenate two sequence objects.

        The name, accession, header and (shallow-copied) data of the
        leftmost Seq object are retained.
        """
        return self._derive(self.__seq + seqObj.getSeq())


def translate(seq, table='ambi', start=0, v=debugLevel):
    """Translate a nucleotide sequence into a peptide.

    seq     sequence object or a string; return type is determined by
            type passed
    table   {'ambi'|'unambi'|'ambi3'|'unambi3'} name of one of the
            translation tables imported from Dictionaries (described in
            the original documentation as standard eukaryotic tables
            with or without ambiguity); an unrecognized name falls back
            to 'ambi'
    start   zero-based index of start position.
    [v]     verbosity; when true, progress is printed to stdout

    Codons missing from the table, and a trailing incomplete codon, are
    translated to the unknown marker: 'X' repeated to the length of one
    table value (X*1 or X*3).

    Returns the peptide as a string if |seq| is a string; otherwise a
    new Seq named after |seq| with header "Translated from: <name>".
    """
    # retrieve translation table
    translationTableDict = {'ambi': translationWithAmbiguity,
                            'unambi': translationWithoutAmbiguity,
                            'ambi3': translationWithAmbiguity3,
                            'unambi3': translationWithoutAmbiguity3}
    codonTable = translationTableDict.get(table, translationWithAmbiguity)

    # unknown character is X*1 or X*3; list() because dict views are not
    # indexable on Python 3
    unk = 'X' * len(list(codonTable.values())[0])

    # make a slice copy: this will allow the function to work with
    # strings as well as with sequence objects
    s = seq[start:].upper()

    # convert all gap characters to dashes
    s = s.replace('.', '-').replace('~', '-')

    if v:
        print('\n\n*\n*%30s\n*' % 'Seq.translate')
        print("\nTranslating %s nucleotides. " % len(s))

    residues = []
    for i in range(0, len(s), 3):
        # split sequence into codons
        thisCodon = s[i:i + 3]
        if len(thisCodon) == 3:
            residue = codonTable.get(thisCodon, unk)
            if v:
                print("%s %s" % (thisCodon, residue))
        else:
            # trailing incomplete codon
            residue = unk
        residues.append(residue)
    peptide = ''.join(residues)

    if isinstance(seq, str):
        return peptide

    # make sequence object
    return Seq(name=seq.getName(), seq=peptide,
               hea="Translated from: " + seq.getName())


def testCode():
    """Exercise the Seq special methods and translate(), printing the
    results to stdout."""
    s = 'AGAGAGAGAGCTCTCTCTCT---~~~....GTGTGTGTGT'
    # No `type` argument: it is deprecated and would only trigger the
    # deprecation warning.
    seq = Seq(name='name1', seq=s, hea='some random header',
              acc='fakeaccecionnumber')

    print("* print `seq` yields\n%s" % repr(seq))
    print("* print seq yields %s" % seq)
    print("* print str(seq) yields %s" % str(seq))
    print("* seq == 'blehblah' yields %s" % (seq == 'blehblah'))
    print("* seq == 'AGAGAGAGAGCTCTCTCTCT---~~~....GTGTGTGTGT' yields %s"
          % (seq == 'AGAGAGAGAGCTCTCTCTCT---~~~....GTGTGTGTGT'))
    print("* print seq() yields %s" % seq())
    print("* len( seq ) yields %s" % len(seq))
    print("* seq[0:10] yields %s" % seq[0:10])
    print("* seq[0:-1] yields %s" % seq[0:-1])
    print("* seq[0], seq[1] yield %s %s" % (seq[0], seq[1]))
    print("* seq[-1], seq[-2] yield %s %s" % (seq[-1], seq[-2]))
    print("* seq.getSeq1(1,11) yields %s" % seq.getSeq1(1, 11))

    temp = 'ct' in seq
    print("* 'ct' in seq yields %s" % temp)
    temp = 'cg' in seq
    print("* 'cg' in seq yields %s" % temp)

    print("* looping through sequence")
    # Iterating the object goes through Seq.__getitem__.
    print(' '.join([i for i in seq]))

    pep = translate(seq, table='ambi', v=1)
    print("\n* translating sequence yields\n%s" % repr(pep))


def describe():
    """
    Seq.py

    The Sequence object. Contains a nucleotide sequence plus information
    such as name, a header string, accession number.
    """
    print(describe.__doc__)


if __name__ == '__main__':
    describe()