#! /usr/bin/env python
"""Extract selected columns from delimited text files and print them as an
aligned, space-padded table."""

# from __future__ import division  # / operator performs float rather than int division

import os
import sys
import string


def _parseColumnRange(spec):
    """Translate one 1-based column spec into 0-based slice bounds.

    spec is 'a-b' (columns a through b, inclusive), 'a-' (column a through
    the last column) or 'a' (column a only).

    Returns a (start, stop) pair usable as sequence[start:stop]; stop is
    None when the range is open-ended.

    Raises ValueError if spec is not one of the forms above.
    """
    text = spec.strip()
    openEnded = text.endswith('-')
    if openEnded:
        text = text[:-1]
    parts = text.split('-')
    try:
        if len(parts) == 2:
            first, last = int(parts[0]), int(parts[1])
        elif len(parts) == 1:
            first = int(parts[0])
            if openEnded:
                last = None
            else:
                last = first
        else:
            raise ValueError(spec)
    except ValueError:
        raise ValueError(
            "invalid column range %r: expected 'a-b', 'a-' or 'a'" % (spec,))
    # An end of 0 has always been treated like a missing end; keep that.
    if not last:
        last = None
    # Convert the inclusive 1-based start to a 0-based index, never
    # letting it go negative (which would count from the end of the list).
    return max(first - 1, 0), last


def getSeqRange(theList, rangeString):
    """Return the elements of theList selected by rangeString, as a list.

    rangeString is a comma-separated list of 1-based, inclusive ranges in
    the format 'a-b,c-d,...'.  'a-' selects everything from a onward and a
    bare 'a' selects that single element.  A non-string rangeString (e.g.
    an int) is treated as a single element number.  Ranges that extend
    past the end of theList are truncated.

    Raises ValueError if any range cannot be parsed.
    """
    if not isinstance(rangeString, str):
        rangeString = '%s-%s' % (rangeString, rangeString)

    # NOTE(review): the bounds used to be parsed with eval(); int() is used
    # instead so that command-line text is never executed as code.
    selected = []
    for spec in rangeString.split(','):
        start, stop = _parseColumnRange(spec)
        selected.extend(theList[start:stop])
    return selected


def spaceDelimTable(dataArray, gutter=2, columnGuide=0):
    """Format dataArray as a table of left-justified, space-padded columns.

    Each column is (gutter) spaces wider than the widest element in the
    column.  dataArray is a list of lists: dataArray[line][element].
    Short rows are padded with empty cells; dataArray itself is not
    modified.  If columnGuide is a non-empty string, a first row is added
    in which every column holds columnGuide repeated to the width of that
    column's widest element.

    Returns the table as a string with surrounding whitespace stripped and
    a single trailing newline.
    """
    # max() has no default= on old interpreters, hence the "or [0]".
    colCount = max([len(row) for row in dataArray] or [0])

    # Pad copies of the rows so the caller's data is left untouched.
    rows = [list(row) + [''] * (colCount - len(row)) for row in dataArray]

    # Measure each cell exactly as '%s' will render it.
    colWidths = [0] * colCount
    for row in rows:
        for j, cell in enumerate(row):
            colWidths[j] = max(colWidths[j], len('%s' % (cell,)) + gutter)

    # Build one row template such as '%-5s%-4s\n'.
    rowFormat = ('%%-%is' * colCount) % tuple(colWidths) + '\n'

    pieces = []
    if columnGuide:
        guide = [columnGuide * (width - gutter) for width in colWidths]
        pieces.append(rowFormat % tuple(guide))
    for row in rows:
        pieces.append(rowFormat % tuple(row))

    return ''.join(pieces).strip() + '\n'


def main():
    """ %(filename)s Get a specified column from input file. {{c column number (1-based, inclusive), can also specify a range like a-b,c-d,... 1-}} {{out name of outfile. 
Prints to stdout by default.}} {{d delim char w any whitespace character t tab s space}} {{h Prints documentation.}} {{v verbosity 1}} {{version print version info and exit}} %(version)s """
    # NOTE(review): the docstring above is handed to cp.optStringParser at
    # runtime to build the option list, so its text is kept exactly as found.

    # cp is only needed for command-line handling; importing it here keeps
    # getSeqRange and spaceDelimTable importable without it.
    import cp

    debug = 0

    # Option-parsing boilerplate starts here...
    docstringdict = {'filename': os.path.split(sys.argv[0])[-1],
                     'version': '$Id: col.py,v 1.1 2004/10/10 02:53:09 nghoffma Exp $'}

    optlist, formattedDocString, formattedSummary = cp.optStringParser(
        main.__doc__ % docstringdict,
        optWidth=15, lineWidth=60, valOffset=4, putVals=1)

    opts = cp.commandparser(options=optlist,
                            usage='Options:\n' + formattedSummary,
                            debug=debug,
                            exitWithUsage=1)

    if opts.status('version'):
        sys.exit('Version: %(version)s\n' % docstringdict)
    # ...and ends here.

    if opts.status('h'):
        print(formattedDocString)
        sys.exit(0)

    v = opts.value('v')  # verbosity is read but not used yet
    rangeString = opts.value('c')
    delimchar = opts.value('d')

    # Map each delimiter option to the function that splits one input line.
    splitters = {
        't': lambda text: text.strip().split('\t'),
        's': lambda text: text.strip().split(' '),
        'w': lambda text: text.strip().split(),
    }
    if delimchar not in splitters:
        sys.exit("Unknown delimiter option %r: expected 'w', 't' or 's'"
                 % (delimchar,))
    splitLine = splitters[delimchar]

    # read from the list of input files
    infile_list = opts.value('infile_list', 'in', 'file')

    # write to stdout by default
    writeToFile = 0
    if opts.status('out'):
        outfile = opts.value('out', 'out', 'file')
        writeToFile = 1
    else:
        outfile = sys.stdout

    try:
        listOut = []
        for f in infile_list:
            for line in f.readlines():
                listOut.append(getSeqRange(splitLine(line), rangeString))
        outfile.write(spaceDelimTable(listOut))
    finally:
        # Only close what this script asked for; never close stdout.
        if writeToFile:
            outfile.close()


if __name__ == '__main__':
    main()