sharplabtool: tools/rgenetics/rgRegion.py comparison

comparison tools/rgenetics/rgRegion.py @ 0:9071e359b9a3

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:9071e359b9a3
+"""
+released under the terms of the LGPL
+copyright ross lazarus August 2007
+for the rgenetics project
+Special galaxy tool for the camp2007 data
+Allows grabbing arbitrary columns from an arbitrary region
+Needs a mongo results file in the location hardwired below or could be passed in as
+a library parameter - but this file must have a very specific structure
+rs chrom offset float1...floatn
+called as
+<command interpreter="python">
+rsRegion.py $infile '$cols' $r $tag $out_file1
+</command>
+cols is a delimited list of chosen column names for the subset
+r is a ucsc location region pasted into the tool
+"""
+import sys,string
+trantab = string.maketrans(string.punctuation,'_'*len(string.punctuation))
+print >> sys.stdout, '##rgRegion.py started'
+if len(sys.argv) <> 6:
+print >> sys.stdout, '##!expected  params in sys.argv, got %d - %s' % (len(sys.argv),sys.argv)
+sys.exit(1)
+print '##got %d - %s' % (len(sys.argv),sys.argv)
+# quick and dirty for galaxy - we always get something for each parameter
+fname = sys.argv[1]
+wewant = sys.argv[2].split(',')
+region = sys.argv[3].lower()
+tag = sys.argv[4].translate(trantab)
+ofname = sys.argv[5]
+myname = 'rgRegion'
+if len(wewant) == 0: # no columns selected?
+print >> sys.stdout, '##!%s:  no columns selected - cannot run' % myname
+sys.exit(1)
+try:
+f = open(fname,'r')
+except: # bad input file name?
+print >> sys.stdout, '##!%s unable to open file %s' % (myname, fname)
+sys.exit(1)
+try: # TODO make a regexp?
+c,rest = region.split(':')
+c = c.replace('chr','') # leave although will break strict genome graphs
+rest = rest.replace(',','') # remove commas
+spos,epos = rest.split('-')
+spos = int(spos)
+epos = int(epos)
+except:
+print >> sys.stdout, '##!%s unable to parse region %s - MUST look like "chr8:10,000-100,000' % (myname,region)
+sys.exit(1)
+print >> sys.stdout, '##%s parsing chrom %s from %d to %d' % (myname, c,spos,epos)
+res = []
+cnames = f.next().strip().split() # column titles for output
+linelen = len(cnames)
+wewant = [int(x) - 1 for x in wewant] # need col numbers base 0
+for n,l in enumerate(f):
+ll = l.strip().split()
+thisc = ll[1]
+thispos = int(ll[2])
+if (thisc == c) and (thispos >= spos) and (thispos <= epos):
+if len(ll) == linelen:
+res.append([ll[x] for x in wewant]) # subset of columns!
+else:
+print >> sys.stdout, '##! looking for %d fields - found %d in ll=%s' % (linelen,len(ll),str(ll))
+o = file(ofname,'w')
+res = ['%s\n' % '\t'.join(x) for x in res] # turn into tab delim string
+print >> sys.stdout, '##%s selected and returning %d data rows' % (myname,len(res))
+head = [cnames[x] for x in wewant] # ah, list comprehensions - list of needed column names
+o.write('%s\n' % '\t'.join(head)) # header row for output
+o.write(''.join(res))
+o.close()
+f.close()

Mercurial > repos > xuebing > sharplabtool

comparison tools/rgenetics/rgRegion.py @ 0:9071e359b9a3