Mercurial > repos > xuebing > sharplabtool
comparison tools/rgenetics/rgRegion.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 """ | |
2 released under the terms of the LGPL | |
3 copyright ross lazarus August 2007 | |
4 for the rgenetics project | |
5 | |
6 Special galaxy tool for the camp2007 data | |
7 Allows grabbing arbitrary columns from an arbitrary region | |
8 | |
9 Needs a mongo results file in the location hardwired below or could be passed in as | |
10 a library parameter - but this file must have a very specific structure | |
11 rs chrom offset float1...floatn | |
12 | |
13 called as | |
14 <command interpreter="python"> | |
15 rsRegion.py $infile '$cols' $r $tag $out_file1 | |
16 </command> | |
17 | |
18 cols is a delimited list of chosen column names for the subset | |
19 r is a ucsc location region pasted into the tool | |
20 | |
21 """ | |
22 | |
23 | |
24 import sys,string | |
25 | |
26 trantab = string.maketrans(string.punctuation,'_'*len(string.punctuation)) | |
27 print >> sys.stdout, '##rgRegion.py started' | |
28 if len(sys.argv) <> 6: | |
29 print >> sys.stdout, '##!expected params in sys.argv, got %d - %s' % (len(sys.argv),sys.argv) | |
30 sys.exit(1) | |
31 print '##got %d - %s' % (len(sys.argv),sys.argv) | |
32 # quick and dirty for galaxy - we always get something for each parameter | |
33 fname = sys.argv[1] | |
34 wewant = sys.argv[2].split(',') | |
35 region = sys.argv[3].lower() | |
36 tag = sys.argv[4].translate(trantab) | |
37 ofname = sys.argv[5] | |
38 myname = 'rgRegion' | |
39 if len(wewant) == 0: # no columns selected? | |
40 print >> sys.stdout, '##!%s: no columns selected - cannot run' % myname | |
41 sys.exit(1) | |
42 try: | |
43 f = open(fname,'r') | |
44 except: # bad input file name? | |
45 print >> sys.stdout, '##!%s unable to open file %s' % (myname, fname) | |
46 sys.exit(1) | |
47 try: # TODO make a regexp? | |
48 c,rest = region.split(':') | |
49 c = c.replace('chr','') # leave although will break strict genome graphs | |
50 rest = rest.replace(',','') # remove commas | |
51 spos,epos = rest.split('-') | |
52 spos = int(spos) | |
53 epos = int(epos) | |
54 except: | |
55 print >> sys.stdout, '##!%s unable to parse region %s - MUST look like "chr8:10,000-100,000' % (myname,region) | |
56 sys.exit(1) | |
57 print >> sys.stdout, '##%s parsing chrom %s from %d to %d' % (myname, c,spos,epos) | |
58 res = [] | |
59 cnames = f.next().strip().split() # column titles for output | |
60 linelen = len(cnames) | |
61 wewant = [int(x) - 1 for x in wewant] # need col numbers base 0 | |
62 for n,l in enumerate(f): | |
63 ll = l.strip().split() | |
64 thisc = ll[1] | |
65 thispos = int(ll[2]) | |
66 if (thisc == c) and (thispos >= spos) and (thispos <= epos): | |
67 if len(ll) == linelen: | |
68 res.append([ll[x] for x in wewant]) # subset of columns! | |
69 else: | |
70 print >> sys.stdout, '##! looking for %d fields - found %d in ll=%s' % (linelen,len(ll),str(ll)) | |
71 o = file(ofname,'w') | |
72 res = ['%s\n' % '\t'.join(x) for x in res] # turn into tab delim string | |
73 print >> sys.stdout, '##%s selected and returning %d data rows' % (myname,len(res)) | |
74 head = [cnames[x] for x in wewant] # ah, list comprehensions - list of needed column names | |
75 o.write('%s\n' % '\t'.join(head)) # header row for output | |
76 o.write(''.join(res)) | |
77 o.close() | |
78 f.close() | |
79 | |
80 |