comparison get_flanks.py @ 0:a72f0decd7b3

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:51:10 -0400
parents
children 4cd116158aef
comparison
equal deleted inserted replaced
-1:000000000000 0:a72f0decd7b3
1 #!/usr/bin/env python
2 #Done by: Guru
3
4 """
5 Get Flanking regions.
6
7 usage: %prog input out_file size direction region
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
9 -o, --off=N: Offset
10 """
11
12 import sys, re, os
13 from bx.cookbook import doc_optparse
14 from galaxy.tools.util.galaxyops import *
15
16 def stop_err( msg ):
17 sys.stderr.write( msg )
18 sys.exit()
19
20 def main():
21 try:
22 if int( sys.argv[3] ) < 0:
23 raise Exception
24 except:
25 stop_err( "Length of flanking region(s) must be a non-negative integer." )
26
27 # Parsing Command Line here
28 options, args = doc_optparse.parse( __doc__ )
29 try:
30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
31 inp_file, out_file, size, direction, region = args
32 if strand_col_1 <= 0:
33 strand = "+" #if strand is not defined, default it to +
34 except:
35 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
36 try:
37 offset = int(options.off)
38 size = int(size)
39 except:
40 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )
41
42 fo = open(out_file,'w')
43
44 skipped_lines = 0
45 first_invalid_line = 0
46 invalid_line = None
47 elems = []
48 j=0
49 for i, line in enumerate( file( inp_file ) ):
50 line = line.strip()
51 if line and (not line.startswith( '#' )) and line != '':
52 j+=1
53 try:
54 elems = line.split('\t')
55 #if the start and/or end columns are not numbers, skip that line.
56 assert int(elems[start_col_1])
57 assert int(elems[end_col_1])
58 if strand_col_1 != -1:
59 strand = elems[strand_col_1]
60 #if the stand value is not + or -, skip that line.
61 assert strand in ['+', '-']
62 if direction == 'Upstream':
63 if strand == '+':
64 if region == 'end':
65 elems[end_col_1] = str(int(elems[end_col_1]) + offset)
66 elems[start_col_1] = str( int(elems[end_col_1]) - size )
67 else:
68 elems[end_col_1] = str(int(elems[start_col_1]) + offset)
69 elems[start_col_1] = str( int(elems[end_col_1]) - size )
70 elif strand == '-':
71 if region == 'end':
72 elems[start_col_1] = str(int(elems[start_col_1]) - offset)
73 elems[end_col_1] = str(int(elems[start_col_1]) + size)
74 else:
75 elems[start_col_1] = str(int(elems[end_col_1]) - offset)
76 elems[end_col_1] = str(int(elems[start_col_1]) + size)
77 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
78 fo.write( "%s\n" % '\t'.join( elems ) )
79
80 elif direction == 'Downstream':
81 if strand == '-':
82 if region == 'start':
83 elems[end_col_1] = str(int(elems[end_col_1]) - offset)
84 elems[start_col_1] = str( int(elems[end_col_1]) - size )
85 else:
86 elems[end_col_1] = str(int(elems[start_col_1]) - offset)
87 elems[start_col_1] = str( int(elems[end_col_1]) - size )
88 elif strand == '+':
89 if region == 'start':
90 elems[start_col_1] = str(int(elems[start_col_1]) + offset)
91 elems[end_col_1] = str(int(elems[start_col_1]) + size)
92 else:
93 elems[start_col_1] = str(int(elems[end_col_1]) + offset)
94 elems[end_col_1] = str(int(elems[start_col_1]) + size)
95 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
96 fo.write( "%s\n" % '\t'.join( elems ) )
97
98 elif direction == 'Both':
99 if strand == '-':
100 if region == 'start':
101 start = str(int(elems[end_col_1]) - offset)
102 end1 = str(int(start) + size)
103 end2 = str(int(start) - size)
104 elems[start_col_1]=start
105 elems[end_col_1]=end1
106 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
107 fo.write( "%s\n" % '\t'.join( elems ) )
108 elems[start_col_1]=end2
109 elems[end_col_1]=start
110 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
111 fo.write( "%s\n" % '\t'.join( elems ) )
112 elif region == 'end':
113 start = str(int(elems[start_col_1]) - offset)
114 end1 = str(int(start) + size)
115 end2 = str(int(start) - size)
116 elems[start_col_1]=start
117 elems[end_col_1]=end1
118 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
119 fo.write( "%s\n" % '\t'.join( elems ) )
120 elems[start_col_1]=end2
121 elems[end_col_1]=start
122 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
123 fo.write( "%s\n" % '\t'.join( elems ) )
124 else:
125 start1 = str(int(elems[end_col_1]) - offset)
126 end1 = str(int(start1) + size)
127 start2 = str(int(elems[start_col_1]) - offset)
128 end2 = str(int(start2) - size)
129 elems[start_col_1]=start1
130 elems[end_col_1]=end1
131 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
132 fo.write( "%s\n" % '\t'.join( elems ) )
133 elems[start_col_1]=end2
134 elems[end_col_1]=start2
135 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
136 fo.write( "%s\n" % '\t'.join( elems ) )
137 elif strand == '+':
138 if region == 'start':
139 start = str(int(elems[start_col_1]) + offset)
140 end1 = str(int(start) - size)
141 end2 = str(int(start) + size)
142 elems[start_col_1]=end1
143 elems[end_col_1]=start
144 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
145 fo.write( "%s\n" % '\t'.join( elems ) )
146 elems[start_col_1]=start
147 elems[end_col_1]=end2
148 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
149 fo.write( "%s\n" % '\t'.join( elems ) )
150 elif region == 'end':
151 start = str(int(elems[end_col_1]) + offset)
152 end1 = str(int(start) - size)
153 end2 = str(int(start) + size)
154 elems[start_col_1]=end1
155 elems[end_col_1]=start
156 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
157 fo.write( "%s\n" % '\t'.join( elems ) )
158 elems[start_col_1]=start
159 elems[end_col_1]=end2
160 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
161 fo.write( "%s\n" % '\t'.join( elems ) )
162 else:
163 start1 = str(int(elems[start_col_1]) + offset)
164 end1 = str(int(start1) - size)
165 start2 = str(int(elems[end_col_1]) + offset)
166 end2 = str(int(start2) + size)
167 elems[start_col_1]=end1
168 elems[end_col_1]=start1
169 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
170 fo.write( "%s\n" % '\t'.join( elems ) )
171 elems[start_col_1]=start2
172 elems[end_col_1]=end2
173 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
174 fo.write( "%s\n" % '\t'.join( elems ) )
175 except:
176 skipped_lines += 1
177 if not invalid_line:
178 first_invalid_line = i + 1
179 invalid_line = line
180 fo.close()
181
182 if skipped_lines == j:
183 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
184 if skipped_lines > 0:
185 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
186 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset )
187
188 if __name__ == "__main__":
189 main()