0
|
1 #!/usr/bin/env python
|
|
2 #Done by: Guru
|
|
3
|
|
4 """
|
|
5 Get Flanking regions.
|
|
6
|
|
7 usage: %prog input out_file size direction region
|
|
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
|
|
9 -o, --off=N: Offset
|
|
10 """
|
|
11
|
|
12 import sys, re, os
|
|
13 from bx.cookbook import doc_optparse
|
|
14 from galaxy.tools.util.galaxyops import *
|
|
15
|
|
16 def stop_err( msg ):
|
|
17 sys.stderr.write( msg )
|
|
18 sys.exit()
|
|
19
|
|
20 def main():
|
|
21 try:
|
|
22 if int( sys.argv[3] ) < 0:
|
|
23 raise Exception
|
|
24 except:
|
|
25 stop_err( "Length of flanking region(s) must be a non-negative integer." )
|
|
26
|
|
27 # Parsing Command Line here
|
|
28 options, args = doc_optparse.parse( __doc__ )
|
|
29 try:
|
|
30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
|
|
31 inp_file, out_file, size, direction, region = args
|
|
32 if strand_col_1 <= 0:
|
|
33 strand = "+" #if strand is not defined, default it to +
|
|
34 except:
|
|
35 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
|
|
36 try:
|
|
37 offset = int(options.off)
|
|
38 size = int(size)
|
|
39 except:
|
|
40 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )
|
|
41
|
|
42 fo = open(out_file,'w')
|
|
43
|
|
44 skipped_lines = 0
|
|
45 first_invalid_line = 0
|
|
46 invalid_line = None
|
|
47 elems = []
|
|
48 j=0
|
|
49 for i, line in enumerate( file( inp_file ) ):
|
|
50 line = line.strip()
|
|
51 if line and (not line.startswith( '#' )) and line != '':
|
|
52 j+=1
|
|
53 try:
|
|
54 elems = line.split('\t')
|
|
55 #if the start and/or end columns are not numbers, skip that line.
|
|
56 assert int(elems[start_col_1])
|
|
57 assert int(elems[end_col_1])
|
|
58 if strand_col_1 != -1:
|
|
59 strand = elems[strand_col_1]
|
|
60 #if the stand value is not + or -, skip that line.
|
|
61 assert strand in ['+', '-']
|
|
62 if direction == 'Upstream':
|
|
63 if strand == '+':
|
|
64 if region == 'end':
|
|
65 elems[end_col_1] = str(int(elems[end_col_1]) + offset)
|
|
66 elems[start_col_1] = str( int(elems[end_col_1]) - size )
|
|
67 else:
|
|
68 elems[end_col_1] = str(int(elems[start_col_1]) + offset)
|
|
69 elems[start_col_1] = str( int(elems[end_col_1]) - size )
|
|
70 elif strand == '-':
|
|
71 if region == 'end':
|
|
72 elems[start_col_1] = str(int(elems[start_col_1]) - offset)
|
|
73 elems[end_col_1] = str(int(elems[start_col_1]) + size)
|
|
74 else:
|
|
75 elems[start_col_1] = str(int(elems[end_col_1]) - offset)
|
|
76 elems[end_col_1] = str(int(elems[start_col_1]) + size)
|
|
77 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
78 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
79
|
|
80 elif direction == 'Downstream':
|
|
81 if strand == '-':
|
|
82 if region == 'start':
|
|
83 elems[end_col_1] = str(int(elems[end_col_1]) - offset)
|
|
84 elems[start_col_1] = str( int(elems[end_col_1]) - size )
|
|
85 else:
|
|
86 elems[end_col_1] = str(int(elems[start_col_1]) - offset)
|
|
87 elems[start_col_1] = str( int(elems[end_col_1]) - size )
|
|
88 elif strand == '+':
|
|
89 if region == 'start':
|
|
90 elems[start_col_1] = str(int(elems[start_col_1]) + offset)
|
|
91 elems[end_col_1] = str(int(elems[start_col_1]) + size)
|
|
92 else:
|
|
93 elems[start_col_1] = str(int(elems[end_col_1]) + offset)
|
|
94 elems[end_col_1] = str(int(elems[start_col_1]) + size)
|
|
95 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
96 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
97
|
|
98 elif direction == 'Both':
|
|
99 if strand == '-':
|
|
100 if region == 'start':
|
|
101 start = str(int(elems[end_col_1]) - offset)
|
|
102 end1 = str(int(start) + size)
|
|
103 end2 = str(int(start) - size)
|
|
104 elems[start_col_1]=start
|
|
105 elems[end_col_1]=end1
|
|
106 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
107 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
108 elems[start_col_1]=end2
|
|
109 elems[end_col_1]=start
|
|
110 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
111 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
112 elif region == 'end':
|
|
113 start = str(int(elems[start_col_1]) - offset)
|
|
114 end1 = str(int(start) + size)
|
|
115 end2 = str(int(start) - size)
|
|
116 elems[start_col_1]=start
|
|
117 elems[end_col_1]=end1
|
|
118 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
119 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
120 elems[start_col_1]=end2
|
|
121 elems[end_col_1]=start
|
|
122 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
123 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
124 else:
|
|
125 start1 = str(int(elems[end_col_1]) - offset)
|
|
126 end1 = str(int(start1) + size)
|
|
127 start2 = str(int(elems[start_col_1]) - offset)
|
|
128 end2 = str(int(start2) - size)
|
|
129 elems[start_col_1]=start1
|
|
130 elems[end_col_1]=end1
|
|
131 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
132 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
133 elems[start_col_1]=end2
|
|
134 elems[end_col_1]=start2
|
|
135 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
136 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
137 elif strand == '+':
|
|
138 if region == 'start':
|
|
139 start = str(int(elems[start_col_1]) + offset)
|
|
140 end1 = str(int(start) - size)
|
|
141 end2 = str(int(start) + size)
|
|
142 elems[start_col_1]=end1
|
|
143 elems[end_col_1]=start
|
|
144 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
145 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
146 elems[start_col_1]=start
|
|
147 elems[end_col_1]=end2
|
|
148 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
149 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
150 elif region == 'end':
|
|
151 start = str(int(elems[end_col_1]) + offset)
|
|
152 end1 = str(int(start) - size)
|
|
153 end2 = str(int(start) + size)
|
|
154 elems[start_col_1]=end1
|
|
155 elems[end_col_1]=start
|
|
156 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
157 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
158 elems[start_col_1]=start
|
|
159 elems[end_col_1]=end2
|
|
160 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
161 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
162 else:
|
|
163 start1 = str(int(elems[start_col_1]) + offset)
|
|
164 end1 = str(int(start1) - size)
|
|
165 start2 = str(int(elems[end_col_1]) + offset)
|
|
166 end2 = str(int(start2) + size)
|
|
167 elems[start_col_1]=end1
|
|
168 elems[end_col_1]=start1
|
|
169 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
170 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
171 elems[start_col_1]=start2
|
|
172 elems[end_col_1]=end2
|
|
173 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
|
|
174 fo.write( "%s\n" % '\t'.join( elems ) )
|
|
175 except:
|
|
176 skipped_lines += 1
|
|
177 if not invalid_line:
|
|
178 first_invalid_line = i + 1
|
|
179 invalid_line = line
|
|
180 fo.close()
|
|
181
|
|
182 if skipped_lines == j:
|
|
183 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
|
|
184 if skipped_lines > 0:
|
|
185 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
|
|
186 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset )
|
|
187
|
|
188 if __name__ == "__main__":
|
|
189 main()
|