Mercurial > repos > devteam > get_flanks
comparison get_flanks.py @ 0:a72f0decd7b3
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 10:51:10 -0400 |
parents | |
children | 4cd116158aef |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a72f0decd7b3 |
---|---|
1 #!/usr/bin/env python | |
2 #Done by: Guru | |
3 | |
4 """ | |
5 Get Flanking regions. | |
6 | |
7 usage: %prog input out_file size direction region | |
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
9 -o, --off=N: Offset | |
10 """ | |
11 | |
12 import sys, re, os | |
13 from bx.cookbook import doc_optparse | |
14 from galaxy.tools.util.galaxyops import * | |
15 | |
16 def stop_err( msg ): | |
17 sys.stderr.write( msg ) | |
18 sys.exit() | |
19 | |
20 def main(): | |
21 try: | |
22 if int( sys.argv[3] ) < 0: | |
23 raise Exception | |
24 except: | |
25 stop_err( "Length of flanking region(s) must be a non-negative integer." ) | |
26 | |
27 # Parsing Command Line here | |
28 options, args = doc_optparse.parse( __doc__ ) | |
29 try: | |
30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) | |
31 inp_file, out_file, size, direction, region = args | |
32 if strand_col_1 <= 0: | |
33 strand = "+" #if strand is not defined, default it to + | |
34 except: | |
35 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) | |
36 try: | |
37 offset = int(options.off) | |
38 size = int(size) | |
39 except: | |
40 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) | |
41 | |
42 fo = open(out_file,'w') | |
43 | |
44 skipped_lines = 0 | |
45 first_invalid_line = 0 | |
46 invalid_line = None | |
47 elems = [] | |
48 j=0 | |
49 for i, line in enumerate( file( inp_file ) ): | |
50 line = line.strip() | |
51 if line and (not line.startswith( '#' )) and line != '': | |
52 j+=1 | |
53 try: | |
54 elems = line.split('\t') | |
55 #if the start and/or end columns are not numbers, skip that line. | |
56 assert int(elems[start_col_1]) | |
57 assert int(elems[end_col_1]) | |
58 if strand_col_1 != -1: | |
59 strand = elems[strand_col_1] | |
60 #if the stand value is not + or -, skip that line. | |
61 assert strand in ['+', '-'] | |
62 if direction == 'Upstream': | |
63 if strand == '+': | |
64 if region == 'end': | |
65 elems[end_col_1] = str(int(elems[end_col_1]) + offset) | |
66 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
67 else: | |
68 elems[end_col_1] = str(int(elems[start_col_1]) + offset) | |
69 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
70 elif strand == '-': | |
71 if region == 'end': | |
72 elems[start_col_1] = str(int(elems[start_col_1]) - offset) | |
73 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
74 else: | |
75 elems[start_col_1] = str(int(elems[end_col_1]) - offset) | |
76 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
77 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
78 fo.write( "%s\n" % '\t'.join( elems ) ) | |
79 | |
80 elif direction == 'Downstream': | |
81 if strand == '-': | |
82 if region == 'start': | |
83 elems[end_col_1] = str(int(elems[end_col_1]) - offset) | |
84 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
85 else: | |
86 elems[end_col_1] = str(int(elems[start_col_1]) - offset) | |
87 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
88 elif strand == '+': | |
89 if region == 'start': | |
90 elems[start_col_1] = str(int(elems[start_col_1]) + offset) | |
91 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
92 else: | |
93 elems[start_col_1] = str(int(elems[end_col_1]) + offset) | |
94 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
95 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
96 fo.write( "%s\n" % '\t'.join( elems ) ) | |
97 | |
98 elif direction == 'Both': | |
99 if strand == '-': | |
100 if region == 'start': | |
101 start = str(int(elems[end_col_1]) - offset) | |
102 end1 = str(int(start) + size) | |
103 end2 = str(int(start) - size) | |
104 elems[start_col_1]=start | |
105 elems[end_col_1]=end1 | |
106 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
107 fo.write( "%s\n" % '\t'.join( elems ) ) | |
108 elems[start_col_1]=end2 | |
109 elems[end_col_1]=start | |
110 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
111 fo.write( "%s\n" % '\t'.join( elems ) ) | |
112 elif region == 'end': | |
113 start = str(int(elems[start_col_1]) - offset) | |
114 end1 = str(int(start) + size) | |
115 end2 = str(int(start) - size) | |
116 elems[start_col_1]=start | |
117 elems[end_col_1]=end1 | |
118 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
119 fo.write( "%s\n" % '\t'.join( elems ) ) | |
120 elems[start_col_1]=end2 | |
121 elems[end_col_1]=start | |
122 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
123 fo.write( "%s\n" % '\t'.join( elems ) ) | |
124 else: | |
125 start1 = str(int(elems[end_col_1]) - offset) | |
126 end1 = str(int(start1) + size) | |
127 start2 = str(int(elems[start_col_1]) - offset) | |
128 end2 = str(int(start2) - size) | |
129 elems[start_col_1]=start1 | |
130 elems[end_col_1]=end1 | |
131 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
132 fo.write( "%s\n" % '\t'.join( elems ) ) | |
133 elems[start_col_1]=end2 | |
134 elems[end_col_1]=start2 | |
135 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
136 fo.write( "%s\n" % '\t'.join( elems ) ) | |
137 elif strand == '+': | |
138 if region == 'start': | |
139 start = str(int(elems[start_col_1]) + offset) | |
140 end1 = str(int(start) - size) | |
141 end2 = str(int(start) + size) | |
142 elems[start_col_1]=end1 | |
143 elems[end_col_1]=start | |
144 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
145 fo.write( "%s\n" % '\t'.join( elems ) ) | |
146 elems[start_col_1]=start | |
147 elems[end_col_1]=end2 | |
148 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
149 fo.write( "%s\n" % '\t'.join( elems ) ) | |
150 elif region == 'end': | |
151 start = str(int(elems[end_col_1]) + offset) | |
152 end1 = str(int(start) - size) | |
153 end2 = str(int(start) + size) | |
154 elems[start_col_1]=end1 | |
155 elems[end_col_1]=start | |
156 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
157 fo.write( "%s\n" % '\t'.join( elems ) ) | |
158 elems[start_col_1]=start | |
159 elems[end_col_1]=end2 | |
160 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
161 fo.write( "%s\n" % '\t'.join( elems ) ) | |
162 else: | |
163 start1 = str(int(elems[start_col_1]) + offset) | |
164 end1 = str(int(start1) - size) | |
165 start2 = str(int(elems[end_col_1]) + offset) | |
166 end2 = str(int(start2) + size) | |
167 elems[start_col_1]=end1 | |
168 elems[end_col_1]=start1 | |
169 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
170 fo.write( "%s\n" % '\t'.join( elems ) ) | |
171 elems[start_col_1]=start2 | |
172 elems[end_col_1]=end2 | |
173 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
174 fo.write( "%s\n" % '\t'.join( elems ) ) | |
175 except: | |
176 skipped_lines += 1 | |
177 if not invalid_line: | |
178 first_invalid_line = i + 1 | |
179 invalid_line = line | |
180 fo.close() | |
181 | |
182 if skipped_lines == j: | |
183 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
184 if skipped_lines > 0: | |
185 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) | |
186 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset ) | |
187 | |
188 if __name__ == "__main__": | |
189 main() |