Mercurial > repos > devteam > flanking_features
comparison flanking_features.py @ 2:a09d13b108fd draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author | devteam |
---|---|
date | Thu, 22 Jun 2017 18:41:16 -0400 |
parents | 8307665c4b6c |
children |
comparison
equal
deleted
inserted
replaced
1:8307665c4b6c | 2:a09d13b108fd |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #By: Guruprasad Ananda | 2 # By: Guruprasad Ananda |
3 """ | 3 """ |
4 Fetch closest up/downstream interval from features corresponding to every interval in primary | 4 Fetch closest up/downstream interval from features corresponding to every interval in primary |
5 | 5 |
6 usage: %prog primary_file features_file out_file direction | 6 usage: %prog primary_file features_file out_file direction |
7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file | 7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file |
8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file | 8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file |
9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval | 9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval |
10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval | 10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval |
11 """ | 11 """ |
12 from __future__ import print_function | |
12 | 13 |
13 import fileinput | 14 import fileinput |
14 import sys | 15 import sys |
16 | |
15 from bx.cookbook import doc_optparse | 17 from bx.cookbook import doc_optparse |
16 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper | 18 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper |
17 from bx.intervals.operations import quicksect | 19 from bx.intervals.operations import quicksect |
18 from bx.tabular.io import ParseError | 20 from bx.tabular.io import ParseError |
19 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped | 21 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped |
22 | |
20 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper | 23 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper |
21 | 24 |
22 assert sys.version_info[:2] >= ( 2, 4 ) | 25 assert sys.version_info[:2] >= ( 2, 4 ) |
23 | 26 |
24 | 27 |
25 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): | 28 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): |
26 #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases | 29 # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases |
27 #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand | 30 # threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand |
28 #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand | 31 # threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand |
29 if direction == 1: | 32 if direction == 1: |
30 if node.maxend <= threshold_up: | 33 if node.maxend <= threshold_up: |
31 if node.end == node.maxend: | 34 if node.end == node.maxend: |
32 report_func_up(node) | 35 report_func_up(node) |
33 elif node.right and node.left: | 36 elif node.right and node.left: |
101 else: | 104 else: |
102 root = rightTree.chroms[chrom] # root node for the chrom tree | 105 root = rightTree.chroms[chrom] # root node for the chrom tree |
103 result_up = [] | 106 result_up = [] |
104 result_down = [] | 107 result_down = [] |
105 if (strand == '+' and up) or (strand == '-' and down): | 108 if (strand == '+' and up) or (strand == '-' and down): |
106 #upstream +ve strand and downstream -ve strand cases | 109 # upstream +ve strand and downstream -ve strand cases |
107 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) | 110 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) |
108 | 111 |
109 if (strand == '+' and down) or (strand == '-' and up): | 112 if (strand == '+' and down) or (strand == '-' and up): |
110 #downstream +ve strand and upstream -ve strand case | 113 # downstream +ve strand and upstream -ve strand case |
111 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) | 114 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) |
112 | 115 |
113 if result_up: | 116 if result_up: |
114 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. | 117 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. |
115 ends = [] | 118 ends = [] |
121 if not(either): | 124 if not(either): |
122 yield [ interval, result_up[res_ind].other ] | 125 yield [ interval, result_up[res_ind].other ] |
123 | 126 |
124 if result_down: | 127 if result_down: |
125 if not(either): | 128 if not(either): |
126 #The last element of result_down will be the closest element to the given interval | 129 # The last element of result_down will be the closest element to the given interval |
127 yield [ interval, result_down[-1].other ] | 130 yield [ interval, result_down[-1].other ] |
128 | 131 |
129 if either and (result_up or result_down): | 132 if either and (result_up or result_down): |
130 iter_val = [] | 133 iter_val = [] |
131 if result_up and result_down: | 134 if result_up and result_down: |
132 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): | 135 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): |
133 iter_val = [ interval, result_up[res_ind].other ] | 136 iter_val = [ interval, result_up[res_ind].other ] |
134 else: | 137 else: |
135 #The last element of result_down will be the closest element to the given interval | 138 # The last element of result_down will be the closest element to the given interval |
136 iter_val = [ interval, result_down[-1].other ] | 139 iter_val = [ interval, result_down[-1].other ] |
137 elif result_up: | 140 elif result_up: |
138 iter_val = [ interval, result_up[res_ind].other ] | 141 iter_val = [ interval, result_up[res_ind].other ] |
139 elif result_down: | 142 elif result_down: |
140 #The last element of result_down will be the closest element to the given interval | 143 # The last element of result_down will be the closest element to the given interval |
141 iter_val = [ interval, result_down[-1].other ] | 144 iter_val = [ interval, result_down[-1].other ] |
142 yield iter_val | 145 yield iter_val |
143 | 146 |
144 | 147 |
145 def main(): | 148 def main(): |
201 output_line_fields.extend( line.fields ) | 204 output_line_fields.extend( line.fields ) |
202 output_line_fields.extend( closest_feature.fields ) | 205 output_line_fields.extend( closest_feature.fields ) |
203 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) | 206 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) |
204 else: | 207 else: |
205 out_file.write( "%s\n" % result ) | 208 out_file.write( "%s\n" % result ) |
206 except ParseError, exc: | 209 except ParseError as exc: |
207 fail( "Invalid file format: %s" % str( exc ) ) | 210 fail( "Invalid file format: %s" % str( exc ) ) |
208 | 211 |
209 print "Direction: %s" % (direction) | 212 print("Direction: %s" % (direction)) |
210 if g1.skipped > 0: | 213 if g1.skipped > 0: |
211 print skipped( g1, filedesc=" of 1st dataset" ) | 214 print(skipped( g1, filedesc=" of 1st dataset" )) |
212 if g2.skipped > 0: | 215 if g2.skipped > 0: |
213 print skipped( g2, filedesc=" of 2nd dataset" ) | 216 print(skipped( g2, filedesc=" of 2nd dataset" )) |
217 | |
214 | 218 |
215 if __name__ == "__main__": | 219 if __name__ == "__main__": |
216 main() | 220 main() |