Mercurial > repos > xuebing > sharplabtool
comparison tools/regVariation/featureCounter.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 #Guruprasad Ananda | |
3 """ | |
4 Calculate count and coverage of one query on another, and append the Coverage and counts to | |
5 the last four columns as bases covered, percent coverage, number of completely present features, number of partially present/overlapping features. | |
6 | |
7 usage: %prog bed_file_1 bed_file_2 out_file | |
8 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file | |
9 -2, --cols2=N,N,N,N: Columns for chr, start, end, strand in second file | |
10 """ | |
11 from galaxy import eggs | |
12 import pkg_resources | |
13 pkg_resources.require( "bx-python" ) | |
14 import sys, traceback, fileinput | |
15 from warnings import warn | |
16 from bx.intervals.io import * | |
17 from bx.cookbook import doc_optparse | |
18 from bx.intervals.operations import quicksect | |
19 from galaxy.tools.util.galaxyops import * | |
20 | |
21 assert sys.version_info[:2] >= ( 2, 4 ) | |
22 | |
23 def stop_err(msg): | |
24 sys.stderr.write(msg) | |
25 sys.exit() | |
26 | |
27 def counter(node, start, end): | |
28 global full, partial | |
29 if node.start <= start and node.maxend > start: | |
30 if node.end >= end or (node.start == start and end > node.end > start): | |
31 full += 1 | |
32 elif end > node.end > start: | |
33 partial += 1 | |
34 if node.left and node.left.maxend > start: | |
35 counter(node.left, start, end) | |
36 if node.right: | |
37 counter(node.right, start, end) | |
38 elif start < node.start < end: | |
39 if node.end <= end: | |
40 full += 1 | |
41 else: | |
42 partial += 1 | |
43 if node.left and node.left.maxend > start: | |
44 counter(node.left, start, end) | |
45 if node.right: | |
46 counter(node.right, start, end) | |
47 else: | |
48 if node.left: | |
49 counter(node.left, start, end) | |
50 | |
51 def count_coverage( readers, comments=True ): | |
52 primary = readers[0] | |
53 secondary = readers[1] | |
54 secondary_copy = readers[2] | |
55 | |
56 rightTree = quicksect.IntervalTree() | |
57 for item in secondary: | |
58 if type( item ) is GenomicInterval: | |
59 rightTree.insert( item, secondary.linenum, item.fields ) | |
60 | |
61 bitsets = secondary_copy.binned_bitsets() | |
62 | |
63 global full, partial | |
64 | |
65 for interval in primary: | |
66 if type( interval ) is Header: | |
67 yield interval | |
68 if type( interval ) is Comment and comments: | |
69 yield interval | |
70 elif type( interval ) == GenomicInterval: | |
71 chrom = interval.chrom | |
72 start = int(interval.start) | |
73 end = int(interval.end) | |
74 full = 0 | |
75 partial = 0 | |
76 if chrom not in bitsets: | |
77 bases_covered = 0 | |
78 percent = 0.0 | |
79 full = 0 | |
80 partial = 0 | |
81 else: | |
82 bases_covered = bitsets[ chrom ].count_range( start, end-start ) | |
83 if (end - start) == 0: | |
84 percent = 0 | |
85 else: | |
86 percent = float(bases_covered) / float(end - start) | |
87 if bases_covered: | |
88 root = rightTree.chroms[chrom] #root node for the chrom tree | |
89 counter(root, start, end) | |
90 interval.fields.append(str(bases_covered)) | |
91 interval.fields.append(str(percent)) | |
92 interval.fields.append(str(full)) | |
93 interval.fields.append(str(partial)) | |
94 yield interval | |
95 | |
96 def main(): | |
97 options, args = doc_optparse.parse( __doc__ ) | |
98 | |
99 try: | |
100 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 ) | |
101 chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) | |
102 in1_fname, in2_fname, out_fname = args | |
103 except: | |
104 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
105 | |
106 g1 = NiceReaderWrapper( fileinput.FileInput( in1_fname ), | |
107 chrom_col=chr_col_1, | |
108 start_col=start_col_1, | |
109 end_col=end_col_1, | |
110 strand_col=strand_col_1, | |
111 fix_strand=True ) | |
112 g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ), | |
113 chrom_col=chr_col_2, | |
114 start_col=start_col_2, | |
115 end_col=end_col_2, | |
116 strand_col=strand_col_2, | |
117 fix_strand=True ) | |
118 g2_copy = NiceReaderWrapper( fileinput.FileInput( in2_fname ), | |
119 chrom_col=chr_col_2, | |
120 start_col=start_col_2, | |
121 end_col=end_col_2, | |
122 strand_col=strand_col_2, | |
123 fix_strand=True ) | |
124 | |
125 | |
126 out_file = open( out_fname, "w" ) | |
127 | |
128 try: | |
129 for line in count_coverage([g1,g2,g2_copy]): | |
130 if type( line ) is GenomicInterval: | |
131 out_file.write( "%s\n" % "\t".join( line.fields ) ) | |
132 else: | |
133 out_file.write( "%s\n" % line ) | |
134 except ParseError, exc: | |
135 out_file.close() | |
136 fail( str( exc ) ) | |
137 | |
138 out_file.close() | |
139 | |
140 if g1.skipped > 0: | |
141 print skipped( g1, filedesc=" of 1st dataset" ) | |
142 if g2.skipped > 0: | |
143 print skipped( g2, filedesc=" of 2nd dataset" ) | |
144 elif g2_copy.skipped > 0: | |
145 print skipped( g2_copy, filedesc=" of 2nd dataset" ) | |
146 | |
147 if __name__ == "__main__": | |
148 main() |