Mercurial > repos > artbio > lumpy_sv
comparison pairend_distro.py @ 1:1ed8619a5611 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f
author | artbio |
---|---|
date | Wed, 26 Jul 2017 18:17:01 -0400 |
parents | 796552c157de |
children | 093bb151a0a8 |
comparison
equal
deleted
inserted
replaced
0:796552c157de | 1:1ed8619a5611 |
---|---|
7 # Department of Public Health Sciences and Center for Public Health Genomics, | 7 # Department of Public Health Sciences and Center for Public Health Genomics, |
8 # University of Virginia | 8 # University of Virginia |
9 # rl6sf@virginia.edu | 9 # rl6sf@virginia.edu |
10 | 10 |
11 import sys | 11 import sys |
12 from optparse import OptionParser | |
13 | |
12 import numpy as np | 14 import numpy as np |
13 from operator import itemgetter | |
14 from optparse import OptionParser | |
15 | 15 |
16 # some constants for sam/bam field ids | 16 # some constants for sam/bam field ids |
17 SAM_FLAG = 1 | 17 SAM_FLAG = 1 |
18 SAM_REFNAME = 2 | 18 SAM_REFNAME = 2 |
19 SAM_MATE_REFNAME = 6 | 19 SAM_MATE_REFNAME = 6 |
20 SAM_ISIZE = 8 | 20 SAM_ISIZE = 8 |
21 | 21 |
22 parser = OptionParser() | 22 parser = OptionParser() |
23 parser.add_option("-r", "--read_length", type="int", dest="read_length", | |
24 help="Read length") | |
25 parser.add_option("-X", dest="X", type="int", | |
26 help="Number of stdevs from mean to extend") | |
27 parser.add_option("-N", dest="N", type="int", help="Number to sample") | |
28 parser.add_option("-o", dest="output_file", help="Output file") | |
29 parser.add_option("-m", dest="mads", type="int", default=10, | |
30 help='''Outlier cutoff in # of median absolute deviations | |
31 (unscaled, upper only)''') | |
23 | 32 |
24 parser.add_option("-r", | |
25 "--read_length", | |
26 type="int", | |
27 dest="read_length", | |
28 help="Read length") | |
29 | |
30 parser.add_option("-X", | |
31 dest="X", | |
32 type="int", | |
33 help="Number of stdevs from mean to extend") | |
34 | |
35 parser.add_option("-N", | |
36 dest="N", | |
37 type="int", | |
38 help="Number to sample") | |
39 | |
40 parser.add_option("-o", | |
41 dest="output_file", | |
42 help="Output file") | |
43 | |
44 parser.add_option("-m", | |
45 dest="mads", | |
46 type="int", | |
47 default=10, | |
48 help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)") | |
49 | 33 |
50 def unscaled_upper_mad(xs): | 34 def unscaled_upper_mad(xs): |
51 """Return a tuple consisting of the median of xs followed by the | 35 """Return a tuple consisting of the median of xs followed by the |
52 unscaled median absolute deviation of the values in xs that lie | 36 unscaled median absolute deviation of the values in xs that lie |
53 above the median. | 37 above the median. |
94 L.append(isize) | 78 L.append(isize) |
95 | 79 |
96 # warn if very few elements in distribution | 80 # warn if very few elements in distribution |
97 min_elements = 1000 | 81 min_elements = 1000 |
98 if len(L) < min_elements: | 82 if len(L) < min_elements: |
99 sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements)) | 83 sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % |
84 (len(L), min_elements)) | |
100 mean = "NA" | 85 mean = "NA" |
101 stdev = "NA" | 86 stdev = "NA" |
102 | 87 |
103 else: | 88 else: |
104 # Remove outliers | 89 # Remove outliers |
108 upper_cutoff = med + options.mads * umad | 93 upper_cutoff = med + options.mads * umad |
109 L = L[L < upper_cutoff] | 94 L = L[L < upper_cutoff] |
110 new_len = len(L) | 95 new_len = len(L) |
111 removed = c - new_len | 96 removed = c - new_len |
112 sys.stderr.write("Removed %d outliers with isize >= %d\n" % | 97 sys.stderr.write("Removed %d outliers with isize >= %d\n" % |
113 (removed, upper_cutoff)) | 98 (removed, upper_cutoff)) |
114 c = new_len | 99 c = new_len |
115 | 100 |
116 mean = np.mean(L) | 101 mean = np.mean(L) |
117 stdev = np.std(L) | 102 stdev = np.std(L) |
118 | 103 |
123 s = 0 | 108 s = 0 |
124 | 109 |
125 for x in L: | 110 for x in L: |
126 if (x >= start) and (x <= end): | 111 if (x >= start) and (x <= end): |
127 j = int(x - start) | 112 j = int(x - start) |
128 H[j] = H[ int(x - start) ] + 1 | 113 H[j] = H[int(x - start)] + 1 |
129 s += 1 | 114 s += 1 |
130 | 115 |
131 f = open(options.output_file, 'w') | 116 f = open(options.output_file, 'w') |
132 | 117 |
133 for i in range(end - start): | 118 for i in range(end - start): |
134 o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n" | 119 o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n" |
135 f.write(o) | 120 f.write(o) |
136 | |
137 | |
138 f.close() | 121 f.close() |
139 | |
140 print('mean:' + str(mean) + '\tstdev:' + str(stdev)) | 122 print('mean:' + str(mean) + '\tstdev:' + str(stdev)) |