comparison pairend_distro.py @ 1:1ed8619a5611 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f
author artbio
date Wed, 26 Jul 2017 18:17:01 -0400
parents 796552c157de
children 093bb151a0a8
comparison
equal deleted inserted replaced
0:796552c157de 1:1ed8619a5611
7 # Department of Public Health Sciences and Center for Public Health Genomics, 7 # Department of Public Health Sciences and Center for Public Health Genomics,
8 # University of Virginia 8 # University of Virginia
9 # rl6sf@virginia.edu 9 # rl6sf@virginia.edu
10 10
11 import sys 11 import sys
12 from optparse import OptionParser
13
12 import numpy as np 14 import numpy as np
13 from operator import itemgetter
14 from optparse import OptionParser
15 15
16 # some constants for sam/bam field ids 16 # some constants for sam/bam field ids
17 SAM_FLAG = 1 17 SAM_FLAG = 1
18 SAM_REFNAME = 2 18 SAM_REFNAME = 2
19 SAM_MATE_REFNAME = 6 19 SAM_MATE_REFNAME = 6
20 SAM_ISIZE = 8 20 SAM_ISIZE = 8
21 21
22 parser = OptionParser() 22 parser = OptionParser()
23 parser.add_option("-r", "--read_length", type="int", dest="read_length",
24 help="Read length")
25 parser.add_option("-X", dest="X", type="int",
26 help="Number of stdevs from mean to extend")
27 parser.add_option("-N", dest="N", type="int", help="Number to sample")
28 parser.add_option("-o", dest="output_file", help="Output file")
29 parser.add_option("-m", dest="mads", type="int", default=10,
30 help='''Outlier cutoff in # of median absolute deviations
31 (unscaled, upper only)''')
23 32
24 parser.add_option("-r",
25 "--read_length",
26 type="int",
27 dest="read_length",
28 help="Read length")
29
30 parser.add_option("-X",
31 dest="X",
32 type="int",
33 help="Number of stdevs from mean to extend")
34
35 parser.add_option("-N",
36 dest="N",
37 type="int",
38 help="Number to sample")
39
40 parser.add_option("-o",
41 dest="output_file",
42 help="Output file")
43
44 parser.add_option("-m",
45 dest="mads",
46 type="int",
47 default=10,
48 help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)")
49 33
50 def unscaled_upper_mad(xs): 34 def unscaled_upper_mad(xs):
51 """Return a tuple consisting of the median of xs followed by the 35 """Return a tuple consisting of the median of xs followed by the
52 unscaled median absolute deviation of the values in xs that lie 36 unscaled median absolute deviation of the values in xs that lie
53 above the median. 37 above the median.
94 L.append(isize) 78 L.append(isize)
95 79
96 # warn if very few elements in distribution 80 # warn if very few elements in distribution
97 min_elements = 1000 81 min_elements = 1000
98 if len(L) < min_elements: 82 if len(L) < min_elements:
99 sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements)) 83 sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" %
84 (len(L), min_elements))
100 mean = "NA" 85 mean = "NA"
101 stdev = "NA" 86 stdev = "NA"
102 87
103 else: 88 else:
104 # Remove outliers 89 # Remove outliers
108 upper_cutoff = med + options.mads * umad 93 upper_cutoff = med + options.mads * umad
109 L = L[L < upper_cutoff] 94 L = L[L < upper_cutoff]
110 new_len = len(L) 95 new_len = len(L)
111 removed = c - new_len 96 removed = c - new_len
112 sys.stderr.write("Removed %d outliers with isize >= %d\n" % 97 sys.stderr.write("Removed %d outliers with isize >= %d\n" %
113 (removed, upper_cutoff)) 98 (removed, upper_cutoff))
114 c = new_len 99 c = new_len
115 100
116 mean = np.mean(L) 101 mean = np.mean(L)
117 stdev = np.std(L) 102 stdev = np.std(L)
118 103
123 s = 0 108 s = 0
124 109
125 for x in L: 110 for x in L:
126 if (x >= start) and (x <= end): 111 if (x >= start) and (x <= end):
127 j = int(x - start) 112 j = int(x - start)
128 H[j] = H[ int(x - start) ] + 1 113 H[j] = H[int(x - start)] + 1
129 s += 1 114 s += 1
130 115
131 f = open(options.output_file, 'w') 116 f = open(options.output_file, 'w')
132 117
133 for i in range(end - start): 118 for i in range(end - start):
134 o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n" 119 o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n"
135 f.write(o) 120 f.write(o)
136
137
138 f.close() 121 f.close()
139
140 print('mean:' + str(mean) + '\tstdev:' + str(stdev)) 122 print('mean:' + str(mean) + '\tstdev:' + str(stdev))