Mercurial > repos > artbio > lumpy_sv
comparison pairend_distro.py @ 1:1ed8619a5611 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f
| author | artbio |
|---|---|
| date | Wed, 26 Jul 2017 18:17:01 -0400 |
| parents | 796552c157de |
| children | 093bb151a0a8 |
comparison
equal
deleted
inserted
replaced
| 0:796552c157de | 1:1ed8619a5611 |
|---|---|
| 7 # Department of Public Health Sciences and Center for Public Health Genomics, | 7 # Department of Public Health Sciences and Center for Public Health Genomics, |
| 8 # University of Virginia | 8 # University of Virginia |
| 9 # rl6sf@virginia.edu | 9 # rl6sf@virginia.edu |
| 10 | 10 |
| 11 import sys | 11 import sys |
| 12 from optparse import OptionParser | |
| 13 | |
| 12 import numpy as np | 14 import numpy as np |
| 13 from operator import itemgetter | |
| 14 from optparse import OptionParser | |
| 15 | 15 |
| 16 # some constants for sam/bam field ids | 16 # some constants for sam/bam field ids |
| 17 SAM_FLAG = 1 | 17 SAM_FLAG = 1 |
| 18 SAM_REFNAME = 2 | 18 SAM_REFNAME = 2 |
| 19 SAM_MATE_REFNAME = 6 | 19 SAM_MATE_REFNAME = 6 |
| 20 SAM_ISIZE = 8 | 20 SAM_ISIZE = 8 |
| 21 | 21 |
| 22 parser = OptionParser() | 22 parser = OptionParser() |
| 23 parser.add_option("-r", "--read_length", type="int", dest="read_length", | |
| 24 help="Read length") | |
| 25 parser.add_option("-X", dest="X", type="int", | |
| 26 help="Number of stdevs from mean to extend") | |
| 27 parser.add_option("-N", dest="N", type="int", help="Number to sample") | |
| 28 parser.add_option("-o", dest="output_file", help="Output file") | |
| 29 parser.add_option("-m", dest="mads", type="int", default=10, | |
| 30 help='''Outlier cutoff in # of median absolute deviations | |
| 31 (unscaled, upper only)''') | |
| 23 | 32 |
| 24 parser.add_option("-r", | |
| 25 "--read_length", | |
| 26 type="int", | |
| 27 dest="read_length", | |
| 28 help="Read length") | |
| 29 | |
| 30 parser.add_option("-X", | |
| 31 dest="X", | |
| 32 type="int", | |
| 33 help="Number of stdevs from mean to extend") | |
| 34 | |
| 35 parser.add_option("-N", | |
| 36 dest="N", | |
| 37 type="int", | |
| 38 help="Number to sample") | |
| 39 | |
| 40 parser.add_option("-o", | |
| 41 dest="output_file", | |
| 42 help="Output file") | |
| 43 | |
| 44 parser.add_option("-m", | |
| 45 dest="mads", | |
| 46 type="int", | |
| 47 default=10, | |
| 48 help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)") | |
| 49 | 33 |
| 50 def unscaled_upper_mad(xs): | 34 def unscaled_upper_mad(xs): |
| 51 """Return a tuple consisting of the median of xs followed by the | 35 """Return a tuple consisting of the median of xs followed by the |
| 52 unscaled median absolute deviation of the values in xs that lie | 36 unscaled median absolute deviation of the values in xs that lie |
| 53 above the median. | 37 above the median. |
| 94 L.append(isize) | 78 L.append(isize) |
| 95 | 79 |
| 96 # warn if very few elements in distribution | 80 # warn if very few elements in distribution |
| 97 min_elements = 1000 | 81 min_elements = 1000 |
| 98 if len(L) < min_elements: | 82 if len(L) < min_elements: |
| 99 sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements)) | 83 sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % |
| 84 (len(L), min_elements)) | |
| 100 mean = "NA" | 85 mean = "NA" |
| 101 stdev = "NA" | 86 stdev = "NA" |
| 102 | 87 |
| 103 else: | 88 else: |
| 104 # Remove outliers | 89 # Remove outliers |
| 108 upper_cutoff = med + options.mads * umad | 93 upper_cutoff = med + options.mads * umad |
| 109 L = L[L < upper_cutoff] | 94 L = L[L < upper_cutoff] |
| 110 new_len = len(L) | 95 new_len = len(L) |
| 111 removed = c - new_len | 96 removed = c - new_len |
| 112 sys.stderr.write("Removed %d outliers with isize >= %d\n" % | 97 sys.stderr.write("Removed %d outliers with isize >= %d\n" % |
| 113 (removed, upper_cutoff)) | 98 (removed, upper_cutoff)) |
| 114 c = new_len | 99 c = new_len |
| 115 | 100 |
| 116 mean = np.mean(L) | 101 mean = np.mean(L) |
| 117 stdev = np.std(L) | 102 stdev = np.std(L) |
| 118 | 103 |
| 123 s = 0 | 108 s = 0 |
| 124 | 109 |
| 125 for x in L: | 110 for x in L: |
| 126 if (x >= start) and (x <= end): | 111 if (x >= start) and (x <= end): |
| 127 j = int(x - start) | 112 j = int(x - start) |
| 128 H[j] = H[ int(x - start) ] + 1 | 113 H[j] = H[int(x - start)] + 1 |
| 129 s += 1 | 114 s += 1 |
| 130 | 115 |
| 131 f = open(options.output_file, 'w') | 116 f = open(options.output_file, 'w') |
| 132 | 117 |
| 133 for i in range(end - start): | 118 for i in range(end - start): |
| 134 o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n" | 119 o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n" |
| 135 f.write(o) | 120 f.write(o) |
| 136 | |
| 137 | |
| 138 f.close() | 121 f.close() |
| 139 | |
| 140 print('mean:' + str(mean) + '\tstdev:' + str(stdev)) | 122 print('mean:' + str(mean) + '\tstdev:' + str(stdev)) |
