Mercurial > repos > drosofff > lumpy
annotate pairend_distro.py @ 10:4584440b2634 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit a7bd16b6c6dbfe8b41de2a51c29b197ca660c5c6
| author | drosofff | 
|---|---|
| date | Mon, 12 Dec 2016 08:47:56 -0500 | 
| parents | 8b3daa745d9b | 
| children | 
| rev | line source | 
|---|---|
| 0 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 2 # (c) 2012 - Ryan M. Layer | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 3 # Hall Laboratory | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 4 # Quinlan Laboratory | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 5 # Department of Computer Science | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 6 # Department of Biochemistry and Molecular Genetics | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 7 # Department of Public Health Sciences and Center for Public Health Genomics, | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 8 # University of Virginia | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 9 # rl6sf@virginia.edu | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 10 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 11 import sys | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 12 import numpy as np | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 13 from operator import itemgetter | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 14 from optparse import OptionParser | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 15 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
# Zero-based positions, within a tab-split SAM record, of the fields this
# script reads (flag, reference name, mate reference name, insert size).
(SAM_FLAG,
 SAM_REFNAME,
 SAM_MATE_REFNAME,
 SAM_ISIZE) = (1, 2, 6, 8)
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 21 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
# Command-line interface. Only -m carries a default; every other option
# is None until the user supplies it.
parser = OptionParser()

# Read length (integer).
parser.add_option("-r", "--read_length", dest="read_length", type="int",
                  help="Read length")

# Number of standard deviations above the mean to extend.
parser.add_option("-X", dest="X", type="int",
                  help="Number of stdevs from mean to extend")

# Number of reads to sample.
parser.add_option("-N", dest="N", type="int",
                  help="Number to sample")

# Output file path.
parser.add_option("-o", dest="output_file",
                  help="Output file")

# Outlier cutoff, in unscaled upper median absolute deviations.
parser.add_option("-m", dest="mads", type="int", default=10,
                  help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)")
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 49 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
def unscaled_upper_mad(xs):
    """Return the median of xs and its unscaled upper median absolute deviation.

    The second element is the median of ``x - median`` taken over only
    those values of xs that lie strictly above the median. No scaling
    constant is applied.

    xs may be a NumPy array or any sequence NumPy can convert (list,
    tuple); it is coerced with ``np.asarray`` so that the boolean-mask
    selection below works for plain Python sequences too.

    If no value lies strictly above the median (for example when all
    values are equal), the deviation is the median of an empty array,
    which NumPy reports as nan.
    """
    xs = np.asarray(xs)
    med = np.median(xs)
    # Keep only the upper half and measure its distance from the median.
    return med, np.median(xs[xs > med] - med)
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 57 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 58 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
(options, args) = parser.parse_args()

# Mandatory options, checked in this order. A missing or otherwise falsy
# value (None, 0, empty string) aborts through parser.error with the
# matching message.
for _dest, _complaint in (
        ("read_length", 'Read length not given'),
        ("X", 'X not given'),
        ("N", 'N not given'),
        ("output_file", 'Output file not given')):
    if not getattr(options, _dest):
        parser.error(_complaint)
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 72 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 73 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
# Flag bits that must all be set. Per the SAM FLAG table:
# 0x1 (paired) + 0x20 (mate on reverse strand) + 0x40 (first in pair) = 97.
required = 97
# Flag bits that must all be clear. Per the SAM FLAG table:
# 0x4 + 0x8 + 0x10 + 0x80 + 0x100 + 0x400 + 0x800 = 3484
# (unmapped, mate unmapped, reverse strand, second in pair, secondary,
# duplicate, supplementary).
restricted = 3484
flag_mask = required | restricted

L = []  # insert sizes of the accepted reads
c = 0   # number of accepted reads so far

for line in sys.stdin:
    # Stop as soon as the requested sample size has been collected.
    if c >= options.N:
        break

    # Header lines start with '@', a character the SAM specification does
    # not allow at the start of a read name. Skip them instead of failing
    # on int() when the input was produced with its header included.
    if line.startswith('@'):
        continue

    fields = line.rstrip().split('\t')
    flag = int(fields[SAM_FLAG])
    mate_refname = fields[SAM_MATE_REFNAME]
    isize = int(fields[SAM_ISIZE])

    # Keep reads whose mate is on the same reference ("="), whose flag has
    # every required bit set and every restricted bit clear, and whose
    # insert size is non-negative.
    if mate_refname == "=" and flag & flag_mask == required and isize >= 0:
        c += 1
        L.append(isize)
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 95 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
# warn if very few elements in distribution
min_elements = 1000
if len(L) < min_elements:
    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements))
    # Too few observations: report NA and write no histogram file.
    mean = "NA"
    stdev = "NA"

else:
    # Remove outliers: keep only insert sizes strictly below
    # median + mads * (unscaled upper median absolute deviation).
    L = np.array(L)
    L.sort()
    med, umad = unscaled_upper_mad(L)
    upper_cutoff = med + options.mads * umad
    L = L[L < upper_cutoff]
    new_len = len(L)
    removed = c - new_len
    sys.stderr.write("Removed %d outliers with isize >= %d\n" %
                     (removed, upper_cutoff))
    c = new_len

    mean = np.mean(L)
    stdev = np.std(L)

    # The histogram spans insert sizes from the read length up to
    # mean + X standard deviations (truncated to an integer).
    start = options.read_length
    end = int(mean + options.X * stdev)

    H = [0] * (end - start + 1)
    s = 0  # number of insert sizes that fall inside [start, end]

    for x in L:
        if start <= x <= end:
            H[int(x - start)] += 1
            s += 1

    # One "offset<TAB>fraction" line per bin; offsets are relative to start.
    # The context manager closes the file even if a write raises.
    # NOTE(review): range(end - start) leaves out the last bin of H (the
    # one for x == end) although s counts it -- kept as-is because the
    # consumer of this file is not visible here; confirm before changing.
    with open(options.output_file, 'w') as f:
        for i in range(end - start):
            f.write(str(i) + "\t" + str(float(H[i]) / float(s)) + "\n")

print('mean:' + str(mean) + '\tstdev:' + str(stdev))
