annotate pairend_distro.py @ 10:4584440b2634 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit a7bd16b6c6dbfe8b41de2a51c29b197ca660c5c6
author drosofff
date Mon, 12 Dec 2016 08:47:56 -0500
parents 8b3daa745d9b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
1 #!/usr/bin/env python
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
2 # (c) 2012 - Ryan M. Layer
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
3 # Hall Laboratory
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
4 # Quinlan Laboratory
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
5 # Department of Computer Science
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
6 # Department of Biochemistry and Molecular Genetics
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
7 # Department of Public Health Sciences and Center for Public Health Genomics,
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
8 # University of Virginia
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
9 # rl6sf@virginia.edu
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
10
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
11 import sys
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
12 import numpy as np
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
13 from operator import itemgetter
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
14 from optparse import OptionParser
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
15
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
# Column positions (0-based, tab-separated) of the SAM/BAM fields used below.
SAM_FLAG, SAM_REFNAME, SAM_MATE_REFNAME, SAM_ISIZE = 1, 2, 6, 8

parser = OptionParser()

# One (flags, settings) entry per command-line option, listed in the order
# they should show up in --help.
for _flags, _settings in (
    (("-r", "--read_length"),
     dict(type="int", dest="read_length", help="Read length")),
    (("-X",),
     dict(dest="X", type="int",
          help="Number of stdevs from mean to extend")),
    (("-N",),
     dict(dest="N", type="int", help="Number to sample")),
    (("-o",),
     dict(dest="output_file", help="Output file")),
    (("-m",),
     dict(dest="mads", type="int", default=10,
          help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)")),
):
    parser.add_option(*_flags, **_settings)
del _flags, _settings
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
49
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
def unscaled_upper_mad(xs):
    """Return the median of xs and the unscaled upper median absolute deviation.

    Only the values strictly greater than the median contribute to the
    deviation, so the estimate describes the spread of the upper tail only.

    Parameters
    ----------
    xs : array-like of numbers
        Any sequence accepted by ``numpy.asarray`` (list, tuple, ndarray).

    Returns
    -------
    (median, upper_mad) : tuple
        ``median`` is ``np.median(xs)``; ``upper_mad`` is the median of
        ``x - median`` taken over the values ``x > median``.  When no value
        lies above the median (e.g. all values are equal) ``upper_mad`` is
        NaN, because the median of an empty selection is undefined.
    """
    # Boolean-mask indexing below requires an ndarray; plain lists/tuples
    # would otherwise raise TypeError.
    xs = np.asarray(xs)
    med = np.median(xs)
    return med, np.median(xs[xs > med] - med)
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
57
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
58
8b3daa745d9b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff changeset
(options, args) = parser.parse_args()

# optparse has no notion of a mandatory option, so the required ones are
# checked by hand.  Because the test is on truthiness, a value of 0 is
# rejected just like a missing value.
if not options.read_length:
    parser.error('Read length not given')

if not options.X:
    parser.error('X not given')

if not options.N:
    parser.error('N not given')

if not options.output_file:
    parser.error('Output file not given')


# SAM FLAG filter (bit meanings per the SAM specification):
#   required   = 0x1 | 0x20 | 0x40
#                paired, mate on reverse strand, first in pair
#   restricted = 0x4 | 0x8 | 0x10 | 0x80 | 0x100 | 0x400 | 0x800
#                unmapped, mate unmapped, read on reverse strand, last in
#                pair, secondary, duplicate, supplementary
# A read is sampled only if every required bit is set and every restricted
# bit is clear.
required = 97
restricted = 3484
flag_mask = required | restricted

L = []
c = 0

# Collect up to N insert sizes from tab-separated SAM records on stdin.
for l in sys.stdin:
    if c >= options.N:
        break

    # SAM header lines start with '@' and have no alignment columns; parsing
    # them as records would raise, so skip them.
    if l.startswith('@'):
        continue

    A = l.rstrip().split('\t')
    flag = int(A[SAM_FLAG])
    mate_refname = A[SAM_MATE_REFNAME]
    isize = int(A[SAM_ISIZE])

    # Mate on the same reference ("="), flags match, non-negative insert size.
    want = mate_refname == "=" and flag & flag_mask == required and isize >= 0
    if want:
        c += 1
        L.append(isize)

# warn if very few elements in distribution
min_elements = 1000
if len(L) < min_elements:
    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements))
    # No histogram is produced in this case; only the summary line is printed.
    mean = "NA"
    stdev = "NA"

else:
    # Remove outliers: drop everything at or above
    # median + mads * (unscaled upper MAD).
    L = np.array(L)
    L.sort()
    med, umad = unscaled_upper_mad(L)
    upper_cutoff = med + options.mads * umad
    L = L[L < upper_cutoff]
    new_len = len(L)
    removed = c - new_len
    sys.stderr.write("Removed %d outliers with isize >= %d\n" %
                     (removed, upper_cutoff))
    c = new_len

    mean = np.mean(L)
    stdev = np.std(L)

    # Histogram covers insert sizes from the read length up to
    # mean + X standard deviations, one bin per base.
    start = options.read_length
    end = int(mean + options.X * stdev)

    H = [0] * (end - start + 1)
    s = 0

    for x in L:
        if start <= x <= end:
            H[int(x - start)] += 1
            s += 1

    # With a non-empty output range but nothing counted, the normalisation
    # below would divide by zero; fail with an explicit message instead.
    if s == 0 and end > start:
        sys.exit("Error: no insert sizes fall within [%d, %d]; "
                 "cannot build the distribution" % (start, end))

    # NOTE(review): H holds end - start + 1 bins but only the first
    # end - start are written, so the last bin is never emitted.  Kept as-is
    # because the consumer of this file may rely on that length -- confirm.
    with open(options.output_file, 'w') as f:
        for i in range(end - start):
            o = str(i) + "\t" + str(float(H[i]) / float(s)) + "\n"
            f.write(o)

print('mean:' + str(mean) + '\tstdev:' + str(stdev))