Mercurial > repos > peterjc > coverage_stats
annotate tools/coverage_stats/coverage_stats.py @ 2:7254ece0c0ff draft
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
author | peterjc |
---|---|
date | Thu, 11 May 2017 12:16:10 -0400 |
parents | ca8f63f2f7d4 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """BAM coverage statistics using samtools idxstats and depth. | |
3 | |
4 This script takes exactly three command line arguments: | |
5 * Input BAM filename | |
6 * Input BAI filename (via Galaxy metadata) | |
7 * Output tabular filename | |
8 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
9 Optional fourth argument: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
10 * Max coverage depth (integer) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
11 |
0 | 12 This messes about with the filenames to make samtools happy, then |
13 runs "samtools idxstats" and "samtools depth", captures and combines | |
14 the output to the desired output tabular file. | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
15 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
16 Because "samtools depth" treats the max depth a little fuzzily, this |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
17 tool tries to account for this and applies a clear max-depth cut off. |
0 | 18 """ |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
19 |
0 | 20 import os |
21 import subprocess | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
22 import sys |
0 | 23 import tempfile |
24 | |
25 if "-v" in sys.argv or "--version" in sys.argv: | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
26 # Galaxy seems to invert the order of the two lines |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
27 print("BAM coverage statistics v0.0.5") |
0 | 28 cmd = "samtools 2>&1 | grep -i ^Version" |
29 sys.exit(os.system(cmd)) | |
30 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
31 # TODO - Proper command line API |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
32 if len(sys.argv) == 4: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
33 bam_filename, bai_filename, tabular_filename = sys.argv[1:] |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
34 max_depth = "8000" |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
35 elif len(sys.argv) == 5: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
36 bam_filename, bai_filename, tabular_filename, max_depth = sys.argv[1:] |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
37 else: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
38 sys.exit("Require 3 or 4 arguments: BAM, BAI, tabular filename, [max depth]") |
0 | 39 |
40 if not os.path.isfile(bam_filename): | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
41 sys.exit("Input BAM file not found: %s" % bam_filename) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
42 if bai_filename == "-": |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
43 # Make this optional for ease of use at the command line by hand: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
44 if os.path.isfile(bam_filename + ".bai"): |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
45 bai_filename = bam_filename + ".bai" |
0 | 46 if not os.path.isfile(bai_filename): |
47 if bai_filename == "None": | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
48 sys.exit("Error: Galaxy did not index your BAM file") |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
49 sys.exit("Input BAI file not found: %s" % bai_filename) |
0 | 50 |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
51 try: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
52 max_depth = int(max_depth) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
53 except ValueError: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
54 sys.exit("Bad argument for max depth: %r" % max_depth) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
55 if max_depth < 0: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
56 sys.exit("Bad argument for max depth: %r" % max_depth) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
57 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
58 # fuzz factor to ensure can reach max_depth, e.g. region with |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
59 # many reads having a deletion present could underestimate the |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
60 # coverage by capping the number of reads considered |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
61 depth_margin = 100 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
62 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
63 # Assign sensible names with real extensions, and setup symlinks: |
0 | 64 tmp_dir = tempfile.mkdtemp() |
65 bam_file = os.path.join(tmp_dir, "temp.bam") | |
66 bai_file = os.path.join(tmp_dir, "temp.bam.bai") | |
67 idxstats_filename = os.path.join(tmp_dir, "idxstats.tsv") | |
68 depth_filename = os.path.join(tmp_dir, "depth.tsv") | |
69 os.symlink(os.path.abspath(bam_filename), bam_file) | |
70 os.symlink(os.path.abspath(bai_filename), bai_file) | |
71 assert os.path.isfile(bam_file), bam_file | |
72 assert os.path.isfile(bai_file), bai_file | |
73 assert os.path.isfile(bam_file + ".bai"), bam_file | |
74 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
75 |
0 | 76 def clean_up(): |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
77 """Remove our temporary files and directory.""" |
0 | 78 os.remove(idxstats_filename) |
79 os.remove(depth_filename) | |
80 os.remove(bam_file) | |
81 os.remove(bai_file) | |
82 os.rmdir(tmp_dir) | |
83 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
84 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
85 def samtools_depth_opt_available(): |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
86 """Determine if samtools depth supports maximum coverage argument.""" |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
87 child = subprocess.Popen(["samtools", "depth"], |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
88 stdout=subprocess.PIPE, stderr=subprocess.STDOUT) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
89 # Combined stdout/stderr in case samtools is ever inconsistent |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
90 output, tmp = child.communicate() |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
91 assert tmp is None |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
92 # Expect to find this line in the help text, exact wording could change: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
93 # -d/-m <int> maximum coverage depth [8000] |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
94 return " -d/-m " in output |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
95 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
96 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
97 depth_hack = False |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
98 if not samtools_depth_opt_available(): |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
99 if max_depth + depth_margin <= 8000: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
100 sys.stderr.write("WARNING: The version of samtools depth installed does not " |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
101 "support the -d option, however, the requested max-depth " |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
102 "is safely under the default of 8000.\n") |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
103 depth_hack = True |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
104 else: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
105 sys.exit("The version of samtools depth installed does not support the -d option.") |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
106 |
0 | 107 # Run samtools idxstats: |
108 cmd = 'samtools idxstats "%s" > "%s"' % (bam_file, idxstats_filename) | |
109 return_code = os.system(cmd) | |
110 if return_code: | |
111 clean_up() | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
112 sys.exit("Return code %i from command:\n%s" % (return_code, cmd)) |
0 | 113 |
114 # Run samtools depth: | |
115 # TODO - Parse stdout instead? | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
116 if depth_hack: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
117 # Using an old samtools without the -d option, but hard coded default |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
118 # of 8000 should be fine even allowing a margin for fuzzy output |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
119 cmd = 'samtools depth "%s" > "%s"' % (bam_file, depth_filename) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
120 else: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
121 cmd = 'samtools depth -d %i "%s" > "%s"' % (max_depth + depth_margin, bam_file, depth_filename) |
0 | 122 return_code = os.system(cmd) |
123 if return_code: | |
124 clean_up() | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
125 sys.exit("Return code %i from command:\n%s" % (return_code, cmd)) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
126 |
0 | 127 |
128 def load_total_coverage(depth_handle, identifier, length): | |
129 """Parse some of the 'samtools depth' output for coverages. | |
130 | |
131 Returns min_cov (int), max_cov (int) and mean cov (float). | |
132 | |
133 Uses global variables to cache the first line of output from the | |
134 next reference sequence. | |
135 """ | |
136 global depth_ref, depth_pos, depth_reads | |
137 | |
138 # print("====") | |
139 # print("%s coverage calculation, length %i, ..." % (identifier, length)) | |
140 | |
141 if depth_ref is None: | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
142 # Right at start of file / new contig |
0 | 143 line = depth_handle.readline() |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
144 # Are we at the end of the file? |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
145 if not line: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
146 # Must be at the end of the file. |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
147 # This can happen if the file contig(s) had no reads mapped |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
148 return 0, 0, 0.0, 0 |
0 | 149 depth_ref, depth_pos, depth_reads = line.rstrip("\n").split() |
150 depth_pos = int(depth_pos) | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
151 depth_reads = min(max_depth, int(depth_reads)) |
0 | 152 # Can now treat as later references where first line cached |
153 elif identifier != depth_ref: | |
154 # Infer that identifier had coverage zero, | |
155 # and so was not in the 'samtools depth' | |
156 # output. | |
157 # print("%s appears to have no coverage at all" % identifier) | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
158 return 0, 0, 0.0, 0 |
0 | 159 |
160 # Good, at start of expected reference | |
161 bases = depth_reads | |
162 if depth_pos == 1: | |
163 min_cov = depth_reads | |
164 else: | |
165 # print("%s has no coverage at start" % identifier) | |
166 min_cov = 0 | |
167 max_cov = depth_reads | |
168 | |
169 last_pos = depth_pos | |
170 depth_ref = None | |
171 depth_pos = 0 | |
172 depth_reads = 0 | |
173 for line in depth_handle: | |
174 ref, pos, depth = line.rstrip("\n").split() | |
175 pos = int(pos) | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
176 depth = min(max_depth, int(depth)) |
0 | 177 if ref != identifier: |
178 # Reached the end of this identifier's coverage | |
179 # so cache this ready for next identifier | |
180 depth_ref, depth_pos, depth_reads = ref, pos, depth | |
181 break | |
182 bases += depth | |
183 if last_pos + 1 < pos: | |
184 # print("%s has no coverage between %i and %i" % (identifier, last_pos, pos)) | |
185 min_cov = 0 | |
186 else: | |
187 min_cov = min(min_cov, depth) | |
188 max_cov = max(max_cov, depth) | |
189 last_pos = pos | |
190 | |
191 # Reach the end of this identifier's coverage or end of file | |
192 if last_pos < length: | |
193 # print("%s has no coverage at end" % identifier) | |
194 min_cov = 0 | |
195 mean_cov = bases / float(length) | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
196 return min_cov, max_cov, mean_cov, bases |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
197 |
0 | 198 |
199 # Parse and combine the output | |
200 out_handle = open(tabular_filename, "w") | |
201 out_handle.write("#identifer\tlength\tmapped\tplaced\tmin_cov\tmax_cov\tmean_cov\n") | |
202 | |
203 idxstats_handle = open(idxstats_filename) | |
204 depth_handle = open(depth_filename) | |
205 | |
206 depth_ref = None | |
207 depth_pos = 0 | |
208 depth_reads = 0 | |
209 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
210 global_bases = 0 |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
211 global_length = 0 |
0 | 212 for line in idxstats_handle: |
213 identifier, length, mapped, placed = line.rstrip("\n").split() | |
214 length = int(length) | |
215 mapped = int(mapped) | |
216 placed = int(placed) | |
217 if identifier == "*": | |
218 min_cov = 0 | |
219 max_cov = 0 | |
220 mean_cov = 0.0 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
221 bases = 0 |
0 | 222 else: |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
223 min_cov, max_cov, mean_cov, bases = load_total_coverage(depth_handle, identifier, length) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
224 if max_cov > max_depth: |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
225 sys.exit("Using max depth %i yet saw max coverage %i for %s" |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
226 % (max_depth, max_cov, identifier)) |
0 | 227 out_handle.write("%s\t%i\t%i\t%i\t%i\t%i\t%0.2f\n" |
228 % (identifier, length, mapped, placed, | |
229 min_cov, max_cov, mean_cov)) | |
230 if not (min_cov <= mean_cov <= max_cov): | |
231 out_handle.write("ERROR, expect min_cov <= mean_cov <= max_cov\n") | |
232 idxstats_handle.close() | |
233 depth_handle.close() | |
234 out_handle.close() | |
235 clean_up() | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
236 sys.exit("Problem with coverage for %s, expect min_cov <= mean_cov <= max_cov" |
0 | 237 % identifier) |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
238 global_length += length |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
239 global_bases += bases |
0 | 240 |
241 idxstats_handle.close() | |
242 depth_handle.close() | |
243 out_handle.close() | |
244 | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
245 print("Total reference length %i with overall mean coverage %0.2f" % (global_length, float(global_bases) / global_length)) |
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
246 |
0 | 247 # Remove the temp symlinks and files: |
248 clean_up() | |
249 | |
250 if depth_ref is not None: | |
2
7254ece0c0ff
v0.0.5 - Supports max coverage depth in recent samtools. Expects samtools 1.4.1 via Conda, not via Tool Shed.
peterjc
parents:
0
diff
changeset
|
251 sys.exit("Left over output from 'samtools depth'? %r" % depth_ref) |