Mercurial > repos > iuc > vsnp_determine_ref_from_data
annotate vsnp_statistics.py @ 7:57bd5b859e86 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author | iuc |
---|---|
date | Fri, 10 Jun 2022 06:10:23 +0000 |
parents | a8560decb495 |
children |
rev | line source |
---|---|
3
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
2 |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
3 import argparse |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
4 import os |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
5 |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
6 |
5
a8560decb495
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
class Statistics:
    """Plain container for the metrics reported for one FASTQ file.

    Every constructor argument is stored unchanged on the attribute of the
    same name; no conversion or validation happens here.  In this file the
    instances are built by get_statistics() and consumed by
    output_statistics().
    """

    # Attribute names in constructor-argument order; used by __repr__.
    _FIELDS = ('file_name', 'file_size', 'seq_type', 'num_seqs', 'sum_len', 'min_len',
               'avg_len', 'max_len', 'q1', 'q2', 'q3', 'sum_gap', 'n50', 'pass_q20',
               'pass_q30', 'read_quality_average')

    def __init__(self, file_name, file_size, seq_type, num_seqs, sum_len, min_len, avg_len,
                 max_len, q1, q2, q3, sum_gap, n50, pass_q20, pass_q30, read_quality_average):
        self.file_name = file_name
        self.file_size = file_size
        self.seq_type = seq_type
        self.num_seqs = num_seqs
        self.sum_len = sum_len
        self.min_len = min_len
        self.avg_len = avg_len
        self.max_len = max_len
        self.q1 = q1
        self.q2 = q2
        self.q3 = q3
        self.sum_gap = sum_gap
        self.n50 = n50
        self.pass_q20 = pass_q20
        self.pass_q30 = pass_q30
        self.read_quality_average = read_quality_average

    def __repr__(self):
        # Debug-friendly rendering that lists every stored value by name.
        args = ', '.join('%s=%r' % (name, getattr(self, name)) for name in self._FIELDS)
        return '%s(%s)' % (type(self).__name__, args)
5
a8560decb495
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
27 |
a8560decb495
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
28 |
3
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
def nice_size(size):
    """Return *size* (a number of bytes) as a short human-readable string.

    The value is scaled by powers of 1024 and labelled 'bytes', 'KB', 'MB',
    'GB', 'TB', 'PB' or 'EB'.  Byte counts are printed without decimals, all
    larger units with one decimal.  Negative values keep a leading '-'.

    '??? bytes' is returned when *size* cannot be converted to a float or
    when it does not fit any of the units above (1024 ** 7 and larger,
    infinity, NaN).
    """
    words = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    prefix = ''
    try:
        size = float(size)
    except (TypeError, ValueError, OverflowError):
        # Not a number (or an int too large for a float): size is unknown.
        return '??? bytes'
    if size < 0:
        size = abs(size)
        prefix = '-'
    for ind, word in enumerate(words):
        # The first unit whose upper bound exceeds the size is the one to use.
        step = 1024 ** (ind + 1)
        if step > size:
            size = size / float(1024 ** ind)
            if word == 'bytes':  # No decimals for bytes
                return "%s%d bytes" % (prefix, size)
            return "%s%.1f %s" % (prefix, size, word)
    return '??? bytes'
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
48 |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
49 |
7
57bd5b859e86
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents:
5
diff
changeset
|
def _stats_row(stats):
    """Return the values of *stats* in the order of the output columns."""
    # seq_type is intentionally absent: the report has no column for it.
    return [
        stats.file_name,
        stats.file_size,
        stats.num_seqs,
        stats.sum_len,
        stats.min_len,
        stats.avg_len,
        stats.max_len,
        stats.q1,
        stats.q2,
        stats.q3,
        stats.sum_gap,
        stats.n50,
        stats.pass_q20,
        stats.pass_q30,
        stats.read_quality_average,
    ]


def output_statistics(read1_stats, read2_stats, output_file):
    """Write a two-line, tab-separated statistics report to *output_file*.

    The first line is the column header and the second line holds the
    values.  The header is written as plain column names, without a
    leading '#'.

    read1_stats -- statistics object for the (first) FASTQ file.
    read2_stats -- statistics object for the second FASTQ file of a pair, or
                   None for single reads.  When given, every column name is
                   prefixed with 'R1 ' / 'R2 ' and the R2 values follow the
                   R1 values on the same line.
    output_file -- path of the file to create (an existing file is
                   overwritten).
    """
    base_columns = ['FASTQ', 'File Size', 'Read Count', 'Length Sum', 'Min Length', 'Ave Length',
                    'Max Length', 'Q1', 'Q2', 'Q3', 'Sum Gap', 'N50', 'Passing Q20', 'Passing Q30',
                    'Read Quality Ave']
    paired_reads = read2_stats is not None
    if paired_reads:
        columns = ['R1 %s' % name for name in base_columns] + ['R2 %s' % name for name in base_columns]
        line_items = _stats_row(read1_stats) + _stats_row(read2_stats)
    else:
        columns = base_columns
        line_items = _stats_row(read1_stats)
    with open(output_file, "w") as outfh:
        outfh.write("%s\n" % "\t".join(columns))
        outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))
3
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
102 |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
103 |
7
57bd5b859e86
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents:
5
diff
changeset
|
def get_statistics(fastq_file, seqkit_stats_file, seqkit_fx2tab_file):
    """Collect the statistics for one FASTQ file into a Statistics object.

    fastq_file         -- path of the FASTQ file; used for the reported file
                          name and, through its size on disk, the file size.
    seqkit_stats_file  -- tab-separated file with a header line followed by a
                          row of values (presumably `seqkit stats` output --
                          TODO confirm the exact command in the tool wrapper).
                          Columns 2-12 are copied as strings; columns 13 and
                          14 (pass_q20 / pass_q30) default to 0 when absent.
    seqkit_fx2tab_file -- tab-separated file with a header line followed by
                          one row per read; column 3 is read as a float
                          (presumably the per-read average quality).

    Returns a Statistics instance.  Raises ValueError if the stats file has
    no data row, and IndexError if a row has fewer columns than needed.
    """
    file_size = nice_size(os.path.getsize(fastq_file))
    # SeqKit statistics.
    items = None
    with open(seqkit_stats_file, "r") as fh:
        # This is a 2-line file: skip the header, keep the (last) data row.
        next(fh, None)
        for line in fh:
            line = line.rstrip('\r\n')
            if not line:
                continue
            items = line.split("\t")
    if items is None:
        raise ValueError("No statistics row found in %s" % seqkit_stats_file)
    file_name = fastq_file
    seq_type = items[2]
    num_seqs = items[3]
    sum_len = items[4]
    min_len = items[5]
    avg_len = items[6]
    max_len = items[7]
    q1 = items[8]
    q2 = items[9]
    q3 = items[10]
    sum_gap = items[11]
    n50 = items[12]
    # The quality columns are optional; report 0 when they are missing.
    pass_q20 = items[13] if len(items) > 13 else 0
    pass_q30 = items[14] if len(items) > 14 else 0
    # Average read quality is not normalized on length.
    avg_sum = 0.0
    num_reads = 0
    with open(seqkit_fx2tab_file, "r") as fh:
        # Skip header
        next(fh, None)
        for line in fh:
            line = line.rstrip('\r\n')
            if not line:
                continue
            avg_sum += float(line.split("\t")[3])
            num_reads += 1
    # Divide by the number of data rows actually read; the loop index minus
    # one undercounts by one row and divides by zero for a single read.
    if num_reads:
        average = avg_sum / num_reads
    else:
        average = 0.0
    read_quality_average = "{:.2f}".format(average)
    return Statistics(file_name, file_size, seq_type, num_seqs, sum_len, min_len, avg_len,
                      max_len, q1, q2, q3, sum_gap, n50, pass_q20, pass_q30, read_quality_average)
3
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
148 |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
149 |
6853676d2bae
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff
changeset
|
# Command-line entry point: parse the read and SeqKit report paths, collect
# statistics for the forward (and optional reverse) read, then write the report.
parser = argparse.ArgumentParser()

parser.add_argument('--output', action='store', dest='output', help='Output Excel statistics file')
# The forward read is mandatory (its help text says so); let argparse reject a
# missing value up front instead of failing later inside get_statistics().
parser.add_argument('--read1', action='store', dest='read1', required=True, help='Required: single read')
parser.add_argument('--read2', action='store', dest='read2', required=False, default=None, help='Optional: paired read')
parser.add_argument('--read1_seqkit_stats', action='store', dest='read1_seqkit_stats', help='Output of SeqKit statistics for forward read')
parser.add_argument('--read2_seqkit_stats', action='store', dest='read2_seqkit_stats', required=False, default=None, help='Output of SeqKit statistics for reverse read')
# get_statistics() reads the fx2tab file to compute the average read quality,
# so the forward-read report must always be supplied.
parser.add_argument('--read1_seqkit_fx2tab', action='store', dest='read1_seqkit_fx2tab', required=True, help='Output of SeqKit fx2tab for forward read')
parser.add_argument('--read2_seqkit_fx2tab', action='store', dest='read2_seqkit_fx2tab', required=False, default=None, help='Output of SeqKit fx2tab for reverse read')

args = parser.parse_args()

# A reverse read can only be summarized when its fx2tab report is given too;
# report that as a usage error rather than a traceback from open(None).
if args.read2 is not None and args.read2_seqkit_fx2tab is None:
    parser.error('--read2_seqkit_fx2tab is required when --read2 is given')

read1_stats = get_statistics(args.read1, args.read1_seqkit_stats, args.read1_seqkit_fx2tab)
if args.read2 is None:
    # Single-end input: there is no reverse read to summarize.
    read2_stats = None
else:
    read2_stats = get_statistics(args.read2, args.read2_seqkit_stats, args.read2_seqkit_fx2tab)

output_statistics(read1_stats, read2_stats, args.output)