annotate vsnp_statistics.py @ 7:57bd5b859e86 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author iuc
date Fri, 10 Jun 2022 06:10:23 +0000
parents a8560decb495
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
2
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
3 import argparse
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
4 import os
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
5
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
6
5
a8560decb495 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents: 3
diff changeset
class Statistics:
    """Value holder for the statistics gathered for one FASTQ file.

    The constructor stores every argument unchanged on an attribute of the
    same name; no validation or conversion is performed here.
    """

    def __init__(
        self, file_name, file_size, seq_type, num_seqs, sum_len, min_len, avg_len,
        max_len, q1, q2, q3, sum_gap, n50, pass_q20, pass_q30, read_quality_average
    ):
        # Identity of the file the values describe.
        self.file_name, self.file_size, self.seq_type = file_name, file_size, seq_type
        # Sequence count and length summary.
        self.num_seqs, self.sum_len = num_seqs, sum_len
        self.min_len, self.avg_len, self.max_len = min_len, avg_len, max_len
        # Quartile values plus the gap and N50 figures.
        self.q1, self.q2, self.q3 = q1, q2, q3
        self.sum_gap, self.n50 = sum_gap, n50
        # Quality-related figures.
        self.pass_q20, self.pass_q30 = pass_q20, pass_q30
        self.read_quality_average = read_quality_average
5
a8560decb495 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents: 3
diff changeset
27
a8560decb495 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents: 3
diff changeset
28
3
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
def nice_size(size):
    """Return ``size`` (a byte count) as a short human-readable string.

    The value is scaled by powers of 1024 into the largest unit, from
    ``bytes`` up to ``EB``, that keeps the scaled number below 1024. Byte
    values are printed as whole numbers, larger units with one decimal.
    Negative inputs keep a leading ``-``. Anything that cannot be converted
    with ``float()``, or that is too large for the ``EB`` unit, yields
    ``'??? bytes'``.
    """
    unknown = '??? bytes'
    try:
        magnitude = float(size)
    except Exception:
        return unknown
    # Format the absolute value and re-attach the sign afterwards.
    sign = ''
    if magnitude < 0:
        magnitude = abs(magnitude)
        sign = '-'
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    for exponent, unit in enumerate(units):
        # The first unit whose upper bound exceeds the value is the one to use.
        if 1024 ** (exponent + 1) > magnitude:
            scaled = magnitude / float(1024 ** exponent)
            if unit == 'bytes':
                # No decimals for bytes
                return "%s%d bytes" % (sign, scaled)
            return "%s%.1f %s" % (sign, scaled, unit)
    return unknown
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
48
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
49
7
57bd5b859e86 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents: 5
diff changeset
def output_statistics(read1_stats, read2_stats, output_file):
    """Write a two-line, tab-separated statistics table to ``output_file``.

    The first line is the column header and the second line holds the values.
    When ``read2_stats`` is not None the input is treated as paired: every
    column label is prefixed with ``R1 `` or ``R2 `` and the read-2 values
    follow the read-1 values on the same row.

    Args:
        read1_stats: Object exposing the attributes named in ``fields`` below
            (for example a ``Statistics`` instance) for the first or only
            FASTQ file.
        read2_stats: The same kind of object for the second FASTQ file, or
            None for single-read input.
        output_file: Path of the file to create or overwrite.

    Raises:
        AttributeError: If a statistics object lacks one of the attributes.
            The values are collected before the file is opened, so no
            partial output file is left behind in that case.
    """
    # (column label, attribute name) pairs in output order. ``seq_type`` is
    # carried by the statistics objects but is not part of the report.
    fields = (
        ('FASTQ', 'file_name'),
        ('File Size', 'file_size'),
        ('Read Count', 'num_seqs'),
        ('Length Sum', 'sum_len'),
        ('Min Length', 'min_len'),
        ('Ave Length', 'avg_len'),
        ('Max Length', 'max_len'),
        ('Q1', 'q1'),
        ('Q2', 'q2'),
        ('Q3', 'q3'),
        ('Sum Gap', 'sum_gap'),
        ('N50', 'n50'),
        ('Passing Q20', 'pass_q20'),
        ('Passing Q30', 'pass_q30'),
        ('Read Quality Ave', 'read_quality_average'),
    )
    paired_reads = read2_stats is not None
    if paired_reads:
        samples = (('R1 ', read1_stats), ('R2 ', read2_stats))
    else:
        samples = (('', read1_stats),)
    columns = []
    line_items = []
    for prefix, stats in samples:
        for label, attr in fields:
            columns.append(prefix + label)
            line_items.append(getattr(stats, attr))
    with open(output_file, "w") as outfh:
        # The header is written as-is, without a leading '#' marker.
        outfh.write("%s\n" % "\t".join(columns))
        outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))
3
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
102
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
103
7
57bd5b859e86 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents: 5
diff changeset
def get_statistics(fastq_file, seqkit_stats_file, seqkit_fx2tab_file):
    """Build a ``Statistics`` object for one FASTQ file.

    Args:
        fastq_file: Path of the FASTQ file. Its on-disk size is reported via
            ``nice_size`` and the path itself is stored as the file name.
        seqkit_stats_file: Tab-separated file with a header line followed by
            a data row. Columns 2 to 12 (0-based) are read positionally;
            columns 13 and 14 are optional and default to 0 when absent.
            If several data rows are present, the last one is used.
            NOTE(review): presumably the tabular output of ``seqkit stats``;
            confirm the column order against the caller.
        seqkit_fx2tab_file: Tab-separated file with a header line followed by
            one row per read, whose fourth column is that read's average
            quality.

    Returns:
        A ``Statistics`` instance. All values taken from the stats file are
        kept as strings; ``read_quality_average`` is the mean of the per-read
        averages formatted with two decimals ("0.00" when there are no reads).

    Raises:
        ValueError: If ``seqkit_stats_file`` contains no data row.
    """
    file_size = nice_size(os.path.getsize(fastq_file))
    # SeqKit statistics: a header line followed by the data row.
    items = None
    with open(seqkit_stats_file, "r") as fh:
        for i, line in enumerate(fh):
            if i == 0:
                # Skip header
                continue
            line = line.rstrip('\r\n')
            if not line:
                # Ignore blank lines such as a trailing newline.
                continue
            items = line.split("\t")
    if items is None:
        raise ValueError("No statistics row found in %s" % seqkit_stats_file)
    file_name = fastq_file
    (seq_type, num_seqs, sum_len, min_len, avg_len, max_len,
     q1, q2, q3, sum_gap, n50) = items[2:13]
    # The two pass-rate columns may be missing from the row; fall back to 0.
    pass_q20 = items[13] if len(items) > 13 else 0
    pass_q30 = items[14] if len(items) > 14 else 0
    # Average read quality is not normalized on length.
    quality_sum = 0.0
    read_count = 0
    with open(seqkit_fx2tab_file, "r") as fh:
        for i, line in enumerate(fh):
            if i == 0:
                # Skip header
                continue
            line = line.rstrip('\r\n')
            if not line:
                continue
            quality_sum += float(line.split("\t")[3])
            read_count += 1
    # Divide by the number of rows actually summed. Using the last loop index
    # minus one under-counts by one read and fails when there is only one.
    average = quality_sum / read_count if read_count else 0.0
    read_quality_average = "{:.2f}".format(average)
    return Statistics(file_name, file_size, seq_type, num_seqs, sum_len, min_len, avg_len,
                      max_len, q1, q2, q3, sum_gap, n50, pass_q20, pass_q30, read_quality_average)
3
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
148
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
149
6853676d2bae "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
iuc
parents:
diff changeset
# Command-line entry point: parse the inputs for the forward read (and,
# optionally, the reverse read), gather statistics for each read and write
# them to the output file.
parser = argparse.ArgumentParser()

parser.add_argument('--output', action='store', dest='output', help='Output Excel statistics file')
# The help text declares this input as required; enforce it so a missing
# argument is reported by argparse instead of surfacing later as a failure
# on None.
parser.add_argument('--read1', action='store', dest='read1', required=True, help='Required: single read')
parser.add_argument('--read2', action='store', dest='read2', required=False, default=None, help='Optional: paired read')
parser.add_argument('--read1_seqkit_stats', action='store', dest='read1_seqkit_stats', help='Output of SeqKit statistics for forward read')
parser.add_argument('--read2_seqkit_stats', action='store', dest='read2_seqkit_stats', required=False, default=None, help='Output of SeqKit statistics for reverse read')
# get_statistics() opens the fx2tab file unconditionally, so the forward-read
# fx2tab output is always needed.
parser.add_argument('--read1_seqkit_fx2tab', action='store', dest='read1_seqkit_fx2tab', required=True, help='Output of SeqKit fx2tab for forward read')
parser.add_argument('--read2_seqkit_fx2tab', action='store', dest='read2_seqkit_fx2tab', required=False, default=None, help='Output of SeqKit fx2tab for reverse read')

args = parser.parse_args()

# A reverse read can only be processed when its fx2tab output is supplied as
# well; fail with a usage message rather than a TypeError from open(None).
if args.read2 is not None and args.read2_seqkit_fx2tab is None:
    parser.error('--read2_seqkit_fx2tab is required when --read2 is given')

read1_stats = get_statistics(args.read1, args.read1_seqkit_stats, args.read1_seqkit_fx2tab)
if args.read2 is None:
    # Single-end data: there is no reverse read to summarize.
    read2_stats = None
else:
    read2_stats = get_statistics(args.read2, args.read2_seqkit_stats, args.read2_seqkit_fx2tab)

output_statistics(read1_stats, read2_stats, args.output)