annotate vsnp_statistics.py @ 4:4535ad8b74f3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author iuc
date Fri, 10 Jun 2022 06:08:49 +0000
parents 9ac0b1d5560d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
2
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
3 import argparse
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
4 import os
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
5
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
6
1
9ac0b1d5560d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents: 0
diff changeset
class Statistics:
    """Plain record holding the statistics gathered for one FASTQ file.

    Every constructor argument is stored unchanged on the attribute of the
    same name; no validation or type conversion is performed.
    """

    def __init__(self, file_name, file_size, seq_type, num_seqs, sum_len, min_len, avg_len,
                 max_len, q1, q2, q3, sum_gap, n50, pass_q20, pass_q30, read_quality_average):
        # File identification.
        self.file_name = file_name
        self.file_size = file_size
        self.seq_type = seq_type
        # Read count and length summary.
        self.num_seqs = num_seqs
        self.sum_len = sum_len
        self.min_len = min_len
        self.avg_len = avg_len
        self.max_len = max_len
        # Quartile values.
        self.q1 = q1
        self.q2 = q2
        self.q3 = q3
        self.sum_gap = sum_gap
        self.n50 = n50
        # Quality summary.
        self.pass_q20 = pass_q20
        self.pass_q30 = pass_q30
        self.read_quality_average = read_quality_average

    def __repr__(self):
        # Debugging aid: list every stored attribute in assignment order.
        fields = ", ".join("%s=%r" % item for item in vars(self).items())
        return "%s(%s)" % (type(self).__name__, fields)
1
9ac0b1d5560d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents: 0
diff changeset
27
9ac0b1d5560d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents: 0
diff changeset
28
0
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
def nice_size(size):
    """Return *size* (a byte count) as a human-readable string.

    The value is scaled by powers of 1024 and labelled with one of
    bytes, KB, MB, GB, TB, PB or EB.  Byte values are printed without
    decimals, larger units with one decimal.  Negative sizes keep their
    sign.  Anything that cannot be converted to a float, or that is too
    large for the biggest unit, yields the placeholder '??? bytes'.
    """
    words = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    try:
        size = float(size)
    except (TypeError, ValueError, OverflowError):
        # Not a number (or an int too large for a float): best-effort placeholder.
        return '??? bytes'
    prefix = ''
    if size < 0:
        size = abs(size)
        prefix = '-'
    for ind, word in enumerate(words):
        # First unit whose upper bound exceeds the value is the one to use.
        step = 1024 ** (ind + 1)
        if step > size:
            size = size / float(1024 ** ind)
            if word == 'bytes':  # No decimals for bytes
                return "%s%d bytes" % (prefix, size)
            return "%s%.1f %s" % (prefix, size, word)
    # Larger than the biggest unit handled (also reached for NaN / infinity).
    return '??? bytes'
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
48
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
49
4
4535ad8b74f3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents: 1
diff changeset
def _reported_values(stats):
    """Return the attributes of *stats* that are reported, in column order."""
    # seq_type is carried by the statistics object but is not part of the report.
    field_names = ('file_name', 'file_size', 'num_seqs', 'sum_len', 'min_len',
                   'avg_len', 'max_len', 'q1', 'q2', 'q3', 'sum_gap', 'n50',
                   'pass_q20', 'pass_q30', 'read_quality_average')
    return [getattr(stats, name) for name in field_names]


def output_statistics(read1_stats, read2_stats, output_file):
    """Write a two-line, tab-separated statistics report to *output_file*.

    The first line is the header, the second holds the values.  When
    *read2_stats* is None the report has one set of columns; otherwise the
    columns appear twice, prefixed with 'R1 ' and 'R2 ' respectively.

    read1_stats / read2_stats: objects exposing the attributes listed in
        _reported_values (read2_stats may be None for single reads).
    output_file: path of the file to create or overwrite.
    """
    labels = ['FASTQ', 'File Size', 'Read Count', 'Length Sum', 'Min Length',
              'Ave Length', 'Max Length', 'Q1', 'Q2', 'Q3', 'Sum Gap', 'N50',
              'Passing Q20', 'Passing Q30', 'Read Quality Ave']
    paired_reads = read2_stats is not None
    if paired_reads:
        columns = ['R1 %s' % label for label in labels] + ['R2 %s' % label for label in labels]
        line_items = _reported_values(read1_stats) + _reported_values(read2_stats)
    else:
        columns = labels
        line_items = _reported_values(read1_stats)
    # Both rows are assembled before the file is opened so that a missing
    # attribute cannot leave a header-only file behind.
    with open(output_file, "w") as outfh:
        # The header is written exactly as built above; no leading '#' is added.
        # NOTE(review): an earlier comment here said the header must start with
        # '#' for MultiQC - confirm whether downstream consumers need that.
        outfh.write("%s\n" % "\t".join(columns))
        outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))
0
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
102
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
103
4
4535ad8b74f3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents: 1
diff changeset
def get_statistics(fastq_file, seqkit_stats_file, seqkit_fx2tab_file):
    """Collect the statistics for one FASTQ file into a Statistics object.

    fastq_file: path of the FASTQ file; its on-disk size is reported and the
        path itself is used as the reported file name.
    seqkit_stats_file: tab-separated file made of a header line followed by
        a data row.  Columns 2 through 12 of the data row are required;
        columns 13 and 14 (Q20 / Q30 pass values) default to 0 when absent.
    seqkit_fx2tab_file: tab-separated file with a header line and one row
        per read; the fourth column of each row is averaged.
        NOTE(review): presumably the per-read average quality column of
        `seqkit fx2tab` - confirm against the command that produces it.

    Raises ValueError if seqkit_stats_file has no data row or a quality
    value is not numeric, and IndexError if a row has too few columns.
    """
    file_size = nice_size(os.path.getsize(fastq_file))
    # SeqKit statistics.
    items = None
    with open(seqkit_stats_file, "r") as fh:
        # This is a 2-line file; if more rows are present the last one wins.
        for i, line in enumerate(fh):
            if i == 0:
                # Skip header
                continue
            items = line.rstrip('\r\n').split("\t")
    if items is None:
        raise ValueError("No data row found in SeqKit stats file: %s" % seqkit_stats_file)
    file_name = fastq_file
    seq_type = items[2]
    num_seqs = items[3]
    sum_len = items[4]
    min_len = items[5]
    avg_len = items[6]
    max_len = items[7]
    q1 = items[8]
    q2 = items[9]
    q3 = items[10]
    sum_gap = items[11]
    n50 = items[12]
    # The two trailing columns are optional.
    pass_q20 = items[13] if len(items) > 13 else 0
    pass_q30 = items[14] if len(items) > 14 else 0
    # Average read quality is not normalized on length.
    avg_sum = 0
    read_count = 0
    with open(seqkit_fx2tab_file, "r") as fh:
        # Skip header
        next(fh, None)
        for line in fh:
            avg_sum += float(line.rstrip('\r\n').split("\t")[3])
            read_count += 1
    # Divide by the number of rows actually summed; a file without any read
    # rows reports 0.00 instead of failing.
    mean_quality = avg_sum / float(read_count) if read_count else 0.0
    read_quality_average = "{:.2f}".format(mean_quality)
    return Statistics(file_name, file_size, seq_type, num_seqs, sum_len, min_len, avg_len,
                      max_len, q1, q2, q3, sum_gap, n50, pass_q20, pass_q30, read_quality_average)
0
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
148
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
149
ec6e02f4eab7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff changeset
# Command-line interface.  Every option is a plain string-valued flag, so the
# argparse defaults (action='store', dest taken from the flag name,
# required=False, default=None) are relied on and only the help text is given.
parser = argparse.ArgumentParser()
for _flag, _help_text in (
    ('--output', 'Output Excel statistics file'),
    ('--read1', 'Required: single read'),
    ('--read2', 'Optional: paired read'),
    ('--read1_seqkit_stats', 'Output of SeqKit statistics for forward read'),
    ('--read2_seqkit_stats', 'Output of SeqKit statistics for reverse read'),
    ('--read1_seqkit_fx2tab', 'Output of SeqKit fx2tab for forward read'),
    ('--read2_seqkit_fx2tab', 'Output of SeqKit fx2tab for reverse read'),
):
    parser.add_argument(_flag, help=_help_text)

args = parser.parse_args()

# Statistics for read1 are always gathered from its three input files.
read1_stats = get_statistics(args.read1, args.read1_seqkit_stats, args.read1_seqkit_fx2tab)

# Statistics for read2 are gathered only when --read2 was supplied; otherwise
# None is handed to output_statistics in its place.
read2_stats = (
    None
    if args.read2 is None
    else get_statistics(args.read2, args.read2_seqkit_stats, args.read2_seqkit_fx2tab)
)

output_statistics(read1_stats, read2_stats, args.output)