annotate SalmID.py @ 9:43f6b7f6ebb3 draft

planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author cstrittmatter
date Thu, 30 Apr 2020 21:47:42 -0400
parents fc22ec8e924e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
1 #!/usr/bin/env python3
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
2
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
3
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
4 import gzip
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
5 import io
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
6 import pickle
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
7 import os
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
8 import sys
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
9
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
10 from argparse import ArgumentParser
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
11 try:
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
12 from .version import SalmID_version
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
13 except ImportError:
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
14 SalmID_version = "version unknown"
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
15
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
16
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def reverse_complement(sequence):
    """Return the reverse complement of a nucleotide sequence.

    IUPAC ambiguous nucleotide codes are supported. The input is upper-cased
    before the lookup, so lower-case or mixed-case sequences are accepted and
    the result is always upper case.

    Args:
        sequence(str): nucleotide sequence

    Returns:
        str: the reverse complement of the sequence, in upper case

    Raises:
        KeyError: if the sequence contains a character that is not one of the
            nucleotide codes listed in the complement table below.
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N', 'M': 'K', 'R': 'Y', 'W': 'W',
                  'S': 'S', 'Y': 'R', 'K': 'M', 'V': 'B', 'H': 'D', 'D': 'H', 'B': 'V'}
    # Normalise the case once so that lower-case bases do not raise KeyError.
    return "".join(complement[base] for base in reversed(sequence.upper()))
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
22
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
23
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def parse_args():
    """Build the SalmID command line parser and return the parsed arguments.

    When the program is started without any command line argument, the help
    text is written to stderr and the process exits with status 1.
    Use '-h' for help.
    """
    cli = ArgumentParser(description='SalmID - rapid Kmer based Salmonella identifier from sequence data')
    cli.add_argument('-v', '--version', action='version', version='%(prog)s ' + SalmID_version)

    # One row per optional string argument, kept in the order they should
    # appear in the help output: (short flag, long flag, default, metavar, help).
    string_options = (
        ('-i', '--input_file', 'None', 'your_fastqgz',
         'Single fastq.gz file input, include path to file if file is not in same directory '),
        ('-e', '--extension', '.fastq.gz', 'file_extension',
         'File extension, if specified without "--input_dir", SalmID will attempt to ID all files\n'
         ' with this extension in current directory, otherwise files in input directory'),
        ('-d', '--input_dir', '.', 'directory',
         'Directory which contains data for identification, when not specified files in current directory will be analyzed.'),
        ('-r', '--report', 'percentage', 'percentage, coverage or taxonomy',
         'Report either percentage ("percentage") of clade specific kmers recovered, average kmer-coverage ("cov"), or '
         'taxonomy (taxonomic species ID, plus observed mean k-mer coverages and expected coverage).'),
        ('-m', '--mode', 'quick', 'quick or thorough',
         'Quick [quick] or thorough [thorough] mode'),
    )
    for short_flag, long_flag, fallback, placeholder, description in string_options:
        cli.add_argument(short_flag, long_flag, type=str, required=False,
                         default=fallback, metavar=placeholder, help=description)

    # Only the program name on the command line: show usage instead of running.
    if len(sys.argv) == 1:
        cli.print_help(sys.stderr)
        sys.exit(1)
    return cli.parse_args()
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
51
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
52
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def get_av_read_length(file):
    """Sample up to the first 100 reads of a fastq file and return their average length.

    Every fourth line, starting with the second line of the file, is treated as
    a read sequence. Reading stops as soon as 100 reads have been sampled.

    Args:
        file(str): path to the fastq file; a name ending in ".gz" is read
            through gzip, anything else is opened as plain text.

    Returns:
        float: mean length of the sampled reads, 0.0 if the file holds no reads.
    """
    max_reads = 100
    n_reads = 0
    total_length = 0
    # gzip input is read as bytes, plain input as text; len() of the stripped
    # line works for both.
    handle = gzip.open(file, "rb") if file.endswith(".gz") else open(file, "r")
    # Stream the file line by line and make sure the handle is closed again.
    with handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 == 2:
                total_length += len(line.strip())
                n_reads += 1
                if n_reads == max_reads:
                    break
    if n_reads == 0:
        return 0.0
    # Divide by the number of reads actually sampled, which may be below 100.
    return total_length / n_reads
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
70
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
71
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def createKmerDict_reads(list_of_strings, kmer):
    """Count occurrence of K-mers in a list of strings.

    Each K-mer and its reverse complement are both counted, so a K-mer that is
    its own reverse complement is counted twice per occurrence. Sequences are
    stripped of surrounding whitespace and upper-cased before counting;
    sequences shorter than the K-mer length contribute nothing.

    Args:
        list_of_strings(list of str): nucleotide sequences as a list of strings
        kmer(int): length of the K-mer to count

    Returns:
        dict: dictionary with (upper-case) kmers as keys, counts for each kmer as values
    """
    kmer_table = {}
    for string in list_of_strings:
        # Normalise once, so the membership test and the stored key always use
        # the same (upper) case and the reverse complement lookup cannot fail
        # on lower-case bases.
        sequence = string.strip().upper()
        # range() is empty when the sequence is shorter than the K-mer.
        for i in range(len(sequence) - kmer + 1):
            new_mer = sequence[i:i + kmer]
            new_mer_rc = reverse_complement(new_mer)
            kmer_table[new_mer] = kmer_table.get(new_mer, 0) + 1
            kmer_table[new_mer_rc] = kmer_table.get(new_mer_rc, 0) + 1
    return kmer_table
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
97
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
98
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def target_read_kmerizer_multi(file, k, kmerDict_1, kmerDict_2, mode):
    """Select reads by their central k-mer and count the k-mers of the selected reads.

    Every fourth line of the fastq file, starting with the second, is treated
    as a read. The k characters around the middle of the line are looked up in
    kmerDict_1 and kmerDict_2; a read that hits a container is kept for that
    target. The kept reads of each target are then k-merized with
    createKmerDict_reads.

    Args:
        file(str): path to the fastq file; a name ending in ".gz" is read through gzip
        k(int): k-mer length
        kmerDict_1: container of k-mers (anything supporting ``in``) for the first target
        kmerDict_2: container of k-mers (anything supporting ``in``) for the second target
        mode(str): with 'quick', reading stops once the summed line length of
            the reads kept for the second target reaches 800000

    Returns:
        tuple: (k-mer counts of reads kept for target 1,
                k-mer counts of reads kept for target 2,
                mean k-mer count for target 1 or None if there is none,
                mean k-mer count for target 2 or None if there is none,
                number of reads inspected)
    """
    is_gzipped = file.endswith(".gz")
    reads_1 = []
    reads_2 = []
    total_coverage_2 = 0
    total_reads = 0
    handle = gzip.open(file, "rb") if is_gzipped else open(file, "r")
    # Stream the file instead of loading it completely and always close it.
    with handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 == 2:
                total_reads += 1
                # The offset is taken from the raw line (line terminator
                # included) so the same k-mer as before is selected.
                start = (len(line) - k) // 2
                if is_gzipped:
                    s1 = line[start:k + start].decode()
                    line = line.decode()
                else:
                    s1 = line[start:k + start]
                if s1 in kmerDict_1:
                    reads_1.append(line)
                if s1 in kmerDict_2:
                    total_coverage_2 += len(line)
                    reads_2.append(line)
            if mode == 'quick' and total_coverage_2 >= 800000:
                break

    kmer_Dict1 = createKmerDict_reads(reads_1, k) if reads_1 else {}
    kmer_Dict2 = createKmerDict_reads(reads_2, k) if reads_2 else {}
    # The mean stays None when no k-mer was counted (avoids dividing by zero).
    mean_1 = sum(kmer_Dict1.values()) / len(kmer_Dict1) if kmer_Dict1 else None
    mean_2 = sum(kmer_Dict2.values()) / len(kmer_Dict2) if kmer_Dict2 else None
    return kmer_Dict1, kmer_Dict2, mean_1, mean_2, total_reads
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
149
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
150
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def mean_cov_selected_kmers(iterable, kmer_dict, clade_specific_kmers):
    """Return the mean coverage for a selection of kmers.

    The frequencies stored in kmer_dict are summed over the kmers in iterable
    and the sum is divided by the number of clade specific kmers.

    :param iterable: set, list or dictionary containing kmers
    :param kmer_dict: dictionary with kmers as keys, kmer-frequency as value
    :param clade_specific_kmers: list, dict or set of clade specific kmers
    :return: mean frequency, 0 when iterable is empty
    """
    if not len(iterable):
        return 0
    summed_frequency = 0
    for selected_kmer in iterable:
        summed_frequency += kmer_dict[selected_kmer]
    return summed_frequency / len(clade_specific_kmers)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
162
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
163
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
164 def kmer_lists(query_fastq_gz, k,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
165 allmers, allmers_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
166 uniqmers_bongori,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
167 uniqmers_I,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
168 uniqmers_IIa,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
169 uniqmers_IIb,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
170 uniqmers_IIIa,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
171 uniqmers_IIIb,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
172 uniqmers_IV,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
173 uniqmers_VI,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
174 uniqmers_VII,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
175 uniqmers_VIII,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
176 uniqmers_bongori_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
177 uniqmers_S_enterica_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
178 uniqmers_Escherichia_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
179 uniqmers_Listeria_ss_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
180 uniqmers_Lmono_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
181 mode):
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
182 dict_invA, dict_rpoB, mean_invA, mean_rpoB, total_reads = target_read_kmerizer_multi(query_fastq_gz, k, allmers,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
183 allmers_rpoB, mode)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
184 target_mers_invA = set([key for key in dict_invA])
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
185 target_mers_rpoB = set([key for key in dict_rpoB])
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
186 if target_mers_invA == 0:
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
187 print('No reads found matching invA, no Salmonella in sample?')
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
188 else:
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
189 p_bongori = (len(uniqmers_bongori & target_mers_invA) / len(uniqmers_bongori)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
190 p_I = (len(uniqmers_I & target_mers_invA) / len(uniqmers_I)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
191 p_IIa = (len(uniqmers_IIa & target_mers_invA) / len(uniqmers_IIa)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
192 p_IIb = (len(uniqmers_IIb & target_mers_invA) / len(uniqmers_IIb)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
193 p_IIIa = (len(uniqmers_IIIa & target_mers_invA) / len(uniqmers_IIIa)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
194 p_IIIb = (len(uniqmers_IIIb & target_mers_invA) / len(uniqmers_IIIb)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
195 p_VI = (len(uniqmers_VI & target_mers_invA) / len(uniqmers_VI)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
196 p_IV = (len(uniqmers_IV & target_mers_invA) / len(uniqmers_IV)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
197 p_VII = (len(uniqmers_VII & target_mers_invA) / len(uniqmers_VII)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
198 p_VIII = (len(uniqmers_VIII & target_mers_invA) / len(uniqmers_VIII)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
199 p_bongori_rpoB = (len(uniqmers_bongori_rpoB & target_mers_rpoB) / len(uniqmers_bongori_rpoB)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
200 p_Senterica = (len(uniqmers_S_enterica_rpoB & target_mers_rpoB) / len(uniqmers_S_enterica_rpoB)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
201 p_Escherichia = (len(uniqmers_Escherichia_rpoB & target_mers_rpoB) / len(uniqmers_Escherichia_rpoB)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
202 p_Listeria_ss = (len(uniqmers_Listeria_ss_rpoB & target_mers_rpoB) / len(uniqmers_Listeria_ss_rpoB)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
203 p_Lmono = (len(uniqmers_Lmono_rpoB & target_mers_rpoB) / len(uniqmers_Lmono_rpoB)) * 100
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
204 bongori_invA_cov = mean_cov_selected_kmers(uniqmers_bongori & target_mers_invA, dict_invA, uniqmers_bongori)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
205 I_invA_cov = mean_cov_selected_kmers(uniqmers_I & target_mers_invA, dict_invA, uniqmers_I)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
206 IIa_invA_cov = mean_cov_selected_kmers(uniqmers_IIa & target_mers_invA, dict_invA, uniqmers_IIa)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
207 IIb_invA_cov = mean_cov_selected_kmers(uniqmers_IIb & target_mers_invA, dict_invA, uniqmers_IIb)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
208 IIIa_invA_cov = mean_cov_selected_kmers(uniqmers_IIIa & target_mers_invA, dict_invA, uniqmers_IIIa)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
209 IIIb_invA_cov = mean_cov_selected_kmers(uniqmers_IIIb & target_mers_invA, dict_invA, uniqmers_IIIb)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
210 IV_invA_cov = mean_cov_selected_kmers(uniqmers_IV & target_mers_invA, dict_invA, uniqmers_IV)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
211 VI_invA_cov = mean_cov_selected_kmers(uniqmers_VI & target_mers_invA, dict_invA, uniqmers_VI)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
212 VII_invA_cov = mean_cov_selected_kmers(uniqmers_VII & target_mers_invA, dict_invA, uniqmers_VII)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
213 VIII_invA_cov = mean_cov_selected_kmers(uniqmers_VIII & target_mers_invA, dict_invA, uniqmers_VIII)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
214 S_enterica_rpoB_cov = mean_cov_selected_kmers((uniqmers_S_enterica_rpoB & target_mers_rpoB), dict_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
215 uniqmers_S_enterica_rpoB)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
216 S_bongori_rpoB_cov = mean_cov_selected_kmers((uniqmers_bongori_rpoB & target_mers_rpoB), dict_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
217 uniqmers_bongori_rpoB)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
218 Escherichia_rpoB_cov = mean_cov_selected_kmers((uniqmers_Escherichia_rpoB & target_mers_rpoB), dict_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
219 uniqmers_Escherichia_rpoB)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
220 Listeria_ss_rpoB_cov = mean_cov_selected_kmers((uniqmers_Listeria_ss_rpoB & target_mers_rpoB), dict_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
221 uniqmers_Listeria_ss_rpoB)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
222 Lmono_rpoB_cov = mean_cov_selected_kmers((uniqmers_Lmono_rpoB & target_mers_rpoB), dict_rpoB,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
223 uniqmers_Lmono_rpoB)
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
224 coverages = [Listeria_ss_rpoB_cov, Lmono_rpoB_cov, Escherichia_rpoB_cov, S_bongori_rpoB_cov,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
225 S_enterica_rpoB_cov, bongori_invA_cov, I_invA_cov, IIa_invA_cov, IIb_invA_cov,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
226 IIIa_invA_cov, IIIb_invA_cov, IV_invA_cov, VI_invA_cov, VII_invA_cov, VIII_invA_cov]
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
227 locus_scores = [p_Listeria_ss, p_Lmono, p_Escherichia, p_bongori_rpoB, p_Senterica, p_bongori,
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
228 p_I, p_IIa, p_IIb, p_IIIa, p_IIIb, p_IV, p_VI, p_VII, p_VIII]
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
229 return locus_scores, coverages, total_reads
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
230
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
231
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def report_taxon(locus_covs, average_read_length, number_of_reads):
    """Pick the best-supported rpoB and invA taxon from per-locus coverages.

    Args:
        locus_covs: Sequence of coverage values, one per entry of
            ``list_taxa`` below and in the same order: positions 0-4 are
            the rpoB markers, positions 5 onwards the invA markers.
        average_read_length: Average read length of the analysed file.
        number_of_reads: Number of reads in the analysed file.

    Returns:
        A tuple ``(rpoB, invA, expected_coverage)``. ``rpoB`` and ``invA``
        are ``(taxon_name, coverage)`` pairs, or ``('No rpoB matches!', 0)``
        / ``('No invA matches!', 0)`` when the locus has no coverage.
        ``expected_coverage`` is ``0.0`` when the summed coverage of all
        markers is below 1; otherwise it is
        ``average_read_length * number_of_reads`` divided by 3000000 for a
        Listeria call and by 5000000 for anything else.
    """
    list_taxa = [
        'Listeria ss', 'Listeria monocytogenes', 'Escherichia sp.',
        'Salmonella bongori (rpoB)', 'Salmonella enterica (rpoB)',
        'Salmonella bongori (invA)', 'S. enterica subsp. enterica (invA)',
        'S. enterica subsp. salamae (invA: clade a)', 'S. enterica subsp. salamae (invA: clade b)',
        'S. enterica subsp. arizonae (invA)', 'S. enterica subsp. diarizonae (invA)',
        'S. enterica subsp. houtenae (invA)', 'S. enterica subsp. indica (invA)',
        'S. enterica subsp. VII (invA)', 'S. enterica subsp. salamae (invA: clade VIII)',
    ]
    no_rpoB = ('No rpoB matches!', 0)
    no_invA = ('No invA matches!', 0)
    # Divisors used for the expected-coverage estimate.
    # NOTE(review): presumably approximate genome sizes in bp - confirm.
    listeria_divisor = 3000000
    default_divisor = 5000000

    # Less than 1x summed over every marker is treated as "nothing found".
    if sum(locus_covs) < 1:
        return no_rpoB, no_invA, 0.0

    # Slice once instead of re-slicing inside every key-function call.
    rpoB_covs = locus_covs[0:5]
    if sum(rpoB_covs) > 0:
        # Best marker when 'Listeria ss' (index 0) is excluded ...
        best_rpoB = max(range(1, len(rpoB_covs)), key=lambda i: rpoB_covs[i])
        # ... and best marker overall; max() keeps the first index on ties.
        all_rpoB = max(range(len(rpoB_covs)), key=lambda i: rpoB_covs[i])
        # 'Listeria ss' is reported only when it is the top marker and no
        # other rpoB marker has any coverage; otherwise the more specific
        # marker wins (this also covers the L. monocytogenes case).
        only_listeria_ss = (
            all_rpoB == 0
            and locus_covs[best_rpoB] == 0
            and round(sum(locus_covs[1:5]), 1) < 1
        )
        if only_listeria_ss:
            rpoB = (list_taxa[0], locus_covs[0])
        else:
            rpoB = (list_taxa[best_rpoB], locus_covs[best_rpoB])
    else:
        rpoB = no_rpoB

    invA_covs = locus_covs[5:]
    if sum(invA_covs) > 0:
        best_invA = max(range(len(invA_covs)), key=lambda i: invA_covs[i]) + 5
        invA = (list_taxa[best_invA], locus_covs[best_invA])
    else:
        invA = no_invA

    total_bases = average_read_length * number_of_reads
    if 'Listeria' in rpoB[0]:
        return rpoB, invA, total_bases / listeria_divisor
    return rpoB, invA, total_bases / default_divisor
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
266
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
267
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
def _load_kmer_sets(path):
    """Unpickle and return the k-mer set dictionary stored at *path*."""
    # NOTE: pickle can execute arbitrary code while loading; only files
    # shipped next to this script are read here.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def main():
    """Command-line entry point: score the input file(s) and print a report.

    The files to analyse are either the single ``input_file`` argument or,
    when that is the string ``'None'``, every file in ``input_dir`` whose
    name ends with ``extension``. The invA and rpoB k-mer dictionaries are
    loaded from the directory containing this script. One tab-separated
    header line is printed, followed by one line per file whose content
    depends on ``report``: ``'taxonomy'``, ``'percentage'``, or anything
    else for per-marker coverages.
    """
    ex_dir = os.path.dirname(os.path.realpath(__file__))
    args = parse_args()
    input_file = args.input_file
    if input_file != 'None':
        files = [input_file]
    else:
        extension = args.extension
        inputdir = args.input_dir
        files = [inputdir + '/' + f for f in os.listdir(inputdir) if f.endswith(extension)]
    report = args.report
    mode = args.mode

    sets_dict_invA = _load_kmer_sets(ex_dir + "/invA_mers_dict")
    invA_keys = ('uniqmers_bongori', 'uniqmers_I', 'uniqmers_IIa', 'uniqmers_IIb',
                 'uniqmers_IIIa', 'uniqmers_IIIb', 'uniqmers_IV', 'uniqmers_VI',
                 'uniqmers_VII', 'uniqmers_VIII')
    allmers = sets_dict_invA['allmers']
    invA_sets = [sets_dict_invA[key] for key in invA_keys]

    sets_dict = _load_kmer_sets(ex_dir + "/rpoB_mers_dict")
    rpoB_keys = ('uniqmers_bongori', 'uniqmers_S_enterica', 'uniqmers_Escherichia',
                 'uniqmers_Listeria_ss', 'uniqmers_L_mono')
    allmers_rpoB = sets_dict['allmers']
    rpoB_sets = [sets_dict[key] for key in rpoB_keys]

    # Positional arguments of kmer_lists() after the k-mer size, in the order
    # it expects them: both "all k-mers" sets, then the invA-specific sets,
    # then the rpoB-specific sets.
    kmer_sets = [allmers, allmers_rpoB] + invA_sets + rpoB_sets

    if report == 'taxonomy':
        print('file\trpoB\tinvA\texpected coverage')
    else:
        # NOTE(review): 'S. enterica(rpoB)' and 'subsp.IV' lack a space; the
        # header is kept byte-identical because downstream tools may parse it.
        print(
            'file\tListeria sensu stricto (rpoB)\tL. monocytogenes (rpoB)\tEscherichia spp. (rpoB)\tS. bongori (rpoB)\tS. enterica' +  # noqa: E122
            '(rpoB)\tS. bongori (invA)\tsubsp. I (invA)\tsubsp. II (clade a: invA)\tsubsp. II' +  # noqa: E122
            ' (clade b: invA)\tsubsp. IIIa (invA)\tsubsp. IIIb (invA)\tsubsp.IV (invA)\tsubsp. VI (invA)\tsubsp. VII (invA)' +  # noqa: E122
            '\tsubsp. II (clade VIII : invA)')

    # todo: run kmer_lists() once, create list of tuples containing data to be used for different reports
    for f in files:
        locus_scores, coverages, reads = kmer_lists(f, 27, *kmer_sets, mode)
        file_name = f.split('/')[-1]
        if report == 'taxonomy':
            pretty_covs = [round(cov, 1) for cov in coverages]
            # Unpacked into fresh names so the `report` mode is not clobbered.
            rpoB, invA, expected_cov = report_taxon(pretty_covs, get_av_read_length(f), reads)
            print(file_name + '\t' + rpoB[0] + '[' + str(rpoB[1]) + ']' + '\t' + invA[0] +
                  '[' + str(invA[1]) + ']' +
                  '\t' + str(round(expected_cov, 1)))
        elif report == 'percentage':
            pretty_scores = [str(round(score)) for score in locus_scores]
            print(file_name + '\t' + '\t'.join(pretty_scores))
        else:
            pretty_covs = [str(round(cov, 1)) for cov in coverages]
            print(file_name + '\t' + '\t'.join(pretty_covs))
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
382
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
383
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
fc22ec8e924e planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff changeset
386