Mercurial > repos > cstrittmatter > ss2v110
annotate SalmID.py @ 9:43f6b7f6ebb3 draft
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author | cstrittmatter |
---|---|
date | Thu, 30 Apr 2020 21:47:42 -0400 |
parents | fc22ec8e924e |
children |
rev | line source |
---|---|
0
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
2 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
3 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
4 import gzip |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
5 import io |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
6 import pickle |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
7 import os |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
8 import sys |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
9 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
10 from argparse import ArgumentParser |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
11 try: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
12 from .version import SalmID_version |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
13 except ImportError: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
14 SalmID_version = "version unknown" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
15 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
16 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
# Complement of every IUPAC nucleotide code. Built once at import time because
# reverse_complement() is called for every k-mer of every selected read.
_IUPAC_COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N', 'M': 'K', 'R': 'Y', 'W': 'W',
                     'S': 'S', 'Y': 'R', 'K': 'M', 'V': 'B', 'H': 'D', 'D': 'H', 'B': 'V'}
# Lower-case (e.g. soft-masked) bases are complemented to lower-case bases so
# that the case of the input is preserved instead of raising KeyError.
_IUPAC_COMPLEMENT.update({base.lower(): comp.lower() for base, comp in list(_IUPAC_COMPLEMENT.items())})


def reverse_complement(sequence):
    """Return the reverse complement of a nucleotide sequence.

    IUPAC ambiguous nucleotide codes are supported, in upper or lower case.
    The case of each base is preserved in the result.

    Args:
        sequence(str): nucleotide sequence

    Returns:
        str: the reverse complement of ``sequence``

    Raises:
        KeyError: if ``sequence`` contains a character that is not an IUPAC
            nucleotide code (for example whitespace or a digit)
    """
    return "".join(_IUPAC_COMPLEMENT[base] for base in reversed(sequence))
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
22 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
23 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def parse_args():
    """Build the SalmID command line interface and parse ``sys.argv``.

    When the program is started without any argument, the help text is
    written to stderr and the process exits with status 1.

    Returns:
        argparse.Namespace: parsed options with the attributes ``input_file``,
        ``extension``, ``input_dir``, ``report`` and ``mode``
    """
    cli = ArgumentParser(description='SalmID - rapid Kmer based Salmonella identifier from sequence data')
    cli.add_argument('-v', '--version', action='version', version='%(prog)s ' + SalmID_version)

    # Every remaining option is an optional string; only flags, default,
    # metavar and help text differ, so they are declared as a table.
    # The order of the table is the order shown in the help output.
    string_options = (
        (('-i', '--input_file'), 'None', 'your_fastqgz',
         'Single fastq.gz file input, include path to file if file is not in same directory '),
        (('-e', '--extension'), '.fastq.gz', 'file_extension',
         'File extension, if specified without "--input_dir", SalmID will attempt to ID all files\n' +
         ' with this extension in current directory, otherwise files in input directory'),
        (('-d', '--input_dir'), '.', 'directory',
         'Directory which contains data for identification, when not specified files in current directory will be analyzed.'),
        (('-r', '--report'), 'percentage', 'percentage, coverage or taxonomy',
         'Report either percentage ("percentage") of clade specific kmers recovered, average kmer-coverage ("cov"), or '
         'taxonomy (taxonomic species ID, plus observed mean k-mer coverages and expected coverage).'),
        (('-m', '--mode'), 'quick', 'quick or thorough',
         'Quick [quick] or thorough [thorough] mode'),
    )
    for flags, default_value, placeholder, description in string_options:
        cli.add_argument(*flags, type=str, required=False, default=default_value,
                         metavar=placeholder, help=description)

    # Only the program name on the command line: show usage instead of
    # silently running with all defaults.
    no_arguments_given = len(sys.argv) == 1
    if no_arguments_given:
        cli.print_help(sys.stderr)
        sys.exit(1)
    return cli.parse_args()
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
51 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
52 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def get_av_read_length(file):
    """Sample up to the first 100 reads of a fastq file and return their average length.

    Files whose name ends in ``.gz`` are read through gzip, all other files
    are read as plain text. Only as many lines as needed are read, and the
    file is closed before returning.

    Args:
        file(str): path to a fastq or fastq.gz file

    Returns:
        float: mean length of the sampled reads; 0.0 if the file contains no reads
    """
    if file.endswith(".gz"):
        handle = io.BufferedReader(gzip.open(file))
    else:
        handle = open(file, "r")
    n_reads = 0
    total_length = 0
    with handle:
        for line_number, line in enumerate(handle, start=1):
            # In a 4-line fastq record the sequence is the second line.
            if line_number % 4 != 2:
                continue
            total_length += len(line.strip())
            n_reads += 1
            if n_reads == 100:
                break
    if n_reads == 0:
        return 0.0
    # Divide by the number of reads actually sampled; files with fewer than
    # 100 reads would otherwise report an artificially short read length.
    return total_length / n_reads
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
70 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
71 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def createKmerDict_reads(list_of_strings, kmer):
    """Count occurrence of K-mers, on both strands, in a list of strings.

    Every K-mer of every read is counted together with its reverse
    complement. Reads are upper-cased and stripped of surrounding whitespace
    (including line endings) before counting, so keys are always upper-case.
    Reads shorter than ``kmer`` contribute nothing.

    Args:
        list_of_strings(list of str): nucleotide sequences as a list of strings
        kmer(int): length of the K-mer to count

    Returns:
        dict: dictionary with kmers as keys, counts for each kmer as values

    Raises:
        KeyError: raised by ``reverse_complement`` if a read contains a
            character that is not an IUPAC nucleotide code
    """
    kmer_table = {}
    for string in list_of_strings:
        # Normalise once: the table is keyed by upper-case k-mers, so the
        # lookup and the update must use the same (upper-case) key.
        sequence = string.strip().upper()
        # range() is empty when the read is shorter than the k-mer length.
        for i in range(len(sequence) - kmer + 1):
            new_mer = sequence[i:i + kmer]
            new_mer_rc = reverse_complement(new_mer)
            kmer_table[new_mer] = kmer_table.get(new_mer, 0) + 1
            # A palindromic k-mer equals its reverse complement and is
            # therefore deliberately counted twice, as before.
            kmer_table[new_mer_rc] = kmer_table.get(new_mer_rc, 0) + 1
    return kmer_table
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
97 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
98 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def _count_kmers_with_mean(reads, k):
    """Return (k-mer count dict, mean count per distinct k-mer) for reads; the mean is None without k-mers."""
    if not reads:
        return {}, None
    counts = createKmerDict_reads(reads, k)
    if not counts:
        # Reads were selected but none yielded a k-mer; avoid dividing by zero.
        return counts, None
    return counts, sum(counts.values()) / len(counts)


def target_read_kmerizer_multi(file, k, kmerDict_1, kmerDict_2, mode):
    """Select reads matching two k-mer sets and count the k-mers of the selected reads.

    For every read of the fastq file the k-mer in the middle of the sequence
    line is looked up in ``kmerDict_1`` and ``kmerDict_2``. Reads whose middle
    k-mer is present are collected per set and k-mer counted with
    ``createKmerDict_reads``. The file is streamed line by line and closed
    before returning.

    Args:
        file(str): path to a fastq file; a name ending in ``.gz`` is read through gzip
        k(int): k-mer length
        kmerDict_1: container of k-mers (supports ``in``) for the first target
        kmerDict_2: container of k-mers (supports ``in``) for the second target
        mode(str): with ``'quick'`` reading stops once the reads selected for
            the second target add up to at least 800000 characters; any
            other value processes the whole file

    Returns:
        tuple: ``(kmer_counts_1, kmer_counts_2, mean_1, mean_2, total_reads)``
        where the count dicts are empty and the means are ``None`` for a
        target without selected reads, and ``total_reads`` is the number of
        reads inspected
    """
    is_gzipped = file.endswith(".gz")
    if is_gzipped:
        handle = io.BufferedReader(gzip.open(file))
    else:
        handle = open(file, "r")
    stop_early = mode == 'quick'
    reads_1 = []
    reads_2 = []
    total_coverage_2 = 0
    total_reads = 0
    with handle:
        for line_number, line in enumerate(handle, start=1):
            # In a 4-line fastq record the sequence is the second line.
            if line_number % 4 != 2:
                continue
            total_reads += 1
            # The offset is derived from the raw line, i.e. including its
            # line terminator, exactly as the selection has always worked.
            start = (len(line) - k) // 2
            if is_gzipped:
                s1 = line[start:k + start].decode()
                line = line.decode()
            else:
                s1 = line[start:k + start]
            if s1 in kmerDict_1:
                reads_1.append(line)
            if s1 in kmerDict_2:
                total_coverage_2 += len(line)
                reads_2.append(line)
            if stop_early and total_coverage_2 >= 800000:
                break

    kmer_Dict1, mean_1 = _count_kmers_with_mean(reads_1, k)
    kmer_Dict2, mean_2 = _count_kmers_with_mean(reads_2, k)
    return kmer_Dict1, kmer_Dict2, mean_1, mean_2, total_reads
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
149 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
150 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def mean_cov_selected_kmers(iterable, kmer_dict, clade_specific_kmers):
    '''
    Sum the frequencies of the kmers in iterable and average them over the
    number of clade specific kmers.
    :param iterable: set, list or dictionary containing the kmers to look up
    :param kmer_dict: dictionary with kmers as keys, kmer-frequency as value
    :param clade_specific_kmers: list, dict or set of clade specific kmers
    :return: summed frequency divided by len(clade_specific_kmers); 0 if iterable is empty
    '''
    if len(iterable) == 0:
        return 0
    summed_frequency = 0
    for selected_kmer in iterable:
        summed_frequency += kmer_dict[selected_kmer]
    n_clade_kmers = len(clade_specific_kmers)
    return summed_frequency / n_clade_kmers
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
162 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
163 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
164 def kmer_lists(query_fastq_gz, k, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
165 allmers, allmers_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
166 uniqmers_bongori, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
167 uniqmers_I, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
168 uniqmers_IIa, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
169 uniqmers_IIb, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
170 uniqmers_IIIa, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
171 uniqmers_IIIb, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
172 uniqmers_IV, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
173 uniqmers_VI, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
174 uniqmers_VII, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
175 uniqmers_VIII, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
176 uniqmers_bongori_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
177 uniqmers_S_enterica_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
178 uniqmers_Escherichia_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
179 uniqmers_Listeria_ss_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
180 uniqmers_Lmono_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
181 mode): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
182 dict_invA, dict_rpoB, mean_invA, mean_rpoB, total_reads = target_read_kmerizer_multi(query_fastq_gz, k, allmers, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
183 allmers_rpoB, mode) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
184 target_mers_invA = set([key for key in dict_invA]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
185 target_mers_rpoB = set([key for key in dict_rpoB]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
186 if target_mers_invA == 0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
187 print('No reads found matching invA, no Salmonella in sample?') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
188 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
189 p_bongori = (len(uniqmers_bongori & target_mers_invA) / len(uniqmers_bongori)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
190 p_I = (len(uniqmers_I & target_mers_invA) / len(uniqmers_I)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
191 p_IIa = (len(uniqmers_IIa & target_mers_invA) / len(uniqmers_IIa)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
192 p_IIb = (len(uniqmers_IIb & target_mers_invA) / len(uniqmers_IIb)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
193 p_IIIa = (len(uniqmers_IIIa & target_mers_invA) / len(uniqmers_IIIa)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
194 p_IIIb = (len(uniqmers_IIIb & target_mers_invA) / len(uniqmers_IIIb)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
195 p_VI = (len(uniqmers_VI & target_mers_invA) / len(uniqmers_VI)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
196 p_IV = (len(uniqmers_IV & target_mers_invA) / len(uniqmers_IV)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
197 p_VII = (len(uniqmers_VII & target_mers_invA) / len(uniqmers_VII)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
198 p_VIII = (len(uniqmers_VIII & target_mers_invA) / len(uniqmers_VIII)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
199 p_bongori_rpoB = (len(uniqmers_bongori_rpoB & target_mers_rpoB) / len(uniqmers_bongori_rpoB)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
200 p_Senterica = (len(uniqmers_S_enterica_rpoB & target_mers_rpoB) / len(uniqmers_S_enterica_rpoB)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
201 p_Escherichia = (len(uniqmers_Escherichia_rpoB & target_mers_rpoB) / len(uniqmers_Escherichia_rpoB)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
202 p_Listeria_ss = (len(uniqmers_Listeria_ss_rpoB & target_mers_rpoB) / len(uniqmers_Listeria_ss_rpoB)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
203 p_Lmono = (len(uniqmers_Lmono_rpoB & target_mers_rpoB) / len(uniqmers_Lmono_rpoB)) * 100 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
204 bongori_invA_cov = mean_cov_selected_kmers(uniqmers_bongori & target_mers_invA, dict_invA, uniqmers_bongori) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
205 I_invA_cov = mean_cov_selected_kmers(uniqmers_I & target_mers_invA, dict_invA, uniqmers_I) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
206 IIa_invA_cov = mean_cov_selected_kmers(uniqmers_IIa & target_mers_invA, dict_invA, uniqmers_IIa) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
207 IIb_invA_cov = mean_cov_selected_kmers(uniqmers_IIb & target_mers_invA, dict_invA, uniqmers_IIb) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
208 IIIa_invA_cov = mean_cov_selected_kmers(uniqmers_IIIa & target_mers_invA, dict_invA, uniqmers_IIIa) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
209 IIIb_invA_cov = mean_cov_selected_kmers(uniqmers_IIIb & target_mers_invA, dict_invA, uniqmers_IIIb) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
210 IV_invA_cov = mean_cov_selected_kmers(uniqmers_IV & target_mers_invA, dict_invA, uniqmers_IV) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
211 VI_invA_cov = mean_cov_selected_kmers(uniqmers_VI & target_mers_invA, dict_invA, uniqmers_VI) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
212 VII_invA_cov = mean_cov_selected_kmers(uniqmers_VII & target_mers_invA, dict_invA, uniqmers_VII) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
213 VIII_invA_cov = mean_cov_selected_kmers(uniqmers_VIII & target_mers_invA, dict_invA, uniqmers_VIII) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
214 S_enterica_rpoB_cov = mean_cov_selected_kmers((uniqmers_S_enterica_rpoB & target_mers_rpoB), dict_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
215 uniqmers_S_enterica_rpoB) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
216 S_bongori_rpoB_cov = mean_cov_selected_kmers((uniqmers_bongori_rpoB & target_mers_rpoB), dict_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
217 uniqmers_bongori_rpoB) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
218 Escherichia_rpoB_cov = mean_cov_selected_kmers((uniqmers_Escherichia_rpoB & target_mers_rpoB), dict_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
219 uniqmers_Escherichia_rpoB) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
220 Listeria_ss_rpoB_cov = mean_cov_selected_kmers((uniqmers_Listeria_ss_rpoB & target_mers_rpoB), dict_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
221 uniqmers_Listeria_ss_rpoB) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
222 Lmono_rpoB_cov = mean_cov_selected_kmers((uniqmers_Lmono_rpoB & target_mers_rpoB), dict_rpoB, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
223 uniqmers_Lmono_rpoB) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
224 coverages = [Listeria_ss_rpoB_cov, Lmono_rpoB_cov, Escherichia_rpoB_cov, S_bongori_rpoB_cov, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
225 S_enterica_rpoB_cov, bongori_invA_cov, I_invA_cov, IIa_invA_cov, IIb_invA_cov, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
226 IIIa_invA_cov, IIIb_invA_cov, IV_invA_cov, VI_invA_cov, VII_invA_cov, VIII_invA_cov] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
227 locus_scores = [p_Listeria_ss, p_Lmono, p_Escherichia, p_bongori_rpoB, p_Senterica, p_bongori, |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
228 p_I, p_IIa, p_IIb, p_IIIa, p_IIIb, p_IV, p_VI, p_VII, p_VIII] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
229 return locus_scores, coverages, total_reads |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
230 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
231 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def report_taxon(locus_covs, average_read_length, number_of_reads):
    """Pick the best-supported rpoB and invA taxon and estimate read depth.

    Args:
        locus_covs: Sequence of per-taxon coverage values in the same order
            as ``list_taxa`` below: indices 0-4 are the rpoB markers and
            indices 5 onwards are the invA markers.
        average_read_length: Average read length of the analysed file.
        number_of_reads: Number of reads in the analysed file.

    Returns:
        A tuple ``(rpoB, invA, expected_coverage)``. ``rpoB`` and ``invA``
        are ``(label, coverage)`` pairs; the label is a taxon name from
        ``list_taxa`` or a "No ... matches!" placeholder paired with 0.
        ``expected_coverage`` is ``average_read_length * number_of_reads``
        divided by a fixed genome size, or ``0.0`` when the summed
        coverage is below 1.
    """
    list_taxa = [
        'Listeria ss', 'Listeria monocytogenes', 'Escherichia sp.',
        'Salmonella bongori (rpoB)', 'Salmonella enterica (rpoB)',
        'Salmonella bongori (invA)', 'S. enterica subsp. enterica (invA)',
        'S. enterica subsp. salamae (invA: clade a)', 'S. enterica subsp. salamae (invA: clade b)',
        'S. enterica subsp. arizonae (invA)', 'S. enterica subsp. diarizonae (invA)',
        'S. enterica subsp. houtenae (invA)', 'S. enterica subsp. indica (invA)',
        'S. enterica subsp. VII (invA)', 'S. enterica subsp. salamae (invA: clade VIII)',
    ]
    no_rpoB = ('No rpoB matches!', 0)
    no_invA = ('No invA matches!', 0)
    # NOTE(review): presumably approximate genome sizes in bases (Listeria vs.
    # the other reported genera) -- confirm with the tool's authors.
    listeria_genome_size = 3000000
    default_genome_size = 5000000

    # Too little signal overall: report nothing for either locus.
    if sum(locus_covs) < 1:
        return no_rpoB, no_invA, 0.0

    # Slice once instead of re-slicing inside every key lookup.
    rpoB_covs = locus_covs[0:5]
    invA_covs = locus_covs[5:]

    if sum(rpoB_covs) > 0:
        # max() returns the first maximum, so ties go to the lowest index.
        best_rpoB = max(range(1, len(rpoB_covs)), key=rpoB_covs.__getitem__)
        all_rpoB = max(range(len(rpoB_covs)), key=rpoB_covs.__getitem__)
        # 'Listeria ss' (index 0) is reported only when it is the top rpoB
        # marker and no other rpoB marker has any coverage; in every other
        # case the best of the remaining markers (indices 1-4) is reported.
        if all_rpoB == 0 and locus_covs[best_rpoB] == 0:
            rpoB = (list_taxa[0], locus_covs[0])
        else:
            rpoB = (list_taxa[best_rpoB], locus_covs[best_rpoB])
    else:
        rpoB = no_rpoB

    if sum(invA_covs) > 0:
        best_invA = max(range(len(invA_covs)), key=invA_covs.__getitem__) + 5
        invA = (list_taxa[best_invA], locus_covs[best_invA])
    else:
        invA = no_invA

    if 'Listeria' in rpoB[0]:
        genome_size = listeria_genome_size
    else:
        genome_size = default_genome_size
    return rpoB, invA, (average_read_length * number_of_reads) / genome_size
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
266 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
267 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def _load_pickle(path):
    """Return the object unpickled from the file at *path*.

    The file is opened in a ``with`` block so the handle is released even
    when unpickling raises.
    """
    # NOTE(review): pickle can execute arbitrary code while loading. This is
    # only acceptable if the k-mer dictionaries are trusted files shipped
    # next to this script -- never point this at user-supplied data.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def main():
    """Run SalmID from the command line and print a tab-separated report.

    Reads the options returned by ``parse_args()``, loads the invA and rpoB
    k-mer dictionaries stored next to this script, runs ``kmer_lists`` on
    every input file and prints one header line followed by one line per
    file. ``args.report`` selects the layout: ``'taxonomy'`` prints the best
    rpoB/invA calls with the expected coverage, ``'percentage'`` prints the
    rounded locus scores, and any other value prints the coverages rounded
    to one decimal.
    """
    kmer_size = 27
    ex_dir = os.path.dirname(os.path.realpath(__file__))
    args = parse_args()

    # A single file takes precedence; the literal string 'None' means that
    # no file was given and the input directory is scanned instead.
    if args.input_file != 'None':
        files = [args.input_file]
    else:
        files = [os.path.join(args.input_dir, name)
                 for name in os.listdir(args.input_dir)
                 if name.endswith(args.extension)]
    report_kind = args.report
    mode = args.mode

    invA_sets = _load_pickle(os.path.join(ex_dir, "invA_mers_dict"))
    rpoB_sets = _load_pickle(os.path.join(ex_dir, "rpoB_mers_dict"))

    # Reference k-mer sets in the positional order kmer_lists() is called
    # with: both 'allmers' sets, then the invA-specific sets, then the
    # rpoB-specific sets.
    invA_clades = ('bongori', 'I', 'IIa', 'IIb', 'IIIa', 'IIIb', 'IV', 'VI', 'VII', 'VIII')
    rpoB_taxa = ('bongori', 'S_enterica', 'Escherichia', 'Listeria_ss', 'L_mono')
    reference_sets = (
        [invA_sets['allmers'], rpoB_sets['allmers']]
        + [invA_sets['uniqmers_' + clade] for clade in invA_clades]
        + [rpoB_sets['uniqmers_' + taxon] for taxon in rpoB_taxa]
    )

    def analyse(path):
        """Return (locus_scores, coverages, total_reads) for one input file."""
        return kmer_lists(path, kmer_size, *reference_sets, mode)

    # todo: run kmer_lists() once, create list of tuples containing data to
    # be used for different reports
    if report_kind == 'taxonomy':
        print('file\trpoB\tinvA\texpected coverage')
        for path in files:
            _, coverages, reads = analyse(path)
            pretty_covs = [round(cov, 1) for cov in coverages]
            rpoB, invA, expected_cov = report_taxon(pretty_covs, get_av_read_length(path), reads)
            print('\t'.join([
                os.path.basename(path),
                '{}[{}]'.format(rpoB[0], rpoB[1]),
                '{}[{}]'.format(invA[0], invA[1]),
                str(round(expected_cov, 1)),
            ]))
        return

    # NOTE(review): the header reads "S. enterica(rpoB)" and "subsp.IV"
    # (missing spaces). It is kept byte-identical because downstream parsers
    # may depend on the exact column names.
    print(
        'file\tListeria sensu stricto (rpoB)\tL. monocytogenes (rpoB)\tEscherichia spp. (rpoB)\tS. bongori (rpoB)\tS. enterica'
        '(rpoB)\tS. bongori (invA)\tsubsp. I (invA)\tsubsp. II (clade a: invA)\tsubsp. II'
        ' (clade b: invA)\tsubsp. IIIa (invA)\tsubsp. IIIb (invA)\tsubsp.IV (invA)\tsubsp. VI (invA)\tsubsp. VII (invA)'
        '\tsubsp. II (clade VIII : invA)')
    for path in files:
        locus_scores, coverages, _ = analyse(path)
        if report_kind == 'percentage':
            columns = [str(round(score)) for score in locus_scores]
        else:
            columns = [str(round(cov, 1)) for cov in coverages]
        print(os.path.basename(path) + '\t' + '\t'.join(columns))
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
382 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
383 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
# Run the command-line workflow only when this file is executed as a script;
# importing the module must not trigger an analysis.
if __name__ == '__main__':
    main()
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
386 |